# Paste-site header: · 4 years ago · Apr 04, 2021, 08:08 PM
"""Bulk-load colon-separated ``USA*.txt`` files into a SQLite table.

Every file in the current directory whose name starts with ``USA`` and ends
with ``.txt`` is read line by line.  A line is accepted only when splitting
it on ``:`` yields exactly 14 fields.  Fields 10-12 are re-joined with ``:``
into the single ``seen`` column (presumably a timestamp whose own colons were
split apart -- TODO confirm against the data) and the trailing 14th field is
discarded.  Accepted rows are inserted into the ``usa`` table of
``fb_usa_dump.db`` in batches; once every file is loaded a unique index on
``phone_no`` is created.
"""
import os
import sqlite3

# Rows buffered in memory before each flush to the database.
BATCH_SIZE = 1000000
# Number of ":"-separated fields a well-formed input line splits into.
FIELD_COUNT = 14

insert_sql = "INSERT INTO usa VALUES (?,?,?,?,?,?,?,?,?,?,?);"


def parse_row(line):
    """Turn one raw input line into an 11-column tuple for ``insert_sql``.

    Args:
        line: One line of an input file, line terminator included.

    Returns:
        ``(phone_no, user_id, fname, lname, gender, city1, city2,
        relationship, company, seen, email)``, or ``None`` when the line does
        not split into exactly ``FIELD_COUNT`` fields.
    """
    fields = line.split(":")
    if len(fields) != FIELD_COUNT:
        return None
    (
        phone_no, user_id, fname, lname, gender, city1, city2,
        relationship, company, d1, d2, d3, email, _,
    ) = fields
    # The "seen" value was split into three pieces by the ":" separator;
    # stitch it back together.
    seen = ":".join((d1, d2, d3))
    return (
        phone_no, user_id, fname, lname, gender,
        city1, city2, relationship, company, seen, email,
    )


def insert_batch(rows):
    """Insert *rows* into ``usa`` inside one explicit transaction.

    The connection runs with ``isolation_level=None`` (autocommit), so
    without an explicit ``BEGIN`` every row would be committed on its own.
    Wrapping the batch makes it atomic and avoids the per-row commit cost.

    Args:
        rows: Sequence of 11-column tuples as produced by ``parse_row``.

    Raises:
        sqlite3.Error: Propagated after the batch has been rolled back.
    """
    db.execute("BEGIN;")
    try:
        db.executemany(insert_sql, rows)
    except BaseException:
        db.rollback()
        raise
    db.commit()


# isolation_level=None: the sqlite3 module opens no implicit transactions;
# insert_batch() manages them explicitly.
db = sqlite3.connect("fb_usa_dump.db", isolation_level=None)
cnt = 0      # rows handed to the database so far
skipped = 0  # non-blank lines rejected by parse_row
try:
    # Speed-over-durability settings for a one-off bulk load.
    db.execute('PRAGMA synchronous = OFF;')
    db.execute('PRAGMA journal_mode = MEMORY;')
    db.execute('PRAGMA secure_delete = OFF;')
    db.execute('PRAGMA locking_mode = EXCLUSIVE;')
    db.execute("""
        CREATE TABLE IF NOT EXISTS
        usa(
            phone_no, user_id, fname, lname,
            gender, city1, city2, relationship,
            company, seen, email
        );
    """)

    # sorted(): os.listdir() order is unspecified; sorting keeps runs
    # reproducible.
    for cur in sorted(os.listdir(".")):
        if not (cur.startswith("USA") and cur.endswith(".txt")):
            continue
        todo = []
        with open(cur, encoding="utf-8") as f:
            for line in f:
                record = parse_row(line)
                if record is None:
                    # Blank lines are not worth reporting; anything else is.
                    if line.strip():
                        skipped += 1
                    continue
                todo.append(record)
                if len(todo) >= BATCH_SIZE:
                    insert_batch(todo)
                    cnt += len(todo)
                    print("inserted %i records" % cnt)
                    todo = []
        # Flush whatever is left over from this file.
        if todo:
            cnt += len(todo)
            print("inserting %i records" % cnt)
            insert_batch(todo)

    if skipped:
        print("skipped %i malformed lines" % skipped)

    # IF NOT EXISTS mirrors the table creation so index creation itself does
    # not fail on a second run.
    # NOTE(review): once this unique index exists, re-loading rows whose
    # phone_no is already present will raise sqlite3.IntegrityError.
    db.execute("CREATE UNIQUE INDEX IF NOT EXISTS pnum ON usa (phone_no);")
finally:
    db.close()