# Paste metadata: posted May 25, 2020, 01:42 AM ("5 years ago" at scrape time).
import re
import time


def append_file(filename, data):
    """Append ``data`` to the UTF-8 text file ``filename``.

    The file is created if it does not exist yet. ``data`` is written
    verbatim: no separator or trailing newline is added.

    Args:
        filename: Path of the file to append to.
        data: Text to write at the end of the file.
    """
    # Mode 'a' creates a missing file and always writes at the end of it.
    with open(filename, encoding="utf-8", mode='a') as f:
        f.write(data)


def search_emails(filename, searches, chunk_size=1024*64):
    """Scan a text file for e-mail addresses and sort them into per-term files.

    The input is read ``chunk_size`` characters at a time, so it does not
    have to fit in memory. An address is recognised only when it is directly
    followed by a line break or sits at the very end of the file. Every
    address that contains a search term as a case-sensitive substring is
    appended, one address per line, to ``output-<term>.txt`` (a path relative
    to the current working directory). The output file of every term is
    created even when nothing matches it. When the scan is done the elapsed
    wall-clock time is printed.

    Args:
        filename: Path of the UTF-8 text file to scan.
        searches: Iterable of substrings to look for. Duplicate terms are
            ignored so that no address is written more than once per term.
        chunk_size: Number of characters requested per read.
    """
    start_time = time.time()

    # De-duplicate (keeping order) and materialise, so any iterable works and
    # a repeated term cannot multiply the output.
    terms = list(dict.fromkeys(searches))

    # No flags are needed: \w already covers both letter cases and the
    # pattern has no ^/$ anchors. Flags must never be handed to
    # Pattern.findall() -- its second positional argument is the start
    # offset ``pos``, so doing that silently skips the head of the text.
    pattern = re.compile(r'([\w0-9._-]+@[\w0-9._-]+\.[\w0-9_-]+)\n')

    with open(filename, encoding="utf-8", mode='r') as f:
        left_over = ''
        at_eof = False
        while not at_eof:
            chunk = f.read(chunk_size)
            if chunk:
                # Only complete lines are searched now; the trailing partial
                # line is carried over and glued to the front of the next
                # read so an address split across two reads is not lost.
                complete, newline, left_over = (left_over + chunk).rpartition('\n')
                text = complete + newline
            else:
                at_eof = True
                # The last line may lack a line break; supply one so the
                # pattern (which requires a trailing '\n') can still match.
                text = left_over + '\n' if left_over else ''

            emails = pattern.findall(text)
            for term in terms:
                matched = [email for email in emails if term in email]
                # Terminate every address with '\n' so that batches written
                # by successive iterations never run together on one line.
                append_file('output-' + term + '.txt',
                            ''.join(email + '\n' for email in matched))

    print('time: ', time.time() - start_time)


if __name__ == "__main__":
    # Substrings to look for; each term's matches go to "output-<term>.txt".
    searches = ["jeff", "eric", "@gmail.com"]
    search_emails('emails.txt', searches)