· 6 years ago · Jul 19, 2019, 12:42 AM
# Scrape email addresses from Google search results. Use extra query parameters for targeted extraction; change the "site:" filter to scrape other sites.
2
import re
import sys

import requests
from bs4 import BeautifulSoup
6
# Collect the distinct email addresses that appear in the raw text of a series
# of Google search result pages, then print how many were found.

# Search endpoint; the query string is built by requests from `payload`.
url = 'https://www.google.com/search?'

# Each phrase is quoted on its own so the query reads
# "@gmail.com" OR "@yahoo.com" (the closing quote after gmail.com was missing).
payload = {
    'q': 'site:in.linkedin.com "@gmail.com" OR "@yahoo.com"',
    'start': 0,
}

# Raw-string regex for an email address. It is deliberately unanchored (no
# ^ / $) because it is searched for inside a whole page of text, and the
# domain part requires at least one dot-separated label after the host name.
email_pattern = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+'
email_regex = re.compile(email_pattern)  # compiled once, reused for every page

num = 100        # exclusive upper bound for the `start` result offset
page_step = 10   # increment between successive `start` offsets
final_list = set()  # a set, so duplicate addresses across pages collapse

for n in range(0, num, page_step):
    payload['start'] = n
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, params=payload, timeout=10)
    if not r.ok:
        # Report the failure instead of silently counting an error page as
        # "no emails"; remaining offsets are still attempted (best effort).
        print(
            'request for start={} failed with HTTP {}'.format(n, r.status_code),
            file=sys.stderr,
        )
        continue
    final_list.update(email_regex.findall(r.text))

print(len(final_list))