####################################################################################################
# google_scrape.py

import os
import requests
import time
import random
from bs4 import BeautifulSoup
from scrapers.headers_parser import header_parse
from collections import defaultdict
import base64

__author__ = "Matan Ram"
__version__ = "1.0.1"


def gquery_to_b64(query):
    """
    convert a Google query to a base64 string that is safe to use as a cache file name
    :param query: str
    :return: str
    """
    return base64.b64encode(query.encode('ascii')).decode('ascii')


def b64_to_gquery(fname):
    """
    convert a base64 cache file name back to the original Google query
    :param fname: str
    :return: str
    """
    return base64.b64decode(fname).decode('ascii')
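
# Illustrative round-trip sketch of the two helpers above (not part of the original file):
# the query is base64-encoded so it can double as a cache file name, and decoded back when
# a cached page is looked up.
#
#   >>> q = "site:example.com inurl:login"
#   >>> b64_to_gquery(gquery_to_b64(q)) == q
#   True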


class Google(object):
    GOOGLE_ADDR = "https://www.google.com"
    CACHE = "/tmp/cache"

    def __init__(self, save_files=False, cache=False):
        with open("/home/matan/Documents/hde65/web_requests/scrapers/new_headers.txt", 'r') as f:
            self.headers = header_parse(f.read())
        self.session = requests.session()
        self.links = defaultdict(list)
        self.save_files = save_files
        self.cache = cache
        self.first_request = True
        if not os.path.exists(Google.CACHE):
            os.makedirs(Google.CACHE)

    def create_search_keyword(self, keywords):
        """
        get keywords and return them chained with a + delimiter
        :param keywords: str of keywords
        :return: str
        """
        keywords = keywords.strip()
        return '+'.join(keywords.split(' '))

    def search(self, keyword, pages=1):
        """
        search Google for the keyword and collect result links from the requested number of pages
        :param keyword: str
        :param pages: int
        :return: list of (title, url) tuples collected for this keyword
        """

        print(f"Loading links from Google by keyword {keyword}...")

        page_number = pages
        # g_keyword = self.create_search_keyword(keyword)
        params = {
            'q': keyword
        }
        pcontent = ""
        if self.cache:
            with open(f"{Google.CACHE}/{gquery_to_b64(keyword)}_p0", 'r') as fp:
                pcontent = fp.read()
        else:
            try:
                if self.first_request:
                    res = self.session.get(Google.GOOGLE_ADDR + '/search', params=params, headers=self.headers,
                                           timeout=10)
                    self.first_request = False
                else:
                    res = self.session.get(Google.GOOGLE_ADDR + '/search', params=params, timeout=10)

                if res.ok:
                    pcontent = res.text
                    if self.save_files:
                        with open(f"{Google.CACHE}/{gquery_to_b64(keyword)}_p0", 'w') as fp:
                            fp.write(pcontent)
            except Exception as e:
                print(f"Getting data from Google search failed\n {e}")

        self.links[keyword] += self._parse_result(pcontent)

        while pages > 1:
            if self.cache:
                pcontent = ""
                try:
                    with open(f"{Google.CACHE}/{gquery_to_b64(keyword)}_p{page_number - pages + 1}", 'r') as fp:
                        pcontent = fp.read()
                except OSError:
                    break
            else:
                try:
                    # the "next page" link is taken from the page fetched in the previous iteration
                    link = self._next_page(pcontent)
                    if not link:
                        break
                    time.sleep(random.randint(2, 5))
                    res = self.session.get(Google.GOOGLE_ADDR + link, timeout=10)
                    pcontent = res.text if res.ok else ""
                    if self.save_files:
                        with open(f"{Google.CACHE}/{gquery_to_b64(keyword)}_p{page_number - pages + 1}", 'w') as fp:
                            fp.write(pcontent)
                except Exception as e:
                    print(f"Getting data from Google failed\n {e}")
                    break
            self.links[keyword] += self._parse_result(pcontent)
            pages -= 1
        return self.links[keyword]

    def print_links_by_query(self, keyword):
        """
        print all links collected for the given keyword
        :param keyword: str
        :return: None
        """
        if keyword in self.links:
            for link in self.links[keyword]:
                print(f'{link[0]} - {link[1]}')

    def print_all_links(self):
        """
        print the collected links of every stored query
        :return: None
        """
        for kword in self.links:
            self.print_links_by_query(kword)

    def _parse_result(self, html):
        """
        get html str and parse result titles and their links
        :param html: str
        :return: list of (title, url) tuples
        """
        soup = BeautifulSoup(html, 'html.parser')
        links = []
        divs = soup.select('.ZINbbc .kCrYT:first-child')
        for div in divs:
            try:
                url = div.select_one('a')['href'].split("?q=")[-1]
                title = div.select_one('h3 span').text
                links.append((title, url))
            except (TypeError, KeyError, AttributeError):
                # result block without a link/title - skip it
                pass
        return links

    def _next_page(self, html):
        """
        get the html page content and return the next page link
        :param html: str
        :return: str or None
        """

        soup = BeautifulSoup(html, 'html.parser')
        links = soup.select("footer .nBDE1b.G5eFlf")
        if links:
            # the ">" arrow marks the "next page" link
            for link in links:
                if '>' in link.text:
                    return link.get('href')
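

# Minimal usage sketch (illustrative only, not part of the original file). It assumes the
# headers file hard-coded in Google.__init__ exists and that Google serves the lightweight
# HTML layout the selectors above target; the query string is just an example.
if __name__ == '__main__':
    g = Google(save_files=True, cache=False)
    results = g.search("site:example.com inurl:login", pages=2)  # list of (title, url) tuples
    g.print_links_by_query("site:example.com inurl:login")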


#####################################################################################
# linkedin.py
import requests
import time
import random
from bs4 import BeautifulSoup
from scrapers.headers_parser import header_parse
import json
import pprint

__author__ = "Matan Ram"
__version__ = "1.0.1"


class Linkedin(object):
    Link = "https://www.linkedin.com/voyager/api/voyagerSearchDashTypeahead"

    def __init__(self, headers_file):
        with open(headers_file, 'r') as f:
            self.headers = header_parse(f.read())
        self.session = requests.session()
        self.companies = {}

    def search(self, keyword):
        """
        query the LinkedIn Voyager typeahead API with the given keyword
        :param keyword: str
        :return: dict (parsed JSON response, empty dict on failure)
        """

        params = {
            'decorationId': "com.linkedin.voyager.dash.deco.search.typeahead.GlobalTypeaheadCollection-6",
            'query': keyword,
            'q': 'globalTypeahead'
        }

        res = self.session.get(Linkedin.Link, params=params, headers=self.headers)
        if res.ok:
            return res.json()
        return {}

    def parse_company_data(self, json_data):
        """
        parse json data and update the companies dict
        :param json_data: json obj
        :return: None
        """
        for r in json_data['data']['elements']:
            if 'trackingUrn' in r['entityLockupView'] and r['entityLockupView']['subtitle']['text'].startswith(
                    '• Company •'):
                title = r['entityLockupView']['title']['text']
                self.companies[title] = {'title': title,
                                         'url': r['entityLockupView']['navigationUrl'],
                                         'company_id': r['entityLockupView']['trackingUrn'].split(':')[-1],
                                         'sub_text': r['entityLockupView']['subtitle']['text'],
                                         'employees': []}
                # print(r['entityLockupView']['title']['text'] + '-' + r['entityLockupView']['navigationUrl'] + '\t-' +
                #       r['entityLockupView']['trackingUrn'] + '\t-\t' + r['entityLockupView']['subtitle']['text'])

    def get_employees_data(self, company_title, pages=1):
        """
        get employee data for the given company name and store it in self.companies
        :param company_title: str
        :param pages: int
        :return: None
        """
        url = 'https://www.linkedin.com/search/results/people/'
        params = {
            'facetCurrentCompany': '["{}"]',
            'origin': 'COMPANY_PAGE_CANNED_SEARCH',
            'page': 1,
        }

        for company in self.companies:
            if company_title.lower() in company.lower():
                for i in range(pages):
                    params['facetCurrentCompany'] = f'["{self.companies[company]["company_id"]}"]'
                    params['page'] = i + 1
                    response = requests.get(url=url, headers=self.headers, params=params)
                    if response.ok:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        code_tags = [r for r in soup.select('code') if 'About ' in r.text and ' results' in r.text]
                        if code_tags:
                            json_data = json.loads(code_tags[0].text)
                            self.companies[company]['employees'] += Linkedin.parse_employee_data(json_data)

                    time.sleep(random.randint(3, 6))

    @staticmethod
    def parse_employee_data(j_emp_data, verbose=False):
        """
        parse json data of employees
        :param j_emp_data: json object
        :param verbose: boolean
        :return: list
        """
        results = []
        try:
            for elem in j_emp_data['data']['elements']:
                if elem['type'] == 'SEARCH_HITS':
                    for emp in elem['elements']:
                        if emp['title']['text'] != 'LinkedIn Member':
                            results.append({'full_name': emp['title']['text'],
                                            'role': emp['headline']['text'],
                                            'url': emp['navigationUrl'],
                                            'e_id': emp['trackingUrn'].split(':')[-1]})
                            if verbose:
                                print(emp['title']['text'] + ' - ' + emp['headline']['text'] + ' - ' +
                                      emp['navigationUrl'] + ' - ' + emp['trackingUrn'])
        except Exception as e:
            print(f"Can't parse employees from json data\n{e}")
        return results

    def scrape_company(self, company_keyword, pages=1, rfile_path=""):
        """
        scrape company info and employees
        :param company_keyword: str
        :param pages: int
        :param rfile_path: path to previously saved info
        :return: dict of companies
        """

        json_data = self.get_links_from_file(rfile_path) if rfile_path else self.search(company_keyword)
        if json_data:
            self.parse_company_data(json_data)
            self.get_employees_data(company_keyword, pages=pages)
        else:
            print('No data to parse..')
        return self.companies

    def get_links_from_file(self, f_path):
        """
        load json data from the given file path
        :param f_path: str
        :return: json object
        """
        with open(f_path, 'r') as f:
            data = f.read()
        return json.loads(data)

    def print_results(self):
        """
        pretty print of the collected company and employee data
        :return: None
        """
        for i, comp in enumerate(self.companies):
            print(f'{i + 1}) Company:{comp},'
                  f'\tID:{self.companies[comp]["company_id"]},'
                  f'Sub_T:{self.companies[comp]["sub_text"]},'
                  f'URL:{self.companies[comp]["url"]}')
            # results[comp] = {'company_id':self.companies[comp]["company_id"],}
            print("\t***Employees***")
            for j, emp in enumerate(self.companies[comp]['employees']):
                print(f'\t{i + 1}.{j + 1})Full_name:{emp["full_name"]},'
                      f'ID:{emp["e_id"]},Role:{emp["role"]},'
                      f'url:{emp["url"]}')
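

# Minimal usage sketch (illustrative only, not part of the original file). It assumes a valid
# LinkedIn session: the headers file passed in must contain the cookie/CSRF headers the Voyager
# API expects, otherwise the requests above come back with errors. The path and keyword below
# are just examples.
if __name__ == '__main__':
    li = Linkedin(headers_file="linkedin_headers.txt")
    li.scrape_company("example company", pages=1)
    li.print_results()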


###############################################################################################
# recon.py

#!/usr/bin/env python

from scrapers.google_scrape import Google
from scrapers.linkedin import Linkedin

import os
import subprocess
import time
import requests
import pprint
import uuid
import smtplib
import re
import random
from concurrent.futures import ThreadPoolExecutor
import dns.resolver
from fierce import fierce
import functools
import ipaddress
import dns.zone
import sqlite3


class ReconDB(object):
    NEW_DB_SCRIPT = """BEGIN TRANSACTION;
CREATE TABLE IF NOT EXISTS "USERS" (
    "ID" INTEGER PRIMARY KEY AUTOINCREMENT,
    "FNAME" TEXT,
    "LNAME" TEXT,
    "ROLE" TEXT,
    "L_ID" TEXT,
    "L_URL" TEXT,
    "EMAIL" TEXT NOT NULL UNIQUE,
    "E_VERIFY" TEXT,
    "K_COMPANY" TEXT NOT NULL,
    "COMPANY_FULL_NAME" TEXT
);
CREATE TABLE IF NOT EXISTS "SERVERS" (
    "SID" INTEGER PRIMARY KEY AUTOINCREMENT,
    "S_NAME" TEXT NOT NULL UNIQUE,
    "IP_ADDR" TEXT NOT NULL UNIQUE,
    "LOCATION" TEXT,
    "SUBNETS" TEXT,
    "K_DOMAIN" TEXT
);
COMMIT;
"""

    def __init__(self, db_file_path=""):
        self.db_file = db_file_path if db_file_path else f'recov_{str(uuid.uuid4())[:4]}.db'
        self.db_con = None

    def connect(self):
        """
        connect to the sqlite db (creating the schema when the db file is new)
        :return: None
        """
        try:
            is_db_exist = os.path.exists(self.db_file)
            self.db_con = sqlite3.connect(self.db_file)
            # self.cur = self.db_con.cursor()

            if not is_db_exist:
                self.db_con.executescript(ReconDB.NEW_DB_SCRIPT)
            print(f'connected to db {self.db_file}')
        except Exception as e:
            print(f'Failed to connect to db {self.db_file}\n{e}')

    def close(self):
        """
        close the connection to the sqlite db
        :return: None
        """
        try:
            self.db_con.close()
            print(f"Closed db {self.db_file}.")
        except Exception:
            print(f'Failed to close db {self.db_file}')

    def add_user(self, email, company, fname="", lname="", l_id="", l_url="", role="", e_verify="UNKNOWN",
                 full_company_name=""):
        """
        add a user to the USERS table
        :param email: str
        :param company: str
        :param fname: str
        :param lname: str
        :param l_id: str
        :param l_url: str
        :param role: str
        :param e_verify: str
        :param full_company_name: str
        :return: None
        """
        try:
            self.db_con.execute("INSERT INTO USERS (FNAME,LNAME,ROLE,EMAIL,E_VERIFY,L_ID,L_URL,K_COMPANY,COMPANY_FULL_NAME) \
                                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                                (fname, lname, role, email, e_verify, l_id, l_url, company.lower(), full_company_name))
            self.db_con.commit()
        except Exception as e:
            # duplicate emails violate the UNIQUE constraint - ignore them
            pass
            # print(f"Failed to add user.\n{e}")

    def get_users(self, company):
        """
        get users by company
        :param company: str
        :return: list
        """
        results = []
        try:
            cursor = self.db_con.execute("SELECT * from USERS where K_COMPANY = ?", (company.lower(),))
            for row in cursor:
                results.append(
                    {"ID": row[0], "FNAME": row[1], "LNAME": row[2], "ROLE": row[3], "EMAIL": row[6],
                     "E_VERIFY": row[7],
                     "L_ID": row[4], "L_URL": row[5], "K_COMPANY": row[8], "COMPANY_FULL_NAME": row[9]})
        except Exception:
            pass
        return results

    def add_server(self, s_name, ip_addr, domain, location="", subnets=""):
        """
        add a server to the SERVERS table
        :param s_name: str
        :param ip_addr: str
        :param domain: str
        :param location: str
        :param subnets: str
        :return: None
        """
        try:
            self.db_con.execute("INSERT INTO SERVERS (S_NAME,IP_ADDR,LOCATION,SUBNETS,K_DOMAIN) \
                                 VALUES (?, ?, ?, ?, ? )", (s_name, ip_addr, location, subnets, domain.lower(),))
            self.db_con.commit()
            # print(
            #     f"added server {s_name},IP_ADDR={ip_addr},LOCATION={location},SUBNETS={subnets},DOMAIN={domain.lower()} ")
        except Exception as e:
            # duplicate server names/addresses violate the UNIQUE constraints - ignore them
            pass
            # print(
            #     f'Failed to add server S_NAME={s_name},IP_ADDR={ip_addr},LOCATION={location},SUBNETS={subnets},DOMAIN={domain.lower()}.\n{e}')

    def get_servers(self, domain):
        """
        get servers by domain/server name
        :param domain: str
        :return: list
        """
        results = []
        try:
            cursor = self.db_con.execute("SELECT * from SERVERS where K_DOMAIN = ?", (domain.lower(),))
            for row in cursor:
                results.append(
                    {"SID": row[0], "S_NAME": row[1], "IP_ADDR": row[2], "LOCATION": row[3], "SUBNETS": row[4],
                     "DOMAIN": row[5]})
        except Exception:
            pass
        return results
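
# Illustrative ReconDB usage (a sketch, not part of the original file); the db file name and
# the values below are made up.
#
#   db = ReconDB('example.db')
#   db.connect()                                     # creates USERS/SERVERS tables on first use
#   db.add_user(email='jane.doe@example.com', company='example', fname='Jane', lname='Doe')
#   db.add_server('mail.example.com', '203.0.113.10', 'example.com')
#   print(db.get_users('example'), db.get_servers('example.com'))
#   db.close()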


class ReconV(object):
    ADMIN_PAGE = '/admin'
    LOGIN_PAGE = '/login'
    PASTEBIN_URL = 'pastebin.com'
    EMAIL_PATTERN = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+'
    HUNTER_IO_KEY = os.environ['HUNTER_KEY']

    def __init__(self, domain, company, db_path=""):
        self.domain = domain
        self.company = company
        self.google_scraper = Google(save_files=True, cache=False)
        self.linkedin_scraper = Linkedin(
            headers_file="/home/matan/Documents/hde65/web_requests/scrapers/linkedin_headers.txt")
        self.reconDb = ReconDB(db_path)
        self.reconDb.connect()
        self.login_links = []
        self.admin_links = []
        self.pastebin_links = []
        self.emails = set()
        self.robot_links = {}
        self.hunter_emails = set()
        self.hosts_addr = {}
        self.hacker_target_domains = {}
        self.google_domains = {}
        self.wordlist_domains = {}
        self.fierce_info = {}

    @staticmethod
    def print_links(links):
        """
        pretty print links
        :param links: list of tuples [(str, str), ]
        :return: None
        """
        for i, link in enumerate(links):
            print(f'{i + 1}) {link[0]} - {link[1]}')

    @staticmethod
    def print_emails(emails):
        """
        pretty print of emails
        :param emails: list
        :return: None
        """
        for i, e in enumerate(emails):
            print(f'{i + 1}) {e}')

    def start_url_recon(self, domain, pages=1):
        """
        By the given domain keyword start the URL recon process
        :param domain: str
        :param pages: int
        :return: None
        """
        # TODO: check for domain pattern with re
        print("Start URL recon process..")
        self.admin_links = self.google_scraper.search(f"site:{domain} inurl:{ReconV.ADMIN_PAGE}", pages)
        time.sleep(random.randint(3, 6))
        self.login_links = self.google_scraper.search(f"site:{domain} inurl:{ReconV.LOGIN_PAGE}", pages)
        time.sleep(random.randint(3, 6))
        p_links = self.google_scraper.search(
            f'site:{ReconV.PASTEBIN_URL} intext:"*@*.{domain}:*"', pages)
        # self.pastebin_links
        self.pastebin_links = [(title, link.split('&')[0]) for title, link in p_links]

        time.sleep(random.randint(3, 6))
        self.robot_links = self.check_robots(domain)

    def get_email_from_links(self, links):
        """
        Fetch the given links and parse email addresses out of their content
        :param links: list
        :return: set
        """
        emails_set = set()

        def get_email(emails, link):
            # reuse the headers loaded by the Google scraper (ReconV keeps no headers of its own)
            headers = self.google_scraper.headers
            try:
                result = requests.get(link, headers=headers, timeout=10)
                if result.ok:
                    for email in re.findall(ReconV.EMAIL_PATTERN, result.text):
                        emails.add(email.lower())
            except Exception as e:
                print(f"Load emails from {link} failed.\n{e}")

        with ThreadPoolExecutor(max_workers=3) as executor:
            future_to_url = {executor.submit(get_email, emails_set, url): url for url in links}
            executor.shutdown(wait=True)

        return emails_set
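
    # Illustrative example of what EMAIL_PATTERN extracts (a sketch, not part of the original
    # file); the address is made up.
    #
    #   >>> re.findall(ReconV.EMAIL_PATTERN, "contact us at info@example.com for details")
    #   ['info@example.com']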

    @staticmethod
    def get_emails_from_hunter(domain):
        """
        By the given domain get emails from Hunter.IO
        :param domain: str
        :return: list
        """
        print("Get emails from Hunter.IO..")
        params = {'domain': domain,
                  'api_key': ReconV.HUNTER_IO_KEY}
        link = 'https://api.hunter.io/v2/domain-search'
        try:
            res = requests.get(link, params=params)
            if res.ok:
                j_results = res.json()
                return [email['value'] for email in j_results['data']['emails']]
        except Exception as e:
            print(f"Get data from Hunter.io failed\n {e}")
        return []
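
    # For reference, a trimmed sketch of the Hunter.io domain-search response fields that
    # get_emails_from_hunter() relies on (only the keys read above; real responses contain
    # many more fields, and the addresses here are made up):
    #
    #   {"data": {"emails": [{"value": "info@example.com"}, {"value": "sales@example.com"}]}}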

    def start_email_recon(self, domain, pages=1):
        """
        By the given domain start the email recon
        :param domain: str
        :param pages: int
        :return: None
        """
        # get emails from google
        print("Start email recon process..")
        results = self.google_scraper.search(f"site:{domain} intext:@{domain}", pages)
        self.emails = self.get_email_from_links([result[1] for result in results])
        # get emails from hunter.io
        self.hunter_emails = set(ReconV.get_emails_from_hunter(domain))

    def check_robots(self, domain):
        """
        By the given domain check whether robots files exist; if found, return their links
        :param domain: str
        :return: dict
        """
        print("Check for robots files...")
        links = {}
        for rfile in ['robot.txt', 'robots.txt']:
            try:
                for htext in ['http', 'https']:
                    link = f"{htext}://www.{domain}/{rfile}"
                    res = requests.get(link, timeout=10)
                    if res.ok:
                        links[rfile] = link
            except Exception:
                pass
        return links

    def start_linkedin_recon(self, company_keyword, pages):
        """
        By the given company name start the LinkedIn recon
        :param company_keyword: str
        :param pages: int
        :return: dict of companies
        """
        print("Start Linkedin recon process..")
        return self.linkedin_scraper.scrape_company(company_keyword, pages)

    def start_full_recon(self, domain_name, company_name, d_pages=1, c_pages=1):
        """
        start the full recon process: URLs, emails, LinkedIn company info
        :param domain_name: str
        :param company_name: str
        :param d_pages: int
        :param c_pages: int
        :return: None
        """
        print(f"\n\n*** Domain:{domain_name} ***")
        self.hacker_target_domains = ReconV.find_hosts_hackertarget(domain=domain_name)  # dict with {domain: ip}
        print("hackertarget")
        pprint.pp(self.hacker_target_domains)
        print("domains from google")
        self.google_domains = self.find_hosts_google(domain_name, 1)
        pprint.pp(self.google_domains)  # dict {domain: ip}
        print("bruteforce")
        self.wordlist_domains = self.domain_brute_force_wordlist(domain_name, '/home/matan/.recon-ng/data/suffixes.txt')
        pprint.pp(self.wordlist_domains)
        fierce_args = {'domain': domain_name, 'traverse': 5, 'subdomains': None,
                       'subdomain_file': '/home/matan/Documents/hde65/web_requests/lib/python3.8/site-packages/fierce/lists/default.txt',
                       'dns_servers': None, 'dns_file': None, 'tcp': False}
        print("fierce info")
        self.fierce_info = self.simulate_fierce(
            **fierce_args)  # {subdomain: {host: subdomain, ip: ip, nearby: {ip: subdomain}}}
        pprint.pp(self.fierce_info)
        self.start_url_recon(domain_name, d_pages)
        print("\n******************* Potential Login Pages *******************")
        ReconV.print_links(self.login_links)
        print("\n******************* Potential Admin Pages *******************")
        ReconV.print_links(self.admin_links)
        print("\n******************* Potential Pastebin Pages *******************")
        ReconV.print_links(self.pastebin_links)
        print(f"\n\n*** Company:{company_name} ***")
        self.start_linkedin_recon(company_name, c_pages)
        print(f"\nLinkedin info by keyword={company_name} ")
        self.linkedin_scraper.print_results()
        self.start_email_recon(domain_name, d_pages)
        print("\n******************* Potential Emails From Google ******************")
        ReconV.print_emails(self.emails)
        print("\n******************* Potential Emails From Hunter.IO ******************")
        ReconV.print_emails(self.hunter_emails)

    def save_domains_results(self, domains_data):
        for d_name, ip in domains_data.items():
            self.reconDb.add_server(d_name, ip, self.domain)

    def save_users_results(self, users_data):
        for email, data in users_data.items():
            self.reconDb.add_user(email=email, company=self.company, fname=data['fname'], lname=data['lname'],
                                  e_verify=data['e_verify'])

    def save_linkedin_results(self, linkedin_data):
        for email, data in linkedin_data.items():
            self.reconDb.add_user(email=email, company=self.company,
                                  fname=data['fname'],
                                  lname=data['lname'],
                                  e_verify=data['e_verify'],
                                  role=data['role'],
                                  l_url=data['l_url'],
                                  l_id=data['l_id'],
                                  full_company_name=data['c_full_name'])

    @staticmethod
    def resolve_host(host):
        ip_addr = ""
        try:
            record = dns.resolver.query(host, 'A')
            ip_addr = str(record[0])
        except Exception:
            # no A record for this host
            pass
            # print(f"no ip addr for {host}")
        return ip_addr

    @staticmethod
    def _update_resolve_domain(resolved_dom, host):
        ip_addr = ReconV.resolve_host(host)
        if ip_addr:
            resolved_dom[host] = ip_addr

    def find_hosts_google(self, domain_name, pages):
        # exclude already-known hosts from the query so each page can surface new subdomains
        query = f"site:{domain_name} -site:www.{domain_name}"
        domains = {domain_name}
        for page in range(pages):
            results = self.google_scraper.search(query, pages=1)
            if not results:
                break
            for title, link in results:
                new_domain = link.split('//')[-1].split('/')[0]
                if new_domain not in domains:
                    query += f' -site:{new_domain}'
                    if new_domain:
                        domains.add(new_domain)
        resolved_domains = {}

        with ThreadPoolExecutor(max_workers=10) as executor:
            future_to_url = {executor.submit(ReconV._update_resolve_domain, resolved_domains, dm): dm for dm in domains}
            executor.shutdown(wait=True)

        return resolved_domains

    @staticmethod
    def find_hosts_hackertarget(domain):
        results = {}
        headers = {'Host': 'api.hackertarget.com',
                   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0',
                   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                   'Accept-Language': 'en-US,en;q=0.5',
                   'Connection': 'keep-alive',
                   'Upgrade-Insecure-Requests': '1'}

        url = "https://api.hackertarget.com/hostsearch"
        resp = requests.get(url=url, params={'q': domain}, headers=headers)
        if resp.ok:
            for record in resp.text.split('\n'):
                try:
                    host, addr = record.split(',')
                    results[host] = addr
                except Exception:
                    pass
        return results
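
    # The hostsearch endpoint answers with plain text, one "hostname,ip" pair per line, which is
    # what the split(',') above expects. A made-up example of the format:
    #
    #   mail.example.com,203.0.113.10
    #   vpn.example.com,203.0.113.22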

    def domain_brute_force_wordlist(self, domain, wordlist_path):

        resolved_domains = {}

        def words_iterator():
            with open(wordlist_path, 'r') as fp:
                while True:
                    word = fp.readline().strip()
                    if not word:
                        break
                    yield word

        with ThreadPoolExecutor(max_workers=30) as executor:
            for word in words_iterator():
                executor.submit(ReconV._update_resolve_domain, resolved_domains, f'{word}.{domain}')
            executor.shutdown(wait=True)

        return resolved_domains

    @staticmethod
    def email_verifyier(addressToVerify):
        verify_result = 'UNKNOWN'
        try:

            domain = addressToVerify.split('@')[-1]
            records = dns.resolver.query(domain, 'MX')
            mxRecord = records[0].exchange
            mxRecord = str(mxRecord)

            host = "avi.tzanani"
            server = smtplib.SMTP()
            server.set_debuglevel(0)

            # SMTP Conversation
            con_res = server.connect(mxRecord)
            helo_res = server.helo(host)
            mail_res = server.mail('david@gmail.com')
            code, message = server.rcpt(str(addressToVerify))
            quit_res = server.quit()

            if code == 250:
                # print(f'email:{addressToVerify} ,code:{code} , msg:{message} result:Success')
                return 'SUCCESS'
            elif code == 550:
                # print(f'email:{addressToVerify} ,code:{code} , msg:{message} result:Bad')
                return 'BAD'
            else:
                # print(f'email:{addressToVerify} ,code:{code} , msg:{message} result:UNKNOWN')
                return 'UNKNOWN'
        except Exception:
            pass
        finally:
            time.sleep(1)
        return verify_result
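
    # Illustrative call (a sketch, not part of the original file); the address is made up.
    # A 250 reply to RCPT maps to 'SUCCESS', 550 to 'BAD', and anything else (or an exception,
    # e.g. no MX record) to 'UNKNOWN':
    #
    #   result = ReconV.email_verifyier("jane.doe@example.com")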

    def simulate_fierce(self, **kwargs):
        """
        run a fierce-style subdomain scan using the fierce library primitives
        :param kwargs: fierce options (domain, traverse, subdomains, subdomain_file, dns_servers, dns_file, tcp)
        :return: dict {subdomain: {'host': subdomain, 'ip': ip, 'nearby': {ip: subdomain}}}
        """
        results = {}
        resolver = dns.resolver.Resolver()
        resolver = fierce.update_resolver_nameservers(
            resolver,
            kwargs['dns_servers'],
            kwargs['dns_file']
        )
        if not kwargs.get("domain"):
            return results
        domain = dns.name.from_text(kwargs['domain'])
        subdomains = fierce.get_subdomains(
            kwargs["subdomains"],
            kwargs["subdomain_file"]
        )

        filter_func = None
        expander_func = fierce.default_expander
        if kwargs.get("traverse"):
            expander_func = functools.partial(fierce.traverse_expander, n=kwargs["traverse"])

        unvisited = fierce.unvisited_closure()

        for subdomain in subdomains:
            url = fierce.concatenate_subdomains(domain, [subdomain])
            record = fierce.query(resolver, url, record_type='A', tcp=kwargs["tcp"])

            if record is None or record.rrset is None:
                continue

            ips = [rr.address for rr in record.rrset]
            ip = ipaddress.IPv4Address(ips[0])

            ips = expander_func(ip)
            unvisited_ips = unvisited(ips)

            nearby = fierce.find_nearby(
                resolver,
                unvisited_ips,
                filter_func=filter_func
            )
            results[url.to_text(omit_final_dot=True)] = {'host': url.to_text(omit_final_dot=True), 'ip': ip.compressed,
                                                         'nearby': nearby}
        return results

    def simulate_fierce_p(self, domain):
        """
        run the external fierce binary and parse its "ip<TAB>domain" output lines
        :param domain: str
        :return: dict {domain: {'domain': domain, 'ip': ip, 'subnet': subnet}}
        """
        results = {}
        p = subprocess.Popen(f"/usr/bin/fierce -dns {domain} -threads 30 ", stdout=subprocess.PIPE, shell=True)
        output, err = p.communicate()

        if output:
            for line in output.decode('utf-8').split("\n"):
                domain = re.match(r'(?P<ip>^\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b)\t(?P<domain>.*$)', line)
                if domain:
                    domain_details = domain.groupdict()
                    subnet = ".".join(domain_details['ip'].split('.')[:3]) + ".0-255"
                    results[domain_details['domain']] = {'domain': domain_details['domain'],
                                                         'ip': domain_details['ip'], 'subnet': subnet}
                # elif '0-255' in line:
                #     print("line:", line.strip().split(' : ')[0])
        return results

    @staticmethod
    def printTable(myDict, colList=None):
        """ Pretty print a list of dictionaries (myDict) as a dynamically sized table.
        If column names (colList) aren't specified, they will show in random order.
        """
        if not colList:
            colList = list(myDict[0].keys() if myDict else [])
        myList = [colList]  # 1st row = header
        for item in myDict:
            myList.append([str(item[col] if item[col] is not None else '') for col in colList])
        colSize = [max(map(len, col)) for col in zip(*myList)]
        formatStr = ' | '.join(["{{:<{}}}".format(i) for i in colSize])
        myList.insert(1, ['-' * i for i in colSize])  # Separating line
        for item in myList:
            print(formatStr.format(*item))

    def start_reconV(self):
        """
        interactive menu that drives the recon options and stores the results in the db
        :return: None
        """
        exit_op = False
        while not exit_op:
            g_opt = input(
                "1)Url recon\n2)Email recon\n3)Domain recon\n4)Show servers\n5)Show users\ne)exit\nenter your choice:")
            if g_opt in ['1', '2', '3', '4', '5', 'e']:
                if g_opt == 'e':
                    print('Bye...')
                    exit_op = True
                elif g_opt == '1':
                    while True:
                        i_opt = input("1)Admin\n2)Login\n3)Pastebin\n4)robots.txt\nb)back\ne)exit\nenter your choice:")
                        if i_opt in ['1', '2', '3', '4', 'b', 'e']:
                            if i_opt == 'b':
                                break
                            elif i_opt == 'e':
                                print('Bye...')
                                exit_op = True
                                break
                            elif i_opt == '1':
                                self.admin_links = self.google_scraper.search(
                                    f"site:{self.domain} inurl:{ReconV.ADMIN_PAGE}", pages=1)
                                time.sleep(random.randint(3, 6))
                                ReconV.print_links(self.admin_links)
                            elif i_opt == '2':
                                self.login_links = self.google_scraper.search(
                                    f"site:{self.domain} inurl:{ReconV.LOGIN_PAGE}", pages=1)
                                time.sleep(random.randint(3, 6))
                                ReconV.print_links(self.login_links)
                            elif i_opt == '3':
                                p_links = self.google_scraper.search(
                                    f'site:{ReconV.PASTEBIN_URL} intext:"*@*.{self.domain}:*"',
                                    pages=1)
                                # self.pastebin_links
                                self.pastebin_links = [(title, link.split('&')[0]) for title, link in p_links]
                                time.sleep(random.randint(3, 6))
                                ReconV.print_links(self.pastebin_links)
                            elif i_opt == '4':
                                self.robot_links = self.check_robots(self.domain)
                                pprint.pprint(self.robot_links)
                        else:
                            print("Invalid input.")
                elif g_opt == '2':
                    while True:
                        i_opt = input("1)Hunter.io\n2)google\n3)Linkedin\nb)back\ne)exit\nenter your choice:")
                        if i_opt in ['1', '2', '3', 'b', 'e']:
                            if i_opt == 'b':
                                break
                            elif i_opt == 'e':
                                print('Bye...')
                                exit_op = True
                                break
                            elif i_opt == '1':
                                self.hunter_emails = set(ReconV.get_emails_from_hunter(self.domain))
                                self.hunter_emails = {
                                    email: {'email': email, 'fname': '', 'lname': '',
                                            'e_verify': self.email_verifyier(email)} for email
                                    in self.hunter_emails}
                                self.save_users_results(self.hunter_emails)

                            elif i_opt == '2':
                                results = self.google_scraper.search(f"site:{self.domain} intext:@{self.domain}",
                                                                     pages=1)
                                temp_emails = [e for e in self.get_email_from_links([result[1] for result in results])
                                               if self.domain in e]
                                self.emails = {
                                    email: {'email': email, 'fname': '', 'lname': '',
                                            'e_verify': self.email_verifyier(email)} for email
                                    in temp_emails}
                                self.save_users_results(self.emails)
                            elif i_opt == '3':
                                temp_linkedin_results = self.start_linkedin_recon(self.company, pages=1)
                                results = {}
                                for comp, c_info in temp_linkedin_results.items():
                                    for emp_info in c_info['employees']:
                                        fname, lname = emp_info['full_name'].split(' ')[:2]
                                        email = f'{fname}.{lname}@{self.domain}'
                                        role = emp_info['role']
                                        lurl = emp_info['url']
                                        lid = emp_info['e_id']
                                        results[email] = {'email': email, 'fname': fname, 'lname': lname,
                                                          'role': role, 'l_id': lid, 'c_full_name': comp,
                                                          'e_verify': self.email_verifyier(email), 'l_url': lurl}
                                self.save_linkedin_results(results)
                        else:
                            print("Invalid input.")
                elif g_opt == '3':
                    while True:
                        i_opt = input(
                            "1)HackerTarget\n2)google\n3)bruteforce\n4)fierce\nb)back\ne)exit\nenter your choice:")
                        if i_opt in ['1', '2', '3', '4', 'b', 'e']:
                            if i_opt == 'b':
                                break
                            elif i_opt == 'e':
                                print('Bye...')
                                exit_op = True
                                break
                            elif i_opt == '1':
                                self.hacker_target_domains = ReconV.find_hosts_hackertarget(
                                    domain=self.domain)  # dict with {domain: ip}
                                self.save_domains_results(self.hacker_target_domains)
                            elif i_opt == '2':
                                self.google_domains = self.find_hosts_google(self.domain, 1)
                                self.save_domains_results(self.google_domains)

                            elif i_opt == '3':
                                self.wordlist_domains = self.domain_brute_force_wordlist(
                                    self.domain, '/home/matan/.recon-ng/data/suffixes.txt')
                                self.save_domains_results(self.wordlist_domains)

                            elif i_opt == '4':
                                fierce_args = {'domain': self.domain, 'traverse': 5, 'subdomains': None,
                                               'subdomain_file': '/home/matan/Documents/hde65/web_requests/lib/python3.8/site-packages/fierce/lists/default.txt',
                                               'dns_servers': None, 'dns_file': None, 'tcp': False}
                                self.fierce_info = self.simulate_fierce(
                                    **fierce_args)  # {subdomain: {host: subdomain, ip: ip, nearby: {ip: subdomain}}}
                                self.save_domains_results({k: v['ip'] for k, v in self.fierce_info.items()})
                        else:
                            print("Invalid input.")
                elif g_opt == '4':
                    pprint.pp(self.reconDb.get_servers(self.domain))
                    ReconV.printTable(self.reconDb.get_servers(self.domain))
                elif g_opt == '5':
                    pprint.pp(self.reconDb.get_users(self.company))
                    ReconV.printTable(self.reconDb.get_users(self.company))

            else:
                print("Invalid input.")
        self.reconDb.close()


if __name__ == '__main__':
    domain_key = input("Enter Domain Name:")
    company_key = input("Enter Company Name:")
    db_file_path = input("Enter path of db file (if it does not exist just press ENTER):")
    if domain_key and company_key:
        recon = ReconV(domain=domain_key, company=company_key, db_path=db_file_path)
        recon.start_reconV()
        # recon.start_full_recon(domain_key, company_key)
        print("finish")
    else:
        print("Invalid input.")
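
# Running the script (illustrative note, not part of the original file): ReconV.HUNTER_IO_KEY is
# read from the environment at class-definition time, so the HUNTER_KEY variable must be set
# before recon.py is imported or run, e.g.:
#
#   $ export HUNTER_KEY=<your hunter.io api key>
#   $ python recon.py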