####################################################################################################
# google_scrape.py

import os
import requests
import time
import random
from bs4 import BeautifulSoup
from scrapers.headers_parser import header_parse
from collections import defaultdict
import base64

__author__ = "Matan Ram"
__version__ = "1.0.1"


def gquery_to_b64(query):
    """
    convert a Google query to a base64 string that is safe to use as a cache file name
    :param query: str
    :return: str
    """
    return base64.b64encode(query.encode('ascii')).decode('ascii')


def b64_to_gquery(fname):
    """
    convert a base64 cache file name back to the original Google query
    :param fname: str
    :return: str
    """
    return base64.b64decode(fname).decode('ascii')
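
# Illustrative round-trip sketch of the two helpers above (not part of the original file):
# the query is base64-encoded so it can double as a cache file name, and decoded back when
# a cached page is looked up.
#
#   >>> q = "site:example.com inurl:login"
#   >>> b64_to_gquery(gquery_to_b64(q)) == q
#   True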


class Google(object):
    GOOGLE_ADDR = "https://www.google.com"
    CACHE = "/tmp/cache"

    def __init__(self, save_files=False, cache=False):
        with open("/home/matan/Documents/hde65/web_requests/scrapers/new_headers.txt", 'r') as f:
            self.headers = header_parse(f.read())
        self.session = requests.session()
        self.links = defaultdict(list)
        self.save_files = save_files
        self.cache = cache
        self.first_request = True
        if not os.path.exists(Google.CACHE):
            os.makedirs(Google.CACHE)

    def create_search_keyword(self, keywords):
        """
        get keywords and return them chained with a + delimiter
        :param keywords: str of keywords
        :return: str
        """
        keywords = keywords.strip()
        return '+'.join(keywords.split(' '))

    def search(self, keyword, pages=1):
        """
        search Google for the keyword and collect result links from the requested number of pages
        :param keyword: str
        :param pages: int
        :return: list of (title, url) tuples collected for this keyword
        """

        print(f"Loading links from Google by keyword {keyword}...")

        page_number = pages
        # g_keyword = self.create_search_keyword(keyword)
        params = {
            'q': keyword
        }
        pcontent = ""
        if self.cache:
            with open(f"{Google.CACHE}/{gquery_to_b64(keyword)}_p0", 'r') as fp:
                pcontent = fp.read()
        else:
            try:
                if self.first_request:
                    res = self.session.get(Google.GOOGLE_ADDR + '/search', params=params, headers=self.headers,
                                           timeout=10)
                    self.first_request = False
                else:
                    res = self.session.get(Google.GOOGLE_ADDR + '/search', params=params, timeout=10)

                if res.ok:
                    pcontent = res.text
                    if self.save_files:
                        with open(f"{Google.CACHE}/{gquery_to_b64(keyword)}_p0", 'w') as fp:
                            fp.write(pcontent)
            except Exception as e:
                print(f"Getting data from Google search failed\n {e}")

        self.links[keyword] += self._parse_result(pcontent)

        while pages > 1:
            if self.cache:
                pcontent = ""
                try:
                    with open(f"{Google.CACHE}/{gquery_to_b64(keyword)}_p{page_number - pages + 1}", 'r') as fp:
                        pcontent = fp.read()
                except OSError:
                    break
            else:
                try:
                    # the "next page" link is taken from the page fetched in the previous iteration
                    link = self._next_page(pcontent)
                    if not link:
                        break
                    time.sleep(random.randint(2, 5))
                    res = self.session.get(Google.GOOGLE_ADDR + link, timeout=10)
                    pcontent = res.text if res.ok else ""
                    if self.save_files:
                        with open(f"{Google.CACHE}/{gquery_to_b64(keyword)}_p{page_number - pages + 1}", 'w') as fp:
                            fp.write(pcontent)
                except Exception as e:
                    print(f"Getting data from Google failed\n {e}")
                    break
            self.links[keyword] += self._parse_result(pcontent)
            pages -= 1
        return self.links[keyword]

    def print_links_by_query(self, keyword):
        """
        print all links collected for the given keyword
        :param keyword: str
        :return: None
        """
        if keyword in self.links:
            for link in self.links[keyword]:
                print(f'{link[0]} - {link[1]}')

    def print_all_links(self):
        """
        print the collected links of every stored query
        :return: None
        """
        for kword in self.links:
            self.print_links_by_query(kword)

    def _parse_result(self, html):
        """
        get html str and parse result titles and their links
        :param html: str
        :return: list of (title, url) tuples
        """
        soup = BeautifulSoup(html, 'html.parser')
        links = []
        divs = soup.select('.ZINbbc .kCrYT:first-child')
        for div in divs:
            try:
                url = div.select_one('a')['href'].split("?q=")[-1]
                title = div.select_one('h3 span').text
                links.append((title, url))
            except (TypeError, KeyError, AttributeError):
                # result block without a link/title - skip it
                pass
        return links

    def _next_page(self, html):
        """
        get the html page content and return the next page link
        :param html: str
        :return: str or None
        """

        soup = BeautifulSoup(html, 'html.parser')
        links = soup.select("footer .nBDE1b.G5eFlf")
        if links:
            # the ">" arrow marks the "next page" link
            for link in links:
                if '>' in link.text:
                    return link.get('href')
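

# Minimal usage sketch (illustrative only, not part of the original file). It assumes the
# headers file hard-coded in Google.__init__ exists and that Google serves the lightweight
# HTML layout the selectors above target; the query string is just an example.
if __name__ == '__main__':
    g = Google(save_files=True, cache=False)
    results = g.search("site:example.com inurl:login", pages=2)  # list of (title, url) tuples
    g.print_links_by_query("site:example.com inurl:login")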


#####################################################################################
# linkedin.py
import requests
import time
import random
from bs4 import BeautifulSoup
from scrapers.headers_parser import header_parse
import json
import pprint

__author__ = "Matan Ram"
__version__ = "1.0.1"


class Linkedin(object):
    Link = "https://www.linkedin.com/voyager/api/voyagerSearchDashTypeahead"

    def __init__(self, headers_file):
        with open(headers_file, 'r') as f:
            self.headers = header_parse(f.read())
        self.session = requests.session()
        self.companies = {}

    def search(self, keyword):
        """
        query the LinkedIn Voyager typeahead API with the given keyword
        :param keyword: str
        :return: dict (parsed JSON response, empty dict on failure)
        """

        params = {
            'decorationId': "com.linkedin.voyager.dash.deco.search.typeahead.GlobalTypeaheadCollection-6",
            'query': keyword,
            'q': 'globalTypeahead'
        }

        res = self.session.get(Linkedin.Link, params=params, headers=self.headers)
        if res.ok:
            return res.json()
        return {}

    def parse_company_data(self, json_data):
        """
        parse json data and update the companies dict
        :param json_data: json obj
        :return: None
        """
        for r in json_data['data']['elements']:
            if 'trackingUrn' in r['entityLockupView'] and r['entityLockupView']['subtitle']['text'].startswith(
                    '• Company •'):
                title = r['entityLockupView']['title']['text']
                self.companies[title] = {'title': title,
                                         'url': r['entityLockupView']['navigationUrl'],
                                         'company_id': r['entityLockupView']['trackingUrn'].split(':')[-1],
                                         'sub_text': r['entityLockupView']['subtitle']['text'],
                                         'employees': []}
                # print(r['entityLockupView']['title']['text'] + '-' + r['entityLockupView']['navigationUrl'] + '\t-' +
                #       r['entityLockupView']['trackingUrn'] + '\t-\t' + r['entityLockupView']['subtitle']['text'])

    def get_employees_data(self, company_title, pages=1):
        """
        get employee data for the given company name and store it in self.companies
        :param company_title: str
        :param pages: int
        :return: None
        """
        url = 'https://www.linkedin.com/search/results/people/'
        params = {
            'facetCurrentCompany': '["{}"]',
            'origin': 'COMPANY_PAGE_CANNED_SEARCH',
            'page': 1,
        }

        for company in self.companies:
            if company_title.lower() in company.lower():
                for i in range(pages):
                    params['facetCurrentCompany'] = f'["{self.companies[company]["company_id"]}"]'
                    params['page'] = i + 1
                    response = requests.get(url=url, headers=self.headers, params=params)
                    if response.ok:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        code_tags = [r for r in soup.select('code') if 'About ' in r.text and ' results' in r.text]
                        if code_tags:
                            json_data = json.loads(code_tags[0].text)
                            self.companies[company]['employees'] += Linkedin.parse_employee_data(json_data)

                    time.sleep(random.randint(3, 6))

    @staticmethod
    def parse_employee_data(j_emp_data, verbose=False):
        """
        parse json data of employees
        :param j_emp_data: json object
        :param verbose: boolean
        :return: list
        """
        results = []
        try:
            for elem in j_emp_data['data']['elements']:
                if elem['type'] == 'SEARCH_HITS':
                    for emp in elem['elements']:
                        if emp['title']['text'] != 'LinkedIn Member':
                            results.append({'full_name': emp['title']['text'],
                                            'role': emp['headline']['text'],
                                            'url': emp['navigationUrl'],
                                            'e_id': emp['trackingUrn'].split(':')[-1]})
                            if verbose:
                                print(emp['title']['text'] + ' - ' + emp['headline']['text'] + ' - ' +
                                      emp['navigationUrl'] + ' - ' + emp['trackingUrn'])
        except Exception as e:
            print(f"Can't parse employees from json data\n{e}")
        return results

    def scrape_company(self, company_keyword, pages=1, rfile_path=""):
        """
        scrape company info and employees
        :param company_keyword: str
        :param pages: int
        :param rfile_path: path to previously saved info
        :return: dict of companies
        """

        json_data = self.get_links_from_file(rfile_path) if rfile_path else self.search(company_keyword)
        if json_data:
            self.parse_company_data(json_data)
            self.get_employees_data(company_keyword, pages=pages)
        else:
            print('No data to parse..')
        return self.companies

    def get_links_from_file(self, f_path):
        """
        load json data from the given file path
        :param f_path: str
        :return: json object
        """
        with open(f_path, 'r') as f:
            data = f.read()
        return json.loads(data)

    def print_results(self):
        """
        pretty print of the collected company and employee data
        :return: None
        """
        for i, comp in enumerate(self.companies):
            print(f'{i + 1}) Company:{comp},'
                  f'\tID:{self.companies[comp]["company_id"]},'
                  f'Sub_T:{self.companies[comp]["sub_text"]},'
                  f'URL:{self.companies[comp]["url"]}')
            # results[comp] = {'company_id':self.companies[comp]["company_id"],}
            print("\t***Employees***")
            for j, emp in enumerate(self.companies[comp]['employees']):
                print(f'\t{i + 1}.{j + 1})Full_name:{emp["full_name"]},'
                      f'ID:{emp["e_id"]},Role:{emp["role"]},'
                      f'url:{emp["url"]}')
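

# Minimal usage sketch (illustrative only, not part of the original file). It assumes a valid
# LinkedIn session: the headers file passed in must contain the cookie/CSRF headers the Voyager
# API expects, otherwise the requests above come back with errors. The path and keyword below
# are just examples.
if __name__ == '__main__':
    li = Linkedin(headers_file="linkedin_headers.txt")
    li.scrape_company("example company", pages=1)
    li.print_results()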


###############################################################################################
# recon.py

#!/usr/bin/env python

from scrapers.google_scrape import Google
from scrapers.linkedin import Linkedin

import os
import subprocess
import time
import requests
import pprint
import uuid
import smtplib
import re
import random
from concurrent.futures import ThreadPoolExecutor
import dns.resolver
from fierce import fierce
import functools
import ipaddress
import dns.zone
import sqlite3


class ReconDB(object):
    NEW_DB_SCRIPT = """BEGIN TRANSACTION;
CREATE TABLE IF NOT EXISTS "USERS" (
    "ID" INTEGER PRIMARY KEY AUTOINCREMENT,
    "FNAME" TEXT,
    "LNAME" TEXT,
    "ROLE" TEXT,
    "L_ID" TEXT,
    "L_URL" TEXT,
    "EMAIL" TEXT NOT NULL UNIQUE,
    "E_VERIFY" TEXT,
    "K_COMPANY" TEXT NOT NULL,
    "COMPANY_FULL_NAME" TEXT
);
CREATE TABLE IF NOT EXISTS "SERVERS" (
    "SID" INTEGER PRIMARY KEY AUTOINCREMENT,
    "S_NAME" TEXT NOT NULL UNIQUE,
    "IP_ADDR" TEXT NOT NULL UNIQUE,
    "LOCATION" TEXT,
    "SUBNETS" TEXT,
    "K_DOMAIN" TEXT
);
COMMIT;
"""

    def __init__(self, db_file_path=""):
        self.db_file = db_file_path if db_file_path else f'recov_{str(uuid.uuid4())[:4]}.db'
        self.db_con = None

    def connect(self):
        """
        connect to the sqlite db (creating the schema when the db file is new)
        :return: None
        """
        try:
            is_db_exist = os.path.exists(self.db_file)
            self.db_con = sqlite3.connect(self.db_file)
            # self.cur = self.db_con.cursor()

            if not is_db_exist:
                self.db_con.executescript(ReconDB.NEW_DB_SCRIPT)
            print(f'connected to db {self.db_file}')
        except Exception as e:
            print(f'Failed to connect to db {self.db_file}\n{e}')

    def close(self):
        """
        close the connection to the sqlite db
        :return: None
        """
        try:
            self.db_con.close()
            print(f"Closed db {self.db_file}.")
        except Exception:
            print(f'Failed to close db {self.db_file}')

    def add_user(self, email, company, fname="", lname="", l_id="", l_url="", role="", e_verify="UNKNOWN",
                 full_company_name=""):
        """
        add a user to the USERS table
        :param email: str
        :param company: str
        :param fname: str
        :param lname: str
        :param l_id: str
        :param l_url: str
        :param role: str
        :param e_verify: str
        :param full_company_name: str
        :return: None
        """
        try:
            self.db_con.execute("INSERT INTO USERS (FNAME,LNAME,ROLE,EMAIL,E_VERIFY,L_ID,L_URL,K_COMPANY,COMPANY_FULL_NAME) \
                                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                                (fname, lname, role, email, e_verify, l_id, l_url, company.lower(), full_company_name))
            self.db_con.commit()
        except Exception as e:
            # duplicate emails violate the UNIQUE constraint - ignore them
            pass
            # print(f"Failed to add user.\n{e}")

    def get_users(self, company):
        """
        get users by company
        :param company: str
        :return: list
        """
        results = []
        try:
            cursor = self.db_con.execute("SELECT * from USERS where K_COMPANY = ?", (company.lower(),))
            for row in cursor:
                results.append(
                    {"ID": row[0], "FNAME": row[1], "LNAME": row[2], "ROLE": row[3], "EMAIL": row[6],
                     "E_VERIFY": row[7],
                     "L_ID": row[4], "L_URL": row[5], "K_COMPANY": row[8], "COMPANY_FULL_NAME": row[9]})
        except Exception:
            pass
        return results

    def add_server(self, s_name, ip_addr, domain, location="", subnets=""):
        """
        add a server to the SERVERS table
        :param s_name: str
        :param ip_addr: str
        :param domain: str
        :param location: str
        :param subnets: str
        :return: None
        """
        try:
            self.db_con.execute("INSERT INTO SERVERS (S_NAME,IP_ADDR,LOCATION,SUBNETS,K_DOMAIN) \
                                 VALUES (?, ?, ?, ?, ? )", (s_name, ip_addr, location, subnets, domain.lower(),))
            self.db_con.commit()
            # print(
            #     f"added server {s_name},IP_ADDR={ip_addr},LOCATION={location},SUBNETS={subnets},DOMAIN={domain.lower()} ")
        except Exception as e:
            # duplicate server names/addresses violate the UNIQUE constraints - ignore them
            pass
            # print(
            #     f'Failed to add server S_NAME={s_name},IP_ADDR={ip_addr},LOCATION={location},SUBNETS={subnets},DOMAIN={domain.lower()}.\n{e}')

    def get_servers(self, domain):
        """
        get servers by domain/server name
        :param domain: str
        :return: list
        """
        results = []
        try:
            cursor = self.db_con.execute("SELECT * from SERVERS where K_DOMAIN = ?", (domain.lower(),))
            for row in cursor:
                results.append(
                    {"SID": row[0], "S_NAME": row[1], "IP_ADDR": row[2], "LOCATION": row[3], "SUBNETS": row[4],
                     "DOMAIN": row[5]})
        except Exception:
            pass
        return results
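
# Illustrative ReconDB usage (a sketch, not part of the original file); the db file name and
# the values below are made up.
#
#   db = ReconDB('example.db')
#   db.connect()                                     # creates USERS/SERVERS tables on first use
#   db.add_user(email='jane.doe@example.com', company='example', fname='Jane', lname='Doe')
#   db.add_server('mail.example.com', '203.0.113.10', 'example.com')
#   print(db.get_users('example'), db.get_servers('example.com'))
#   db.close()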


class ReconV(object):
    ADMIN_PAGE = '/admin'
    LOGIN_PAGE = '/login'
    PASTEBIN_URL = 'pastebin.com'
    EMAIL_PATTERN = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+'
    HUNTER_IO_KEY = os.environ['HUNTER_KEY']

    def __init__(self, domain, company, db_path=""):
        self.domain = domain
        self.company = company
        self.google_scraper = Google(save_files=True, cache=False)
        self.linkedin_scraper = Linkedin(
            headers_file="/home/matan/Documents/hde65/web_requests/scrapers/linkedin_headers.txt")
        self.reconDb = ReconDB(db_path)
        self.reconDb.connect()
        self.login_links = []
        self.admin_links = []
        self.pastebin_links = []
        self.emails = set()
        self.robot_links = {}
        self.hunter_emails = set()
        self.hosts_addr = {}
        self.hacker_target_domains = {}
        self.google_domains = {}
        self.wordlist_domains = {}
        self.fierce_info = {}

    @staticmethod
    def print_links(links):
        """
        pretty print links
        :param links: list of tuples [(str, str), ]
        :return: None
        """
        for i, link in enumerate(links):
            print(f'{i + 1}) {link[0]} - {link[1]}')

    @staticmethod
    def print_emails(emails):
        """
        pretty print of emails
        :param emails: list
        :return: None
        """
        for i, e in enumerate(emails):
            print(f'{i + 1}) {e}')

    def start_url_recon(self, domain, pages=1):
        """
        By the given domain keyword start the URL recon process
        :param domain: str
        :param pages: int
        :return: None
        """
        # TODO: check for domain pattern with re
        print("Start URL recon process..")
        self.admin_links = self.google_scraper.search(f"site:{domain} inurl:{ReconV.ADMIN_PAGE}", pages)
        time.sleep(random.randint(3, 6))
        self.login_links = self.google_scraper.search(f"site:{domain} inurl:{ReconV.LOGIN_PAGE}", pages)
        time.sleep(random.randint(3, 6))
        p_links = self.google_scraper.search(
            f'site:{ReconV.PASTEBIN_URL} intext:"*@*.{domain}:*"', pages)
        # self.pastebin_links
        self.pastebin_links = [(title, link.split('&')[0]) for title, link in p_links]

        time.sleep(random.randint(3, 6))
        self.robot_links = self.check_robots(domain)

    def get_email_from_links(self, links):
        """
        Fetch the given links and parse email addresses out of their content
        :param links: list
        :return: set
        """
        emails_set = set()

        def get_email(emails, link):
            # reuse the headers loaded by the Google scraper (ReconV keeps no headers of its own)
            headers = self.google_scraper.headers
            try:
                result = requests.get(link, headers=headers, timeout=10)
                if result.ok:
                    for email in re.findall(ReconV.EMAIL_PATTERN, result.text):
                        emails.add(email.lower())
            except Exception as e:
                print(f"Load emails from {link} failed.\n{e}")

        with ThreadPoolExecutor(max_workers=3) as executor:
            future_to_url = {executor.submit(get_email, emails_set, url): url for url in links}
            executor.shutdown(wait=True)

        return emails_set
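
    # Illustrative example of what EMAIL_PATTERN extracts (a sketch, not part of the original
    # file); the address is made up.
    #
    #   >>> re.findall(ReconV.EMAIL_PATTERN, "contact us at info@example.com for details")
    #   ['info@example.com']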

    @staticmethod
    def get_emails_from_hunter(domain):
        """
        By the given domain get emails from Hunter.IO
        :param domain: str
        :return: list
        """
        print("Get emails from Hunter.IO..")
        params = {'domain': domain,
                  'api_key': ReconV.HUNTER_IO_KEY}
        link = 'https://api.hunter.io/v2/domain-search'
        try:
            res = requests.get(link, params=params)
            if res.ok:
                j_results = res.json()
                return [email['value'] for email in j_results['data']['emails']]
        except Exception as e:
            print(f"Get data from Hunter.io failed\n {e}")
        return []
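
    # For reference, a trimmed sketch of the Hunter.io domain-search response fields that
    # get_emails_from_hunter() relies on (only the keys read above; real responses contain
    # many more fields, and the addresses here are made up):
    #
    #   {"data": {"emails": [{"value": "info@example.com"}, {"value": "sales@example.com"}]}}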

    def start_email_recon(self, domain, pages=1):
        """
        By the given domain start the email recon
        :param domain: str
        :param pages: int
        :return: None
        """
        # get emails from google
        print("Start email recon process..")
        results = self.google_scraper.search(f"site:{domain} intext:@{domain}", pages)
        self.emails = self.get_email_from_links([result[1] for result in results])
        # get emails from hunter.io
        self.hunter_emails = set(ReconV.get_emails_from_hunter(domain))

    def check_robots(self, domain):
        """
        By the given domain check whether robots files exist; if found, return their links
        :param domain: str
        :return: dict
        """
        print("Check for robots files...")
        links = {}
        for rfile in ['robot.txt', 'robots.txt']:
            try:
                for htext in ['http', 'https']:
                    link = f"{htext}://www.{domain}/{rfile}"
                    res = requests.get(link, timeout=10)
                    if res.ok:
                        links[rfile] = link
            except Exception:
                pass
        return links

    def start_linkedin_recon(self, company_keyword, pages):
        """
        By the given company name start the LinkedIn recon
        :param company_keyword: str
        :param pages: int
        :return: dict of companies
        """
        print("Start Linkedin recon process..")
        return self.linkedin_scraper.scrape_company(company_keyword, pages)

    def start_full_recon(self, domain_name, company_name, d_pages=1, c_pages=1):
        """
        start the full recon process: URLs, emails, LinkedIn company info
        :param domain_name: str
        :param company_name: str
        :param d_pages: int
        :param c_pages: int
        :return: None
        """
        print(f"\n\n*** Domain:{domain_name} ***")
        self.hacker_target_domains = ReconV.find_hosts_hackertarget(domain=domain_name)  # dict with {domain: ip}
        print("hackertarget")
        pprint.pp(self.hacker_target_domains)
        print("domains from google")
        self.google_domains = self.find_hosts_google(domain_name, 1)
        pprint.pp(self.google_domains)  # dict {domain: ip}
        print("bruteforce")
        self.wordlist_domains = self.domain_brute_force_wordlist(domain_name, '/home/matan/.recon-ng/data/suffixes.txt')
        pprint.pp(self.wordlist_domains)
        fierce_args = {'domain': domain_name, 'traverse': 5, 'subdomains': None,
                       'subdomain_file': '/home/matan/Documents/hde65/web_requests/lib/python3.8/site-packages/fierce/lists/default.txt',
                       'dns_servers': None, 'dns_file': None, 'tcp': False}
        print("fierce info")
        self.fierce_info = self.simulate_fierce(
            **fierce_args)  # {subdomain: {host: subdomain, ip: ip, nearby: {ip: subdomain}}}
        pprint.pp(self.fierce_info)
        self.start_url_recon(domain_name, d_pages)
        print("\n******************* Potential Login Pages *******************")
        ReconV.print_links(self.login_links)
        print("\n******************* Potential Admin Pages *******************")
        ReconV.print_links(self.admin_links)
        print("\n******************* Potential Pastebin Pages *******************")
        ReconV.print_links(self.pastebin_links)
        print(f"\n\n*** Company:{company_name} ***")
        self.start_linkedin_recon(company_name, c_pages)
        print(f"\nLinkedin info by keyword={company_name} ")
        self.linkedin_scraper.print_results()
        self.start_email_recon(domain_name, d_pages)
        print("\n******************* Potential Emails From Google ******************")
        ReconV.print_emails(self.emails)
        print("\n******************* Potential Emails From Hunter.IO ******************")
        ReconV.print_emails(self.hunter_emails)

    def save_domains_results(self, domains_data):
        for d_name, ip in domains_data.items():
            self.reconDb.add_server(d_name, ip, self.domain)

    def save_users_results(self, users_data):
        for email, data in users_data.items():
            self.reconDb.add_user(email=email, company=self.company, fname=data['fname'], lname=data['lname'],
                                  e_verify=data['e_verify'])

    def save_linkedin_results(self, linkedin_data):
        for email, data in linkedin_data.items():
            self.reconDb.add_user(email=email, company=self.company,
                                  fname=data['fname'],
                                  lname=data['lname'],
                                  e_verify=data['e_verify'],
                                  role=data['role'],
                                  l_url=data['l_url'],
                                  l_id=data['l_id'],
                                  full_company_name=data['c_full_name'])

    @staticmethod
    def resolve_host(host):
        ip_addr = ""
        try:
            record = dns.resolver.query(host, 'A')
            ip_addr = str(record[0])
        except Exception:
            # no A record for this host
            pass
            # print(f"no ip addr for {host}")
        return ip_addr

    @staticmethod
    def _update_resolve_domain(resolved_dom, host):
        ip_addr = ReconV.resolve_host(host)
        if ip_addr:
            resolved_dom[host] = ip_addr

    def find_hosts_google(self, domain_name, pages):
        # exclude already-known hosts from the query so each page can surface new subdomains
        query = f"site:{domain_name} -site:www.{domain_name}"
        domains = {domain_name}
        for page in range(pages):
            results = self.google_scraper.search(query, pages=1)
            if not results:
                break
            for title, link in results:
                new_domain = link.split('//')[-1].split('/')[0]
                if new_domain not in domains:
                    query += f' -site:{new_domain}'
                    if new_domain:
                        domains.add(new_domain)
        resolved_domains = {}

        with ThreadPoolExecutor(max_workers=10) as executor:
            future_to_url = {executor.submit(ReconV._update_resolve_domain, resolved_domains, dm): dm for dm in domains}
            executor.shutdown(wait=True)

        return resolved_domains

    @staticmethod
    def find_hosts_hackertarget(domain):
        results = {}
        headers = {'Host': 'api.hackertarget.com',
                   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0',
                   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                   'Accept-Language': 'en-US,en;q=0.5',
                   'Connection': 'keep-alive',
                   'Upgrade-Insecure-Requests': '1'}

        url = "https://api.hackertarget.com/hostsearch"
        resp = requests.get(url=url, params={'q': domain}, headers=headers)
        if resp.ok:
            for record in resp.text.split('\n'):
                try:
                    host, addr = record.split(',')
                    results[host] = addr
                except Exception:
                    pass
        return results
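
    # The hostsearch endpoint answers with plain text, one "hostname,ip" pair per line, which is
    # what the split(',') above expects. A made-up example of the format:
    #
    #   mail.example.com,203.0.113.10
    #   vpn.example.com,203.0.113.22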

    def domain_brute_force_wordlist(self, domain, wordlist_path):

        resolved_domains = {}

        def words_iterator():
            with open(wordlist_path, 'r') as fp:
                while True:
                    word = fp.readline().strip()
                    if not word:
                        break
                    yield word

        with ThreadPoolExecutor(max_workers=30) as executor:
            for word in words_iterator():
                executor.submit(ReconV._update_resolve_domain, resolved_domains, f'{word}.{domain}')
            executor.shutdown(wait=True)

        return resolved_domains

    @staticmethod
    def email_verifyier(addressToVerify):
        verify_result = 'UNKNOWN'
        try:

            domain = addressToVerify.split('@')[-1]
            records = dns.resolver.query(domain, 'MX')
            mxRecord = records[0].exchange
            mxRecord = str(mxRecord)

            host = "avi.tzanani"
            server = smtplib.SMTP()
            server.set_debuglevel(0)

            # SMTP Conversation
            con_res = server.connect(mxRecord)
            helo_res = server.helo(host)
            mail_res = server.mail('david@gmail.com')
            code, message = server.rcpt(str(addressToVerify))
            quit_res = server.quit()

            if code == 250:
                # print(f'email:{addressToVerify} ,code:{code} , msg:{message} result:Success')
                return 'SUCCESS'
            elif code == 550:
                # print(f'email:{addressToVerify} ,code:{code} , msg:{message} result:Bad')
                return 'BAD'
            else:
                # print(f'email:{addressToVerify} ,code:{code} , msg:{message} result:UNKNOWN')
                return 'UNKNOWN'
        except Exception:
            pass
        finally:
            time.sleep(1)
        return verify_result
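
    # Illustrative call (a sketch, not part of the original file); the address is made up.
    # A 250 reply to RCPT maps to 'SUCCESS', 550 to 'BAD', and anything else (or an exception,
    # e.g. no MX record) to 'UNKNOWN':
    #
    #   result = ReconV.email_verifyier("jane.doe@example.com")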

    def simulate_fierce(self, **kwargs):
        """
        run a fierce-style subdomain scan using the fierce library primitives
        :param kwargs: fierce options (domain, traverse, subdomains, subdomain_file, dns_servers, dns_file, tcp)
        :return: dict {subdomain: {'host': subdomain, 'ip': ip, 'nearby': {ip: subdomain}}}
        """
        results = {}
        resolver = dns.resolver.Resolver()
        resolver = fierce.update_resolver_nameservers(
            resolver,
            kwargs['dns_servers'],
            kwargs['dns_file']
        )
        if not kwargs.get("domain"):
            return results
        domain = dns.name.from_text(kwargs['domain'])
        subdomains = fierce.get_subdomains(
            kwargs["subdomains"],
            kwargs["subdomain_file"]
        )

        filter_func = None
        expander_func = fierce.default_expander
        if kwargs.get("traverse"):
            expander_func = functools.partial(fierce.traverse_expander, n=kwargs["traverse"])

        unvisited = fierce.unvisited_closure()

        for subdomain in subdomains:
            url = fierce.concatenate_subdomains(domain, [subdomain])
            record = fierce.query(resolver, url, record_type='A', tcp=kwargs["tcp"])

            if record is None or record.rrset is None:
                continue

            ips = [rr.address for rr in record.rrset]
            ip = ipaddress.IPv4Address(ips[0])

            ips = expander_func(ip)
            unvisited_ips = unvisited(ips)

            nearby = fierce.find_nearby(
                resolver,
                unvisited_ips,
                filter_func=filter_func
            )
            results[url.to_text(omit_final_dot=True)] = {'host': url.to_text(omit_final_dot=True), 'ip': ip.compressed,
                                                         'nearby': nearby}
        return results

    def simulate_fierce_p(self, domain):
        """
        run the external fierce binary and parse its "ip<TAB>domain" output lines
        :param domain: str
        :return: dict {domain: {'domain': domain, 'ip': ip, 'subnet': subnet}}
        """
        results = {}
        p = subprocess.Popen(f"/usr/bin/fierce -dns {domain} -threads 30 ", stdout=subprocess.PIPE, shell=True)
        output, err = p.communicate()

        if output:
            for line in output.decode('utf-8').split("\n"):
                domain = re.match(r'(?P<ip>^\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b)\t(?P<domain>.*$)', line)
                if domain:
                    domain_details = domain.groupdict()
                    subnet = ".".join(domain_details['ip'].split('.')[:3]) + ".0-255"
                    results[domain_details['domain']] = {'domain': domain_details['domain'],
                                                         'ip': domain_details['ip'], 'subnet': subnet}
                # elif '0-255' in line:
                #     print("line:", line.strip().split(' : ')[0])
        return results

    @staticmethod
    def printTable(myDict, colList=None):
        """ Pretty print a list of dictionaries (myDict) as a dynamically sized table.
        If column names (colList) aren't specified, they will show in random order.
        """
        if not colList:
            colList = list(myDict[0].keys() if myDict else [])
        myList = [colList]  # 1st row = header
        for item in myDict:
            myList.append([str(item[col] if item[col] is not None else '') for col in colList])
        colSize = [max(map(len, col)) for col in zip(*myList)]
        formatStr = ' | '.join(["{{:<{}}}".format(i) for i in colSize])
        myList.insert(1, ['-' * i for i in colSize])  # Separating line
        for item in myList:
            print(formatStr.format(*item))

    def start_reconV(self):
        """
        interactive menu that drives the recon options and stores the results in the db
        :return: None
        """
        exit_op = False
        while not exit_op:
            g_opt = input(
                "1)Url recon\n2)Email recon\n3)Domain recon\n4)Show servers\n5)Show users\ne)exit\nenter your choice:")
            if g_opt in ['1', '2', '3', '4', '5', 'e']:
                if g_opt == 'e':
                    print('Bye...')
                    exit_op = True
                elif g_opt == '1':
                    while True:
                        i_opt = input("1)Admin\n2)Login\n3)Pastebin\n4)robots.txt\nb)back\ne)exit\nenter your choice:")
                        if i_opt in ['1', '2', '3', '4', 'b', 'e']:
                            if i_opt == 'b':
                                break
                            elif i_opt == 'e':
                                print('Bye...')
                                exit_op = True
                                break
                            elif i_opt == '1':
                                self.admin_links = self.google_scraper.search(
                                    f"site:{self.domain} inurl:{ReconV.ADMIN_PAGE}", pages=1)
                                time.sleep(random.randint(3, 6))
                                ReconV.print_links(self.admin_links)
                            elif i_opt == '2':
                                self.login_links = self.google_scraper.search(
                                    f"site:{self.domain} inurl:{ReconV.LOGIN_PAGE}", pages=1)
                                time.sleep(random.randint(3, 6))
                                ReconV.print_links(self.login_links)
                            elif i_opt == '3':
                                p_links = self.google_scraper.search(
                                    f'site:{ReconV.PASTEBIN_URL} intext:"*@*.{self.domain}:*"',
                                    pages=1)
                                # self.pastebin_links
                                self.pastebin_links = [(title, link.split('&')[0]) for title, link in p_links]
                                time.sleep(random.randint(3, 6))
                                ReconV.print_links(self.pastebin_links)
                            elif i_opt == '4':
                                self.robot_links = self.check_robots(self.domain)
                                pprint.pprint(self.robot_links)
                        else:
                            print("Invalid input.")
                elif g_opt == '2':
                    while True:
                        i_opt = input("1)Hunter.io\n2)google\n3)Linkedin\nb)back\ne)exit\nenter your choice:")
                        if i_opt in ['1', '2', '3', 'b', 'e']:
                            if i_opt == 'b':
                                break
                            elif i_opt == 'e':
                                print('Bye...')
                                exit_op = True
                                break
                            elif i_opt == '1':
                                self.hunter_emails = set(ReconV.get_emails_from_hunter(self.domain))
                                self.hunter_emails = {
                                    email: {'email': email, 'fname': '', 'lname': '',
                                            'e_verify': self.email_verifyier(email)} for email
                                    in self.hunter_emails}
                                self.save_users_results(self.hunter_emails)

                            elif i_opt == '2':
                                results = self.google_scraper.search(f"site:{self.domain} intext:@{self.domain}",
                                                                     pages=1)
                                temp_emails = [e for e in self.get_email_from_links([result[1] for result in results])
                                               if self.domain in e]
                                self.emails = {
                                    email: {'email': email, 'fname': '', 'lname': '',
                                            'e_verify': self.email_verifyier(email)} for email
                                    in temp_emails}
                                self.save_users_results(self.emails)
                            elif i_opt == '3':
                                temp_linkedin_results = self.start_linkedin_recon(self.company, pages=1)
                                results = {}
                                for comp, c_info in temp_linkedin_results.items():
                                    for emp_info in c_info['employees']:
                                        fname, lname = emp_info['full_name'].split(' ')[:2]
                                        email = f'{fname}.{lname}@{self.domain}'
                                        role = emp_info['role']
                                        lurl = emp_info['url']
                                        lid = emp_info['e_id']
                                        results[email] = {'email': email, 'fname': fname, 'lname': lname,
                                                          'role': role, 'l_id': lid, 'c_full_name': comp,
                                                          'e_verify': self.email_verifyier(email), 'l_url': lurl}
                                self.save_linkedin_results(results)
                        else:
                            print("Invalid input.")
                elif g_opt == '3':
                    while True:
                        i_opt = input(
                            "1)HackerTarget\n2)google\n3)bruteforce\n4)fierce\nb)back\ne)exit\nenter your choice:")
                        if i_opt in ['1', '2', '3', '4', 'b', 'e']:
                            if i_opt == 'b':
                                break
                            elif i_opt == 'e':
                                print('Bye...')
                                exit_op = True
                                break
                            elif i_opt == '1':
                                self.hacker_target_domains = ReconV.find_hosts_hackertarget(
                                    domain=self.domain)  # dict with {domain: ip}
                                self.save_domains_results(self.hacker_target_domains)
                            elif i_opt == '2':
                                self.google_domains = self.find_hosts_google(self.domain, 1)
                                self.save_domains_results(self.google_domains)

                            elif i_opt == '3':
                                self.wordlist_domains = self.domain_brute_force_wordlist(
                                    self.domain, '/home/matan/.recon-ng/data/suffixes.txt')
                                self.save_domains_results(self.wordlist_domains)

                            elif i_opt == '4':
                                fierce_args = {'domain': self.domain, 'traverse': 5, 'subdomains': None,
                                               'subdomain_file': '/home/matan/Documents/hde65/web_requests/lib/python3.8/site-packages/fierce/lists/default.txt',
                                               'dns_servers': None, 'dns_file': None, 'tcp': False}
                                self.fierce_info = self.simulate_fierce(
                                    **fierce_args)  # {subdomain: {host: subdomain, ip: ip, nearby: {ip: subdomain}}}
                                self.save_domains_results({k: v['ip'] for k, v in self.fierce_info.items()})
                        else:
                            print("Invalid input.")
                elif g_opt == '4':
                    pprint.pp(self.reconDb.get_servers(self.domain))
                    ReconV.printTable(self.reconDb.get_servers(self.domain))
                elif g_opt == '5':
                    pprint.pp(self.reconDb.get_users(self.company))
                    ReconV.printTable(self.reconDb.get_users(self.company))

            else:
                print("Invalid input.")
        self.reconDb.close()


if __name__ == '__main__':
    domain_key = input("Enter Domain Name:")
    company_key = input("Enter Company Name:")
    db_file_path = input("Enter path of db file (if it does not exist just press ENTER):")
    if domain_key and company_key:
        recon = ReconV(domain=domain_key, company=company_key, db_path=db_file_path)
        recon.start_reconV()
        # recon.start_full_recon(domain_key, company_key)
        print("finish")
    else:
        print("Invalid input.")
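
# Running the script (illustrative note, not part of the original file): ReconV.HUNTER_IO_KEY is
# read from the environment at class-definition time, so the HUNTER_KEY variable must be set
# before recon.py is imported or run, e.g.:
#
#   $ export HUNTER_KEY=<your hunter.io api key>
#   $ python recon.py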