· 4 years ago · Aug 23, 2021, 03:02 AM
1from selenium import webdriver
2from selenium.webdriver.common.keys import Keys
3from selenium.webdriver.common.by import By
4from selenium.webdriver.support.ui import WebDriverWait
5from selenium.webdriver.support import expected_conditions as EC
6import time
7import mysql.connector
8
9
# Shared accumulator: get_data_from_rows() appends one dict per scraped
# search result, and dump_data_to_db() reads it back when inserting.
scrape_data = []

# Connection settings handed to mysql.connector.connect() below.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file that is not committed.
db_configs = dict(
    user="root",
    password="AaBb@123!@#",
    host="127.0.0.1",
    database="stocks",
    raise_on_warnings=True,
)

# The connection and cursor are opened at import time and used by the
# script section at the bottom of the file.
cnx = mysql.connector.connect(**db_configs)
cursor = cnx.cursor()
23
def dump_data_to_db(cursor, items=None):
    """Insert scraped rows into the ``amzn_data`` table.

    Each row is inserted independently: a row that fails is reported and
    skipped, and the remaining rows are still attempted. This function does
    not commit; the caller is responsible for that.

    Args:
        cursor: DB-API style cursor exposing ``execute(query, params)``.
        items: Iterable of dicts with optional ``'title'``, ``'author'`` and
            ``'price'`` keys. Defaults to the module-level ``scrape_data``
            list when omitted, which is what the original signature used.

    Returns:
        None.
    """
    if items is None:
        items = scrape_data

    query = "INSERT INTO amzn_data(title,author,price) values (%s , %s , %s)"
    for item in items:
        params = (item.get('title'), item.get('author'), item.get('price'))
        try:
            cursor.execute(query, params)
        except Exception as e:
            # Best-effort insert: report the failing row and keep going so a
            # single bad record does not discard everything after it.
            print(f"Failed to insert {params!r}: {e}")
32
def create_table_if_not_exists(cursor):
    """Ensure the ``amzn_data`` table exists.

    Runs a ``CREATE TABLE IF NOT EXISTS`` statement through ``cursor``. Any
    exception raised by the cursor is printed and suppressed, so this
    function always returns ``None``.

    Args:
        cursor: DB-API style cursor exposing ``execute(query)``.
    """
    ddl = '''
        CREATE TABLE IF NOT EXISTS amzn_data (
            id INT AUTO_INCREMENT PRIMARY KEY,
            title VARCHAR(255),
            author VARCHAR(255),
            price VARCHAR(255),
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )  ENGINE=INNODB;
    '''
    try:
        cursor.execute(ddl)
    except Exception as err:
        print(err)
48
49# def get_data_from_rows(driver):
50# rows_required = driver.find_elements_by_css_selector('[data-component-type=s-search-result]')
51# for row in rows_required:
52# try:
53# temp_dict = {}
54# print('###############################')
55# title_temp = row.find_elements_by_class_name('a-size-mini')
56# author_temp = row.find_elements_by_class_name('a-row.a-size-base.a-color-secondary')[0].find_element_by_tag_name('a')
57# price = row.find_element_by_class_name('a-price')
58# main_title = title_temp[0].text
59# author_name = author_temp.text
60# temp_dict['title'] = main_title if main_title else None
61# temp_dict['author'] = author_name if author_name else None
62# temp_dict['price'] = price.text if price.text else None
63# scrape_data.append(temp_dict)
64# print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
65# except Exception as e:
66# pass
67# return None
68
def get_data_from_rows(driver):
    """Scrape title, author and price from the search results on the current page.

    Scrolls close to the bottom of the page, then visits every element whose
    ``cel_widget_id`` contains ``MAIN-SEARCH_RESULTS`` and appends a dict with
    ``'title'``, ``'author'`` and ``'price'`` keys to the module-level
    ``scrape_data`` list.

    Results that lack a title heading or a byline block are skipped. A
    WebDriver error while reading one result is reported and that result is
    skipped; the remaining results are still processed.

    Args:
        driver: Selenium WebDriver positioned on a search-results page.

    Returns:
        None.
    """
    # Imported locally so the block brings its own dependency into scope.
    from selenium.common.exceptions import WebDriverException

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight-250);")
    span_mains = driver.find_elements(
        By.CSS_SELECTOR, "span[cel_widget_id*='MAIN-SEARCH_RESULTS']"
    )
    for span_main in span_mains:
        print('###############################')
        try:
            title_tags = span_main.find_elements(
                By.CSS_SELECTOR,
                "h2.a-size-mini.a-spacing-none.a-color-base.s-line-clamp-2",
            )
            author_tags = span_main.find_elements(
                By.CSS_SELECTOR, "div.a-row.a-size-base.a-color-secondary"
            )
            if not title_tags or not author_tags:
                # Not a regular product result (no title or byline); skip it
                # explicitly instead of relying on a swallowed IndexError.
                print('skipping result without title or byline')
                continue
            price_tags = span_main.find_elements(By.CSS_SELECTOR, 'span.a-price')

            # Read each .text once; every access is a round-trip to the browser.
            title = title_tags[0].text
            # The byline is split on '|' and only the first segment is kept.
            byline = author_tags[0].text.split('|')[0]
            # NOTE(review): this is a substring test, so any first segment
            # containing "by" (e.g. "Ruby ...") is accepted as the author.
            author_name = byline if 'by' in byline else 'NONE'
            price = price_tags[0].text if price_tags else 'NONE'
        except WebDriverException as e:
            print(f'skipping result after WebDriver error: {e}')
            continue

        print(title)
        print(author_name)
        print(price)
        scrape_data.append({
            'title': title or None,
            'author': author_name or None,
            'price': price or None,
        })
        print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
97
98
99
# Script section: open Amazon search results in Chrome, scrape a fixed number
# of result pages, then store whatever was collected in MySQL.
from selenium.common.exceptions import NoSuchElementException

# Total result pages to scrape, including the first one.
MAX_PAGES = 5  # change back to 11

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--incognito")


driver = webdriver.Chrome(options=chrome_options)
# driver = webdriver.Firefox()
try:
    driver.get('https://www.amazon.in/s?k=India')
    time.sleep(4)
    # get_data_from_rows() scrolls the page itself before reading results.
    get_data_from_rows(driver)
    for _ in range(MAX_PAGES - 1):
        try:
            paginator = driver.find_element(By.CLASS_NAME, 's-pagination-strip')
            partial_next = paginator.find_element(By.PARTIAL_LINK_TEXT, 'Next')
        except NoSuchElementException:
            # No pagination strip or no "Next" link: stop paging, but still
            # fall through to the database dump below.
            print('paginator missing')
            break
        partial_next.click()
        time.sleep(6)
        print('paginator found')
        get_data_from_rows(driver)
        # Kept at its original position; once set, the implicit wait applies
        # to every later element lookup in this session.
        driver.implicitly_wait(3)
except Exception as e:
    # Top-level boundary: report the failure instead of hiding it, and keep
    # the rows scraped so far so they can still be saved.
    print(f'scraping stopped early: {e}')
finally:
    driver.quit()

try:
    create_table_if_not_exists(cursor)
    dump_data_to_db(cursor)
    cnx.commit()
finally:
    cursor.close()
    cnx.close()