# Paste metadata: 6 years ago · May 19, 2019, 06:52 PM
# -*- coding: utf-8 -*-
2
3
import json
import logging
from datetime import datetime

import MySQLdb
import requests
from lxml import html
9
# URL scheme prefixes. Not referenced anywhere in the visible part of this
# file; kept because other code may import it.
protocol_patterns = [
    'https://',
    'http://',
]
def getImage(tree):
    """Return the ``data-original`` attribute of each product tile image.

    :param tree: parsed listing page; only its ``xpath`` method is used.
    :return: whatever ``tree.xpath`` yields for the image query, in
        document order.
    """
    image_query = '''
        //div[@class="product-tiles-list-wrapper u-clearfix"]/div/div/div
        [@class="c-product-tile-picture__holder"]/a/div/div/img/@data-original
    '''
    return tree.xpath(image_query)
17
def getInfo(tree):
    """Return the ``data-product-info`` attribute of each product tile link.

    The attribute is read from the "add to cart pop-up" anchor inside each
    tile's picture holder.

    :param tree: parsed listing page; only its ``xpath`` method is used.
    :return: whatever ``tree.xpath`` yields for the query, in document order.
    """
    info_query = '''
        //div[@class="product-tiles-list-wrapper u-clearfix"]/div/div/div[@class="c-product-tile-picture__holder"]
        /a[@data-track-category="add_to_cart_pop_up"]/@data-product-info
    '''
    return tree.xpath(info_query)
24
def getLink(tree):
    """Return the ``href`` of each product tile's "add to cart pop-up" link.

    :param tree: parsed listing page; only its ``xpath`` method is used.
    :return: whatever ``tree.xpath`` yields for the query, in document order.
    """
    href_query = '''
        //div[@class="product-tiles-list-wrapper u-clearfix"]/div/div/div[@class="c-product-tile-picture__holder"]
        /a[@data-track-category="add_to_cart_pop_up"]/@href
    '''
    return tree.xpath(href_query)
32
def getRating(tree):
    """Return every ``span`` with class ``c-star-rating_reviews-qty``.

    Unlike the attribute queries in this file, this selects the elements
    themselves rather than one of their attributes.

    :param tree: parsed listing page; only its ``xpath`` method is used.
    :return: whatever ``tree.xpath`` yields for the query, in document order.
    """
    reviews_query = '''
        //span[@class="c-star-rating_reviews-qty"]
    '''
    return tree.xpath(reviews_query)
38
def getStars(tree, link):
    """Return the ``style`` attribute(s) of a product's active star bar.

    The star bar is located through the anchor whose ``href`` equals
    ``link + '/reviews'``.

    :param tree: parsed listing page; only its ``xpath`` method is used.
    :param link: product link as found on the page (a string).
    :return: whatever ``tree.xpath`` yields for the query.
    """
    reviews_href = link + '/reviews'
    stars_query = (
        '//a[@href="' + reviews_href + '"]'
        '/span/span[@class="c-star-rating__stars c-star-rating__stars_active font-icon icon-star"]'
        '/@style'
    )
    return tree.xpath(stars_query)
44
def add_to_db(tree, amount):
    """Store every product found on one listing page and count the successes.

    Product data comes from ``getInfo`` (JSON blobs), ``getLink``,
    ``getImage``, ``getRating`` and ``getStars``; each product is written
    with ``fill_db``. A product that cannot be parsed or stored is skipped
    (best effort) and a warning is logged instead of aborting the page.

    :param tree: parsed listing page.
    :param amount: running product count before this page.
    :return: ``amount`` plus the number of products stored from this page.
    """
    logger = logging.getLogger(__name__)

    info = getInfo(tree)
    link = getLink(tree)
    link_to_image = getImage(tree)
    rating = getRating(tree)

    resp = amount
    # Index into ``rating``. It advances for every tile and is stepped back
    # when the tile turns out to have a star score of 0, i.e. such tiles do
    # not consume a review-count element.
    # NOTE(review): it also stays advanced for tiles that fail before the
    # star score is parsed -- confirm that this is intended.
    rate_count = -1
    for count, raw_info in enumerate(info):
        rate_count += 1
        try:
            stats = json.loads(raw_info)
            name = stats['productName']
            category = stats['productCategoryName']
            vendor = stats['productVendorName']
            price = stats["productPriceLocal"]
            link_to_product = link[count]
            image = link_to_image[count]

            # The style value looks like "width: NN%"; the stored score is
            # that percentage minus one.
            width = getStars(tree, link_to_product)[0]
            stars = int(width.replace('width: ', '').replace("%", '')) - 1

            if stars == 0:
                rate_count -= 1
                rate = 0
            else:
                rate = rating[rate_count].text

            fill_db(name, price, link_to_product, image, category, vendor,
                    int(rate), stars)
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # SystemExit still stop the scraper; the failure is reported
            # instead of being silently dropped.
            logger.warning("Skipping product tile %d: %r", count, exc)
            continue
        resp += 1
    return resp
75
def url_scarping(request):
    """Scrape every configured M.Video category and store its products.

    For each category the first page is fetched and handed to ``add_to_db``.
    Then ``<category>/f/page=N`` is requested for N = 2, 3, ... until a
    response contains the site's "nothing found" message or page 999 has
    been processed.

    :param request: unused; kept so existing callers keep working.
    :raises requests.RequestException: on a network failure or timeout.
    """
    url = ['https://www.mvideo.ru/komputernye-komplektuushhie/processory-5431',
           'https://www.mvideo.ru/komputernye-komplektuushhie/materinskie-platy-5432',
           'https://www.mvideo.ru/komputernye-komplektuushhie/videokarty-5429',
           'https://www.mvideo.ru/komputernye-komplektuushhie/bloki-pitaniya-5435',
           'https://www.mvideo.ru/komputernye-komplektuushhie/operativnaya-pamyat-5433',
           'https://www.mvideo.ru/komputernye-komplektuushhie/setevye-karty-5428',
           'https://www.mvideo.ru/komputernye-komplektuushhie/opticheskie-privody-5747',
           'https://www.mvideo.ru/komputernye-komplektuushhie/sistemy-ohlazhdeniya-6127',
           'https://www.mvideo.ru/komputernye-komplektuushhie/korpusa-5434',
           'https://www.mvideo.ru/noutbuki-planshety-komputery/noutbuki-118',
           'https://www.mvideo.ru/komputernaya-tehnika/monitory-101']

    # Russian for "Nothing was found for your query". The literal had been
    # damaged by an encoding round-trip ("запроÑу") and could not match.
    # NOTE(review): this relies on requests decoding the page correctly.
    not_found_marker = 'По вашему запросу ничего не найдено'
    pages = 1000
    # Without a timeout a stalled connection would block the run forever.
    timeout_seconds = 30

    for category_url in url:
        response = requests.get(category_url, timeout=timeout_seconds)
        count = add_to_db(html.fromstring(response.text), 0)

        for page in range(2, pages):
            page_url = category_url + '/f/page=' + str(page)
            response = requests.get(page_url, timeout=timeout_seconds)
            # ``in`` also matches at index 0, which ``find(...) > 0`` missed.
            if not_found_marker in response.text:
                print(category_url + ' - done - ' + str(page) + " pages - products - " + str(count))
                break
            count = add_to_db(html.fromstring(response.text), count)
102
103
104
105
106
107
108
def fill_db(title, price, link, image, category, vendor, marks_amount, stars):
    """Insert a product into the ``mvideo`` table or refresh its price.

    The table is created on first use. Products are matched by ``name``:

    * no matching row  -> a new row is inserted (currency ``'RUB'``);
    * exactly one row  -> the previous price is recorded through
      ``add_to_history`` and the row's price is updated;
    * several rows     -> nothing is changed.

    :param title: product name, used as the lookup key.
    :param price: product price; converted with ``float``.
    :param link: product page link.
    :param image: product image link.
    :param category: product category name.
    :param vendor: product vendor name.
    :param marks_amount: number of reviews.
    :param stars: star score; stored as text.
    :raises ValueError: if ``price`` cannot be converted to ``float``.
    """
    # Convert before connecting so a bad price cannot leak a connection.
    price_digit = float(price)

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a config file kept out of version control.
    db = MySQLdb.connect(host="svotin.mysql.pythonanywhere-services.com", user="Svotin",
                         passwd="Qwerty2102", db="Svotin$mvideo_ru", use_unicode=True, charset="utf8")
    try:
        cursor = db.cursor()
        cursor.execute('''
            create table IF NOT EXISTS mvideo (id INT AUTO_INCREMENT NOT NULL,
                name char(70) default 'mvideo' not null ,
                price int(20) unsigned default '0' not null,
                currency char(10) default '$' not null,
                link char(100) not null,
                link_to_image char(120) not null,
                category char(100) not null,
                vendor char(100) not null,
                marks_amount int not null,
                stars char(10) not null,
                primary key (id)
                );
            ''')

        # Parameterized queries: scraped titles may contain quotes, which
        # broke (and could inject into) the string-formatted SQL.
        cursor.execute("SELECT * FROM mvideo WHERE name=%s", (title,))
        # Fetch once. Calling fetchall() twice returned an empty result the
        # second time, so indexing it raised IndexError and the update
        # branch could never complete.
        rows = cursor.fetchall()

        if not rows:
            cursor.execute('''
                INSERT INTO mvideo (name,price,currency, link, link_to_image, category, vendor, marks_amount, stars) VALUES
                (%s, %s, 'RUB', %s, %s, %s, %s, %s, %s)''',
                           (title, price_digit, link, image, category, vendor, marks_amount, str(stars)))
        elif len(rows) == 1:
            # Column order follows the table definition: id, name, price, ...
            product_id = rows[0][0]
            old_price = rows[0][2]
            # NOTE(review): a history row is written even when the price is
            # unchanged, as the original call implied -- confirm intent.
            add_to_history(cursor, "mvideo", product_id,
                           datetime.today().strftime('%Y.%m.%d'), old_price, price_digit)
            cursor.execute("UPDATE mvideo SET price = %s WHERE name=%s", (price_digit, title))
        db.commit()
    finally:
        # Always release the connection, even when a statement fails.
        db.close()
138
def add_to_history(cursor, market, id, date, old_sum, new_sum):
    """Record a price change in the ``old_prices`` table.

    The table is created on first use. Nothing is committed here; that is
    left to the owner of ``cursor``.

    :param cursor: open DB-API cursor using the ``%s`` parameter style.
    :param market: market name, e.g. ``"mvideo"``.
    :param id: id of the product row in the market's table.
    :param date: date string to store with the record.
    :param old_sum: price before the change.
    :param new_sum: price after the change.
    """
    cursor.execute('''
        create table IF NOT EXISTS old_prices (id INT AUTO_INCREMENT NOT NULL,
            market char(70) default 'mvideo' not null ,
            market_id int(20) unsigned default '0' not null,
            date char(14) default '10.1.1990' not null,
            old_sum int(20) not null,
            new_sum int(20) not null,
            primary key (id)
            );
        ''')
    # Values are passed as parameters so the driver does the quoting instead
    # of splicing them into the SQL text.
    cursor.execute('''
        INSERT INTO old_prices (market, market_id, date, old_sum, new_sum) VALUES
        (%s, %s, %s, %s, %s)''',
                   (market, id, date, old_sum, new_sum))
153
# Run the full scrape as soon as this module executes. The argument is not
# used by url_scarping, so the value 8 has no effect.
url_scarping(8)