# Pasted source, dated Feb 01, 2019, 10:12 PM (listed as "7 years ago").
1import requests
2import json
3from bs4 import BeautifulSoup as soup
4from log import log as log
5import time
6from datetime import datetime
7import random
8import sqlite3
9from discord_hooks import Webhook
10import slackweb
11from threading import Thread
12import urllib.request
13
# Browser-like identity and content type sent with the product JSON requests.
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/60.0.3107.4 Safari/537.36'
)
headers = {
    'User-Agent': user_agent,
    'Content-Type': 'application/json',
}
18
class Product():
    """Plain record describing one monitored product.

    The constructor stores every argument unchanged on the instance; no
    validation or conversion is performed.
    """

    def __init__(self, title, link, stock, keyword, image_url, stock_options):
        # Display name and product page URL.
        self.title, self.link = title, link
        # Stock value persisted in the database, and the sizes shown in alerts.
        self.stock, self.stock_options = stock, stock_options
        # Keyword that matched this product.
        self.keyword = keyword
        # Thumbnail used in notifications.
        self.image_url = image_url
28
class FileNotFound(OSError):
    """Raised by read_from_txt when the input file cannot be opened or read."""


class NoDataLoaded(Exception):
    """Raised by read_from_txt when the input file contains no lines."""


def read_from_txt(path):
    """Return the lines of the text file at *path* without trailing newlines.

    Blank lines are kept (as empty strings); only the "\\n" characters at the
    ends of each line are removed.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one string per line of the file.

    Raises:
        FileNotFound: The file could not be opened or read.
        NoDataLoaded: The file exists but contains no lines at all.
    """
    # The context manager closes the file even if reading fails part-way.
    try:
        with open(path, "r") as f:
            raw_lines = f.readlines()
    except OSError as err:
        log('e', "Couldn't locate: " + path)
        raise FileNotFound("Couldn't locate: " + path) from err

    if not raw_lines:
        log('w', "No data found in: " + path)
        raise NoDataLoaded("No data found in: " + path)

    return [line.strip("\n") for line in raw_lines]
56
57
def add_to_db(product, db_path='products.db'):
    """Record *product* in the SQLite database and report whether to alert.

    A row is keyed by the product link (the ``link`` column is UNIQUE). A
    link that is not yet stored is inserted; a link that is already stored
    has its saved stock compared with the current stock and updated when
    the two differ.

    Args:
        product: Object exposing ``title``, ``stock``, ``link`` and
            ``keyword`` attributes. ``stock`` is stored as ``str(stock)``.
        db_path: Path of the SQLite database file.

    Returns:
        True if the product was newly inserted or its stock changed,
        False if nothing changed or a database error occurred.
    """
    title = product.title
    stock = str(product.stock)
    link = product.link
    keyword = product.keyword
    alert = False

    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute("""CREATE TABLE IF NOT EXISTS products(title TEXT, link TEXT UNIQUE, stock TEXT, keywords TEXT)""")

        try:
            c.execute("""INSERT INTO products (title, link, stock, keywords) VALUES (?, ?, ?, ?)""", (title, link, stock, keyword))
        except sqlite3.IntegrityError:
            # The UNIQUE constraint on link fired: the product is already
            # known, so check whether its stock changed.
            c.execute('SELECT stock FROM products WHERE link=?', (link,))
            row = c.fetchone()
            # Read the column value directly instead of slicing the tuple's
            # repr, which breaks when the text contains quote characters.
            old_stock = row[0] if row else None
            if old_stock is not None and str(old_stock).strip() == stock.strip():
                log('w', "Product at URL: " + link + " already exists in the database.")
            else:
                log('s', "Product at URL: " + link + " changed stock.")
                # Persist the NEW stock; writing the old value back would
                # make every later check report the same change again.
                c.execute("""UPDATE products SET stock = ? WHERE link= ?""", (stock, link))
                alert = True
        else:
            log('s', "Found new product with keyword " + keyword + ". Link = " + link)
            alert = True

        conn.commit()
    except sqlite3.Error as e:
        log('e', "database error: " + str(e))
        # Nothing was committed, so do not announce a change that was not
        # saved; the next check will detect it again.
        alert = False
    finally:
        # Closing without a commit discards any uncommitted changes.
        conn.close()

    return alert
106
def notify(product, slack, discord):
    """Send a stock alert for *product* to Slack and/or Discord.

    Args:
        product: Object exposing ``title``, ``link``, ``image_url`` and
            ``stock_options`` (an iterable of size strings).
        slack: Slack webhook URL; a falsy value skips the Slack message.
        discord: Discord webhook URL; a falsy value skips the Discord message.
    """
    # Every size is followed by one space, the last one included.
    sizes = "".join(option + " " for option in product.stock_options)

    if slack:
        client = slackweb.Slack(url=slack)
        size_field = {
            "title": "Sizes",
            "value": sizes,
            "short": False
        }
        message = {
            "title": product.title,
            "color": "#EAF4EC",
            "text": product.link,
            "fields": [size_field],
            "mrkdwn_in": ["text"],
            "thumb_url": product.image_url,
            "footer": "BBGR",
            "footer_icon": "https://platform.slack-edge.com/img/default_application_icon.png",
            "ts": time.time()
        }
        client.notify(attachments=[message])

    if discord:
        hook = Webhook(discord, color=0xEAF4EC)
        hook.set_title(title=product.title, url=product.link)
        hook.set_thumbnail(url=product.image_url)
        hook.add_field(name="Sizes", value=sizes)
        hook.set_footer(text='BBGR', icon='https://cdn.discordapp.com/embed/avatars/0.png', ts=True)
        hook.post()
147
def _site_root(link):
    """Return the scheme and host part of *link*, e.g. ``https://shop.com``."""
    if link.startswith("https://"):
        scheme, rest = "https://", link[8:]
    else:
        # Anything that is not https is treated as an http:// link.
        scheme, rest = "http://", link[7:]
    end = rest.find("/")
    if end != -1:
        rest = rest[:end]
    return scheme + rest


def _get_with_retry(url, label):
    """GET *url*, retrying once after 5 seconds; return None if both fail.

    *label* is the URL text used in the log messages.
    """
    # NOTE: certificate verification is disabled (verify=False), so the
    # responses are not authenticated.
    try:
        return requests.get(url, timeout=5, verify=False)
    except requests.exceptions.RequestException:
        log('e', 'Connection to URL: ' + label + " failed. Retrying...")
        time.sleep(5)
    try:
        return requests.get(url, timeout=8, verify=False)
    except requests.exceptions.RequestException:
        log('e', 'Connection to URL: ' + label + " failed.")
        return None


def _fetch_stock(json_url):
    """Return the available size names listed at *json_url*.

    Returns ``['Unavailable']`` when the size list is empty and None when
    the document cannot be fetched or does not have the expected layout.
    """
    req = urllib.request.Request(json_url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=8) as resp:
            payload = json.loads(resp.read().decode('utf-8'))
        size_opts = payload['available_sizes']
        if not size_opts:
            return ['Unavailable']
        return [size['name'] for size in size_opts]
    except (OSError, ValueError, KeyError, TypeError) as err:
        # OSError covers urllib's URLError/HTTPError and timeouts; ValueError
        # covers undecodable or invalid JSON.
        log('e', "Unable to load stock data from: " + json_url + " (" + str(err) + ")")
        return None


def _process_early_page(url, response, slack, discord):
    """Parse a single product page and alert when it is new or changed."""
    if response.status_code == 404:
        log('e', "Unable to parse that link..")
        return

    page = soup(response.text, "html.parser")

    title_tags = page.findAll('span', class_='prod-title')
    title = title_tags[0].text.strip() if title_tags else ""
    if not title:
        title = "N/A"

    # Use the first gallery image's src, matching how listing pages pass
    # an image URL rather than the text of the tag list.
    image_tags = page.findAll('img', class_="js-scroll-gallery-snap-target")
    image_url = image_tags[0].get('src', "N/A") if image_tags else "N/A"

    stock_data = _fetch_stock(url + ".json" + "?admin=True")
    if stock_data is None:
        return

    product = Product(title, url, stock_data, "N/A", str(image_url), stock_data)
    if add_to_db(product):
        notify(product, slack, discord)


def _process_listing_page(site, response, keywords, slack, discord):
    """Scan a listing page for products whose caption matches a keyword."""
    page = soup(response.text, "html.parser")
    captions = page.findAll("div", class_='brand-name')
    images = page.findAll('img', class_='top')

    hrefs = []
    for article in page.findAll("article", class_="product"):
        anchor = article.find('a', attrs={"itemprop": "url"})
        # find() returns None when the article has no matching anchor.
        if anchor is not None and anchor.get("href") is not None:
            hrefs.append(anchor["href"])

    # zip() stops at the shortest list, so a page with fewer captions or
    # images than links cannot raise IndexError.
    for href, caption, image in zip(hrefs, captions, images):
        caption_text = caption.text
        upper_caption = caption_text.upper()
        # Only the first matching keyword is used: further matches would
        # just re-fetch and re-check the same product link.
        keyword = next((k for k in keywords if k.upper() in upper_caption), None)
        if keyword is None:
            continue

        product_url = site + href
        stock_data = _fetch_stock(product_url + '.json')
        if stock_data is None:
            continue

        product = Product(caption_text, product_url, stock_data, keyword,
                          str(image.get('src', "N/A")), stock_data)
        if add_to_db(product):
            notify(product, slack, discord)


def monitor(link, keywords, slack, discord):
    """Check one site link for new or restocked products and send alerts.

    A link ending in ``=`` is treated as a search URL and is expanded into
    one URL per keyword. A link containing ``omia`` (not at position 0) is
    treated as a single product page; any other link is treated as a
    listing page whose product captions are matched against *keywords*.

    Args:
        link: URL to check.
        keywords: List of keyword strings.
        slack: Slack webhook URL, or a falsy value to disable Slack alerts.
        discord: Discord webhook URL, or a falsy value to disable Discord.

    Returns:
        None. If a page cannot be fetched after one retry, the remaining
        work for this link is abandoned.
    """
    log('i', "Checking site: " + link + "...")

    site = _site_root(link)
    is_early_link = link.find('omia') > 0

    # build search links
    if link.endswith('='):
        links = [link + word for word in keywords]
    else:
        links = [link]

    pages = []
    for url in links:
        request_url = url + "?admin=True" if is_early_link else url
        response = _get_with_retry(request_url, url)
        if response is None:
            return
        if is_early_link:
            _process_early_page(url, response, slack, discord)
        else:
            pages.append(response)

    # Listing pages are processed once, after every fetch succeeded.
    for response in pages:
        _process_listing_page(site, response, keywords, slack, discord)
289
290
def __main__():
    """Load the configuration and site list, then monitor the sites forever.

    Reads the ``slack`` and ``discord`` webhook settings from ``config.json``
    and the site links from ``ow-pages.txt``, then repeatedly starts one
    ``monitor`` thread per non-blank site line. This function never returns.
    """
    # Ignore insecure messages (for now)
    requests.packages.urllib3.disable_warnings()

    with open('config.json') as config:
        settings = json.load(config)

    ######### CHANGE THESE #########
    keywords = [
        "converse",
        "UNC",
        "Jordan",
        "Mercurial",
        "Zoom-Fly",
        "Nike"
    ]
    slack = settings['slack']
    discord = settings['discord']

    # Load sites from file
    sites = read_from_txt("ow-pages.txt")

    # Start monitoring sites
    while True:
        threads = []
        for site in sites:
            # Blank lines in the site file are skipped.
            if not site.strip():
                continue
            worker = Thread(target=monitor, args=(site, keywords, slack, discord))
            threads.append(worker)
            worker.start()
            # NOTE(review): the pasted source lost its indentation; the 2 s
            # pause is assumed to follow each thread start - confirm.
            time.sleep(2)