# Paste metadata: 7 years ago · Feb 02, 2019, 02:46 PM
import json
import random
import sqlite3
import time
import urllib.parse
import urllib.request
from datetime import datetime
from threading import Thread

import requests
import slackweb
from bs4 import BeautifulSoup as soup

from discord_hooks import Webhook
from log import log as log

# Browser-like headers attached to the urllib requests that fetch the
# per-product JSON documents.
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/60.0.3107.4 Safari/537.36'
)
headers = {
    'User-Agent': user_agent,
    'Content-Type': 'application/json',
}

class Product():
    """Plain record describing one product found on a monitored page.

    Attributes:
        title: Display title of the product.
        link: URL of the product page; add_to_db stores it in a UNIQUE column.
        stock: Stock description; add_to_db stores ``str(stock)``.
        keyword: Keyword that matched the product, or "N/A".
        image_url: Image URL used as the notification thumbnail.
        stock_options: Iterable of size names listed in notifications.
    """

    def __init__(self, title, link, stock, keyword, image_url, stock_options):
        self.title = title
        self.stock = stock
        self.link = link
        self.keyword = keyword
        self.image_url = image_url
        self.stock_options = stock_options

    def __repr__(self):
        return "Product(title={!r}, link={!r}, stock={!r}, keyword={!r})".format(
            self.title, self.link, self.stock, self.keyword
        )

class FileNotFound(Exception):
    """Raised by read_from_txt when the requested file cannot be read."""


class NoDataLoaded(Exception):
    """Raised by read_from_txt when the requested file contains no lines."""


def read_from_txt(path):
    """Return the lines of the text file at *path* with newlines removed.

    Blank lines are kept (as empty strings) so callers decide how to treat
    them.

    Args:
        path: Location of the text file to load.

    Returns:
        A list with one string per line of the file.

    Raises:
        FileNotFound: The file could not be opened or decoded.
        NoDataLoaded: The file was read but contained no lines.
    """
    # Load data from the txt file; the context manager closes it even if
    # reading fails half-way.
    try:
        with open(path, "r") as f:
            raw_lines = f.readlines()
    except (OSError, UnicodeError) as err:
        log('e', "Couldn't locate: " + path)
        raise FileNotFound(path) from err

    if not raw_lines:
        log('w', "No data found in: " + path)
        raise NoDataLoaded(path)

    # Parse the data
    return [line.strip("\n") for line in raw_lines]


def add_to_db(product, db_path='products.db'):
    """Record *product* in the SQLite database and report whether to alert.

    A product is identified by its link (a UNIQUE column). A link that is not
    yet stored is inserted. For a link that is already stored, the saved stock
    text is compared with ``str(product.stock)`` and replaced with the new
    value when they differ.

    Args:
        product: Object exposing ``title``, ``stock``, ``link`` and
            ``keyword`` attributes.
        db_path: SQLite database file to use.

    Returns:
        True when the product is new or its stock changed, otherwise False.
        Database errors are logged; the value computed up to that point is
        returned.
    """
    title = product.title
    stock = str(product.stock)
    link = product.link
    keyword = product.keyword
    alert = False

    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute("""CREATE TABLE IF NOT EXISTS products(title TEXT, link TEXT UNIQUE, stock TEXT, keywords TEXT)""")

        try:
            # Add product to database if it's unique
            c.execute("""INSERT INTO products (title, link, stock, keywords) VALUES (?, ?, ?, ?)""", (title, link, stock, keyword))
        except sqlite3.IntegrityError:
            # The UNIQUE constraint on link fired: the product already
            # exists, so check for a stock update instead.
            c.execute('SELECT stock FROM products WHERE link=?', (link,))
            row = c.fetchone()
            old_stock = "" if row is None or row[0] is None else str(row[0])
            if old_stock.strip() == stock.strip():
                log('w', "Product at URL: " + link + " already exists in the database.")
            else:
                log('s', "Product at URL: " + link + " changed stock.")
                # Store the NEW stock so the same change is not reported
                # again on the next pass.
                c.execute("""UPDATE products SET stock = ? WHERE link= ?""", (stock, link))
                alert = True
        else:
            log('s', "Found new product with keyword " + str(keyword) + ". Link = " + str(link))
            alert = True

        conn.commit()
    except sqlite3.Error as e:
        log('e', "database error: " + str(e))
    finally:
        # Always release the connection, including on unexpected errors.
        conn.close()

    # Return whether or not the product is new or changed
    return alert

def notify(product, slack, discord):
    """Send a notification about *product* to Slack and/or Discord.

    Args:
        product: Object exposing ``title``, ``link``, ``image_url`` and
            ``stock_options`` (an iterable of size-name strings).
        slack: Slack incoming-webhook URL; a falsy value skips Slack.
        discord: Discord webhook URL; a falsy value skips Discord.

    Returns:
        None.
    """
    # Every size is followed by a single space, e.g. "S M L ".
    sizes = "".join(size + " " for size in product.stock_options)

    if slack:
        sc = slackweb.Slack(url=slack)
        attachment = {
            "title": product.title,
            "color": "#EAF4EC",
            "text": product.link,
            "fields": [
                {
                    "title": "Sizes",
                    "value": sizes,
                    "short": False
                }
            ],
            "mrkdwn_in": ["text"],
            "thumb_url": product.image_url,
            "footer": "BBGR",
            "footer_icon": "https://platform.slack-edge.com/img/default_application_icon.png",
            "ts": time.time()
        }
        sc.notify(attachments=[attachment])

    if discord:
        embed = Webhook(discord, color=0xEAF4EC)
        embed.set_title(title=product.title, url=product.link)
        embed.set_thumbnail(url=product.image_url)
        embed.add_field(name="Sizes", value=sizes)
        embed.set_footer(text='BBGR', icon='https://cdn.discordapp.com/embed/avatars/0.png', ts=True)
        embed.post()

def _pick_proxies(proxylist):
    """Build a requests-style proxies mapping from the lines of proxy.txt."""
    candidates = [entry.strip() for entry in proxylist if entry.strip()]
    if not candidates:
        return None
    # requests expects a single proxy URL string per scheme, not a list.
    # NOTE(review): only an "http" entry is configured, so HTTPS requests are
    # not routed through the chosen proxy - confirm whether that is intended.
    return {"http": random.choice(candidates)}


def _get_with_retry(url, label, proxies):
    """GET *url*, retrying once after a pause; return the response or None.

    *label* is the URL text used in log messages.
    """
    try:
        # verify=False disables TLS certificate checks for these requests.
        return requests.get(url, timeout=5, verify=False, proxies=proxies)
    except requests.exceptions.RequestException:
        log('e', 'Connection to URL: ' + label + " failed. Retrying...")
        time.sleep(5)

    try:
        return requests.get(url, timeout=8, verify=False, proxies=proxies)
    except requests.exceptions.RequestException:
        log('e', 'Connection to URL: ' + label + " failed.")
        return None


def _load_sizes(url):
    """Return the size names under 'available_sizes' in the JSON at *url*.

    Returns ['Unavailable'] when the list is empty and None when the document
    cannot be fetched or parsed (the failure is logged).
    """
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=8) as resp:
            payload = json.loads(resp.read().decode('utf-8'))
        size_opts = payload['available_sizes']
        if not size_opts:
            return ['Unavailable']
        return [size['name'] for size in size_opts]
    except (OSError, ValueError, KeyError, TypeError) as err:
        log('e', "Unable to load stock data from " + url + ": " + str(err))
        return None


def _handle_early_link(url, response, slack, discord):
    """Parse a single product page and alert when it is new or changed."""
    if response.status_code == 404:
        log('e', "Unable to parse that link..")
        return

    page = soup(response.text, "html.parser")

    title_tags = page.findAll('span', class_='prod-title')
    title = title_tags[0].text.strip() if title_tags else ""
    if not title:
        title = "N/A"

    # NOTE(review): assumes the image address is in the tag's "src"
    # attribute, like the listing images - confirm against the page markup.
    image_tags = page.findAll('img', class_="js-scroll-gallery-snap-target")
    image_url = "N/A"
    if image_tags:
        image_url = str(image_tags[0].get('src') or "N/A")

    stock_data = _load_sizes(url + ".json" + "?admin=True")
    if stock_data is None:
        return

    product = Product(title, url, stock_data, "N/A", image_url, stock_data)
    if add_to_db(product):
        notify(product, slack, discord)


def _handle_listing(html, site, keywords, slack, discord):
    """Scan a listing page and alert on products whose caption matches a keyword."""
    page = soup(html, "html.parser")
    articles = page.findAll("article", class_="product")
    captions = page.findAll("div", class_='brand-name')
    images = page.findAll('img', class_='top')

    hrefs = []
    for article in articles:
        anchor = article.find('a', attrs={"itemprop": "url"})
        if anchor is not None and anchor.has_attr("href"):
            hrefs.append(anchor["href"])

    # Links, captions and images are paired by position.
    # NOTE(review): presumably the page lists them in the same order - verify.
    for index, (href, caption) in enumerate(zip(hrefs, captions)):
        caption_text = caption.text
        upper_caption = caption_text.upper()
        keyword = next((word for word in keywords if word.upper() in upper_caption), None)
        if keyword is None:
            continue

        product_url = urllib.parse.urljoin(site, href)
        stock_data = _load_sizes(product_url + '.json')
        if stock_data is None:
            continue

        image_url = "N/A"
        if index < len(images):
            image_url = str(images[index].get('src', "N/A"))

        product = Product(caption_text, product_url, stock_data, keyword, image_url, stock_data)
        if add_to_db(product):
            notify(product, slack, discord)


def monitor(link, keywords, slack, discord):
    """Check one monitored link and send alerts for new or changed products.

    Links containing 'omia' (not at position 0) are handled as single product
    pages; any other link is handled as a listing page whose products are
    filtered by *keywords*. A link ending in '=' is expanded into one search
    URL per keyword.

    Args:
        link: URL to check.
        keywords: Iterable of keyword strings.
        slack: Slack webhook URL or a falsy value.
        discord: Discord webhook URL or a falsy value.

    Returns:
        None. The function returns early when a page cannot be fetched after
        one retry.
    """
    log('i', "Checking site: " + link + "...")

    proxy = _pick_proxies(read_from_txt("proxy.txt"))

    # Scheme and host of the monitored site, used to resolve product links.
    parts = urllib.parse.urlsplit(link)
    site = (parts.scheme or "http") + "://" + parts.netloc

    is_early_link = link.find('omia') > 0

    # build search links
    if link.endswith('='):
        links = [link + word for word in keywords]
    else:
        links = [link]

    for url in links:
        request_url = url + "?admin=True" if is_early_link else url
        response = _get_with_retry(request_url, url, proxy)
        if response is None:
            return

        if is_early_link:
            _handle_early_link(url, response, slack, discord)
        else:
            _handle_listing(response.text, site, keywords, slack, discord)


def __main__():
    """Load the configuration and monitor every configured site forever.

    Reads the Slack and Discord webhook URLs from config.json and the list of
    sites from ow-pages.txt, then repeatedly starts one monitor thread per
    site, two seconds apart, and waits for all of them before the next pass.

    NOTE(review): no call to this function is visible in this file - confirm
    that an entry point invokes it.
    """
    # Ignore insecure messages (for now)
    requests.packages.urllib3.disable_warnings()

    with open('config.json') as config:
        j = json.load(config)

    ######### CHANGE THESE #########
    # KEYWORDS: one string per entry
    keywords = [
        "converse",
        "UNC",
        "Jordan",
        "Mercurial",
        "Zoom-Fly",
        "Nike",
    ]
    # A missing entry simply disables that notification channel.
    slack = j.get('slack')
    discord = j.get('discord')

    # Load sites from file, skipping blank lines
    sites = [site.strip() for site in read_from_txt("ow-pages.txt") if site.strip()]
    if not sites:
        log('w', "No sites to monitor in: ow-pages.txt")
        return

    # Start monitoring sites
    while True:
        threads = []
        for site in sites:
            t = Thread(target=monitor, args=(site, keywords, slack, discord))
            threads.append(t)
            t.start()
            time.sleep(2)

        # Wait for this pass to finish so threads do not pile up across passes.
        for t in threads:
            t.join()