M8Ys0feN

· 7 years ago · Feb 02, 2019, 02:46 PM
1import requests
2import json
3from bs4 import BeautifulSoup as soup
4from log import log as log
5import time
6from datetime import datetime
7import random
8import sqlite3
9from discord_hooks import Webhook
10import slackweb
11from threading import Thread
12import urllib.request
13
# Browser-like User-Agent string sent with the JSON requests made through urllib.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3107.4 Safari/537.36'

# Default request headers shared by every urllib.request.Request in this file.
headers = {
    'User-Agent': user_agent,
    'Content-Type': 'application/json',
}
18
class Product():
    """Plain record describing one monitored product.

    The constructor stores its six arguments unchanged on the instance:
    ``title``, ``link``, ``stock``, ``keyword``, ``image_url`` and
    ``stock_options``. No validation or conversion is performed.
    """

    def __init__(self, title, link, stock, keyword, image_url, stock_options):
        # Identity of the product.
        self.title, self.link = title, link
        # Stock snapshot and the list of options shown in notifications.
        self.stock, self.stock_options = stock, stock_options
        # Keyword associated with the product and its thumbnail URL.
        self.keyword, self.image_url = keyword, image_url
28
class FileNotFound(Exception):
    """Raised by read_from_txt when the requested file cannot be opened."""


class NoDataLoaded(Exception):
    """Raised by read_from_txt when the requested file contains no lines."""


def read_from_txt(path):
    """Read a text file and return its lines without trailing newlines.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one string per line of the file, newline characters
        removed. Blank lines are kept as empty strings.

    Raises:
        FileNotFound: The file could not be opened or read.
        NoDataLoaded: The file was read successfully but is empty.
    """
    # NOTE: FileNotFound / NoDataLoaded were raised here but are neither
    # defined nor imported in the visible part of this file, so both error
    # paths used to fail with NameError. They are defined just above.
    try:
        # The context manager closes the handle even if readlines() fails.
        with open(path, "r") as f:
            raw_lines = f.readlines()
    except OSError as err:
        log('e', "Couldn't locate: " + path)
        raise FileNotFound(path) from err

    if not raw_lines:
        log('w', "No data found in: " + path)
        raise NoDataLoaded(path)

    return [line.strip("\n") for line in raw_lines]
56
57
def add_to_db(product):
    """Record a product in ``products.db`` and report whether to alert.

    The product is inserted into the ``products`` table (created on first
    use). ``link`` is declared UNIQUE, so inserting an already known link
    fails with an integrity error; in that case the stored stock string is
    compared with the current one and updated if it differs.

    Args:
        product: Object exposing ``title``, ``stock``, ``link`` and
            ``keyword`` attributes. ``stock`` is stored as ``str(stock)``.

    Returns:
        True if the product is new or its stock changed, otherwise False
        (including when a database error was logged).
    """
    title = product.title
    stock = str(product.stock)
    link = product.link
    keyword = product.keyword
    alert = False

    conn = sqlite3.connect('products.db')
    try:
        c = conn.cursor()
        c.execute("""CREATE TABLE IF NOT EXISTS products(title TEXT, link TEXT UNIQUE, stock TEXT, keywords TEXT)""")

        try:
            c.execute("""INSERT INTO products (title, link, stock, keywords) VALUES (?, ?, ?, ?)""", (title, link, stock, keyword))
            log('s', "Found new product with keyword " + keyword + ". Link = " + link)
            alert = True
        except sqlite3.IntegrityError:
            # UNIQUE(link) violated: the product is already known, so look
            # for a stock change instead.
            c.execute('SELECT stock FROM products WHERE link=?', (link,))
            row = c.fetchone()
            # Read the column value directly rather than slicing the repr
            # of the row tuple.
            old_stock = str(row[0]) if row and row[0] is not None else ""
            if old_stock.strip() == stock.strip():
                log('w', "Product at URL: " + link + " already exists in the database.")
            else:
                log('s', "Product at URL: " + link + " changed stock.")
                # Persist the NEW stock. Writing the old value back (as the
                # code previously did) made every later check alert again.
                c.execute("""UPDATE products SET stock = ? WHERE link= ?""", (stock, link))
                alert = True

        conn.commit()
        c.close()
    except sqlite3.Error as e:
        # Best effort: a database problem is logged, not propagated.
        log('e', "database error: " + str(e))
    finally:
        # Always release the connection, even on unexpected errors.
        conn.close()

    return alert
106
def notify(product, slack, discord):
    """Announce a product on Slack and/or Discord.

    Args:
        product: Object exposing ``title``, ``link``, ``image_url`` and an
            iterable ``stock_options`` of strings.
        slack: Slack webhook URL; nothing is sent to Slack when falsy.
        discord: Discord webhook URL; nothing is sent to Discord when falsy.

    Returns:
        None.
    """
    # Every option is followed by a single space, including the last one.
    sizes = "".join(size + " " for size in product.stock_options)

    if slack:
        sc = slackweb.Slack(url=slack)
        size_field = {
            "title": "Sizes",
            "value": sizes,
            "short": False
        }
        attachment = {
            "title": product.title,
            "color": "#EAF4EC",
            "text": product.link,
            "fields": [size_field],
            "mrkdwn_in": ["text"],
            "thumb_url": product.image_url,
            "footer": "BBGR",
            "footer_icon": "https://platform.slack-edge.com/img/default_application_icon.png",
            "ts": time.time()
        }
        sc.notify(attachments=[attachment])

    if discord:
        embed = Webhook(discord, color=0xEAF4EC)
        embed.set_title(title=product.title, url=product.link)
        embed.set_thumbnail(url=product.image_url)
        embed.add_field(name="Sizes", value=sizes)
        embed.set_footer(text='BBGR', icon='https://cdn.discordapp.com/embed/avatars/0.png', ts=True)
        embed.post()
147
def _site_root(link):
    """Return the ``scheme://host`` prefix of *link*, without a trailing slash."""
    scheme = "https://" if link.startswith("https://") else "http://"
    remainder = link[len(scheme):] if link.startswith(scheme) else link
    return scheme + remainder.split("/", 1)[0]


def _pick_proxy():
    """Return a requests ``proxies`` dict using one random entry of proxy.txt, or None."""
    candidates = [entry.strip() for entry in read_from_txt("proxy.txt") if entry.strip()]
    if not candidates:
        return None
    # requests expects ONE proxy URL string per scheme, not the whole list.
    # Only the "http" scheme is mapped, as before.
    return {"http": random.choice(candidates)}


def _get_with_retry(url, label, proxies):
    """GET *url*; on failure wait 5 s and retry once. Return the response, or None."""
    try:
        return requests.get(url, timeout=5, verify=False, proxies=proxies)
    except requests.exceptions.RequestException:
        log('e', 'Connection to URL: ' + label + " failed. Retrying...")
    time.sleep(5)
    try:
        return requests.get(url, timeout=8, verify=False, proxies=proxies)
    except requests.exceptions.RequestException:
        log('e', 'Connection to URL: ' + label + " failed.")
        return None


def _fetch_size_names(url):
    """Return the size names listed under ``available_sizes`` at *url*, or None on failure."""
    req = urllib.request.Request(url, headers=headers)
    try:
        # A timeout keeps a stalled server from hanging the worker thread.
        with urllib.request.urlopen(req, timeout=10) as resp:
            payload = json.loads(resp.read().decode('utf-8'))
        size_opts = payload['available_sizes']
        if not size_opts:
            return ['Unavailable']
        return [size['name'] for size in size_opts]
    except (OSError, ValueError, KeyError, TypeError) as err:
        # OSError covers URLError/HTTPError/timeouts; ValueError covers bad
        # JSON or encoding; KeyError/TypeError cover an unexpected payload.
        log('e', "Unable to load stock data from " + url + ": " + str(err))
        return None


def monitor(link, keywords, slack, discord):
    """Check one site link for products and send alerts for new or changed stock.

    Links containing ``omia`` (not at position 0) are treated as direct
    product pages: the page is fetched with ``?admin=True`` and its stock is
    read from ``<link>.json?admin=True``. Any other link is treated as a
    listing page: each product whose caption contains one of *keywords*
    (case-insensitive) has its stock read from ``<site><href>.json``.
    A link ending in ``=`` is expanded into one search link per keyword.

    Every product found is passed to ``add_to_db``; when that returns True,
    ``notify`` is called with the given webhooks.

    Args:
        link: URL to check.
        keywords: List of keyword strings.
        slack: Slack webhook URL, or a falsy value to disable.
        discord: Discord webhook URL, or a falsy value to disable.

    Returns:
        None.

    Raises:
        Whatever ``read_from_txt("proxy.txt")`` raises when the proxy file
        is missing or empty.
    """
    log('i', "Checking site: " + link + "...")

    proxy = _pick_proxy()
    site = _site_root(link)
    is_early_link = link.find('omia') > 0

    # Build the search links.
    if link.endswith('='):
        links = [link + word for word in keywords]
    else:
        links = [link]

    pages = []
    for url in links:
        if not is_early_link:
            response = _get_with_retry(url, url, proxy)
            if response is not None:
                pages.append(response)
            # A failed link is skipped so the remaining links still run.
            continue

        response = _get_with_retry(url + "?admin=True", url, proxy)
        if response is None:
            continue
        if response.status_code == 404:
            log('e', "Unable to parse that link..")
            continue

        page = soup(response.text, "html.parser")

        title_tags = page.findAll('span', class_='prod-title')
        title = title_tags[0].text.strip() if title_tags else ""
        if not title:
            title = "N/A"

        image_tags = page.findAll('img', class_="js-scroll-gallery-snap-target")
        # Use the first image's src as the thumbnail URL.
        # NOTE(review): assumes the URL lives in ``src`` - confirm on the site.
        image_url = (image_tags[0].get('src') if image_tags else None) or "N/A"

        stock_data = _fetch_size_names(url + ".json" + "?admin=True")
        if stock_data is None:
            continue

        product = Product(title, url, stock_data, "N/A", str(image_url), stock_data)
        if add_to_db(product):
            notify(product, slack, discord)

    for listing in pages:
        page = soup(listing.text, "html.parser")
        articles = page.findAll("article", class_="product")
        captions = page.findAll("div", class_='brand-name')
        images = page.findAll('img', class_='top')

        hrefs = []
        for article in articles:
            anchor = article.find('a', attrs={"itemprop": "url"})
            try:
                hrefs.append(anchor["href"])
            except (TypeError, KeyError):
                # No matching anchor (None) or anchor without href: skip it.
                pass

        # hrefs, captions and images are paired by position, as before.
        for index, href in enumerate(hrefs):
            if index >= len(captions):
                # No caption left to match keywords against.
                break
            caption_text = captions[index].text
            caption_upper = caption_text.upper()

            # The first matching keyword wins. Further matches would only
            # re-fetch the same product and hit the duplicate path.
            keyword = next((k for k in keywords if k.upper() in caption_upper), None)
            if keyword is None:
                continue

            product_url = site + href
            stock_data = _fetch_size_names(product_url + '.json')
            if stock_data is None:
                continue

            image_url = images[index].get('src', "N/A") if index < len(images) else "N/A"
            product = Product(caption_text, product_url, stock_data, keyword, str(image_url), stock_data)
            if add_to_db(product):
                notify(product, slack, discord)
292
293
def __main__():
    """Load the configuration and keep spawning monitor threads forever.

    Reads the ``slack`` and ``discord`` webhook settings from ``config.json``
    and the site list from ``ow-pages.txt``. It then loops endlessly: each
    pass starts one ``monitor`` thread per non-blank site line and sleeps two
    seconds after each start. Threads are never joined.
    """
    # Suppress urllib3's warnings about unverified HTTPS requests.
    requests.packages.urllib3.disable_warnings()

    with open('config.json') as config:
        settings = json.load(config)

    # Keywords matched against product captions - edit to taste.
    keywords = [
        "converse",
        "UNC",
        "Jordan",
        "Mercurial",
        "Zoom-Fly",
        "Nike",
    ]
    slack = settings['slack']
    discord = settings['discord']

    # Sites to watch, one per line.
    sites = read_from_txt("ow-pages.txt")

    while True:
        threads = []
        for site in sites:
            # Only non-blank lines describe a site.
            if site.strip():
                worker = Thread(target=monitor, args=(site, keywords, slack, discord))
                threads.append(worker)
                worker.start()
                # Stagger thread start-up.
                time.sleep(2)