1XsydMdb

· 7 years ago · Feb 01, 2019, 10:12 PM
1import requests
2import json
3from bs4 import BeautifulSoup as soup
4from log import log as log
5import time
6from datetime import datetime
7import random
8import sqlite3
9from discord_hooks import Webhook
10import slackweb
11from threading import Thread
12import urllib.request
13
# Default headers for the JSON stock requests: a desktop Chrome User-Agent
# string plus a JSON content type.
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/60.0.3107.4 Safari/537.36'
)
headers = {
    'User-Agent': user_agent,
    'Content-Type': 'application/json',
}
18
class Product:
    """Plain record describing one product seen by the monitor.

    All constructor arguments are stored unchanged as attributes of the
    same name; no validation or conversion is performed.
    """

    def __init__(self, title, link, stock, keyword, image_url, stock_options):
        # Identity of the product: display title, stock value and page URL.
        self.title, self.stock, self.link = title, stock, link
        # Keyword that matched the product, plus the data used in alerts.
        self.keyword, self.image_url = keyword, image_url
        self.stock_options = stock_options
28
# Neither exception name is defined or imported anywhere in the visible part
# of this file, so the raise statements below would otherwise fail with a
# NameError instead of the intended error.
class FileNotFound(Exception):
    """Raised when a required input file cannot be opened."""


class NoDataLoaded(Exception):
    """Raised when an input file was opened but contains no lines."""


def read_from_txt(path):
    """Return the lines of the text file at *path* with newlines stripped.

    path: location of the text file to read.

    Returns a list with one string per line of the file. Only "\\n"
    characters are stripped from each end; other whitespace and blank
    lines are kept.

    Raises FileNotFound if the file cannot be opened or read, and
    NoDataLoaded if it contains no lines at all.
    """
    # Load data from the txt file; the context manager closes the handle
    # even when reading fails part-way through.
    try:
        with open(path, "r") as f:
            raw_lines = f.readlines()
    except OSError as err:
        log('e', "Couldn't locate: " + path)
        raise FileNotFound(path) from err

    if not raw_lines:
        log('w', "No data found in: " + path)
        raise NoDataLoaded(path)

    # Parse the data
    return [line.strip("\n") for line in raw_lines]
56
57
def add_to_db(product):
    """Record *product* in ``products.db`` and report whether to alert.

    product: object exposing ``title``, ``stock``, ``link`` and ``keyword``.
        ``stock`` is stored as ``str(product.stock)``.

    Returns True when the product's link was not in the database yet, or
    when its stored stock differs from the current stock (the row is then
    updated to the current stock). Returns False when nothing changed or a
    database error was logged.
    """
    # Initialize variables
    title = product.title
    stock = str(product.stock)
    link = product.link
    keyword = product.keyword
    alert = False

    # Create database
    conn = sqlite3.connect('products.db')
    try:
        c = conn.cursor()
        c.execute("""CREATE TABLE IF NOT EXISTS products(title TEXT, link TEXT UNIQUE, stock TEXT, keywords TEXT)""")

        try:
            # Add product to database if it's unique
            c.execute("""INSERT INTO products (title, link, stock, keywords) VALUES (?, ?, ?, ?)""", (title, link, stock, keyword))
        except sqlite3.IntegrityError:
            # The UNIQUE constraint on link fired: the product already
            # exists, so check for stock updates instead.
            try:
                c.execute('SELECT stock FROM products WHERE link=?', (link,))
                row = c.fetchone()
                # Read the column value directly rather than slicing the
                # repr of the row tuple.
                old_stock = row[0] if row is not None else None
                if old_stock is not None and str(old_stock).strip() == stock.strip():
                    log('w', "Product at URL: " + link + " already exists in the database.")
                else:
                    # Store the NEW stock; writing the old value back would
                    # leave the row unchanged and re-alert on every check.
                    log('s', "Product at URL: " + link + " changed stock.")
                    c.execute("""UPDATE products SET stock = ? WHERE link= ?""", (stock, link))
                    alert = True
            except sqlite3.Error as e:
                log('e', "database error: " + str(e))
        except sqlite3.Error as e:
            # Any other database failure is logged rather than being
            # mistaken for a duplicate link.
            log('e', "database error: " + str(e))
        else:
            log('s', "Found new product with keyword " + keyword + ". Link = " + link)
            alert = True

        conn.commit()
    finally:
        # Close connection to the database, even if something above raised
        conn.close()

    # Return whether or not an alert should be sent
    return alert
106
def notify(product, slack, discord):
    """Send a stock alert for *product* to the configured webhooks.

    product: object exposing ``title``, ``link``, ``image_url`` and
        ``stock_options`` (an iterable of size-name strings).
    slack: Slack webhook URL; nothing is sent to Slack when it is falsy.
    discord: Discord webhook URL; nothing is sent to Discord when falsy.

    Returns None. Exceptions raised by the webhook clients are not caught.
    """
    # Every size is followed by one space, so the text ends with a trailing
    # space (and is empty when there are no options), exactly as before.
    sizes = "".join(size + " " for size in product.stock_options)

    if slack:
        sc = slackweb.Slack(url=slack)
        attachment = {
            "title": product.title,
            "color": "#EAF4EC",
            "text": product.link,
            "fields": [
                {
                    "title": "Sizes",
                    "value": sizes,
                    "short": False
                }
            ],
            "mrkdwn_in": ["text"],
            "thumb_url": product.image_url,
            "footer": "BBGR",
            "footer_icon": "https://platform.slack-edge.com/img/default_application_icon.png",
            "ts": time.time()
        }
        sc.notify(attachments=[attachment])

    if discord:
        embed = Webhook(discord, color=0xEAF4EC)
        embed.set_title(title=product.title, url=product.link)
        embed.set_thumbnail(url=product.image_url)
        embed.add_field(name="Sizes", value=sizes)
        embed.set_footer(text='BBGR', icon='https://cdn.discordapp.com/embed/avatars/0.png', ts=True)
        embed.post()
147
def _site_root(link):
    """Return the scheme and host part of *link*, without any path."""
    # Anything that does not start with https:// is treated as http://.
    scheme = "https://" if link.startswith("https://") else "http://"
    host = link[len(scheme):].split("/", 1)[0]
    return scheme + host


def _get_with_retry(url, label):
    """GET *url*, retrying once after a 5 second pause.

    *label* is the URL text used in the log messages. Returns the response,
    or None when both attempts failed.
    """
    try:
        return requests.get(url, timeout=5, verify=False)
    except requests.exceptions.RequestException:
        log('e', 'Connection to URL: ' + label + " failed. Retrying...")
    time.sleep(5)
    try:
        return requests.get(url, timeout=8, verify=False)
    except requests.exceptions.RequestException:
        log('e', 'Connection to URL: ' + label + " failed.")
        return None


def _fetch_sizes(json_url):
    """Return the size names listed under ``available_sizes`` at *json_url*.

    Returns ['Unavailable'] when the list is empty, and None (after logging)
    when the document could not be downloaded or does not have the expected
    structure.
    """
    req = urllib.request.Request(json_url, headers=headers)
    try:
        # The timeout keeps one stalled request from blocking the thread
        # forever; the context manager closes the response.
        with urllib.request.urlopen(req, timeout=8) as resp:
            payload = json.loads(resp.read().decode('utf-8'))
        size_opts = payload['available_sizes']
        if not size_opts:
            return ['Unavailable']
        return [size['name'] for size in size_opts]
    except (OSError, ValueError, KeyError, TypeError) as err:
        # OSError covers urllib's URLError/HTTPError and socket timeouts;
        # ValueError covers undecodable or malformed JSON.
        log('e', "Unable to load stock data from " + json_url + ": " + str(err))
        return None


def _check_product_page(url, slack, discord):
    """Check one product page and alert on new or changed stock.

    Returns False only when the page could not be fetched at all, so the
    caller can stop; returns True otherwise.
    """
    r = _get_with_retry(url + "?admin=True", url)
    if r is None:
        return False
    if r.status_code == 404:
        # Nothing useful to parse on a missing page; skip this link.
        log('e', "Unable to parse that link..")
        return True

    # parse the page to collect data
    page = soup(r.text, "html.parser")
    title_tags = page.findAll('span', class_='prod-title')
    title = title_tags[0].text.strip() if title_tags else ""
    image_tags = page.findAll('img', class_="js-scroll-gallery-snap-target")

    # paddings
    if not title:
        title = "N/A"
    # Use the first image's src attribute as the URL, matching how listing
    # pages are handled; fall back to "N/A" when there is none.
    image_url = str(image_tags[0].get('src', "N/A")) if image_tags else "N/A"

    # get the data
    stock_data = _fetch_sizes(url + ".json" + "?admin=True")
    if stock_data is None:
        return True

    product = Product(title, url, stock_data, "N/A", image_url, stock_data)
    if add_to_db(product):
        notify(product, slack, discord)
    return True


def _scan_listing(html, site, keywords, slack, discord):
    """Scan one listing page and alert on products matching a keyword."""
    page = soup(html, "html.parser")
    articles = page.findAll("article", class_="product")
    captions = page.findAll("div", class_='brand-name')
    images = page.findAll('img', class_='top')

    # NOTE(review): assumes the n-th caption and image belong to the n-th
    # product article -- confirm against the page markup. zip() stops at the
    # shortest list, so a missing caption/image cannot raise IndexError, and
    # an article without a link no longer shifts later products.
    for article, caption, image in zip(articles, captions, images):
        anchor = article.find('a', attrs={"itemprop": "url"})
        href = anchor.get("href") if anchor is not None else None
        if not href:
            continue

        caption_text = caption.text
        caption_upper = caption_text.upper()
        # The first matching keyword wins; further matches would only repeat
        # the same download for a link that is already recorded.
        keyword = next((k for k in keywords if k.upper() in caption_upper), None)
        if keyword is None:
            continue

        product_url = site + href
        stock_data = _fetch_sizes(product_url + '.json')
        if stock_data is None:
            continue

        product = Product(caption_text, product_url, stock_data, keyword,
                          str(image.get('src', "N/A")), stock_data)
        if add_to_db(product):
            notify(product, slack, discord)


def monitor(link, keywords, slack, discord):
    """Check one configured link for new or restocked products.

    link: URL to check. A link ending in '=' is treated as a search URL and
        expanded into one URL per keyword. A link containing 'omia' after
        its first character is handled as a single product page; any other
        link is handled as a listing page whose products are filtered by
        keyword.
    keywords: list of strings matched case-insensitively against product
        captions on listing pages.
    slack, discord: webhook URLs forwarded to notify().

    Returns None. Stops early when a page cannot be fetched after one retry.
    """
    log('i', "Checking site: " + link + "...")

    # Parse the site from the link
    site = _site_root(link)
    is_early_link = link.find('omia') > 0

    # build search links
    if link.endswith('='):
        links = [link + word for word in keywords]
    else:
        links = [link]

    pages = []
    for url in links:
        if is_early_link:
            if not _check_product_page(url, slack, discord):
                return
        else:
            r = _get_with_retry(url, url)
            if r is None:
                return
            pages.append(r)

    for p in pages:
        _scan_listing(p.text, site, keywords, slack, discord)
289
290
def __main__():
    """Load the configuration and monitor every configured site forever.

    Reads the ``slack`` and ``discord`` webhook URLs from ``config.json``
    and the site list from ``ow-pages.txt``, then repeatedly starts one
    monitor() thread per site, pausing 2 seconds between sites. Returns
    early only when the site list holds nothing but blank lines.
    """
    # Ignore insecure messages (for now)
    requests.packages.urllib3.disable_warnings()

    with open('config.json') as config:
        j = json.load(config)

    ######### CHANGE THESE #########
    #  KEYWORDS: (separated by -)  #
    keywords = [
        "converse",
        "UNC",
        "Jordan",
        "Mercurial",
        "Zoom-Fly",
        "Nike",
    ]
    slack = j['slack']
    discord = j['discord']

    # Load sites from file, skipping blank lines once up front
    sites = [site for site in read_from_txt("ow-pages.txt") if site.strip()]
    if not sites:
        # Without this guard the loop below would spin without sleeping.
        log('w', "No sites to monitor in: ow-pages.txt")
        return

    # Most recent worker thread per site, so a site whose previous check is
    # still running is not checked by a second thread at the same time.
    workers = {}

    # Start monitoring sites
    while True:
        for site in sites:
            previous = workers.get(site)
            if previous is None or not previous.is_alive():
                t = Thread(target=monitor, args=(site, keywords, slack, discord))
                workers[site] = t
                t.start()
            time.sleep(2)