· 6 years ago · Oct 28, 2019, 06:28 PM
1import datetime
2import re
3import sqlite3
4import urllib.request as urllib
5from datetime import datetime
6from tkinter import *
7from tkinter import messagebox, ttk
8
9from bs4 import BeautifulSoup
10
11
# SQLite database file that holds the ZAPATILLAS table.
dbfilename = "zapatillas.db"
# Base title of the main window; a wait notice is appended to it while loading.
windowName = "Web Scraping - Sprinter"
14
15
def createTableZapatillas():
    """Recreate the ZAPATILLAS table from scratch.

    Opens the SQLite database named by ``dbfilename``, drops any existing
    ZAPATILLAS table and creates an empty one with the columns NOMBRE, MARCA,
    PRECIOOLD, PRECIOACTUAL, ESTRELLAS and PUNTUACION. Progress messages are
    printed to stdout.

    Raises:
        sqlite3.Error: if the database cannot be opened or a statement fails.
    """
    conn = sqlite3.connect(dbfilename)
    try:
        print("Opened database successfully")
        conn.execute('''DROP TABLE IF EXISTS ZAPATILLAS;''')
        print("Old table dropped successfully")
        conn.execute('''CREATE TABLE IF NOT EXISTS ZAPATILLAS
            (NOMBRE CHAR(255),
            MARCA CHAR(255),
            PRECIOOLD REAL,
            PRECIOACTUAL REAL,
            ESTRELLAS REAL,
            PUNTUACION INTEGER);''')
        # Explicit commit; a harmless no-op when no transaction is open.
        conn.commit()
        print("Table created successfully")
    finally:
        # Release the connection even when one of the statements raised.
        conn.close()
30
31
def saveZapatillas(zapatillas):
    """Insert every given shoe into the ZAPATILLAS table.

    Args:
        zapatillas: iterable of objects exposing the attributes ``nombre``,
            ``marca``, ``precio_old``, ``precio_actual``, ``estrellas`` and
            ``puntuacion`` (in that column order).

    Raises:
        sqlite3.Error: if the database cannot be opened or the insert fails;
            nothing is committed in that case.
    """
    rows = [
        (z.nombre, z.marca, z.precio_old, z.precio_actual, z.estrellas, z.puntuacion)
        for z in zapatillas
    ]
    conn = sqlite3.connect(dbfilename)
    try:
        print("Saving into database...")
        conn.executemany("insert into ZAPATILLAS values (?, ?, ?, ?, ?, ?)", rows)
        # Single commit for the whole batch.
        conn.commit()
    finally:
        # Closing without a commit discards a partially written batch.
        conn.close()
39
40
# Print iterations progress
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█', printEnd="\r"):
    """Draw a one-line terminal progress bar; call once per loop iteration.

    Args:
        iteration: current iteration (int).
        total: total number of iterations (int). A total of 0 is rendered as
            a completed bar instead of raising ``ZeroDivisionError``.
        prefix: text printed before the bar.
        suffix: text printed after the percentage.
        decimals: number of decimals shown in the percentage.
        length: width of the bar in characters.
        fill: character used for the completed part of the bar.
        printEnd: terminator passed to ``print`` (e.g. "\\r", "\\r\\n").
    """
    if total:
        fraction = iteration / float(total)
        # Integer arithmetic keeps the cell count exact (no float rounding).
        filledLength = int(length * iteration // total)
    else:
        # Nothing to process: report the (empty) job as finished.
        fraction = 1.0
        filledLength = length

    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filledLength + '-' * (length - filledLength)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end=printEnd)
    # Move to a fresh line once the bar is complete.
    if iteration == total:
        print()
62
63
64
class Zapatilla:
    """Plain record for one shoe.

    Attributes mirror the constructor arguments: ``nombre``, ``marca``,
    ``precio_old``, ``precio_actual``, ``estrellas`` and ``puntuacion``.
    """

    def __init__(self, nombre, marca, precio_old, precio_actual, estrellas, puntuacion):
        self.nombre = nombre
        self.marca = marca
        self.precio_old = precio_old
        self.precio_actual = precio_actual
        self.estrellas = estrellas
        self.puntuacion = puntuacion

    def __str__(self):
        """Return a multi-line description, one "\\nLabel: value" per field.

        Every value goes through ``%s`` formatting, so a ``None`` or
        non-string ``nombre``/``marca`` no longer raises ``TypeError``.
        """
        fields = (
            ("Nombre", self.nombre),
            ("Marca", self.marca),
            ("Precio old", self.precio_old),
            ("Precio actual", self.precio_actual),
            ("Estrellas", self.estrellas),
            ("Puntuación", self.puntuacion),
        )
        return "".join("\n%s: %s" % (label, value) for label, value in fields)
76
77
def getHTML(url):
    """Download *url* and return the raw response body.

    Args:
        url: address to open with ``urlopen``.

    Returns:
        The response body as ``bytes`` on success, or the empty string ``""``
        when the request fails with ``HTTPError`` or ``URLError`` (the error
        code or reason is printed to stdout).
    """
    # Imported here because the module-level name ``urllib`` is an alias for
    # ``urllib.request``; reaching the exception classes through
    # ``urllib.error`` on that alias is not reliable.
    from urllib.error import HTTPError, URLError

    try:
        # The context manager closes the response; the original close() call
        # sat after the return statement and never ran.
        with urllib.urlopen(url) as f:
            return f.read()
    except HTTPError as e:
        # HTTPError subclasses URLError, so it must be handled first.
        print("Ocurrió un error")
        print(e.code)
        return ""
    except URLError as e:
        print("Ocurrió un error")
        print(e.reason)
        return ""
91
92
def datosPuntuacion(link):
    """Fetch a product page and extract its rating summary.

    Args:
        link: URL of the product page.

    Returns:
        A two-item list ``[num_of_ratings, average]`` holding the ``.string``
        of the first ``<span>`` inside ``<p class="num__of__ratings">`` and
        ``<div class="average">`` respectively. An entry is ``None`` when the
        corresponding markup is missing (for example when the download
        failed), instead of raising ``AttributeError``.
    """
    soup = BeautifulSoup(getHTML(link), 'html.parser')

    def _span_text(tag_name, css_class):
        # Each lookup may come back empty, so check every step of the chain.
        container = soup.find(tag_name, {"class": css_class})
        span = container.find('span') if container is not None else None
        return span.string if span is not None else None

    return [_span_text("p", "num__of__ratings"), _span_text("div", "average")]
98
99
def _parsePrecio(tag):
    """Convert a price tag whose text starts like "59,99" into a float.

    Raises:
        AttributeError: if *tag* is None or its text does not start with
            digits, a comma and digits.
        TypeError: if the tag has no single string (``tag.string`` is None).
    """
    return float(re.match(r"(\d+,\d+).*", tag.string).group(1).replace(",", "."))


def cargarDatos():
    """Scrape the men's shoes listing and rebuild the database from it.

    Recreates the ZAPATILLAS table, downloads the listing page, visits every
    product link to read its rating data, stores the resulting ``Zapatilla``
    objects and finally shows a message box with the number stored. While
    running, the main window title carries a wait notice; the normal title is
    restored even when the scrape fails part-way.
    """
    print("Getting data...")
    root.title(windowName + ' - Espere...')

    try:
        zapatillas = []
        createTableZapatillas()

        soup = BeautifulSoup(getHTML("https://www.sprinter.es/zapatillas-de-hombre/"), 'html.parser')
        products = soup.findAll("div", {"class": "product"})

        for i, z in enumerate(products, start=1):
            a = z.find("a", {"class": "product__image"})
            link = "https://www.sprinter.es" + a['href']
            par = datosPuntuacion(link)

            product_right = z.find("div", {"class": "product__right"})

            # Name and brand: the brand is taken as the first word of the name.
            nombre = product_right.find("a", {"class": "product__name"}).string
            marca = nombre.split(" ")[0]

            # Prices live under product__data > product__info > product__price.
            product__data = product_right.find("div", {"class": "product__data"})
            product__info = product__data.find("div", {"class": "product__info"})
            product__price = product__info.find("div", {"class": "product__price"})

            # The current price is mandatory; a parsing failure propagates.
            precio_actual = _parsePrecio(
                product__price.find("span", {"class": "product__price--actual"}))

            # The old price is optional: fall back to 0.0 when it is absent
            # or not in the expected format.
            try:
                precio_old = _parsePrecio(
                    product__price.find("span", {"class": "product__price--old"}))
            except (AttributeError, TypeError):
                precio_old = 0.0

            # par is [num_of_ratings, average]; average is stored as
            # estrellas and num_of_ratings as puntuacion.
            zapatillas.append(Zapatilla(nombre, marca, precio_old, precio_actual, par[1], par[0]))
            printProgressBar(i, len(products), prefix='Extrayendo datos:', suffix='Complete', length=50)

        saveZapatillas(zapatillas)
        messagebox.showinfo("Info", str(len(zapatillas)) + " zapatillas almacenadas en la base de datos.")
    finally:
        # Never leave the window stuck on the wait title.
        root.title(windowName)
157
158
# Query: search shoes by name
def getElementosByNombre(nombre):
    """Return the stored shoes whose NOMBRE equals *nombre* exactly.

    Args:
        nombre: value compared against the NOMBRE column (bound as a query
            parameter).

    Returns:
        A list of ``(NOMBRE, MARCA, PRECIOACTUAL)`` tuples; empty when
        nothing matches.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    conn = sqlite3.connect(dbfilename)
    try:
        cursor = conn.execute(
            'SELECT NOMBRE, MARCA, PRECIOACTUAL FROM ZAPATILLAS where NOMBRE = ?', (nombre,))
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()
168
def mostrarDatos(lista):
    """Show search results in a scrollable list inside a new window.

    Args:
        lista: iterable of rows indexed as ``(nombre, marca, precio_actual)``,
            matching the columns selected by ``getElementosByNombre``.
    """
    list_window = Toplevel()
    list_window.geometry("500x400")
    scrollbar = Scrollbar(list_window, orient="vertical")
    Lb1 = Listbox(list_window, width=50, yscrollcommand=scrollbar.set)
    scrollbar.config(command=Lb1.yview)
    scrollbar.pack(side="right", fill="y")
    Lb1.pack(side="left", fill="both", expand=True)

    for n in lista:
        Lb1.insert("end", n[0])
        # The labels now describe the columns actually selected (brand and
        # current price). str() is required because the price is a float and
        # cannot be concatenated to a string directly.
        Lb1.insert("end", "Marca: " + str(n[1]))
        Lb1.insert("end", "Precio actual: " + str(n[2]))
        # Blank line separating consecutive results.
        Lb1.insert("end", "")
185
def buscarNombre():
    """Open a small dialog that asks for a shoe name and lists the matches.

    Pressing "Buscar" reads the entry text, queries the database with
    ``getElementosByNombre`` and hands the rows to ``mostrarDatos``.
    """
    dialog = Toplevel()
    dialog.geometry("400x100")

    row = Frame(dialog)
    row.pack()

    Label(row, text="Nombre:").pack(side=LEFT)
    name_entry = Entry(row, bd=1)
    name_entry.pack(side=RIGHT)

    def on_search():
        # Read the text at click time, not when the dialog is built.
        mostrarDatos(getElementosByNombre(name_entry.get()))

    Button(dialog, text="Buscar", command=on_search).pack()
200
201
def noImplementado():
    """Open a new window telling the user the feature is not implemented."""
    Label(Toplevel(), text="Función no implementada").pack(side=LEFT)
206
207
# Main window and its menu bar.
root = Tk()
root.title(windowName)
root.geometry("500x400")
menubar = Menu(root)

# "Datos" menu: load the scraped data or quit the application.
filemenu = Menu(menubar, tearoff=0)
filemenu.add_command(label="Cargar", command=cargarDatos)
filemenu.add_separator()
filemenu.add_command(label="Salir", command=root.quit)
menubar.add_cascade(label="Datos", menu=filemenu)

# "Buscar" menu: only the search by name is implemented so far.
editmenu = Menu(menubar, tearoff=0)
for _label, _command in (
    ("Nombre", buscarNombre),
    ("Ordenar por puntuación", noImplementado),
    ("Marcas", noImplementado),
):
    editmenu.add_command(label=_label, command=_command)
menubar.add_cascade(label="Buscar", menu=editmenu)

# Attach the menu bar and hand control to the Tk event loop.
root.config(menu=menubar)
root.mainloop()