· 4 years ago · May 06, 2021, 09:34 AM
1import csv
2import requests
3import openpyxl
4import os
5import pandas as pd
6import datetime
7from bs4 import BeautifulSoup
8from openpyxl.styles import Font, Border, Side
9from datetime import date, timedelta
10from tqdm import tqdm
11from openpyxl.chart import LineChart,Reference,Series
12from io import StringIO
13from sklearn.linear_model import LinearRegression
14
15thin_border = Border(left = Side(style='thin'),
16 right = Side(style='thin'),
17 top = Side(style='thin'),
18 bottom = Side(style='thin'))
19
20print('Uploading data in progress, please wait!')
21
22### BELOW CODE (CLASS) WILL UPLOAD NEW DATA TO THE FILE
23def find_companies(dict_name):
24 #create dict with links to wig20 companies historical data + names of sheets
25 source = requests.get('https://stooq.pl/t/?i=532').content
26 soup = BeautifulSoup(source, 'lxml')
27
28 names = soup.find_all('td',attrs = {'id':'f13'})
29 names_of_companies = [names[1 + num * 6].text for num in range(0,20)]
30
31 link = 'https://stooq.pl/q/d/l/?s='
32 link2 = '&i=d'
33
34 for i in names_of_companies:
35 dict_name[link + i.lower() + link2] = i
36
37def find_companies2(dict_name2):
38 #create dict with links to wig20 companies indicators + names of sheets
39 source = requests.get('https://www.biznesradar.pl/gielda/indeks:WIG20').content
40 soup = BeautifulSoup(source, 'lxml')
41
42 names = soup.find_all('td')
43
44 names_of_companies = [names[i*13].text.split(" ")[0] for i in range(0,20)]
45
46 link = 'https://www.biznesradar.pl/wskazniki-rentownosci/'
47
48 for i in names_of_companies:
49 dict_name2[link + i] = i
50
51def create_file(sheet_names):
52 path = os.path.expanduser("~/Desktop")
53
54 wb = openpyxl.Workbook()
55
56 for i in sheet_names:
57 wb.create_sheet(i)
58
59 wb.remove(wb['Sheet'])
60
61 wb.save(path +'/Wig20_analysis.xlsm')
62
63class upload_data:
64 def __init__(self,link,sheet):
65 self.link = link
66 self.sheet = sheet
67
68 def download_data(self):
69 #download data and calculate the % diffirance for Zamkniecie and add colour if the value is >0 or <0
70 with requests.Session() as downloaded_file:
71 download = downloaded_file.get(self.link)
72
73 decoded_content = download.content.decode('utf-8')
74
75 cr = csv.reader(decoded_content.splitlines(), delimiter=',')
76 my_list = list(cr)
77 for j in range(0,len(my_list)):
78 for i in range(0,len(my_list[j])):
79 if j>0:
80 try:
81 if i>0:
82 self.sheet.cell(row=j+1,column=i+1).value = float(my_list[::-1][j-1][i])
83 self.sheet.cell(row=j+1,column=i+1).border = thin_border
84
85 else:
86 self.sheet.cell(row=j+1,column=i+1).value = datetime.datetime.strptime(my_list[::-1][j-1][i], '%Y-%m-%d').strftime('%d/%m/%Y')
87 self.sheet.cell(row=j+1,column=i+1).border = thin_border
88 except IndexError:
89 self.sheet.cell(row=j+1,column=i+1).value = 'no data'
90 self.sheet.cell(row=j+1,column=i+1).border = thin_border
91 else:
92 self.sheet.cell(row=j+1,column=i+1).value = my_list[j][i]
93 self.sheet.cell(row=j+1,column=i+1).border = thin_border
94
95 self.sheet.cell(row=j+2,column=8).value = str(self.sheet)
96
97 self.sheet.cell(row=1,column=8).value = 'Tableau'
98 self.sheet.cell(row=11,column=9).value = 'Data wejścia na giełdę:'
99 self.sheet.cell(row=11,column=10).value = my_list[1][0]
100
101 for i in range(1,len(my_list)-1):
102 self.sheet.cell(row=i+1,column=7).value = 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4])
103 self.sheet.cell(row=i+1,column=7).border = thin_border
104 if 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4]) > 0 and i>0:
105 self.sheet.cell(row=i+1,column=7).font = Font(color = "FF00FF00")
106 self.sheet.cell(row=i+1,column=7).number_format = '0.00%'
107 elif 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4]) < 0 and i>0:
108 self.sheet.cell(row=i+1,column=7).font = Font(color = "FFFF0000")
109 self.sheet.cell(row=i+1,column=7).number_format = '0.00%'
110
111 self.sheet.cell(row=1,column=7).value = "Cena zamkniecia - zmiana [%]"
112 self.sheet.cell(row=1,column=7).border = thin_border
113
114 def statistics(self):
115 ### create table with statistics data (Q1,mean,std ETC)
116 r = requests.get(self.link)
117 df = pd.read_csv(StringIO(r.text))
118
119 list_of_headers = ['Otwarcie','Najwyzszy','Najnizszy','Zamkniecie','Wolumen']
120 names = ['Suma','Średnia','Standardowe odchylenie','Wartość minimalna','Q1','Q2','Q3','Wartość maksymalna']
121
122 #add headers
123 for col_num in range(0,len(list_of_headers)):
124 self.sheet.cell(row=1, column=col_num+10).value = list_of_headers[col_num]
125 self.sheet.cell(row=1, column=col_num+10).border = thin_border
126 #add names of rows
127 for row_num in range(0,len(names)):
128 self.sheet.cell(row=row_num+2,column=9).value = names[row_num]
129 self.sheet.cell(row=row_num+2,column=9).border = thin_border
130
131 #add values
132 for col_num in range(0,len(list_of_headers)):
133 for row_num in range(0,8):
134 self.sheet.cell(row=row_num+2, column=col_num+10).value = df[list_of_headers[col_num]].describe().tolist()[row_num]
135 self.sheet.cell(row=row_num+2, column=col_num+10).border = thin_border
136
137 ### CREATE LINEAR REGRESION MODEL TO PREDICT FUTURE DATA
138 #prediction
139 self.sheet.cell(row=1,column=17).value = 'Analiza Regresji Liniowej'
140 self.sheet.cell(row=2,column=17).value = 'Wzór modelu funkcji'
141 self.sheet.cell(row=4,column=17).value = 'Data'
142 self.sheet.cell(row=5,column=17).value = 'Przewidywane wartości'
143 self.sheet.cell(row=6,column=17).value = 'Dopasowanie modelu'
144
145 lm = LinearRegression()
146
147 X = df[['Otwarcie','Najwyzszy','Najnizszy']][::-1]
148 y = df['Zamkniecie'][::-1]
149
150 #create a model
151 lm = LinearRegression()
152 model = lm.fit(X,y)
153
154 #coefficient Zamkniecie
155 a = lm.intercept_
156
157 #coefficient Otwarcie,Najwyzszy,Najnizszy
158 b = lm.coef_.tolist()
159
160 #create formula
161 function_formula = str(a) + ' × ' + str(b[0]) + ' + ' + str(a) + ' × ' + str(b[1]) + ' + ' + str(a) + ' × ' + str(b[2])
162 self.sheet.cell(row=2,column=18).value = function_formula
163
164 #predict future value
165 predicted_value = lm.predict(X)[0:5].tolist()
166
167 #model fit
168 model_evaluation = lm.score(X,y)
169
170 #past values in excel file
171 days = 0
172 tomorrow = date.today() + timedelta(days=1)
173 for i in range(0,5):
174 self.sheet.cell(row=5,column=18+i).value = predicted_value[i]
175 self.sheet.cell(row=5,column=18+i).border = thin_border
176 if (tomorrow + timedelta(days=i)).weekday() not in [5,6]:
177 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i+days)
178 elif (tomorrow + timedelta(days=i)).weekday() == 5:
179 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i+2)
180 days = 2
181 elif (tomorrow + timedelta(days=i)).weekday() == 6:
182 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i+2)
183 days = 2
184
185 self.sheet.cell(row=6,column=18).value = model_evaluation
186
187 def create_charts(self,name,last_row,where1,where2):
188
189 opening = Reference(self.sheet, min_col = 2, min_row = 2,
190 max_col = 2, max_row = last_row)
191
192 highest = Reference(self.sheet, min_col = 3, min_row = 2,
193 max_col = 3, max_row = last_row)
194
195 lowest = Reference(self.sheet, min_col = 4, min_row = 2,
196 max_col = 4, max_row = last_row)
197
198 closing = Reference(self.sheet, min_col = 5, min_row = 2,
199 max_col = 5, max_row = last_row)
200
201 volumen = Reference(self.sheet, min_col = 6, min_row = 2,
202 max_col = 6, max_row = last_row)
203
204 dates = Reference(self.sheet, min_col = 1, min_row = 2,
205 max_col = 1, max_row = last_row)
206 ## first chart
207 chart = LineChart()
208 series = Series(opening, title="Cena otwarcia")
209 chart.append(series)
210 series = Series(highest, title="Cena najwyższa")
211 chart.append(series)
212 series = Series(lowest, title="Cena najniższa")
213 chart.append(series)
214 series = Series(closing, title="Cena zamknięcia")
215 chart.append(series)
216
217 chart.set_categories(dates)
218 chart.title = str(name)
219 chart.x_axis.title = "Data"
220 chart.y_axis.title = "Wartość"
221 self.sheet.add_chart(chart, where1) #h24
222 chart.height = 10 # default is 7.5
223 chart.width = 30 # default is 15
224 ##
225
226 ## second chart
227 chart2 = LineChart()
228 chart2.append(Series(volumen, title = "Wolumen"))
229 chart2.set_categories(dates)
230
231 chart2.title = str(name)
232 chart2.x_axis.title = "Data"
233 chart2.y_axis.title = "Wartość"
234 self.sheet.add_chart(chart2, where2) #h45
235 chart2.height = 10 # default is 7.5
236 chart2.width = 30 # default is 15
237 ##
238
239 def adjusting_width(self):
240
241 # add new columsn so it will be more transparent
242 self.sheet.insert_cols(9)
243
244 list_of_cols = ['A','B','C','D','E','F','H','I','K','L','M','N','O','S','T','U','W','V']
245 list_of_cols2 = ['G','J','R']
246
247 for letter in list_of_cols:
248 self.sheet.column_dimensions[letter].width = 13
249
250 for letter in list_of_cols2:
251 self.sheet.column_dimensions[letter].width = 25
252
253 def visual(self):
254
255 for i in range(1,6):
256 self.sheet.cell(row=i,column=18).border = thin_border
257 self.sheet.cell(row=4,column=18+i).border = thin_border
258
259 self.sheet.cell(row=6,column=18).border = thin_border
260 self.sheet.cell(row=6,column=19).border = thin_border
261
262 self.sheet.cell(row=11,column=10).border = thin_border
263 self.sheet.cell(row=11,column=11).border = thin_border
264
265class indicators:
266 def __init__(self,link,sheet):
267 self.link = link
268 self.sheet = sheet
269
270 def financial_indicators(self):
271
272 source = requests.get(self.link).content
273 soup = BeautifulSoup(source, 'lxml')
274
275 indicator = soup.find_all('td',attrs = {'class':'h'})
276 dates = soup.find_all('th',attrs = {'class':'thq h'})
277 newest_date = soup.find_all('th', attrs = {'class':'thq h newest'})
278 names_of_indicators = soup.find_all('td', attrs = {'class':'f'})
279
280
281 row_num=14
282 for num in range(0,len(indicator)):
283 if num%(len(dates)+len(newest_date)) != 0:
284
285 if indicator[num].text == '':
286 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = 'N/A'
287 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
288 else:
289 b = indicator[num].text
290 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = str(b)[0:(b.find('%'))+1]
291 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
292
293 elif num%(len(dates)+1) == 0:
294 if num ==0:
295 pass
296 else:
297 row_num += 1
298
299 if indicator[num].text == '':
300 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = 'N/A'
301 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
302 else:
303 b = indicator[num].text
304 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = str(b)[0:(b.find('%'))+1]
305 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
306
307 #add dates
308 for num in range(0,len(dates)):
309 self.sheet.cell(row=13,column=10+num).value = " ".join(dates[num].text.split())
310 self.sheet.cell(row=13,column=10+num).border = thin_border
311
312 self.sheet.cell(row=13,column=10+len(dates)).value = " ".join(newest_date[0].text.split())
313 self.sheet.cell(row=13,column=10+len(dates)).border = thin_border
314
315
316 #add indicators names
317 for num in range(0,len(names_of_indicators)):
318 self.sheet.cell(row=14+num,column=9).value = names_of_indicators[num].text
319 self.sheet.cell(row=14+num,column=9).border = thin_border
320
321def main():
322 dict_name = {}
323 find_companies(dict_name)
324 list_of_companies = dict_name
325
326 dict_name2 = {}
327 find_companies2(dict_name2)
328 indicators_for_companies = dict_name2
329
330 path = os.path.expanduser("~/Desktop")
331
332 #remove file if it already exists
333 try:
334 os.remove(path + "/Wig20_analysis.xlsm")
335 except FileNotFoundError:
336 pass
337
338 #create new file
339 create_file(list(list_of_companies.values()))
340 working_file = openpyxl.load_workbook(filename=path +'/Wig20_analysis.xlsm', read_only=False, keep_vba=True)
341
342
343 #for ws in working_file:
344 # ws.column_dimensions['H'].hidden = False
345
346 for num in tqdm(range(0,len(list(list_of_companies.keys())))):
347 company = upload_data(list(list_of_companies.keys())[num],working_file[list(list_of_companies.values())[num]])
348 companys_indicators = indicators(list(indicators_for_companies.keys())[num],working_file[list(list_of_companies.values())[num]])
349
350 company.download_data()
351
352 company.statistics()
353
354 companys_indicators.financial_indicators()
355
356 company.create_charts(list_of_companies[list(list_of_companies.keys())[num]],len(working_file[list(list_of_companies.values())[num]]['A']),"J24","J45")
357
358 company.create_charts(list_of_companies[list(list_of_companies.keys())[num]],14,"V24","V45")
359
360 company.adjusting_width()
361
362 company.visual()
363
364 for ws in working_file:
365 ws.column_dimensions['H'].hidden = True
366
367 working_file.save(path +'/Wig20_analysis.xlsm')
368
369 print('Process completed!')
370
371if __name__ == "__main__":
372 main()
373