· 4 years ago · Apr 24, 2021, 08:40 AM
1import csv
2import requests
3import openpyxl
4import os
5import pandas as pd
6import datetime
7from bs4 import BeautifulSoup
8from openpyxl.styles import Font, Border, Side
9from datetime import date, timedelta
10from tqdm import tqdm
11from openpyxl.chart import LineChart,Reference,Series
12from io import StringIO
13from sklearn.linear_model import LinearRegression
14
15thin_border = Border(left = Side(style='thin'),
16 right = Side(style='thin'),
17 top = Side(style='thin'),
18 bottom = Side(style='thin'))
19
20print('Uploading data in progress, please wait!')
21
22### BELOW CODE (CLASS) WILL UPLOAD NEW DATA TO THE FILE
23def find_companies(dict_name):
24 #create dict with links to wig20 companies historical data + names of sheets
25 source = requests.get('https://stooq.pl/t/?i=532').content
26 soup = BeautifulSoup(source, 'lxml')
27
28 names = soup.find_all('td',attrs = {'id':'f13'})
29 names_of_companies = [names[1 + num * 6].text for num in range(0,20)]
30
31 link = 'https://stooq.pl/q/d/l/?s='
32 link2 = '&i=d'
33
34 for i in names_of_companies:
35 dict_name[link + i.lower() + link2] = i
36
37def find_companies2(dict_name2):
38 #create dict with links to wig20 companies indicators + names of sheets
39 source = requests.get('https://www.biznesradar.pl/gielda/indeks:WIG20').content
40 soup = BeautifulSoup(source, 'lxml')
41
42 names = soup.find_all('td')
43
44 names_of_companies = [names[i*13].text.split(" ")[0] for i in range(0,20)]
45
46 link = 'https://www.biznesradar.pl/wskazniki-rentownosci/'
47
48 for i in names_of_companies:
49 dict_name2[link + i] = i
50
51def create_file(sheet_names):
52 path = os.path.expanduser("~/Desktop")
53
54 wb = openpyxl.Workbook()
55
56 for i in sheet_names:
57 wb.create_sheet(i)
58
59 wb.remove(wb['Sheet'])
60
61 wb.save(path +'/Wig20_analysis.xlsm')
62
63class upload_data:
64 def __init__(self,link,sheet):
65 self.link = link
66 self.sheet = sheet
67
68 def download_data(self):
69 #download data and calculate the % diffirance for Zamkniecie and add colour if the value is >0 or <0
70 with requests.Session() as downloaded_file:
71 download = downloaded_file.get(self.link)
72
73 decoded_content = download.content.decode('utf-8')
74
75 cr = csv.reader(decoded_content.splitlines(), delimiter=',')
76 my_list = list(cr)
77 for j in range(0,len(my_list)):
78 for i in range(0,len(my_list[j])):
79 if j>0:
80 try:
81 if i>0:
82 self.sheet.cell(row=j+1,column=i+1).value = float(my_list[::-1][j-1][i])
83 self.sheet.cell(row=j+1,column=i+1).border = thin_border
84
85 else:
86 self.sheet.cell(row=j+1,column=i+1).value = datetime.datetime.strptime(my_list[::-1][j-1][i], '%Y-%m-%d').strftime('%d/%m/%Y')
87 self.sheet.cell(row=j+1,column=i+1).border = thin_border
88 except IndexError:
89 self.sheet.cell(row=j+1,column=i+1).value = 'no data'
90 self.sheet.cell(row=j+1,column=i+1).border = thin_border
91 else:
92 self.sheet.cell(row=j+1,column=i+1).value = my_list[j][i]
93 self.sheet.cell(row=j+1,column=i+1).border = thin_border
94
95 self.sheet.cell(row=j+2,column=8).value = str(self.sheet)
96
97 self.sheet.cell(row=1,column=8).value = 'Tableau'
98 self.sheet.cell(row=11,column=9).value = 'Data wejścia na giełdę:'
99 self.sheet.cell(row=11,column=10).value = my_list[1][0]
100
101 for i in range(1,len(my_list)-1):
102 self.sheet.cell(row=i+1,column=7).value = 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4])
103 self.sheet.cell(row=i+1,column=7).border = thin_border
104 if 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4]) > 0 and i>0:
105 self.sheet.cell(row=i+1,column=7).font = Font(color = "FF00FF00")
106 self.sheet.cell(row=i+1,column=7).number_format = '0.00%'
107 elif 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4]) < 0 and i>0:
108 self.sheet.cell(row=i+1,column=7).font = Font(color = "FFFF0000")
109 self.sheet.cell(row=i+1,column=7).number_format = '0.00%'
110
111 self.sheet.cell(row=1,column=7).value = "Cena zamkniecia - zmiana [%]"
112 self.sheet.cell(row=1,column=7).border = thin_border
113
114 def statistics(self):
115 ### create table with statistics data (Q1,mean,std ETC)
116 r = requests.get(self.link)
117 df = pd.read_csv(StringIO(r.text))
118
119 list_of_headers = ['Otwarcie','Najwyzszy','Najnizszy','Zamkniecie','Wolumen']
120 names = ['Suma','Średnia','Standardowe odchylenie','Wartość minimalna','Q1','Q2','Q3','Wartość maksymalna']
121
122 #add headers
123 for col_num in range(0,len(list_of_headers)):
124 self.sheet.cell(row=1, column=col_num+10).value = list_of_headers[col_num]
125 self.sheet.cell(row=1, column=col_num+10).border = thin_border
126 #add names of rows
127 for row_num in range(0,len(names)):
128 self.sheet.cell(row=row_num+2,column=9).value = names[row_num]
129 self.sheet.cell(row=row_num+2,column=9).border = thin_border
130
131 #add values
132 for col_num in range(0,len(list_of_headers)):
133 for row_num in range(0,8):
134 self.sheet.cell(row=row_num+2, column=col_num+10).value = df[list_of_headers[col_num]].describe().tolist()[row_num]
135 self.sheet.cell(row=row_num+2, column=col_num+10).border = thin_border
136
137 ### CREATE LINEAR REGRESION MODEL TO PREDICT FUTURE DATA
138 #prediction
139 self.sheet.cell(row=1,column=17).value = 'Analiza Regresji Liniowej'
140 self.sheet.cell(row=2,column=17).value = 'Wzór modelu funkcji'
141 self.sheet.cell(row=4,column=17).value = 'Data'
142 self.sheet.cell(row=5,column=17).value = 'Przewidywane wartości'
143 self.sheet.cell(row=6,column=17).value = 'Dopasowanie modelu'
144
145 lm = LinearRegression()
146
147 X = df[['Otwarcie','Najwyzszy','Najnizszy']][::-1]
148 y = df['Zamkniecie'][::-1]
149
150 #create a model
151 lm = LinearRegression()
152 model = lm.fit(X,y)
153
154 #coefficient Zamkniecie
155 a = lm.intercept_
156
157 #coefficient Otwarcie,Najwyzszy,Najnizszy
158 b = lm.coef_.tolist()
159
160 #create formula
161 function_formula = str(a) + ' × ' + str(b[0]) + ' + ' + str(a) + ' × ' + str(b[1]) + ' + ' + str(a) + ' × ' + str(b[2])
162 self.sheet.cell(row=2,column=18).value = function_formula
163
164 #predict future value
165 predicted_value = lm.predict(X)[0:5].tolist()
166
167 #model fit
168 model_evaluation = lm.score(X,y)
169
170 #past values in excel file
171 tomorrow = date.today() + timedelta(days=1)
172 for i in range(0,5):
173 self.sheet.cell(row=5,column=18+i).value = predicted_value[i]
174 self.sheet.cell(row=5,column=18+i).border = thin_border
175 if (tomorrow + timedelta(days=i)).weekday() not in [5,6]:
176 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i)
177 elif (tomorrow + timedelta(days=i)).weekday() == 5:
178 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i+2)
179 elif (tomorrow + timedelta(days=i)).weekday() == 6:
180 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i+1)
181
182 self.sheet.cell(row=6,column=18).value = model_evaluation
183
184 def create_charts(self,name,last_row,where1,where2):
185
186 opening = Reference(self.sheet, min_col = 2, min_row = 2,
187 max_col = 2, max_row = last_row)
188
189 highest = Reference(self.sheet, min_col = 3, min_row = 2,
190 max_col = 3, max_row = last_row)
191
192 lowest = Reference(self.sheet, min_col = 4, min_row = 2,
193 max_col = 4, max_row = last_row)
194
195 closing = Reference(self.sheet, min_col = 5, min_row = 2,
196 max_col = 5, max_row = last_row)
197
198 volumen = Reference(self.sheet, min_col = 6, min_row = 2,
199 max_col = 6, max_row = last_row)
200
201 dates = Reference(self.sheet, min_col = 1, min_row = 2,
202 max_col = 1, max_row = last_row)
203 ## first chart
204 chart = LineChart()
205 series = Series(opening, title="Cena otwarcia")
206 chart.append(series)
207 series = Series(highest, title="Cena najwyższa")
208 chart.append(series)
209 series = Series(lowest, title="Cena najniższa")
210 chart.append(series)
211 series = Series(closing, title="Cena zamknięcia")
212 chart.append(series)
213
214 chart.set_categories(dates)
215 chart.title = str(name)
216 chart.x_axis.title = "Data"
217 chart.y_axis.title = "Wartość"
218 self.sheet.add_chart(chart, where1) #h24
219 chart.height = 10 # default is 7.5
220 chart.width = 30 # default is 15
221 ##
222
223 ## second chart
224 chart2 = LineChart()
225 chart2.append(Series(volumen, title = "Wolumen"))
226 chart2.set_categories(dates)
227
228 chart2.title = str(name)
229 chart2.x_axis.title = "Data"
230 chart2.y_axis.title = "Wartość"
231 self.sheet.add_chart(chart2, where2) #h45
232 chart2.height = 10 # default is 7.5
233 chart2.width = 30 # default is 15
234 ##
235
236 def adjusting_width(self):
237
238 # add new columsn so it will be more transparent
239 self.sheet.insert_cols(9)
240
241 list_of_cols = ['A','B','C','D','E','F','H','I','K','L','M','N','O','S','T','U','W','V']
242 list_of_cols2 = ['G','J','R']
243
244 for letter in list_of_cols:
245 self.sheet.column_dimensions[letter].width = 13
246
247 for letter in list_of_cols2:
248 self.sheet.column_dimensions[letter].width = 25
249
250 def visual(self):
251
252 for i in range(1,6):
253 self.sheet.cell(row=i,column=18).border = thin_border
254 self.sheet.cell(row=4,column=18+i).border = thin_border
255
256 self.sheet.cell(row=6,column=18).border = thin_border
257 self.sheet.cell(row=6,column=19).border = thin_border
258
259 self.sheet.cell(row=11,column=10).border = thin_border
260 self.sheet.cell(row=11,column=11).border = thin_border
261
262class indicators:
263 def __init__(self,link,sheet):
264 self.link = link
265 self.sheet = sheet
266
267 def financial_indicators(self):
268
269 source = requests.get(self.link).content
270 soup = BeautifulSoup(source, 'lxml')
271
272 indicator = soup.find_all('td',attrs = {'class':'h'})
273 dates = soup.find_all('th',attrs = {'class':'thq h'})
274 newest_date = soup.find_all('th', attrs = {'class':'thq h newest'})
275 names_of_indicators = soup.find_all('td', attrs = {'class':'f'})
276
277
278 row_num=14
279 for num in range(0,len(indicator)):
280 if num%(len(dates)+len(newest_date)) != 0:
281
282 if indicator[num].text == '':
283 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = 'N/A'
284 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
285 else:
286 b = indicator[num].text
287 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = str(b)[0:(b.find('%'))+1]
288 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
289
290 elif num%(len(dates)+1) == 0:
291 if num ==0:
292 pass
293 else:
294 row_num += 1
295
296 if indicator[num].text == '':
297 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = 'N/A'
298 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
299 else:
300 b = indicator[num].text
301 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = str(b)[0:(b.find('%'))+1]
302 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
303
304 #add dates
305 for num in range(0,len(dates)):
306 self.sheet.cell(row=13,column=10+num).value = " ".join(dates[num].text.split())
307 self.sheet.cell(row=13,column=10+num).border = thin_border
308
309 self.sheet.cell(row=13,column=10+len(dates)).value = " ".join(newest_date[0].text.split())
310 self.sheet.cell(row=13,column=10+len(dates)).border = thin_border
311
312
313 #add indicators names
314 for num in range(0,len(names_of_indicators)):
315 self.sheet.cell(row=14+num,column=9).value = names_of_indicators[num].text
316 self.sheet.cell(row=14+num,column=9).border = thin_border
317
318def main():
319 dict_name = {}
320 find_companies(dict_name)
321 list_of_companies = dict_name
322
323 dict_name2 = {}
324 find_companies2(dict_name2)
325 indicators_for_companies = dict_name2
326
327 path = os.path.expanduser("~/Desktop")
328
329 #remove file if it already exists
330 try:
331 os.remove(path + "/Wig20_analysis.xlsm")
332 except FileNotFoundError:
333 pass
334
335 #create new file
336 create_file(list(list_of_companies.values()))
337 working_file = openpyxl.load_workbook(filename=path +'/Wig20_analysis.xlsm', read_only=False, keep_vba=True)
338
339
340 #for ws in working_file:
341 # ws.column_dimensions['H'].hidden = False
342
343 for num in tqdm(range(0,len(list(list_of_companies.keys())))):
344 company = upload_data(list(list_of_companies.keys())[num],working_file[list(list_of_companies.values())[num]])
345 companys_indicators = indicators(list(indicators_for_companies.keys())[num],working_file[list(list_of_companies.values())[num]])
346
347 company.download_data()
348
349 company.statistics()
350
351 companys_indicators.financial_indicators()
352
353 company.create_charts(list_of_companies[list(list_of_companies.keys())[num]],len(working_file[list(list_of_companies.values())[num]]['A']),"J24","J45")
354
355 company.create_charts(list_of_companies[list(list_of_companies.keys())[num]],14,"V24","V45")
356
357 company.adjusting_width()
358
359 company.visual()
360
361 for ws in working_file:
362 ws.column_dimensions['H'].hidden = True
363
364 working_file.save(path +'/Wig20_analysis.xlsm')
365
366 print('Process completed!')
367
368if __name__ == "__main__":
369 main()
370