· 4 years ago · Apr 23, 2021, 03:32 PM
1import csv
2import requests
3import openpyxl
4import os
5import pandas as pd
6import datetime
7from bs4 import BeautifulSoup
8from openpyxl.styles import Font, Border, Side
9from datetime import date, timedelta
10from tqdm import tqdm
11from openpyxl.chart import LineChart,Reference,Series
12from io import StringIO
13from sklearn.linear_model import LinearRegression
14
15thin_border = Border(left = Side(style='thin'),
16 right = Side(style='thin'),
17 top = Side(style='thin'),
18 bottom = Side(style='thin'))
19
20print('Uploading data in progress, please wait!')
21
22### BELOW CODE (CLASS) WILL UPLOAD NEW DATA TO THE FILE
23def create_file(sheet_names):
24 path = os.path.expanduser("~/Desktop")
25
26 wb = openpyxl.Workbook()
27
28 for i in sheet_names:
29 wb.create_sheet(i)
30
31 wb.remove_sheet('Sheet')
32
33 wb.save(path +'/Wig20_analysis.xlsm')
34
35class upload_data:
36 def __init__(self,link,sheet):
37 self.link = link
38 self.sheet = sheet
39
40 def download_data(self):
41 #download data and calculate the % diffirance for Zamkniecie and add colour if the value is >0 or <0
42 with requests.Session() as downloaded_file:
43 download = downloaded_file.get(self.link)
44
45 decoded_content = download.content.decode('utf-8')
46
47 cr = csv.reader(decoded_content.splitlines(), delimiter=',')
48 my_list = list(cr)
49 for j in range(0,len(my_list)):
50 for i in range(0,len(my_list[j])):
51 if j>0:
52 try:
53 if i>0:
54 self.sheet.cell(row=j+1,column=i+1).value = float(my_list[::-1][j-1][i])
55 self.sheet.cell(row=j+1,column=i+1).border = thin_border
56
57 else:
58 self.sheet.cell(row=j+1,column=i+1).value = datetime.datetime.strptime(my_list[::-1][j-1][i], '%Y-%m-%d').strftime('%d/%m/%Y')
59 self.sheet.cell(row=j+1,column=i+1).border = thin_border
60 except IndexError:
61 self.sheet.cell(row=j+1,column=i+1).value = 'no data'
62 self.sheet.cell(row=j+1,column=i+1).border = thin_border
63 else:
64 self.sheet.cell(row=j+1,column=i+1).value = my_list[j][i]
65 self.sheet.cell(row=j+1,column=i+1).border = thin_border
66
67 self.sheet.cell(row=j+2,column=8).value = str(self.sheet)
68
69 self.sheet.cell(row=1,column=8).value = 'Tableau'
70 self.sheet.cell(row=11,column=9).value = 'Data wejścia na giełdę:'
71 self.sheet.cell(row=11,column=10).value = my_list[1][0]
72
73 for i in range(1,len(my_list)-1):
74 self.sheet.cell(row=i+1,column=7).value = 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4])
75 self.sheet.cell(row=i+1,column=7).border = thin_border
76 if 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4]) > 0 and i>0:
77 self.sheet.cell(row=i+1,column=7).font = Font(color = "FF00FF00")
78 self.sheet.cell(row=i+1,column=7).number_format = '0.00%'
79 elif 1 - float(my_list[::-1][i][4])/float(my_list[::-1][i-1][4]) < 0 and i>0:
80 self.sheet.cell(row=i+1,column=7).font = Font(color = "FFFF0000")
81 self.sheet.cell(row=i+1,column=7).number_format = '0.00%'
82
83 self.sheet.cell(row=1,column=7).value = "Cena zamkniecia - zmiana [%]"
84 self.sheet.cell(row=1,column=7).border = thin_border
85
86 def statistics(self):
87 ### create table with statistics data (Q1,mean,std ETC)
88 r = requests.get(self.link)
89 df = pd.read_csv(StringIO(r.text))
90
91 list_of_headers = ['Otwarcie','Najwyzszy','Najnizszy','Zamkniecie','Wolumen']
92 names = ['Suma','Średnia','Standardowe odchylenie','Wartość minimalna','Q1','Q2','Q3','Wartość maksymalna']
93
94 #add headers
95 for col_num in range(0,len(list_of_headers)):
96 self.sheet.cell(row=1, column=col_num+10).value = list_of_headers[col_num]
97 self.sheet.cell(row=1, column=col_num+10).border = thin_border
98 #add names of rows
99 for row_num in range(0,len(names)):
100 self.sheet.cell(row=row_num+2,column=9).value = names[row_num]
101 self.sheet.cell(row=row_num+2,column=9).border = thin_border
102
103 #add values
104 for col_num in range(0,len(list_of_headers)):
105 for row_num in range(0,8):
106 self.sheet.cell(row=row_num+2, column=col_num+10).value = df[list_of_headers[col_num]].describe().tolist()[row_num]
107 self.sheet.cell(row=row_num+2, column=col_num+10).border = thin_border
108
109 ### CREATE LINEAR REGRESION MODEL TO PREDICT FUTURE DATA
110 #prediction
111 self.sheet.cell(row=1,column=17).value = 'Analiza Regresji Liniowej'
112 self.sheet.cell(row=2,column=17).value = 'Wzór modelu funkcji'
113 self.sheet.cell(row=4,column=17).value = 'Data'
114 self.sheet.cell(row=5,column=17).value = 'Przewidywane wartości'
115 self.sheet.cell(row=6,column=17).value = 'Dopasowanie modelu'
116
117 lm = LinearRegression()
118
119 X = df[['Otwarcie','Najwyzszy','Najnizszy']][::-1]
120 y = df['Zamkniecie'][::-1]
121
122 #create a model
123 lm = LinearRegression()
124 model = lm.fit(X,y)
125
126 #coefficient Zamkniecie
127 a = lm.intercept_
128
129 #coefficient Otwarcie,Najwyzszy,Najnizszy
130 b = lm.coef_.tolist()
131
132 #create formula
133 function_formula = str(a) + ' × ' + str(b[0]) + ' + ' + str(a) + ' × ' + str(b[1]) + ' + ' + str(a) + ' × ' + str(b[2])
134 self.sheet.cell(row=2,column=18).value = function_formula
135
136 #predict future value
137 predicted_value = lm.predict(X)[0:5].tolist()
138
139 #model fit
140 model_evaluation = lm.score(X,y)
141
142 #past values in excel file
143 tomorrow = date.today() + timedelta(days=1)
144 for i in range(0,5):
145 self.sheet.cell(row=5,column=18+i).value = predicted_value[i]
146 self.sheet.cell(row=5,column=18+i).border = thin_border
147 if (tomorrow + timedelta(days=i)).weekday() not in [5,6]:
148 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i)
149 elif (tomorrow + timedelta(days=i)).weekday() == 5:
150 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i+2)
151 elif (tomorrow + timedelta(days=i)).weekday() == 6:
152 self.sheet.cell(row=4,column=18+i).value = tomorrow + timedelta(days=i+1)
153
154 self.sheet.cell(row=6,column=18).value = model_evaluation
155
156 def create_charts(self,name,last_row,where1,where2):
157
158 opening = Reference(self.sheet, min_col = 2, min_row = 2,
159 max_col = 2, max_row = last_row)
160
161 highest = Reference(self.sheet, min_col = 3, min_row = 2,
162 max_col = 3, max_row = last_row)
163
164 lowest = Reference(self.sheet, min_col = 4, min_row = 2,
165 max_col = 4, max_row = last_row)
166
167 closing = Reference(self.sheet, min_col = 5, min_row = 2,
168 max_col = 5, max_row = last_row)
169
170 volumen = Reference(self.sheet, min_col = 6, min_row = 2,
171 max_col = 6, max_row = last_row)
172
173 dates = Reference(self.sheet, min_col = 1, min_row = 2,
174 max_col = 1, max_row = last_row)
175 ## first chart
176 chart = LineChart()
177 series = Series(opening, title="Cena otwarcia")
178 chart.append(series)
179 series = Series(highest, title="Cena najwyższa")
180 chart.append(series)
181 series = Series(lowest, title="Cena najniższa")
182 chart.append(series)
183 series = Series(closing, title="Cena zamknięcia")
184 chart.append(series)
185
186 chart.set_categories(dates)
187 chart.title = str(name)
188 chart.x_axis.title = "Data"
189 chart.y_axis.title = "Wartość"
190 self.sheet.add_chart(chart, where1) #h24
191 chart.height = 10 # default is 7.5
192 chart.width = 30 # default is 15
193 ##
194
195 ## second chart
196 chart2 = LineChart()
197 chart2.append(Series(volumen, title = "Wolumen"))
198 chart2.set_categories(dates)
199
200 chart2.title = str(name)
201 chart2.x_axis.title = "Data"
202 chart2.y_axis.title = "Wartość"
203 self.sheet.add_chart(chart2, where2) #h45
204 chart2.height = 10 # default is 7.5
205 chart2.width = 30 # default is 15
206 ##
207
208 def adjusting_width(self):
209
210 # add new columsn so it will be more transparent
211 self.sheet.insert_cols(9)
212
213 list_of_cols = ['A','B','C','D','E','F','H','I','K','L','M','N','O','S','T','U','W','V']
214 list_of_cols2 = ['G','J','R']
215
216 for letter in list_of_cols:
217 self.sheet.column_dimensions[letter].width = 13
218
219 for letter in list_of_cols2:
220 self.sheet.column_dimensions[letter].width = 25
221
222 def visual(self):
223
224 for i in range(1,6):
225 self.sheet.cell(row=i,column=18).border = thin_border
226 self.sheet.cell(row=4,column=18+i).border = thin_border
227
228 self.sheet.cell(row=6,column=18).border = thin_border
229 self.sheet.cell(row=6,column=19).border = thin_border
230
231 self.sheet.cell(row=11,column=10).border = thin_border
232 self.sheet.cell(row=11,column=11).border = thin_border
233
234class indicators:
235 def __init__(self,link,sheet):
236 self.link = link
237 self.sheet = sheet
238
239 def financial_indicators(self):
240
241 source = requests.get(self.link).content
242 soup = BeautifulSoup(source, 'lxml')
243
244 indicator = soup.find_all('td',attrs = {'class':'h'})
245 dates = soup.find_all('th',attrs = {'class':'thq h'})
246 newest_date = soup.find_all('th', attrs = {'class':'thq h newest'})
247 names_of_indicators = soup.find_all('td', attrs = {'class':'f'})
248
249
250 row_num=14
251 for num in range(0,len(indicator)):
252 if num%(len(dates)+len(newest_date)) != 0:
253
254 if indicator[num].text == '':
255 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = 'N/A'
256 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
257 else:
258 b = indicator[num].text
259 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = str(b)[0:(b.find('%'))+1]
260 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
261
262 elif num%(len(dates)+1) == 0:
263 if num ==0:
264 pass
265 else:
266 row_num += 1
267
268 if indicator[num].text == '':
269 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = 'N/A'
270 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
271 else:
272 b = indicator[num].text
273 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).value = str(b)[0:(b.find('%'))+1]
274 self.sheet.cell(row=row_num,column=10 + num%(len(dates)+len(newest_date))).border = thin_border
275
276 #add dates
277 for num in range(0,len(dates)):
278 self.sheet.cell(row=13,column=10+num).value = " ".join(dates[num].text.split())
279 self.sheet.cell(row=13,column=10+num).border = thin_border
280
281 self.sheet.cell(row=13,column=10+len(dates)).value = " ".join(newest_date[0].text.split())
282 self.sheet.cell(row=13,column=10+len(dates)).border = thin_border
283
284
285 #add indicators names
286 for num in range(0,len(names_of_indicators)):
287 self.sheet.cell(row=14+num,column=9).value = names_of_indicators[num].text
288 self.sheet.cell(row=14+num,column=9).border = thin_border
289
290def main():
291 list_of_companies = {'https://stooq.pl/q/d/l/?s=acp&i=d':'ASSECOPOL',
292 'https://stooq.pl/q/d/l/?s=ale&i=d':'ALLEGRO',
293 'https://stooq.pl/q/d/l/?s=alr&i=d':'ALIOR',
294 'https://stooq.pl/q/d/l/?s=ccc&i=d':'CCC',
295 'https://stooq.pl/q/d/l/?s=cdr&i=d':'CDPROJEKT',
296 'https://stooq.pl/q/d/l/?s=cps&i=d':'CYFRPLSAT',
297 'https://stooq.pl/q/d/l/?s=dnp&i=d':'DINOPL',
298 'https://stooq.pl/q/d/l/?s=jsw&i=d':'JSW',
299 'https://stooq.pl/q/d/l/?s=kgh&i=d':'KGHM',
300 'https://stooq.pl/q/d/l/?s=lpp&i=d':'LPP',
301 'https://stooq.pl/q/d/l/?s=lts&i=d':'LOTOS',
302 'https://stooq.pl/q/d/l/?s=opl&i=d':'OPL',
303 'https://stooq.pl/q/d/l/?s=peo&i=d':'PEO',
304 'https://stooq.pl/q/d/l/?s=pge&i=d':'PGE',
305 'https://stooq.pl/q/d/l/?s=pgn&i=d':'PGN',
306 'https://stooq.pl/q/d/l/?s=pkn&i=d':'PKN',
307 'https://stooq.pl/q/d/l/?s=pko&i=d':'PKO',
308 'https://stooq.pl/q/d/l/?s=pzu&i=d':'PZU',
309 'https://stooq.pl/q/d/l/?s=spl&i=d':'SPL',
310 'https://stooq.pl/q/d/l/?s=tpe&i=d':'TPE',
311 }
312 indicators_for_companies= {'https://www.biznesradar.pl/wskazniki-rentownosci/ASSECO-POLAND':'ASSECOPOL',
313 'https://www.biznesradar.pl/wskazniki-rentownosci/ALE':'ALLEGRO',
314 'https://www.biznesradar.pl/wskazniki-rentownosci/ALIOR-BANK':'ALIOR',
315 'https://www.biznesradar.pl/wskazniki-rentownosci/CCC':'CCC',
316 'https://www.biznesradar.pl/wskazniki-rentownosci/CD-PROJEKT':'CDPROJEKT',
317 'https://www.biznesradar.pl/wskazniki-rentownosci/CYFROWY-POLSAT':'CYFRPLSAT',
318 'https://www.biznesradar.pl/wskazniki-rentownosci/DNP':'DINOPL',
319 'https://www.biznesradar.pl/wskazniki-rentownosci/JSW-JASTRZEBSKA-SPOLKA-WEGLOWA':'JSW',
320 'https://www.biznesradar.pl/wskazniki-rentownosci/KGHM':'KGHM',
321 'https://www.biznesradar.pl/wskazniki-rentownosci/LPP':'LPP',
322 'https://www.biznesradar.pl/wskazniki-rentownosci/LOTOS':'LOTOS',
323 'https://www.biznesradar.pl/wskazniki-rentownosci/ORANGE':'OPL',
324 'https://www.biznesradar.pl/wskazniki-rentownosci/PEKAO':'PEO',
325 'https://www.biznesradar.pl/wskazniki-rentownosci/PGE':'PGE',
326 'https://www.biznesradar.pl/wskazniki-rentownosci/PGNIG':'PGN',
327 'https://www.biznesradar.pl/wskazniki-rentownosci/PKN-ORLEN':'PKN',
328 'https://www.biznesradar.pl/wskazniki-rentownosci/PKO':'PKO',
329 'https://www.biznesradar.pl/wskazniki-rentownosci/PZU':'PZU',
330 'https://www.biznesradar.pl/wskazniki-rentownosci/SPL':'SPL',
331 'https://www.biznesradar.pl/wskazniki-rentownosci/TAURON':'TPE',
332 }
333 path = os.path.expanduser("~/Desktop")
334
335 #remove file if it exists
336 try:
337 os.remove(path + "/Wig20_analysis.xlsm")
338 except FileNotFoundError:
339 pass
340
341 #create new file
342 create_file(list(list_of_companies.values()))
343
344 working_file = openpyxl.load_workbook(filename=path +'/Wig20_analysis.xlsm', read_only=False, keep_vba=True)
345
346 for ws in working_file:
347 ws.column_dimensions['H'].hidden = False
348
349 for num in tqdm(range(0,len(list(list_of_companies.keys())))):
350 company = upload_data(list(list_of_companies.keys())[num],working_file[list(list_of_companies.values())[num]])
351 companys_indicators = indicators(list(indicators_for_companies.keys())[num],working_file[list(list_of_companies.values())[num]])
352
353 company.download_data()
354
355 company.statistics()
356
357 companys_indicators.financial_indicators()
358
359 company.create_charts(list_of_companies[list(list_of_companies.keys())[num]],len(working_file[list(list_of_companies.values())[num]]['A']),"J24","J45")
360
361 company.create_charts(list_of_companies[list(list_of_companies.keys())[num]],14,"AC24","AC45")
362
363 company.adjusting_width()
364
365 company.visual()
366
367 for ws in working_file:
368 ws.column_dimensions['H'].hidden = True
369
370 working_file.save(path +'/Wig20_analysis.xlsm')
371
372 print('Process completed!')
373
374if __name__ == "__main__":
375 main()
376