# Paste metadata (not code): 6 years ago · Nov 19, 2019, 09:30 PM
"""Forecast the adjusted close of WIKI/GOOGL with an SVR model and plot it.

The script downloads the price history from Quandl, derives two percentage
features, trains a support-vector regressor to predict the adjusted close
``forecast_out`` rows ahead, prints the test-set score and the predictions,
and finally plots the history together with the forecast.
"""
import datetime
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quandl
from matplotlib import style
from sklearn import preprocessing, model_selection, svm
from sklearn.linear_model import LinearRegression

style.use('ggplot')

quandl.ApiConfig.api_key = 'Not gonna show the API key'

# Get data from quandl
df = quandl.get('WIKI/GOOGL')

# Keep only the adjusted columns. The explicit .copy() avoids pandas'
# SettingWithCopyWarning on the column assignments below.
df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']].copy()

# High/low spread as a percentage of the low
df['HL_PCT'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0

# Open-to-close move as a percentage of the open
df['PCT_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0

# Features actually used by the model
df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']].copy()

forecast_col = 'Adj. Close'

# Replace missing values with a sentinel so no feature row has to be dropped
df.fillna(-99999, inplace=True)

# Number of rows to look ahead: 1% of the available history
forecast_out = math.ceil(0.01 * len(df))

# The label of each row is the forecast column `forecast_out` rows later;
# the last `forecast_out` rows therefore have no label (NaN).
df['label'] = df[forecast_col].shift(-forecast_out)

# Define X: scale all feature rows together so that the rows used for
# forecasting share the scaling of the training rows.
X = np.array(df.drop(columns=['label']))
X = preprocessing.scale(X)
# Take the unlabeled tail BEFORE truncating X; slicing afterwards would pick
# labeled rows that may already be part of the training set.
X_lately = X[-forecast_out:]
X = X[:-forecast_out]

# Define y: labels of exactly the rows kept in X. `df` itself is left intact
# so that the most recent real prices still appear in the plot.
y = np.array(df['label'])[:-forecast_out]

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)

# Choosing algorithm
clf = svm.SVR()

# Fitting the data
clf.fit(X_train, y_train)

# score() of a regressor is the coefficient of determination (R^2), which is
# reported here under the name "accuracy".
accuracy = clf.score(X_test, y_test)

print('Accuracy: ', (accuracy*100.00), '%')

# Predicting values for the rows that have no label yet
forecast_predict = clf.predict(X_lately)

# Printing predicted values
print(forecast_predict, forecast_out)

df['Forecast'] = np.nan

# Append one row per prediction after the last observed date. Plain timedelta
# arithmetic is used instead of a POSIX-timestamp round trip, which would
# shift the dates by the local UTC offset.
# NOTE: steps are calendar days, as in the original script, so weekends are
# not skipped.
last_date = df.index[-1]
empty_cells = [np.nan] * (len(df.columns) - 1)
for days_ahead, predicted in enumerate(forecast_predict, start=1):
    next_date = last_date + datetime.timedelta(days=days_ahead)
    # 'Forecast' is the last column; every other column stays empty.
    df.loc[next_date] = empty_cells + [predicted]

# Displaying the graph
df['Adj. Close'].plot()
df['Forecast'].plot()
plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()