C2GEaCKj

· 6 years ago · Jul 30, 2020, 08:00 PM
1import pandas as pd
2import quandl
3import time
4import math
5import numpy
6from sklearn import model_selection
7from sklearn import preprocessing, svm
8from sklearn.linear_model import LinearRegression
9
# Number of rows requested from every DataFrame.head() preview in this script.
n = 5
12
13
# Simple countdown function
def countdown(total=20, warn_from=15, tick=1, pause=3):
    """Keep the current output on screen for ``total`` ticks, then announce the next step.

    One tick is slept per step, so the defaults wait about 20 seconds in
    total. From step ``warn_from`` onwards the remaining ticks are printed;
    on the last step the "data is gone" message is printed, followed by a
    short pause.

    Args:
        total: Number of ticks to wait before the data is declared gone.
        warn_from: First step at which the remaining ticks are printed.
        tick: Seconds slept per step.
        pause: Seconds slept after the final message.
    """
    for secs in range(1, total + 1):
        time.sleep(tick)
        # The final step is tested first; otherwise the ">= warn_from" branch
        # would swallow it and the closing message could never be printed.
        if secs == total:
            print("Data is now gone, preparing next data model: ")
            time.sleep(pause)
        elif secs >= warn_from:
            print(f"The data will disappear in {total - secs}")
24
25
# Setting a Quandl API key gives access to a wider range of data.
# NOTE: keep the real key out of source control (e.g. load it from an environment
# variable); a key that has been published anywhere should be revoked.
# SSL verification was switched off because it is quicker and this is for personal use only.
# quandl.ApiConfig.api_key = '<YOUR_QUANDL_API_KEY>'
# quandl.ApiConfig.verify_ssl = False

# I was originally going to make a full program with try/except so the user could
# enter the date and time and have them validated, but I'm too lazy;
# I want to get on with other stuff.
34
print("This is my program to get the market prices of Palladium/Platinum metals.\n\n ")
time.sleep(5)

# TODO: let the user enter (and validate) the start/end dates instead of
# hard-coding them in the request below.

# Fetch the "LPPM/PALL" dataset for a fixed date range.
# Only the first `n` rows are printed below because of head(n); the frame
# itself is not truncated by that call.
mrktprc = quandl.get(
    "LPPM/PALL",
    start_date="2020-01-01",
    end_date="2020-07-25",
    paginate=True,
)

# Count DOWN to the preview (4, 3, 2, 1) so the announced number of seconds
# shrinks as the preview gets closer.
for remaining in range(4, 0, -1):
    print(f"The raw, unfiltered data will be displayed in the next {remaining} seconds.\n\n")
    time.sleep(1)

print("The data will now be displayed:\n\n")
time.sleep(3)
print(mrktprc.head(n))
54
# Work on an independent copy of the price columns so that adding columns below
# never writes into (or warns about) a selection of the downloaded frame.
mrktprc = mrktprc[['USD AM', 'USD PM', 'EUR AM', 'EUR PM', 'GBP AM', 'GBP PM']].copy()

# For each currency:
#   VS<cur> = PM price - AM price           (absolute change over the day)
#   PC<cur> = that change as a % of the AM price
# The change is PM minus AM so that a NEGATIVE value means the price fell
# during the day, which is how the figures are described to the user.
for cur in ("USD", "EUR", "GBP"):
    am = mrktprc[f"{cur} AM"]
    pm = mrktprc[f"{cur} PM"]
    change = pm - am
    mrktprc[f"VS{cur.lower()}"] = change
    mrktprc[f"PC{cur.lower()}"] = change / am * 100

# Keep only the closing (PM) prices plus the derived change columns, in display order.
mrktprc = mrktprc[["GBP PM", "USD PM", "EUR PM", "VSusd", "PCusd", "VSeur", "PCeur",
                   "VSgbp", "PCgbp"]].copy()
67
# A menu asking which operation to run on the data would be possible, but this
# script is for personal use only, so it simply runs straight through.

print("\n\nThis is a new dataframe, with more important information\n\n")
print(mrktprc.head(n))
# No backslash before "Any": "\A" is not a valid escape sequence, so it would
# print a literal backslash and trigger an invalid-escape warning.
print("\n\nAny negative number represents a loss in market price, over the course of the day.")
print("The above data will be displayed for around another 20 seconds: \n\n")
countdown()
78
# Column whose future value the models will try to predict.
forecast_col = "GBP PM"

# The estimators used later cannot handle NaN, so missing prices are replaced by
# a sentinel far outside the real price range and end up treated as outliers.
# fillna() returns a new frame, so nothing is mutated through a derived selection.
mrktprc = mrktprc.fillna(-9999)

# Forecast horizon in rows: 0.5% of the data set, rounded up (at least one row
# for any non-empty frame). Keeping it small avoids extrapolating far past the
# downloaded range.
forecast_out = int(math.ceil(0.005 * len(mrktprc)))

# Label column: the forecast column's value `forecast_out` rows later. The last
# `forecast_out` rows have no future value and are therefore NaN.
mrktprc["PREDprc"] = mrktprc[forecast_col].shift(-forecast_out)

# pandas abbreviates wide frames with "..." when printing; raise the
# "display.max_columns" option if every column should be shown.
print(mrktprc.head(n))
95
# Rows whose label is NaN (the tail produced by the shift) cannot be used for
# training. Drop them BEFORE building X and y so both arrays have the same length.
mrktprc = mrktprc.dropna()

# Features are X (every column except the label) and labels are y.
# drop() is a method and must be called, not subscripted.
X = numpy.array(mrktprc.drop(columns=["PREDprc"]))
# Standardise each feature column to zero mean and unit variance.
X = preprocessing.scale(X)
y = numpy.array(mrktprc["PREDprc"])
104
# Hold back a random 20% of the rows for testing and train on the other 80%.
# The model is fitted on the training part only, then scored on the unseen part.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2
)
# First model: ordinary linear regression. fit() returns the estimator itself.
linreg = LinearRegression().fit(X_train, y_train)
# For regressors, score() is the R^2 of the predictions on the test data.
accuracy = linreg.score(X_test, y_test)
print(accuracy)

# Second model: support vector regression, trained and scored on a fresh random
# split of the same data. It apparently gives a lower score.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2
)
linreg = svm.SVR().fit(X_train, y_train)
accuracy = linreg.score(X_test, y_test)
print(accuracy)
121
122