· 6 years ago · Jun 08, 2019, 01:16 PM
1import pandas as pd
2import numpy as np
3import json
4from flask import Flask
5
6from cassandra.cluster import Cluster
7from cassandra.query import dict_factory
8
9app = Flask(__name__)
10
11desired_width=320
12pd.set_option('display.width', desired_width)
13np.set_printoptions(linewidth=desired_width)
14pd.set_option('display.max_columns',28)
15
16
17userRated = pd.read_csv('user_ratedmovies.dat', sep="\t", header=0, usecols=['userID','movieID','rating'], nrows=1000)
18genresKey = pd.read_csv('movie_genres.dat', sep="\t", header=0)
19
20genresKey['dumdum'] = 1
21genresKeyPivo = genresKey.pivot_table(index='movieID', columns='genre', values='dumdum').fillna(0)
22genresKeyPivo = genresKeyPivo.astype(int)
23merged = pd.merge(userRated, genresKeyPivo, on='movieID')
24merged.columns = ['' + str(col) for col in merged.columns]
25merged = merged.rename(index=str, columns = {'userID':'userID', 'movieID':'movieID', 'rating':'rating'})
26
27merged["genres_column_names"] = np.empty((len(merged), 0)).tolist()
28
29lista = merged.columns[3:-1]
30
31
32for index, row in merged.iterrows():
33 for el in lista:
34 if merged.loc[index ,el] == 1:
35 merged.loc[index,'genres_column_names'].append(el)
36
37
38EXPIRATION_SECONDS = 600
39
40def create_keyspace(session, keyspace):
41 session.execute("""
42 CREATE KEYSPACE IF NOT EXISTS """+keyspace+"""
43 WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' }
44 """)
45
46def create_rawRatingsData(session, keyspace, table):
47 session.execute("""
48 CREATE TABLE IF NOT EXISTS """+ keyspace+"""."""+table+""" (
49 userid int ,
50 movieid int,
51 rating float,
52 Action float,
53 Adventure float,
54 Animation float,
55 Children float,
56 Comedy float,
57 Crime float,
58 Documentary float,
59 Drama float,
60 Fantasy float,
61 Film_Noir float,
62 Horror float,
63 IMAX float,
64 Musical float,
65 Mystery float,
66 Romance float,
67 Sci_Fi float,
68 Short float,
69 Thriller float,
70 War float,
71 Western float,
72
73 PRIMARY KEY(userid, movieid)
74 )
75 """)
76
77
78def create_allUserRatings(session, keyspace, table):
79 session.execute("""
80 CREATE TABLE IF NOT EXISTS """+ keyspace+"""."""+table+""" (
81 user_id int ,
82 avg_movie_rating varchar,
83 PRIMARY KEY(user_id)
84 )
85 """)
86
87
88def push_data_rawRatingsData(session, keyspace, table, dictio):
89 print(dictio.keys())
90
91 print(get_data_table(session,keyspace,table))
92 session.execute(
93 """
94 INSERT INTO """+keyspace+"""."""+table+""" (userid, movieid,rating,action,adventure,animation,children,comedy,crime,documentary,drama,fantasy,film_noir,horror,imax,musical,mystery,romance,sci_fi,short,thriller,war,western)
95 VALUES (%(userid)s, %(movieid)s,%(rating)s, %(action)s,%(adventure)s, %(animation)s,%(children)s,
96 %(comedy)s,%(crime)s, %(documentary)s,%(drama)s, %(fantasy)s,%(film_noir)s, %(horror)s,%(imax)s, %(musical)s,%(mystery)s, %(romance)s,%(sci_fi)s,
97 %(short)s,%(thriller)s, %(war)s,%(western)s)
98 """,
99 {
100 'userid': dictio["userID"],
101 'movieid': dictio["movieID"],
102 'rating': dictio["rating"],
103 'action': dictio["Action"],
104 'adventure': dictio["Adventure"],
105 'animation': dictio["Animation"],
106 'children': dictio["Children"],
107 'comedy': dictio["Comedy"],
108 'crime': dictio["Crime"],
109 'documentary': dictio["Documentary"],
110 'drama': dictio["Drama"],
111 'fantasy': dictio["Fantasy"],
112 'film_noir': dictio["Film-Noir"],
113 'horror': dictio["Horror"],
114 'imax': dictio["IMAX"],
115 'musical': dictio["Musical"],
116 'mystery': dictio["Mystery"],
117 'romance': dictio["Romance"],
118 'sci_fi': dictio["Sci-Fi"],
119 'short': dictio["Short"],
120 'thriller': dictio["Thriller"],
121 'war': dictio["War"],
122 'western': dictio["Western"],
123
124 }
125 )
126
127
128def push_data_allUserRatings(session, keyspace, table, userId, avgMovieRating):
129 session.execute(
130 """
131 INSERT INTO """+keyspace+"""."""+table+""" (user_id, avg_movie_rating)
132 VALUES (%(user_id)s, %(avg_movie_rating)s)
133 """,
134 {
135 'user_id': userId,
136 'avg_movie_rating': avgMovieRating
137 }
138 )
139
140
141def get_data_table(session, keyspace, table):
142 rows = session.execute("SELECT * FROM "+keyspace+"."+table+";")
143 return list(rows)
144
145
146def clear_table(session, keyspace, table):
147 session.execute("TRUNCATE "+keyspace+"."+table+";")
148
149def delete_table(session, keyspace, table):
150 session.execute("DROP TABLE "+keyspace+"."+table+";")
151
152
153class api_logic:
154 keyspace = "user_ratings"
155
156 def __init__(self):
157 self.counter = 1
158
159 def add_rating(self, aNewRating):
160 print(type(get_data_table(session, keyspace, "rawRatingsData")))
161 tmp = aNewRating
162 print(tmp)
163
164
165 push_data_rawRatingsData(session, keyspace, "rawRatingsData", tmp)
166
167
168 def list_ratings(self):
169
170 return get_data_table(session,keyspace,"rawRatingsData")
171
172 def delete_ratings(self):
173
174 clear_table(session, keyspace, "rawRatingsData")
175
176 def dict_to_df(self, lista):
177 return pd.DataFrame.from_dict(lista)
178 def get_mean_ratings(self, df):
179 list = {}
180 for header in lista2:
181 dft = df.loc[df[header] == 1]
182 if len(dft) > 0:
183 v = np.nanmean(dft['rating'])
184 list[header] = round(v, 2)
185 else:
186 list[header] = 0
187 return list
188
189 def avgUserIdRatings(self, userID):
190 userid = str(userID)
191 ret = []
192 rows = session.execute("SELECT * FROM " + self.keyspace + ".rawRatingsData WHERE userid =" + userid + ";")
193 for row in rows:
194 ret.append(row)
195 ret = self.get_mean_ratings(self.dict_to_df(ret))
196
197
198
199
200 return (ret)
201
202 def avgRatings(self):
203 ret = []
204 rows = session.execute("SELECT * FROM " + self.keyspace + ".rawRatingsData;")
205 for row in rows:
206 ret.append(row)
207 ret = self.get_mean_ratings(self.dict_to_df(ret))
208 return (ret)
209
210 def calculate_user_profile(self, dict1, dict2):
211 ret = {}
212 for k, v in dict1.items():
213 if v != 0:
214 ret[k] = v - dict2[k]
215 else:
216 ret[k] = 0.0
217
218 return ret
219
220 def get_user_profile(self, userid):
221 userid = str(userid)
222 return self.calculate_user_profile(self.avgUserIdRatings(userid), self.avgRatings())
223
224@app.route('/fillme', methods=['POST'])
225def fill():
226 for row in merged.iterrows():
227 row_dict = row[1].to_dict()
228 logic.add_rating(row_dict)
229
230 return "I'm full"
231
232
233@app.route('/ratings', methods=['POST'])
234def new_rating():
235 newRecord = merged.iloc[logic.counter]
236 toRet = newRecord.to_json()
237 logic.counter += 1
238 newRecord = newRecord.to_dict()
239 logic.add_rating(newRecord)
240
241 return (toRet)
242
243
244@app.route('/ratings', methods=['GET'])
245def list_ratings():
246 ratings = logic.list_ratings()
247 return json.dumps(ratings)
248
249
250@app.route('/ratings', methods=['DELETE'])
251def delete_ratings():
252 logic.delete_ratings()
253 return "All is lost", 204
254
255
256@app.route('/avgGenreRatings', methods=['GET'])
257def list_avg():
258 return json.dumps(logic.avgRatings())
259
260
261@app.route('/avgGenreRatings/<user>', methods=['GET'])
262def list_usr_avg(user):
263 return json.dumps(logic.avgUserIdRatings(user))
264
265@app.route('/profile/<user>', methods=['GET'])
266def get_user_profile(user):
267 return json.dumps(logic.get_user_profile(user))
268
269
270def create_API_app():
271 app.run(port=9875)
272
273
274
275
276if __name__ == "__main__":
277
278 keyspace = "user_ratings"
279 cluster = Cluster(['127.0.0.1'], port=9042)
280 session = cluster.connect()
281 # utworzenie nowego keyspace
282 create_keyspace(session, keyspace)
283 # ustawienie używanego keyspace w sesji
284 session.set_keyspace(keyspace)
285 # użycie dict_factory pozwala na zwracanie słowników
286 # znanych z języka Python przy zapytaniach do bazy danych
287 session.row_factory = dict_factory
288 #delete_table(session,keyspace,"rawRatingsData")
289 #delete_table(session, keyspace, "allUserRatings")
290 create_rawRatingsData(session, keyspace, "rawRatingsData")
291 create_allUserRatings(session, keyspace, "allUserRatings")
292
293 logic = api_logic()
294 create_API_app()