· 6 years ago · Jun 08, 2019, 01:18 PM
1import json
2import random
3import pandas as pd
4import numpy as np
5import cherrypy
6import pickle
7import redis
8import zlib
9from cassandra.cluster import Cluster
10from cassandra.query import dict_factory
11
12
13
14userRated = pd.read_csv('user_ratedmovies.dat', sep="\t", header=0, usecols=['userID','movieID','rating'], nrows=1000)
15genresKey = pd.read_csv('movie_genres.dat', sep="\t", header=0)
16
17genresKey['example'] = 1
18genresKeyPivo = genresKey.pivot_table(index='movieID', columns='genre', values='example').fillna(0)
19genresKeyPivo = genresKeyPivo.astype(int)
20# merged = pd.merge(userRated, genresKey, left_on='movieID', right_on='movieID')
21merged = pd.merge(userRated, genresKeyPivo, on='movieID')
22merged.columns = ['' + str(col) for col in merged.columns]
23merged = merged.rename(index=str, columns = {'userID':'userID', 'movieID':'movieID', 'rating':'rating'})
24
25merged["genres_column_names"] = np.empty((len(merged), 0)).tolist()
26
27lista = merged.columns[3:-1]
28
29lista2 = [x.lower() for x in lista]
30lista2 = [w.replace('-', '_') for w in lista2]
31
32for index, row in merged.iterrows():
33 for el in lista:
34 if merged.loc[index, el] == 1:
35 merged.loc[index,'genres_column_names'].append(el)
36
37
38EXPIRATION_SECONDS = 600
39
40
41def create_keyspace(session, keyspace):
42 session.execute("""
43 CREATE KEYSPACE IF NOT EXISTS """ + keyspace + """
44 WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' }
45 """)
46
47
48# self.rawRatingsData = pd.DataFrame(get_data_table(session, keyspace, "rawRatingsData"))
49# self.allUserRatings = get_data_table(session, keyspace, "allUserRatings")
50# self.allRatings = get_data_table(session, keyspace, "allRatings")
51
52def create_rawRatingsData(session, keyspace, table):
53 session.execute("""
54 CREATE TABLE IF NOT EXISTS """ + keyspace + """.""" + table + """ (
55 userid int ,
56 movieid int,
57 rating float,
58 Action float,
59 Adventure float,
60 Animation float,
61 Children float,
62 Comedy float,
63 Crime float,
64 Documentary float,
65 Drama float,
66 Fantasy float,
67 Film_Noir float,
68 Horror float,
69 IMAX float,
70 Musical float,
71 Mystery float,
72 Romance float,
73 Sci_Fi float,
74 Short float,
75 Thriller float,
76 War float,
77 Western float,
78
79 PRIMARY KEY(userid, movieid)
80 )
81 """)
82
83
84def create_allUserRatings(session, keyspace, table):
85 session.execute("""
86 CREATE TABLE IF NOT EXISTS """ + keyspace + """.""" + table + """ (
87 user_id int ,
88 avg_movie_rating varchar,
89 PRIMARY KEY(user_id)
90 )
91 """)
92
93
94def push_data_rawRatingsData(session, keyspace, table, dictio):
95 print(dictio.keys())
96
97 print(get_data_table(session, keyspace, table))
98 session.execute(
99 """
100 INSERT INTO """ + keyspace + """.""" + table + """ (userid, movieid,rating,action,adventure,animation,children,comedy,crime,documentary,drama,fantasy,film_noir,horror,imax,musical,mystery,romance,sci_fi,short,thriller,war,western)
101 VALUES (%(userid)s, %(movieid)s,%(rating)s, %(action)s,%(adventure)s, %(animation)s,%(children)s,
102 %(comedy)s,%(crime)s, %(documentary)s,%(drama)s, %(fantasy)s,%(film_noir)s, %(horror)s,%(imax)s, %(musical)s,%(mystery)s, %(romance)s,%(sci_fi)s,
103 %(short)s,%(thriller)s, %(war)s,%(western)s)
104 """,
105 {
106 'userid': dictio["userID"],
107 'movieid': dictio["movieID"],
108 'rating': dictio["rating"],
109 'action': dictio["Action"],
110 'adventure': dictio["Adventure"],
111 'animation': dictio["Animation"],
112 'children': dictio["Children"],
113 'comedy': dictio["Comedy"],
114 'crime': dictio["Crime"],
115 'documentary': dictio["Documentary"],
116 'drama': dictio["Drama"],
117 'fantasy': dictio["Fantasy"],
118 'film_noir': dictio["Film-Noir"],
119 'horror': dictio["Horror"],
120 'imax': dictio["IMAX"],
121 'musical': dictio["Musical"],
122 'mystery': dictio["Mystery"],
123 'romance': dictio["Romance"],
124 'sci_fi': dictio["Sci-Fi"],
125 'short': dictio["Short"],
126 'thriller': dictio["Thriller"],
127 'war': dictio["War"],
128 'western': dictio["Western"],
129
130 }
131 )
132
133
134def push_data_allUserRatings(session, keyspace, table, userId, avgMovieRating):
135 session.execute(
136 """
137 INSERT INTO """ + keyspace + """.""" + table + """ (user_id, avg_movie_rating)
138 VALUES (%(user_id)s, %(avg_movie_rating)s)
139 """,
140 {
141 'user_id': userId,
142 'avg_movie_rating': avgMovieRating
143 }
144 )
145
146
147def get_data_table(session, keyspace, table):
148 rows = session.execute("SELECT * FROM " + keyspace + "." + table + ";")
149 return list(rows)
150
151
152def clear_table(session, keyspace, table):
153 session.execute("TRUNCATE " + keyspace + "." + table + ";")
154
155
156def delete_table(session, keyspace, table):
157 session.execute("DROP TABLE " + keyspace + "." + table + ";")
158
159
160class api_logic:
161 keyspace = "user_ratings"
162 lista2 = [x.lower() for x in lista]
163 lista2 = [w.replace('-', '_') for w in lista2]
164 def __init__(self):
165 self.counter = 1
166
167 def add_rating(self, aNewRating):
168 print(type(get_data_table(session, keyspace, "rawRatingsData")))
169 tmp = aNewRating
170 print(tmp)
171
172 push_data_rawRatingsData(session, keyspace, "rawRatingsData", tmp)
173
174 def list_ratings(self):
175
176 return get_data_table(session, keyspace, "rawRatingsData")
177
178 def delete_ratings(self):
179
180 clear_table(session, keyspace, "rawRatingsData")
181
182 def dict_to_df(self, lista):
183 return pd.DataFrame.from_dict(lista)
184
185 def get_mean_ratings(self, df):
186 list = {}
187 for header in lista2:
188 dft = df.loc[df[header] == 1]
189 if len(dft) > 0:
190 v = np.nanmean(dft['rating'])
191 list[header] = round(v, 2)
192 else:
193 list[header] = 0
194 return list
195
196 def avgUserIdRatings(self, userID):
197 userid = str(userID)
198 ret = []
199 rows = session.execute("SELECT * FROM " + self.keyspace + ".rawRatingsData WHERE userid =" + userid + ";")
200 for row in rows:
201 ret.append(row)
202 ret = self.get_mean_ratings(self.dict_to_df(ret))
203
204 return (ret)
205
206 def avgRatings(self):
207 ret = []
208 rows = session.execute("SELECT * FROM " + self.keyspace + ".rawRatingsData;")
209 for row in rows:
210 ret.append(row)
211 ret = self.get_mean_ratings(self.dict_to_df(ret))
212 return (ret)
213
214 def calculate_user_profile(self, dict1, dict2):
215 ret = {}
216 for k, v in dict1.items():
217 if v != 0:
218 ret[k] = v - dict2[k]
219 else:
220 ret[k] = 0.0
221
222 return ret
223
224 def get_user_profile(self, userid):
225 userid = str(userid)
226 return self.calculate_user_profile(self.avgUserIdRatings(userid), self.avgRatings())
227
228
229
230
231class Server(object):
232 @cherrypy.expose
233 @cherrypy.tools.accept(media='text/plain')
234 def ratings(self):
235 if cherrypy.request.method == 'POST':
236 newRecord = merged.iloc[logic.counter]
237 toRet = newRecord.to_json()
238 logic.counter += 1
239 newRecord = newRecord.to_dict()
240 logic.add_rating(newRecord)
241 return toRet
242 elif cherrypy.request.method == 'GET':
243 ratings = logic.list_ratings()[:100]
244 return json.dumps(ratings)
245
246 elif cherrypy.request.method == 'DELETE':
247 logic.delete_ratings()
248 return json.dumps('""')
249 @cherrypy.expose
250 def avgGenreRatingsAll(self):
251 if cherrypy.request.method == 'GET':
252 return json.dumps(logic.avgRatings())
253 @cherrypy.expose
254 def avgGenreRatings(self, len = 8):
255 if cherrypy.request.method == 'GET':
256 return json.dumps(logic.avgUserIdRatings(len))
257
258 @cherrypy.expose
259 def profile(self, len=8):
260 if cherrypy.request.method == 'GET':
261 return json.dumps(logic.get_user_profile(len))
262
263
264
265
266if __name__ == '__main__':
267 keyspace = "user_ratings"
268 cluster = Cluster(['127.0.0.1'], port=9042)
269 session = cluster.connect()
270 # utworzenie nowego keyspace
271 create_keyspace(session, keyspace)
272 # ustawienie używanego keyspace w sesji
273 session.set_keyspace(keyspace)
274 # użycie dict_factory pozwala na zwracanie słowników
275 # znanych z języka Python przy zapytaniach do bazy danych
276 session.row_factory = dict_factory
277 delete_table(session,keyspace,"rawRatingsData")
278 delete_table(session, keyspace, "allUserRatings")
279 create_rawRatingsData(session, keyspace, "rawRatingsData")
280 create_allUserRatings(session, keyspace, "allUserRatings")
281
282
283 logic = api_logic()
284 cherrypy.config.update({'server.socket_port': 9898})
285 cherrypy.quickstart(Server())