· 4 years ago · Jun 09, 2021, 11:54 PM
1import cassandra
2from cassandra import cluster
3from flask.globals import session
4import pandas as pd
5from flask import Flask, json, request
6import json
7from cassandra import query
8from cassandra.cluster import Cluster, ResultSet, Session
9from cassandra.query import dict_factory
10import pandas as pd
11from pandas.core.frame import DataFrame
12import wtiproj06_api_logic
13
class Cassandra:
    """Cassandra-backed store of user movie ratings plus per-genre averages.

    On construction the ``user_ratings`` keyspace and its ``data`` table are
    created if missing, the table is seeded from the local ``.dat`` files when
    it is empty, and the all-users genre averages are computed and cached in
    ``avrage_ratings_all_users``.

    Keyspace and table names are interpolated into CQL text because CQL does
    not allow identifiers to be bound as parameters; pass only trusted names.
    Values (JSON payloads, user ids) are always bound as query parameters.
    """

    # Lower-case genre names handed to the ``wtiproj06_api_logic`` helpers.
    genres_list = [
        'adventure', 'animation', 'comedy', 'children', 'fantasy', 'romance',
        'drama', 'action', 'crime', 'thriller', 'horror', 'mystery', 'sci-fi',
        'imax', 'documentary', 'war', 'musical', 'film-noir', 'western',
        'short',
    ]

    # Genre column names used in CREATE TABLE. The hyphenated names carry
    # embedded double quotes so they are emitted as quoted CQL identifiers.
    genres_list_for_table = [
        'Adventure', 'Animation', 'Comedy', 'Children', 'Fantasy', 'Romance',
        'Drama', 'Action', 'Crime', 'Thriller', 'Horror', 'Mystery',
        '"sci-fi"', 'IMAX', 'Documentary', 'War', 'Musical', '"film-noir"',
        'Western', 'Short',
    ]

    # NOTE: these run when the class body is executed (i.e. at import time)
    # and are shared by every instance.
    cluster = Cluster(['127.0.0.1'], port=9042)
    session = cluster.connect()

    # Cached result of ``avg_all_users``; replaced per instance on refresh.
    avrage_ratings_all_users = {}

    def __init__(self):
        """Prepare the keyspace/table, seed data if needed, cache averages."""
        keyspace = "user_ratings"
        table = "data"

        # Create the keyspace if it does not exist yet.
        self.create_keyspace(self.session, keyspace)
        # Make it the default keyspace of the session.
        self.session.set_keyspace(keyspace)
        self.session.row_factory = dict_factory
        # Table holding the ratings of all users.
        self.create_table(keyspace, table)

        # Seed the table only when the query returns no rows.
        if not self.get_data_table(keyspace, table):
            seed_df = self.create_data(300)
            self.insert_data_from_dataframe(keyspace, table, seed_df)

        # Always refresh the cache so it is populated even when the table
        # already contained data from a previous run.
        self.avg_all_users()

    def create_keyspace(self, session, keyspace):
        """Create ``keyspace`` with SimpleStrategy / RF 1 if it is missing.

        Args:
            session: Unused; kept for backward compatibility. The statement
                always runs on ``self.session``.
            keyspace: Trusted keyspace name (interpolated into the CQL text).
        """
        self.session.execute(
            f"CREATE KEYSPACE IF NOT EXISTS {keyspace} "
            "WITH replication = "
            "{ 'class': 'SimpleStrategy', 'replication_factor': '1' }"
        )

    def create_table(self, keyspace, table):
        """Create the ratings table (one ``double`` column per genre).

        Args:
            keyspace: Trusted keyspace name.
            table: Trusted table name.
        """
        column_defs = ["userID double", "rating double", "movieID double"]
        column_defs.extend(
            f"{genre} double" for genre in self.genres_list_for_table
        )
        column_defs.append("PRIMARY KEY (userID, movieID)")
        columns_cql = ",\n    ".join(column_defs)
        self.session.execute(
            f"CREATE TABLE IF NOT EXISTS {keyspace}.{table} (\n"
            f"    {columns_cql}\n"
            ")"
        )

    def insert_data_from_dataframe(self, keyspace, table, df: DataFrame):
        """Insert every row of ``df`` as a JSON document.

        Args:
            keyspace: Trusted keyspace name.
            table: Trusted table name.
            df: Frame whose rows are serialised with ``Series.to_json``.
        """
        for _, row in df.iterrows():
            self.insert_row(keyspace, table, row.to_json())

    def insert_row(self, keyspace, table, json):
        """Insert a single row given as a JSON string.

        The payload is bound as a query parameter so quotes inside it are
        escaped by the driver instead of terminating the CQL string literal.

        Args:
            keyspace: Trusted keyspace name.
            table: Trusted table name.
            json: JSON text describing the row.
        """
        self.session.execute(
            f"INSERT INTO {keyspace}.{table} JSON %s", (json,)
        )

    def get_data_table(self, keyspace, table) -> ResultSet:
        """Return the result of ``SELECT *`` on ``keyspace.table``."""
        return self.session.execute(f"SELECT * FROM {keyspace}.{table};")

    def clear_table(self, keyspace, table):
        """Remove all rows from ``keyspace.table`` (TRUNCATE)."""
        self.session.execute(f"TRUNCATE {keyspace}.{table};")

    def delete_table(self, keyspace, table):
        """Drop ``keyspace.table``."""
        self.session.execute(f"DROP TABLE {keyspace}.{table};")

    def create_data(self, rows):
        """Build the seed frame from ``movie_genres.dat`` and
        ``user_ratedmovies.dat`` in the current working directory.

        Ratings (first three columns of the ratings file) are inner-joined
        with genres on ``movieID`` and pivoted so each genre becomes a column,
        with 0 filled in where a (user, movie, rating) has no such genre.

        Args:
            rows: Upper bound used to slice the pivoted frame (``[:rows]``).

        Returns:
            DataFrame with ``userID``, ``movieID``, ``rating`` and one column
            per genre present in the sliced data.
        """
        genres_df = pd.read_table("movie_genres.dat")
        ratings_df = pd.read_table("user_ratedmovies.dat", usecols=[0, 1, 2])

        merged = pd.merge(
            ratings_df, genres_df, how="inner", on="movieID", sort=True
        )
        # Marker column: pivoting it yields a per-genre membership flag.
        merged['count'] = 1
        pivoted = merged.pivot_table(
            values='count',
            index=['userID', 'movieID', 'rating'],
            columns='genre',
            fill_value=0,
        )
        return pivoted.iloc[:rows].reset_index()

    # Get avg ratings of genres based on all users' scores
    def avg_all_users(self):
        """Recompute and cache the genre averages over the whole table.

        The computation itself is delegated to ``wtiproj06_api_logic.avg``;
        its result is stored in ``self.avrage_ratings_all_users``.
        """
        all_rows = list(self.get_data_table("user_ratings", "data"))
        self.avrage_ratings_all_users = wtiproj06_api_logic.avg(
            DataFrame(all_rows), self.genres_list
        )

    # Get avg ratings of genres based on the given user
    def avg_user(self, userID):
        """Return the genre averages for a single user.

        Args:
            userID: User identifier; must be convertible to ``float`` because
                the ``userID`` column is declared ``double``.

        Returns:
            A dict mapping every genre to ``0.0`` when the user has no rows,
            otherwise whatever ``wtiproj06_api_logic.avg_vector`` returns.

        Raises:
            ValueError, TypeError: If ``userID`` is not numeric.
        """
        # Bind the id instead of formatting it into the statement so that
        # request-supplied values cannot alter the query.
        user_rows = self.session.execute(
            "SELECT * FROM user_ratings.data WHERE userID=%s",
            (float(userID),),
        )
        data = DataFrame(list(user_rows))
        if data.empty:
            return dict.fromkeys(self.genres_list, 0.0)
        return wtiproj06_api_logic.avg_vector(userID, data, self.genres_list)
106