· 6 years ago · May 07, 2019, 10:20 AM
1from cassandra.cluster import Cluster
2from cassandra.query import dict_factory
3import Lab4
4
5KEYSPACE = "rating_keyspace"
6USER_TABLE = "user_rated_movies"
7AVG_GENRE_RATINGS_FOR_USER = "avg_genre_ratings_for_user"
8
9class CassClient:
10 def __init__(self):
11 cluster = Cluster(['127.0.0.1'], port=9042)
12 self.session = cluster.connect()
13 self.create_keyspace()
14 self.session.set_keyspace(KEYSPACE)
15 self.session.row_factory = dict_factory
16
17 def create_keyspace(self):
18 self.session.execute("""
19 CREATE KEYSPACE IF NOT EXISTS """+KEYSPACE+"""
20 WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' }
21 """)
22
23 def create_table_query(self, keyspace, table, primaryKey, additionalColumns):
24 output = """
25 CREATE TABLE IF NOT EXISTS """ + keyspace + """.""" + table + """ ("""
26 for column_name, type in additionalColumns.items():
27 output = output + str(column_name) + ' ' + str(type) + ', '
28 output = output + 'PRIMARY_KEY(' + primaryKey + ')'
29 output = output + \
30 """ )"""
31 return output
32
33 def query_helper(self, tableName):
34 _, genres_column_names = Lab4.merge(1)
35 genres_num = len(genres_column_names)
36 genres_column_names = [genre.replace('-', '_') for genre in genres_column_names]
37 if USER_TABLE == tableName:
38 genres_column_names.insert(0, "rating")
39 genres_column_names.insert(0, "movie_id")
40 genres_column_names.insert(0, "user_id")
41 types = ['int', 'int', 'float']
42 for _ in range(genres_num):
43 types.append('int')
44 else:
45 genres_column_names.insert(0, "user_id")
46 types = ['int']
47 for _ in range (genres_num):
48 types.append('float')
49 return dict(zip(genres_column_names, types))
50
51 def create_avg_genre_ratings_for_user_table(self):
52 query = self.create_table_query(KEYSPACE, AVG_GENRE_RATINGS_FOR_USER, 'user_id', self.query_helper(AVG_GENRE_RATINGS_FOR_USER))
53 self.session.execute(query)
54
55 def create_user_rated_movies(self):
56
57
58 # def push_data_table(session ,keyspace, table, userId, values):
59 # session.execute("""
60 # INSERT INTO """+keyspace+"""."""+table+"""(
61 # user_id, avg_movie_rating)
62 # VALUES(%(user_id)s, %(values)s)
63 # """,
64 # {
65 # 'user_id': userId,
66 # 'profile': values
67 # })
68
69 def get_data_table(self, keyspace, table):
70 rows = self.session.execute("SELECT * FROM "+keyspace+"."+table+";")
71 for row in rows:
72 print(row)
73
74 def clear_table(self, keyspace, table):
75 self.session.execute("TRUNCATE "+keyspace+"."+table+";")
76
77 def delete_table(self, keyspace, table):
78 self.session.execute("DROP TABLE "+keyspace+"."+table+";")
79
80 if __name__ == "__main__":
81 # keyspace = "user_ratings"
82 # table = "user_avg_rating"
83 # cluster = Cluster(['127.0.0.1'], port=9042)
84 # session = cluster.connect()
85 # create_keyspace(session, keyspace)
86 # session.set_keyspace(keyspace)
87 # session.row_factory = dict_factory
88 # create_table(session, keyspace, table)
89 #
90 # push_data_table(session, keyspace, table, userId=1337, avgMovieRating=4.2)
91 # get_data_table(session, keyspace, table)
92 # clear_table(session, keyspace, table)
93 # get_data_table(session, keyspace, table)
94 # delete_table(session, keyspace, table)