· 2 years ago · May 04, 2023, 01:20 AM
1"""
2Name: Jackson McAfee
3Date: 04/24/2023
4Assignment: Module 14: Adv Column Store Querying
5Due Date: 04/23/2023
6About this project: Sets up main column store db for the amazon.mdbe database
7Assumptions:NA
8All work below was performed by Jackson McAfee
9"""
10
11# Using monetDb
12import monetdbe
13
14# Path to the database file you want to create
15database = 'amazon.mdbe'
16
17try:
18 with monetdbe.connect(database) as conn:
19 # Drops the table you are going to create if it exists
20 conn.set_autocommit(True)
21
22 conn.execute("""DROP TABLE IF EXISTS AMAZON""")
23
24 # Creates a table that contains all the attributes in your data file
25 # attributes are Brand (string), Total (int), Releases (int), #1 Release (int), Lifetime Gross (int)
26 conn.execute("""CREATE TABLE AMAZON
27 (
28 Name TEXT,
29 Main_category TEXT,
30 Sub_category TEXT,
31 Image TEXT,
32 Link TEXT,
33 Ratings DOUBLE,
34 No_of_ratings INTEGER,
35 Discounted_price TEXT,
36 Actual_price TEXT)""")
37
38 # Load the table created with the data from the data file you selected
39 # This adds data from Watches.csv to the table
40 conn.execute(f"""
41 COPY OFFSET 2 INTO AMAZON
42 FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Watches.csv'
43 USING DELIMITERS ',', E'\n', '"'
44 NULL AS ''
45 BEST EFFORT""")
46
47 # This adds data from Wallets.csv to the table
48 conn.execute(f"""
49 COPY OFFSET 2 INTO AMAZON
50 FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Wallets.csv'
51 USING DELIMITERS ',', E'\n', '"'
52 NULL AS ''
53 BEST EFFORT""")
54
55 # This adds data from Sunglasses.csv to the table
56 conn.execute(f"""
57 COPY OFFSET 2 INTO AMAZON
58 FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Sunglasses.csv'
59 USING DELIMITERS ',', E'\n', '"'
60 NULL AS ''
61 BEST EFFORT""")
62
63 with monetdbe.connect(database) as conn:
64 # Print the results
65 print("Amazon Load Errors")
66 results = conn.execute("""SELECT * FROM sys.rejects""")
67 for r in results.fetchall():
68 print(r)
69
70 with monetdbe.connect(database) as conn:
71 print("Number of records")
72 results = conn.execute("""SELECT COUNT(*) FROM AMAZON""")
73 for r in results.fetchall():
74 print(r)
75
76 conn.close()
77
78except Exception as inst:
79 print(type(inst))
80 print(inst.args)
81
82#####################################
83
84"""
85Name: Jackson McAfee
86Date: 04/24/2023
87Assignment: Module 14: Adv Column Store Querying
88Due Date: 04/23/2023
89About this project: Creates AMAZON merge table and partitions it into AMAZON_WATCHES, AMAZON_WALLETS, and AMAZON_SUNGLASSES
90Assumptions:NA
91All work below was performed by Jackson McAfee
92"""
93
94# Using monetDb
95import monetdbe
96
97# Path to the database file you want to create
98database = 'amazon.mdbe'
99
100try:
101 with monetdbe.connect(database) as conn:
102 # Drops the table you are going to create if it exists
103 conn.set_autocommit(True)
104
105 conn.execute("""DROP TABLE IF EXISTS AMAZON""")
106
107 # Creates a table that contains all the attributes in your data file
108 # attributes are Brand (string), Total (int), Releases (int), #1 Release (int), Lifetime Gross (int)
109 conn.execute("""CREATE MERGE TABLE AMAZON
110 (
111 Name TEXT,
112 Main_category TEXT,
113 Sub_category TEXT,
114 Image TEXT,
115 Link TEXT,
116 Ratings REAL,
117 No_of_ratings INTEGER,
118 Discounted_price TEXT,
119 Actual_price TEXT)
120 PARTITION BY VALUES ON (Sub_category)
121 ; """)
122
123 conn.execute(
124 """ALTER TABLE AMAZON
125 ADD TABLE AMAZON_WATCHES
126 AS PARTITION IN (Watches)""")
127
128 conn.execute(
129 """ALTER TABLE AMAZON
130 ADD TABLE AMAZON_WALLETS
131 AS PARTITION IN (Wallets)""")
132
133 conn.execute(
134 """ALTER TABLE AMAZON
135 ADD TABLE AMAZON_SUNGLASSES
136 AS PARTITION IN (Sunglasses)""")
137
138
139 with monetdbe.connect(database) as conn:
140 results = conn.execute("""SELECT Count(*) FROM AMAZON""")
141 for r in results.fetchall():
142 print(r)
143
144 with monetdbe.connect(database) as conn:
145 print("Load Errors")
146 results = conn.execute("""SELECT * FROM sys.rejects""")
147 for r in results.fetchall():
148 print(r)
149
150 with monetdbe.connect(database) as conn:
151 results = conn.execute("""SELECT Count(*) FROM AMAZON_WATCHES""")
152 for r in results.fetchall():
153 print(r)
154
155 conn.close()
156
157except Exception as inst:
158 print(type(inst)) # the exception instance
159 print(inst.args) # arguments stored in .args
160 print(inst)
161
162#####################################
163
164"""
165Name: Jackson McAfee
166Date: 04/24/2023
167Assignment: Module 14: Adv Column Store Querying
168Due Date: 04/23/2023
169About this project: Creates a summary table for the amazon.mdbe database
170Assumptions:NA
171All work below was performed by Jackson McAfee
172"""
173
174import monetdbe
175import numpy
176
177database = 'amazon.mdbe'
178
179try:
180 with monetdbe.connect(database) as conn:
181 # Here we create a primary key, and use "NOT NULL" to prevent inserting invalid data
182 conn.set_autocommit(True)
183
184 conn.execute(
185 """DROP TABLE IF EXISTS AMAZON_SUMMARY""")
186
187 conn.execute(
188 """CREATE TABLE AMAZON_SUMMARY
189 (
190 CummSummNumRatings INTEGER,
191 Ratings FLOAT,
192 ActualPrice STRING)""")
193
194 conn.execute(
195 """Delete from AMAZON_SUMMARY
196 """)
197
198 # I do not know why this does not work, documentation did not help me at all.
199 # I assume it wants ints as a range instead of floats, but I just don't have
200 # the time to figure it out and I do not want to find a completely new database
201
202 # I tried changing to ints and running on CummSummNumRatings but it still did
203 # not work. I cannot tell how this is substantially different than the code in
204 # the other two files, but it is not doing anything for me.
205
206 for ratings in numpy.arange(1, 5.1, 0.1):
207 conn.execute(
208 """Insert into AMAZON_SUMMARY
209 (CummSumNumRatings,
210 Ratings,
211 ActualPrice) Values (0,?,0)""",float(ratings))
212
213
214 conn.execute(
215 """Update AMAZON_SUMMARY
216 Set CummSumNumRatings =
217 (Select SUM(No_of_Ratings)
218 from AMAZON
219 where AMAZON.No_of_Ratings =
220 AMAZON_SUMMARY.No_of_Ratings
221 """)
222
223 conn.execute(
224 """Delete from AMAZON_SUMMARY
225 where CummSumNumRatings = 0 """)
226
227
228 with monetdbe.connect(database) as conn:
229 print("Load Errors")
230 results = conn.execute("""SELECT * FROM sys.rejects""")
231 for r in results.fetchall():
232 print(r)
233
234 with monetdbe.connect(database) as conn:
235 print("Num Records")
236 results = conn.execute("""SELECT Count(*) FROM AMAZON_SUMMARY""")
237 for r in results.fetchall():
238 print(r)
239
240 conn.close()
241
242except Exception as inst:
243 print(type(inst)) # the exception instance
244 print(inst.args) # arguments stored in .args
245 print(inst)
246
247
248#####################################
249
250"""
251Name: Jackson McAfee
252Date: 04/24/2023
253Assignment: Module 14: Adv Column Store Querying
254Due Date: 04/23/2023
255About this project: Performs a query on the amazon.mdbe database (specifically the AMAZON_WATCHES and AMAZON tables)
256Assumptions:NA
257All work below was performed by Jackson McAfee
258"""
259
260import monetdbe
261import time
262database = 'amazon.mdbe'
263
264try:
265
266 with monetdbe.connect(database) as conn1:
267 start = time.time()
268 results1 = conn1.execute("""
269 SELECT Name, Ratings, No_of_ratings
270 FROM AMAZON_WATCHES
271 Order by Ratings DESC
272 """)
273
274 for r in results1.fetchall():
275 print(r)
276
277 end = time.time()
278 executionTime = end-start
279 print("Execution Time:", executionTime)
280
281 start = time.time()
282 results2 = conn1.execute("""
283 SELECT Name, Ratings, No_of_ratings
284 FROM AMAZON
285 Order by Ratings DESC
286 """)
287
288 for r in results2.fetchall():
289 print(r)
290
291 end = time.time()
292 executionTime = end-start
293 print("Execution Time:", executionTime)
294
295 conn1.close()
296except Exception as inst:
297 print(type(inst)) # the exception instance
298 print(inst.args) # arguments stored in .args
299 print(inst)
300
301##################################
302
303"""
304Name: Jackson McAfee
305Date: 04/24/2023
306Assignment: Module 14: Adv Column Store Querying
307Due Date: 04/23/2023
308About this project: Performs a query on the amazon.mdbe database (specifically the AMAZON_SUMMARY table)
309Assumptions:NA
310All work below was performed by Jackson McAfee
311"""
312
313
314import monetdbe
315import time
316
317database = 'amazon.mdbe'
318
319try:
320 with monetdbe.connect(database) as conn:
321 start = time.time()
322 results = conn.execute("""
323 SELECT CummSummNumRatings,Ratings,ActualPrice
324 FROM AMAZON_SUMMARY
325 Where CummSummNumRatings < 100
326 Order by Ratings DESC
327 """)
328 for r in results.fetchall():
329 print(r)
330 end = time.time()
331 executionTime = end-start
332 print("Execution Time:", executionTime)
333
334 start = time.time()
335 results2 = conn.execute("""
336 SELECT CummSummNumRatings,Ratings,ActualPrice
337 FROM AMAZON_SUMMARY
338 Where CummSummNumRatings < 100
339 Order by Ratings DESC
340 """)
341
342 for r in results2.fetchall():
343 print(r)
344
345 end = time.time()
346 executionTime = end-start
347 print("Execution Time:", executionTime)
348
349 conn.close()
350
351except Exception as inst:
352 print(type(inst)) # the exception instance
353 print(inst.args) # arguments stored in .args
354 print(inst)
355