import psycopg2
import configparser


def fetchDBinfo(inifile):
    """Fetches all required data to log into a given relational database from the given inifile."""
    config = configparser.ConfigParser()
    config.read(inifile)
    dbname = config['relational_db']['database']
    user = config['relational_db']['user']
    password = config['relational_db']['password']
    host = config['relational_db']['host']
    return RDBconnect(dbname, user, password, host)
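# For reference, a hypothetical database.ini matching the keys read above might look like
# the sketch below. The section and key names come from fetchDBinfo; the values are made-up
# placeholders, not taken from the actual project:
#
#   [relational_db]
#   database = mydatabase
#   user = postgres
#   password = secret
#   host = localhost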


def RDBconnect(DBName, Username, Password, Host):
    """Returns a connection object that's linked to the local PostgreSQL database that accepts
    these credentials."""
    connection = psycopg2.connect(dbname=DBName, user=Username, password=Password, host=Host)
    print(connection)
    return connection


def tableformat(table, inifile):
    """Returns all the columns of the given table as listed in the ini file, so that you can make
    data input for other functions easier."""
    config = configparser.ConfigParser()
    output = []
    config.read(inifile)
    for key in config[table]:
        output.append(key)
    print(output)
    return output
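# tables.cfg itself is not included here; as an assumption for illustration, a section for the
# profiles_previously_viewed table could look like the sketch below, with one key per column
# (the key order determines the column order used in the SELECT and INSERT statements):
#
#   [profiles_previously_viewed]
#   profileid =
#   productid =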


def fetchXrecords(table, connection, amount):
    """Fetches amount (positive int) rows from table (string), and returns them in a list of tuples."""
    columns = tableformat(table, 'tables.cfg')
    cursor = connection.cursor()
    statement = """SELECT {} FROM {}""".format(",".join(columns), table)
    cursor.execute(statement)
    output = cursor.fetchmany(size=amount)
    print(output)
    return output


def fetchallrecords(table, connection):
    """Fetches all rows from the given table using the connection object."""
    columns = tableformat(table, 'tables.cfg')
    cursor = connection.cursor()
    statement = """SELECT {} FROM {}""".format(",".join(columns), table)
    cursor.execute(statement)
    output = cursor.fetchall()
    print(output)
    return output


def insertrecord(table, connection, record):
    """Inserts a record into the given table using the connection object.
    Records should be tuples with as many values as the table has columns."""
    columns = tableformat(table, 'tables.cfg')
    cursor = connection.cursor()
    if isinstance(record, str):
        record = (record,)
    # Build one %s placeholder per value and let psycopg2 handle the quoting.
    placeholders = ",".join(["%s"] * len(record))
    statement = """INSERT INTO {} ({}) VALUES ({})""".format(table, ",".join(columns), placeholders)
    cursor.execute(statement, record)
    connection.commit()
    cursor.close()
    return 1


def analyticsinit(connection):
    """Initialises the tables in which we will store all the data acquired from analysis.
    Each table only contains a profileID and a productID; the productID will be a recommendation
    for the given profileID when said profileID is logged in."""
    cursor = connection.cursor()
    cursor.execute("""
    DROP TABLE IF EXISTS collaborative_filter;
    DROP TABLE IF EXISTS content_filter;
    CREATE TABLE collaborative_filter(
        profileid varchar REFERENCES profiles(id),
        productid varchar REFERENCES products(id)
    );
    CREATE TABLE content_filter(
        profileid varchar REFERENCES profiles(id),
        productid varchar REFERENCES products(id)
    );""")
    connection.commit()
    cursor.close()
    return 1


# The plan is as follows: we look in the table 'profiles_previously_viewed' to check what people
# might be interested in.
#
# For the collaborative filter we do this by checking, for every profile (or a subset of them),
# what they have viewed. If one profile has a viewing list of which another profile's list is a
# proper subset, we recommend to that other profile the things the first profile has viewed but
# the other profile has not (see the sketch below).
#
# For the content filter we look at which products each profile has viewed, and by checking for
# each profile which products resemble those the most, we recommend those products.
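# A minimal, self-contained sketch of that collaborative rule using plain Python sets. The profile
# and product ids below are made up purely for illustration, and this helper is not used by the
# functions in this file.
def _subset_recommendations_example():
    viewed = {
        'profile_a': {'prod1', 'prod2', 'prod3'},
        'profile_b': {'prod1', 'prod2'},
    }
    # profile_b's list is a proper subset of profile_a's, so profile_b gets recommended
    # everything profile_a viewed that profile_b has not seen yet: {'prod3'}.
    recommendations = {}
    for profile, seen in viewed.items():
        for other, other_seen in viewed.items():
            if profile != other and seen < other_seen:  # '<' is the proper-subset test for sets
                recommendations.setdefault(profile, set()).update(other_seen - seen)
    return recommendations  # -> {'profile_b': {'prod3'}}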


def likeliness(collection1, collection2):
    """Determines a decimal number for how much collection1 looks like collection2.
    Used by both filters: the collaborative and the content filter."""
    if len(collection1) == 0:
        return 0  # Avoid a division by zero for empty collections.
    equality = 0
    for item in collection1:
        if item in collection2:
            equality += 1
    print(equality / len(collection1))
    return equality / len(collection1)
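# For example, likeliness(['a', 'b', 'c', 'd'], ['b', 'c', 'x']) counts 'b' and 'c' as shared
# items and returns 2 / 4 = 0.5 (the values here are made up purely to illustrate the ratio).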


def indexlikeliness(list1, list2, indexes):
    """Determines the equality between list1 and list2 by only looking at the
    given indexes in the lists."""
    endlist1 = []
    endlist2 = []
    for index, element in enumerate(list1):
        if index in indexes:
            endlist1.append(element)
    for index, element in enumerate(list2):
        if index in indexes:
            endlist2.append(element)
    result = likeliness(endlist1, endlist2)
    print(result)
    return result
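# For example, indexlikeliness(['id1', 'x', 'toys'], ['id2', 'y', 'toys'], [2]) only compares
# index 2 of both lists ('toys' vs 'toys') and therefore returns 1.0 (again, made-up values).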


def profidandprodiddict(viewset):
    """Fetches every profile id and uses them as keys, and then adds all product ids associated
    with a profile id to their respective key into a list."""
    usrdict = {}
    for row in viewset:
        usrdict[row[0]] = []
    for row in viewset:
        usrdict[row[0]].append(row[1])
    print(usrdict)
    return usrdict
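# For example, given rows like [('prof1', 'prodA'), ('prof1', 'prodB'), ('prof2', 'prodA')]
# (invented ids), profidandprodiddict returns {'prof1': ['prodA', 'prodB'], 'prof2': ['prodA']}.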


# Below are the rules we drew up.
# Collaborative filtering:


def collaborativefilter(connection):
    """This function generates recommendations based on the collaborative filter type;
    it does this by checking each profileid for what kinds of products they have viewed,
    and if the equality in viewed products between two profileid's is above a certain
    threshold, we recommend to the first profile the products the second one viewed
    that it has not seen yet."""
    viewedset = fetchXrecords('profiles_previously_viewed', connection, 2000)
    # This is limited for testing. If you've got bucketloads of time and a can-do attitude,
    # replace fetchXrecords with fetchallrecords('profiles_previously_viewed', connection).
    recthreshold = 0.5  # Controls the fraction (0.5 = 50 %) of equality needed before a recommendation is made.
    usrdict = profidandprodiddict(viewedset)  # Here we save all the viewed products per profileid.

    for key in usrdict:
        for key2 in usrdict:
            if key2 == key:
                continue  # Comparing a profile with itself never yields new recommendations.
            if likeliness(usrdict[key], usrdict[key2]) >= recthreshold:
                for prodid in usrdict[key2]:
                    if prodid not in usrdict[key]:
                        insertrecord('collaborative_filter', connection, (key, prodid))
    return 1


def contentfilter(connection):
    """This function generates recommendations based on the content filtering type;
    it does this by checking each profile for what products they have viewed. This
    one is straightforward; we compare certain aspects of products (not all of them!)
    that a profile has viewed, and based on a certain threshold in equality between products,
    we recommend them if the equality is above said threshold."""
    productset = fetchallrecords('products', connection)  # To begin with, we will need **all** product records.
    viewedset = fetchXrecords('profiles_previously_viewed', connection,
                              200)  # We'll also need to know what each profile has seen.
    # Again, we don't take all of them; 200 is enough here, since you can make a lot of recommendations
    # with just the products that are linked to a single profileid.
    usrdict = profidandprodiddict(viewedset)

    for profid in usrdict:
        for entry in productset:
            prodid = entry[0]
            if prodid in usrdict[profid]:
                for otherrecord in productset:
                    if indexlikeliness(entry, otherrecord, [3, 4, 5, 6, 7]) == 1 and entry != otherrecord:
                        # We only compare the type, the category, the subcategory, the subsubcategory,
                        # and the target audience of the product; those are the indexes given above.
                        # So if they are completely equal, we've found a product that's the same
                        # in functionality but (probably) differs in appearance. Content filtering!
                        insertrecord('content_filter', connection, (profid, otherrecord[0]))
    return 1


def main():
    connection = fetchDBinfo('database.ini')
    analyticsinit(connection)  # We empty the two filter tables on every run, so the recommendations stay up to date.
    collaborativefilter(connection)  # Collaborative filter
    contentfilter(connection)  # Content filter
    return


def fetchprodid(profile, connection):
    """Returns the recommendations stored for the given profileid, taken from whichever of the two
    filter tables holds the most of them (ties go to the collaborative filter)."""
    cursor = connection.cursor()
    statement = """SELECT productid
    FROM collaborative_filter WHERE profileid = %s"""
    cursor.execute(statement, (profile,))
    colfilter = cursor.fetchall()
    statement = """SELECT productid
    FROM content_filter WHERE profileid = %s"""
    cursor.execute(statement, (profile,))
    confilter = cursor.fetchall()
    if len(colfilter) >= len(confilter):
        print(colfilter)
        return colfilter
    else:
        print(confilter)
        return confilter


main()

# Look in the relational database yourself to pick the profileid you want the recommendations for.
# Keep in mind that if you choose not to build recommendations from all profileid's and productid's,
# the collaborative and content filtering tables will often contain far fewer recommendations.


print(fetchprodid('59dceccba56ac6edb4d9fd0a', fetchDBinfo('database.ini')))

# Sources (APA):
# PostgreSQL Python Tutorial With Practical Examples. (2018, February 23). Retrieved March 4, 2020,
# from https://www.postgresqltutorial.com/postgresql-python/
# Wikipedia contributors. (2019, December 18). Association rule learning. Retrieved March 18, 2020,
# from https://en.wikipedia.org/wiki/Association_rule_learning#Useful_Concepts