· 4 years ago · May 20, 2021, 06:58 PM
1class FP_Tree:
2 def __init__(self, T, min_support=1000):
3 """
4 Constructor for FP_Tree. Should correctly build an FP-Tree with header table.
5 Hint: I strongly advise you to implement the missing sections of the Node class before this one
6
7 Inputs:
8 T: A list of lists, each inner list will contiain integer-item-ids.
9 Example: T = [[1, 2, 5], [2, 3, 4], [1, 6]]
10 min_support: The total number of occurences needed to keep the itemset.
11 """
12 self.min_support = min_support
13 self.header_table = {}
14 self.root = Node(header_table = self.header_table)
15
16 ### YOUR CODE HERE
17 for transaction in T:
18 ### I will be putting the items to the tree lexicographically, not by frequency
19 lex_sorted = sorted(transaction)
20 first_node = transaction[0]
21 if(first_node in self.root.children):
22 self.root.children[first_node].add_path(transaction)
23 else:
24 self.root.children[first_node] = Node(header_table=self.header_table, value=first_node, parent=self.root, path=transaction)
25# print("header table at the end of iteration in root:", self.header_table.keys())
26 ### YOUR CODE HERE
27
28
29 def print_tree(self,node=None, recursion_depth=0):
30# if (recursion_depth==0):
31# print(f"header table: {self.header_table}")
32 if node==None:
33 node = self.root
34 print("Root has the following children")
35 else:
36 if(len(node.children)!=0):
37 print("|->"*recursion_depth,f"[{node.value}] has the following children: ")
38
39 if(len(node.children)!=0):
40 print(" "*recursion_depth,end="[")
41 for value,child in (node.children.items()):
42 print(f"{value}:{child.count}",end=", ")
43 print("]\n")
44 for value,child in (node.children.items()):
45 self.print_tree(node=child,recursion_depth = recursion_depth+1)
46
47
48 ### Common functions for FP-tree and Conditional FP-tree
49 ### You do not need to modify the rest of this class
50 def generate_pattern(self, keys, support):
51 return tuple(keys + self.get_suffix()), support
52
53 def get_suffix(self):
54 return []
55
56 # This is the main function for generating frequent itemsets. You do not need to modify this,
57 # but I recommend reading and trying to understand it.
58 def mine_frequent_itemsets(self, res=None):
59# print("header_table",self.header_table.keys())
60 if res is None: res = []
61
62 if self.root.is_single_path():
63 keys = list(self.header_table.keys())
64 key_idx = {k:i for i, k in enumerate(keys)}
65 counts = [self.header_table[k].count for k in keys]
66
67 for key_pair in itertools.chain(*[itertools.combinations(keys, k) for k in range(1, len(keys)+1)]):
68 support = min([counts[key_idx[k]] for k in key_pair])
69 if support >= self.min_support:
70 res.append(self.generate_pattern(list(key_pair), support))
71
72 else: # Not single path
73 for key, node in self.header_table.items():
74 support = node.support()
75
76 if support >= self.min_support:
77 res.append( self.generate_pattern([key], support) )
78
79 basis = []
80 while node is not None:
81 curr_node = node
82 node = node.nodelink
83
84 if curr_node.parent is None: continue
85
86 path = curr_node.path(limit=curr_node.count)[:-1]
87 if len(path) == 0: continue
88
89 basis.append( path )
90
91 if len(basis) == 0: continue
92
93 conditional_tree = Conditional_FP_Tree(self.min_support, [key] + self.get_suffix(), basis)
94 if conditional_tree.root is None: continue
95
96 conditional_tree.mine_frequent_itemsets(res=res)
97 return res
98
99
100# You don't need to modify anything in this class
101class Conditional_FP_Tree(FP_Tree):
102 def __init__(self, min_support, suffix, basis):
103 self.min_support = min_support
104 self.suffix = suffix
105 self.header_table = {} # This will hold all unique items
106
107 self.root = Node(header_table=self.header_table)
108
109 self.build_tree(basis)
110 # self.root = prune(self.root, min_support)
111 if self.root is None: print("WARNING: root is empty after pruning")
112
113 def build_tree(self, basis):
114 for b in basis:
115 count = b[0][1]
116 path = list(map(lambda x: x[0], b))
117 for i in range(count):
118 self.root.add_path(path)
119
120 def get_suffix(self):
121 return self.suffix
122
123class Node:
124 def __init__(self, header_table, value=None, parent=None, path=None):
125 """
126 Constructor for Node class, which is used for the FP-Tree.
127 Inputs:
128 header_table: Dict. Should be same dict for all nodes in the tree
129 value: Integer id of the item the node represents
130 parent: Parent Node. None if root node
131 path: List of node values for a path that should start in this node.
132 """
133
134 self.children = {}
135 self.header_table = header_table
136 self.nodelink = None
137 self.value = None
138 self.parent = None
139 self.count = 0
140
141 if value is not None: # Only root node should have None as value
142 self.value = value
143 self.parent = parent
144
145 # YOUR CODE HERE {value:Object<Node>}
146 # TODO: Make sure header table links and node links work as intended
147 if (value in self.header_table):
148 last_added = self.header_table[value]
149 while (last_added.nodelink!=None):
150 last_added = last_added.nodelink
151 last_added.nodelink = self
152 else:
153 self.header_table[value] = self
154
155 # YOUR CODE HERE
156
157 if path is not None:
158 self.add_path(path)
159
160
161 def add_path(self, path):
162 """
163 Function for adding a path to tree.
164 Should follow an existing path and increment count while such a path exists.
165 If no path exists (or only partial path exists), this function should create or complete such a path
166 Hint: Recursion might be helpful.
167 Inputs:
168 path: A list node values.
169 Example: path = [1, 2, 5]
170 """
171
172 ### YOUR CODE
173
174 if (len(path)==0):
175 return
176
177 ###Increment count of the current node
178 if (path[0] == self.value):
179 self.count+=1
180
181 ### Remove current node and finish if path is 0
182 path.pop(0)
183 if (len(path)==0):
184# print("header table at the end of adding path in Node",self.header_table.keys())
185 return
186
187 next_node = path[0]
188
189 ### Continue construction with the remaining path
190 if (next_node in self.children):
191 self.children[next_node].add_path(path=path)
192
193 ### If current node doesn't yet have a child that occurs in the path
194 if (next_node not in self.children):
195 self.children[next_node] = Node(header_table=self.header_table, value=next_node, parent=self, path=path)
196
197
198 ### YOUR CODE
199
200
201 # Functions for frequent items-sets and rule mining below. You do not need to modify these
202 def is_single_path(self):
203 if len(self.children) == 0: return True
204 elif len(self.children) > 1: return False
205 else: # len == 1
206 key = next((k for k in self.children.keys()))
207 return self.children[key].is_single_path()
208
209 def support(self, verbose=False):
210 if verbose: print("Counting support, this value is ", self.value, " with count ", self.count, " and parent ", self.parent.value)
211
212 if self.nodelink is not None: return self.count + self.nodelink.support(verbose)
213 else: return self.count
214
215 def path(self, limit=-1):
216 if self.value is None:
217 return []
218 else:
219 count = self.count if limit == -1 else min(self.count, limit)
220 return self.parent.path(limit=limit) + [(self.value, count)]
221
222 def print(self, indent="", spacing="----|-"):
223 print(indent + str(self.value) + ":" + str(self.count))
224 for v in self.children.values():
225 v.print(indent=indent + spacing)
226