· 6 years ago · Nov 18, 2019, 03:36 AM
1from time_this import *
2import copy
3import os
4
5class Hash:
6
7 def __init__(self):
8 self.table = [[None]] * 31
9
10 def __str__(self):
11 return self.table
12
13 def next_prime(self):
14 """
15 Finds next prime number for new Hash Table capacity.
16
17 returns: new prime number which is roughly double in size
18 """
19 prime = len(self.table)
20 for p in range(prime * 2, prime * 2 + prime):
21 for i in range(2, p):
22 if p % i == 0:
23 break
24 else:
25 return p
26
27 def SortWord(self, word):
28 """
29 (Bubble Sort)
30 Turns word into a list converted via 'ord'. The worst makes mulitple passhes through a list.
31 It compares adjacent items and exchanges those that are out of order. Each pass through the list places the
32 next largest value in its proper place. In essence, each item “bubbles” up to the location where it belongs.
33 :param word:
34 :return:
35 """
36 listed = list(word)
37 alist = []
38 for i in listed:
39 alist.append(ord(i))
40 for passnum in range(len(alist) - 1, 0, -1):
41 for i in range(passnum):
42 if alist[i] > alist[i + 1]:
43 temp = alist[i]
44 alist[i] = alist[i + 1]
45 alist[i + 1] = temp
46 newword = []
47 for num in alist:
48 newword.append(chr(num))
49 return "".join(newword)
50
51 def hash_function(self,word, TableCapacity):
52 """
53 Finds a new index value for the Hash Table by converting each letter into a number via 'ord'. The algorithm
54 I chose is totaling each converted letter and multiplying by its weight depending on its index
55 (i.e. the second letter in of word bear, the letter "e", will be worth (99 * (99-96)). The usage of a
56 weight helps create a consistent way of preventing two different words from having the same value.
57 Lastly, it will mod by the length of the list (TableCapacity), it will return the remainder
58 which will be the new index for the table.
59
60 :param TableCapacity, word:
61 :return: new hash table index
62 """
63 total = 0
64 for i in list(word):
65 total = total + ord(i)*(ord(i)-96)
66 while total >= TableCapacity:
67 total = total % TableCapacity
68 return total
69
70 def LoadFactor(self):
71 """
72 Checks load factor by counting the None's and divide by the length of the current table
73 subtracted by 1. It also counts how many slots are occupied by subtracting the length with the NoneCount.
74
75 :param table:
76 :return: Load factor rounded by 4 decimals.
77 Number of occupied slots
78 """
79 NoneCount = 0
80 for i in self.table:
81 if i == [None]:
82 NoneCount += 1
83 LoadSize =(1 - NoneCount / len(self.table))
84 occupied = (len(self.table) - NoneCount)
85 return round(LoadSize, 4), occupied
86
87 def store(self, word):
88 """
89 uses hash_function to find index value and stores the data in the corresponding index value
90 in the Hash Table.
91
92 The conflict algorithm is simple, if neither of the two if statements pass, the index will be added +1 so
93 the word will traverse to the list to the right. If the index is at the end of the list, it will revert
94 back to [0] and start from the beginning until there is no more conflict.
95
96 This method will keep track of number of conflicts and unique items stored.
97
98
99 :param word:
100 :return: current Hash table, number of unique keys, and conflicts
101 """
102 IndexList = []
103 unique = 0
104 conflicts = 0
105 index = self.hash_function(self.SortWord(word),len(self.table))
106 StoreStatus = False
107 while StoreStatus is not True:
108 if self.LoadFactor()[0] > 0.7: # Checks if load factor is above 70%. If true: rehash table.
109 self.rehash()
110 else:
111
112 if self.table[index] == [None]:
113 """
114 This checks to see if a KEY exists at given index
115 If not, stores KEY and VALUE within the index.
116 """
117 self.table[index] = [self.SortWord(word)]
118 self.table[index].append(word)
119 IndexList.append(index)
120 StoreStatus = True
121 elif self.table[index][0] == self.SortWord(word) and word not in self.table[index]:
122 """
123 This checks if the sorted word already exists. Which means the KEY is already there.
124 If so, then append the original (NOT SORTED) word to be viewed as a VALUE to the KEY.
125 """
126 self.table[index].append(word)
127 StoreStatus = True
128 else:
129 if index >= (len(self.table) - 1): # When we're at the end of the list
130 index = 0
131 conflicts += 1
132 else:
133 index += 1
134 conflicts += 1
135 return self.table, unique, conflicts
136
137 def get(self,word):
138 """
139 Accepts a key and finds and returns its anagrams.
140
141 Because an index is a list that stores both keys and values, anything past the [0] index
142 are the values. Where [0] is the key. The method returns anything past [1:] to show you the values.
143
144 :param word:
145 :return: all anagrams with given key and number of values stored with the key.
146 """
147 index = self.hash_function(word, len(self.table))
148 for i in range(len(self.table)):
149 if self.table[index][0] == word:
150 values = self.table[index][1:]
151 FixValuesFormat = (', '.join(values))
152 length = len(values)
153 return FixValuesFormat, length
154 else:
155 if index == (len(self.table)-1):
156 index = 0
157 index += 1
158 values = self.table[index][1:]
159 FixValuesFormat = (', '.join(values))
160 length = len(values)
161 return FixValuesFormat, length
162
163 def rehash(self):
164 """
165 Keeps track of the contents of the hash table and their locations and is stored in a
166 temp variable. A new hash table is created with the length of the next prime number (almost doubled).
167 Using the old list, this method will know what/how to store the current items into the new hash table.
168 :return:
169 """
170 IndexList = []
171 print("Rehashing...........")
172 temp = self.table
173 tempindex = IndexList[:]
174 IndexList.clear()
175 self.table = [[None]] * self.next_prime()
176 for i in tempindex:
177 for word in temp[i][1:]:
178 self.store(word)
179 print("New and current list: ",(self.table),"\nwhich as a length of :", len(self.table),"\n\ncontinuing.... please wait")
180
181def main():
182
183 h = Hash()
184 IndexList = []
185
186 w = open("Resources\output.txt", "w")
187
188 DictionaryFileName = os.path.basename("Resources\dictionary_test.txt")
189 opendictionary = open("Resources\dictionary_test.txt", "r")
190 DictionaryList = opendictionary.read().splitlines()
191
192 unique = 0
193 conflicts = 0
194 count = 0
195 w.write("Program Start"
196 "\nHash Table details: "+str(len(h.table))+" slots, "+str(h.LoadFactor()[1])+" occupied, load factor = "+str(h.LoadFactor()[0]))
197 w.write("\nStart reading words from file "+ str(DictionaryFileName))
198 for i in DictionaryList:
199 count += 1
200 if h.LoadFactor()[0] > 0.7:
201 w.write("\n \t Before expansion: "+str(len(h.table)) + " slots, "+str(h.LoadFactor()[1]) +" occupied, load factor = "+str(h.LoadFactor()[0]))
202 storeoutput = h.store(i)
203 print(count)
204 w.write("\n \t After expansion: "+str(len(h.table)) + " slots, "+str(h.LoadFactor()[1]) +" occupied, load factor = "+str(h.LoadFactor()[0]))
205 else:
206 storeoutput = h.store(i)
207 conflicts += storeoutput[2]
208 unique += storeoutput[1]
209 w.write("\n\nEnd reading words from file "+DictionaryFileName+"\n"
210 "HashTable details: "+str(len(h.table)) +" slots, "+str(h.LoadFactor()[1])+" occupied, load factor = "+str(h.LoadFactor()[0])+
211 "\nNumber of unique keys inserted into HashTable = "+str(unique)+
212 "\nNumber of keys conflicts encounter = "+ str(conflicts))
213
214 opendictionary.close()
215 InputFileName = os.path.basename("Resources\input.txt")
216 openinput = open("Resources\input.txt", "r")
217 InputList = openinput.read().splitlines()
218 w.write("\n\nStart reading words from file "+ str(InputFileName))
219 for j in InputList:
220 print(h.get(j)[1])
221 w.write("\n\t"+(str(j))+" "+str((h.get(j)[1]))+": "+str(h.get(j)[0])+" - this anagram took "+str(time_this(h.get, j)[0])+" seconds")
222 w.write("\nEnd reading keys from file example_input.txt"
223 "\nProgram end")
224 openinput.close()
225 w.close()
226
227if __name__ == "__main__":
228 main()