9xtZGFh0

· 6 years ago · Nov 18, 2019, 03:36 AM
1from time_this import *
2import copy
3import os
4
5class Hash:
6
7    def __init__(self):
8        self.table = [[None]] * 31
9
10    def __str__(self):
11        return self.table
12
13    def next_prime(self):
14        """
15        Finds next prime number for new Hash Table capacity.
16
17        returns: new prime number which is roughly double in size
18        """
19        prime = len(self.table)
20        for p in range(prime * 2, prime * 2 + prime):
21            for i in range(2, p):
22                if p % i == 0:
23                    break
24            else:
25                return p
26
27    def SortWord(self, word):
28        """
29        (Bubble Sort)
30        Turns word into a list converted via 'ord'.  The worst makes mulitple passhes through a list.
31        It compares adjacent items and exchanges those that are out of order. Each pass through the list places the
32        next largest value in its proper place. In essence, each item “bubbles” up to the location where it belongs.
33        :param word:
34        :return:
35        """
36        listed = list(word)
37        alist = []
38        for i in listed:
39            alist.append(ord(i))
40        for passnum in range(len(alist) - 1, 0, -1):
41            for i in range(passnum):
42                if alist[i] > alist[i + 1]:
43                    temp = alist[i]
44                    alist[i] = alist[i + 1]
45                    alist[i + 1] = temp
46        newword = []
47        for num in alist:
48            newword.append(chr(num))
49        return "".join(newword)
50
51    def hash_function(self,word, TableCapacity):
52        """
53        Finds a new index value for the Hash Table by converting each letter into a number via 'ord'.  The algorithm
54        I chose is totaling each converted letter and multiplying by its weight depending on its index
55        (i.e. the second letter in of word bear, the letter "e", will be worth (99 * (99-96)).  The usage of a
56        weight helps create a consistent way of preventing two different words from having the same value.
57        Lastly, it will mod by the length of the list (TableCapacity), it will return the remainder
58        which will be the new index for the table.
59
60        :param TableCapacity, word:
61        :return: new hash table index
62        """
63        total = 0
64        for i in list(word):
65            total = total + ord(i)*(ord(i)-96)
66        while total >= TableCapacity:
67            total = total % TableCapacity
68        return total
69
70    def LoadFactor(self):
71        """
72        Checks load factor by counting the None's and divide by the length of the current table
73        subtracted by 1. It also counts how many slots are occupied by subtracting the length with the NoneCount.
74
75        :param table:
76        :return: Load factor rounded by 4 decimals.
77                 Number of occupied slots
78        """
79        NoneCount = 0
80        for i in self.table:
81            if i == [None]:
82                NoneCount += 1
83            LoadSize =(1 - NoneCount / len(self.table))
84            occupied = (len(self.table) - NoneCount)
85        return round(LoadSize, 4), occupied
86
87    def store(self, word):
88        """
89        uses hash_function to find index value and stores the data in the corresponding index value
90        in the Hash Table.
91
92        The conflict algorithm is simple, if neither of the two if statements pass, the index will be added +1 so
93        the word will traverse to the list to the right.  If the index is at the end of the list, it will revert
94        back to [0] and start from the beginning until there is no more conflict.
95
96        This method will keep track of number of conflicts and unique items stored.
97
98
99        :param word:
100        :return: current Hash table, number of unique keys, and conflicts
101        """
102        IndexList = []
103        unique = 0
104        conflicts = 0
105        index = self.hash_function(self.SortWord(word),len(self.table))
106        StoreStatus = False
107        while StoreStatus is not True:
108            if self.LoadFactor()[0] > 0.7:         # Checks if load factor is above 70%. If true: rehash table.
109                self.rehash()
110            else:
111
112                if self.table[index] == [None]:
113                    """
114                    This checks to see if a KEY exists at given index
115                    If not, stores KEY and VALUE within the index.
116                    """
117                    self.table[index] = [self.SortWord(word)]
118                    self.table[index].append(word)
119                    IndexList.append(index)
120                    StoreStatus = True
121                elif self.table[index][0] == self.SortWord(word) and word not in self.table[index]:
122                    """
123                    This checks if the sorted word already exists.  Which means the KEY is already there.
124                    If so, then append the original (NOT SORTED) word to be viewed as a VALUE to the KEY.
125                    """
126                    self.table[index].append(word)
127                    StoreStatus = True
128                else:
129                    if index >= (len(self.table) - 1):      # When we're at the end of the list
130                        index = 0
131                        conflicts += 1
132                    else:
133                        index += 1
134                        conflicts += 1
135        return self.table, unique, conflicts
136
137    def get(self,word):
138        """
139        Accepts a key and finds and returns its anagrams.
140
141        Because an index is a list that stores both keys and values, anything past the [0] index
142        are the values.  Where [0] is the key.  The method returns anything past [1:] to show you the values.
143
144        :param word:
145        :return: all anagrams with given key and number of values stored with the key.
146        """
147        index = self.hash_function(word, len(self.table))
148        for i in range(len(self.table)):
149            if self.table[index][0] == word:
150                values = self.table[index][1:]
151                FixValuesFormat = (', '.join(values))
152                length = len(values)
153                return FixValuesFormat, length
154            else:
155                if index == (len(self.table)-1):
156                    index = 0
157                index += 1
158        values = self.table[index][1:]
159        FixValuesFormat = (', '.join(values))
160        length = len(values)
161        return FixValuesFormat, length
162
163    def rehash(self):
164        """
165        Keeps track of the contents of the hash table and their locations and is stored in a
166        temp variable.  A new hash table is created with the length of the next prime number (almost doubled).
167        Using the old list, this method will know what/how to store the current items into the new hash table.
168        :return:
169        """
170        IndexList = []
171        print("Rehashing...........")
172        temp = self.table
173        tempindex = IndexList[:]
174        IndexList.clear()
175        self.table = [[None]] * self.next_prime()
176        for i in tempindex:
177            for word in temp[i][1:]:
178                self.store(word)
179        print("New and current list: ",(self.table),"\nwhich as a length of :", len(self.table),"\n\ncontinuing.... please wait")
180
181def main():
182
183    h = Hash()
184    IndexList = []
185
186    w = open("Resources\output.txt", "w")
187
188    DictionaryFileName = os.path.basename("Resources\dictionary_test.txt")
189    opendictionary = open("Resources\dictionary_test.txt", "r")
190    DictionaryList = opendictionary.read().splitlines()
191
192    unique = 0
193    conflicts = 0
194    count = 0
195    w.write("Program Start"
196            "\nHash Table details: "+str(len(h.table))+" slots, "+str(h.LoadFactor()[1])+" occupied, load factor = "+str(h.LoadFactor()[0]))
197    w.write("\nStart reading words from file "+ str(DictionaryFileName))
198    for i in DictionaryList:
199        count += 1
200        if h.LoadFactor()[0] > 0.7:
201            w.write("\n \t Before expansion: "+str(len(h.table)) + " slots, "+str(h.LoadFactor()[1]) +" occupied, load factor = "+str(h.LoadFactor()[0]))
202            storeoutput = h.store(i)
203            print(count)
204            w.write("\n \t After expansion: "+str(len(h.table)) + " slots, "+str(h.LoadFactor()[1]) +" occupied, load factor = "+str(h.LoadFactor()[0]))
205        else:
206            storeoutput = h.store(i)
207        conflicts += storeoutput[2]
208        unique += storeoutput[1]
209    w.write("\n\nEnd reading words from file "+DictionaryFileName+"\n"
210            "HashTable details: "+str(len(h.table)) +" slots, "+str(h.LoadFactor()[1])+" occupied, load factor = "+str(h.LoadFactor()[0])+
211            "\nNumber of unique keys inserted into HashTable = "+str(unique)+
212            "\nNumber of keys conflicts encounter = "+ str(conflicts))
213
214    opendictionary.close()
215    InputFileName = os.path.basename("Resources\input.txt")
216    openinput = open("Resources\input.txt", "r")
217    InputList = openinput.read().splitlines()
218    w.write("\n\nStart reading words from file "+ str(InputFileName))
219    for j in InputList:
220        print(h.get(j)[1])
221        w.write("\n\t"+(str(j))+" "+str((h.get(j)[1]))+": "+str(h.get(j)[0])+" - this anagram took "+str(time_this(h.get, j)[0])+" seconds")
222    w.write("\nEnd reading keys from file example_input.txt"
223             "\nProgram end")
224    openinput.close()
225    w.close()
226
227if __name__ == "__main__":
228    main()