· 6 years ago · Sep 30, 2019, 08:48 PM
1/* Author: Alexander Rosenkrans 2019-09-28
2Added function mostfrequentwords which takes parameters x,y which form the range
3of words that are [x,y] most frequently used words in the text, given some word size restraint.
4The function copies the vals and keys arrays, sorts the copy of the value array using bottomup mergesort, while at the
5same time inserting keys at their correct position using an additional aux array. Code taken from the book include
6the mergesort algorithm, the frequency counter test, and the binary search symbol table API methods.
7 */
8
9
10import java.lang.*;
11import java.util.*;
12import java.io.*;
13public class Uppgift3BinarySearchRange<Key extends Comparable<Key>, Value extends Comparable<Value>> {
14 private Key[] keys;
15 private Value[] vals;
16 private int N = 0;
17
18 /* Function that is called in main to print the [x, x+y] most frequent words */
19 public void mostfrequentwords(int x, int y){
20 // Copy vals into valscopy
21 Value[] valscopy;
22 valscopy = (Value[]) new Comparable[N];
23 for(int z = 0; z < N; z++){
24 valscopy[z] = vals[z];
25 }
26
27 Key[] keyscopy;
28 keyscopy = (Key[]) new Comparable[N];
29 for(int z = 0; z < N; z++){
30 keyscopy[z] = keys[z];
31 }
32
33 sort(valscopy, keyscopy); // sorts in ASCENDING order
34
35 StdOut.println(x + " to " + y + " most frequent words");
36
37 for (int i = valscopy.length - x; i >= valscopy.length - y; i--){
38 StdOut.println("Word:" + keyscopy[i] + " Frequency:" + valscopy[i]);
39 }
40 }
41
42 public Key select(int k)
43 { return keys[k]; }
44
45 private static Comparable[] aux;
46 private static Comparable[] aux2;
47 public static void sort(Comparable[] a, Comparable[] b)
48 { // Do lg N passes of pairwise merges.
49 int L = a.length;
50 int P = b.length;
51 aux = new Comparable[L];
52 aux2 = new Comparable[P];
53 for (int sz = 1; sz < L; sz = sz+sz) // sz: subarray size
54 for (int lo = 0; lo < L-sz; lo += sz+sz) // lo: subarray index
55 merge(a, b, lo, lo+sz-1, Math.min(lo+sz+sz-1, L-1));
56 }
57 // ascending merge
58 private static boolean less(Comparable v, Comparable w){
59 return v.compareTo(w) < 0;
60 }
61 public static <Key> void merge(Comparable[] a, Comparable[] b, int lo, int mid, int hi) {
62
63 for (int u = 0; u < b.length; u++){
64 aux2[u] = b[u];
65 }
66
67 int i = lo, j = mid + 1;
68 for (int k = lo; k <= hi; k++) // Copy a[lo..hi] to aux[lo..hi].
69 aux[k] = a[k];
70
71 for (int k = lo; k <= hi; k++) { // Merge back to a[lo..hi].
72 // left half exhausted (take from the right)
73 if (i > mid){
74
75 b[k] = aux2[j];
76 a[k] = aux[j++];
77 }
78 // right half exhausted (take from the left)
79 else if (j > hi){
80
81 b[k] = aux2[i];
82 a[k] = aux[i++];
83 }
84 // current key on right less than current key on left (take from the right)
85 else if (less(aux[j], aux[i])){
86
87 b[k] = aux2[j];
88 a[k] = aux[j++];
89 // current key on right greater than or equal to current key on left (take from the left)
90 } else {
91
92 b[k] = aux2[i];
93 a[k] = aux[i++];
94 }
95 }
96 }
97
98 public Uppgift3BinarySearchRange(int capacity)
99 { // See Algorithm 1.1 for standard array-resizing code.
100 keys = (Key[]) new Comparable[capacity];
101 vals = (Value[]) new Comparable[capacity];
102 }
103
104 // If empty, return null. If found key, return value at that place
105 public Value get(Key key) {
106 if (isEmpty()) {
107 return null;
108 }
109 int i = rank(key);
110 if (i < N && keys[i].compareTo(key) == 0) {
111 return vals[i];
112 } else {
113 return null;
114 }
115 }
116
117 // Search for key. Update value if found; grow table if new.
118 public void put(Key key, Value val) {
119 int i = rank(key);
120 // if the same key is found, update value
121 if (i < N && keys[i].compareTo(key) == 0) {
122 vals[i] = val;
123 return;
124 }
125 // make room for new key,value pair to be inserted at position i
126 for (int j = N; j > i; j--) {
127 keys[j] = keys[j - 1];
128 vals[j] = vals[j - 1];
129 }
130 // insert key and value
131 keys[i] = key;
132 vals[i] = val;
133 N++;
134 }
135
136 public int rank(Key key) {
137 int lo = 0, hi = N - 1;
138 while (lo <= hi) {
139 int mid = lo + (hi - lo) / 2;
140 int cmp = key.compareTo(keys[mid]);
141 if (cmp < 0) {
142 hi = mid - 1;
143 } else if (cmp > 0) {
144 lo = mid + 1;
145 } else return mid;
146 }
147 return lo;
148 }
149
150 Iterable<Key> keys(){
151 return keys(min(), max());
152 }
153
154 public Key min()
155 { return keys[0]; }
156 public Key max()
157 { return keys[N-1]; }
158
159 public Iterable<Key> keys(Key lo, Key hi)
160 {
161 Queue<Key> q = new Queue<Key>();
162 for (int i = rank(lo); i < rank(hi); i++)
163 q.enqueue(keys[i]);
164 if (contains(hi))
165 q.enqueue(keys[rank(hi)]);
166 return q;
167 }
168 boolean contains(Key key) {
169 return get(key) != null;
170 }
171 public int size() {
172 return N;
173 }
174 boolean isEmpty() {
175 return size() == 0;
176 }
177 public static void main(String[] args) {
178 int minlen = Integer.parseInt(args[0]); // key-length cutoff (command line argument)
179 Uppgift3BinarySearchRange<String, Integer> st = new Uppgift3BinarySearchRange<String, Integer>(200000);
180 String word = "";
181 Random rnd = new Random(System.currentTimeMillis());
182 /** test begins here **/
183 long s = System.nanoTime();
184 while (!StdIn.isEmpty()) { // Build symbol table and count frequencies.
185 word = StdIn.readString(); // Read words/keys
186 if (word.length() < minlen) continue; // Break one iteration of the loop (ignore short words/keys)
187 if (!st.contains(word)) st.put(word, 1); // if first time we saw this word, set value to 1
188 else st.put(word, st.get(word) + 1); // if we've already seen this word, increment value by 1
189 }
190
191 st.mostfrequentwords(1, 4);
192 // Find a key with the highest frequency count.
193 String max = "";
194 st.put(max, 0);
195 for (String a : st.keys()) {
196 if (st.get(a) > st.get(max)) {
197 max = a;
198 }
199 }
200 StdOut.println("Most frequent word:" + max + " Frequency:" + st.get(max));
201 /** test ends here **/
202 long e = System.nanoTime() - s;
203 System.out.println("Sorting runtime in ms:");
204 double c = ((double) e) / 1000000;
205 System.out.printf("%.0f", c);
206 System.out.println();
207 }
208}