· 5 years ago · Feb 13, 2020, 11:14 AM
1/*
2 * To change this license header, choose License Headers in Project Properties.
3 * To change this template file, choose Tools | Templates
4 * and open the template in the editor.
5 */
6package Exercise5;
7
8import MyMapReduce.MyMapReduce;
9import MyMapReduce.Pair;
10import MyMapReduce.Reader;
11import MyMapReduce.Writer;
12import java.io.File;
13import java.io.FileNotFoundException;
14import java.io.IOException;
15import java.io.PrintStream;
16import java.nio.file.Path;
17import java.nio.file.Paths;
18import java.util.Arrays;
19import java.util.Comparator;
20import java.util.LinkedList;
21import java.util.List;
22import java.util.Map;
23import java.util.Scanner;
24import java.util.TreeMap;
25import java.util.concurrent.atomic.AtomicInteger;
26import java.util.stream.Stream;
27
28/**
29 *
30 * @author Giulio Purgatorio <giulio.purgatorio93 at gmail.com>
31 */
32public class InvertedIndex extends MyMapReduce<String, List<String>, String, Pair<String, Integer>, Pair<String, List<Integer>>> {
33
34 @Override
35 protected Stream<Pair<String, List<String>>> read() {
36
37 /* "The program should ask the user for the absolute path of the directory
38 where documents are stored. Only files ending in .txt should be considered." */
39 System.out.println("Write the absolute path of the directory where documents are stored");
40 Scanner s = new Scanner(System.in);
41 String input = s.nextLine();
42
43 Path p = Paths.get(input);
44
45 /* The read function must return a stream of pairs (fileName, contents),
46 where filename is the name of the text file and contents is a list of strings,
47 one for each line of the file. For the read function you can exploit the
48 enclosed class Reader.java in the way you prefer. */
49 Reader r = new Reader(p);
50
51 try {
52 return r.read();
53 } catch (IOException e) {
54 System.err.println("Error during read() method: " + e.getMessage()); return null;
55 }
56 }
57
58
59 /* Must return a stream of pairs containing, for each word (of length
60 greater than 3) in a line, the pair (w, k) where k is the number
61 of occurrences of w in that line. */
62 @Override
63 protected Stream<Pair<String, Pair<String, Integer>>> map(Stream<Pair<String, List<String>>> s) {
64
65 Map<String, Pair<String, Integer>> tree = new TreeMap<>(String::compareTo);
66
67 s.forEach(pair -> {
68 AtomicInteger a = new AtomicInteger();
69 a.lazySet(0);
70 List<String> lines = pair.getValue();
71 lines.forEach((l) -> {
72 String[] ws = l.split(" ");
73 Integer lineNum = a.addAndGet(1);
74 Arrays.stream(ws)
75 .filter(w -> w.length() > 3)
76 .map(w -> w.toLowerCase().replaceAll("[^a-z0-9]", ""))
77 .forEach(w -> tree.put(w, new Pair(pair.getKey(), lineNum)));
78 }
79 );
80 });
81
82 return tree.entrySet().stream().map(res -> new Pair(res.getKey(), res.getValue()));
83 }
84
85
86 /**
87 * The compare function should compare strings according to the standard alphanumeric ordering.
88 * @param s1 The string to compare
89 * @param s2 The string to be compared to
90 * @return an integer, representing the comparison between the two strings
91 */
92 @Override
93 protected int compare(String s1, String s2) {
94 return s1.compareTo(s2);
95 }
96
97 @Override
98 protected Stream<Pair<String, Pair<String, List<Integer>>>> reduce(Stream<Pair<String, List<Pair<String,Integer>>>> s) {
99
100 Map<String, Pair<String, List<Integer>>> tree = new TreeMap<>();
101
102 s.forEach(pair -> {
103 List<Pair<String, Integer>> values = pair.getValue();
104
105 for(Pair<String, Integer> e : values) {
106 String fileName = e.getKey();
107 List<Integer> lineNums = new LinkedList<>();
108 for(int i = 0; i < values.size(); i++) {
109 if(values.get(i).getKey().equals(fileName)) {
110 if(pair.getKey().equals("your"))
111 System.out.println("I've found one at pos " + values.get(i).getValue());
112 lineNums.add(values.get(i).getValue());
113 }
114 }
115 tree.put(pair.getKey(), new Pair(fileName, lineNums));
116 }
117 });
118
119 return tree.entrySet().stream().map(res -> new Pair(res.getKey(), res.getValue()));
120 }
121
122 @Override
123 protected void write(Stream<Pair<String, Pair<String, List<Integer>>>> s) {
124 File dst = new File("output.csv");
125
126 try {
127 PrintStream ps = new PrintStream(dst);
128 s.sorted(Comparator.comparing(Pair::getKey))
129 .forEach(p -> ps.println(p.getKey() + ", " + p.getValue().getKey() + ", " + Arrays.toString(p.getValue().getValue().toArray())));
130 ps.close();
131 } catch (FileNotFoundException e) { System.err.println("Error during the write() method: " + e.getMessage()); return; }
132 }
133}