· 5 years ago · May 02, 2020, 03:20 PM
1#include <ctype.h>
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include <list>
7#include <mutex>
8#include <string>
9#include <unordered_map>
10#include <unordered_set>
11#include <vector>
12
13#include <condition_variable>
14#include <functional>
15#include <future>
16#include <optional>
17#include <thread>
18
19struct notification_queue {
20 /**
21 * Work queue for a single thread
22 * to implement task stealing
23 */
24private:
25 std::list<std::function<void()>> work;
26 std::mutex mtx;
27 std::condition_variable has_work;
28 std::atomic<bool> done;
29
30public:
31 void push(std::function<void()> task) {
32 std::unique_lock<std::mutex> lck(mtx);
33 work.push_back(task);
34 has_work.notify_one();
35 }
36
37 std::optional<std::function<void()>> pop() {
38
39 // hold until we get a job in or are told to finish
40 std::unique_lock<std::mutex> lck(mtx);
41 has_work.wait(lck, [=] { return work.size() || done; });
42
43 if (!work.size())
44 return {};
45
46 auto ret = work.front();
47 work.pop_front();
48 return ret;
49 }
50 std::optional<std::function<void()>> try_pop() {
51 std::unique_lock<std::mutex> lck(mtx, std::try_to_lock);
52 if (!lck || !work.size())
53 return {};
54
55 auto ret = work.front();
56 work.pop_front();
57 return ret;
58 }
59
60 void set_done(bool arg) {
61 done = arg;
62 has_work.notify_all();
63 }
64};
65
66/**
67 * Thread pool implementation that assumes that its
68 * operations run on threadsafe data structures,
69 */
70struct workers {
71private:
72 std::vector<std::thread> threads;
73 std::vector<notification_queue> work_queues;
74 std::atomic<int> _counter;
75 int thread_count;
76
77public:
78 workers(int thread_count) {
79 this->thread_count = thread_count;
80 this->_counter = 0;
81 work_queues = std::vector<notification_queue>(thread_count);
82 }
83
84 void add_task(std::function<void()> task) {
85 auto copy = _counter++;
86 work_queues.at(copy % thread_count).push(task);
87 }
88
89 void do_work() {
90
91 for (int i = 0; i < thread_count; i++) {
92
93 threads.push_back(std::thread([=] {
94 while (true) {
95 std::optional<std::function<void()>> task;
96 for (int j = 0; j < thread_count; j++) {
97 if (j == i)
98 continue;
99
100 task = work_queues.at(j).try_pop();
101 // if we manage to steal work leave and execute the function
102 if (task.has_value()) {
103 break;
104 }
105 }
106
107 // check if we managed to steal a function
108 if (task.has_value()) {
109 task.value()();
110 continue;
111 }
112
113 // we are getting work from our own queue
114 // hold whilst we need to get the task
115 task = work_queues.at(i).pop();
116 if (!task.has_value()) {
117 break;
118 }
119 task.value()();
120 }
121 }));
122 }
123 }
124
125 /**
126 * Tells the threads to begin finishing
127 * the work that they have to do
128 */
129 void setFinish() {
130
131 for (int i = 0; i < thread_count; i++) {
132 work_queues.at(i).set_done(true);
133 threads.at(i).join();
134 }
135 }
136};
137
138struct ts_unordered_map {
139
140private:
141 std::unordered_map<std::string, std::list<std::string>> _results;
142 std::mutex mtx;
143
144public:
145 // thread safe wrapper to determine whether a given key is in the map
146 bool contains(std::string arg) {
147 std::unique_lock<std::mutex> lck(mtx);
148 return _results.find(arg) != _results.end();
149 }
150
151 // threadsafe wrapper for adding new kv pairs to the map
152 bool insert(const std::string &arg, std::list<std::string> values) {
153 std::unique_lock<std::mutex> lck(mtx);
154 _results.insert({arg, values});
155 return true;
156 }
157
158 // thread safe wrapper for adding new values to a bucket
159 void append(std::string target, std::string value) {
160
161 std::unique_lock<std::mutex> lck(mtx);
162 auto bucket = _results.find(target);
163
164 // check if the bucket exists, if note exit
165 if (bucket == _results.end())
166 return;
167
168 (bucket->second).push_back(value);
169 }
170
171 // return a pointer to a given bucket, not threadsafe but used in a single threaded
172 //context
173 std::list<std::string> *leak(std::string arg) { return &_results[arg]; }
174};
175
176std::vector<std::string> dirs;
177// work queue functionality is now in workers class
178ts_unordered_map theTable;
179
180void arg_binder(std::string filename, workers *threads);
181
182std::string dirName(const char *c_str) {
183 std::string s = c_str; // s takes ownership of the string content by
184 // allocating memory for it
185 if (s.back() != '/') {
186 s += '/';
187 }
188 return s;
189}
190
191std::pair<std::string, std::string> parseFile(const char *c_file) {
192 std::string file = c_file;
193 std::string::size_type pos = file.rfind('.');
194 if (pos == std::string::npos) {
195 return {file, ""};
196 } else {
197 return {file.substr(0, pos), file.substr(pos + 1)};
198 }
199}
200
201// open file using the directory search path constructed in main()
202static FILE *openFile(const char *file) {
203 FILE *fd;
204 for (unsigned int i = 0; i < dirs.size(); i++) {
205 std::string path = dirs[i] + file;
206 fd = fopen(path.c_str(), "r");
207 if (fd != NULL)
208 return fd; // return the first file that successfully opens
209 }
210 return NULL;
211}
212
213/**
214 * Generates specific tasks for a thread pool
215 * given a filename, adds all of its dependencies as tasks
216 */
217void generate_tasks(std::string filename, workers *threads) {
218
219 char buf[4096], name[4096];
220 // 1. open the file
221 FILE *fd = openFile(filename.c_str());
222 if (fd == NULL) {
223 fprintf(stderr, "Error opening %s\n", filename.c_str());
224 exit(-1);
225 }
226 while (fgets(buf, sizeof(buf), fd) != NULL) {
227 char *p = buf;
228 // 2a. skip leading whitespace
229 while (isspace((int)*p)) {
230 p++;
231 }
232 // 2b. if match #include
233 if (strncmp(p, "#include", 8) != 0) {
234 continue;
235 }
236 p += 8; // point to first character past #include
237 // 2bi. skip leading whitespace
238 while (isspace((int)*p)) {
239 p++;
240 }
241 if (*p != '"') {
242 continue;
243 }
244 // 2bii. next character is a "
245 p++; // skip "
246 // 2bii. collect remaining characters of file name
247 char *q = name;
248 while (*p != '\0') {
249 if (*p == '"') {
250 break;
251 }
252 *q++ = *p++;
253 }
254 *q = '\0';
255 // 2bii. append file name to dependency list
256 theTable.append(filename, name);
257 // 2bii. if file name not already in table ...
258 if (theTable.contains(name)) {
259 continue;
260 }
261 // ... insert mapping from file name to empty list in table ...
262 theTable.insert(name, {});
263 // add a new task to the work queue for the threads
264 arg_binder(name, threads);
265 }
266 // 3. close file
267 fclose(fd);
268}
269
270/**
271 * Binds arguements to a function
272 * so threads simply execute functions
273 * without passing args
274 * mutually recurses with generate tasks
275 */
276void arg_binder(std::string filename, workers *threads) {
277
278 // add a single unit of work to the work queue
279 threads->add_task([=]() { generate_tasks(filename, threads); });
280}
281
282// iteratively print dependencies
283static void printDependencies(std::unordered_set<std::string> *printed,
284 std::list<std::string> *toProcess, FILE *fd) {
285 if (!printed || !toProcess || !fd)
286 return;
287
288 // 1. while there is still a file in the toProcess list
289 while (toProcess->size() > 0) {
290 // 2. fetch next file to process
291 std::string name = toProcess->front();
292 toProcess->pop_front();
293 // 3. lookup file in the table, yielding list of dependencies
294 std::list<std::string> *ll = theTable.leak(name);
295 // 4. iterate over dependencies
296 for (auto iter = ll->begin(); iter != ll->end(); iter++) {
297 // 4a. if filename is already in the printed table, continue
298 if (printed->find(*iter) != printed->end()) {
299 continue;
300 }
301 // 4b. print filename
302 fprintf(fd, " %s", iter->c_str());
303 // 4c. insert into printed
304 printed->insert(*iter);
305 // 4d. append to toProcess
306 toProcess->push_back(*iter);
307 }
308 }
309}
310
311int main(int argc, char *argv[]) {
312 // 1. look up CPATH in environment
313
314 char *cpath = getenv("CPATH");
315 char *crawl_str = getenv("CRAWLER_THREADS");
316 int thread_count;
317 if (!crawl_str)
318 thread_count = 2;
319 else
320 thread_count = strtol(crawl_str, NULL, 10);
321 if (thread_count <= 0) {
322 fprintf(stderr,
323 "ERR: %d is an invalid number of threads check CRAWLER_THREADS\n",
324 thread_count);
325 return -1;
326 }
327
328 // determine the number of -Idir arguments
329 int i;
330 for (i = 1; i < argc; i++) {
331 if (strncmp(argv[i], "-I", 2) != 0)
332 break;
333 }
334 int start = i;
335
336 // 2. start assembling dirs vector
337 dirs.push_back(dirName("./")); // always search current directory first
338 for (i = 1; i < start; i++) {
339 dirs.push_back(dirName(argv[i] + 2 /* skip -I */));
340 }
341 if (cpath != NULL) {
342 std::string str(cpath);
343 std::string::size_type last = 0;
344 std::string::size_type next = 0;
345 while ((next = str.find(":", last)) != std::string::npos) {
346 dirs.push_back(str.substr(last, next - last));
347 last = next + 1;
348 }
349 dirs.push_back(str.substr(last));
350 }
351 // 2. finished assembling dirs vector
352
353 workers threads(thread_count);
354 threads.do_work();
355
356 // 3. for each file argument ...
357 for (i = start; i < argc; i++) {
358 std::pair<std::string, std::string> pair = parseFile(argv[i]);
359 if (pair.second != "c" && pair.second != "y" && pair.second != "l") {
360 fprintf(stderr, "Illegal extension: %s - must be .c, .y or .l\n",
361 pair.second.c_str());
362 return -1;
363 }
364 std::string obj = pair.first + ".o";
365 // 3a. insert mapping from file.o to file.ext
366 theTable.insert(obj, {argv[i]});
367 // 3b. insert mapping from file.ext to empty list
368 theTable.insert(argv[i], {});
369 // 3c. add file to the work queue
370 arg_binder(argv[i], &threads);
371 }
372
373 threads.setFinish();
374
375 // 5. for each file argument
376 for (i = start; i < argc; i++) {
377 // 5a. create hash table in which to track file names already printed
378 std::unordered_set<std::string> printed;
379 // 5b. create list to track dependencies yet to print
380 std::list<std::string> toProcess;
381
382 std::pair<std::string, std::string> pair = parseFile(argv[i]);
383
384 std::string obj = pair.first + ".o";
385 // 5c. print "foo.o:" ...
386 printf("%s:", obj.c_str());
387 // 5c. ... insert "foo.o" into hash table and append to list
388 printed.insert(obj);
389 toProcess.push_back(obj);
390 // 5d. invoke
391 printDependencies(&printed, &toProcess, stdout);
392
393 printf("\n");
394 }
395
396 return 0;
397}