· 6 years ago · Mar 18, 2019, 05:10 PM
1#include <iostream>
2#include <vector>
3#include <string>
4#include <set>
5#include <map>
6#include <stack>
7
8using namespace std;
9
10
11char startP; // starting variable of the productions
12map<set<string>, int> refMap; // refMap, used to check if a set of kernels already exists
13map<int, map<char, int>> refShiftMap; // used to keep track of transitions out of each state.
14
15/*-----------------------------------------------------------------------------------------------------------------*/
16
17map<char, string> mapGrammar(vector <string> grammar) // returns a map from input strings. Variables = keys, Productions = values
18{
19 map<char, string> Map; // creates a temporary map to be returned
20 //startP = grammar[0][0];
21 for (int i = 0; i < grammar.size(); i++) // parse through the vector of strings
22 {
23 string tmp = ""; // creates an initial string for the production
24 for (int j = 3; j < (grammar.at(i).size()); j++) // looks at everything after "->"
25 {
26 tmp += grammar[i][j]; // adds production to string, character by character
27 }
28 //cout << "map @ " << i << ": " << grammar[i][0] << " -> " << tmp << endl;
29 Map.insert({ grammar[i][0], tmp }); // inserts the variable and production to the map
30 }
31 return Map; // returns completed map
32}
33
34map<char, set<char>> mapSet(vector <string> grammar) // returs a map from input strings. Variables = keys. Sets = productions
35{
36 map<char, set<char>> Map; // creates a temporary map to be returned
37 for (int i = 0; i < grammar.size(); i++) // parse through the vector of strings
38 {
39 Map.insert({ grammar[i][0], {} }); // creates empty sets for each variable
40 }
41 return Map; // returns completed map
42}
43
44vector<string> getSubProd(string Prod) // returns a vector of all the individual sub-productions of a variable. ( D->Aa|d|# returns 0:Aa, 1:d, 2:# )
45{
46 vector<string> temp; // create a temp vector to be returned
47 string str; // creates a tmp string that takes in each individual sub-production
48 for (int i = 0; i < Prod.size(); i++) // parse through the entire production
49 {
50 if (Prod.at(i) == '|') // if OR statement is reached
51 {
52 temp.push_back(str); // add the string before the OR statement to the vector list.
53 str = ""; // resets the temp string
54 }
55 else
56 {
57 str += Prod.at(i); // add the current character to the sub-Production string
58 }
59 }
60 if (str.size() > 0) // no OR statement found
61 {
62 temp.push_back(str); // add the string to the vector list
63 }
64 return temp; // returns the vector list of strings.
65}
66
67vector<string> antiSubProd(vector<string> items)
68{
69 map<char, string> Map;
70 for (int i = 0; i < items.size(); i++) // for each productions
71 {
72 char lhs = items[i][0];
73 string lhsP;
74 lhsP.push_back(lhs);
75 lhsP.push_back('-');
76 lhsP.push_back('>');
77 if (Map.find(lhs) == Map.end()) // if lhs doesnt exist in map yet
78 {
79 Map.insert(pair<char, string>(lhs, lhsP)); // add lhs to map
80 }
81 else
82 {
83 Map.find(lhs)->second.push_back('|'); // create OR seperator
84 }
85 for (int j = 3; j < items[i].size(); j++) // for each letter past ->
86 {
87 Map.find(lhs)->second.push_back(items[i][j]);
88 }
89 }
90 vector<string> newGrammar;
91 for (auto it = Map.begin(); it != Map.end(); it++)
92 {
93 newGrammar.push_back(it->second);
94 }
95 return newGrammar;
96}
97
98set<char> getUnion(const set<char>& a, const set<char>& b) // union two sets
99{
100 set<char> result = a;
101 result.insert(b.begin(), b.end());
102 return result;
103}
104
105void printSet(set<char> set) // print the contents of a set
106{
107 cout << "{ ";
108 for (auto it = set.begin(); it != set.end(); it++)
109 {
110 cout << *it << ", ";
111 }
112 cout << "}";
113}
114
115set<char> First(map<char, string> &Map, char S) // returns the First() set of a Variable. Takes in the map to be used as reference, and a character you want to return the First() set of.
116{
117 set<char> finalSet; // set that will be returned to the function
118 string production; // entire string value that is associated with each variable key. Will be broken down later by getSubProd()
119
120 auto it = Map.find(S); // Look for the key associated with input character
121 if (it != Map.end()) // if foudn
122 {
123 production = it->second; // assign production to the string value mapped to Variable 'S'
124 }
125 else // 'S' not found in map keys
126 {
127 finalSet.insert(S); // insert unidentified Terminal into finalSet
128 return finalSet;
129 }
130
131 vector<string> initSet;
132 initSet = getSubProd(production); // a vector of the sub-Productions of char 'S'
133
134 for (int i = 0; i < initSet.size(); i++) // check each sub-Production. ( Aa | d | # )
135 {
136 int j = 0;
137 char firstChar = initSet[i][j]; // firstChar = first character of each sub-production ( A | d | # )
138 if (Map.find(firstChar) == Map.end()) // if first character is a Terminal
139 {
140 finalSet.insert(firstChar); // add character to finalSet
141 }
142 else if (firstChar != S) // first character is a Non-Terminal and not equal to the left side of the production, i.e First(S) = First(S);
143 {
144 finalSet = getUnion(finalSet, First(Map, firstChar)); // unions finalSet with finalSet of the Non-Terminal
145 while (finalSet.find('#') != finalSet.end() && (j + 1) < initSet.at(i).size()) // while epsilon is returned from First() and it's not the last Variable in the production
146 {
147 finalSet.erase('#'); // erase epsilon
148 char nextChar = initSet[i][++j]; // look at the next character in the production
149 finalSet = getUnion(finalSet, First(Map, nextChar)); // find First() of the next character
150 }
151 }
152 }
153 return finalSet;
154}
155
156set<char> Follow(map<char, string> &Map, char S)
157{
158 set<char> finalSet; // set that will be returned to the function
159 string production;
160
161 if (S == startP) // If the character is the starting production
162 {
163 finalSet.insert('$');
164 }
165
166 auto itr = Map.begin(); // itr points to the beginning of the map
167 for (itr = Map.begin(); itr != Map.end(); itr++) // for each key in the map
168 {
169 production = itr->second; // production = production string in map
170 vector<string> initSet;
171 initSet = getSubProd(production); // create a vector of sub-Productions givin the production
172
173 for (int j = 0; j < initSet.size(); j++) // for each sub-Production in the vector
174 {
175 for (int k = 0; k < initSet.at(j).size(); k++) // for each character in the sub-Production
176 {
177 if (initSet[j][k] == S) // found Variable in sub-Production
178 {
179 int l = k + 1; // points to next character in the sub-Production
180 if (l >= initSet.at(j).size()) // if Variable is last character of sub-Production
181 {
182 if (initSet[j][k] != itr->first) // if Follow() of Variable is not equal to Follow() of variable on the left side of the production.
183 {
184 finalSet = getUnion(finalSet, Follow(Map, itr->first)); // Follow() of Variable = Follow() of Variable on left side of the production
185 }
186 }
187 else
188 {
189 char nextChar = initSet[j][l]; // next character in the sub-Production
190 finalSet = getUnion(finalSet, First(Map, nextChar)); // Follow() = First(nextChar)
191 while (finalSet.find('#') != finalSet.end() && (l + 1) < initSet.at(j).size()) // next character is a Non-Terminal that contains '#' && Not at end of sub-Production
192 {
193 finalSet.erase('#');
194 finalSet = getUnion(finalSet, First(Map, initSet[j][++l])); // Moves onto the next character in the sub-Production
195 }
196 if (finalSet.find('#') != finalSet.end()) // if the last character returned an epsilon
197 {
198 finalSet.erase('#');
199 if (initSet[j][l] != itr->first) // if Follow() of Variable is not equal to Follow() of variable on the left side of the production.
200 {
201 finalSet = getUnion(finalSet, Follow(Map, itr->first)); // Follow() of Variable = Follow() of Variable on left side of the production
202 }
203 }
204 }
205 }
206 }
207 }
208 }
209 return finalSet;
210}
211
212map<char, set<char>> getFollowMap(vector<string> input)
213{
214 vector<string> initFollow = antiSubProd(input);
215 map<char, string> gramMap = mapGrammar(initFollow);
216 map<char, set<char>> followMap;
217 followMap = mapSet(initFollow); // creates a map for the follow set
218 for (int i = 0; i < initFollow.size(); i++) // assigns Follow() to each Variable in the grammar
219 {
220 set<char> temp = Follow(gramMap, initFollow[i][0]);
221 followMap.find(initFollow[i][0])->second = temp;
222 cout << "Follow(" << initFollow[i][0] << ") = ";
223 printSet(temp);
224 cout << endl;
225 }
226 return followMap;
227}
228
229/*---------------------------------------------------------------------------------------------------------------------------------------*/
230
231void printLR(vector<vector<string>> v) // goes through the entire vector of vector of strings
232{
233 for (int i = 0; i < v.size(); i++) // for each table
234 {
235 cout << "Table: " << i << endl;
236 for (int j = 0; j < v[i].size(); j++) // for each production
237 {
238 cout << v[i][j] << endl;
239 }
240 cout << endl;
241 }
242}
243
244void printRefMap(map<set<string>, int> refMap) // prints the reference map.
245{
246 for (auto it = refMap.begin(); it != refMap.end(); it++) // for each set in the map
247 {
248 cout << "kernel: " << it->second << endl; // it->second = table the set appears in
249 for (auto setItr = it->first.begin(); setItr != it->first.end(); setItr++) // for each item in the set
250 {
251 cout << *setItr << endl;
252 }
253 cout << endl;
254 }
255}
256
257set<char> getGrammarSymbols(vector<string> grammar) // creates a set of every symbol in the grammar. Used for GOTO(I,X)
258{
259 set<char> gramSymbs;
260 for (int i = 0; i < grammar.size(); i++) // for each production
261 {
262 for (int j = 0; j < grammar[i].size(); j++) // for each char of the production
263 {
264 char currentChar = grammar[i][j];
265 if (currentChar != '-' && currentChar != '>' && gramSymbs.find(currentChar) == gramSymbs.end()) // if the symbol hasnt been seen, and it's not a "->"
266 {
267 gramSymbs.insert(currentChar);
268 }
269 }
270 }
271 return gramSymbs;
272}
273
274set<char> getVars(vector<string> grammar)
275{
276 set<char> variables;
277 for (int i = 0; i < grammar.size(); i++) // creates a set of all the variables
278 {
279 variables.insert(grammar[i][0]); // first character of every production is a variable
280 }
281 return variables;
282}
283
284set<char> getTerms(vector<string> grammar, set<char> vars)
285{
286 set<char> terms;
287 for (int i = 0; i < grammar.size(); i++) // for each production
288 {
289 for (int j = 0; j < grammar[i].size(); j++) // for each char of the production
290 {
291 char currentChar = grammar[i][j];
292 if (currentChar != '-' && currentChar != '>' &&
293 vars.find(currentChar) == vars.end()) // if the terminal hasnt been seen, and it's not a "->"
294 {
295 terms.insert(currentChar);
296 }
297 }
298 }
299 return terms;
300}
301
302vector<char> getAllSymbols(set<char> terms, set<char> vars)
303{
304 vector<char> symbols;
305 for (auto itr = terms.begin(); itr != terms.end(); itr++)
306 {
307 symbols.push_back(*itr);
308 }
309 symbols.push_back('$');
310 for (auto itr = vars.begin(); itr != vars.end(); itr++)
311 {
312 symbols.push_back(*itr);
313 }
314 return symbols;
315}
316
317vector<string> initialDot(vector<string> v) // creates a string of productions that start with "."
318{
319 vector<string> finalString;
320 string temp;
321 temp = v[0][0];
322 temp += "'->.";
323 temp += v[0][0];
324 finalString.push_back(temp); // augmented production is added to the grammar (E'->.E)
325
326 for (int i = 0; i < v.size(); i++) // adds "." to the beginning of every production
327 {
328 temp = v[i];
329 if (temp.size() == 4 && temp[3] == '#')
330 {
331 temp.pop_back();
332 }
333 temp.insert(3, ".");
334 finalString.push_back(temp);
335 }
336
337 return finalString;
338}
339
340char charAfterDot(string production) // returns the character after a '.'
341{
342 int dotLoc = production.find('.');
343 char nextChar;
344 if (dotLoc != production.size() - 1) // if the dot isnt at the end of the production
345 {
346 nextChar = production[dotLoc + 1]; // char after the dot
347 }
348 else // if dot is at the end
349 {
350 nextChar = NULL;
351 }
352 return nextChar;
353}
354
355string moveDot(string prod) // move the dot forward in the production
356{
357 int loc = prod.find(".", 3); // locate the dot
358
359 if (loc != prod.size() - 1) // dot isn't at the end of the production
360 {
361 swap(prod.at(loc), prod.at(loc + 1)); // swaps the dot with the character after it
362 }
363
364 return prod; // return a new production with the dot moved forward one spot
365}
366
367vector<string> GOTO(vector<string> itemSet, char c)
368{
369 vector<string> kernels; // vector we'll be returning
370 for (int i = 0; i < itemSet.size(); i++) // for each item in the set
371 {
372 char nextChar = charAfterDot(itemSet[i]); // get the character after the '.'
373 if (nextChar == c) // if nextChar = to character we're building a set for
374 {
375 string nextProd = moveDot(itemSet[i]); // move the dot forward in the item
376 kernels.push_back(nextProd); // add the new item to the kernel vector
377 }
378 }
379 return kernels;
380}
381
382set<string> setOf(vector<string> kernels) // creates a set given a vector<string>
383{
384 set<string> kernelSet;
385 for (int i = 0; i < kernels.size(); i++) // for each item in the vector
386 {
387 kernelSet.insert(kernels[i]);
388 }
389 return kernelSet;
390}
391
392vector<string> Closure(vector<string> dotVector, vector<string> kernels) // creats the closure of a given set of productions. Uses the origin dotVector table as reference.
393{
394 vector<string> closure; // a vector of the productions in closure
395 closure = kernels; // sets closure equal to the current set of productions
396
397 set<char> variables;
398 for (int i = 0; i < dotVector.size(); i++) // creates a set of all the variables
399 {
400 variables.insert(dotVector[i][0]); // first character of every production is a variable
401 }
402
403 set<char> seenVars; // a set of all the variables that have already been seen
404
405 for (int i = 0; i < closure.size(); i++) // for each production in the closure vector
406 {
407 int dotLoc = closure[i].find(".", 3); // find the dot in the production
408 char nextChar = closure[i][dotLoc + 1]; // character after the dot
409 if (variables.find(nextChar) != variables.end() && seenVars.find(nextChar) == seenVars.end()) // if next character is a variable and hasn't already been seen.
410 {
411 for (int j = 1; j < dotVector.size(); j++) // for each production in the dotVector. Dont check the augmented production
412 {
413 if (dotVector[j][0] == nextChar) // if production starting with nextChar is found
414 {
415 closure.push_back(dotVector[j]); // add that production to the closure vector
416 }
417 }
418 seenVars.insert(nextChar); // adds variable to the list of seen Variables (prevents infinite loops)
419 }
420 }
421 return closure;
422}
423
424void outputShifts(map<char, int> Map)
425{
426 for (auto itr = Map.begin(); itr != Map.end(); itr++)
427 {
428 cout << itr->first << " -> " << itr->second << endl;
429 }
430}
431
432void outputRefShiftMap(map<int, map<char, int>> shiftMap)
433{
434 for (auto itr = shiftMap.begin(); itr != shiftMap.end(); itr++)
435 {
436 cout << "table: " << itr->first << endl;
437 outputShifts(itr->second);
438 cout << endl;
439 }
440}
441
442vector<vector<string>> LRset(vector<string> v) // creates the LR(0) sets of a grammar
443{
444 vector<vector<string>> LR0; // The LR(0) set.
445 set<char> gramSymbs = getGrammarSymbols(v); // set of every symbol that appears in the grammar. Used for GOTO(I,X)
446 vector<string> dotProductions = initialDot(v); // Augmented grammar with '.' in front of each production
447 vector<string> set0 = { dotProductions[0] }; // Put augmented production (E'->E) in first set
448 set0 = Closure(dotProductions, set0); // Do closure of the first set
449 LR0.push_back(set0); // pushes first set onto LR(0) sets
450 refMap.insert(pair<set<string>, int>({ set0[0] }, 0));
451 map<char, int> shifts;
452
453 for (int i = 0; i < LR0.size(); i++) // for each set
454 {
455 shifts.clear();
456 for (int j = 0; j < LR0[i].size(); j++) // for each production in the set
457 {
458 for (auto itr = gramSymbs.begin(); itr != gramSymbs.end(); itr++) // for each grammar symbol
459 {
460 vector<string> kernels = GOTO(LR0[i], *itr); // create vector of Kernels from a grammar symbol
461 set<string> kernelSet = setOf(kernels); // assign kernels to set, used for checking with refMap
462 if (kernels.size() != 0 && refMap.find(kernelSet) == refMap.end()) // if kernelSet exist and isn't in refMap
463 {
464 vector<string> closure = Closure(dotProductions, kernels); // do closure of kernel vector
465 LR0.push_back(closure); // add Closure(kernel) to LR(0) sets
466 refMap.insert(pair<set<string>, int>(kernelSet, LR0.size() - 1)); // insert kernelSet in refMap for future referencing
467 }
468 if (kernels.size() != 0)
469 {
470 shifts.insert(pair<char, int>(*itr, refMap.find(kernelSet)->second));
471 }
472 }
473 }
474 refShiftMap.insert(pair<int, map<char, int>>(i, shifts));
475 }
476 cout << endl;
477 printLR(LR0);
478 cout << endl << "kernel reference Map" << endl;
479 printRefMap(refMap);
480 cout << endl << "shift reference Map" << endl;
481 outputRefShiftMap(refShiftMap);
482 return LR0;
483}
484
485void printGrammar(vector<string> grammar)
486{
487 cout << "Grammar" << endl;
488 for (int i = 0; i < grammar.size(); i++)
489 {
490 cout << grammar[i] << endl;
491 }
492 cout << endl;
493}
494
495string finalItem(set<string> kernels)
496{
497 for (auto itr = kernels.begin(); itr != kernels.end(); itr++)
498 {
499 string prod = *itr;
500 int dotLoc = prod.find(".",0);
501 if (dotLoc == prod.size() - 1)
502 {
503 return prod;
504 }
505 }
506 return "";
507}
508
509set<string> refMapFind(int i)
510{
511 for (auto itr = refMap.begin(); itr != refMap.end(); itr++)
512 {
513 if (itr->second == i)
514 {
515 return itr->first;
516 }
517 }
518}
519
520/*-------------------------------------------------------------------------------------------------------------*/
521
522int main()
523{
524 vector<string> input;
525
526 /*
527 input.push_back("E->TD");
528 input.push_back("D->+TD");
529 input.push_back("D->#");
530 input.push_back("T->FU");
531 input.push_back("U->*FU");
532 input.push_back("U->#");
533 input.push_back("F->(E)");
534 input.push_back("F->I");
535 input.push_back("I->x");
536 input.push_back("I->y");
537 input.push_back("I->z");
538 /*/
539
540 /*
541 input.push_back("S->SS");
542 input.push_back("S->00");
543 input.push_back("S->11");
544 /*/
545
546 //*
547 input.push_back("S->0S0");
548 input.push_back("S->1S1");
549 input.push_back("S->1");
550 input.push_back("S->0");
551 /*/
552
553 /*
554 input.push_back("S->A");
555 input.push_back("S->B");
556 input.push_back("S->C0A");
557 input.push_back("A->C0C");
558 input.push_back("C->0C1");
559 input.push_back("C->CC");
560 input.push_back("C->#");
561 input.push_back("B->D1B");
562 input.push_back("B->D1D");
563 input.push_back("D->0D1");
564 input.push_back("D->1D0");
565 input.push_back("D->DD");
566 input.push_back("D->#");
567 /*/
568
569 /*
570 input.push_back("E->T");
571 input.push_back("E->T+E");
572 input.push_back("T->(E)");
573 input.push_back("T->i*T");
574 input.push_back("T->i");
575 /*/
576
577 /*
578 input.push_back("S->AA");
579 input.push_back("A->aA");
580 input.push_back("A->b");
581 /*/
582
583 /*
584 input.push_back("E->E+T");
585 input.push_back("E->T");
586 input.push_back("T->T*F");
587 input.push_back("T->F");
588 input.push_back("F->(E)");
589 input.push_back("F->i");
590 /*/
591
592 /*
593 input.push_back("S->ABCD");
594 input.push_back("A->a");
595 input.push_back("A->#");
596 input.push_back("B->CD");
597 input.push_back("B->b");
598 input.push_back("C->c");
599 input.push_back("C->#");
600 input.push_back("D->Aa");
601 input.push_back("D->d");
602 input.push_back("D->#");
603 /*/
604
605 /*
606 input.push_back("S->E");
607 input.push_back("E->E+T");
608 input.push_back("E->T");
609 input.push_back("T->T*F");
610 input.push_back("T->F");
611 input.push_back("F->i");
612 input.push_back("F->(E)");
613 */
614
615 printGrammar(input);
616
617 startP = input[0][0];
618
619 set<char> variables = getVars(input);
620 set<char> terminals = getTerms(input, variables);
621 vector<char> symbols = getAllSymbols(terminals, variables);
622 map<char, set<char>> followMap = getFollowMap(input);
623 vector<vector<string>> LR = LRset(input);
624 //map<set<string>, int> refMap; // refMap, used to check if a set of kernels already exists
625 //map<int, map<char, int>> refShiftMap; // used to keep track of transitions out of each state.
626
627 const int setSize = LR.size();
628
629 stack<int> states;
630
631 vector<vector<string>> AGTable;
632 AGTable.resize(setSize);
633 cout << "ACTION & GOTO Table" << endl;
634 cout << "s: ";
635 for (int i = 0; i < symbols.size(); i++)
636 {
637 cout << symbols[i] << " ";
638 }
639 cout << endl;
640 for (int i = 0; i < LR.size(); i++)
641 {
642 AGTable[i].resize(symbols.size());
643 auto state = refShiftMap.find(i);
644 if (i < 10)
645 {
646 cout << i << ": ";
647 }
648 else
649 cout << i << ":";
650
651 set<string> kernels = refMapFind(i);
652 string reduce = finalItem(kernels);
653 for (int j = 0; j < symbols.size(); j++)
654 {
655 char currSym = symbols[j];
656 auto transTable = state->second;
657 auto transition = transTable.find(currSym);
658 if (transition != transTable.end())
659 {
660 if (j < terminals.size()+1)
661 {
662 AGTable[i][j] = "s" + to_string(transition->second);
663 }
664 else
665 {
666 AGTable[i][j] = "g" + to_string(transition->second);
667 }
668
669 }
670 else if (reduce.size() > 0)
671 {
672 char lhs = reduce[0];
673 set<char> followSet = followMap.find(lhs)->second;
674 if (followSet.find(currSym) != followSet.end())
675 {
676 if (currSym == '$' && reduce[1] == '\'')
677 {
678 AGTable[i][j] = "A!";
679 }
680 else
681 {
682 AGTable[i][j] = "r*";
683 }
684
685 }
686 else
687 {
688 AGTable[i][j] = "--";
689 }
690 }
691 else
692 {
693 AGTable[i][j] = "--";
694 }
695 cout << AGTable[i][j] << " ";
696 }
697 cout << endl;
698 }
699
700 system("pause");
701 return 0;
702}