· 4 years ago · May 11, 2021, 05:50 PM
1#include "llvm/ADT/APFloat.h"
2#include "llvm/ADT/Optional.h"
3#include "llvm/ADT/STLExtras.h"
4#include "llvm/IR/BasicBlock.h"
5#include "llvm/IR/Constants.h"
6#include "llvm/IR/DerivedTypes.h"
7#include "llvm/IR/Function.h"
8#include "llvm/IR/Instructions.h"
9#include "llvm/IR/IRBuilder.h"
10#include "llvm/IR/LLVMContext.h"
11#include "llvm/IR/LegacyPassManager.h"
12#include "llvm/IR/Module.h"
13#include "llvm/IR/Type.h"
14#include "llvm/IR/Verifier.h"
15#include "llvm/Support/FileSystem.h"
16#include "llvm/Support/Host.h"
17#include "llvm/Support/raw_ostream.h"
18#include "llvm/Support/TargetRegistry.h"
19#include "llvm/Support/TargetSelect.h"
20#include "llvm/Target/TargetMachine.h"
21#include "llvm/Target/TargetOptions.h"
22#include <algorithm>
23#include <cassert>
24#include <cctype>
25#include <cstdio>
26#include <cstdlib>
27#include <map>
28#include <memory>
29#include <string>
30#include <system_error>
31#include <utility>
32#include <vector>
33#include <iostream>
34using namespace llvm;
35using namespace llvm::sys;
36
37
38enum VariableType {
39 Double='d',
40 String='s',
41 Unknown='e'
42};
43
44static VariableType ConvertStrTypeToEnum(std::string strType) {
45 if (strType == "Double")
46 return VariableType::Double;
47 if (strType == "String")
48 return VariableType::String;
49 return VariableType::Unknown;
50}
51
52static std::map<std::string, VariableType> VarTypes;
53
54//===----------------------------------------------------------------------===//
55// Lexer
56//===----------------------------------------------------------------------===//
57
58// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
59// of these for known things.
60enum Token {
61 tok_eof = -1,
62
63 // commands
64 tok_def = -2,
65 tok_extern = -3,
66
67 // primary
68 tok_identifier = -4,
69 tok_number = -5,
70 tok_str = -6,
71
72 // control
73 tok_if = -7,
74 tok_then = -8,
75 tok_else = -9,
76 tok_for = -10,
77 tok_in = -11,
78
79 // operators
80 tok_binary = -12,
81 tok_unary = -13,
82
83 // var definition
84 tok_var = -14
85};
86
87static std::string IdentifierStr; // Filled in if tok_identifier
88static double NumVal; // Filled in if tok_number
89static std::string StringVal; // Filled in if tok_str
90
91/// gettok - Return the next token from standard input.
92static int gettok() {
93 static int LastChar = ' ';
94
95 // Skip any whitespace.
96 while (isspace(LastChar))
97 LastChar = getchar();
98
99 if(LastChar == '"'){
100 LastChar = getchar();
101 std::string value;
102
103 while(LastChar != '"'){
104 value += LastChar;
105 LastChar = getchar();
106 }
107 LastChar = getchar();
108 StringVal = value;
109 return tok_str;
110 }
111
112 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
113 IdentifierStr = LastChar;
114 while (isalnum((LastChar = getchar())))
115 IdentifierStr += LastChar;
116
117 if (IdentifierStr == "func")
118 return tok_def;
119 if (IdentifierStr == "extern")
120 return tok_extern;
121 if (IdentifierStr == "if")
122 return tok_if;
123 if (IdentifierStr == "then")
124 return tok_then;
125 if (IdentifierStr == "else")
126 return tok_else;
127 if (IdentifierStr == "for")
128 return tok_for;
129 if (IdentifierStr == "in")
130 return tok_in;
131 if (IdentifierStr == "binary")
132 return tok_binary;
133 if (IdentifierStr == "unary")
134 return tok_unary;
135 if (ConvertStrTypeToEnum(IdentifierStr) != VariableType::Unknown)
136 return tok_var;
137 return tok_identifier;
138 }
139
140 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
141 std::string NumStr;
142 do {
143 NumStr += LastChar;
144 LastChar = getchar();
145 } while (isdigit(LastChar) || LastChar == '.');
146
147 NumVal = strtod(NumStr.c_str(), nullptr);
148 return tok_number;
149 }
150
151 if (LastChar == '#') {
152 // Comment until end of line.
153 do
154 LastChar = getchar();
155 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
156
157 if (LastChar != EOF)
158 return gettok();
159 }
160
161 // Check for end of file. Don't eat the EOF.
162 if (LastChar == EOF)
163 return tok_eof;
164
165 // Otherwise, just return the character as its ascii value.
166 int ThisChar = LastChar;
167 LastChar = getchar();
168 return ThisChar;
169}
170
171//===----------------------------------------------------------------------===//
172// Abstract Syntax Tree (aka Parse Tree)
173//===----------------------------------------------------------------------===//
174
175namespace {
176
177/// ExprAST - Base class for all expression nodes.
178 class ExprAST {
179 public:
180 virtual ~ExprAST() = default;
181
182 virtual Value *codegen() = 0;
183 virtual std::string getType() {return "Unknown"; };
184 };
185
186/// NumberExprAST - Expression class for numeric literals like "1.0".
187 class NumberExprAST : public ExprAST {
188 double Val;
189
190 public:
191 NumberExprAST(double Val) : Val(Val) {}
192
193 virtual std::string getType() override { return "Double"; }
194
195 Value *codegen() override;
196 };
197 class StringExprAST : public ExprAST {
198 std::string Val;
199
200 public:
201 StringExprAST(std::string Val) : Val(Val) {}
202 virtual std::string getType() override { return "String"; }
203
204 Value *codegen() override;
205 };
206
207/// VariableExprAST - Expression class for referencing a variable, like "a".
208 class VariableExprAST : public ExprAST {
209 std::string Name;
210
211 public:
212 VariableExprAST(const std::string &Name) : Name(Name) {}
213
214 Value *codegen() override;
215 const std::string &getName() const { return Name; }
216 };
217
218/// UnaryExprAST - Expression class for a unary operator.
219 class UnaryExprAST : public ExprAST {
220 char Opcode;
221 std::unique_ptr<ExprAST> Operand;
222
223 public:
224 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
225 : Opcode(Opcode), Operand(std::move(Operand)) {}
226
227 Value *codegen() override;
228 };
229
230/// BinaryExprAST - Expression class for a binary operator.
231 class BinaryExprAST : public ExprAST {
232 char Op;
233 std::unique_ptr<ExprAST> LHS, RHS;
234
235 public:
236 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
237 std::unique_ptr<ExprAST> RHS)
238 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
239
240 Value *codegen() override;
241 };
242
243/// CallExprAST - Expression class for function calls.
244 class CallExprAST : public ExprAST {
245 std::string Callee;
246 std::vector<std::unique_ptr<ExprAST>> Args;
247
248 public:
249 CallExprAST(const std::string &Callee,
250 std::vector<std::unique_ptr<ExprAST>> Args)
251 : Callee(Callee), Args(std::move(Args)) {}
252
253 Value *codegen() override;
254 };
255
256/// IfExprAST - Expression class for if/then/else.
257 class IfExprAST : public ExprAST {
258 std::unique_ptr<ExprAST> Cond, Then, Else;
259
260 public:
261 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
262 std::unique_ptr<ExprAST> Else)
263 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
264
265 Value *codegen() override;
266 };
267
268/// ForExprAST - Expression class for for/in.
269 class ForExprAST : public ExprAST {
270 std::string VarName;
271 std::unique_ptr<ExprAST> Start, End, Step, Body;
272
273 public:
274 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
275 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
276 std::unique_ptr<ExprAST> Body)
277 : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
278 Step(std::move(Step)), Body(std::move(Body)) {}
279
280 Value *codegen() override;
281 };
282
283/// VarExprAST - Expression class for var/in
284 class VarExprAST : public ExprAST {
285 std::vector<std::pair<std::string, std::pair<VariableType, std::unique_ptr<ExprAST>>>> VarNames;
286 std::unique_ptr<ExprAST> Body;
287
288 public:
289 VarExprAST(
290 std::vector<std::pair<std::string, std::pair<VariableType, std::unique_ptr<ExprAST>>>> VarNames,
291 std::unique_ptr<ExprAST> Body)
292 : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
293
294 Value *codegen() override;
295 };
296
297/// PrototypeAST - This class represents the "prototype" for a function,
298/// which captures its name, and its argument names (thus implicitly the number
299/// of arguments the function takes), as well as if it is an operator.
300 class PrototypeAST {
301 std::string Name;
302 VariableType ReturnType;
303 // type, value
304 std::vector<std::pair<VariableType, std::string>> Args;
305 bool IsOperator;
306 unsigned Precedence; // Precedence if a binary op.
307
308 public:
309 PrototypeAST(
310 const std::string &Name,
311 const VariableType returnType,
312 std::vector<std::pair<VariableType, std::string>> Args,
313 bool IsOperator = false,
314 unsigned Prec = 0)
315 : Name(Name),
316 ReturnType(returnType),
317 Args(std::move(Args)), IsOperator(IsOperator),
318 Precedence(Prec) {}
319
320 Function *codegen();
321 const std::string &getName() const { return Name; }
322
323 bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
324 bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
325
326 char getOperatorName() const {
327 assert(isUnaryOp() || isBinaryOp());
328 return Name[Name.size() - 1];
329 }
330
331 unsigned getBinaryPrecedence() const { return Precedence; }
332 };
333
334/// FunctionAST - This class represents a function definition itself.
335 class FunctionAST {
336 std::unique_ptr<PrototypeAST> Proto;
337 std::unique_ptr<ExprAST> Body;
338
339 public:
340 FunctionAST(std::unique_ptr<PrototypeAST> Proto,
341 std::unique_ptr<ExprAST> Body)
342 : Proto(std::move(Proto)), Body(std::move(Body)) {}
343
344 Function *codegen();
345 };
346
347} // end anonymous namespace
348
349//===----------------------------------------------------------------------===//
350// Parser
351//===----------------------------------------------------------------------===//
352
353/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
354/// token the parser is looking at. getNextToken reads another token from the
355/// lexer and updates CurTok with its results.
356static int CurTok;
357static int getNextToken() { return CurTok = gettok(); }
358
359/// BinopPrecedence - This holds the precedence for each binary operator that is
360/// defined.
361static std::map<char, int> BinopPrecedence;
362
363/// GetTokPrecedence - Get the precedence of the pending binary operator token.
364static int GetTokPrecedence() {
365 if (!isascii(CurTok))
366 return -1;
367
368 // Make sure it's a declared binop.
369 int TokPrec = BinopPrecedence[CurTok];
370 if (TokPrec <= 0)
371 return -1;
372 return TokPrec;
373}
374
375/// LogError* - These are little helper functions for error handling.
376std::unique_ptr<ExprAST> LogError(const char *Str) {
377 fprintf(stderr, "Error: %s\n", Str);
378 return nullptr;
379}
380
381std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
382 LogError(Str);
383 return nullptr;
384}
385
386static std::unique_ptr<ExprAST> ParseExpression();
387
388/// numberexpr ::= number
389static std::unique_ptr<ExprAST> ParseNumberExpr() {
390 auto Result = std::make_unique<NumberExprAST>(NumVal);
391 getNextToken(); // consume the number
392 return std::move(Result);
393}
394/// stringexpr ::= string
395static std::unique_ptr<ExprAST> ParseStringExpr() {
396 auto Result = std::make_unique<StringExprAST>(StringVal);
397
398 getNextToken(); // consume the number
399 return std::move(Result);
400}
401
402/// parenexpr ::= '(' expression ')'
403static std::unique_ptr<ExprAST> ParseParenExpr() {
404 getNextToken(); // eat (.
405 auto V = ParseExpression();
406 if (!V)
407 return nullptr;
408
409 if (CurTok != ')')
410 return LogError("expected ')'");
411 getNextToken(); // eat ).
412 return V;
413}
414
415/// identifierexpr
416/// ::= identifier
417/// ::= identifier '(' expression* ')'
418static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
419 std::string IdName = IdentifierStr;
420
421 getNextToken(); // eat identifier.
422
423 if (CurTok != '(') // Simple variable ref.
424 return std::make_unique<VariableExprAST>(IdName);
425
426 // Call.
427 getNextToken(); // eat (
428 std::vector<std::unique_ptr<ExprAST>> Args;
429 if (CurTok != ')') {
430 while (true) {
431 if (auto Arg = ParseExpression())
432 Args.push_back(std::move(Arg));
433 else
434 return nullptr;
435
436 if (CurTok == ')')
437 break;
438
439 if (CurTok != ',')
440 return LogError("Expected ')' or ',' in argument list");
441 getNextToken();
442 }
443 }
444
445 // Eat the ')'.
446 getNextToken();
447
448 return std::make_unique<CallExprAST>(IdName, std::move(Args));
449}
450
451/// ifexpr ::= 'if' expression 'then' expression 'else' expression
452static std::unique_ptr<ExprAST> ParseIfExpr() {
453 getNextToken(); // eat the if.
454
455 // condition.
456 auto Cond = ParseExpression();
457 if (!Cond)
458 return nullptr;
459
460 if (CurTok != tok_then)
461 return LogError("expected then");
462 getNextToken(); // eat the then
463
464 auto Then = ParseExpression();
465 if (!Then)
466 return nullptr;
467
468 if (CurTok != tok_else)
469 return LogError("expected else");
470
471 getNextToken();
472
473 auto Else = ParseExpression();
474 if (!Else)
475 return nullptr;
476
477 return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
478 std::move(Else));
479}
480
481/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
482static std::unique_ptr<ExprAST> ParseForExpr() {
483 getNextToken(); // eat the for.
484
485 if (CurTok != tok_identifier)
486 return LogError("expected identifier after for");
487
488 std::string IdName = IdentifierStr;
489 getNextToken(); // eat identifier.
490
491 if (CurTok != '=')
492 return LogError("expected '=' after for");
493 getNextToken(); // eat '='.
494
495 auto Start = ParseExpression();
496 if (!Start)
497 return nullptr;
498 if (CurTok != ',')
499 return LogError("expected ',' after for start value");
500 getNextToken();
501
502 auto End = ParseExpression();
503 if (!End)
504 return nullptr;
505
506 // The step value is optional.
507 std::unique_ptr<ExprAST> Step;
508 if (CurTok == ',') {
509 getNextToken();
510 Step = ParseExpression();
511 if (!Step)
512 return nullptr;
513 }
514
515 if (CurTok != tok_in)
516 return LogError("expected 'in' after for");
517 getNextToken(); // eat 'in'.
518
519 auto Body = ParseExpression();
520 if (!Body)
521 return nullptr;
522
523 return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
524 std::move(Step), std::move(Body));
525}
526
527/// varexpr ::= 'var' identifier ('=' expression)?
528// (',' identifier ('=' expression)?)* 'in' expression
529static std::unique_ptr<ExprAST> ParseVarExpr() {
530
531 std::vector<std::pair<std::string, std::pair<VariableType, std::unique_ptr<ExprAST>>>> VarNames;
532
533 while (true) {
534
535 VariableType varType = ConvertStrTypeToEnum(IdentifierStr);
536 if(varType == VariableType::Unknown)
537 return LogError("Unknown variable type");
538
539 getNextToken(); // eat the type.
540
541 if(CurTok != tok_identifier)
542 return LogError("Expected identifier after variable type.");
543
544 std::string Name = IdentifierStr;
545 getNextToken(); // eat identifier.
546
547 // Read the optional initializer.
548 std::unique_ptr<ExprAST> Init = nullptr;
549 if (CurTok == '=') {
550 getNextToken(); // eat the '='.
551 Init = ParseExpression();
552 if (!Init)
553 return nullptr;
554 }
555
556 VarNames.push_back(std::make_pair(Name, std::make_pair(varType, std::move(Init))));
557 VarTypes[Name] = varType;
558 // End of var list, exit loop.
559 if (CurTok != ',')
560 break;
561 getNextToken(); // eat the ','.
562
563 }
564
565 // At this point, we have to have 'in'.
566 if (CurTok != tok_in)
567 return LogError("expected 'in' keyword after 'var'");
568 getNextToken(); // eat 'in'.
569
570 auto Body = ParseExpression();
571 if (!Body)
572 return nullptr;
573
574 return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
575}
576
577/// primary
578/// ::= identifierexpr
579/// ::= numberexpr
580/// ::= parenexpr
581/// ::= ifexpr
582/// ::= forexpr
583/// ::= varexpr
584static std::unique_ptr<ExprAST> ParsePrimary() {
585 switch (CurTok) {
586 default:
587 return LogError("unknown token when expecting an expression");
588 case tok_identifier:
589 return ParseIdentifierExpr();
590 case tok_number:
591 return ParseNumberExpr();
592 case tok_str:
593 return ParseStringExpr();
594 case '(':
595 return ParseParenExpr();
596 case tok_if:
597 return ParseIfExpr();
598 case tok_for:
599 return ParseForExpr();
600 case tok_var:
601 return ParseVarExpr();
602 }
603}
604
605/// unary
606/// ::= primary
607/// ::= '!' unary
608static std::unique_ptr<ExprAST> ParseUnary() {
609 // If the current token is not an operator, it must be a primary expr.
610 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
611 return ParsePrimary();
612
613 // If this is a unary operator, read it.
614 int Opc = CurTok;
615 getNextToken();
616 if (auto Operand = ParseUnary())
617 return std::make_unique<UnaryExprAST>(Opc, std::move(Operand));
618 return nullptr;
619}
620
621/// binoprhs
622/// ::= ('+' unary)*
623static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
624 std::unique_ptr<ExprAST> LHS) {
625 // If this is a binop, find its precedence.
626 while (true) {
627 int TokPrec = GetTokPrecedence();
628
629 // If this is a binop that binds at least as tightly as the current binop,
630 // consume it, otherwise we are done.
631 if (TokPrec < ExprPrec)
632 return LHS;
633
634 // Okay, we know this is a binop.
635 int BinOp = CurTok;
636 getNextToken(); // eat binop
637
638 // Parse the unary expression after the binary operator.
639 auto RHS = ParseUnary();
640 if (!RHS)
641 return nullptr;
642
643 // If BinOp binds less tightly with RHS than the operator after RHS, let
644 // the pending operator take RHS as its LHS.
645 int NextPrec = GetTokPrecedence();
646 if (TokPrec < NextPrec) {
647 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
648 if (!RHS)
649 return nullptr;
650 }
651
652 // Merge LHS/RHS.
653 LHS =
654 std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
655 }
656}
657
658/// expression
659/// ::= unary binoprhs
660///
661static std::unique_ptr<ExprAST> ParseExpression() {
662 auto LHS = ParseUnary();
663 if (!LHS)
664 return nullptr;
665
666 return ParseBinOpRHS(0, std::move(LHS));
667}
668
669/// prototype
670/// ::= id '(' id* ')'
671/// ::= binary LETTER number? (id, id)
672/// ::= unary LETTER (id)
673static std::unique_ptr<PrototypeAST> ParsePrototype() {
674 std::string FnName;
675
676 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
677 unsigned BinaryPrecedence = 30;
678
679 switch (CurTok) {
680 default:
681 return LogErrorP("Expected function name in prototype");
682 case tok_identifier:
683 FnName = IdentifierStr;
684 Kind = 0;
685 getNextToken();
686 break;
687 case tok_unary:
688 getNextToken();
689 if (!isascii(CurTok))
690 return LogErrorP("Expected unary operator");
691 FnName = "unary";
692 FnName += (char)CurTok;
693 Kind = 1;
694 getNextToken();
695 break;
696 case tok_binary:
697 getNextToken();
698 if (!isascii(CurTok))
699 return LogErrorP("Expected binary operator");
700 FnName = "binary";
701 FnName += (char)CurTok;
702 Kind = 2;
703 getNextToken();
704
705 // Read the precedence if present.
706 if (CurTok == tok_number) {
707 if (NumVal < 1 || NumVal > 100)
708 return LogErrorP("Invalid precedence: must be 1..100");
709 BinaryPrecedence = (unsigned)NumVal;
710 getNextToken();
711 }
712 break;
713 }
714
715 if (CurTok != '(')
716 return LogErrorP("Expected '(' in prototype");
717
718 std::vector<std::pair<VariableType, std::string>> ArgNames;
719 std::string dataType = "";
720 bool seenSeperator = false;
721 for (getNextToken();(CurTok == tok_identifier || CurTok == tok_var || CurTok==',');getNextToken()) {
722 if (CurTok == ',') {
723 seenSeperator = true;
724 if ((dataType.size() > 0 || ArgNames.size() == 0)) {
725 return LogErrorP("Only expected ',' after a variable name");
726 }
727 continue;
728 }
729
730 if (CurTok == tok_var) {
731 if (!seenSeperator && ArgNames.size() > 0) {
732 return LogErrorP("Seperate variable names with ','");
733 }
734 if (dataType.size() > 0) {
735 return LogErrorP("Expected variable name after a data type");
736 }
737 if (ConvertStrTypeToEnum(IdentifierStr) == VariableType::Unknown) {
738 return LogErrorP("Invalid data type");
739 }
740 dataType = IdentifierStr;
741 seenSeperator = false;
742 } else {
743 if (dataType.size() == 0) {
744 return LogErrorP("Expected a datatype before variable name");
745 }
746 ArgNames.push_back({ConvertStrTypeToEnum(dataType), IdentifierStr});
747 dataType = "";
748 }
749 }
750 if (CurTok != ')')
751 return LogErrorP("Expected ')' in prototype");
752
753 // success.
754 getNextToken(); // eat ')'.
755
756 auto functionType = ConvertStrTypeToEnum(IdentifierStr);
757 if (functionType != VariableType::Unknown) {
758 getNextToken();
759 } else {
760 // TODO: Make void in the future
761 functionType = VariableType::Unknown;
762 return LogErrorP("Unknown function return type");
763 }
764
765 // Verify right number of names for operator.
766 if (Kind && ArgNames.size() != Kind)
767 return LogErrorP("Invalid number of operands for operator");
768
769 return std::make_unique<PrototypeAST>(FnName, functionType, ArgNames, Kind != 0,
770 BinaryPrecedence);
771}
772
773/// definition ::= 'def' prototype expression
774static std::unique_ptr<FunctionAST> ParseDefinition() {
775 getNextToken(); // eat func.
776 auto Proto = ParsePrototype();
777 if (!Proto)
778 return nullptr;
779
780 if (auto E = ParseExpression())
781 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
782 return nullptr;
783}
784
785/// toplevelexpr ::= expression
786static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
787 if (auto E = ParseExpression()) {
788 // Make an anonymous proto.
789 auto Proto = std::make_unique<PrototypeAST>("__anon_expr", VariableType::Unknown,
790 std::vector<std::pair<VariableType, std::string>>());
791 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
792 }
793 return nullptr;
794}
795
796/// external ::= 'extern' prototype
797static std::unique_ptr<PrototypeAST> ParseExtern() {
798 getNextToken(); // eat extern.
799 return ParsePrototype();
800}
801
802//===----------------------------------------------------------------------===//
803// Code Generation
804//===----------------------------------------------------------------------===//
805
806static std::unique_ptr<LLVMContext> TheContext;
807static std::unique_ptr<Module> TheModule;
808static std::unique_ptr<IRBuilder<>> Builder;
809static std::map<std::string, AllocaInst *> NamedValues;
810static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
811static ExitOnError ExitOnErr;
812
813Value *LogErrorV(const char *Str) {
814 LogError(Str);
815 return nullptr;
816}
817
818Function *getFunction(std::string Name) {
819
820 // First, see if the function has already been added to the current module.
821 if (auto *F = TheModule->getFunction(Name))
822 return F;
823
824 // If not, check whether we can codegen the declaration from some existing
825 // prototype.
826 auto FI = FunctionProtos.find(Name);
827
828 if (FI != FunctionProtos.end()) {
829 return FI->second->codegen();
830 }
831
832
833 // If no existing prototype exists, return null.
834 return nullptr;
835}
836
837static llvm::Type *GetTypeAlloc(VariableType varType) {
838 switch(varType) {
839 case VariableType::String:
840 return Type::getInt8PtrTy(*TheContext);
841 case VariableType::Double:
842 return Type::getDoubleTy(*TheContext);
843 case VariableType::Unknown:
844 default:
845 return nullptr;
846 }
847}
848/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
849/// the function. This is used for mutable variables etc.
850static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
851 StringRef VarName) {
852 IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
853 TheFunction->getEntryBlock().begin());
854 const VariableType varType = VarTypes[VarName.str()];
855 return TmpB.CreateAlloca(GetTypeAlloc(varType), nullptr, VarName);
856}
857
858Value* createString(std::string StringValue){
859 return Builder->CreateGlobalStringPtr(StringRef(StringValue));
860}
861
862Value *NumberExprAST::codegen() {
863 return ConstantFP::get(*TheContext, APFloat(Val));
864}
865
866Value *StringExprAST::codegen() {
867 return createString(this->Val);
868}
869
870Value *VariableExprAST::codegen() {
871 // Look this variable up in the function.
872 Value *V = NamedValues[Name];
873 if (!V)
874 return LogErrorV("Unknown variable name");
875
876 // Load the value.
877 return Builder->CreateLoad(V, Name.c_str());
878}
879
880Value *UnaryExprAST::codegen() {
881 Value *OperandV = Operand->codegen();
882 if (!OperandV)
883 return nullptr;
884
885 Function *F = getFunction(std::string("unary") + Opcode);
886 if (!F)
887 return LogErrorV("Unknown unary operator");
888
889 return Builder->CreateCall(F, OperandV, "unop");
890}
891
892Value *BinaryExprAST::codegen() {
893 // Special case '=' because we don't want to emit the LHS as an expression.
894 if (Op == '=') {
895 // Assignment requires the LHS to be an identifier.
896 // This assume we're building without RTTI because LLVM builds that way by
897 // default. If you build LLVM with RTTI this can be changed to a
898 // dynamic_cast for automatic error checking.
899 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
900 if (!LHSE)
901 return LogErrorV("destination of '=' must be a variable");
902 // Codegen the RHS.
903 Value *Val = RHS->codegen();
904 if (!Val)
905 return nullptr;
906
907 // Look up the name.
908 Value *Variable = NamedValues[LHSE->getName()];
909 if (!Variable)
910 return LogErrorV("Unknown variable name");
911
912 Builder->CreateStore(Val, Variable);
913 return Val;
914 }
915
916 Value *L = LHS->codegen();
917 Value *R = RHS->codegen();
918 if (!L || !R)
919 return nullptr;
920
921 switch (Op) {
922 case '+':
923 return Builder->CreateFAdd(L, R, "addtmp");
924 case '-':
925 return Builder->CreateFSub(L, R, "subtmp");
926 case '*':
927 return Builder->CreateFMul(L, R, "multmp");
928 case '<':
929 L = Builder->CreateFCmpULT(L, R, "cmptmp");
930 // Convert bool 0/1 to double 0.0 or 1.0
931 return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp");
932 default:
933 break;
934 }
935
936 // If it wasn't a builtin binary operator, it must be a user defined one. Emit
937 // a call to it.
938 Function *F = getFunction(std::string("binary") + Op);
939 assert(F && "binary operator not found!");
940
941 Value *Ops[] = {L, R};
942 return Builder->CreateCall(F, Ops, "binop");
943}
944
945Value *CallExprAST::codegen() {
946
947 // Look up the name in the global module table.
948 Function *CalleeF = getFunction(Callee);
949 if (!CalleeF)
950 return LogErrorV("Unknown function referenced");
951
952 // If argument mismatch error.
953 if (CalleeF->arg_size() != Args.size())
954 return LogErrorV("Incorrect # arguments passed");
955
956 std::vector<Value *> ArgsV;
957 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
958 ArgsV.push_back(Args[i]->codegen());
959 if (!ArgsV.back())
960 return nullptr;
961 }
962
963 return Builder->CreateCall(CalleeF, ArgsV, "calltmp");
964}
965
966Value *IfExprAST::codegen() {
967 Value *CondV = Cond->codegen();
968 if (!CondV)
969 return nullptr;
970
971 // Convert condition to a bool by comparing non-equal to 0.0.
972 CondV = Builder->CreateFCmpONE(
973 CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond");
974
975 Function *TheFunction = Builder->GetInsertBlock()->getParent();
976
977 // Create blocks for the then and else cases. Insert the 'then' block at the
978 // end of the function.
979 BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction);
980 BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else");
981 BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont");
982
983 Builder->CreateCondBr(CondV, ThenBB, ElseBB);
984
985 // Emit then value.
986 Builder->SetInsertPoint(ThenBB);
987
988 Value *ThenV = Then->codegen();
989 if (!ThenV)
990 return nullptr;
991
992 Builder->CreateBr(MergeBB);
993 // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
994 ThenBB = Builder->GetInsertBlock();
995
996 // Emit else block.
997 TheFunction->getBasicBlockList().push_back(ElseBB);
998 Builder->SetInsertPoint(ElseBB);
999
1000 Value *ElseV = Else->codegen();
1001 if (!ElseV)
1002 return nullptr;
1003
1004 Builder->CreateBr(MergeBB);
1005 // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
1006 ElseBB = Builder->GetInsertBlock();
1007
1008 // Emit merge block.
1009 TheFunction->getBasicBlockList().push_back(MergeBB);
1010 Builder->SetInsertPoint(MergeBB);
1011 PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp");
1012
1013 PN->addIncoming(ThenV, ThenBB);
1014 PN->addIncoming(ElseV, ElseBB);
1015 return PN;
1016}
1017
1018// Output for-loop as:
1019// var = alloca double
1020// ...
1021// start = startexpr
1022// store start -> var
1023// goto loop
1024// loop:
1025// ...
1026// bodyexpr
1027// ...
1028// loopend:
1029// step = stepexpr
1030// endcond = endexpr
1031//
1032// curvar = load var
1033// nextvar = curvar + step
1034// store nextvar -> var
1035// br endcond, loop, endloop
1036// outloop:
1037Value *ForExprAST::codegen() {
1038 Function *TheFunction = Builder->GetInsertBlock()->getParent();
1039
1040 // Create an alloca for the variable in the entry block.
1041 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
1042
1043 // Emit the start code first, without 'variable' in scope.
1044 Value *StartVal = Start->codegen();
1045 if (!StartVal)
1046 return nullptr;
1047
1048 // Store the value into the alloca.
1049 Builder->CreateStore(StartVal, Alloca);
1050
1051 // Make the new basic block for the loop header, inserting after current
1052 // block.
1053 BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction);
1054
1055 // Insert an explicit fall through from the current block to the LoopBB.
1056 Builder->CreateBr(LoopBB);
1057
1058 // Start insertion in LoopBB.
1059 Builder->SetInsertPoint(LoopBB);
1060
1061 // Within the loop, the variable is defined equal to the PHI node. If it
1062 // shadows an existing variable, we have to restore it, so save it now.
1063 AllocaInst *OldVal = NamedValues[VarName];
1064 NamedValues[VarName] = Alloca;
1065
1066 // Emit the body of the loop. This, like any other expr, can change the
1067 // current BB. Note that we ignore the value computed by the body, but don't
1068 // allow an error.
1069 if (!Body->codegen())
1070 return nullptr;
1071
1072 // Emit the step value.
1073 Value *StepVal = nullptr;
1074 if (Step) {
1075 StepVal = Step->codegen();
1076 if (!StepVal)
1077 return nullptr;
1078 } else {
1079 // If not specified, use 1.0.
1080 StepVal = ConstantFP::get(*TheContext, APFloat(1.0));
1081 }
1082
1083 // Compute the end condition.
1084 Value *EndCond = End->codegen();
1085 if (!EndCond)
1086 return nullptr;
1087
1088 // Reload, increment, and restore the alloca. This handles the case where
1089 // the body of the loop mutates the variable.
1090 Value *CurVar = Builder->CreateLoad(Type::getDoubleTy(*TheContext), Alloca,
1091 VarName.c_str());
1092 Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar");
1093 Builder->CreateStore(NextVar, Alloca);
1094
1095 // Convert condition to a bool by comparing non-equal to 0.0.
1096 EndCond = Builder->CreateFCmpONE(
1097 EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond");
1098
1099 // Create the "after loop" block and insert it.
1100 BasicBlock *AfterBB =
1101 BasicBlock::Create(*TheContext, "afterloop", TheFunction);
1102
1103 // Insert the conditional branch into the end of LoopEndBB.
1104 Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
1105
1106 // Any new code will be inserted in AfterBB.
1107 Builder->SetInsertPoint(AfterBB);
1108
1109 // Restore the unshadowed variable.
1110 if (OldVal)
1111 NamedValues[VarName] = OldVal;
1112 else
1113 NamedValues.erase(VarName);
1114
1115 // for expr always returns 0.0.
1116 return Constant::getNullValue(Type::getDoubleTy(*TheContext));
1117}
1118
1119Value *VarExprAST::codegen() {
1120 std::vector<AllocaInst *> OldBindings;
1121
1122 Function *TheFunction = Builder->GetInsertBlock()->getParent();
1123
1124 // Register all variables and emit their initializer.
1125 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
1126 const std::string &VarName = VarNames[i].first;
1127
1128
1129 ExprAST *Init = VarNames[i].second.second.get();
1130
1131 // Emit the initializer before adding the variable to scope, this prevents
1132 // the initializer from referencing the variable itself, and permits stuff
1133 // like this:
1134 // var a = 1 in
1135 // var a = a in ... # refers to outer 'a'.
1136 VariableType Type = VarNames[i].second.first;
1137
1138 Value *InitVal;
1139 if (Init) {
1140 InitVal = Init->codegen();
1141 if (!InitVal)
1142 return nullptr;
1143 } else { // If not specified, use 0.0.
1144 if(Type == VariableType::String){
1145 InitVal = createString("");
1146 }else if(Type == VariableType::Double){
1147 InitVal = ConstantFP::get(*TheContext, APFloat(0.0));
1148 }else{
1149 return LogErrorV("Fatal crash");
1150 }
1151 }
1152
1153 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
1154 Builder->CreateStore(InitVal, Alloca);
1155
1156 // Remember the old variable binding so that we can restore the binding when
1157 // we unrecurse.
1158 OldBindings.push_back(NamedValues[VarName]);
1159
1160 // Remember this binding.
1161 NamedValues[VarName] = Alloca;
1162 }
1163
1164 // Codegen the body, now that all vars are in scope.
1165 Value *BodyVal = Body->codegen();
1166 if (!BodyVal)
1167 return nullptr;
1168
1169 // Pop all our variables from scope.
1170 for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
1171 NamedValues[VarNames[i].first] = OldBindings[i];
1172
1173 // Return the body computation.
1174 return BodyVal;
1175}
1176
1177
1178Function *PrototypeAST::codegen() {
1179 // Make the function type: double(double,double) etc.
1180 std::vector<Type *> types;
1181 // Type::getDoubleTy(*TheContext));
1182 for (auto &arg : Args) {
1183 types.push_back(GetTypeAlloc(arg.first));
1184 }
1185
1186 FunctionType *FT =
1187 FunctionType::get(GetTypeAlloc(ReturnType), types, false);
1188 Function *F =
1189 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
1190
1191 // Set names for all arguments.
1192 unsigned Idx = 0;
1193 for (auto &Arg : F->args()) {
1194 VarTypes[Args[Idx].second] = Args[Idx].first;
1195 Arg.setName(Args[Idx++].second);
1196 }
1197 return F;
1198}
1199
1200Function *FunctionAST::codegen() {
1201 // Transfer ownership of the prototype to the FunctionProtos map, but keep a
1202 // reference to it for use below.
1203 auto &P = *Proto;
1204
1205 FunctionProtos[Proto->getName()] = std::move(Proto);
1206 Function *TheFunction = getFunction(P.getName());
1207
1208 if (!TheFunction)
1209 return nullptr;
1210
1211 // If this is an operator, install it.
1212 if (P.isBinaryOp())
1213 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
1214
1215 // Create a new basic block to start insertion into.
1216 BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction);
1217 Builder->SetInsertPoint(BB);
1218
1219 // Record the function arguments in the NamedValues map.
1220 NamedValues.clear();
1221 for (auto &Arg : TheFunction->args()) {
1222 // Create an alloca for this variable.
1223 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
1224
1225 // Store the initial value into the alloca.
1226 Builder->CreateStore(&Arg, Alloca);
1227
1228 // Add arguments to variable symbol table.
1229 NamedValues[std::string(Arg.getName())] = Alloca;
1230 }
1231
1232 if (Value *RetVal = Body->codegen()) {
1233 // Finish off the function.
1234 Builder->CreateRet(RetVal);
1235
1236 // Validate the generated code, checking for consistency.
1237 verifyFunction(*TheFunction);
1238
1239 return TheFunction;
1240 }
1241
1242 // Error reading body, remove function.
1243 TheFunction->eraseFromParent();
1244
1245 if (P.isBinaryOp())
1246 BinopPrecedence.erase(P.getOperatorName());
1247 return nullptr;
1248}
1249
1250//===----------------------------------------------------------------------===//
1251// Top-Level parsing and JIT Driver
1252//===----------------------------------------------------------------------===//
1253
1254static void InitializeModuleAndPassManager() {
1255 // Open a new module.
1256 TheContext = std::make_unique<LLVMContext>();
1257 TheModule = std::make_unique<Module>("my cool jit", *TheContext);
1258
1259 // Create a new builder for the module.
1260 Builder = std::make_unique<IRBuilder<>>(*TheContext);
1261}
1262
1263static void HandleDefinition() {
1264 if (auto FnAST = ParseDefinition()) {
1265 if (auto *FnIR = FnAST->codegen()) {
1266 fprintf(stderr, "Read function definition:");
1267 FnIR->print(errs());
1268 fprintf(stderr, "\n");
1269 }
1270 } else {
1271 // Skip token for error recovery.
1272 getNextToken();
1273 }
1274}
1275
1276static void HandleExtern() {
1277 if (auto ProtoAST = ParseExtern()) {
1278 if (auto *FnIR = ProtoAST->codegen()) {
1279 fprintf(stderr, "Read extern: ");
1280 FnIR->print(errs());
1281 fprintf(stderr, "\n");
1282 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
1283 }
1284 } else {
1285 // Skip token for error recovery.
1286 getNextToken();
1287 }
1288}
1289
1290static void HandleTopLevelExpression() {
1291 // Evaluate a top-level expression into an anonymous function.
1292 if (auto FnAST = ParseTopLevelExpr()) {
1293 FnAST->codegen();
1294 } else {
1295 // Skip token for error recovery.
1296 getNextToken();
1297 }
1298}
1299
1300/// top ::= definition | external | expression | ';'
1301static void MainLoop() {
1302 while (true) {
1303 switch (CurTok) {
1304 case tok_eof:
1305 return;
1306 case ';': // ignore top-level semicolons.
1307 getNextToken();
1308 break;
1309 case tok_def:
1310 HandleDefinition();
1311 break;
1312 case tok_extern:
1313 HandleExtern();
1314 break;
1315 default:
1316 HandleTopLevelExpression();
1317 break;
1318 }
1319 }
1320}
1321
1322//===----------------------------------------------------------------------===//
1323// "Library" functions that can be "extern'd" from user code.
1324//===----------------------------------------------------------------------===//
1325
1326#ifdef _WIN32
1327#define DLLEXPORT __declspec(dllexport)
1328#else
1329#define DLLEXPORT
1330#endif
1331
1332/// putchard - putchar that takes a double and returns 0.
1333extern "C" DLLEXPORT double putchard(double X) {
1334 fputc((char)X, stderr);
1335 return 0;
1336}
1337
1338/// printd - printf that takes a double prints it as "%f\n", returning 0.
1339extern "C" DLLEXPORT double printd(double X) {
1340 fprintf(stderr, "%f\n", X);
1341 return 0;
1342}
1343
1344//===----------------------------------------------------------------------===//
1345// Main driver code.
1346//===----------------------------------------------------------------------===//
1347
1348int main() {
1349 // Install standard binary operators.
1350 // 1 is lowest precedence.
1351 BinopPrecedence['<'] = 10;
1352 BinopPrecedence['+'] = 20;
1353 BinopPrecedence['-'] = 20;
1354 BinopPrecedence['*'] = 40; // highest.
1355
1356 // Prime the first token.
1357 fprintf(stderr, "stalk> ");
1358 getNextToken();
1359
1360 InitializeModuleAndPassManager();
1361
1362 // Run the main "interpreter loop" now.
1363 MainLoop();
1364
1365 // Initialize the target registry etc.
1366 InitializeAllTargetInfos();
1367 InitializeAllTargets();
1368 InitializeAllTargetMCs();
1369 InitializeAllAsmParsers();
1370 InitializeAllAsmPrinters();
1371
1372 auto TargetTriple = sys::getDefaultTargetTriple();
1373 TheModule->setTargetTriple(TargetTriple);
1374
1375 std::string Error;
1376 auto Target = TargetRegistry::lookupTarget(TargetTriple, Error);
1377
1378 // Print an error and exit if we couldn't find the requested target.
1379 // This generally occurs if we've forgotten to initialise the
1380 // TargetRegistry or we have a bogus target triple.
1381 if (!Target) {
1382 errs() << Error;
1383 return 1;
1384 }
1385
1386 auto CPU = "generic";
1387 auto Features = "";
1388
1389 TargetOptions opt;
1390 auto RM = Optional<Reloc::Model>();
1391 auto TheTargetMachine =
1392 Target->createTargetMachine(TargetTriple, CPU, Features, opt, RM);
1393
1394 TheModule->setDataLayout(TheTargetMachine->createDataLayout());
1395
1396 auto Filename = "output.o";
1397 std::error_code EC;
1398 raw_fd_ostream dest(Filename, EC, sys::fs::OF_None);
1399
1400 if (EC) {
1401 errs() << "Could not open file: " << EC.message();
1402 return 1;
1403 }
1404
1405 legacy::PassManager pass;
1406 auto FileType = CGFT_ObjectFile;
1407
1408 if (TheTargetMachine->addPassesToEmitFile(pass, dest, nullptr, FileType)) {
1409 errs() << "TheTargetMachine can't emit a file of this type";
1410 return 1;
1411 }
1412
1413 pass.run(*TheModule);
1414 dest.flush();
1415
1416 outs() << "Wrote " << Filename << "\n";
1417
1418 return 0;
1419}
1420