-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlexer.cpp
183 lines (161 loc) · 5.64 KB
/
lexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#include <cstdlib>
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
#include <vector>
using namespace std;
// Symbol-table record kept for each distinct token text seen by the lexer.
class info{
public:
    // Source line numbers on which the token was encountered, in the order
    // they were recorded.
    std::vector<int> line;
    // Numeric code assigned to the token. Initialised to 0 so that a
    // default-constructed record never carries an indeterminate value.
    int value = 0;
};
int main(){
regex integer("(\\+|-)?[[:digit:]]+");
regex string1("(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\")|(\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\')");
regex varname("[a-zA-Z_][a-zA-Z0-9_]{0,31}");
regex key1("if|else|elif|for|while|is|not|function|return|break|continue|goto");
regex relop("<|>|==|!=|>=|<=");
regex op("\\+|\\-|\\*|\\/");
regex comm("#.*$");
unordered_map<string, int> keyMap;
// values 1-99 for keywords
keyMap["if"] = 1;
keyMap["else"] = 2;
keyMap["elif"] = 3;
keyMap["for"] = 4;
keyMap["while"] = 5;
keyMap["is"] = 6;
keyMap["not"] = 7;
keyMap["function"] = 8;
keyMap["return"] = 9;
keyMap["break"] = 10;
keyMap["continue"] = 11;
keyMap["goto"] = 12;
// values 100-199 for punctuations
keyMap[","] = 100;
keyMap[";"] = 101;
keyMap["\""] = 102;
keyMap["\'"] = 103;
keyMap["("] = 104;
keyMap[")"] = 105;
// values 200-299 for relational operators
keyMap["*"] = 200;
keyMap["/"] = 201;
keyMap["+"] = 202;
keyMap["-"] = 203;
keyMap["="] = 204;
keyMap["=="] = 205;
keyMap["!="] = 206;
keyMap[">="] = 207;
keyMap["<="] = 208;
// values 300-INFINITY for variable names
unordered_map<string, info> symtab;
//key is the variable that stores the raw input string
string key;
//Symtab index stores value in the key value pair. Set it 300 default for variables. Start at line number 1. Comment flag set to zero.
int symtabIndex = 300, line = 0;
// boolean flag to keep track whether the input string is a comment or not.
bool comment_flag = false;
//Infinite loop, keeps taking ip for now. File I/O will take care of this.
while(cin){
//Take the input
getline(cin,key);
cout<<key<<endl;
// increment the line
line++;
// make flag set to false intitally
comment_flag = false;
//TempKey is where we store the extracted substring, ie. something that matches a regex or a hashed value
string tempKey;
//strb is used so as to find out the end of a matched substring.
int strB = 0;
while(strB < key.length()){
// if the input string is a comment, break out of the loop.
if(comment_flag) break;
//i stores the starting index of the substring.
int i = strB;
for(;strB<key.length();strB++){
//Below is where we figure out the breakpoint for the substring.
if(key[strB] == ' ' || key[strB] == '\t' ||key[strB] == ',' ||key[strB] == ';' ||key[strB] == '+' ||key[strB] == '-' ||key[strB] == '*' ||key[strB] == '/' ||key[strB] == '(' ||key[strB] == ')')
break;
//Lookahead for certain operators
else if(key[strB] == '=' ||key[strB] == '>' ||key[strB] == '<' ||key[strB] == '!'){
if(key[strB+1] == '=' && (strB-i<1)) {
//Incrementing it by to 2 so as to compensate for the look ahead.
strB+=2;
}
break;
}
}
int len; // length of the substring
len = strB-i;
// If a single character then, len will be 0 since no increment is carried in that case, this following if will take of of it.
if(len == 0){
len = 1;
strB++;
}
tempKey = key.substr(i,len);
//If comment, break out. No need to fiddle around the Symbol table.
if(regex_match(tempKey,comm)) {
cout<<"comment"<<endl;
// set the flag indicating the string is a comment.
comment_flag = true;
cout << "Line number: " << line<< endl;
cout<<"----------------------------------------------------------------------"<<endl;
break;
}
if(tempKey == " " || tempKey == "\t") continue;
// TODO: Fix this. The entry will be added to symtab regardless of it's prior presence
unordered_map<string,info>::iterator it = symtab.find(tempKey);
// Not found in symtab
if(it == symtab.end()){
cout << tempKey << " not found in the Symbol table. \n";
// Find the type of the matched string.
unordered_map<string,int>::iterator keyIt = keyMap.find(tempKey);
// Add to the symbol table
if(keyIt != keyMap.end()){
info add;
add.line.push_back(line);
add.value = keyIt->second;
symtab.insert(pair<string,info>(tempKey,add));
cout << "added " << tempKey << endl;
}
// Else add it by using the def symtabIndex, because it's not a kw/punctuation/operator.
else{
info add;
add.line.push_back(line);
add.value = symtabIndex++;
symtab.insert(pair<string,info>(tempKey,add));
cout << "added " << tempKey << endl;
}
}
else{
cout << "Found the symbol " << it->first <<" with value "<< it->second.value << "\n";
it->second.line.push_back(line);
}
// Use RegEx to find the type of token.
if(regex_match(tempKey,key1)) cout<<"keyword"<<endl;
else{
if(regex_match(tempKey,varname)) cout<<"var"<<endl;
else if(regex_match(tempKey,string1)) cout<<"string"<<endl;
else if(regex_match(tempKey,integer)) cout<<"integer"<<endl;
else if(regex_match(tempKey,relop)) cout<<"Relational Operator"<<endl;
else if(regex_match(tempKey,op)) cout<<"Operator"<<endl;
else cout<<"ERROR! Plx check the syntax"<<endl;
}
cout << "Line number: " << line << endl;
cout<<"----------------------------------------------------------------------"<<endl;
}
}
cout<<"----------------SYMBOL TABLE STARTS HERE----------------"<<endl;
cout<<"Symbol\tLine\tValue"<<endl;
for(auto kv : symtab) {
cout<<kv.first<<"\t";
for (std::vector<int>::const_iterator i = kv.second.line.begin(); i != kv.second.line.end(); ++i){
std::cout << *i << ' ';
}
cout<<"\t"<<kv.second.value<<endl;
}
// Return SUCCESS. HELL YEAH!
return 0;
}