1.首先,先下载GitHub - jiweixing/BIT-MiniCC: A C compiler framework in Java项目并在IDEA中打开,确保安装好jdk1.8。
2.请参考博客编译原理Antlr教程_寒士°、的博客-CSDN博客 安装,配置,打包好MyCGrammer.jar
3.将MyCGrammer.jar放入lib文件夹中
之后点击右上角file->project structure->Libraries
中引入MyCGrammer.jar
4. 在 BIT-MiniCC-master\src\bit\minisys\minicc 目录下新建文件 MyMiniCompiler.java
在 BIT-MiniCC-master\src\bit\minisys\minicc\scanner 目录下新建文件 MyScanner.java
package bit.minisys.minicc;
import MyCGrammer.MyCGrammerLexer;
import MyCGrammer.MyCGrammerParser;
import bit.minisys.minicc.scanner.MyScanner;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * Command-line driver: lexes and parses one C source file with the generated
 * MyCGrammer lexer/parser, then hands the token stream to {@link MyScanner}
 * together with the name of the token file to produce.
 */
public class MyMiniCompiler {
    /**
     * Runs the lexer and parser over the input file and constructs a
     * {@link MyScanner} for "&lt;input file name&gt;.tokens".
     *
     * @param args optional; when present, {@code args[0]} is used as the input
     *             file path instead of the built-in placeholder path
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String inputFile = args.length > 0 ? args[0] : "输入文件路径";

        CommonTokenStream tokens;
        // try-with-resources so the file handle is released even if lexing or parsing throws.
        try (InputStream is = new FileInputStream(inputFile)) {
            ANTLRInputStream input = new ANTLRInputStream(is);
            MyCGrammerLexer lexer = new MyCGrammerLexer(input);
            tokens = new CommonTokenStream(lexer);
            MyCGrammerParser parser = new MyCGrammerParser(tokens);
            // The parse result itself is not used here; the call is kept because it
            // drives the parser over the token stream before the tokens are handed on.
            parser.compilationUnit();
        }

        // Keep only the last path component. The separator pattern is a regex, so the
        // backslash must be escaped twice; '/' is accepted as well.
        String fName = inputFile.trim();
        String[] temp = fName.split("[\\\\/]");
        String baseName = temp.length > 0 ? temp[temp.length - 1] : fName;
        String tokenFileName = baseName + ".tokens";

        new MyScanner(tokenFileName, tokens);
    }
}
package bit.minisys.minicc.scanner;
import org.antlr.v4.runtime.CommonTokenStream;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
public class MyScanner {
public MyScanner(String tokenFileName,CommonTokenStream tokens) throws IOException {
FileWriter fileWriter = new FileWriter(new File(tokenFileName));
for(int i=0;i
5.结果展示
int fibonacci(int num){
int res;
if(num < 1){
res = 0;
}else if(num <= 2){
res = 1;
}else{
res = fibonacci(num-1)+fibonacci(num-2);
}
return res;
}
int main(){
Mars_PrintStr("Please input a number:\n");
int n = Mars_GetInt();
int res = fibonacci(n);
Mars_PrintStr("This number's fibonacci value is :\n");
Mars_PrintInt(res);
return 0;
}
[@0,0:2='int',<28>,1:0]
[@1,4:12='fibonacci',<62>,1:4]
[@2,13:13='(',<8>,1:13]
[@3,14:16='int',<28>,1:14]
[@4,18:20='num',<62>,1:18]
[@5,21:21=')',<9>,1:21]
[@6,22:22='{',<5>,1:22]
[@7,25:27='int',<28>,2:1]
[@8,29:31='res',<62>,2:5]
[@9,32:32=';',<1>,2:8]
[@10,35:36='if',<7>,3:1]
[@11,37:37='(',<8>,3:3]
[@12,38:40='num',<62>,3:4]
[@13,42:42='<',<47>,3:8]
[@14,44:44='1',<63>,3:10]
[@15,45:45=')',<9>,3:11]
[@16,46:46='{',<5>,3:12]
[@17,50:52='res',<62>,4:2]
[@18,54:54='=',<70>,4:6]
[@19,56:56='0',<63>,4:8]
[@20,57:57=';',<1>,4:9]
[@21,60:60='}',<6>,5:1]
[@22,61:64='else',<10>,5:2]
[@23,66:67='if',<7>,5:7]
[@24,68:68='(',<8>,5:9]
[@25,69:71='num',<62>,5:10]
[@26,73:74='<=',<45>,5:14]
[@27,76:76='2',<63>,5:17]
[@28,77:77=')',<9>,5:18]
[@29,78:78='{',<5>,5:19]
[@30,82:84='res',<62>,6:2]
[@31,86:86='=',<70>,6:6]
[@32,88:88='1',<63>,6:8]
[@33,89:89=';',<1>,6:9]
[@34,92:92='}',<6>,7:1]
[@35,93:96='else',<10>,7:2]
[@36,97:97='{',<5>,7:6]
[@37,101:103='res',<62>,8:2]
[@38,105:105='=',<70>,8:6]
[@39,107:115='fibonacci',<62>,8:8]
[@40,116:116='(',<8>,8:17]
[@41,117:119='num',<62>,8:18]
[@42,120:120='-',<57>,8:21]
[@43,121:121='1',<63>,8:22]
[@44,122:122=')',<9>,8:23]
[@45,123:123='+',<56>,8:24]
[@46,124:132='fibonacci',<62>,8:25]
[@47,133:133='(',<8>,8:34]
[@48,134:136='num',<62>,8:35]
[@49,137:137='-',<57>,8:38]
[@50,138:138='2',<63>,8:39]
[@51,139:139=')',<9>,8:40]
[@52,140:140=';',<1>,8:41]
[@53,143:143='}',<6>,9:1]
[@54,146:151='return',<18>,10:1]
[@55,153:155='res',<62>,10:8]
[@56,156:156=';',<1>,10:11]
[@57,158:158='}',<6>,11:0]
[@58,160:162='int',<28>,12:0]
[@59,164:167='main',<62>,12:4]
[@60,168:168='(',<8>,12:8]
[@61,169:169=')',<9>,12:9]
[@62,170:170='{',<5>,12:10]
[@63,173:185='Mars_PrintStr',<62>,13:1]
[@64,186:186='(',<8>,13:14]
[@65,187:212='"Please input a number:\n"',<64>,13:15]
[@66,213:213=')',<9>,13:41]
[@67,214:214=';',<1>,13:42]
[@68,217:219='int',<28>,14:1]
[@69,221:221='n',<62>,14:5]
[@70,223:223='=',<70>,14:7]
[@71,225:235='Mars_GetInt',<62>,14:9]
[@72,236:236='(',<8>,14:20]
[@73,237:237=')',<9>,14:21]
[@74,238:238=';',<1>,14:22]
[@75,241:243='int',<28>,15:1]
[@76,245:247='res',<62>,15:5]
[@77,249:249='=',<70>,15:9]
[@78,251:259='fibonacci',<62>,15:11]
[@79,260:260='(',<8>,15:20]
[@80,261:261='n',<62>,15:21]
[@81,262:262=')',<9>,15:22]
[@82,263:263=';',<1>,15:23]
[@83,266:278='Mars_PrintStr',<62>,16:1]
[@84,279:279='(',<8>,16:14]
[@85,280:317='"This number's fibonacci value is :\n"',<64>,16:15]
[@86,318:318=')',<9>,16:53]
[@87,319:319=';',<1>,16:54]
[@88,322:334='Mars_PrintInt',<62>,17:1]
[@89,335:335='(',<8>,17:14]
[@90,336:338='res',<62>,17:15]
[@91,339:339=')',<9>,17:18]
[@92,340:340=';',<1>,17:19]
[@93,346:351='return',<18>,18:4]
[@94,353:353='0',<63>,18:11]
[@95,354:354=';',<1>,18:12]
[@96,356:356='}',<6>,19:0]
[@97,357:356='',<-1>,19:1]
二、C++手撸词法分析器
当时做实验的时候还没有做到后面,自己手撸了一个词法分析器,然而后面的实验根本用不上,所以不如一开始就使用antlr来做,并不推荐此方法。
词法分析器本质是一个DFA,罗列出状态,根据转换条件编写转换函数即可。
#include <cctype>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
// Table of the 34 keyword spellings; isKeyword() does a linear search over it.
string keywords[34] = { "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "inline", "int", "long", "register", "restrict", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while" };
// Table of the 54 operator/punctuator spellings (including the digraph forms at the end);
// isSymbol() does a linear search over it.
string symbol[54] = { "[","]","(",")","{","}",".","->","++","--","&","*","+","-","~","!","/","%","<<",">>","<",">","<=",">=","==","!=","^","|","&&","||","?",":",";","...","=","*=","/=","%=","+=","-=","<<=",">>=","&=","^=","|=",",","#","##","<:",":>","<%","%>","%:","%:%:" };
// States of the lexer's state machine; main() switches on the current state
// once per input character.
enum DFA
{
INITIAL, // initial state
IDENTIFIER, // identifier
KEYWORD, // keyword (main() in this file never assigns this state)
SYMBOL, // special symbol
CHAR, // character constant
MIDCHAR, // intermediate state on the way to a character constant
INTERGER, // integer constant
STRING, // string literal
MIDSTRING, // intermediate state on the way to a string literal
FLOAT, // floating-point constant
};
// Lexer input and cursor state shared by GetNextChar() and main().
vector<string> File;  // input text, one element per line (filled by main() via getline)
string line;          // scratch copy of the line currently being read
int lineIndex = 0;    // index into File of the current line
int charIndex = 0;    // index of the next character to read within the current line
int tokenNum = 0;     // number of tokens emitted so far
// Returns the next input character and advances the global cursor
// (lineIndex / charIndex) over File.
//
// After the last character of every line a single '\n' is returned, so a token
// never runs across a line boundary unnoticed. Once all lines are consumed the
// function returns '\0' on every call.
char GetNextChar()
{
    while (lineIndex < static_cast<int>(File.size()))
    {
        line = File.at(lineIndex);
        const int length = static_cast<int>(line.length());

        if (charIndex < length)
        {
            return line.at(charIndex++);
        }
        if (charIndex == length)
        {
            // One position past the end of the line stands for the line terminator.
            charIndex++;
            return '\n';
        }

        // The terminator was already delivered: move on to the next line.
        lineIndex++;
        charIndex = 0;
    }
    return '\0';
}
// Reports whether s is exactly equal to one of the entries of the keywords table.
bool isKeyword(string s)
{
    for (const string &candidate : keywords)
    {
        if (candidate == s)
        {
            return true;
        }
    }
    return false;
}
// Reports whether s is exactly equal to one of the entries of the symbol table.
bool isSymbol(string s)
{
    for (const string &candidate : symbol)
    {
        if (candidate == s)
        {
            return true;
        }
    }
    return false;
}
// Returns a copy of src in which comments and "#include <...>" directives are
// overwritten with spaces. Every other character keeps its position, and line
// breaks are preserved, so line and column numbers stay valid.
//
// Character constants and string literals are skipped, so comment markers or
// '#' inside them are left alone.
static string BlankCommentsAndIncludes(const string &src)
{
    string out = src;
    const size_t n = out.size();
    size_t i = 0;

    while (i < n)
    {
        const char c = out[i];

        if (c == '\'' || c == '"')
        {
            // Skip the literal. A backslash escapes the following character, and an
            // unterminated literal ends at the end of its line.
            const char quote = c;
            ++i;
            while (i < n && out[i] != quote && out[i] != '\n')
            {
                if (out[i] == '\\' && i + 1 < n)
                {
                    ++i;
                }
                ++i;
            }
            if (i < n && out[i] == quote)
            {
                ++i;
            }
        }
        else if (c == '/' && i + 1 < n && out[i + 1] == '/')
        {
            // Line comment: blank up to, but not including, the line break.
            while (i < n && out[i] != '\n')
            {
                if (out[i] != '\r')
                {
                    out[i] = ' ';
                }
                ++i;
            }
        }
        else if (c == '/' && i + 1 < n && out[i + 1] == '*')
        {
            // Block comment: blank through the closing marker, or to the end of the
            // input when it is never closed. Line breaks inside are kept.
            const size_t close = out.find("*/", i + 2);
            const size_t stop = (close == string::npos) ? n : close + 2;
            for (; i < stop; ++i)
            {
                if (out[i] != '\n' && out[i] != '\r')
                {
                    out[i] = ' ';
                }
            }
        }
        else if (c == '#')
        {
            // Only "#include <...>" is blanked, and only when the closing '>' is on
            // the same line; any other '#' is left for the lexer.
            size_t eol = out.find('\n', i);
            if (eol == string::npos)
            {
                eol = n;
            }
            size_t word = i + 1;
            while (word < eol && (out[word] == ' ' || out[word] == '\t'))
            {
                ++word;
            }
            const size_t gt = out.find('>', word);
            if (out.compare(word, 7, "include") == 0 && gt != string::npos && gt < eol)
            {
                for (; i <= gt; ++i)
                {
                    out[i] = ' ';
                }
            }
            else
            {
                ++i;
            }
        }
        else
        {
            ++i;
        }
    }
    return out;
}

// Pre-processes C source text before lexing: blanks out comments and
// "#include <...>" directives, then writes the result to the file at path
// (created or overwritten).
//
// str  - full text of the source file
// path - output file for the pre-processed text
void PreTreatment(string str, string path)
{
    ofstream s(path);
    s << BlankCommentsAndIncludes(str);
}
int main()
{
string path1 = "C:\vscodeWork\test.c";//测试文件
string path2 = "C:\vscodeWork\testresult.pretreat";//处理掉注释的文件
string path3= "C:\vscodeWork\examples.tokens";//生成目标文件
ifstream fs(path1);
stringstream ss;
ss << fs.rdbuf();
fs.close();
string str = ss.str();
PreTreatment(str, path2);
ifstream fs1(path2);
while (getline(fs1, line))
{
File.push_back(line);
}
fs1.close();
DFA state = INITIAL;
string tokens = "";
string token = "";
char c = NULL;
bool SymbolFlag = false;
bool pre = false;
bool flag = true;
while (flag)
{
if (!pre)
{
c = GetNextChar();
}
pre = false;
switch (state)
{
case INITIAL:
{
token = "";
//如果读入的第一个字符为字母或是下划线
if (c == '_' || isalpha(c))
{
//其中如果是u或者l,可能为字符
if (c == 'u' || c == 'U' || c == 'l' || c == 'L')
{
state = MIDCHAR;
}
//否则则为标识符
else
{
state = IDENTIFIER;
}
token = token + c;
}
//数字
else if (isdigit(c))
{
state = INTERGER;
token = token + c;
}
//单引号转字符态
else if (c == ''')
{
state = CHAR;
token = token + c;
}
//双引号转字符串
else if (c == '"')
{
state = STRING;
token = token + c;
}
else if (c == ' '||c=='n'||c=='r'||c=='t')
{
}
//终止
else if (c == NULL)
{
flag = false;
string tokenstream = "";
tokenstream = "[@" + to_string(tokenNum) + "," + to_string(charIndex - token.length() + 1) + ":" + to_string(charIndex) + "='" + "<'EOF'>" + "',<" + "EOF" + ">," + to_string(lineIndex) + ":" + to_string(charIndex - token.length() + 1) + "]n";
tokens = tokens + tokenstream;
cout << tokenstream;
}
//其余情况为运算符
else
{
token = token + c;
SymbolFlag = isSymbol(token);
if (SymbolFlag)
{
state = SYMBOL;
}
else
{
cout << "error" << endl;
}
}
break;
}
case IDENTIFIER:
{
if (isalpha(c) || isdigit(c) || c == '_')
{
state = IDENTIFIER;
token = token + c;
}
else
{
if (isKeyword(token))
{
string tokenstream = "";
tokenstream = "[@" + to_string(tokenNum) + "," + to_string(charIndex - token.length()-1) + ":" + to_string(charIndex-2) + "='" + token + "',<'" + token + "'>," + to_string(lineIndex) + ":" + to_string(charIndex - token.length() -1) + "]n";
tokens = tokens + tokenstream;
cout << tokenstream;
}
else
{
string tokenstream = "";
tokenstream = "[@" + to_string(tokenNum) + "," + to_string(charIndex - token.length() - 1) + ":" + to_string(charIndex-2) + "='" + token + "',<" + "Identifier" + ">," + to_string(lineIndex) + ":" + to_string(charIndex - token.length()-1) + "]n";
tokens = tokens + tokenstream;
cout << tokenstream;
}
tokenNum++;
state = INITIAL;
pre = true;
}
break;
}
case MIDCHAR:
{
if (c == ''')
{
state = CHAR;
token = token + c;
}
else if (c == '"')
{
state = STRING;
token = token + c;
}
else if (c == '8')
{
state = MIDSTRING;
token = token + c;
}
else
{
state = IDENTIFIER;
token = token + c;
}
break;
}
case CHAR:
{
if (c != ''')
{
state = CHAR;
token = token + c;
}
else
{
token = token + c;
string tokenstream = "";
tokenstream = "[@" + to_string(tokenNum) + "," + to_string(charIndex - token.length() -1) + ":" + to_string(charIndex-2) + "='" + token + "',<" + "CharacterConstant" + ">," + to_string(lineIndex) + ":" + to_string(charIndex - token.length()-1) + "]n";
tokens = tokens + tokenstream;
cout << tokenstream;
tokenNum++;
state = INITIAL;
}
break;
}
case MIDSTRING:
if (c == '"')
{
state = STRING;
token = token + c;
}
else
{
state = IDENTIFIER;
token = token + c;
}
break;
case STRING:
{
if (c != '"')
{
state = STRING;
token = token + c;
}
else
{
token = token + c;
string tokenstream = "";
tokenstream = "[@" + to_string(tokenNum) + "," + to_string(charIndex - token.length() -1) + ":" + to_string(charIndex-2) + "='" + token + "',<" + "StringLiteral" + ">," + to_string(lineIndex) + ":" + to_string(charIndex - token.length() - 1) + "]n";
tokens = tokens + tokenstream;
cout << tokenstream;
tokenNum++;
state = INITIAL;
}
break;
}
case INTERGER:
{
if (isdigit(c) || c == 'x' || c == 'X' || c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'L' || c == 'l' || c == 'U' || c == 'u')
{
state = INTERGER;
token = token + c;
}
else if (c == '.')
{
state = FLOAT;
token = token + c;
}
else
{
pre = true;
string tokenstream = "";
tokenstream = "[@" + to_string(tokenNum) + "," + to_string(charIndex - token.length() - 1) + ":" + to_string(charIndex-2) + "='" + token + "',<" + "IntegerConstant" + ">," + to_string(lineIndex) + ":" + to_string(charIndex - token.length() - 1) + "]n";
tokens = tokens + tokenstream;
cout << tokenstream;
tokenNum++;
state = INITIAL;
}
break;
}
case FLOAT:
{
if (isdigit(c) || c == 'e' || c == 'E' || c == 'f' || c == 'F' || c == 'L' || c == 'l' || c == 'p' || c == 'P' || c == '+' || c == '-' || c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'a' || c == 'b' || c == 'c' || c == 'd')
{
state = FLOAT;
token = token + c;
}
else
{
pre = true;
string tokenstream = "";
tokenstream = "[@" + to_string(tokenNum) + "," + to_string(charIndex - token.length() - 1) + ":" + to_string(charIndex-2) + "='" + token + "',<" + "FloatingConstant" + ">," + to_string(lineIndex) + ":" + to_string(charIndex - token.length() - 1) + "]n";
tokens = tokens + tokenstream;
cout << tokenstream;
tokenNum++;
state = INITIAL;
}
break;
}
case SYMBOL:
{
string temp = token + c;
if (!isSymbol(temp))
{
pre = true;
string tokenstream = "";
tokenstream = "[@" + to_string(tokenNum) + "," + to_string(charIndex - token.length() - 1) + ":" + to_string(charIndex-2) + "='" + token + "',<'" + token + "'>," + to_string(lineIndex) + ":" + to_string(charIndex - token.length() - 1) + "]n";
tokens = tokens + tokenstream;
cout << tokenstream;
tokenNum++;
state = INITIAL;
}
else
{
state = SYMBOL;
token = temp;
}
break;
}
default:
break;
}
}
ofstream s(path3);
s << tokens;
}



