词法分析C/C++的代码
直接上代码
原理很简单:逐个字符扫描输入的每一行,识别C/C++的保留字、无符号整型常数、运算符和分隔符;以字母开头、由字母和数字组成的串,若不是保留字,则判定为标识符。
直接上Java代码:
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * A minimal line-oriented lexical analyser for a subset of C/C++.
 *
 * <p>{@link #main(String[])} reads {@code reader.txt} line by line and writes one
 * {@code (category , ''lexeme'')} entry per token to {@code writer.txt}.
 * Category codes: 1 = keyword, 2 = identifier, 3 = unsigned integer constant,
 * 4 = operator, 5 = delimiter, -1 = unrecognised character.
 *
 * <p>Blank lines in the input are skipped; they no longer abort the analysis.
 */
public class Analyse {

    /** Category code: reserved word. */
    private static final int KEYWORD = 1;
    /** Category code: identifier (a letter followed by letters/digits). */
    private static final int IDENTIFIER = 2;
    /** Category code: unsigned integer constant. */
    private static final int CONSTANT = 3;
    /** Category code: operator. */
    private static final int OPERATOR = 4;
    /** Category code: delimiter. */
    private static final int DELIMITER = 5;
    /** Category code: character that matches no rule. */
    private static final int UNKNOWN = -1;

    /**
     * Tokenises {@code reader.txt} and writes the result to {@code writer.txt}.
     *
     * @param args unused
     * @throws IOException if either file cannot be opened, read or written
     */
    public static void main(String[] args) throws IOException {
        // Subset of the C/C++ reserved words recognised by this analyser (not exhaustive).
        String[] KeyWords = {"int", "if", "else", "for", "char", "class", "public", "double", "private",
                "while", "do", "return", "break", "continue", "struct", "float", "void", "static"};
        final int length = 100;
        char[] chars = new char[length]; // scratch buffer handed to Check

        // try-with-resources closes both streams even when analysis fails half-way.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(new FileInputStream("reader.txt")));
             BufferedWriter writer =
                     new BufferedWriter(new OutputStreamWriter(new FileOutputStream("writer.txt")))) {
            writer.write("识别类别码提示\n");
            writer.write("识别保留字--1 标识符--2 常数--3 运算符--4 分隔符--5\n");
            writer.write("词法分析的结果如下:\n");

            String line;
            // readLine() returns null at end of file; that is the only stop condition.
            while ((line = reader.readLine()) != null) {
                int index = 0; // position of the next unprocessed character in this line
                while (index < line.length()) {
                    List<String> result = Check(line.charAt(index), chars, KeyWords, line, index, null);
                    if (result.isEmpty()) {
                        // Whitespace: nothing to report, move to the next character.
                        index++;
                        continue;
                    }
                    // Result layout: [0] next index, [1] category code, [2] lexeme.
                    index = Integer.parseInt(result.get(0));
                    int key = Integer.parseInt(result.get(1));
                    String words = result.get(2);
                    writer.write("(" + key + " , \t" + "''" + words + "''" + ")");
                    writer.newLine();
                }
                writer.flush();
            }
        }
    }

    /**
     * Recognises the single token that starts at {@code string.charAt(Index)}.
     *
     * @param t        the character at {@code Index}; decides which rule is applied
     * @param chars    scratch buffer; receives as much of the lexeme as fits (may be {@code null})
     * @param KeyWords reserved words; a letter/digit run equal to one of them is a keyword
     * @param string   the line being analysed
     * @param Index    position of {@code t} inside {@code string}
     * @param strs     ignored; kept only so existing callers keep compiling
     * @return an empty list when {@code t} is whitespace; otherwise three strings:
     *         the index just past the token, the category code, and the lexeme
     */
    public static List<String> Check(char t, char[] chars, String[] KeyWords, String string, int Index, String strs) {
        List<String> result = new ArrayList<>();
        if (Character.isWhitespace(t)) {
            return result; // caller advances by one character
        }

        int end = Index + 1; // exclusive end of the lexeme; every scan is bounded by the line length
        int keyId;

        if (isAsciiLetter(t)) {
            while (end < string.length() && isAsciiLetterOrDigit(string.charAt(end))) {
                end++;
            }
            keyId = IDENTIFIER;
        } else if (isAsciiDigit(t)) {
            while (end < string.length() && isAsciiDigit(string.charAt(end))) {
                end++;
            }
            keyId = CONSTANT;
        } else {
            switch (t) {
                // Operators that may be followed by '=' to form a two-character operator.
                // '>' and '>=' use the same code as every other operator, matching the legend.
                case '>':
                case '<':
                case ':':
                    if (end < string.length() && string.charAt(end) == '=') {
                        end++;
                    }
                    keyId = OPERATOR;
                    break;
                case '*':
                case '/':
                case '+':
                case '-':
                case '=':
                    keyId = OPERATOR;
                    break;
                case ';':
                case ',':
                case '(':
                case ')':
                case '{':
                case '}':
                case '[':
                case ']':
                case '\'':
                    keyId = DELIMITER;
                    break;
                default:
                    keyId = UNKNOWN;
                    break;
            }
        }

        // The first character comes from t, the remainder from the line itself.
        String lexeme = t + string.substring(Index + 1, end);
        if (keyId == IDENTIFIER && isKeyword(lexeme, KeyWords)) {
            keyId = KEYWORD;
        }
        copyLexeme(lexeme, chars);

        result.add(String.valueOf(end));
        result.add(String.valueOf(keyId));
        result.add(lexeme);
        return result;
    }

    /** Returns whether {@code c} is an ASCII letter. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Returns whether {@code c} is an ASCII decimal digit. */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** Returns whether {@code c} is an ASCII letter or decimal digit. */
    private static boolean isAsciiLetterOrDigit(char c) {
        return isAsciiLetter(c) || isAsciiDigit(c);
    }

    /** Returns whether {@code word} equals one of {@code keyWords}; a null array matches nothing. */
    private static boolean isKeyword(String word, String[] keyWords) {
        if (keyWords == null) {
            return false;
        }
        for (String keyWord : keyWords) {
            if (word.equals(keyWord)) {
                return true;
            }
        }
        return false;
    }

    /** Copies as much of {@code lexeme} as fits into {@code chars}, starting at index 0. */
    private static void copyLexeme(String lexeme, char[] chars) {
        if (chars == null) {
            return;
        }
        int count = Math.min(lexeme.length(), chars.length);
        lexeme.getChars(0, count, chars, 0);
    }
}
词法分析基于Lex


