《自制编程语言:第一章和第二章》
1 使用yacc和lex实现简单的计算器
这两个工具的作用是根据我们编写的词法规则文件(.l)和语法规则文件(.y)生成词法分析器和语法分析器的.c代码,然后我们用GCC编译这些c代码生成exe文件。运行exe,输入一个字符串,如1+1,回车后程序会解析并计算它,即可看到效果。
win无法直接使用上述两个命令,所以安装各自的替代品bison和flex,都在unxutils工具包中。
安装unxutils,配置环境变量,可以使用bison和flex命令
https://blog.csdn.net/bedusing/article/details/5409495#comments
安装QTcreator,将minGW配置到环境变量,可以使用GCC命令
代码如下:
mycalc.y
%{
/* Prologue: C declarations copied verbatim into the generated parser. */
#include <stdio.h>
#include <stdlib.h>
/* Compile the parser's trace support in; tracing stays off until yydebug is set. */
#define YYDEBUG 1

/* Declared up front because the generated parser calls both before they are defined. */
int yylex(void);
int yyerror(char const *str);
%}
/* Semantic value carried by tokens and nonterminals. */
%union {
    int int_value;
    double double_value;
}
/* The <double_value> tag selects the union member used for $$ / $n. */
%token <double_value> DOUBLE_LITERAL
%token ADD SUB MUL DIV CR
%type <double_value> expression term primary_expression
%%
/* One or more input lines, each evaluated on its own. */
line_list
    : line
    | line_list line
    ;
/* A complete expression terminated by a newline: print its value. */
line
    : expression CR
    {
        printf(">>%lf\n", $1);
    }
    ;
/* Addition and subtraction, left-associative, lowest precedence. */
expression
    : term
    | expression ADD term
    {
        $$ = $1 + $3;
    }
    | expression SUB term
    {
        $$ = $1 - $3;
    }
    ;
/* Multiplication and division, left-associative, bind tighter than ADD/SUB. */
term
    : primary_expression
    | term MUL primary_expression
    {
        $$ = $1 * $3;
    }
    | term DIV primary_expression
    {
        $$ = $1 / $3;
    }
    ;
/* The only operand kind is a numeric literal. */
primary_expression
    : DOUBLE_LITERAL
    ;
%%
/*
 * Error callback for the generated parser.
 * Prints the text of the token the scanner was looking at to stderr.
 * The message passed in `str` is intentionally not used.
 * Always returns 0.
 */
int
yyerror(char const *str)
{
    extern char *yytext;

    (void)str;
    fprintf(stderr, "parser error near %s\n", yytext);
    return 0;
}
/*
 * Entry point of the yacc/lex calculator.
 * Points the scanner at stdin and runs the parser; a non-zero result from
 * yyparse() is reported on stderr and ends the process with status 1.
 */
int main(void)
{
    extern int yyparse(void);
    extern FILE *yyin;

    yyin = stdin;
    if (yyparse()) {
        fprintf(stderr, "Error ! Error ! Error !\n");
        exit(1);
    }
    return 0;
}
mycalc.l
%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"

/* Called by the scanner at end of input; returning 1 means there is no further input. */
int
yywrap(void)
{
    return 1;
}
%}
%%
"+"     return ADD;
"-"     return SUB;
"*"     return MUL;
"/"     return DIV;
"\n"    return CR;
([1-9][0-9]*)|0|([0-9]+\.[0-9]*) {
    /* Integer or decimal literal; every number is passed on as a double. */
    double temp;
    sscanf(yytext, "%lf", &temp);
    yylval.double_value = temp;
    return DOUBLE_LITERAL;
}
[ \t]   ;
. {
    /* Any other character is not part of the calculator's language. */
    fprintf(stderr, "lexical error.\n");
    exit(1);
}
%%
bat脚本文件,点击执行。也可以一行一行输入查看效果
rem Generate y.tab.c and y.tab.h from the grammar
bison --yacc -dv mycalc.y
rem Generate lex.yy.c (it includes y.tab.h)
flex mycalc.l
rem Compile y.tab.c and lex.yy.c into mycalc.exe
gcc -o mycalc y.tab.c lex.yy.c
执行mycalc.exe测试
2 不借助工具编写计算器
自制词法分析器
运行机制:先用set_line()传入一行字符串,之后每调用一次get_token(),就会返回一个分割好的记号。
token.h
#ifndef TOKEN_H_INCLUDED
#define TOKEN_H_INCLUDED

/* Kinds of token the lexer can hand back. */
typedef enum {
    BAD_TOKEN,
    NUMBER_TOKEN,
    ADD_OPERATOR_TOKEN,
    SUB_OPERATOR_TOKEN,
    MUL_OPERATOR_TOKEN,
    DIV_OPERATOR_TOKEN,
    LEFT_PAREN_TOKEN,
    RIGHT_PAREN_TOKEN,
    END_OF_LINE_TOKEN
} TokenKind;

/* Capacity of Token.str in bytes. */
#define MAX_TOKEN_SIZE (100)

/*
 * One lexical token.
 * Only block comments are used in this header so that it still compiles
 * when the project is built with -ansi.
 */
typedef struct {
    TokenKind kind;             /* what was recognised */
    double value;               /* numeric value of the token */
    char str[MAX_TOKEN_SIZE];   /* text of the token */
} Token;

/* Hand the lexer the line to scan. */
void set_line(char *line);
/* Store the next token of the current line in *token. */
void get_token(Token *token);

#endif /* TOKEN_H_INCLUDED */
lexicalanalyzer.c
#include#include #include #include "token.h" static char *st_line; static int st_line_pos; typedef enum { INITIAL_STATUS, IN_INT_PART_STATUS, DOT_STATUS, IN_FRAC_PART_STATUS } LexerStatus; void get_token(Token *token) { int out_pos = 0; LexerStatus status = INITIAL_STATUS; char current_char; token->kind = BAD_TOKEN; while (st_line[st_line_pos] != ' ') { current_char = st_line[st_line_pos]; if ((status == IN_INT_PART_STATUS || status == IN_FRAC_PART_STATUS) && !isdigit(current_char) && current_char != '.') { token->kind = NUMBER_TOKEN; sscanf(token->str, "%lf", &token->value); return; } if (isspace(current_char)) { if (current_char == 'n') { token->kind = END_OF_LINE_TOKEN; return; } st_line_pos++; continue; } if (out_pos >= MAX_TOKEN_SIZE-1) { fprintf(stderr, "token too long.n"); exit(1); } token->str[out_pos] = st_line[st_line_pos]; st_line_pos++; out_pos++; token->str[out_pos] = ' '; if (current_char == '+') { token->kind = ADD_OPERATOR_TOKEN; return; } else if (current_char == '-') { token->kind = SUB_OPERATOR_TOKEN; return; } else if (current_char == '*') { token->kind = MUL_OPERATOR_TOKEN; return; } else if (current_char == '/') { token->kind = DIV_OPERATOR_TOKEN; return; } else if (current_char == '(') { token->kind = LEFT_PAREN_TOKEN; return; } else if (current_char == ')') { token->kind = RIGHT_PAREN_TOKEN; return; } else if (isdigit(current_char)) { if (status == INITIAL_STATUS) { status = IN_INT_PART_STATUS; } else if (status == DOT_STATUS) { status = IN_FRAC_PART_STATUS; } } else if (current_char == '.') { if (status == IN_INT_PART_STATUS) { status = DOT_STATUS; } else { fprintf(stderr, "syntax error.n"); exit(1); } } } } void set_line(char *line) { st_line = line; st_line_pos = 0; } #if 0 void parse_line(void) { Token token; st_line_pos = 0; for (;;) { get_token(&token); if (token.kind == END_OF_LINE_TOKEN) { break; } else { printf("kind..%d, str..%sn", token.kind, token.str); } } } int main(int argc, char **argv) { //循环读取输入的字符串并分割 while (fgets(st_line, 
LINE_BUF_SIZE, stdin) != NULL) { parse_line(); } return 0; } #endif
自制语法分析器
词法分析器已经能把输入的字符串分割成一个个记号,现在要判断这些记号按怎样的顺序(优先级)来计算。
采用递归下降分析的方法。
parser.c
#include#include #include "token.h" #define LINE_BUF_SIZE (1024) static Token st_look_ahead_token; static int st_look_ahead_token_exists; static void my_get_token(Token *token) { if (st_look_ahead_token_exists) { *token = st_look_ahead_token; st_look_ahead_token_exists = 0; } else { get_token(token); } } static void unget_token(Token *token) { st_look_ahead_token = *token; st_look_ahead_token_exists = 1; } double parse_expression(void); static double parse_primary_expression() { Token token; double value = 0.0; int minus_flag = 0; my_get_token(&token); if (token.kind == SUB_OPERATOR_TOKEN) { minus_flag = 1; } else { unget_token(&token); } my_get_token(&token); if (token.kind == NUMBER_TOKEN) { value = token.value; } else if (token.kind == LEFT_PAREN_TOKEN) { value = parse_expression(); my_get_token(&token); if (token.kind != RIGHT_PAREN_TOKEN) { fprintf(stderr, "missing ')' error.n"); exit(1); } } else { unget_token(&token); } if (minus_flag) { value = -value; } return value; } static double parse_term() { double v1; double v2; Token token; v1 = parse_primary_expression(); for (;;) { my_get_token(&token); if (token.kind != MUL_OPERATOR_TOKEN && token.kind != DIV_OPERATOR_TOKEN) { unget_token(&token); break; } v2 = parse_primary_expression(); if (token.kind == MUL_OPERATOR_TOKEN) { v1 *= v2; } else if (token.kind == DIV_OPERATOR_TOKEN) { v1 /= v2; } } return v1; } double parse_expression() { double v1; double v2; Token token; v1 = parse_term(); for (;;) { my_get_token(&token); if (token.kind != ADD_OPERATOR_TOKEN && token.kind != SUB_OPERATOR_TOKEN) { unget_token(&token); break; } v2 = parse_term(); if (token.kind == ADD_OPERATOR_TOKEN) { v1 += v2; } else if (token.kind == SUB_OPERATOR_TOKEN) { v1 -= v2; } else { unget_token(&token); } } return v1; } double parse_line(void) { double value; st_look_ahead_token_exists = 0; value = parse_expression(); return value; } int main(int argc, char **argv) { char line[LINE_BUF_SIZE]; double value; while (fgets(line, 
LINE_BUF_SIZE, stdin) != NULL) { set_line(line); value = parse_line(); printf(">>%fn", value); } return 0; }
编译生成exe
gcc -o mycalc -Wall -Wswitch-enum -ansi parser.c lexicalanalyzer.c
运行mycalc.exe
visual studio实现



