编译原理课程设计报告C-语言词法与语法分析器的实现.doc
编译原理课程设计报告课题名称: 编译原理课程设计 C-语言词法与语法分析器的实现 提交文档学生姓名: 提交文档学生学号: 同组 成 员 名 单: 指导 教 师 姓 名: 指导教师评阅成绩: 指导教师评阅意见: . . 提交报告时间: 年 月 日C-词法与语法分析器的实现1.课程设计目标(1)题目实用性C-语言拥有一个完整语言的基本属性,通过编写C-语言的词法分析和语法分析,对于理解编译原理的相关理论和知识有很大的作用。通过编写C-语言词法和语法分析程序,能够对编译原理的相关知识:正则表达式、有限自动机、语法分析等有一个比较清晰的了解和掌握。(2)C-语言的词法说明 语言的关键字:else if int return void while 所有的关键字都是保留字,并且必须是小写。专用符号:+ - * / < <= > >= = != = ; , ( ) /* */其他标记是ID和NUM,通过下列正则表达式定义:ID = letter letter*NUM = digit digit*letter = a|.|z|A|.|Zdigit = 0|.|9 注:ID表示标识符,NUM表示数字,letter表示一个字母,digit表示一个数字。 小写和大写字母是有区别的。 空格由空白、换行符和制表符组成。空格通常被忽略。 注释用通常的c语言符号/ * . . . * /围起来。注释可以放在任何空白出现的位置(即注释不能放在标记内)上,且可以超过一行。注释不能嵌套。(3)程序设计目标能够对一个程序正确的进行词法及语法分析。2.分析与设计(1)设计思想a. 词法分析词法分析的实现主要利用有穷自动机理论。有穷自动机可用作描述在输入串中识别模式的过程,因此也能用作构造扫描程序。通过有穷自动机理论能够容易的设计出词法分析器。b. 语法分析语法分析采用递归下降分析。递归下降法是语法分析中最易懂的一种方法。它的主要原理是,对每个非终结符按其产生式结构构造相应语法分析子程序,其中终结符产生匹配命令,而非终结符则产生过程调用命令。因为文法递归相应子程序也递归,所以称这种方法为递归子程序下降法或递归下降法。其中子程序的结构与产生式结构几乎是一致的。(2)程序流程图程序主流程图: 词法分析: 语法分析: 读取程序读取程序进行递归下降分析匹配或建立树对输入的字符进行匹配判断对应输出各行代码的词法分析结果输出程序对应的语法树词法分析子流程图:Start否是Num是否为dight是否为num否否是ID是否为alpha是否为id否是是否为=是否为>,<.,!,=单符号否否是双符号是否为+,-,*等专用符号是是专用符号否是否是是否为/是否为*是否为*是否为/否否是错误结果over语法分析子流程图:3.程序代码实现整个词法以及语法的程序设计在一个工程里面,一共包含了8个文件,分别为main.cpp、parse.cpp、scan.cpp、util.cpp、scan.h、util.h、 globals.h、parse.h,其中scan.cpp和scan.h为词法分析程序。以下仅列出各个文件中的核心代码:Main.cpp#include "globals.h"#define NO_PARSE FALSE#include "util.h"#if NO_PARSE#include "scan.h"#else#include "parse.h"#endifint lineno=0;FILE * source;FILE * listing;FILE * code;int EchoSource = TRUE;int TraceScan=TRUE;int TraceParse=TRUE;int Error = FALSE;int main(int argc,char * argv) TreeNode * syntaxTree;char pgm120;scanf("%s",pgm);source=fopen(pgm,"r");if(source=NULL)fprintf(stderr,"File %s not foundn",pgm);exit(1);listing = stdout;fprintf(listing,"nC COMPILATION: %sn",pgm);#if NO_PARSEwhile(getToken()!=ENDFILE);#elsesyntaxTree = parse();if(TraceParse)fprintf(listing,"nSyntaxtree:n");printTree(syntaxTree);#endiffclose(source);return 0;Parse.cpp#include "globals.h"#include "util.h"#include "scan.h"#include "parse.h"static TokenType token; /* holds current token */* function prototypes for recursive calls */static TreeNode * declaration_list(void);static TreeNode * declaration(void);static TreeNode * params(void);static TreeNode * param_list(void);static TreeNode * param(void);static TreeNode * compound_stmt(void);static TreeNode * local_declarations(void);static TreeNode * statement_list(void);static TreeNode * statement(void);static TreeNode * expression_stmt(void);static TreeNode * if_stmt(void);static TreeNode * while_stmt(void);static TreeNode * return_stmt(void);static TreeNode * expression(void);static TreeNode * var(void);static TreeNode * simple_exp(void);static TreeNode * additive_expression(void);static TreeNode * term(void);static TreeNode * factor(void);static TreeNode * args(void);static TreeNode * arg_list(void);static void syntaxError(char * message) fprintf(listing,"n>>> ");fprintf(listing,"Syntax error at line %d: %s",lineno,message);Error = TRUE;/*判断读取的字符*/static void match(TokenType expected)if(token=expected)token=getToken( );elsesyntaxError("unexpected token -> ");printToken(token,tokenString);fprintf(listing," ");/*进行语法分析,构建语法树*/TreeNode * declaration_list(void)TreeNode * t= declaration();TreeNode * p= t;while (token=INT) | (token=VOID) ) TreeNode *q = declaration();if (q!=NULL) if (t=NULL) t = p = q;else /* now p cannot be NULL either */p->sibling = q;p = q;return t;TreeNode * declaration(void) TreeNode * t = NULL;switch (token)case VOID : case INT : t = newStmtNode(DecK);if(token = INT)t->type =Integer;elset->type = Void;match(token);switch (token)case ID:t->attr.name = copyString(tokenString);t->kind.stmt = VarDK;match(ID);switch (token)case LZKH:t->kind.stmt = VarDK;t->type = IntArray;match(LZKH);match(NUM);match(RZKH);match(SEMI);break;case LPAREN:t->kind.stmt = FunDK;match(LPAREN);t->child0 = params();match(RPAREN);t->child1 = compound_stmt();break;default: match(SEMI);break;break;default:syntaxError("unexpected token -> ");printToken(token,tokenString);token = getToken();break;break;default : syntaxError("unexpected token -> ");printToken(token,tokenString);token = getToken();break; /* end case */return t;TreeNode * params(void)TreeNode * t = NULL;if(token = VOID)match(token);t = newStmtNode(ParamList); t->child0 = newStmtNode(ParamK); t->child0->type = Void;else if(token = RPAREN)t=NULL;elset = param_list();return t;TreeNode * param_list(void)TreeNode * t = newStmtNode(ParamList);int i = 1;t->child0 = param();while(token != RPAREN) match(DOT);t->childi = param();i+;return t;TreeNode * param(void)TreeNode * t = NULL;match(INT);t= newStmtNode(ParamK);t->type=Integer;t->attr.name=copyString(tokenString);match(ID);if(token = LZKH)t->type=IntArray;match(LZKH);match(RZKH);return t;TreeNode * compound_stmt(void)TreeNode * t = newStmtNode(ComK);match(LDKH);t->child0 = local_declarations();t->child1 = statement_list();match(RDKH);return t;TreeNode * local_declarations(void)TreeNode * t = newStmtNode(LocalDecK);int i=0;while(token = INT | token = VOID)t->childi = declaration();i+;return t;TreeNode * statement_list(void)TreeNode * t = newStmtNode(StmtList);int i=0;while(token != RDKH)t->childi =statement();i+;return t;TreeNode * statement(void)TreeNode * t ;switch (token) case IF : t = if_stmt(); break;case WHILE : t = while_stmt(); break;case ID :case SEMI:t = expression_stmt(); break;case RETURN : t = return_stmt(); break;case LDKH : t=compound_stmt();break;default : syntaxError("unexpected token -> ");printToken(token,tokenString);token = getToken();break; /* end case */return t;TreeNode * expression_stmt(void) TreeNode * t = newStmtNode(ExpstmtK);if(token = SEMI)match(SEMI);elset = expression();match(SEMI);return t;TreeNode * if_stmt(void)TreeNode * t = newStmtNode(IfK);if(t!=NULL)match(IF);match(LPAREN);t->child0 = expression();match(RPAREN);t->child1 = statement();if (token=ELSE)match(ELSE); if (t!=NULL) t->child2 = newStmtNode(ElseK); t->child2->child0 = statement();return t;TreeNode * while_stmt(void) TreeNode * t = newStmtNode(WhileK);match(WHILE);match(LPAREN);if (t!=NULL) t->child0 = expression();match(RPAREN);if (t!=NULL) t->child1 = statement();return t;TreeNode * return_stmt(void)TreeNode * t = newStmtNode(RetK);if(token = RETURN)match(RETURN);if(token = SEMI)match(SEMI);elset->child0 = expression();match(SEMI);return t;TreeNode * expression(void)TreeNode * t = simple_exp();return t;TreeNode* var(void)TreeNode* t = newExpNode(IdK);if (t!=NULL) && (token=ID)t->attr.name = copyString(tokenString);match(ID);if(token = LZKH)match(token);t->type = ArrayUnit;t->child0 = expression();match(RZKH);return t;TreeNode * simple_exp(void)TreeNode * t = additive_expression();if(t!=NULL)if (token = LT | token = LE| token = MT | token = ME|token =EQ|token =NEQ)TreeNode * p = newExpNode(OpK);if(p!=NULL)p->attr.op = token;p->child0 = t;match(token);p->child1 = additive_expression();t=p;return t;TreeNode* additive_expression(void)TreeNode * t = term();while(token = PLUS | token = MINUS)TreeNode * p = newExpNode(OpK);p->attr.op = token;p->child0 = t;match(token);p->child1 = term();t = p;return t;TreeNode * term(void) TreeNode * t = factor();while (token=TIMES)|(token=OVER) TreeNode * p = newExpNode(OpK);if (p!=NULL) p->child0 = t;p->attr.op = token;match(token);p->child1 = factor();t = p;return t;TreeNode * factor(void) TreeNode * t = NULL;switch (token) case NUM :t = newExpNode(ConstK);if (t!=NULL) && (token=NUM)t->attr.val = atoi(tokenString);match(NUM);break;case ID :t = var();if (token = ASSIGN)TreeNode* p = newStmtNode(AssignK);p->attr.name = t->attr.name;match(token);p->child0 = expression();t = p;if (token = LPAREN )TreeNode * p = newStmtNode(CallK);p->attr.name = t->attr.name;t=p;match(token);p->child0 = args();match(RPAREN);break;case LPAREN :match(LPAREN);t = expression();match(RPAREN);break;default:syntaxError("unexpected token -> ");printToken(token,tokenString);token = getToken();break;return t;TreeNode * args(void)TreeNode * t = newStmtNode(ArgList);if(token != RPAREN)t->child0 = arg_list();return t;elsereturn NULL;TreeNode * arg_list(void)TreeNode * t = newStmtNode(ArgK);int i = 1;if(token != RPAREN)t->child0 = expression();while(token!=RPAREN)match(DOT);t->childi = expression();i+;return t;TreeNode * parse(void) TreeNode * t;token = getToken();t =declaration_list();if (token!=ENDFILE)syntaxError("Code ends before filen");return t;scan.cpp#include "globals.h"#include "util.h"#include "scan.h"/*对扫描的字符进行匹配判断*/TokenType getToken(void) /* index for storing into tokenString */ int tokenStringIndex = 0; /* holds current token to be returned */ TokenType currentToken; /* current state - always begins at START */ StateType state = START; /* flag to indicate save to tokenString */ int save; while (state != DONE) int c = getNextChar(); save = TRUE; switch (state) case START: if (isdigit(c) state = INNUM; else if (isalpha(c) state = INID; else if (c = '=') state = INEQUAL; else if (c = '<') state = INLE; else if (c = '>') state = INME; else if (c = ' ') | (c = 't') | (c = 'n') save = FALSE; else if (c= '!') state = INNEQ; else if (c = '/') if(getNextChar()!='*') ungetNextChar(); state = DONE; currentToken = OVER; break; else save = FALSE; state = INCOMMENT; else state = DONE; switch (c) case EOF: save = FALSE; currentToken = ENDFILE; break; case '+': currentToken = PLUS; break; case '-': currentToken = MINUS; break; case '*': currentToken = TIMES; break; case '(': currentToken = LPAREN; break; case ')': currentToken = RPAREN; break; case '': currentToken = SEMI; break; case '': currentToken=LZKH; break; case '': currentToken=RZKH; break; case '': currentToken=LDKH; break; case '': currentToken=RDKH; break;case ',': currentToken=DOT; break; default: currentToken = ERROR; break; break; case INCOMMENT: save = FALSE; if (c = EOF) state = DONE; currentToken = ERROR; else if(c='*') if(getNextChar()='/') state = START; elseungetNextChar(); break; case INNEQ: state=DONE;if(c='=')currentToken=NEQ;else ungetNextChar();save=FALSE;currentToken=ERROR; break; case INEQUAL: state = DONE; if (c = '=') currentToken = EQ; else /* backup in the input */ ungetNextChar(); currentToken = ASSIGN; break; case INNUM: if (!isdigit(c) /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = NUM; break; case INID: if (!isalpha(c) /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = ID; break; case INLE: state = DONE; if(c= '=')currentToken = LE; else /* backup in the input */ ungetNextChar(); currentToken = LT; break; case INME: state = DONE; if(c= '=')currentToken = ME; else /* backup in the input */ ungetNextChar(); currentToken = MT; break; case DONE: default: /* should never happen */ fprintf(listing,"Scanner Bug: state= %dn",state); state = DONE; currentToken = ERROR; break; if (save) && (tokenStringIndex <= MAXTOKENLEN