北邮大三上-编译原理-词法分析实验报告.doc
/*
 * Compiler Principles, Chapter 3: Lexical Analysis
 * Class:            Student ID:            Name: schnee
 *
 * Contents
 *   1. Assignment and requirements ........ 3
 *   2. Test-case analysis ................. 3
 *   3. Source code ........................ 4
 *
 * 1. Assignment and requirements
 *   Topic: design and implementation of a lexical analyzer.
 *   Task: design and implement a lexical analyzer for the C language that
 *   meets the following requirements.
 *   (1) Recognize every lexeme of a source program written in C and output
 *       each one as a token.
 *   (2) Recognize and read the comments in the source program.
 *   (3) Count the lines, words and characters of the source program
 *       (punctuation and spaces do not count as words) and output the
 *       statistics.
 *   (4) Detect errors in the source program and report the line and column
 *       at which they occur.
 *   (5) After an error is found, recover appropriately so that lexical
 *       analysis can continue; a single pass must detect and report all
 *       errors in the source program.
 *   Implementation options:
 *     Method 1: write the lexical analyzer by hand in C/C++.
 *     Method 2: write a LEX specification and generate the analyzer with LEX.
 *
 * 2. Test-case analysis
 *   1) Simple "Hello World" program input:
 *   2) More complex program input:
 *   3) Erroneous program input:
 *   (The inputs and outputs themselves are not present in this text;
 *   presumably they were screenshots in the original document.)
 *
 * 3. Source code
 *
 * NOTE(review): the listing below was reconstructed from a text extraction
 * that had dropped braces, brackets, "::", "==", "||", "++", "//" and "\n".
 * Only line numbers are reported for errors; column reporting, asked for in
 * requirement (4), is not implemented.
 */
#include <cmath>
#include <cctype>
#include <string>
#include <vector>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <algorithm>
using namespace std;

const int FILENAME = 105;  // capacity of a file-name buffer, terminating NUL included
const int MAXBUF = 82;     // two 40-byte halves plus one spare slot after each half
const int L_END = 40;      // index of the slot that follows the left half
const int R_END = 81;      // index of the slot that follows the right half
const int START = 0;       // index of the first byte of the left half
vector<string> Key;        // table of reserved C keywords, filled by initKey()

// One operator character together with the token text printed for "<op>="
// and for the operator on its own.
struct OpSpec
{
    char symbol;
    const char* with_equals;
    const char* alone;
};

static const OpSpec kOperators[] = {
    {'<', "*RELOP, LE\n", "*RELOP, LT\n"},
    {'>', "*RELOP, GE\n", "*RELOP, GT\n"},
    {'=', "*RELOP, EQ\n", "*ASSIGN-OP, EASY\n"},
    {'+', "*ASSIGN-OP, ADD\n", "*CAL-OP, ADD\n"},
    {'-', "*ASSIGN-OP, SUB\n", "*CAL-OP, SUB\n"},
    {'*', "*ASSIGN-OP, MUL\n", "*CAL-OP, MUL\n"},
    {'%', "*ASSIGN-OP, MOD\n", "*CAL-OP, MOD\n"},
    {'!', "*RELOP, UE\n", "*LOGIC-OP, NOT\n"},
};

// Single-character symbols that are printed as "*Other char-<c>".
static const char kPunctuation[] = ";:,(){}[].?&|^~";

// Returns the table entry for `symbol`, or NULL when it is not an operator
// handled through kOperators.
static const OpSpec* find_operator(char symbol)
{
    const size_t count = sizeof kOperators / sizeof kOperators[0];
    for (size_t i = 0; i < count; ++i) {
        if (kOperators[i].symbol == symbol)
            return &kOperators[i];
    }
    return NULL;
}

// <cctype> classifiers are undefined for negative values, so every call goes
// through unsigned char (source files may contain bytes >= 0x80).
static bool is_ident_start(char c)
{
    return c == '_' || isalpha(static_cast<unsigned char>(c)) != 0;
}

static bool is_ident_part(char c)
{
    return c == '_' || isalnum(static_cast<unsigned char>(c)) != 0;
}

static bool is_digit_char(char c)
{
    return isdigit(static_cast<unsigned char>(c)) != 0;
}

// Prints `heading` followed by the full contents of the file at `path`.
static void print_report(const char* heading, const char* path)
{
    printf("%s", heading);
    ifstream in(path);
    if (!in) {
        printf("(cannot open %s)\n", path);
        return;
    }
    char ch;
    while (in.get(ch))
        putchar(ch);
}

// Lexical analyzer state and operations.
class funtion
{
public:
    // ---- data ----
    char filename[FILENAME];        // name of the source file being analyzed
    ifstream f_in;                  // stream the source file is read from
    char buffer[MAXBUF];            // input buffer, used as two halves
    int l_end, r_end, forward;      // end of left half, end of right half, read position
    bool l_has, r_has;              // half already holds fresh data: skip its next refill
    vector<string> Id;              // identifier table (each identifier stored once)
    char C;                         // character most recently read by get_char()
    int linenum, wordnum, charnum;  // newline count, word count, alphanumeric character count
    string curword;                 // text of the lexeme being collected
    int data_end;                   // buffer index where the input ends, -1 while unknown
    bool at_end;                    // set once get_char() has run out of input

    // ---- operations ----
    void get_char();                // read one character into C and advance forward
    void get_nbc();                 // skip blanks until C is a non-blank character
    void retract();                 // un-read the character returned by the last get_char()
    void initial();                 // reset all state and load the first buffer half
    void fillBuffer(int pos);       // refill one half: 0 = left, otherwise right
    void analyzer();                // run the lexical analysis and write the report files
    void token_table();             // print the tokens
    void note_print();              // print the comments and preprocessor lines
    void count_number();            // print the line / word / character counts
    void error_report();            // print the errors that were found
    void solve(char* file);         // analyze `file`, then serve the report menu

    // ---- helpers used by analyzer() ----
    void append_digits();           // append the run of digits starting at C to curword
    void read_line_rest();          // put the rest of the current line into curword
    bool read_quoted(char quote);   // collect a string or character literal body
    bool read_block_comment();      // collect the body of a block comment
    void emit_operator(FILE* out, const char* with_equals, const char* alone);
};

// Prints the start-up banner.
// NOTE(review): the banner width is a reconstruction; the extracted text had
// collapsed the runs of '*' and of spaces.
void welcome()
{
    printf("\n****************************************\n");
    printf("*    Welcome to use LexicalAnalyzer    *\n");
    printf("*   By schnee BUPT  Date: 2011/20/10   *\n");
    printf("****************************************\n\n\n");
}

// Fills Key with the 32 reserved words of C.
void initKey()
{
    static const char* const words[] = {
        "auto", "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "goto", "if",
        "int", "long", "register", "return", "short", "signed", "static",
        "sizeof", "struct", "switch", "typedef", "union", "unsigned", "void",
        "volatile", "while",
    };
    Key.assign(words, words + sizeof words / sizeof words[0]);
}

// Reads the next character into C and advances `forward`.
// When no input is left, sets at_end, stores '\0' in C and does not advance.
void funtion::get_char()
{
    if (at_end || forward == data_end) {
        at_end = true;
        C = '\0';
        return;
    }
    C = buffer[forward];
    if (C == '\n')
        ++linenum;
    else if (isalnum(static_cast<unsigned char>(C)))
        ++charnum;

    ++forward;
    if (forward == l_end) {
        // The left half is used up: load the right half and step over the spare slot.
        fillBuffer(1);
        forward = l_end + 1;
    } else if (forward == r_end) {
        // The right half is used up: load the left half and wrap around.
        fillBuffer(0);
        forward = START;
    }
}

// Skips white space (and NUL bytes) so that C ends up on a significant
// character, or at_end is set.
void funtion::get_nbc()
{
    while (!at_end && (C == '\0' || isspace(static_cast<unsigned char>(C))))
        get_char();
}

// Resets the analyzer state and loads the first half of the buffer.
// f_in must already be open.
void funtion::initial()
{
    Id.clear();
    curword.clear();
    l_end = L_END;
    r_end = R_END;
    forward = START;
    l_has = r_has = false;
    data_end = -1;
    at_end = false;
    C = '\0';
    linenum = wordnum = charnum = 0;
    memset(buffer, 0, sizeof buffer);
    fillBuffer(0);
}

// Refills one half of the buffer from f_in (pos == 0: left, otherwise right).
// If retract() flagged the half as still fresh, the read is skipped once.
// A short read records where the input ends in data_end.
void funtion::fillBuffer(int pos)
{
    bool& still_fresh = (pos == 0) ? l_has : r_has;
    if (still_fresh) {
        still_fresh = false;
        return;
    }
    const int start = (pos == 0) ? START : l_end + 1;
    f_in.read(buffer + start, l_end);
    const int got = static_cast<int>(f_in.gcount());
    if (got != l_end)
        data_end = start + got;
}

// Un-reads the character delivered by the most recent get_char(), including
// its effect on linenum / charnum. Only one character can be taken back, and
// nothing happens when that get_char() had reached the end of the input.
void funtion::retract()
{
    if (at_end)
        return;
    if (C == '\n')
        --linenum;
    else if (isalnum(static_cast<unsigned char>(C)))
        --charnum;

    if (forward == START) {
        // get_char() just wrapped around and refilled the left half.
        l_has = true;
        forward = r_end - 1;
    } else {
        --forward;
        if (forward == l_end) {
            // get_char() just crossed into the freshly filled right half.
            r_has = true;
            --forward;
        }
    }
}

// Appends the digits starting at C to curword; C is left on the first
// character that is not a digit.
void funtion::append_digits()
{
    while (!at_end && is_digit_char(C)) {
        curword.push_back(C);
        get_char();
    }
}

// Stores everything after the current character up to the end of the line in
// curword. The newline is consumed but not stored.
void funtion::read_line_rest()
{
    curword.clear();
    get_char();
    while (!at_end && C != '\n') {
        curword.push_back(C);
        get_char();
    }
}

// Collects the body of a literal opened by `quote` (C is the opening quote on
// entry) into curword. A backslash keeps the following character, so an
// escaped quote does not close the literal.
// Returns false when a newline or the end of input comes before the closing quote.
bool funtion::read_quoted(char quote)
{
    curword.clear();
    get_char();
    while (!at_end && C != '\n') {
        if (C == quote)
            return true;
        curword.push_back(C);
        if (C == '\\') {
            get_char();
            if (at_end)
                break;
            curword.push_back(C);
        }
        get_char();
    }
    return false;
}

// Collects the body of a block comment (C is the '*' of the opening "/*" on
// entry) into curword, without the closing "*/".
// Returns false when the input ends before "*/" is seen.
bool funtion::read_block_comment()
{
    curword.clear();
    get_char();
    while (!at_end) {
        const string::size_type len = curword.size();
        if (C == '/' && len > 0 && curword[len - 1] == '*') {
            curword.erase(len - 1);
            return true;
        }
        curword.push_back(C);
        get_char();
    }
    return false;
}

// Reads one look-ahead character after an operator: prints `with_equals` when
// it is '=', otherwise prints `alone` and gives the look-ahead back.
void funtion::emit_operator(FILE* out, const char* with_equals, const char* alone)
{
    get_char();
    if (C == '=') {
        fputs(with_equals, out);
    } else {
        fputs(alone, out);
        retract();
    }
}

// Scans the whole input and writes token_file.txt, note_file.txt,
// count_file.txt and error_file.txt in the current directory.
// After an error the scan simply continues with the next character, so one
// pass reports every error.
void funtion::analyzer()
{
    FILE* token_file = fopen("token_file.txt", "w");
    FILE* note_file = fopen("note_file.txt", "w");
    FILE* count_file = fopen("count_file.txt", "w");
    FILE* error_file = fopen("error_file.txt", "w");
    if (!token_file || !note_file || !count_file || !error_file) {
        fprintf(stderr, "cannot create the report files\n");
        if (token_file) fclose(token_file);
        if (note_file) fclose(note_file);
        if (count_file) fclose(count_file);
        if (error_file) fclose(error_file);
        return;
    }

    for (;;) {
        get_char();
        get_nbc();
        if (at_end)
            break;

        // linenum counts the newlines consumed so far, so the token that
        // starts at C sits on line linenum + 1.
        const int line = linenum + 1;

        if (is_ident_start(C)) {
            // Keyword or identifier: a letter or '_' followed by letters, digits, '_'.
            curword.clear();
            while (!at_end && is_ident_part(C)) {
                curword.push_back(C);
                get_char();
            }
            retract();
            ++wordnum;
            const bool is_keyword = find(Key.begin(), Key.end(), curword) != Key.end();
            fprintf(token_file, "%8d-%20s %s\n", wordnum,
                    is_keyword ? "KEY WORD" : "Identifier", curword.c_str());
            if (!is_keyword && find(Id.begin(), Id.end(), curword) == Id.end())
                Id.push_back(curword);
        } else if (is_digit_char(C)) {
            // Unsigned number: digits [ '.' digits ] [ ('e'|'E') [sign] digits ].
            bool bad_exponent = false;
            curword.clear();
            append_digits();
            if (C == '.') {
                curword.push_back(C);
                get_char();
                append_digits();
            }
            if (C == 'e' || C == 'E') {
                curword.push_back(C);
                get_char();
                if (C == '+' || C == '-') {
                    curword.push_back(C);
                    get_char();
                }
                bad_exponent = !is_digit_char(C);
                append_digits();
            }
            retract();
            ++wordnum;
            fprintf(token_file, "%8d-%20s %s\n", wordnum, "Unsigned Number", curword.c_str());
            if (bad_exponent)
                fprintf(error_file, "Line %d: error: exponent of '%s' has no digits \n",
                        line, curword.c_str());
        } else if (C == '#') {
            // Preprocessor line: logged to the note file, not tokenized.
            fprintf(note_file, "preproccess Line %d : ", line);
            read_line_rest();
            fprintf(note_file, "%s\n", curword.c_str());
        } else if (C == '"' || C == '\'') {
            // String or character literal, kept as one unit.
            const char quote = C;
            const bool closed = read_quoted(quote);
            if (quote == '"')
                fprintf(token_file, "*string in \"\"-%s\n", curword.c_str());
            else
                fprintf(token_file, "*char in ''-%s\n", curword.c_str());
            if (!closed)
                fprintf(error_file, "Line %d: error: missing terminating %c character \n",
                        line, quote);
        } else if (C == '/') {
            get_char();
            if (C == '/') {
                // Line comment.
                fprintf(note_file, "single-line note Line %d : ", line);
                read_line_rest();
                fprintf(note_file, "%s\n", curword.c_str());
            } else if (C == '*') {
                // Block comment.
                fprintf(note_file, "paragraph note Line %d : ", line);
                const bool closed = read_block_comment();
                fprintf(note_file, "%s\nto Line %d\n", curword.c_str(), linenum + 1);
                if (!closed)
                    fprintf(error_file, "Line %d: error: unterminated comment \n", line);
            } else if (C == '=') {
                fprintf(token_file, "*ASSIGN-OP, DIV\n");
            } else {
                fprintf(token_file, "*CAL-OP, DIV\n");
                retract();
            }
        } else if (const OpSpec* op = find_operator(C)) {
            // Comparison, assignment and arithmetic operators.
            emit_operator(token_file, op->with_equals, op->alone);
        } else if (strchr(kPunctuation, C) != NULL) {
            fprintf(token_file, "*Other char-%c\n", C);
        } else {
            fprintf(error_file, "Line %d: error: '%c' was illegal char \n", line, C);
        }
    }

    fprintf(count_file, "The Line number is %d\n", linenum);
    fprintf(count_file, "The word number is %d\n", wordnum);
    fprintf(count_file, "The char number is %d\n", charnum);

    fclose(token_file);
    fclose(note_file);
    fclose(count_file);
    fclose(error_file);
}

// Prints the contents of token_file.txt.
void funtion::token_table()
{
    print_report("The token_table is as following:\n", "token_file.txt");
}

// Prints the contents of note_file.txt.
void funtion::note_print()
{
    print_report("The note is as following:\n", "note_file.txt");
}

// Prints the contents of count_file.txt.
void funtion::count_number()
{
    print_report("The count result is as following:\n", "count_file.txt");
}

// Prints the contents of error_file.txt.
void funtion::error_report()
{
    print_report("The error report is as following:\n", "error_file.txt");
}

// Analyzes `file` and then lets the user pick which report to print until
// 0 is entered or standard input ends.
void funtion::solve(char* file)
{
    strncpy(filename, file, FILENAME - 1);
    filename[FILENAME - 1] = '\0';

    f_in.open(filename);
    if (!f_in) {
        printf("* Cannot open %s \n", filename);
        return;
    }
    initial();
    analyzer();
    f_in.close();

    printf("* We have analyzed %s \n", filename);
    printf("*0: To end\n");
    printf("*1: To get the token table\n");
    printf("*2: To get the note part of file\n");
    printf("*4: To report all the error of the file\n");
    printf("*3: To get the line num, word num and charter num\n\n");

    for (;;) {
        printf("*please input your choice: ");
        int choice = 0;
        const int parsed = scanf("%d", &choice);
        if (parsed == EOF)
            break;
        if (parsed != 1) {
            // Not a number: drop the offending word so the next scanf can make progress.
            if (scanf("%*s") == EOF)
                break;
            continue;
        }
        if (choice == 0)
            break;
        switch (choice) {
        case 1: token_table(); break;
        case 2: note_print(); break;
        case 3: count_number(); break;
        case 4: error_report(); break;
        default: printf("*unknown choice %d\n", choice); break;
        }
        printf("\n");
    }
}

// Runs one complete analysis session on `file`.
void LexicalAnalyzer(char* file)
{
    funtion test;
    test.solve(file);
}

int main()
{
    welcome();
    initKey();

    char file[FILENAME];
    for (;;) {
        printf("\nDo you want to continue? (\"YES\" or \"NO\"): ");
        // The width 104 is FILENAME - 1, leaving room for the terminating NUL.
        if (scanf("%104s", file) != 1)
            break;
        if (strcmp(file, "NO") == 0) {
            printf("Thanks for your use! GoodBye next time!\n\n");
            break;
        }
        printf("Please type your C file name(for example: a.cpp): ");
        if (scanf("%104s", file) != 1)
            break;
        LexicalAnalyzer(file);
    }
    return 0;
}