天津理工大学编译原理实验一.doc
Four short words sum up what has lifted most successful individuals above the crowd: a little bit more.-author-date天津理工大学编译原理实验一天津理工大学实验报告实验报告学院(系)名称:计算机与通信工程学院姓名学号专业计算机科学与技术班级实验项目实验一:词法分析课程名称编译原理课程代码实验时间2016/03/172016/03/22实验地点软件实验室7-219批改意见成绩教师签字: 实验内容:实现标准C语言词法分析器。实验要求:(1)单词种别编码要求 基本字、运算符、界符:一符一种 标识符:统一为一种; 常量:按类型编码;(2)词法分析工作过程中建立符号表、常量表。 并以文本文件形式输出。(3)词法分析的最后结果以文本文件形式输出。实验源代码和心得体会#include<iostream>#include<ctype.h> #include<cstring>#define bufsize1024/关键字 #define INCLUDE256 #define AUTO257 #define BREAK258 #define CASE259 #define CHAR260 #define CONST261 #define CONTINUE262 #define DEFAULT263 #define DO264 #define DOUBLE265 #define ELSE266 #define ENUM267 #define EXTERN268 #define FLOAT269 #define FOR 270 #define GOTO 271 #define IF 272 #define INT 273 #define LONG 274 #define REGISTER275 #define RETURN276 #define SHORT277 #define SIGNED278 #define SIZEOF279 #define STATIC 280 #define STRUCT281 #define SWITCH282 #define TYPEDEF283 #define UNION284 #define UNSIGNED285 #define VOLATILE286 #define WHILE287 /运算符 #define PLUS288/+#define MINUS289/-#defineMUL290/*#define DIV291/#define REMAIN292/%#define GREATER293/>#defineLESS294/<#defineEQUAL295/=#defineMISTAKE296/!#define AND297/&#define OR298/ | #define PP299/+#defineMM300/-#defineEE301/=#defineGE302/>=#define LE303/<=#define MISE304/!=#define AA305/&&#define OO306/|#define PE307/+=#define MINUSE308/ -=#defineMULE309/*=#define DIVE310/=#define POW311/ 界符 #define SEMIC312/;#define COMMA313/,#define MULANNO_L314/*#define MULANNO_R315/*/ #define BRACE_L316/#define BRACE_R317/#define BRAKET_L318/(#define BRAKET_R319/)#define MIDBRA_L320/#define MIDBRA_R321/#define ONE_ANNO322/ /标识符和常量符 #define TAG400#defineCONINT401#defineCONFLOAT402#defineCONCHAR403#defineCONSTRING404/转义字符和字符串#define CA 500 #define CB 501 #define CF 502 #define CN 503 #define CR 504 #define CT 505 #define CV 506 #define CBSL 507 #define CQUE 508 #define CDQM 509 #define CQM 510 #define ZERO 511 using namespace std;typedef struct Variate/变量标识符 int id;char name50;Variate;typedef struct Constant/常量 int id;char name50;Constant;typedef struct Signchar name100; int sym; char attr100;Sign;const char *keywordTable="include","auto","break","case","char","const","continue", "default","do","double","else","enum","extern","float","for", "goto","if","int","long","register","return","short","signed", "sizeof","static","struct","switch","typedef","union","unsigned", "volatile","while","","#"/#作用是判断是否结束 const char *operateTable="+","-","*","/","%",">","<","=","!","&","|","+","-","=",">=","<=","!=","&&","|","+=","-=","*=","/=","","#"const char *borderTable="",",","/*","*/","","","(",")","","","/","#"const char changeList12='a','b','f','n','r','t','v','','?','"',''','0' FILE *in;FILE *Out;FILE *Error; int line=1;/用于输出错误的行数或者其他情况。默认值是从 1 开始,为第一行char bufbufsize;/存储读取的一行的字符串 char firchar;/头文件下第一个字符 char Char;int start=0;int VariateNum=0;/记录变量的个数,减去1 int ConstantNum=0;/记录常量的个数int SignNum=0;/记录标记的个数 int notation=1;/记录是否找到多行注释的另一半 */ ,默认值是1 即为有另一半 int isNotation=0;/判断是否是在注释行内 0 不是, 1 单行注释 2多行注释 bool last=false;int late=0;Variate var;Constant con;Sign sign;Variate VarArrbufsize;Constant ConArrbufsize;Sign SigArrbufsize;/获取读取的文件本行的第一个字符 ,直到找到一个非空格字符 char getfirstc(FILE *in)char ch=fgetc(in);/fgetc() 函数的作用是读取文件的当前行的一个字符,返回读取的字符 while(ch=' '|ch='n'|ch='t')if(ch='n')line+;fputc('n',Out);/向输出的文本文件中打印换行 ch=fgetc(in);return ch;/处理读取的本行内容void dealhead(char *buf) char ch10;char cha; char strbufsize;int i=0;int j=0;int temp=0;while(i<bufsize)/找到include<并保存 if(bufi!=' '&&bufi!='0')if(temp=0)if(bufi='i')temp=1;else if(bufi='d')temp=2;elsefprintf(Error,"Line:%dtformat is wrong! Without'>'n",line);break; if(temp=1)chj=bufi;j+;if(bufi='<')chj='0'break;else if(temp=2)chj=bufi;j+;if(bufi+1=' ')chj='0'i+;break;i+;if(temp=1)int index=0;fputc('#',Out);while(bufindex!='0')if(bufindex!=' ')fputc(bufindex,Out);index+;if(strcmp(ch,"include<")=0) i+;/因为上面程序没进行+就直接break所以这里就需要加 1到下一个角标 while(cha=bufi)!='>') i+; if(cha='n') fprintf(Error,"Line:%dtinclude end without '>'n",line); break; else fprintf(Error,"Line:%dtinclude format is wrongn",line);else if(temp=2)if(strcmp(ch,"define")!=0) i+;while(cha=bufi)=' ') if(cha='n') / fprintf(Error,"Line:%dtinclude end without '>'n",line); break; i+; if(bufi!=' ') if(!(isalpha(bufi) fprintf(Error,"Line:%dtdefine format is wrongn",line); /此处意思是define后必须有变量名称 else/在上面的break之前已经进行过i+;所以这里可以直接用 while(!isalpha(bufi)if(bufi='0')fprintf(Error,"Line:%dtdefine without vatiate and name!n",line);break;i+;int index=0;int space=0;/计算在上一个字母之后第几次遇到空格 fputc('#',Out);while(bufindex!='0')if(bufindex!=' ')fputc(bufindex,Out);space=0;elsespace+;if(space=1&&index!=0) fputc(' ',Out); index+;/处理头文件 char head(FILE *in)char ch;if(late=0)ch=getfirstc(in);elsech=firchar;/即为# while(ch='#')fgets(buf,bufsize,in);/fgets()读取in文件当前一行的内容为bufsize-1个字符的内容int len=strlen(buf); buflen-1='0'dealhead(buf); /这条语句执行完毕后且找到'>'就说明这行结束 line+;ch=getfirstc(in);if(ch='#')fputc('n',Out);return ch;/判断关键字 int keyword(char *str)int i; for(i=0;keywordTablei!="#"i+) if(strcmp(str,keywordTablei)=0) return i+256; /返回关键字对应的值 return -1; /处理字母 void dealAlpha()char str50;Variate var;Sign sign;int i;int key;/记录字符串str对应的值。 str0=firchar;for(i=start;isalpha(bufi)|isdigit(bufi);i+)stri-start+1=bufi;/将本行的第一个字符串赋值给str stri-start+1='0'start=i;firchar=bufstart;key=keyword(str);if(key=-1)/说明不是关键字,是标识符。即为变量,函数名一类的 var.id=VariateNum;strcpy(var.name,str);VarArrVariateNum=var;VariateNum+;sign.sym=TAG;sprintf(sign.attr,"%d",var.id);/暂时不知道什么用处 strcpy(sign.name,str);SigArrSignNum=sign;SignNum+;else/说明是关键字 sign.sym=key;strcpy(sign.name,str);strcpy(sign.attr,"-");/sign.attr="-"是不正确的,attr的长度大,后面的不能忽略 SigArrSignNum=sign;SignNum+;/判断是否在常量表里int InConTable(char *name)int i; for(i=0;i<ConstantNum;i+) if(strcmp(name,ConArri.name)=0) return ConArri.id; return -1; /处理数字void dealDigit()int symbol; int id; char word100; Sign sign; Constant constant; int i; word0 = firchar; for(i=start; isdigit(bufi); i+) wordi-start+1 = bufi; if(bufi='.') i+; if(!isdigit(bufi) start=i; firchar=bufstart; fprintf(Error,"Line: %dtunavailabe floatn",line); return; wordi-start = '.' for(;isdigit(bufi);i+) wordi-start+1 = bufi; wordi-start+1='0' start=i; firchar=bufstart; id=InConTable(word); /*不在常量表里,新加项*/ if(id=-1) constant.id=ConstantNum; strcpy(constant.name,word); ConArrConstantNum=constant; ConstantNum+; id=constant.id; sign.sym=CONFLOAT; sprintf(sign.attr,"%d",id);/change int to string strcpy(sign.name,word); SigArrSignNum=sign; SignNum+; else wordi-start+1='0' start=i; firchar= bufstart; id=InConTable(word); /*不在常量表里,新加项*/ if(id=-1) constant.id = ConstantNum; strcpy(constant.name,word); ConArrConstantNum=constant; ConstantNum+; id=constant.id; sign.sym=CONINT; sprintf(sign.attr,"%d",id);/change int to string strcpy(sign.name,word); SigArrSignNum=sign; SignNum+; /处理注释void dealNotation()char str3;str0='/'str2='0'if(bufstart='/')/确定是单行注释。 isNotation=1; Sign sign;str1='/'int i;for(i=0;borderTablei!="#"i+)if(strcmp(str,borderTablei)=0)sign.sym=i+288;break;strcpy(sign.name,str);strcpy(sign.attr,"-");SigArrSignNum=sign;SignNum+; else/多行注释 isNotation=2; str1='*'Sign sign1,sign2;int i;int st;for(i=0;borderTablei!="#"i+)if(strcmp(str,borderTablei)=0)sign1.sym=i+312;break;strcpy(sign1.name,str);strcpy(sign1.attr,"-");SigArrSignNum=sign1;SignNum+;fputc('n',Out);if(Char!='/')fputc(Char,Out);int len ;len=strlen(buf);char c=Char;for(start=0;start<len;start+)/将多行注释/*之前的非注释部分以及/*先进行输出 if(c='/'&&bufstart='*')fputc('/',Out);fputc('*',Out);break;elsefputc(bufstart,Out);c=bufstart;start+;if(bufstart='0')/ /*XXXn 类型fputc('n',Out);line+;while(fgets(buf,bufsize,in)!=NULL)/说明这一行至少有一个字符 0除外 len=strlen(buf); buflen-1='0'start=0;if(bufstart='0')fputc('n',Out);line+;elsest=start;start+;while(bufstart!='0')if(bufst='*'&&bufstart='/')str0=bufst;str1=bufstart;for(i=0;borderTablei!="#"i+)if(strcmp(str,borderTablei)=0)sign2.sym=i+312;break;strcpy(sign2.name,str);strcpy(sign2.attr,"-");SigArrSignNum=sign2;SignNum+;fputs(sign2.name,Out);start+;firchar=bufstart;last=true;return ;st=start;start+;fputc('n',Out);line+; notation=0;elsest=start;start+;while(bufstart!='0')if(bufst='*'&&bufstart='/')/ /*/ 类型str0=bufst;str1=bufstart;for(i=0;borderTablei!="#"i+)if(strcmp(str,borderTablei)=0)sign2.sym=i+312;break;strcpy(sign2.name,str);strcpy(sign2.attr,"-");SigArrSignNum=sign2;SignNum+;len=strlen(buf);for(int temp=start;temp<len;temp+)if(buftemp-1='*'&&buftemp='/')temp-;while(buftemp!='0')fputc(buftemp,Out);temp+;break;last=true;return ;st=start;start+;fputc('n',Out); / /*XXXn 类型line+;while(fgets(buf,bufsize,in)!=NULL)int len=strlen(buf); buflen-1='0'start=0;if(bufstart='0')fputc('n',Out);line+;elsest=start;start+;while(bufstart!='0')if(bufst='*'&&bufstart='/')str0=bufst;str1=bufstart;for(i=0;borderTablei!="#"i+)if(strcmp(str,borderTablei)=0)sign2.sym=i+312;break;strcpy(sign2.name,str);strcpy(sign2.attr,"-");SigArrSignNum=sign2;SignNum+;for(int temp=st;temp<len;temp+)if(buftemp!='0')fputc(buftemp,Out);elsebreak;last=true;return ;st=start;start+;fputc('n',Out);line+; notation=0; /判断界符bool Border(char c) for(int i=0;borderTablei!="#"i+)if(c=borderTablei0&&borderTablei1='0')return true;return false; /处理界符void dealBorder()/这里已经将注释和普通界符分开,所以所有已定义的界符都是单个的 char str2;Sign sign;str0=firchar;str1='0'int i=0;while(borderTablei!="#")if(strcmp(str,borderTablei)=0)strcpy(sign.name,str);strcpy(sign.attr,"-");sign.sym=i+312;SigArrSignNum=sign;SignNum+;firchar=bufstart;return;i+; /判断运算符int Operate(char ch)for(int i=0;operateTablei!="#"i+)if(ch=operateTablei0)return 1;return 0; /处理运算符 void dealOperate()char str3;Sign sign;str0=firchar;str1=bufstart;str2='0'int i;if(/*str1!='0'&&str1!=' '&&*/Operate(str1)/说明是双运算符 i=0;while(operateTablei!="#")if(strcmp(str,operateTablei)=0)strcpy(sign.name,str);sign.sym=i+28