Compiler Principles: Lexical Analyzer, LL(1) and LR(0) Parsers (Python Implementation)
School of Computer Science and Communication Engineering
Compiler Principles Lab Report
Topics: 1. Lexical Analyzer  2. LL(1) Parser  3. LR(0) Parser
Class:          Name:          Student ID:          Supervisor:          2017

Contents
I. Experiment Topics
II. Objectives and Requirements
III. Implementation
IV. Summary

I. Experiment Topics
1. Lexical analyzer: scan a piece of program code, split it into word symbols (tokens), check them, and output a token table and an error table.
2. LL(1) parser: for a given grammar, compute the FIRST and FOLLOW sets, build the predictive parsing table, and parse a given input string.
3. LR(0) parser: for a given grammar, use the CLOSURE operation to construct the canonical collection of LR(0) item sets, build the grammar's DFA from the state transition function GO, convert it into a parsing table, and parse a given input string.

II. Objectives and Requirements
1. Understand how a lexical analyzer is implemented.
2. Learn to compute FIRST and FOLLOW sets and to construct an LL(1) parsing table.
3. Learn the CLOSURE operation and the state transition function GO, and how to construct an LR(0) parsing table.

III. Implementation

1. Lexical analyzer

program.txt holds the grammar to be analyzed:

E->TR
R->+TR|-TR|
T->FG
G->*FG|/FG|
F->(E)|i

Code:

KEYWORD_LIST = ['while', 'if', 'else', 'switch', 'case']
SEPARATOR_LIST = [';', ':', ',', '(', ')', '{', '}', '[', ']']
OPERATOR_LIST1 = ['+', '-', '*']
OPERATOR_LIST2 = ['<=', '<', '==', '=', '>', '>=']
CATEGORY_DICT = {
    # keywords
    "while": {"while": ""}, "if": {"if": ""}, "else": {"else": ""},
    "switch": {"switch": ""}, "case": {"case": ""},
    # operators
    "+": {"+": ""}, "-": {"-": ""}, "*": {"*": ""},
    "<=": {"relop": "LE"}, "<": {"relop": "LT"}, ">=": {"relop": "GE"},
    ">": {"relop": "GT"}, "==": {"relop": "EQ"}, "=": {"=": ""},
    # separators
    ";": {";": ""}, ":": {":": ""}, ",": {",": ""}, "(": {"(": ""}, ")": {")": ""},
    "{": {"{": ""}, "}": {"}": ""}, "[": {"[": ""}, "]": {"]": ""},
}

CONSTANTTABLE = []
TOKENTABLE = []
OPERATORTABLE = []
KEYWORDTABLE = []
SEPARATORTABLE = []
UNDEFINEDTABLE = []


# read the source file into one string
def read_file(path, method):
    temp_str = ""
    try:
        file = open(path, method)
        for line in file:
            line = line.replace('\n', " ")
            temp_str += line
        temp_str = str(temp_str)
    except IOError as e:
        print(e)
        exit()
    finally:
        file.close()
    return temp_str.strip() + " "


# GETBE: clear the current token buffer
def getbe():
    global token
    getchar()
    token = ""
    return


# GETCHAR: skip blanks and read the character at the current location
def getchar():
    global character
    global location
    while all_string[location] == " ":
        location = location + 1
    character = all_string[location]
    return character


# append the current character to the token
def concatenation():
    global token
    global character
    token = token + character


# is the current character a digit?
def digit():
    if '0' <= character <= '9':
        return True
    return False


# is the current character a letter?
def letter():
    if 'A' <= character <= 'Z' or 'a' <= character <= 'z':
        return True
    return False


# is the token a reserved word? return its category, or 0 for an identifier
def reserve():
    if token in KEYWORD_LIST:
        return CATEGORY_DICT[token]
    else:
        return 0


# RETRACT: give back the look-ahead character
def retract():
    global location
    global character
    # location = location - 1
    character = ""
    return


# main scanning routine: recognize one token per call
def main():
    global token
    global character
    global location
    s = getchar()
    getbe()
    if 'a' <= s <= 'z' or 'A' <= s <= 'Z':
        while letter() or digit():
            concatenation()
            location = location + 1
            character = all_string[location]
        retract()
        c = reserve()
        if c == 0:
            TOKENTABLE.append(token)
            print("identifier: '", token, "' -> entry", TOKENTABLE.index(token))
        else:
            KEYWORDTABLE.append(token)
            print("reserved word:", CATEGORY_DICT[token])
    elif '0' <= s <= '9':
        while digit():
            concatenation()
            location = location + 1
            character = all_string[location]
        retract()
        CONSTANTTABLE.append(token)
        print("constant: '", token, "' -> entry", CONSTANTTABLE.index(token))
    elif s in OPERATOR_LIST1:
        location = location + 1
        OPERATORTABLE.append(s)
        print("single-character operator:", CATEGORY_DICT[s])
    elif s in OPERATOR_LIST2:
        location = location + 1
        character = all_string[location]
        if character == '=':
            location = location + 1
            OPERATORTABLE.append(s + character)
            print("two-character operator:", CATEGORY_DICT[s + character])
        else:
            retract()
            OPERATORTABLE.append(s)
            print("single-character operator:", CATEGORY_DICT[s])
    elif s in SEPARATOR_LIST:
        location = location + 1
        SEPARATORTABLE.append(s)
        print("separator:", CATEGORY_DICT[s])
    else:
        location += 1
        UNDEFINEDTABLE.append(s)
        print("error: undefined symbol '", s, "'")


if __name__ == '__main__':
    character = ""
    token = ""
    all_string = read_file("program.txt", "r")
    location = 0
    while location + 1 < len(all_string):
        main()
    print('KEYWORDTABLE:', KEYWORDTABLE)
    print('TOKENTABLE:', TOKENTABLE)
    print('CONSTANTTABLE:', CONSTANTTABLE)
    print('OPERATORTABLE:', OPERATORTABLE)
    print('SEPARATORTABLE:', SEPARATORTABLE)

Run results: (screenshot omitted)
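As a cross-check of the token categories above, the same splitting into keywords, identifiers, constants, operators and separators can be sketched with Python's re module. This is only a minimal illustrative sketch, not part of the lab code; the TOKEN_SPEC list, the tokenize helper and the sample input are assumptions made for the example.

import re

# Assumed token specification mirroring the tables above.
TOKEN_SPEC = [
    ("KEYWORD",    r"\b(?:while|if|else|switch|case)\b"),
    ("IDENTIFIER", r"[A-Za-z][A-Za-z0-9]*"),
    ("CONSTANT",   r"\d+"),
    ("RELOP",      r"<=|>=|==|<|>"),
    ("OPERATOR",   r"[+\-*=]"),
    ("SEPARATOR",  r"[;:,(){}\[\]]"),
    ("SKIP",       r"\s+"),
    ("ERROR",      r"."),          # anything else goes to the error table
]
MASTER_RE = re.compile("|".join("(?P<%s>%s)" % pair for pair in TOKEN_SPEC))

def tokenize(text):
    # Split `text` into (category, lexeme) pairs plus a list of undefined symbols.
    tokens, errors = [], []
    for m in MASTER_RE.finditer(text):
        kind, value = m.lastgroup, m.group()
        if kind == "SKIP":
            continue
        (errors if kind == "ERROR" else tokens).append((kind, value))
    return tokens, errors

if __name__ == "__main__":
    print(tokenize("while (i <= 10) i = i + 1"))   # hypothetical sample input

Ordering "<=" before "<" in the pattern plays the same role as the look-ahead-and-retract logic in the hand-written scanner above.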
2. LL(1) parser

grammar.txt holds the grammar to be analyzed (here "^" stands for the empty string ε):

E->TR
R->+TR|-TR|^
T->FG
G->*FG|/FG|^
F->(E)|i

Input string: i+i*i

Code:

NonTermSet = set()   # nonterminals
TermSet = set()      # terminals
First = {}           # FIRST sets
Follow = {}          # FOLLOW sets
GramaDict = {}       # productions, grouped by left-hand side
Code = []            # production lines as read from the file
AnalysisList = {}    # the LL(1) parsing table
StartSym = ""        # start symbol
EndSym = '#'         # end-of-input marker is '#'
Epsilon = "^"        # "^" stands in for epsilon, since there is no epsilon character


# compute the FIRST sets
def getFirst():
    global NonTermSet, TermSet, First, Follow
    for X in NonTermSet:
        First[X] = set()          # FIRST of a nonterminal starts out empty
    for X in TermSet:
        First[X] = set(X)         # FIRST of a terminal is the terminal itself
    Change = True
    while Change:                 # stop once no FIRST set grows any more
        Change = False
        for X in NonTermSet:
            for Y in GramaDict[X]:
                k = 0
                Continue = True
                while Continue and k < len(Y):
                    if not First[Y[k]] - set(Epsilon) <= First[X]:
                        # there is something new to add
                        if Epsilon not in First[Y[k]] and Y[k] in NonTermSet and k > 0:
                            Continue = False
                        else:
                            First[X] |= First[Y[k]] - set(Epsilon)
                            Change = True
                    if Epsilon not in First[Y[k]]:
                        Continue = False
                    k += 1
                if Continue:      # X -> epsilon, or Y1 ... Yk all derive epsilon
                    First[X] |= set(Epsilon)


# compute the FOLLOW sets
def getFollow():
    global NonTermSet, TermSet, First, Follow, StartSym
    for A in NonTermSet:
        Follow[A] = set()
    Follow[StartSym].add(EndSym)  # the end marker goes into FOLLOW(start symbol)
    Change = True
    while Change:                 # stop once no FOLLOW set grows any more
        Change = False
        for X in NonTermSet:
            for Y in GramaDict[X]:
                for i in range(len(Y)):
                    if Y[i] in TermSet:
                        continue
                    Flag = True
                    for j in range(i + 1, len(Y)):
                        if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]:
                            # step 2: add FIRST(beta) minus epsilon to FOLLOW(B)
                            Follow[Y[i]] |= First[Y[j]] - set(Epsilon)
                            Change = True
                        if Epsilon not in First[Y[j]]:
                            Flag = False
                            break
                    if Flag:
                        if not Follow[X] <= Follow[Y[i]]:
                            # step 3: for X -> alpha B beta with beta =>* epsilon,
                            # add FOLLOW(X) to FOLLOW(B)
                            Follow[Y[i]] |= Follow[X]
                            Change = True


# build the LL(1) parsing table
def getAnalysisList():
    for nonX in NonTermSet:
        AnalysisList[nonX] = dict()
        row = AnalysisList[nonX]
        flag = True
        for Y in GramaDict[nonX]:
            for term in TermSet:
                if term in First[Y[0]] and term in First[nonX]:
                    row[term] = nonX + '->' + Y
            if Epsilon in First[nonX] and flag:
                flag = False
                for tmp in Follow[nonX]:
                    row[tmp] = nonX + '->' + Epsilon
    print('Parsing table:')
    for nonX in NonTermSet:
        print(' ', nonX, AnalysisList[nonX])


# look up the parsing table
def findAnalysisList(non, ter):
    try:
        tmp = AnalysisList[non][ter]
        X, Y = tmp.split('->')
    except Exception as e:
        print('find error')       # M[A,a] is empty: a syntax error was found
        print(e)
        pass
    return Y


# print one row of the parsing trace
def display(show_list):
    for item in show_list:
        print(' %-25s' % item, end='')
    print()


# the LL(1) driver
def analyzer():
    head = ["Stack", "StackTop", "NowStr", "InputStr", "Action"]
    # inputStr = 'i+i*i' + EndSym
    inputStr = input("Enter an expression: ") + EndSym
    print('Parsing trace:')
    display(head)
    stack = []
    location = 0
    stack.append(EndSym)
    stack.append(StartSym)
    stack_top = stack.pop()
    while stack_top != EndSym and location < len(inputStr):
        if stack_top in TermSet and inputStr[location] == stack_top:
            # x == a != '#': match a terminal
            mess = 'match, pop ' + stack_top + ' and read the next input symbol ' + inputStr[location + 1]
            display([stack, stack_top, inputStr[location], inputStr[location + 1: len(inputStr)], mess])
            location += 1
            stack_top = stack.pop()
        elif stack_top in NonTermSet and inputStr[location] in TermSet:
            # x is a nonterminal A: look up M[A, a]
            result = findAnalysisList(stack_top, inputStr[location])
            if result == Epsilon:
                # M[A, a] holds A -> epsilon: just pop A
                mess = ('pop ' + stack_top + ' since M[' + stack_top + ',' + inputStr[location] +
                        '] is ' + stack_top + '->' + Epsilon + ', push nothing')
            else:
                # push the right-hand side of M[A, a] onto the stack in reverse order
                mess = ('pop ' + stack_top + ', push ' + result + ' of ' + stack_top + '->' + result +
                        ' from M[' + stack_top + ',' + inputStr[location] + '] in reverse order')
                tmp_list = []
                for char in result:
                    tmp_list.append(char)
                tmp_list.reverse()
                stack.extend(tmp_list)
            display([stack, stack_top, inputStr[location], inputStr[location + 1: len(inputStr)], mess])
            stack_top = stack.pop()
        else:
            break
    if stack_top == EndSym and inputStr[location] == EndSym:
        # x == a == '#': the parse succeeds and the parser stops
        display([[], '#', '#', '', 'match, analysis succeeded'])
        print()
        print('*')
        print('* Analysis Success *')
        print('*')
    else:
        print('Analysis Error')


# read the grammar file
def readGrammar():
    try:
        file = open('grammar.txt', 'r')
        for line in file:
            line = line.replace('\n', "")
            Code.append(line)
    except IOError as e:
        print(e)
        exit()
    finally:
        file.close()
    return Code


# initialization: read the grammar, build the sets and the table, then parse
def init():
    global NonTermSet, TermSet, First, Follow, StartSym, Code
    Code = readGrammar()
    n = len(Code)
    print('Number of productions:', n)
    StartSym = Code[0][0]
    print("Start symbol:", StartSym)
    print('Productions: G[', StartSym, ']:')
    for i in range(n):
        X, Y = Code[i].split('->')
        print(' ', Code[i])
        NonTermSet.add(X)
        Y = Y.split('|')
        for Yi in Y:
            TermSet |= set(Yi)
        if X not in GramaDict:
            GramaDict[X] = set()
        GramaDict[X] |= set(Y)     # group the alternatives by left-hand side
    TermSet -= NonTermSet
    print('Nonterminals:', NonTermSet)
    print('Terminals:', TermSet)
    getFirst()
    getFollow()
    print("FIRST sets:")
    for k in NonTermSet:
        print(' FIRST[', k, ']: ', First[k])
    print("FOLLOW sets:")
    for k, v in Follow.items():
        print(' FOLLOW[', k, ']: ', v)
    TermSet -= set(Epsilon)
    TermSet |= set(EndSym)
    getAnalysisList()
    analyzer()


init()

Run results: (screenshot omitted)
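For reference, applying the two algorithms above to this grammar by hand (a hand derivation, not program output) gives:

  FIRST(E) = FIRST(T) = FIRST(F) = { (, i }
  FIRST(R) = { +, -, ^ }
  FIRST(G) = { *, /, ^ }

  FOLLOW(E) = FOLLOW(R) = { ), # }
  FOLLOW(T) = FOLLOW(G) = { +, -, ), # }
  FOLLOW(F) = { +, -, *, /, ), # }

The table entries for the two epsilon productions therefore go under the FOLLOW symbols: M[R, )] = M[R, #] = R->^ and M[G, +] = M[G, -] = M[G, )] = M[G, #] = G->^.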
3. LR(0) parser

program.txt holds the grammar to be analyzed:

X->S
S->BB
B->aB
B->b

Input string: abab

Code:

VN = []              # nonterminals
VT = []              # terminals
NFA = []             # NFA transition table
DFA = []             # DFA transition table
grammar = []         # productions as read from the file
doted_grammar = []   # productions with a dot inserted (LR(0) items)
VN2Int = {}          # nonterminal -> index
VT2Int = {}          # terminal -> index
action = []          # ACTION table
goto = []            # GOTO table
DFA_node = []        # DFA states
status_stack = []    # state stack
symbol_stack = []    # symbol stack
now_state = ''       # state on top of the stack
input_ch = ''        # character on top of the input
input_str = ''       # input string
location = 0         # position in the input
now_step = 0         # current step number


# read the grammar file
def read_grammar(file_name):
    global grammar
    with open(file_name, 'r') as file:
        for line in file:
            line = line.replace('\n', "")
            grammar.append(line)


# collect the terminals and nonterminals
def find_term_non():
    global grammar
    n = len(grammar)
    temp_vt = []
    l = 0
    for i in range(n):
        X, Y = grammar[i].split('->')
        if X not in VN:
            VN.append(X)
            VN2Int.update({X: l})
            l += 1
        for Yi in Y:
            temp_vt.append(Yi)
    m = 0
    for i in temp_vt:
        if i not in VN and i not in VT:
            VT.append(i)
            VT2Int.update({i: m})
            m += 1
    VT.append('#')
    VT2Int.update({'#': m})


# insert a dot into a production at position i
def add_char2str(grammar_i, i):
    grammar_i = grammar_i[0:i] + '.' + grammar_i[i:len(grammar_i)]
    return grammar_i


# turn every production into its LR(0) items by inserting dots
def add_dot():
    global doted_grammar
    j = 0
    n = 0
    for i in grammar:
        for k in range(len(i) - 2):
            doted_grammar.append([])
            doted_grammar[n].append(add_char2str(i, k + 3))
            doted_grammar[n].append('false')
            n += 1
        j += 1


# print the dotted productions (the LR(0) items)
def print_doted_grammar():
    print('- dotted productions -')
    j = 1
    for i in doted_grammar:
        print('%d.%s' % (j, i[0]))
        j += 1


# print the productions as read
def print_read_grammar():
    print('- productions read -')
    j = 0
    for i in grammar:
        print('(%d)%s' % (j, i))
        j += 1


# initialize the NFA table
def init_NFA():
    global NFA
    for row in range(len(doted_grammar)):
        NFA.append([])
        for col in range(len(doted_grammar)):
            NFA[row].append('')


# position of the dot in an item
def find_pos_point(one_grammar):
    return one_grammar.find('.')


# does the item start with `start` and have its dot right after the arrow?
def is_start(grammar_i, start):
    if grammar_i[0].find(start, 0, 1) + grammar_i[0].find('.', 3, 4) == 3:
        return True
    else:
        return False


# collect the items whose left-hand side is `start` and whose dot is at the front;
# store their indices in grammar_id and return how many were found
def find_node(start, grammar_id):
    num = 0
    for i in doted_grammar:
        if is_start(i, start):
            grammar_id[num] = doted_grammar.index(i)
            num += 1
    return num


# build the NFA over the LR(0) items
def make_NFA():
    global NFA
    grammar_id = []
    for i in range(len(doted_grammar)):
        grammar_id.append('')
    init_NFA()
    i = 0
    for grammar_i in doted_grammar:
        pos_point = find_pos
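The listing above breaks off inside make_NFA, so the remaining NFA/DFA construction is not reproduced here. As a compact illustration of the CLOSURE and GO operations named in the experiment topic, the following independent sketch computes them directly on LR(0) items for the same grammar. The item representation (a head/body/dot-position triple) and the names GRAMMAR, closure and go are choices made for this sketch, not the report's code.

# Grammar of the example: X->S, S->BB, B->aB, B->b (X is the augmented start symbol).
GRAMMAR = {'X': ['S'], 'S': ['BB'], 'B': ['aB', 'b']}

def closure(items):
    # CLOSURE(I): for every item with the dot before a nonterminal A,
    # add A -> .gamma for each production of A, until nothing changes.
    items = set(items)
    changed = True
    while changed:
        changed = False
        for head, body, dot in list(items):
            if dot < len(body) and body[dot] in GRAMMAR:
                for prod in GRAMMAR[body[dot]]:
                    new_item = (body[dot], prod, 0)
                    if new_item not in items:
                        items.add(new_item)
                        changed = True
    return frozenset(items)

def go(items, symbol):
    # GO(I, X): move the dot over `symbol`, then take the closure of the result.
    moved = {(head, body, dot + 1)
             for head, body, dot in items
             if dot < len(body) and body[dot] == symbol}
    return closure(moved)

I0 = closure({('X', 'S', 0)})   # initial item set: {X->.S, S->.BB, B->.aB, B->.b}
print(sorted(I0))
print(sorted(go(I0, 'B')))      # {S->B.B, B->.aB, B->.b}

Repeatedly applying go to every item set reachable from closure({X->.S}) over all grammar symbols yields the canonical collection of LR(0) item sets, that is, the states of the DFA that the report constructs by way of the NFA.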