# 简单类C语言词法分析器 (a simple lexical analyzer for a C-like language)

# -*- coding: utf-8 -*-
"""
guanshzh@https://www.360docs.net/doc/889498679.html,
学号姓名-词法分析程序.zip (source code ,test data)
Created on Sat Apr 21 15:54:25 2018
这是一个词法分析器程序，用于分析类C语言的代码
@author: Guqi
"""
# Reserved words: the 32 keywords of the C language.
_key = (
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if",
    "int", "long", "register", "return", "short", "signed", "static", "sizeof",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while",
)

# Operators. The lexer tests single characters against this tuple, so the
# one-character entries decide whether a character starts an operator.
_operator = ("+", "-", "*", "/", "+=", "++", "--", "*=", "-=", "/=", "==",
             "&", "&&", "|", "||", "=", "%", "%=", "!=", "!")

# Delimiters.
_delimiters = {'[', ']', ';', '(', ')', ',', '{', '}', '"', '\''}

# Common escape sequences, stored as their two-character source spelling
# (a backslash followed by one character).
# '\\\\' (escaped backslash) and "\\'" (escaped single quote) are included so
# that a backslash which is itself escaped is consumed as a pair and cannot be
# mistaken for the start of an escaped double quote.
_escch = {'\\a', '\\b', '\\f', '\\n', '\\r', '\\t', '\\v', '\\"', '\\\\', "\\'"}

# Writes the analysis result to a target file, one entry per line.
def writeResult(s):
    """Prompt for an output file name and write every item of *s* on its own line.

    Each item is converted with ``str()`` and followed by a newline. The file
    is opened in write mode with UTF-8 encoding, so existing content is
    replaced.
    """
    path = input("请输入输出文件名")
    # The ``with`` block closes the file; no explicit close is needed.
    with open(path, 'w', encoding="UTF-8") as f:
        f.writelines(str(line) + "\n" for line in s)

# Analyses the text of line number `row` and returns the tokens found on it.
def judge(mystr,row,state):
    """Tokenize one line of C-like source text.

    Parameters:
        mystr: the characters of the line (a list of one-character strings or
            a plain string). A terminating '\\n' is appended when missing.
        row:   line number, used only in the text of error entries.
        state: scanner state carried over from the previous line:
               1  = normal code,
               3  = inside a ``/* ... */`` comment,
               4  = inside a block comment, directly after a ``*``,
               10 = inside a string literal.

    Returns:
        ``(tokens, state)``. ``tokens`` is a list of entries such as
        ``['key', v]``, ``['id', v]``, ``['integer', v]``, ``['float', v]``,
        ``['operator', v]``, ``['delimiter', v]``, ``['delimiters', '"']``,
        ``['str', v]``, ``['escch', v]`` and ``['wrong', message, 'N行']``.
        ``state`` is the state to pass in for the next line.

    While a number is being scanned, ``state`` temporarily takes the values
    2-6 (leading zero, integer part, fraction, exponent marker, exponent
    digits); it is reset to 1 before the scan of that number ends.
    """
    # Every look-ahead below relies on a newline terminating the line; add one
    # for empty input or a last line that lacks it instead of raising IndexError.
    if not mystr or mystr[-1] != '\n':
        mystr = list(mystr) + ['\n']

    tokens = []
    position = 0
    ch = mystr[position]
    _value = ''

    # For operators that can be doubled or combined: the characters allowed as
    # the second character. Any other operator character may only pair with '='.
    second_chars = {
        '+': ('+', '='),
        '-': ('-', '='),
        '&': ('&',),
        '|': ('|',),
    }

    def getonech():
        # Append the current character to the pending lexeme and advance.
        nonlocal _value , ch , position
        _value += ch
        position += 1
        ch = mystr[position]

    def judgeoperator(b):
        # The operator is still undecided: it is two characters long only if
        # the following character is one of those in b.
        nonlocal _value , ch , position
        getonech()
        if ch in b:
            _value += ch
            tokens.append(['operator',_value])
        else:
            tokens.append(['operator',mystr[position-1]])
            # Step back so the main loop re-reads the character just peeked.
            position -= 1
        _value = ''

    while True:

        # A byte-order mark is handled like a blank.
        if ch == '\ufeff':
            ch = ' '

        # ---- string literals ----
        if state == 1 and ch == '"':
            # NOTE(review): the quote is labelled 'delimiters' here but
            # 'delimiter' in the generic delimiter branch below; kept as is.
            tokens.append(['delimiters','"'])
            state = 10

        elif state == 10:
            if ch == '"':
                state = 1
                tokens.append(['str',_value])
                _value = ''
                tokens.append(['delimiters','"'])
                position += 1
                ch = mystr[position]
            elif ch == '\\':
                _tempvalue = ch + mystr[position + 1]
                if _tempvalue in _escch:
                    tokens.append(['str',_value])
                    _value = ''
                    tokens.append(['escch',_tempvalue])
                    # Skip the escaped character as well.
                    position += 1
                else:
                    _value += ch
            else:
                _value += ch

        # ---- comments ----
        if state == 1 and ch == '/':
            ch = mystr[position+1]
            if ch == '/':
                # Line comment: treat the rest of the line as finished.
                ch = '\n'
            elif ch == '*':
                position += 1
                state = 3
            else:
                # A plain '/' operator; restore the current character.
                ch = '/'

        elif state == 3:
            if ch == '*':
                state = 4

        elif state == 4:
            if ch == '/':
                state = 1
                position += 1
                ch = mystr[position]
            elif ch != '*':
                state = 3

        # ---- end of line ----
        if ch == "\n":
            # Inside a string, only a backslash right before the newline
            # keeps the string open across lines.
            if state == 10 and (position == 0 or mystr[position-1] != '\\'):
                state = 1
                tokens.append(['wrong','引号未结束',str(row)+'行'])

            return (tokens,state)

        # ---- ordinary tokens ----
        if state == 1 and ch != ' ' and ch != '\t':

            if ch.isalpha() or ch == '_':
                # Identifier or keyword: read while letters, digits or '_'.
                while ch.isalpha() or ch.isdigit() or ch == '_':
                    getonech()
                if _value in _key:
                    tokens.append(['key',_value])
                else:
                    tokens.append(['id',_value])
                _value = ''
                position -= 1

            elif ch.isdigit():
                state = 2 if ch == '0' else 3

                getonech()

                if state == 2:
                    if ch == '.':
                        state = 4
                    elif ch.isdigit():
                        tokens.append(['wrong','非零数字不能以0开头',str(row)+"行"])
                        _value = ''
                        position -= 1
                    else:
                        tokens.append(['integer',_value])
                        _value = ''
                        position -= 1

                if state == 3:
                    while ch.isdigit():
                        getonech()
                    if ch == '.':
                        state = 4
                    else:
                        tokens.append(['integer',_value])
                        _value = ''
                        position -= 1

                if state == 4:
                    getonech()
                    while ch.isdigit():
                        getonech()
                    if ch == 'e' or ch == 'E':
                        state = 5
                    else:
                        tokens.append(['float',_value])
                        _value = ''
                        position -= 1

                if state == 5:
                    getonech()
                    if ch == '+' or ch == '-' or ch.isdigit():
                        state = 6
                    else:
                        tokens.append(['wrong','e后应为整数',str(row)+'行'])
                        _value = ''
                        # Step back so the character after 'e' is scanned on
                        # the next iteration instead of being skipped.
                        position -= 1

                if state == 6:
                    getonech()
                    while ch.isdigit():
                        getonech()
                    tokens.append(['float',_value])
                    _value = ''
                    position -= 1

                # Number finished: leave the temporary number states.
                state = 1

            elif ch in _operator:
                judgeoperator(second_chars.get(ch, ('=',)))

            elif ch in _delimiters:
                tokens.append(['delimiter',ch])

            elif ch == '\\':
                getonech()
                _value += ch
                if _value in _escch:
                    tokens.append(['escch',_value])
                else:
                    tokens.append(['wrong','不合理的输入\\',str(row)+'行'])
                    position -= 1
                _value = ''

            else:
                tokens.append(['wrong','接收到不合理的输入'+ch,str(row)+"行"])

        position += 1
        ch = mystr[position]  # read the next character

if __name__ == '__main__':
    # Ask for the source file, tokenize it line by line and write the result.
    path = input("请输入测试代码文件名")
    list1 = []
    # Scanner state carried from one line to the next (1 = normal code), so
    # block comments and continued strings can span lines.
    state = 1
    # The ``with`` block guarantees the input file is closed.
    with open(path, 'r', encoding="UTF-8") as f:
        for row, line in enumerate(f):
            # The extra '\n' guarantees a terminator even when the last line
            # of the file has none.
            (list2, state) = judge(list(line + '\n'), row, state)
            list1.append(list2)

    writeResult(list1)