# -*- coding: utf-8 -*-
import re
import sys
import getopt
# Reserved words of the toy language; an all-letter lexeme found here is
# tagged as a keyword, otherwise as an identifier.
KEYWORDS_LIST = ['int', 'double', 'if', 'else', 'return', 'main']

# Operator spellings. The automaton tests single characters against this list.
OPERATORS_LIST = ['<', '=', '+', '+=', '<=', '-', '*']

# Single-character delimiters.
DELIMITERS_LIST = ['(', ')', '{', '}', ';']

# Token category names, looked up by numeric id when a token is tagged.
MATCH_TAG = {
    1: "KEYWORD",
    2: "DIGIT_CONSTANT",
    3: "DELIMITER",
    4: "OPERATOR",  # was misspelled "OPTERATOR"
    5: "IDENTIFIER",
    6: "!!ERROR!!",
}

# Sample program used by the demo entry point; "2b" is a malformed lexeme.
TEST_STRING = """
int main(){
int a = 10;
double b = -20.9;
if(a<=b)
a+=b;
else
a=2b+6;
return a;
}
"""
class DFA(object):
    """Character-driven finite automaton that extracts one token per search.

    States 0-6 mean "still reading"; a negative state means the current
    token has been decided. ``buffer`` holds the lexeme read so far and
    ``tag`` holds its category (a ``MATCH_TAG`` value) once decided.
    """

    def __init__(self, content):
        """Create an analyzer over *content*, the full text to tokenize."""
        self.STATES = list(range(7))
        self.ENDING_STATES = [-i for i in range(1, 7)]
        self.content = content
        self.line = 0
        # Per-token scratch values; ``clean`` resets them before each search.
        self.state = 0
        self.buffer = ""
        self.tag = None

    def rule_0(self, char):
        """Start state: classify the first character of a token."""
        self.buffer += char
        if char.isalpha():
            self.state = 1
        elif char.isdigit():
            self.state = 2
        elif char in DELIMITERS_LIST:
            self.state = 3
        elif char in OPERATORS_LIST:
            self.state = 4
        else:
            # -9 is in neither STATES nor ENDING_STATES, so ``search`` steps
            # past this character before stopping. Keep it in the buffer so
            # the error report shows what was rejected.
            self.state = -9
            self.tag = MATCH_TAG[6]

    def rule_1(self, char):
        """Letters-only word: may still turn out to be a keyword."""
        if char.isalpha():
            self.buffer += char
            self.state = 1
        elif char.isdigit():
            # A digit rules out a keyword; continue as a plain identifier.
            self.buffer += char
            self.state = 5
        else:
            self.state = -1
            if self.buffer in KEYWORDS_LIST:
                self.tag = MATCH_TAG[1]
            else:
                self.tag = MATCH_TAG[5]

    def rule_2(self, char):
        """Numeric constant: digits with at most one decimal point."""
        if char.isdigit():
            self.buffer += char
            self.state = 2
        elif char == '.' and '.' not in self.buffer:
            self.buffer += char
            self.state = 2
        elif char.isalpha():
            # A letter glued to a number (e.g. "2b") is a malformed lexeme.
            self.buffer += char
            self.state = 6
        else:
            self.state = -2
            self.tag = MATCH_TAG[2]

    def rule_3(self, char):
        """Delimiter: already complete, so *char* is never consumed."""
        self.state = -3
        self.tag = MATCH_TAG[3]

    def rule_4(self, char):
        """Operator: extend with operator characters or start a negative number."""
        # NOTE(review): any run of operator characters is accepted, and a
        # digit after a trailing '-' turns the whole buffer into a number
        # (so "=-5" becomes one DIGIT_CONSTANT). Kept as-is to preserve the
        # existing tokenization; confirm whether this is intended.
        if char in OPERATORS_LIST:
            self.buffer += char
            self.state = 4
        elif self.buffer.endswith('-') and char.isdigit():
            self.buffer += char
            self.state = 2
        else:
            self.state = -4
            self.tag = MATCH_TAG[4]

    def rule_5(self, char):
        """Identifier containing at least one digit."""
        if char.isalpha() or char.isdigit():
            self.buffer += char
            self.state = 5
        else:
            self.state = -5
            self.tag = MATCH_TAG[5]

    def rule_6(self, char):
        """Error state: swallow the rest of a malformed lexeme."""
        if char.isalpha() or char.isdigit():
            # Accumulate the character so the whole bad lexeme is reported.
            self.buffer += char
            self.state = 6
        else:
            self.state = -6
            self.tag = MATCH_TAG[6]

    def clean(self):
        """Reset the per-token scratch values."""
        self.state = 0
        self.tag = None
        self.buffer = ""

    def skip_blank(self, point):
        """Return the index of the first non-blank character at or after *point*.

        Every newline skipped increments ``self.line``.
        """
        content = self.content
        while point < len(content) and self.is_blank(content[point]):
            if content[point] == '\n':
                self.line += 1
            point += 1
        return point

    def log(self):
        """Print the current line counter, tag and lexeme."""
        print("<line %d - %s - '%s' >" % (self.line, self.tag, str(self.buffer)))

    @staticmethod
    def is_blank(char):
        """Return True if *char* is a space, tab, newline or carriage return."""
        return char in (' ', '\t', '\n', '\r')

    def search(self, point):
        """Read one token starting at *point* and return the next index to scan.

        On return ``self.buffer`` holds the lexeme and ``self.tag`` its
        category; ``self.tag`` is None only when nothing but blanks remained.
        """
        self.clean()
        point = self.skip_blank(point)
        content = self.content
        rules = (
            self.rule_0, self.rule_1, self.rule_2, self.rule_3,
            self.rule_4, self.rule_5, self.rule_6,
        )
        while self.state in self.STATES and point < len(content):
            rules[self.state](content[point])
            if self.state in self.ENDING_STATES:
                # The character that ended the token belongs to the next one.
                break
            point += 1
        if self.state in self.STATES and self.state != 0:
            # Input ran out in the middle of a token. Feed a blank sentinel,
            # which every rule treats as "token over", so the tag is decided.
            # The sentinel is never stored and does not touch the line count.
            rules[self.state]('\n')
        return point
class Tools(object):
    """Driver helpers for running the analyzer over a piece of text."""

    @staticmethod
    def process(content):
        """Tokenize *content* and print one log line per token found.

        A token is reported whenever a search leaves a tag set, rather than
        only while the cursor is still inside the text, so a result that
        finishes exactly at the end of the input is not silently dropped.
        """
        point = 0
        analyzer = DFA(content)
        while point < len(content):
            point = analyzer.search(point)
            # ``tag`` stays None when the search found only trailing blanks.
            if analyzer.tag is not None:
                analyzer.log()
if __name__ == "__main__":
    # Demo run over the built-in sample program. To analyze a file instead,
    # read its text into ``content`` (e.g. open(path).read()) before the call.
    # NOTE(review): keep the module-level name ``content``; code elsewhere in
    # this file may look it up as a global instead of receiving it as an
    # argument.
    content = TEST_STRING
    Tools.process(content)
# [Scraped page footer: "Reader comments" - not part of the program]