-
Notifications
You must be signed in to change notification settings - Fork 0
/
scanner.py
executable file
·146 lines (128 loc) · 5.09 KB
/
scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from IO.file_IO import ErrorType, TokenType, InputFileIO, LexicalErrorIO, SymbolTableIO, TokenIO
from IO import input_check, input_process
from scanner_sup.dfa import Dfa
from scanner_sup.transition import Transition as tr
class Scanner:
    """Lexical scanner that turns characters from the input file into tokens.

    Characters are pulled one at a time from ``self.inputFile`` and fed to
    ``self.dfa``; the DFA's state decides when a lexeme is complete, when the
    lookahead character has to be pushed back, and when a lexical error must
    be reported to ``self.errorFile``.
    """

    def __init__(self) -> None:
        """Create the file wrappers and the DFA used while scanning."""
        self.inputFile = InputFileIO()
        # NOTE(review): the meaning of the positional ``False`` passed to the
        # three writers below is defined in IO.file_IO - confirm before changing.
        self.errorFile = LexicalErrorIO(False)
        self.symbolTableFile = SymbolTableIO(False)
        self.tokenFile = TokenIO(False)
        self.dfa = Dfa()

    def do_before_terminate(self):
        """Close the input file and the lexical-error file."""
        self.inputFile.close_file()
        self.errorFile.close_file()

    def get_next_token(self):
        """Return the next token, skipping everything that yields no token.

        Whitespace, comments and lexical errors make ``_handle_input`` return
        ``None``; those are skipped in a loop (not by recursion) so that long
        runs of skipped input cannot exhaust the interpreter's call stack.

        Returns:
            ``(lexeme, token_type, lineno)`` for every token produced by
            ``_handle_input``, including the END token emitted when the EOF
            character is read. If fetching a character raises, the 2-tuple
            ``('', TokenType.END)`` is returned instead; that shape is kept
            unchanged for backward compatibility with existing callers.
        """
        while True:
            try:
                first_char = self.inputFile.get_char()
            except Exception:
                # NOTE(review): presumably raised when reading past the end of
                # the input or after the file was closed - confirm in IO.file_IO.
                return '', TokenType.END
            self.dfa.reset_current_state()  # Make sure that we are at state 0
            token = self._handle_input(first_char)
            if token is not None:
                return (*token, self.inputFile.lineno)

    def print_token(self):
        """Debug helper: write a fixed placeholder token to the token file."""
        self.tokenFile.write_token(10, "asdfa", TokenType.KEYWORD)

    def _handle_input(self, initial):
        """Dispatch on the first character of a lexeme.

        Returns:
            A ``(lexeme, token_type)`` pair, or ``None`` when the input
            produced no token (whitespace, comment, or a reported error).
        """
        if input_check.is_EOF(initial):
            self.do_before_terminate()
            return initial, TokenType.END
        if initial.isalpha():
            return self._handle_id_and_keyword(initial)
        if initial.isnumeric():
            return self._handle_num(initial)
        if input_check.is_symbol(initial):
            return initial, TokenType.SYMBOL
        if initial == '=':
            return self._handle_equality(initial)
        if initial == '*':
            return self._handle_star(initial)
        if initial == '/':
            return self._handle_comment(initial)
        if initial.isspace():
            return None
        # Anything else cannot start a lexeme: report it and produce no token.
        self.errorFile.write_error(self.inputFile.lineno, initial, ErrorType.INVALID_INPUT)
        return None

    def _handle_id_and_keyword(self, initial):
        """Scan an identifier or keyword that starts with ``initial``.

        Returns:
            ``(lexeme, TokenType.KEYWORD)`` or ``(lexeme, TokenType.ID)``; new
            identifiers are also written to the symbol table. ``None`` when
            the DFA reports an error (the error is written to the error file).
        """
        token = initial
        self.dfa.move(initial)
        while True:
            character = self.inputFile.get_char()
            self.dfa.move(character)
            if self.dfa.is_accepting_with_return():
                # The lookahead character is not part of this lexeme.
                self.inputFile.go_to_previous_char()
                if input_process.is_keyword(token):
                    return token, TokenType.KEYWORD
                self.symbolTableFile.write_identifier(token)
                return token, TokenType.ID
            token += character
            if self.dfa.is_error():
                return self._handle_error(token)

    def _handle_error(self, token, line=-1):
        """Write the DFA's current error for ``token`` to the error file.

        Args:
            token: The offending lexeme.
            line: Line number to report; ``-1`` means "use the input file's
                current line number".

        Returns:
            Always ``None``, so callers can ``return self._handle_error(...)``
            to signal that no token was produced.
        """
        error = self.dfa.get_error()
        line_num = line if line != -1 else self.inputFile.lineno
        self.errorFile.write_error(line_num, token, error)
        return None

    def _handle_num(self, initial):
        """Scan a number that starts with ``initial``.

        Returns:
            ``(lexeme, TokenType.NUM)``, or ``None`` when the DFA reports an
            error (the error is written to the error file).
        """
        token = initial
        self.dfa.move(initial)
        while True:
            character = self.inputFile.get_char()
            self.dfa.move(character)
            if self.dfa.is_accepting_with_return():
                # The lookahead character is not part of this lexeme.
                self.inputFile.go_to_previous_char()
                return token, TokenType.NUM
            token += character
            if self.dfa.is_error():
                return self._handle_error(token)

    def _handle_equality(self, initial):
        """Scan a lexeme that starts with ``=`` using one character of lookahead.

        Returns:
            ``(initial, TokenType.SYMBOL)`` when the DFA accepts with return
            (the lookahead is pushed back), ``(initial + lookahead,
            TokenType.SYMBOL)`` when the DFA accepts the two-character lexeme,
            or ``None`` otherwise (DFA errors are written to the error file).
        """
        token = initial
        self.dfa.move(initial)
        character = self.inputFile.get_char()
        self.dfa.move(character)
        if self.dfa.is_accepting_with_return():
            self.inputFile.go_to_previous_char()
            return token, TokenType.SYMBOL
        token += character
        if self.dfa.is_accepting():
            return token, TokenType.SYMBOL
        if self.dfa.is_error():
            return self._handle_error(token)
        return None

    def _handle_star(self, initial):
        """Scan a lexeme that starts with ``*`` using one character of lookahead.

        Returns:
            ``(initial, TokenType.SYMBOL)`` when the DFA accepts with return
            (the lookahead is pushed back), or ``None`` otherwise; on a DFA
            error the two-character lexeme is written to the error file.
        """
        self.dfa.move(initial)
        character = self.inputFile.get_char()
        self.dfa.move(character)
        if self.dfa.is_accepting_with_return():
            self.inputFile.go_to_previous_char()
            return initial, TokenType.SYMBOL
        if self.dfa.is_error():
            return self._handle_error(initial + character)
        return None

    def _handle_comment(self, initial):
        """Consume input that starts with ``/`` until the DFA accepts or errors.

        Returns:
            Always ``None``: accepted input yields no token, and errors are
            written to the error file with the line on which the lexeme began.
        """
        token = initial
        line_num = self.inputFile.lineno
        self.dfa.move(initial)
        while True:
            character = self.inputFile.get_char()
            token += character
            self.dfa.move(character)
            if self.dfa.is_accepting():
                return None
            if self.dfa.is_error():
                # To write the unclosed comment error with the correct line number, we should pass the
                # "starting line" of the comment, which is line_num
                if not tr.is_invalid_token(character):
                    # The last character is not itself invalid: push it back
                    # and keep it out of the reported lexeme.
                    self.inputFile.go_to_previous_char()
                    token = token[:-1]
                return self._handle_error(token, line_num)

    def write_token(self, token):
        """Write ``token`` (lexeme at index 0, type at index 1) to the token file.

        The line number recorded is the input file's current line number.
        """
        self.tokenFile.write_token(self.inputFile.lineno, token[0], token[1])