-
Notifications
You must be signed in to change notification settings - Fork 0
/
Automata.py
303 lines (241 loc) · 10.1 KB
/
Automata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
import re
class Automata(object):
#Constructor de la clase
def __init__(self, states, initial, final, transitions):
self.states = states
self.initial = initial
self.final = final
self.transitions = transitions
def print(self):
print("Automata:")
print(f"Estados: {self.states}")
print(f"Estado inicial: {self.initial}")
print(f"Estado final: {self.final}")
print("Transiciones:")
for transition in self.transitions:
print(transition)
#Funcion delta del automata
def delta(self, state, character):
if state in self.states:
for transition in self.transitions:
if(transition[0] == [state, character]):
return transition[1]
return None
#Obtiene la epsilon cerradura de un estado
def e_closure(self, state):
states = [state]
for transition in self.transitions:
if transition[0] == [state, 'epsilon']:
if transition[1] not in states:
states.append(transition[1])
states.extend(self.e_closure(transition[1]))
return states
#Obtiene la epsilon cerradura de un conjunto de estados
def e_closure_set(self, set):
states = []
for state in set:
e_closure = self.e_closure(state)
if e_closure not in states:
states.append(e_closure)
states = [item for sublist in states for item in sublist]
unique_states = []
for item in states:
if item not in unique_states:
unique_states.append(item)
unique_states.sort()
return [unique_states]
#Elimina una transicion ya registrada
def remove_transition(self, transition):
if transition in self.transitions:
self.transitions.remove(transition)
#Define si el automata acepta una cadena
def accepts(self, string):
current_state = self.initial
#print(f"Empezamos en {current_state}")
for char in string:
#print(f"Procesamos caracter '{char}'")
current_state = self.delta(current_state, char)
if(current_state == None):
break
#print(f"Cambiamos a {current_state}")
if current_state == None or current_state not in self.final:
return False
return True
#Convierte un automata finito no determinista
#en un automata finito determinista
def nfa_to_dfa(self, alphabet):
s0 = self.e_closure_set([self.initial])
queue = [s0]
set_states = [s0]
transitions = []
while queue:
current_state = queue.pop()
s_i = []
for states in current_state:
for char in alphabet:
for state in states:
reachable_state = self.delta(state,char)
if reachable_state not in s_i and reachable_state is not None:
s_i.append(reachable_state)
transitions.append([[state, char], reachable_state])
s_i = self.e_closure_set(s_i)
if s_i != [[]] and s_i not in set_states:
set_states.append(s_i)
queue.append(s_i)
return self.construct_dfa(set_states, transitions)
def construct_dfa(self, set_states, valid_transitions):
state_label = 0
states = []
dfa_states = []
final_states = []
initial_state = 0
transitions = []
#construct the new states
for state in set_states:
states.append([state_label,state[0]])
dfa_states.append(state_label)
if self.final in state[0]:
final_states.append(state_label)
if self.initial in state[0]:
initial_state = state_label
state_label += 1
#constructut the new transitions
for state in states:
for transition in valid_transitions:
transition_state = transition[0][0]
transition_char = transition[0][1]
transition_target = transition[1]
if transition_state in state[1]:
for aux_state in states:
if transition_target in aux_state[1]:
target_state = aux_state[0]
new_transition = [[aux_state[0]-1, transition_char], target_state]
transitions.append(new_transition)
return Automata(dfa_states, initial_state, final_states, transitions)
#Regresa la posicion de un token cuando
#es aceptado por el automata
def match_token(self, string):
result = []
for index,token in enumerate(string):
if self.accepts(token):
result.append([index,token])
return result
# A partir de una expresion regular, se construye un automata utilizando el algoritmo
# de Thompson
def compile(regex):
automatons = []
process_next = True
for index,char in enumerate(regex):
if process_next:
if char == '|':
nfa1 = automatons.pop()
nfa1_qf = max(nfa1.states)
if [[nfa1_qf -1, regex[index - 1]], nfa1_qf] in nfa1.transitions:
nfa1.remove_transition([[nfa1_qf -1, regex[index - 1]], nfa1_qf])
q_previous = nfa1_qf -1
qi_1 = nfa1_qf + 1
qf_1 = nfa1_qf + 2
qi_2 = nfa1_qf + 3
qf_2 = nfa1_qf + 4
qf = nfa1.final
next_char = regex[index+1]
previous_char = regex[index-1]
new_transititions = [
[[q_previous, 'epsilon'], qi_1],
[[q_previous, 'epsilon'], qi_2],
[[qi_1, previous_char], qf_1],
[[qi_2, next_char], qf_2],
[[qf_1, 'epsilon'], qf],
[[qf_2, 'epsilon'], qf]
]
states = nfa1.states + [qi_1,qf_1,qi_2,qf_2]
transitions = nfa1.transitions + new_transititions
automatons.append(Automata(states, nfa1.initial, qf, transitions))
process_next = False
elif char == '*':
nfa1 = automatons.pop()
nfa1_qi = nfa1.initial
nfa1_qf = max(nfa1.states)
qi = nfa1_qf + 1
qf = nfa1_qf + 2
new_transititions = [
[[nfa1_qf, 'epsilon'], qf],
[[nfa1_qf, 'epsilon'], nfa1_qi],
[[qi, 'epsilon'], nfa1_qi],
[[qi, 'epsilon'], qf]
]
transitions = nfa1.transitions + new_transititions
automatons.append(Automata(states + [qi,qf], qi, qf, transitions))
process_next = True
elif char == '?':
nfa1 = automatons.pop()
nfa1_qf = nfa1.final
last_transition = nfa1.transitions.pop()
nfa1.remove_transition(last_transition)
q_previous = last_transition[0][0]
qi_1 = nfa1_qf + 1
qf_1 = nfa1_qf + 2
qi_2 = nfa1_qf + 3
qf_2 = nfa1_qf + 4
qf = nfa1.final
previous_char = regex[index-1]
new_transititions = [
[[q_previous, 'epsilon'], qi_1],
[[q_previous, 'epsilon'], qi_2],
[[qi_1, previous_char], qf_1],
[[qi_2, 'epsilon'], qf_2],
[[qf_1, 'epsilon'], qf],
[[qf_2, 'epsilon'], qf]
]
states = nfa1.states + [qi_1,qf_1,qi_2,qf_2]
transitions = nfa1.transitions + new_transititions
automatons.append(Automata(states, nfa1.initial, qf, transitions))
process_next = True
else:
if automatons == []:
qi = 0
qf = 1
transitions = [
[[qi,char], qf]
]
automatons.append(Automata([qi,qf], qi, qf, transitions))
else:
nfa1 = automatons.pop()
qi = nfa1.initial
nfa1_qf = nfa1.final
qf = max(nfa1.states) + 1
states = nfa1.states + [qf]
new_transititions = [
[[nfa1_qf, char],qf]
]
transitions = nfa1.transitions + new_transititions
automatons.append(Automata(states, qi, qf, transitions))
process_next = True
else:
process_next = True
return automatons.pop()
def tokenize(string):
tokens = re.findall(r'\b\w+\b', string)
return tokens
regex = "niña|os?"
print("Expresion regular: ", regex)
nfa = compile(regex)
print("Automata NFA")
nfa.print()
dfa = nfa.nfa_to_dfa("niñaos")
print()
print("Automata DFA")
dfa.print()
#Cadenas que acepta
print()
print("DFA acepta niño: ",dfa.accepts("niño"))
print("DFA acepta niña: ",dfa.accepts("niña"))
print("DFA acepta niñas: ",dfa.accepts("niñas"))
print("DFA acepta niños: ",dfa.accepts("niños"))
print("DFA acepta niñs: ",dfa.accepts("niñs"))
text = 'Las niñas extranjeras jugaban con el niño y la niña en el patio.'
tokens = tokenize('Las niñas extranjeras jugaban con el niño y la niña en el patio.')
print()
print("Procesando el texto y obteniendo la posicion de los tokens aceptados")
print("Texto: ", text)
print(dfa.match_token(tokens))