-
Notifications
You must be signed in to change notification settings - Fork 0
/
esp_parser.py
72 lines (59 loc) · 2.62 KB
/
esp_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-
from nltk.tag import StanfordPOSTagger
from nltk import word_tokenize
from nltk.parse.stanford import StanfordParser
from nltk.tree import Tree
class spanishPOSTagger(object):
    """A POS tagger for the Spanish language, backed by the Stanford POS tagger.

    Attributes:
        jar: A string representing the path to the Stanford POS tagger jar file.
        model: A string representing the path to the Spanish tagger model file.
        tags: A list of (word, POS tag) tuples: the result of the most recent
            call to tag(), or the list passed to the constructor.
    """

    def __init__(self, jar, model, tags=None):
        """Return a spanishPOSTagger object.

        Args:
            jar: Path to the Stanford POS tagger jar file.
            model: Path to the Spanish tagger model file.
            tags: Optional initial list of (word, POS tag) tuples. When
                omitted, each instance gets its own new empty list.
        """
        self.jar = jar
        self.model = model
        # Use a None sentinel instead of a `[]` default: a list default is
        # created once and would be shared by every instance that omits `tags`.
        self.tags = [] if tags is None else tags

    def tag(self, sentence):
        """Return a list containing tokenized words and their POS tags.

        The sentence is tokenized with word_tokenize and tagged with a
        StanfordPOSTagger built from this object's model and jar paths.
        The result is also stored in ``self.tags``.

        Args:
            sentence: The text to tokenize and tag.

        Returns:
            The tagger output for the tokenized sentence.
        """
        pos_tagger = StanfordPOSTagger(self.model, self.jar, encoding='utf8')
        self.tags = pos_tagger.tag(word_tokenize(sentence))
        return self.tags
class spanishParser(object):
    """A parser for the Spanish language, backed by the Stanford parser.

    Attributes:
        esp_model_path: A string representing the path to the Stanford
            Spanish model file.
        path_to_models_jar: A string representing the path to the parser
            models jar file.
        path_to_jar: A string representing the path to the Stanford parser
            jar file.
        phrase_list: A list of the phrases found by the most recent call to
            getPhrase().
        parse_tree: An iterator over the parse trees of the most recently
            parsed sentence (empty until parse() has been called).
    """

    def __init__(self, esp_model_path, path_to_models_jar, path_to_jar):
        """Return a spanishParser object.

        Args:
            esp_model_path: Path to the Stanford Spanish model file.
            path_to_models_jar: Path to the parser models jar file.
            path_to_jar: Path to the Stanford parser jar file.
        """
        self.esp_model_path = esp_model_path
        self.path_to_models_jar = path_to_models_jar
        self.path_to_jar = path_to_jar
        self.phrase_list = []
        # Cached trees of the last parsed sentence. Starting with an empty
        # list lets getPhrase()/drawParseTree() be called before parse()
        # without raising AttributeError.
        self._trees = []
        self.parse_tree = iter(self._trees)

    def parse(self, sentence):
        """Parse the sentence and set the parse tree property.

        Args:
            sentence: The raw sentence text to parse.

        Returns:
            An iterator over the parse trees produced for the sentence. The
            same iterator is stored in ``self.parse_tree``.
        """
        parser = StanfordParser(
            model_path=self.esp_model_path,
            path_to_models_jar=self.path_to_models_jar,
            path_to_jar=self.path_to_jar,
            encoding='utf8',
        )
        # raw_parse() returns a one-shot iterator. Keep the trees in a list
        # so getPhrase() and drawParseTree() can each walk them any number of
        # times; consuming the returned iterator does not affect the cache.
        self._trees = list(parser.raw_parse(sentence))
        self.parse_tree = iter(self._trees)
        return self.parse_tree

    def getPhrase(self, phrase_type):
        """Return a list of phrases of the given type.

        Searches the children of each top-level tree of the last parsed
        sentence (the top-level node itself is not matched) for subtrees
        labelled ``phrase_type`` and joins the leaves of each match with
        single spaces.

        Args:
            phrase_type: The tree label to look for.

        Returns:
            A new list of matching phrases, in traversal order. It is empty
            when nothing has been parsed or nothing matches. The list is
            also stored in ``self.phrase_list``, replacing earlier results.
        """
        phrases = []
        for tree in self._trees:
            for child in tree:
                # A direct child that is a bare leaf has no label or subtrees.
                if not isinstance(child, Tree):
                    continue
                for subtree in child.subtrees():
                    if subtree.label() == phrase_type:
                        phrases.append(
                            " ".join(str(leaf) for leaf in subtree.leaves())
                        )
        # Start from a fresh list on every call so phrases from earlier
        # phrase types or earlier sentences do not leak into this result.
        self.phrase_list = phrases
        return self.phrase_list

    def drawParseTree(self):
        """Draw GUI for the parse tree.

        Calls draw() on every child tree of each top-level tree of the last
        parsed sentence. Does nothing when no sentence has been parsed.
        """
        for tree in self._trees:
            for child in tree:
                if isinstance(child, Tree):
                    child.draw()