From 061b1c3c0872522d378f9bec73636a7d8b13639d Mon Sep 17 00:00:00 2001 From: Juan Miguel Cejuela Date: Thu, 23 Jun 2016 19:39:21 +0200 Subject: [PATCH] use nalaf's --- relna/preprocessing/edges.py | 85 -------------------------- relna/structures/relation_pipelines.py | 2 +- 2 files changed, 1 insertion(+), 86 deletions(-) delete mode 100644 relna/preprocessing/edges.py diff --git a/relna/preprocessing/edges.py b/relna/preprocessing/edges.py deleted file mode 100644 index 22f4795..0000000 --- a/relna/preprocessing/edges.py +++ /dev/null @@ -1,85 +0,0 @@ -import abc -from nalaf.structures.data import Edge -from nltk.stem import PorterStemmer - -class EdgeGenerator: - """ - Abstract class for generating edges between two entities. Each edge represents - a possible relationship between the two entities - Subclasses that inherit this class should: - * Be named [Name]EdgeGenerator - * Implement the abstract method generate - * Append new items to the list field "edges" of each Part in the dataset - """ - - @abc.abstractmethod - def generate(self, dataset): - """ - :type dataset: nalaf.structures.data.Dataset - """ - return - - -class SimpleEdgeGenerator(EdgeGenerator): - """ - Simple implementation of generating edges between the two entities - if they are contained in the same sentence. - - Implements the abstract class EdgeGenerator. - - :type entity1_class: str - :type entity2_class: str - :type relation_type: str - """ - - def __init__(self, entity1_class, entity2_class, relation_type): - self.entity1_class = entity1_class - self.entity2_class = entity2_class - self.relation_type = relation_type - - def generate(self, dataset): - from itertools import product, chain - for part in dataset.parts(): - part.edges = [] - for ann_1, ann_2 in product( - (ann for ann in chain(part.annotations, part.predicted_annotations) if ann.class_id == self.entity1_class), - (ann for ann in chain(part.annotations, part.predicted_annotations) if ann.class_id == self.entity2_class)): - index_1 = part.get_sentence_index_for_annotation(ann_1) - index_2 = part.get_sentence_index_for_annotation(ann_2) - if index_1 == index_2 and index_1 != None: - part.edges.append( - Edge(ann_1, ann_2, self.relation_type, - part.sentences[index_1], index_1, part)) - - -class WordFilterEdgeGenerator(EdgeGenerator): - """ - Simple implementation of generating edges between the two entities - if they are contained in the same sentence. - - Implements the abstract class EdgeGenerator. - - :type entity1_class: str - :type entity2_class: str - :type relation_type: str - """ - def __init__(self, entity1_class, entity2_class, words): - self.entity1_class = entity1_class - self.entity2_class = entity2_class - self.relation_type = relation_type - self.words = words - - def generate(self, dataset): - from itertools import product - for part in dataset.parts(): - for ann_1, ann_2 in product( - (ann for ann in part.annotations if ann.class_id == self.entity1_class), - (ann for ann in part.annotations if ann.class_id == self.entity2_class)): - index_1 = part.get_sentence_index_for_annotation(ann_1) - index_2 = part.get_sentence_index_for_annotation(ann_2) - if index_1 == index_2 and index_1 != None: - for token in part.sentences[index1]: - if token.word in self.words: - part.edges.append( - Edge(ann_1, ann_2, self.relation_type, - part.sentences[index_1], index_1, part)) diff --git a/relna/structures/relation_pipelines.py b/relna/structures/relation_pipelines.py index 41af1b2..e232ad2 100644 --- a/relna/structures/relation_pipelines.py +++ b/relna/structures/relation_pipelines.py @@ -9,7 +9,7 @@ from nalaf.preprocessing.spliters import Splitter, NLTKSplitter from nalaf.preprocessing.tokenizers import Tokenizer, NLTKTokenizer, TmVarTokenizer from relna.preprocessing.parsers import Parser, BllipParser, SpacyParser -from relna.preprocessing.edges import SimpleEdgeGenerator +from nalaf.preprocessing.edges import SimpleEdgeGenerator from spacy.en import English class RelationExtractionPipeline: