Skip to content

Commit

Permalink
Merge pull request #138 from michelole/cleanup
Browse files (browse the repository at this point in the history)
Cleanup
  • Loading branch information
Michel Oleynik authored Aug 21, 2020
2 parents 4436776 + 2f5ff19 commit 299b76f
Show file tree
Hide file tree
Showing 63 changed files with 166 additions and 2,695 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ matrix:
- python: "3.8"
before_install:
- export BOTO_CONFIG=/dev/null # See https://github.com/travis-ci/travis-ci/issues/7940
- export BING_SEARCH_API_KEY=${BING_SEARCH_API_KEY:=secret} # Fallback to 'secret' to avoid empty value
- export PYTHONHASHSEED=500 # Make word2vec deterministic
script:
- pylint -E acres && py.test --cov=acres
Expand Down
4 changes: 2 additions & 2 deletions acres/evaluation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Package containing evaluation modules.
"""
from acres.evaluation import corpus, evaluation, metrics
from acres.evaluation import evaluation, metrics

__all__ = ['corpus', 'evaluation', 'metrics']
__all__ = ['evaluation', 'metrics']
160 changes: 0 additions & 160 deletions acres/evaluation/corpus.py

This file was deleted.

5 changes: 2 additions & 3 deletions acres/evaluation/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from itertools import islice
from typing import Dict, Tuple, List, Set

import acres.util.acronym
from acres.evaluation import metrics
from acres.model import expansion_standard, detection_standard, topic_list
from acres.resolution import resolver
Expand Down Expand Up @@ -52,7 +51,7 @@ def test_input(true_expansions: Set[str], possible_expansions: List[str],
return False


def analyze(contextualized_acronym: acres.util.acronym.Acronym, true_expansions: Set[str],
def analyze(contextualized_acronym: Acronym, true_expansions: Set[str],
strategy: resolver.Strategy, max_tries: int) -> Dict[str, bool]:
"""
Analyze a given row of the gold standard.
Expand All @@ -75,7 +74,7 @@ def analyze(contextualized_acronym: acres.util.acronym.Acronym, true_expansions:

# Remove context to improve cache hit
# XXX We currently support context only for n-grams
if strategy not in [resolver.Strategy.NGRAM, resolver.Strategy.FASTNGRAM]:
if strategy != resolver.Strategy.FASTNGRAM:
left_context = ""
right_context = ""

Expand Down
23 changes: 11 additions & 12 deletions acres/fastngram/fastngram.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
from collections import OrderedDict
from typing import Dict, Set, Tuple, Iterator, List, Union

import acres.util.acronym
from acres.model import topic_list
from acres.preprocess import resource_factory
from acres.util import functions
from acres.util.functions import import_conf
from acres.util.acronym import Acronym

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -118,7 +117,7 @@ def fastngram(acronym: str, left_context: str = "", right_context: str = "",
:param max_rank:
:return:
"""
contextualized_acronym = acres.util.acronym.Acronym(acronym=acronym, left_context=left_context,
contextualized_acronym = Acronym(acronym=acronym, left_context=left_context,
right_context=right_context)
contexts = _generate_acronym_contexts(contextualized_acronym)

Expand All @@ -145,7 +144,7 @@ def fasttype(acronym: str, left_context: str = "", right_context: str = "",
yield ngram


def _find_contexts(acronym: str, min_freq: int) -> 'List[topic_list.Acronym]':
def _find_contexts(acronym: str, min_freq: int) -> 'List[Acronym]':
"""
Find contexts in the training data where this acronym appears.
Expand All @@ -155,22 +154,22 @@ def _find_contexts(acronym: str, min_freq: int) -> 'List[topic_list.Acronym]':
"""
model = resource_factory.get_center_map(functions.partition(acronym, PARTITIONS))

all_contexts = [] # type: List[topic_list.Acronym]
all_contexts = [] # type: List[Acronym]
for out_freq, contexts in model.contexts(acronym).items():
for left, right in contexts:
# Do not allow empty contexts.
if left == '' and right == '':
continue
if out_freq < min_freq:
break
contextualized_acronym = acres.util.acronym.Acronym(acronym=acronym, left_context=left,
contextualized_acronym = Acronym(acronym=acronym, left_context=left,
right_context=right)
all_contexts.append(contextualized_acronym)

return all_contexts


def _center_provider(contexts: 'List[topic_list.Acronym]', min_freq: int,
def _center_provider(contexts: 'List[Acronym]', min_freq: int,
max_rank: int) -> Iterator[str]:
"""
Provide unlimited center words for a given list of contexts.
Expand Down Expand Up @@ -227,7 +226,7 @@ def create_map(ngrams: Dict[str, int], model: Union[ContextMap, CenterMap],
return model


def _generate_ngram_contexts(ngram: str) -> 'List[topic_list.Acronym]':
def _generate_ngram_contexts(ngram: str) -> 'List[Acronym]':
"""
Generate a list of contextualized n-grams with a decreasing central n-gram and increasing \
lateral context.
Expand All @@ -252,12 +251,12 @@ def _generate_ngram_contexts(ngram: str) -> 'List[topic_list.Acronym]':
left = sys.intern(" ".join(tokens[0:i]))
right = sys.intern(" ".join(tokens[j:ngram_size]))
center = sys.intern(" ".join(tokens[i:j]))
contexts.append(acres.util.acronym.Acronym(acronym=center, left_context=left,
contexts.append(Acronym(acronym=center, left_context=left,
right_context=right))
return contexts


def _generate_acronym_contexts(contextualized_acronym: 'topic_list.Acronym') -> 'List[topic_list.Acronym]':
def _generate_acronym_contexts(contextualized_acronym: 'Acronym') -> 'List[Acronym]':
"""
Generate a list of contextualized acronyms with decreasing lateral context.
Expand All @@ -279,7 +278,7 @@ def _generate_acronym_contexts(contextualized_acronym: 'topic_list.Acronym') ->
if right_length > left_length:
max_length += min(MAX_DIFF, right_length - left_length)

contexts = [] # type: List[topic_list.Acronym]
contexts = [] # type: List[Acronym]
for j in range(max_length, -1, -1):
# Left size > right size
if j > right_length:
Expand All @@ -293,7 +292,7 @@ def _generate_acronym_contexts(contextualized_acronym: 'topic_list.Acronym') ->
continue
left_context = " ".join(left[i:left_length])
right_context = " ".join(right[0:j])
contexts.append(acres.util.acronym.Acronym(acronym=contextualized_acronym.acronym,
contexts.append(Acronym(acronym=contextualized_acronym.acronym,
left_context=left_context,
right_context=right_context))
return contexts
3 changes: 2 additions & 1 deletion acres/model/detection_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from acres.model import topic_list
from acres.util import acronym as acro_util
from acres.util.acronym import Acronym

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -77,7 +78,7 @@ def parse_valid(filename: str) -> Set[str]:
return filter_valid(parse(filename))


def update(previous: Dict[str, bool], acronyms: List[acro_util.Acronym]) -> Dict[str, bool]:
def update(previous: Dict[str, bool], acronyms: List[Acronym]) -> Dict[str, bool]:
"""
Update a previous detection standard with new acronyms from a topic list, preserving order.
Expand Down
1 change: 0 additions & 1 deletion acres/ngram/ngrams.py → acres/model/ngrams.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ class FilteredNGramStat:
@todo ngramstat itself should be a generator
"""
NGRAM_SEPARATOR = "\t"
TOKEN_SEPARATOR = " "
PRINT_INTERVAL = 1000000

Expand Down
2 changes: 1 addition & 1 deletion acres/model/topic_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from operator import attrgetter
from typing import List, Set

from acres.ngram import ngrams
from acres.model import ngrams
from acres.util import acronym as acro_util
from acres.util import functions

Expand Down
11 changes: 0 additions & 11 deletions acres/ngram/__init__.py

This file was deleted.

Loading

0 comments on commit 299b76f

Please sign in to comment.