Skip to content

Commit

Permalink
prevents duplicates in simstring database
Browse files (browse the repository at this point in the history)
  • Loading branch information
soldni committed Sep 9, 2016
1 parent 021f914 commit e4e0c11
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions install.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -92,11 +92,15 @@ def parse_and_encode_ngrams(extracted_it, simstring_dir, cuisty_dir):
mkdir(cuisty_dir)

ss_db = SimstringDBWriter(simstring_dir)

cuisty_db = CuiSemTypesDB(cuisty_dir)

+    simstring_terms = set()

     for i, (term, cui, stys, preferred) in enumerate(extracted_it, start=1):
-        ss_db.insert(term)
+        if term not in simstring_terms:
+            ss_db.insert(term)
+            simstring_terms.add(term)

cuisty_db.insert(term, cui, stys, preferred)


Expand Down

0 comments on commit e4e0c11

Please sign in to comment.