Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
drupchen committed Aug 16, 2019
1 parent 6d5ad81 commit ea2f944
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion pybo/tokenizers/sentencetokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# variables used in the tests
# Module-level lists of Tibetan string forms, each form ending with the tsek mark "་".
# NOTE(review): presumably these are the token forms the sentence tokenizer treats
# as sentence/clause-ending cues — confirm against the code that consumes them.
ending_particles = ['གོ་', 'ངོ་', 'དོ་', 'ནོ་', 'བོ་', 'མོ་', 'འོ་', 'རོ་', 'ལོ་', 'སོ་', 'ཏོ་']
ending_words = ['ཅིག་', 'ཤོག་']
# NOTE(review): this excerpt comes from a rendered diff ("1 addition and 1 deletion").
# The first `ending_verbs` line below appears to be the removed (pre-commit) version
# and the second its replacement, which appends 'བཞུགས་'. Read as plain Python, the
# second assignment simply overrides the first.
ending_verbs = ['ཡིན་', 'ཡོད་', 'མིན་', 'མེད་', 'འགྱུར་', 'ལྡན་', 'བགྱི་', 'བྱ་']
ending_verbs = ['ཡིན་', 'ཡོད་', 'མིན་', 'མེད་', 'འགྱུར་', 'ལྡན་', 'བགྱི་', 'བྱ་', 'བཞུགས་']
te_particles = ['སྟེ་', 'ཏེ་', 'དེ་'] # separated because these seem to cut long sentences
# The three te_particles followed by two more forms; `+` builds a new list, so
# te_particles itself is left unmodified.
clause_boundaries = te_particles + ['ནས་', 'ན་']
# NOTE(review): the meaning of `dagdra` is not evident from this excerpt — confirm
# against its usage before relying on it.
dagdra = ['པ་', 'བ་', 'པོ་', 'བོ་']
Expand Down

0 comments on commit ea2f944

Please sign in to comment.