Skip to content

Commit

Permalink
add remove acute accent. and some wac extras.
Browse files Browse the repository at this point in the history
  • Loading branch information
rueter committed Jan 22, 2024
1 parent 01eecc9 commit 07ac014
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/fst/morphology/phonology.twolc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ Alphabet
Á É Ó Ú Í

ʼ
%> ;
%>
%<

;

Sets

Expand Down
125 changes: 125 additions & 0 deletions src/fst/morphology/root.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ Multichar_Symbols !!≈ # Definitions for @CODE@

!! The possession is marked as such:
+PxSg1 +PxSg2 +PxSg3 +PxDu1 +PxDu2 +PxDu3 +PxPl1 +PxPl2 +PxPl3
!extra (kiksht)
+PxSg3M +PxSg3F +PxSg3C
+PxPl1Inc +PxPl1Exc

!! The comparative forms are:
+Comp +Superl
!! Numerals are classified under:
Expand Down Expand Up @@ -153,6 +157,38 @@ Multichar_Symbols !!≈ # Definitions for @CODE@
@D.Sc@ !!= * @CODE@
@C.Sc@ !!= * @CODE@

!! wac NOUNS
! Flag diacritics used by the WAC_* lexicons. Each one is declared here so
! that the compiler treats it as a single multichar symbol.

! Possessor prefix flags: set (unified) in WAC_PX_NOUN together with the prefix.
@U.PX.SG1@
@U.PX.SG2@
@U.PX.SG3M@
@U.PX.SG3F@
@U.PX.SG3C@
@U.PX.DU1@
@U.PX.DU2@
@U.PX.DU3@
@U.PX.PL1INC@
@U.PX.PL1EXC@
@U.PX.PL2@
@U.PX.PL3@

! Possessor checks used in WAC_NOUNS_03: @R.PX.x@ requires PX to be x,
! @D.PX@ disallows any PX value (unpossessed form).
@R.PX.SG1@
@R.PX.SG2@
@R.PX.SG3M@
@R.PX.SG3F@
@R.PX.SG3C@
@R.PX.DU1@
@R.PX.DU2@
@R.PX.DU3@
@R.PX.PL1INC@
@R.PX.PL1EXC@
@R.PX.PL2@
@R.PX.PL3@
@D.PX@

! Gender/number prefix flags: set in WAC_01, re-checked by the noun class
! lexicons and WAC_NOUNS_01.
@U.GEN.SGa@
@U.GEN.SGi@
@U.GEN.PLit@
@U.GEN.PLid@

! Tense prefix flag used by the verb entry in WAC_01; it was not among the
! declarations visible here.
! NOTE(review): drop this line if the flag is already declared elsewhere in
! Multichar_Symbols.
@U.TENSE.PAST4@

LEXICON Root
!! The word forms in Sahaptin Tenino language start from the lexeme roots of basic
Expand All @@ -168,4 +204,93 @@ LEXICON Root
Punctuation ;
Symbols ;

WAC_01 ;

LEXICON WAC_01
! Entry lexicon for the wac entries (continued to from Root).
! Every entry sets one flag on both sides and emits the matching prefix
! followed by the %< boundary symbol on the lower side.

! Noun gender/number prefixes; the value of GEN is unified again by the
! noun class lexicons (WAC_NOUNS_A/ID, WAC_NOUNS_A/IT, WAC_NOUNS_I/IT).
@U.GEN.SGa@:@U.GEN.SGa@a%< WAC_PX_NOUN ;
@U.GEN.SGi@:@U.GEN.SGi@i%< WAC_PX_NOUN ;
@U.GEN.PLit@:@U.GEN.PLit@it%< WAC_PX_NOUN ;
@U.GEN.PLid@:@U.GEN.PLid@id%< WAC_PX_NOUN ;

! Verb tense prefix. A lexc entry has exactly one upper:lower separator, so
! the lower side is flag + "ga" + boundary, parallel to the noun entries
! above (the previous form had a second, unescaped colon before "ga").
@U.TENSE.PAST4@:@U.TENSE.PAST4@ga%< WAC_PREVERBS_01 ;

! Preverb slots 01-04. Each slot currently contributes no material and only
! continues to the next one; slot 04 hands over to the verb stems.

LEXICON WAC_PREVERBS_01
  WAC_PREVERBS_02 ;

LEXICON WAC_PREVERBS_02
  WAC_PREVERBS_03 ;

LEXICON WAC_PREVERBS_03
  WAC_PREVERBS_04 ;

LEXICON WAC_PREVERBS_04
  WAC_VERBS ;

LEXICON WAC_PX_NOUN
! Optional possessor prefix slot between the gender/number prefix and the
! noun stem. A possessed entry sets the PX flag and emits the prefix plus
! the %< boundary; WAC_NOUNS_03 later turns the flag into a +Px... tag.

! Unpossessed: no flag, no prefix.
                                             WAC_NOUNS ;

! Singular possessors.
@U.PX.SG1@:@U.PX.SG1@k%<                     WAC_NOUNS ;
@U.PX.SG2@:@U.PX.SG2@mi%<                    WAC_NOUNS ;
@U.PX.SG3M@:@U.PX.SG3M@ia%<                  WAC_NOUNS ;
@U.PX.SG3F@:@U.PX.SG3F@ga%<                  WAC_NOUNS ;
@U.PX.SG3C@:@U.PX.SG3C@ƚa%<                  WAC_NOUNS ;

! Dual possessors: not enabled yet.
!@U.PX.DU1@ WAC_NOUNS ;
!@U.PX.DU2@ WAC_NOUNS ;
!@U.PX.DU3@ WAC_NOUNS ;

! Plural possessors (inclusive 1st and 2nd person not enabled yet).
!@U.PX.PL1INC@ WAC_NOUNS ;
@U.PX.PL1EXC@:@U.PX.PL1EXC@nsha%<            WAC_NOUNS ;
!@U.PX.PL2@ WAC_NOUNS ;
@U.PX.PL3@:@U.PX.PL3@da%<                    WAC_NOUNS ;


LEXICON WAC_VERBS
! Verb stems. The upper side carries the stem plus +V, the lower side the
! bare stem; each entry ends the word (#).
chíux+V:chíux          # "make" ;
lx̱am+V:lx̱am            # "to say" ;

LEXICON WAC_NOUNS
! Noun stems. The upper side is the lemma with its prefix vowel plus tags;
! the lower side omits that initial vowel, which WAC_01 emits as a prefix.
! The continuation lexicon names the singular/plural prefix class.

! Class i- / it- (tagged +Masc).
ichʼank+N+Masc:chʼank                 WAC_NOUNS_I/IT "male deer" ;
igunat+N+Masc:gunat                   WAC_NOUNS_I/IT "Chinook salmon" ;
iqʼinachx̱wai+N+Masc:qʼinachx̱wai       WAC_NOUNS_I/IT "bullhead" ;

! Class a- / it- (tagged +Fem).
ag̱úlul+N+Fem:g̱úlul                    WAC_NOUNS_A/IT "acorn" ;
aƚagagwuli+N+Fem:ƚagagwuli            WAC_NOUNS_A/IT "spider" ;

! Class a- / id- (tagged +Fem).
aunaíax̱+N+Fem:unaíax̱                  WAC_NOUNS_A/ID "huckleberry" ;

! Disabled: its class lexicon WAC_NOUNS_0/MAXH is commented out as well.
!itqwƚi+N:qwƚi WAC_NOUNS_0/MAXH "house" ;

! Noun prefix classes. Each class lists the GEN flag values it accepts;
! unification against the value set in WAC_01 blocks every other prefix.
! The class name reads <singular prefix>/<plural prefix>.

LEXICON WAC_NOUNS_I/IT
@U.GEN.SGi@     WAC_NOUNS_01 ;
@U.GEN.PLit@    WAC_NOUNS_01 ;

LEXICON WAC_NOUNS_A/IT
@U.GEN.SGa@     WAC_NOUNS_01 ;
@U.GEN.PLit@    WAC_NOUNS_01 ;

LEXICON WAC_NOUNS_A/ID
@U.GEN.SGa@     WAC_NOUNS_01 ;
@U.GEN.PLid@    WAC_NOUNS_01 ;


!LEXICON WAC_NOUNS_0/MAXH
!WAC_NOUNS_01 ;

LEXICON WAC_NOUNS_01
! Adds the number tag (+Sg / +Pl) on the upper side according to the
! gender/number prefix flag that was set in WAC_01.
! There must be one entry per GEN value: the PLid entry was missing, so
! nouns of class WAC_NOUNS_A/ID, which admits @U.GEN.PLid@, had no
! path through this lexicon and could never form a plural.
@U.GEN.SGa@+Sg:@U.GEN.SGa@ WAC_NOUNS_02 ;
@U.GEN.SGi@+Sg:@U.GEN.SGi@ WAC_NOUNS_02 ;
@U.GEN.PLit@+Pl:@U.GEN.PLit@ WAC_NOUNS_02 ;
@U.GEN.PLid@+Pl:@U.GEN.PLid@ WAC_NOUNS_02 ;

LEXICON WAC_NOUNS_02
! Case suffix slot: the tag goes on the upper side, the %> boundary plus
! the suffix on the lower side. Every path through here takes a suffix.
+Dat:%>iamt     WAC_NOUNS_03 ;
+All:%>pa       WAC_NOUNS_03 ;

LEXICON WAC_NOUNS_03
! Final noun slot: turns the possessor flag set in WAC_PX_NOUN into the
! matching +Px... tag on the upper side and ends the word (#).

! Unpossessed: passes only when no PX value has been set.
@D.PX@                                          # ;

! Possessed: @R.PX.x@ passes only when PX was set to x.
@R.PX.SG1@+PxSg1:@R.PX.SG1@                     # ;
@R.PX.SG2@+PxSg2:@R.PX.SG2@                     # ;
@R.PX.SG3M@+PxSg3M:@R.PX.SG3M@                  # ;
@R.PX.SG3F@+PxSg3F:@R.PX.SG3F@                  # ;
@R.PX.SG3C@+PxSg3C:@R.PX.SG3C@                  # ;

@R.PX.DU1@+PxDu1:@R.PX.DU1@                     # ;
@R.PX.DU2@+PxDu2:@R.PX.DU2@                     # ;
@R.PX.DU3@+PxDu3:@R.PX.DU3@                     # ;

@R.PX.PL1INC@+PxPl1Inc:@R.PX.PL1INC@            # ;
@R.PX.PL1EXC@+PxPl1Exc:@R.PX.PL1EXC@            # ;
@R.PX.PL2@+PxPl2:@R.PX.PL2@                     # ;
@R.PX.PL3@+PxPl3:@R.PX.PL3@                     # ;

! vim: set ft=xfst-lexc:
19 changes: 19 additions & 0 deletions tools/spellcheckers/Makefile.mod-spellcheckers.am
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,25 @@
#
# analyser-speller-gt-norm.%: analyser-speller-gt-norm.tmp.%
# build rules
# Local override: build the final speller generator from its .tmp stage.
# The recipe prints an xfst script and pipes it into $(XFST_TOOL). The script
#   1. reads the regex  <tmp generator> .o. <remove-acute-accent filter>,
#      taking the filter with the same suffix ($*) from
#      $(top_builddir)/src/filters,
#   2. appends $(INVERT_XFST)$(INVERT_FOMA) (defined outside this file),
#   3. saves the resulting stack to the target ($@).
# The filter is listed as a prerequisite so the target is rebuilt when the
# filter changes.
generator-speller-gt-norm.%: generator-speller-gt-norm.tmp.% \
$(top_builddir)/src/filters/remove-acute-accent.%
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"$<\" \
.o. @\"$(top_builddir)/src/filters/remove-acute-accent.$*\" \
;\n\
$(INVERT_XFST)$(INVERT_FOMA)\
save stack $@\n\
quit\n" | $(XFST_TOOL)

# Local override: build the final speller analyser. Same script as the
# generator rule (compose with the remove-acute-accent filter of the same
# suffix, save the stack to $@), except that $(INVERT_HFST) is appended
# instead of $(INVERT_XFST)$(INVERT_FOMA).
# NOTE(review): the first prerequisite ($<) is the *generator* .tmp file,
# not an analyser .tmp file as in the template comment in this file's
# header - presumably intentional (the analyser being the inverted
# generator); confirm.
analyser-speller-gt-norm.%: generator-speller-gt-norm.tmp.% \
$(top_builddir)/src/filters/remove-acute-accent.%
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"$<\" \
.o. @\"$(top_builddir)/src/filters/remove-acute-accent.$*\" \
;\n\
$(INVERT_HFST)\
save stack $@\n\
quit\n" | $(XFST_TOOL)


# END: Local processing
Expand Down

0 comments on commit 07ac014

Please sign in to comment.