Skip to content

Commit

Permalink
a new script to generate full-form list just
Browse files Browse the repository at this point in the history
  • Loading branch information
flammie committed Jun 5, 2024
1 parent d7dabb9 commit 85513e1
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
4 changes: 4 additions & 0 deletions scripts/unimorph/excluded.tags
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#
+
+ABBR
+ACR
+Acr
+Arab
+Coll
+Clt/Add
Expand Down
33 changes: 33 additions & 0 deletions scripts/unimorph/generate-surfs.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Generate a list of the surface forms of a generator automaton.
#
# Usage: generate-surfs.bash GENERATOR
#   GENERATOR  generator-gt-desc.hfst of the target language
#
# Requires excluded.tags next to this script; the file is handed to
# hfst-kill-paths -T to prune paths from the generator
# (NOTE(review): exact tag-matching semantics are defined by
# hfst-kill-paths, not by this script -- confirm against its docs).
#
# Side effects: writes generator.hfst and generated.surfs into the
# current working directory.
# Output: NFC-normalised, sorted, de-duplicated surface forms on stdout.
# Exit status: 0 on success, 1 on usage error / missing generator /
# failed HFST step, 2 when excluded.tags is missing.

# Make a pipeline fail when any of its stages fails, so a broken HFST step
# is not hidden behind the exit status of the last command.
set -o pipefail

# Print a diagnostic on stderr and exit.
#   $1 - exit status
#   $2... - message
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

# stdout carries the generated list, so all diagnostics go to stderr.
if test $# -lt 1 ; then
  {
    echo "Usage: $0 GENERATOR"
    echo
    echo GENERATOR should be generator-gt-desc.hfst of target language
  } >&2
  exit 1
fi

cyclictags="$(dirname "$0")/excluded.tags"
if test ! -f "$cyclictags" ; then
  die 2 "missing $cyclictags please get them from giella-core"
fi

generator=$1
if test ! -f "$generator" ; then
  die 1 "Could not find generator automaton $generator"
fi

# Disabled alternative: build a filter automaton from the tag list and
# compose it with the generator. Kept for reference only; cyclicRE is not
# needed by the hfst-kill-paths approach below, so it is no longer computed.
#cyclicRE=$(tr '\n' '|' < "$cyclictags" | sed -e 's/|*$//')
#echo "[%+UglyHack $cyclicRE %+UglyHack ] | [? - $cyclicRE ]* ;" |
# sed -e 's/+/%+/g' -e 's:/:%/:g' -e 's/#/%#/g' -e 's/\^/%^/g' > generative.regex
#hfst-regexp2fst -i generative.regex -o generative.hfst -f foma
#hfst-compose -F -1 generative.hfst -2 "$generator" |\

# Prune the generator, keep only its lower (surface) side, minimise it and
# store it in olw format as generator.hfst.
if ! hfst-kill-paths -T "$cyclictags" -i "$generator" |
    hfst-project -p lower |
    hfst-minimize |
    hfst-fst2fst -f olw -o generator.hfst ; then
  # Stop here: otherwise a stale generator.hfst from an earlier run could
  # silently be used for the listing below.
  die 1 "Could not build generator.hfst from $generator"
fi

# -c 0: do not follow cycles, which keeps the listing finite.
hfst-fst2strings -c 0 generator.hfst > generated.surfs ||
  die 1 "Could not list surface forms of generator.hfst"

uconv -f utf8 -x any-nfc < generated.surfs | sort | uniq

0 comments on commit 85513e1

Please sign in to comment.