Merge branch 'main' into use_gramtools

giellalt · Apr 10, 2024 · 4e8a557 · 4e8a557
2 parents 8e854e9 + 3b31f19
commit 4e8a557
Show file tree

Hide file tree

Showing 32 changed files with 1,253 additions and 222 deletions.
diff --git a/am-shared/docs-dir-include.am b/am-shared/docs-dir-include.am
@@ -49,7 +49,7 @@ endif
 
 # Generate endpoint json file for shield.io lemma count badge.
 # Only to be stored in the gh-pages branch, ignored in main.
-$(srcdir)/lemmacount.json: $(top_srcdir)/src/fst/morphology/stems/*.lexc
+$(srcdir)/lemmacount.json:
 	$(AM_V_GEN)$(GTCORE)/scripts/make-lemmacount.json.sh $(abs_top_srcdir) > $@
 
 # Generate a maturity.json file as endpoint for the maturity badge.
@@ -166,9 +166,14 @@ REPOURL=$(shell if test "x$(GH_REPO)" != x ; then \
 			fi)
 
 # Collect all target files into one big MD file:
+# Each chapter header is the file path with the "$(top_srcdir)/docs/" prefix removed:
 $(ALLINONE_MD_PAGE): $(VPATH_MDFILES)
-	$(AM_V_GEN)printf "# $(GLANGUAGE) description \n\nAll documents in one file\n\n" \
-	| cat - $(VPATH_MDFILES) > $@
+	$(AM_V_GEN)printf "# %s language model documentation\n\nAll doc-comment documentation in one large file.\n" "$(GLANGUAGE)" > $@
+	for f in $(VPATH_MDFILES); do \
+		header=$${f#"$(top_srcdir)/docs/"};\
+		printf "\n---\n\n# %s \n\n" "$$header" >> $@ ;\
+		cat "$$f" >> $@ || exit 1 ;\
+	done
 
 $(LINKS):
 	$(AM_V_GEN)for doc2md in $(DOCSRC_MDFILES) ; do \
@@ -182,27 +187,44 @@ $(LINKS):
 		d2=`echo "$$d" | cut -d '/' -f 2` ;\
 		d3=`echo "$$d" | cut -d '/' -f 3` ;\
 		d4=`echo "$$d" | cut -d '/' -f 4` ;\
+		d5=`echo "$$d" | cut -d '/' -f 5` ;\
 		if test "x$$d1" != "x$$oldd1" ; then \
 			echo "* \`$$d1/\`" ;\
 			oldd1=$$d1 ;\
+			oldd2="";\
+			oldd3="";\
+			oldd4="";\
+			oldd5="";\
 		fi ; \
 		if test "x$$d2" = x ; then \
 			echo "    * [$$docname]($$html) ([src]($(REPOURL)/$$doc))" ;\
 		elif test "x$$d2" != "x$$oldd2" ; then \
 			echo "    * \`$$d2/\`" ;\
 			oldd2=$$d2 ;\
+			oldd3="";\
+			oldd4="";\
+			oldd5="";\
 		fi ; \
 		if test "x$$d3" = x -a "x$$d2" != x; then \
 			echo "        * [$$docname]($$html) ([src]($(REPOURL)/$$doc))" ;\
 		elif test "x$$d3" != "x$$oldd3" ; then \
 			echo "        * \`$$d3/\`" ;\
 			oldd3=$$d3 ;\
+			oldd4="";\
+			oldd5="";\
 		fi ; \
 		if test "x$$d4" = x -a "x$$d3" != x ; then \
 			echo "            * [$$docname]($$html) ([src]($(REPOURL)/$$doc))" ;\
 		elif test "x$$d4" != "x$$oldd4" ; then \
 			echo "            * \`$$d4/\`" ;\
 			oldd4=$$d4 ;\
+			oldd5="";\
+		fi ; \
+		if test "x$$d5" = x -a "x$$d4" != x ; then \
+			echo "                * [$$docname]($$html) ([src]($(REPOURL)/$$doc))" ;\
+		elif test "x$$d5" != "x$$oldd5" ; then \
+			echo "                * \`$$d5/\`" ;\
+			oldd5=$$d5 ;\
 		fi ; \
 	done > $@
 

diff --git a/am-shared/src-morphology-dir-include.am b/am-shared/src-morphology-dir-include.am
@@ -290,7 +290,7 @@ lexicon.hfst: .generated/lexicon.hfst
 	$(AM_CP)cp -v $< $@
 
 clean-local:
-	-rm -f lexicon.hfst .generated/lexicon.hfst
+	-rm -f lexicon.hfst .generated/lexicon.hfst $(GIELLA_LOCAL_TARGETS)
 
 ####### Other targets: ###########
 maintainer-clean-local:

diff --git a/am-shared/src_alt_orth-include.am b/am-shared/src_alt_orth-include.am
@@ -459,7 +459,7 @@ $(foreach alt_orth,$(ALT_ORTHS),$(eval $(call alt_orth_norm_generators,$(alt_ort
 
 # morpher is a morph segmenting variant: taloautoissani -> talo#auto>i>ssa>ni
 define alt_orth_morphers
-.generated/morpher-gt-desc.$(1).tmp.%: a.generated/nalyser-raw-gt-desc.%         \
+.generated/morpher-gt-desc.$(1).tmp.%: .generated/analyser-raw-gt-desc.%         \
 					 orthography/inituppercase.compose.%   \
 					 orthography/spellrelax.compose.%      \
 					filters/remove-hyphenation-marks.%     \

diff --git a/am-shared/tools-spellcheckers-fstbased-desktop-dir-include.am b/am-shared/tools-spellcheckers-fstbased-desktop-dir-include.am
@@ -18,7 +18,7 @@
 # Use this as the source lexical fst for unit weighting, it contains correct
 # surface forms except for the word boundary #, which is still present, and
 # used in the weighting:
-UW_SPELLER_SRC=generator-desktopspeller-gt-norm-base.hfst
+UW_SPELLER_SRC=.generated/generator-desktopspeller-gt-norm-base.hfst
 
 ## Conditional string variables;
 # Set file name to the empty string or initial_letters_all depending on variable:
@@ -47,9 +47,9 @@ endif # WANT_SPELLERS
 ####    the fst):
 .generated/generator-desktopspeller-gt-norm-freq_weighted.hfst: \
 				 .generated/generator-desktopspeller-gt-norm-base.hfst \
-				 $(SURFWEIGHTS)
+				 .generated/$(SURFWEIGHTS)
 	$(AM_V_COMPOSE)$(HFST_COMPOSE) $(HFST_FLAGS) -F \
-			$< $(SURFWEIGHTS) \
+			$< .generated/$(SURFWEIGHTS) \
 		-o $@
 
 #### 3. Add a default unit weight to anything not covered by the corpus
@@ -99,11 +99,11 @@ endif # WANT_SPELLERS
 	quit\n" | $(HFST_XFST) -p $(MORE_VERBOSITY)
 
 # Copy the tmp transducer to the final one. This allows local overrides.
-.generated/%.hfst: .generated/%.tmp.hfst
+%.hfst: .generated/%.tmp.hfst
 	$(AM_V_CP)cp -f $< $@
 
 # Invert the final fst, to enable symmetric yaml tests and easy manual testing:
-.generated/analyser-desktopspeller-gt-norm.hfst: .generated/generator-desktopspeller-gt-norm.hfst
+analyser-desktopspeller-gt-norm.hfst: generator-desktopspeller-gt-norm.hfst
 	$(AM_V_INVERT)$(HFST_INVERT) $(MORE_VERBOSITY) $(HFST_FLAGS) -i $< \
 	| $(HFST_PRUNE_ALPHABET) $(MORE_VERBOSITY) \
 	| $(HFST_REMOVE_EPSILONS) $(MORE_VERBOSITY) -o $@

diff --git a/am-shared/tools-spellcheckers-fstbased-desktop-hfst-dir-include.am b/am-shared/tools-spellcheckers-fstbased-desktop-hfst-dir-include.am
@@ -16,7 +16,7 @@
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 GT_SPELLER_HFST=generator-desktopspeller-gt-norm.hfst
-GT_SPELLER_ACCEPTOR=acceptor.default.hfst
+GT_SPELLER_ACCEPTOR=.generated/acceptor.default.hfst
 
 # Max compression for zipped files:
 ZIPFLAGS=-9 $(VERBOSITY)
@@ -47,15 +47,15 @@ swaps=$(shell \
 # Set file name to the empty string or initial_letters.all depending on variable:
 initial_letter_deps=$(shell \
 	if [[ $(INITIAL_EDITS) != 'no' ]] ; then \
-		echo "initial_letters.all.%.hfst"; \
+		echo ".generated/initial_letters.all.%.hfst"; \
 	else \
 		echo ""; \
 	fi)
 
 # Set the regex include expression to the empty string or initial_letters.all depending on variable:
 initial_letter_fst_include=$(shell \
 	if [[ $(INITIAL_EDITS) != 'no' ]] ; then \
-		echo "( @\\\"initial_letters.all.$*.hfst\\\" )"; \
+		echo "( @\\\".generated/initial_letters.all.$*.hfst\\\" )"; \
 	else \
 		echo ""; \
 	fi)
@@ -71,11 +71,11 @@ initial_letter_error_model_option=$(shell \
 # Set dependency file name(s) depending on variable value:
 initial_letter_all_deps=$(shell \
 	if   [[ $(INITIAL_EDITS) == 'regex' ]] ; then \
-		echo "initial_letters.regex.%.hfst"; \
+		echo ".generated/initial_letters.regex.%.hfst"; \
 	elif [[ $(INITIAL_EDITS) == 'txt'   ]] ; then \
-		echo "initial_letters.txt.%.hfst"; \
+		echo ".generated/initial_letters.txt.%.hfst"; \
 	elif [[ $(INITIAL_EDITS) == 'both'  ]] ; then \
-		echo "initial_letters.regex.%.hfst initial_letters.txt.%.hfst"; \
+		echo ".generated/initial_letters.regex.%.hfst .generated/initial_letters.txt.%.hfst"; \
 	else \
 		echo ""; \
 	fi)
@@ -96,7 +96,7 @@ initial_letter_all_build=$(shell \
 # Set file name to the empty string or strings.all depending on variable:
 strings_deps=$(shell \
 	if [[ $(STRING_EDITS) != 'no' ]] ; then \
-		echo "strings.all.%.hfst"; \
+		echo ".generated/strings.all.%.hfst"; \
 	else \
 		echo ""; \
 	fi)
@@ -112,11 +112,11 @@ strings_fst_include=$(shell \
 # Set dependency file name(s) depending on variable value:
 strings_all_deps=$(shell \
 	if   [[ $(STRING_EDITS) == 'regex' ]] ; then \
-		echo "strings.regex.%.hfst"; \
+		echo ".generated/strings.regex.%.hfst"; \
 	elif [[ $(STRING_EDITS) == 'txt'   ]] ; then \
-		echo "strings.txt.%.hfst"; \
+		echo ".generated/strings.txt.%.hfst"; \
 	elif [[ $(STRING_EDITS) == 'both'  ]] ; then \
-		echo "strings.regex.%.hfst strings.txt.%.hfst"; \
+		echo ".generated/strings.regex.%.hfst .generated/strings.txt.%.hfst"; \
 	else \
 		echo ""; \
 	fi)
@@ -137,27 +137,27 @@ strings_all_build=$(shell \
 # Set file name to the empty string or final_strings.all depending on variable:
 final_strings_deps=$(shell \
 	if [[ $(FINAL_STRING_EDITS) != 'no' ]] ; then \
-		echo "final_strings.all.%.hfst"; \
+		echo ".generated/final_strings.all.%.hfst"; \
 	else \
 		echo ""; \
 	fi)
 
 # Set the regex include expression for final_strings.all depending on variable value:
 final_strings_fst_include=$(shell \
 	if [[ $(FINAL_STRING_EDITS) != 'no' ]] ; then \
-		echo "( @\\\"final_strings.all.$*.hfst\\\" )"; \
+		echo "( @\\\".generated/final_strings.all.$*.hfst\\\" )"; \
 	else \
 		echo ""; \
 	fi)
 
 # Set dependency file name(s) depending on variable value:
 final_strings_all_deps=$(shell \
 	if   [[ $(FINAL_STRING_EDITS) == 'regex' ]] ; then \
-		echo "final_strings.regex.%.hfst"; \
+		echo ".generated/final_strings.regex.%.hfst"; \
 	elif [[ $(FINAL_STRING_EDITS) == 'txt'   ]] ; then \
-		echo "final_strings.txt.%.hfst"; \
+		echo ".generated/final_strings.txt.%.hfst"; \
 	elif [[ $(FINAL_STRING_EDITS) == 'both'  ]] ; then \
-		echo "final_strings.regex.%.hfst final_strings.txt.%.hfst"; \
+		echo ".generated/final_strings.regex.%.hfst .generated/final_strings.txt.%.hfst"; \
 	else \
 		echo ""; \
 	fi)
@@ -229,8 +229,7 @@ noinst_DATA+=$(GT_ERRMODELS) \
 ####### Easter egg version info: #######
 # Easter egg content - depends also on the fst, to
 # make sure the easter egg is rebuilt every time the fst is rebuilt:
-$(GIELLA_DESKTOP_EASTEREGGS): \
-easteregg.%.desktop.txt:
+.generated/easteregg.%.desktop.txt: $(GENDIR)
 	$(AM_V_GEN)$(GTCORE)/scripts/make-hfstspeller-version-easter-egg.sh \
 		$(GTLANG2) \
 		$(top_srcdir) \
@@ -240,7 +239,7 @@ easteregg.%.desktop.txt:
 		> $@
 
 # Easter egg suggestions:
-easteregg.%.desktop.suggtxt: easteregg.%.desktop.txt
+.generated/easteregg.%.desktop.suggtxt: .generated/easteregg.%.desktop.txt
 	$(AM_V_GEN)sed -e 's/^/nuvviDspeller:/' < $< \
 		| sed = \
 		| sed 'N;s/\n/	/' \
@@ -261,7 +260,7 @@ easteregg.%.desktop.suggtxt: easteregg.%.desktop.txt
 
 # Easter egg string acceptor:
 # easteregg.%.desktop.temp.hfst: easteregg.%.desktop.txt
-.generated/easteregg.%.desktop.hfst: easteregg.%.desktop.txt $(GENDIR)
+.generated/easteregg.%.desktop.hfst: .generated/easteregg.%.desktop.txt
 	$(AM_V_GEN)$(HFST_STRINGS2FST) $(HFST_FLAGS) -j < $< \
 		> $@
 
@@ -278,7 +277,7 @@ easteregg.%.desktop.suggtxt: easteregg.%.desktop.txt
 
 ####### Error model: #######
 # Error model building - edit distance based on transducer alphabet:
-editdist.%.regex: editdist.%.txt $(initial_letter_deps)
+.generated/editdist.%.regex: editdist.%.txt $(initial_letter_deps) $(GENDIR)
 	$(AM_V_GEN)$(GTCORE)/scripts/editdist.py \
 		--verbose \
 		$(swaps) \
@@ -289,7 +288,7 @@ editdist.%.regex: editdist.%.txt $(initial_letter_deps)
 		--output-file=$@ \
 		$(initial_letter_error_model_option)
 
-.generated/editdist.%.hfst: editdist.%.regex $(GENDIR)
+.generated/editdist.%.hfst: .generated/editdist.%.regex $(GENDIR)
 	$(AM_V_RGX2FST)$(HFST_REGEXP2FST) -S $(HFST_FLAGS) -i $<\
 	   --format=openfst-tropical \
 		-o $@
@@ -341,42 +340,42 @@ editdist.%.regex: editdist.%.txt $(initial_letter_deps)
 # larger as the edit distance, since the file is multiplied again as part of
 # the editStrings build target. The idea is that the regex should contain a
 # highly targeted set of frequent spelling errors.
-strings.regex.%.hfst: strings.%.regex anystar.hfst
+.generated/strings.regex.%.hfst: strings.%.regex .generated/anystar.hfst
 	$(AM_V_RGX2FST)$(HFST_REGEXP2FST) -S $(HFST_FLAGS) -i $<\
-		| $(HFST_CONCATENATE)   anystar.hfst - \
-		| $(HFST_CONCATENATE) - anystar.hfst   \
+		| $(HFST_CONCATENATE)   .generated/anystar.hfst - \
+		| $(HFST_CONCATENATE) - .generated/anystar.hfst   \
 		| $(HFST_REPEAT) -f 1 -t $(STRING_REGEX_EDIT_DISTANCE) \
 		-o $@
 
-strings.all.%.hfst: $(strings_all_deps)
+.generated/strings.all.%.hfst: $(strings_all_deps)
 	$(strings_all_build)
 
 # Combine edit distance with string pattern edits, then multiply according to
 # the specified editing distance. The strings part is included depending on
 # variable setting in Makefile.am.
-editdist.all.%.hfst: $(strings_deps) editdist.%.hfst
+.generated/editdist.all.%.hfst: $(strings_deps) .generated/editdist.%.hfst
 	$(strings_fst_include) \
 		| $(HFST_REPEAT) -f 1 -t $(EDIT_DISTANCE) \
 		-o $@
 
 # Error model building - list of words known to be misspelled:
-words.%.hfst: $(words_deps) easteregg.%.desktop.suggtxt
+.generated/words.%.hfst: $(words_deps) .generated/easteregg.%.desktop.suggtxt $(GENDIR)
 	$(AM_V_STR2FST)grep -h -v '^#' $^ | grep -v '^$$'   \
 		| $(HFST_STRINGS2FST) $(HFST_FLAGS) -j \
 		   --format=openfst-tropical \
 		-o $@
 
 # The final error model is assembled here:
-errmodel.%.hfst: words.%.hfst \
+errmodel.%.hfst: .generated/words.%.hfst \
 				 $(initial_letter_deps) \
-				 editdist.all.%.hfst \
+				 .generated/editdist.all.%.hfst \
 				 $(final_strings_deps)
 	$(AM_V_RGX2FST)printf "\
-		[ @\"words.$*.hfst\"    \
+		[ @\".generated/words.$*.hfst\"    \
 		| \
 		  [ \
 		      $(initial_letter_fst_include) \
-		      @\"editdist.all.$*.hfst\"  \
+		      @\".generated/editdist.all.$*.hfst\"  \
 		      $(final_strings_fst_include) \
 		  ] \
 		];" \
@@ -388,25 +387,24 @@ errmodel.%.hfst: words.%.hfst \
 ####### Alternate error model: #######
 # Alternatively, the error model can be constructed as a long list of regular
 # expressions, semicolon separated:
-errmodel.%.hfst: errmodel.%.regex easteregg.%.hfst
+.generated/errmodel.%.hfst: errmodel.%.regex .generated/easteregg.%.hfst
 	$(AM_V_GEN)$(HFST_REGEXP2FST) $(HFSTFLAGS) -S -i $< \
-		| $(HFST_DISJUNCT) - easteregg.$*.hfst \
+		| $(HFST_DISJUNCT) - .generated/easteregg.$*.hfst \
 		| $(HFST_PUSH_WEIGHTS) --push=initial  \
 		| $(HFST_FST2FST) $(HFST_FLAGS) -f olw \
 		-o $@
 
 # ... or as an xfscript file:
-errmodel.%.hfst: errmodel.%.xfscript easteregg.%.hfst
+.generated/errmodel.%.hfst: errmodel.%.xfscript .generated/easteregg.%.hfst
 	$(AM_V_GEN)$(HFST_REGEXP2FST) $(HFSTFLAGS) -S -i $< \
-		| $(HFST_DISJUNCT) - easteregg.$*.hfst \
+		| $(HFST_DISJUNCT) - .generated/easteregg.$*.hfst \
 		| $(HFST_PUSH_WEIGHTS) --push=initial  \
 		| $(HFST_FST2FST) $(HFST_FLAGS) -f olw \
 		-o $@
 
 ####### Speller acceptor: #######
 # Build the automaton used for the speller
-$(GT_SPELLER_ACCEPTOR): \
-acceptor.%.hfst: $(GT_SPELLER_HFST) filters/remove-error-strings.hfst \
+.generated/acceptor.%.hfst: $(GT_SPELLER_HFST) filters/remove-error-strings.hfst \
 				 .generated/easteregg.%.desktop.hfst
 	$(AM_V_PROJECT)$(HFST_COMPOSE) -1 filters/remove-error-strings.hfst -2 $< -F \
 		| $(HFST_PROJECT) $(HFST_FLAGS) \
@@ -427,7 +425,7 @@ $(GT_SPELLING_HFST): index.xml \
 	$(AM_V_at)$(MKDIR_P) build/$@
 	$(AM_V_at)rm -f build/$@/*
 	$(AM_V_at)cp index.xml build/$@/index.xml
-	$(AM_V_at)cp $(GT_SPELLER_ACCEPTOR) build/$@/$(GT_SPELLER_ACCEPTOR)
+	$(AM_V_at)cp $(GT_SPELLER_ACCEPTOR) build/$@/
 	$(AM_V_at)cp $(GT_ERRMODELS) build/$@/$(GT_ERRMODELS)
 	$(AM_V_ZIP)cd build/$@/ && $(ZIP) $(ZIPFLAGS) ../../$@ *
 	$(AM_V_at)$(MKDIR_P) 3

diff --git a/am-shared/tools-spellcheckers-fstbased-desktop-hfst_alt_orth-dir-include.am b/am-shared/tools-spellcheckers-fstbased-desktop-hfst_alt_orth-dir-include.am
@@ -22,11 +22,11 @@ if WANT_SPELLERS
 if WANT_ALT_ORTH_PROOFTOOLS
 
 GT_ALT_ORTH_ERRMODELS=$(shell for ld in $(ALT_ORTHS); do\
-    echo "errmodel.$$ld.hfst" ; \
+    echo ".generated/errmodel.$$ld.hfst" ; \
 done)
 
 GT_ALT_ORTH_SPELLER_ACCEPTORS=$(shell for ld in $(ALT_ORTHS); do\
-    echo "acceptor.$$ld.hfst" ; \
+    echo ".generated/acceptor.$$ld.hfst" ; \
 done)
 
 ALT_ORTH_ZHFST_FILES=$(shell for ld in $(ALT_ORTHS); do\
@@ -42,10 +42,10 @@ endif # HAVE_ALT_ORTHS
 #### Build rules: ####
 
 # Alternative based on the raw fst instead of the standard orthography:
-acceptor.%.hfst: \
+.generated/acceptor.%.hfst: \
 		$(GT_SPELLER_HFST) \
-		easteregg.%.desktop.hfst \
-		$(top_builddir)/src/fst/orthography/raw-to-%.compose.hfst
+		.generated/easteregg.%.desktop.hfst \
+		$(top_builddir)/src/fst/orthography/raw-to-%.compose.hfst $(GENDIR)
 	$(AM_V_GEN)cat $< \
 		| $(HFST_COMPOSE) $(HFST_FLAGS) -F \
 		  -2 $(top_builddir)/src/fst/orthography/raw-to-$*.compose.hfst \
@@ -56,9 +56,9 @@ acceptor.%.hfst: \
 		-o $@
 
 # Build rule for acceptors for alternate writing systems:
-acceptor.%.hfst: \
+.generated/acceptor.%.hfst: \
 		$(GT_SPELLER_HFST) \
-		easteregg.%.desktop.hfst \
+		.generated/easteregg.%.desktop.hfst \
 		$(top_builddir)/src/fst/orthography/$(DEFAULT_ORTH)-to-%.compose.hfst
 	$(AM_V_GEN)cat $< \
 		| $(HFST_COMPOSE) $(HFST_FLAGS) -F \