croatian models

TalkBank · Jan 24, 2024 · 7d0990c · 7d0990c
1 parent 699ead6
commit 7d0990c
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 32 deletions.
diff --git a/batchalign/pipelines/morphosyntax/ud.py b/batchalign/pipelines/morphosyntax/ud.py
@@ -648,7 +648,7 @@ def morphoanalyze(doc: Document, status_hook:callable = None):
         lang.pop(lang.index("zh"))
         lang.append("zh-hans")
 
-    elif "zh" not in lang and "zh-hans" not in lang and "ja" not in lang and "ko" not in lang:
+    elif "hr" not in lang and "zh" not in lang and "zh-hans" not in lang and "ja" not in lang and "ko" not in lang:
         if "en" in lang:
             config["processors"]["mwt"] = "gum"
         else:

diff --git a/batchalign/version b/batchalign/version
@@ -1,3 +1,3 @@
-0.4.0-post.0
-Jan 21st, 2024
-Initial Batchalign2 Public Release
+0.4.0-post.1
+Jan 23rd, 2024
+Croatian models
diff --git a/scratchpad.py b/scratchpad.py
@@ -19,27 +19,27 @@
 ########### The Batchalign Core Test Harness ###########
 # from batchalign.formats.chat.parser import chat_parse_utterance
 
-# text = "I'm going to read some random crap as I see on the screen . 2530_4940"
+# text = "i'm going to read some random crap as i see on the screen . 2530_4940"
 
 # function = "morphosyntax"
 # lang = "eng"
 # num_speakers = 1
 
-# forms, delim = chat_parse_utterance(text, None, None, None, None)
-# utterance = Utterance(content=forms, delim=delim)
+# forms, delim = chat_parse_utterance(text, none, none, none, none)
+# utterance = utterance(content=forms, delim=delim)
 
-# # utterance = Utterance(content=text)
+# # utterance = utterance(content=text)
 
-# ut = Document(content=[utterance], langs=[lang])
+# ut = document(content=[utterance], langs=[lang])
 
-# pipeline = BatchalignPipeline.new(function, lang=lang, num_speakers=num_speakers)
+# pipeline = batchalignpipeline.new(function, lang=lang, num_speakers=num_speakers)
 # doc = pipeline(ut)
 # doc[0][-1]
 # doc[0][-2].model_dump()
 
 # doc[0].content[-2]
 
-# print(str(CHATFile(doc=doc)))
+# print(str(chatfile(doc=doc)))
 
 ########### The Batchalign String Test Harness ###########
 # from batchalign.formats.chat.parser import chat_parse_utterance
@@ -93,34 +93,34 @@
 # doc[-382][1]
 
 ########### The Batchalign CLI Harness ###########
-# from batchalign.cli.dispatch import _dispatch
+from batchalign.cli.dispatch import _dispatch
 
-# in_dir = "../talkbank-alignment/test_harness/input/"
-# out_dir = "../talkbank-alignment/test_harness/output/"
-# in_format = "cha"
+in_dir = "../talkbank-alignment/test_harness/input/"
+out_dir = "../talkbank-alignment/test_harness/output/"
+in_format = "cha"
 
-# function = "morphotag"
-# lang = "fra"
-# num_speakers = 1
+function = "morphotag"
+lang = "hrv"
+num_speakers = 1
 
-# class Context:
-#     obj = {"verbose": 3}
+class Context:
+    obj = {"verbose": 3}
 
-# def loader(file):
-#     return CHATFile(path=os.path.abspath(file)).doc
+def loader(file):
+    return CHATFile(path=os.path.abspath(file)).doc
 
-#     # return file
+    # return file
 
-# def writer(doc, output):
-#     CHATFile(doc=doc).write(output)
-#     # CHATFile(doc=doc).write(output
-#     #                         .replace(".wav", ".cha")
-#     #                         .replace(".mp4", ".cha")
-#     #                         .replace(".mp3", ".cha"))
+def writer(doc, output):
+    CHATFile(doc=doc).write(output)
+    # CHATFile(doc=doc).write(output
+    #                         .replace(".wav", ".cha")
+    #                         .replace(".mp4", ".cha")
+    #                         .replace(".mp3", ".cha"))
 
-# _dispatch(function, lang, num_speakers, [in_format], Context(),
-#             in_dir, out_dir,
-#             loader, writer, Console())
+_dispatch(function, lang, num_speakers, [in_format], Context(),
+            in_dir, out_dir,
+            loader, writer, Console())
 
 ########## The Batchalign CHAT Test Tarness ##########