diff --git a/batchalign/pipelines/morphosyntax/ud.py b/batchalign/pipelines/morphosyntax/ud.py index 0090e10..d619304 100644 --- a/batchalign/pipelines/morphosyntax/ud.py +++ b/batchalign/pipelines/morphosyntax/ud.py @@ -648,7 +648,7 @@ def morphoanalyze(doc: Document, status_hook:callable = None): lang.pop(lang.index("zh")) lang.append("zh-hans") - elif "zh" not in lang and "zh-hans" not in lang and "ja" not in lang and "ko" not in lang: + elif "hr" not in lang and "zh" not in lang and "zh-hans" not in lang and "ja" not in lang and "ko" not in lang: if "en" in lang: config["processors"]["mwt"] = "gum" else: diff --git a/batchalign/version b/batchalign/version index 053686b..008d376 100644 --- a/batchalign/version +++ b/batchalign/version @@ -1,3 +1,3 @@ -0.4.0-post.0 -Jan 21st, 2024 -Initial Batchalign2 Public Release \ No newline at end of file +0.4.0-post.1 +Jan 23rd, 2024 +Croatian models \ No newline at end of file diff --git a/scratchpad.py b/scratchpad.py index f8aa06e..4dba058 100644 --- a/scratchpad.py +++ b/scratchpad.py @@ -19,27 +19,27 @@ ########### The Batchalign Core Test Harness ########### # from batchalign.formats.chat.parser import chat_parse_utterance -# text = "I'm going to read some random crap as I see on the screen . 2530_4940" +# text = "i'm going to read some random crap as i see on the screen . 
2530_4940" # function = "morphosyntax" # lang = "eng" # num_speakers = 1 -# forms, delim = chat_parse_utterance(text, None, None, None, None) -# utterance = Utterance(content=forms, delim=delim) +# forms, delim = chat_parse_utterance(text, none, none, none, none) +# utterance = utterance(content=forms, delim=delim) -# # utterance = Utterance(content=text) +# # utterance = utterance(content=text) -# ut = Document(content=[utterance], langs=[lang]) +# ut = document(content=[utterance], langs=[lang]) -# pipeline = BatchalignPipeline.new(function, lang=lang, num_speakers=num_speakers) +# pipeline = batchalignpipeline.new(function, lang=lang, num_speakers=num_speakers) # doc = pipeline(ut) # doc[0][-1] # doc[0][-2].model_dump() # doc[0].content[-2] -# print(str(CHATFile(doc=doc))) +# print(str(chatfile(doc=doc))) ########### The Batchalign String Test Harness ########### # from batchalign.formats.chat.parser import chat_parse_utterance @@ -93,34 +93,34 @@ # doc[-382][1] ########### The Batchalign CLI Harness ########### -# from batchalign.cli.dispatch import _dispatch +from batchalign.cli.dispatch import _dispatch -# in_dir = "../talkbank-alignment/test_harness/input/" -# out_dir = "../talkbank-alignment/test_harness/output/" -# in_format = "cha" +in_dir = "../talkbank-alignment/test_harness/input/" +out_dir = "../talkbank-alignment/test_harness/output/" +in_format = "cha" -# function = "morphotag" -# lang = "fra" -# num_speakers = 1 +function = "morphotag" +lang = "hrv" +num_speakers = 1 -# class Context: -# obj = {"verbose": 3} +class Context: + obj = {"verbose": 3} -# def loader(file): -# return CHATFile(path=os.path.abspath(file)).doc +def loader(file): + return CHATFile(path=os.path.abspath(file)).doc -# # return file + # return file -# def writer(doc, output): -# CHATFile(doc=doc).write(output) -# # CHATFile(doc=doc).write(output -# # .replace(".wav", ".cha") -# # .replace(".mp4", ".cha") -# # .replace(".mp3", ".cha")) +def writer(doc, output): + 
CHATFile(doc=doc).write(output) + # CHATFile(doc=doc).write(output + # .replace(".wav", ".cha") + # .replace(".mp4", ".cha") + # .replace(".mp3", ".cha")) -# _dispatch(function, lang, num_speakers, [in_format], Context(), -# in_dir, out_dir, -# loader, writer, Console()) +_dispatch(function, lang, num_speakers, [in_format], Context(), + in_dir, out_dir, + loader, writer, Console()) ########## The Batchalign CHAT Test Tarness ##########