Skip to content

Commit

Permalink
croatian models
Browse files Browse the repository at this point in the history
  • Loading branch information
Jemoka committed Jan 24, 2024
1 parent 699ead6 commit 7d0990c
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 32 deletions.
2 changes: 1 addition & 1 deletion batchalign/pipelines/morphosyntax/ud.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ def morphoanalyze(doc: Document, status_hook:callable = None):
lang.pop(lang.index("zh"))
lang.append("zh-hans")

elif "zh" not in lang and "zh-hans" not in lang and "ja" not in lang and "ko" not in lang:
elif "hr" not in lang and "zh" not in lang and "zh-hans" not in lang and "ja" not in lang and "ko" not in lang:
if "en" in lang:
config["processors"]["mwt"] = "gum"
else:
Expand Down
6 changes: 3 additions & 3 deletions batchalign/version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0.4.0-post.0
Jan 21st, 2024
Initial Batchalign2 Public Release
0.4.0-post.1
Jan 23rd, 2024
Croatian models
56 changes: 28 additions & 28 deletions scratchpad.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,27 @@
########### The Batchalign Core Test Harness ###########
# from batchalign.formats.chat.parser import chat_parse_utterance

# text = "I'm going to read some random crap as I see on the screen . 2530_4940"
# text = "i'm going to read some random crap as i see on the screen . 2530_4940"

# function = "morphosyntax"
# lang = "eng"
# num_speakers = 1

# forms, delim = chat_parse_utterance(text, None, None, None, None)
# utterance = Utterance(content=forms, delim=delim)
# forms, delim = chat_parse_utterance(text, none, none, none, none)
# utterance = utterance(content=forms, delim=delim)

# # utterance = Utterance(content=text)
# # utterance = utterance(content=text)

# ut = Document(content=[utterance], langs=[lang])
# ut = document(content=[utterance], langs=[lang])

# pipeline = BatchalignPipeline.new(function, lang=lang, num_speakers=num_speakers)
# pipeline = batchalignpipeline.new(function, lang=lang, num_speakers=num_speakers)
# doc = pipeline(ut)
# doc[0][-1]
# doc[0][-2].model_dump()

# doc[0].content[-2]

# print(str(CHATFile(doc=doc)))
# print(str(chatfile(doc=doc)))

########### The Batchalign String Test Harness ###########
# from batchalign.formats.chat.parser import chat_parse_utterance
Expand Down Expand Up @@ -93,34 +93,34 @@
# doc[-382][1]

########### The Batchalign CLI Harness ###########
# from batchalign.cli.dispatch import _dispatch
from batchalign.cli.dispatch import _dispatch

# in_dir = "../talkbank-alignment/test_harness/input/"
# out_dir = "../talkbank-alignment/test_harness/output/"
# in_format = "cha"
in_dir = "../talkbank-alignment/test_harness/input/"
out_dir = "../talkbank-alignment/test_harness/output/"
in_format = "cha"

# function = "morphotag"
# lang = "fra"
# num_speakers = 1
function = "morphotag"
lang = "hrv"
num_speakers = 1

# class Context:
# obj = {"verbose": 3}
class Context:
obj = {"verbose": 3}

# def loader(file):
# return CHATFile(path=os.path.abspath(file)).doc
def loader(file):
return CHATFile(path=os.path.abspath(file)).doc

# # return file
# return file

# def writer(doc, output):
# CHATFile(doc=doc).write(output)
# # CHATFile(doc=doc).write(output
# # .replace(".wav", ".cha")
# # .replace(".mp4", ".cha")
# # .replace(".mp3", ".cha"))
def writer(doc, output):
CHATFile(doc=doc).write(output)
# CHATFile(doc=doc).write(output
# .replace(".wav", ".cha")
# .replace(".mp4", ".cha")
# .replace(".mp3", ".cha"))

# _dispatch(function, lang, num_speakers, [in_format], Context(),
# in_dir, out_dir,
# loader, writer, Console())
_dispatch(function, lang, num_speakers, [in_format], Context(),
in_dir, out_dir,
loader, writer, Console())

########## The Batchalign CHAT Test Harness ##########

Expand Down

0 comments on commit 7d0990c

Please sign in to comment.