Skip to content

Commit

Permalink
Merge pull request #791 from clarin-eric/devel
Browse files Browse the repository at this point in the history
Devel-main
  • Loading branch information
matyaskopp authored Sep 25, 2023
2 parents 643f902 + 32771e0 commit 19b751a
Show file tree
Hide file tree
Showing 126 changed files with 36,683 additions and 606,687 deletions.
43 changes: 26 additions & 17 deletions Corpora/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@

# Partial runs:
#CORPORA = GR BG UA
CORPORA = HU #RUN THIS ONE NEXT!!
CORPORA = FI

######## MTed CORPORA FOR V 3.1
MT-CORPORA = AT-en BA-en BE-en BG-en CZ-en DK-en EE-en ES-en ES-CT-en ES-GA-en FR-en GR-en HR-en HU-en IS-en IT-en LV-en NL-en NO-en PL-en PT-en RS-en SE-en SI-en TR-en UA-en

# Used in test targets:
CORPUS = UA
CORPUS = UK

#Where things are, as we use several branches: this one (most likely dev), and documentation
PARLAMINT = /project/corpora/Parla/ParlaMint/ParlaMint
Expand All @@ -27,7 +27,7 @@ TEMP = ${HERE}/Temp

#Where the submitted corpora are found (ParlaMint- .TEI/ and .TEI.ana/)
SOURCES = ${HERE}/Sources-TEI
SOURCES-MT = ${HERE}/Sources-MT
SOURCES-MT = ${HERE}/Sources-Sem

# Version number and PID of next(!) TEI and TEI.ana ParlaMint release
VERSION = 3.1
Expand All @@ -44,19 +44,28 @@ WEB = tomaz@nl.ijs.si:/home/tomaz/www/tmp/ParlaMint/
###### Targets

###### Tests
test-vert3:
test-tei1:
${FINALIZE} -tei -valid -codes FI -in ${HERE}/Temp -out ${HERE}/Temp/Out
test-vert4:
$s meta=${HERE}/Master/ParlaMint-BG.TEI.ana/ParlaMint-BG.ana.xml -xsl:../Scripts/parlamint2xmlvert.xsl \
${HERE}/Master/ParlaMint-BG.TEI.ana/2022/ParlaMint-BG_2022-05-04.ana.xml > test.vert
${HERE}/Master/ParlaMint-BG.TEI.ana/2014/ParlaMint-BG_2014-10-27.ana.xml > test.vert
test-vert3:
$s meta=${HERE}/Master/ParlaMint-UA.TEI.ana/ParlaMint-UA.ana.xml -xsl:../Scripts/parlamint2xmlvert.xsl \
${HERE}/Master/ParlaMint-UA.TEI.ana/2012/ParlaMint-UA_2012-12-04-m0.ana.xml > test.vert
test-vert2:
${FINALIZE} -vert -codes ES-PV -in ${HERE}/Master -out ${HERE}/Master
test-meta3:
$s meta=../Corpora/Master/ParlaMint-BE.TEI/ParlaMint-BE.xml -xsl:../Scripts/parlamint2meta.xsl \
../Corpora/Master/ParlaMint-BE.TEI/2014/ParlaMint-BE_2014-06-19-54-plenair-ip001x.xml > test.tsv
test-meta2:
${FINALIZE} -txt -codes BE -in ${HERE}/Master -out ${HERE}/Master
test-vert1:
${FINALIZE} -vert -codes GR -in ${HERE}/Temp -out ${HERE}/Temp
${FINALIZE} -vert -codes GR-en -in ${HERE}/Temp -out ${HERE}/Temp
test-conll3:
${FINALIZE} -conll -codes GR-en -in ${HERE}/Temp -out ${HERE}/Temp
test-conll2:
${FINALIZE} -conll -codes GR -in ${HERE}/Temp -out ${HERE}/Temp
test-conll1:
${FINALIZE} -conll -codes SI -in ${HERE}/Master -out ${HERE}/Master
test-meta2:
$s meta=../Corpora/Master/ParlaMint-BE.TEI/ParlaMint-BE.xml -xsl:../Scripts/parlamint2meta.xsl \
../Corpora/Master/ParlaMint-BE.TEI/2014/ParlaMint-BE_2014-06-19-54-plenair-ip001x.xml > test.tsv
test-meta1:
${FINALIZE} -txt -codes GR -in ${HERE}/Temp -out ${HERE}/Temp
${FINALIZE} -txt -codes GR-en -in ${HERE}/Temp -out ${HERE}/Temp
Expand Down Expand Up @@ -103,11 +112,11 @@ merge-taxos:

### Fixes for 3.1-en:

# Instead of TEI-derived CoNLL-U files we release original MTed CoNLL-U
# because they also contain word alignments
# Instead of TEI-derived CoNLL-U files we release MTed CoNLL-U
# merged with TEI-derived CoNLL-U because they also contain word alignments
# Script also adds -en suffix to filenames + readme.
mt-cp-conllu:
bin/cp-conllu.pl validate '${SOURCES-MT}/ParlaMint-*-en.conllu' 'Master'
bin/cp-conllu.pl Master validate '${SOURCES-MT}/ParlaMint-*-en.conllu' 'Master'

# Make txt and tsv files with tsvs
mt-convert-txt:
Expand Down Expand Up @@ -268,9 +277,9 @@ mt-convert:

mt-test8:
$s -xsl:../Scripts/validate-parlamint.xsl \
${HERE}/Master/ParlaMint-AT-en.TEI.ana/ParlaMint-AT-en.ana.xml
$s meta=${HERE}/Master/ParlaMint-AT-en.TEI.ana/ParlaMint-AT-en.ana.xml -xsl:../Scripts/validate-parlamint.xsl \
${HERE}/Master/ParlaMint-AT-en.TEI.ana/2022/ParlaMint-AT-en_2022-01-20-027-XXVII-NRSITZ-00139.ana.xml
${HERE}/Master/ParlaMint-UA.TEI.ana/ParlaMint-UA.ana.xml
$s meta=${HERE}/Master/ParlaMint-UA.TEI.ana/ParlaMint-UA.ana.xml -xsl:../Scripts/validate-parlamint.xsl \
${HERE}/Master/ParlaMint-UA.TEI.ana/2022/ParlaMint-UA_2022-01-25-m0.ana.xml
mt-test7:
$s meta=${HERE}/Master/ParlaMint-AT-en.TEI.ana/ParlaMint-AT-en.ana.xml -xsl:../Scripts//check-links.xsl \
${HERE}/Master/ParlaMint-AT-en.TEI.ana/2022/ParlaMint-AT-en_2022-01-20-027-XXVII-NRSITZ-00139.ana.xml
Expand All @@ -295,7 +304,7 @@ mt-test2a:
perl ../Scripts/conllu2tei.pl < Test/0.conllu > Test/0.body.xml
mt-test2:
perl ../Scripts/conllu2tei.pl \
< ${SOURCES-MT}/ParlaMint-LV-en.conllu/2015/ParlaMint-LV_2015-11-12-PT12-329.conllu \
< ${SOURCES-MT}/USAS/ParlaMint-LV-en.conllu/2015/ParlaMint-LV-en_2015-11-12-PT12-329.conllu \
> Test/ParlaMint-LV_2015-11-12-PT12-329.body.xml
xmllint --noout Test/ParlaMint-LV_2015-11-12-PT12-329.body.xml
mt-test1:
Expand Down
2 changes: 2 additions & 0 deletions Corpora/Master/ParlaMint.ana.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2913,6 +2913,8 @@
href="ParlaMint-ES-GA.TEI.ana/ParlaMint-ES-GA.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-ES.TEI.ana/ParlaMint-ES.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-FI.TEI.ana/ParlaMint-FI.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-FR.TEI.ana/ParlaMint-FR.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
Expand Down
Loading

0 comments on commit 19b751a

Please sign in to comment.