Skip to content

Commit

Permalink
Merge pull request #727 from clarin-eric/devel
Browse files Browse the repository at this point in the history
@TomazErjavec, merging to main
  • Loading branch information
matyaskopp authored Aug 23, 2023
2 parents 7d358e2 + 7b3b601 commit 6decd19
Show file tree
Hide file tree
Showing 1,483 changed files with 36,123 additions and 18,247 deletions.
8 changes: 4 additions & 4 deletions .github/actions/ParlaMintStatus/status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ pwd
cd ParlaMint

changed_files=$(git diff --name-only HEAD HEAD~1)
parla_changed=$(echo "$changed_files"|grep 'Data/ParlaMint-.*/'|sed -n 's/^Data\/ParlaMint-\([-A-Z]*\).*.xml$/\1/p'|sort|uniq|tr '\n' ' '|sed 's/ *$//')
parla_changed=$(echo "$changed_files"|grep 'Sample/ParlaMint-.*/'|sed -n 's/^Sample\/ParlaMint-\([-A-Z]*\).*.xml$/\1/p'|sort|uniq|tr '\n' ' '|sed 's/ *$//')
scripts_changed=$(echo "$changed_files"|egrep "^(Schema|Scripts)")
parla_all=$(echo Data/ParlaMint-*|sed 's/Data\/ParlaMint-\([-A-Z]*\)/\1/g'|sort)
parla_all=$(echo Sample/ParlaMint-*|sed 's/Sample\/ParlaMint-\([-A-Z]*\)/\1/g'|sort)


parla_process=$(test -z "${parla_changed}" && echo "${parla_all}" || echo "${parla_changed}")
Expand All @@ -13,8 +13,8 @@ parla_process=$(echo "[\"$parla_process\"]"|sed 's/ */","/g'| sed 's/^\[""\]$/[
parla_changed_size=0
for parla in $parla_changed;
do
size=$(find Data/ParlaMint-$parla -type f -name "ParlaMint-$parla*.xml" -print0 | du -c --block-size=1000000 --files0-from=-|tail -1|cut -f 1)
echo "::notice:: Data/ParlaMint-$parla size =${size} MB"
size=$(find Sample/ParlaMint-$parla -type f -name "ParlaMint-$parla*.xml" -print0 | du -c --block-size=1000000 --files0-from=-|tail -1|cut -f 1)
echo "::notice:: Sample/ParlaMint-$parla size =${size} MB"
parla_changed_size=$(echo "$parla_changed_size+$size"|bc)
done

Expand Down
2 changes: 1 addition & 1 deletion .github/actions/ParlaMintValidate/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ cd ParlaMint

FAIL=0

DATADIR=Data
DATADIR=Sample

TESTDIR="SAMPLE/Parla-CLARIN"
mkdir -p $TESTDIR
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/createSample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,27 +54,27 @@ jobs:
DIR="${{env.SAMPLE_DIR}}/$parla"
mkdir $DIR
echo "::notice::New sample files [$parla] TEXT"
java -jar $GITHUB_WORKSPACE/Saxon.jar outDir=$DIR revRespPers='GitHub Action' -xsl:${{env.SAMPLE_SCRIPT}} Data/ParlaMint-$parla/ParlaMint-$parla.xml
java -jar $GITHUB_WORKSPACE/Saxon.jar outDir=$DIR revRespPers='GitHub Action' -xsl:${{env.SAMPLE_SCRIPT}} Sample/ParlaMint-$parla/ParlaMint-$parla.xml
echo "::notice::New sample files [$parla] ANNOTATED"
if [ -f "Data/ParlaMint-$parla/ParlaMint-$parla.ana.xml" ] ; then
java -jar $GITHUB_WORKSPACE/Saxon.jar outDir=$DIR revRespPers='GitHub Action' -xsl:${{env.SAMPLE_SCRIPT}} Data/ParlaMint-$parla/ParlaMint-$parla.ana.xml
if [ -f "Sample/ParlaMint-$parla/ParlaMint-$parla.ana.xml" ] ; then
java -jar $GITHUB_WORKSPACE/Saxon.jar outDir=$DIR revRespPers='GitHub Action' -xsl:${{env.SAMPLE_SCRIPT}} Sample/ParlaMint-$parla/ParlaMint-$parla.ana.xml
else
echo "::warning::skipping annotated conversion - missing corpus root file"
fi
echo "::notice::Move new sample files to Data/ParlaMint-$parla"
ls $DIR|grep "ParlaMint-$parla[\.\_]"|xargs -I {} mv $DIR/{} Data/ParlaMint-$parla/
echo "::notice::Move new sample files to Sample/ParlaMint-$parla"
ls $DIR|grep "ParlaMint-$parla[\.\_]"|xargs -I {} mv $DIR/{} Sample/ParlaMint-$parla/
echo "::endgroup::"
done
- name: Remove unused data from repository
run: |
cd $GITHUB_WORKSPACE/ParlaMint
shopt -s globstar
for parla in $(jq -r '.[]' <<< '${{needs.Changes.outputs.parla_changed}}' ); do
for file in $(echo Data/ParlaMint-$parla/**/ParlaMint-$parla\_*) ; do
for file in $(echo Sample/ParlaMint-$parla/**/ParlaMint-$parla\_*) ; do
echo "testing $file"
xmlfile=$(echo $file|sed -E 's/(-meta\.tsv|\.conllu|\.txt|\.vert)$/.xml/;s/^Data\/ParlaMint-[^\/]*\///')
cat Data/ParlaMint-$parla/ParlaMint-$parla{,.ana}.xml | grep -Fq "$xmlfile" || git rm "$file"
xmlfile=$(echo $file|sed -E 's/(-meta\.tsv|\.conllu|\.txt|\.vert)$/.xml/;s/^Sample\/ParlaMint-[^\/]*\///')
cat Sample/ParlaMint-$parla/ParlaMint-$parla{,.ana}.xml | grep -Fq "$xmlfile" || git rm "$file"
done
done
- name: Create Derived files and Validate ${{needs.Changes.outputs.parla_changed}}
Expand All @@ -87,8 +87,8 @@ jobs:
run: |
cd $GITHUB_WORKSPACE/ParlaMint
for parla in $(jq -r '.[]' <<< '${{needs.Changes.outputs.parla_changed}}' ); do
git add Data/ParlaMint-$parla/ParlaMint-*.{txt,tsv,conllu,vert,xml} || echo "::warning:: $parla suppress fatal: pathspec '<FILE>' did not match any files"
git diff --name-only Data/ParlaMint-$parla
git add Sample/ParlaMint-$parla/ParlaMint-*.{txt,tsv,conllu,vert,xml} || echo "::warning:: $parla suppress fatal: pathspec '<FILE>' did not match any files"
git diff --name-only Sample/ParlaMint-$parla
done
git status
git commit -a -m "action: generating ParlaMint-${{needs.Changes.outputs.parla_changed}} sample files with #${{github.event.number}}"
Expand Down
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,3 @@ nohup.*
*.zip
*.tar
*.tgz
validation*
Data/TMP
13 changes: 7 additions & 6 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## Git and GitHub

Sample data should be pushed to the Data branch of the ParlaMint repository directly into the parliament folder (*`Data/ParlaMint-XX`*) in a flat structure of files.
Sample data should be pushed to the Data branch of the ParlaMint repository directly into the samples folder
(*`Samples/ParlaMint-XX`*) in a flat structure of files.

### Setup

Expand Down Expand Up @@ -43,16 +44,16 @@ git pull

```bash
# replace XX with your country code
git add Data/ParlaMint-XX/*.xml
git commit -m 'XX' Data/ParlaMint-XX/ParlaMint-XX*.xml
git add Samples/ParlaMint-XX/*.xml
git commit -m 'XX' Samples/ParlaMint-XX/ParlaMint-XX*.xml
```

- Add common content (tagUsages, word extents, version):

- edit files and save in `Data/ParlaMint-XX/add-common-content/ParlaMint-XX/` folder: `make add-common-content-XX`
- edit files and save in `Samples/ParlaMint-XX/add-common-content/ParlaMint-XX/` folder: `make add-common-content-XX`
- check if modified files are ok
- replace `Data/ParlaMint-XX/*.xml` files with `Data/ParlaMint-XX/add-common-content/ParlaMint-XX/` content
- commit changes `git commit -m 'XX add common content' Data/ParlaMint-XX/ParlaMint-XX*.xml`
- replace `Samples/ParlaMint-XX/*.xml` files with `Samples/ParlaMint-XX/add-common-content/ParlaMint-XX/` content
- commit changes `git commit -m 'XX add common content' Samples/ParlaMint-XX/ParlaMint-XX*.xml`

- Push data to your Fork:

Expand Down
10 changes: 10 additions & 0 deletions Corpora/Logs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# ParlaMint Corpora Log files

This directory contains the log files made by the release pipeline.
For each corpus there are 3 files:
* `ParlaMint-XX.log`: the complete log for ParlaMint-XX
* `ParlaMint-XX.warn.log`: warnings only
* `ParlaMint-XX.error.log`: errors only

Each of these logs also exists in a variant with the `-en` suffix (e.g. `ParlaMint-XX-en.log`); those are the logs
for the corpora that have been machine translated to English.
144 changes: 74 additions & 70 deletions Distro/Makefile → Corpora/Makefile
Original file line number Diff line number Diff line change
@@ -1,55 +1,87 @@
## Transliteration tests
# Ad-hoc runs of the transliteration scripts in bin/ on submitted listPerson /
# listOrg files. Results are written to the current directory.
# $s is defined outside this section (presumably the Saxon XSLT command -- TODO confirm).
# None of these targets creates a file named after itself, so all are phony.
.PHONY: test-translit4 test-translit3 test-translit2 test-translit1

# Apply trans-tsv2tei.xsl, with 0.tsv as the tsv parameter, to the BG listPerson file.
test-translit4:
	$s tsv=0.tsv -xsl:bin/trans-tsv2tei.xsl Sources-TEI/ParlaMint-BG.TEI/ParlaMint-BG-listPerson.xml > ParlaMint-BG-listPerson.xml

# Run trans-execute.pl on the BG listPerson file.
test-translit3:
	bin/trans-execute.pl Sources-TEI/ParlaMint-BG.TEI/ParlaMint-BG-listPerson.xml

# Apply trans-tei2tsv.xsl to the BG listOrg file.
test-translit2:
	$s -xsl:bin/trans-tei2tsv.xsl Sources-TEI/ParlaMint-BG.TEI/ParlaMint-BG-listOrg.xml > ParlaMint-BG-listOrg.tsv

# Apply trans-tei2tsv.xsl to the BG, GR and UA listPerson files.
test-translit1:
	$s -xsl:bin/trans-tei2tsv.xsl Sources-TEI/ParlaMint-BG.TEI/ParlaMint-BG-listPerson.xml > ParlaMint-BG-listPerson.tsv
	$s -xsl:bin/trans-tei2tsv.xsl Sources-TEI/ParlaMint-GR.TEI/ParlaMint-GR-listPerson.xml > ParlaMint-GR-listPerson.tsv
	$s -xsl:bin/trans-tei2tsv.xsl Sources-TEI/ParlaMint-UA.TEI/ParlaMint-UA-listPerson.xml > ParlaMint-UA-listPerson.tsv

######## Merging taxonomies

# Taxonomy names merged against the plain-text master root (Master/ParlaMint.xml).
TAXONOMIES-TEI = subcorpus speaker_types parla.legislature
# Taxonomy names merged against the annotated master root (Master/ParlaMint.ana.xml).
TAXONOMIES-ANA = NER

.PHONY: merge-taxos-nohup merge-taxos

# Run merge-taxos in the background, detached from the terminal.
# Standard error goes to Taxonomies/ParlaMint-taxonomy-merge.log and standard
# output to Logs/ParlaMint-taxonomy.log.
# $(MAKE) replaces a literal `make` so the sub-make uses the same make program
# and inherits the caller's flags; note that make therefore also runs this line
# under `make -n` (the sub-make then only prints its commands).
merge-taxos-nohup:
	nohup time $(MAKE) merge-taxos 2> Taxonomies/ParlaMint-taxonomy-merge.log > Logs/ParlaMint-taxonomy.log &

# For every taxonomy name, run parlamint-merge-taxonomy.xsl on the master root
# with the matching *.template.xml passed as the `template` parameter, and write
# the result to Taxonomies/ParlaMint-taxonomy-<name>[.ana].xml. Afterwards run
# ${vta} over all Taxonomies/ParlaMint-taxonomy-*.xml files.
# $s and ${vta} are defined outside this section (presumably the Saxon command
# and a validation command -- TODO confirm).
# $${TAXONOMY} is the shell loop variable; the doubled $ keeps make from expanding it.
merge-taxos:
	for TAXONOMY in ${TAXONOMIES-TEI}; do \
	    $s template=../Corpora/Taxonomies/ParlaMint-taxonomy-$${TAXONOMY}.template.xml \
	    -xsl:../Scripts/parlamint-merge-taxonomy.xsl Master/ParlaMint.xml \
	    > Taxonomies/ParlaMint-taxonomy-$${TAXONOMY}.xml; \
	done;
	for TAXONOMY in ${TAXONOMIES-ANA}; do \
	    $s template=../Corpora/Taxonomies/ParlaMint-taxonomy-$${TAXONOMY}.ana.template.xml \
	    -xsl:../Scripts/parlamint-merge-taxonomy.xsl Master/ParlaMint.ana.xml \
	    > Taxonomies/ParlaMint-taxonomy-$${TAXONOMY}.ana.xml; \
	done;
	${vta} Taxonomies/ParlaMint-taxonomy-*.xml

############### Makefile for making a distributable version of the ParlaMint and ParlaMint-en corpora

### VARIABLES

# All ParlaMint II corpora
# CORPORA = AT BA BE BG CZ DK EE ES ES-CT ES-GA ES-PV FI FR GB GR HR HU IS IT LT LV NL NO PL PT RS RO SE SI TR UA
# Missing corpora: ES ES-PV FI LT RO
# Missing corpora: FI LT RO ES-PV

######## SUBMITTED CORPORA FOR V 3.0
# CORPORA = AT BA BE BG CZ DK EE ES-CT ES-GA FR GB GR HR HU IS IT LV NL NO PL PT RS SE SI TR UA
######## SUBMITTED CORPORA FOR V 3.1
# CORPORA = AT BA BE BG CZ DK EE ES ES-CT ES-GA FR GB GR HR HU IS IT LV NL NO PL PT RS SE SI TR UA

# Partial runs:
CORPORA = AT

######## MTed CORPORA FOR V 3.0
MT-CORPORA = AT-en BA-en BE-en BG-en CZ-en DK-en EE-en ES-CT-en ES-GA-en FR-en GR-en HR-en HU-en IS-en IT-en LV-en NL-en NO-en PL-en PT-en RS-en SE-en SI-en TR-en UA-en
######## MTed CORPORA FOR V 3.1
MT-CORPORA = AT-en BA-en BE-en BG-en CZ-en DK-en EE-en ES-en ES-CT-en ES-GA-en FR-en GR-en HR-en HU-en IS-en IT-en LV-en NL-en NO-en PL-en PT-en RS-en SE-en SI-en TR-en UA-en

# Used in test targets:
CORPUS = UA

#Where things are, as we use several branches: this one (most likely dev), and documentation
PARLAMINT = /project/corpora/Parla/ParlaMint
HERE = ${PARLAMINT}/ParlaMint-V3/Distro
DOC = ${PARLAMINT}/ParlaMint-documentation
SCH = ${DOC}/Schema
PARLAMINT = /project/corpora/Parla/ParlaMint/ParlaMint
SCH = ${PARLAMINT}/Schema
HERE = ${PARLAMINT}/Corpora
TEMP = ${HERE}/Temp

#Where the submitted corpora are found (ParlaMint-XX.TEI/ and ParlaMint-XX.TEI.ana/)
SOURCE = ${HERE}/Source
SOURCE-MT = ${HERE}/Source-MT
SOURCES = ${HERE}/Sources-TEI
SOURCES-MT = ${HERE}/Sources-MT

# Version number and PID of next(!) TEI and TEI.ana ParlaMint release
VERSION = 3.1
HANDLE-TEI = http://hdl.handle.net/11356/1859
HANDLE-ANA = http://hdl.handle.net/11356/1860

# Version number and PID of future(!) MTed ParlaMint-en.ana release
VERSION-MT = 3.0
HANDLE-MT = http://hdl.handle.net/11356/1810
# Version number and PID of next MTed ParlaMint-en.ana release
VERSION-MT = 3.1
HANDLE-MT = http://hdl.handle.net/11356/1864

#Where the produced corpora are put for inspection
WEB = tomaz@nl.ijs.si:/home/tomaz/www/tmp/ParlaMint/

###### Targets

### Fixes for 3.0-en:
### Fixes for 3.1-en:

# Instead of TEI-derived CoNLL-U files we release original MTed CoNLL-U
# because it contains word alignments
# because they also contain word alignments
# Script also adds -en suffix to filenames + readme.
mt-cp-conllu:
bin/cp-conllu.pl 'Source-MT/ParlaMint-*-en.conllu' 'Master'
bin/cp-conllu.pl validate '${SOURCES-MT}/ParlaMint-*-en.conllu' 'Master'

# Make txt and tsv files with tsvs
mt-convert-txt:
Expand All @@ -62,9 +94,9 @@ mt-convert-txt:
# Run mrg-conll in the background, detached from the terminal; its standard
# output is written to Logs/ParlaMint_Merge_CoNLL-U.log.
# $(MAKE) replaces a literal `make` so the sub-make uses the same make program
# and inherits the caller's flags.
.PHONY: mrg-conll-nohup
mrg-conll-nohup:
	nohup time $(MAKE) mrg-conll > Logs/ParlaMint_Merge_CoNLL-U.log &
mrg-conll:
bin/merge-conllu.pl Master/ParlaMint-BE.conllu Source-MT/ParlaMint-BE-en.conllu
bin/merge-conllu.pl Master/ParlaMint-ES-CT.conllu Source-MT/ParlaMint-ES-CT-en.conllu
bin/merge-conllu.pl Master/ParlaMint-UA.conllu Source-MT/ParlaMint-UA-en.conllu
bin/merge-conllu.pl Master/ParlaMint-BE.conllu ${SOURCES-MT}/ParlaMint-BE-en.conllu
bin/merge-conllu.pl Master/ParlaMint-ES-CT.conllu ${SOURCES-MT}/ParlaMint-ES-CT-en.conllu
bin/merge-conllu.pl Master/ParlaMint-UA.conllu ${SOURCES-MT}/ParlaMint-UA-en.conllu

# Fix a mistake with handle in corpora
fix-handle:
Expand All @@ -81,15 +113,15 @@ cp-readmes:
# Make samples only
samples:
for CORPUS in ${CORPORA}; do \
${FINALIZE} -sample -codes $${CORPUS} -in ${SOURCE} -out ${HERE}/Master 2> Logs/ParlaMint-$${CORPUS}.log; \
${FINALIZE} -sample -codes $${CORPUS} -in ${SOURCES} -out ${HERE}/Master 2> Logs/ParlaMint-$${CORPUS}.log; \
grep -a -i 'error' Logs/ParlaMint-$${CORPUS}.log > Logs/ParlaMint-$${CORPUS}.error.log; \
grep -a -i 'warn' Logs/ParlaMint-$${CORPUS}.log > Logs/ParlaMint-$${CORPUS}.warn.log; \
done;

# Make vertical files only
make-verts:
for CORPUS in ${CORPORA}; do \
${FINALIZE} -vert -codes $${CORPUS} -in ${SOURCE} -out ${HERE}/Master \
${FINALIZE} -vert -codes $${CORPUS} -in ${SOURCES} -out ${HERE}/Master; \
done;
make verts

Expand All @@ -106,7 +138,7 @@ mt-make-root:

cp-samples:
# bin/cp-samples.pl 'Master/Sample-ParlaMint-*-en' Test
bin/cp-samples.pl 'Master/Sample-ParlaMint-*-en' ../Data
bin/cp-samples.pl 'Master/Sample-ParlaMint-*-en' ../Samples

mt-logs:
for CORPUS in ${CORPORA}; do \
Expand All @@ -120,10 +152,6 @@ web:
rsync -av Logs/*.log ${WEB}/Logs
rsync -av Packed/*.tgz ${WEB}/Repo

###### Factorisation of source corpora; needs to be run only once
factor-all:
../Scripts/parlamint-factorize-corpora.pl ${SOURCE}

###### Targets for producing releasable version of ParlaMint corpora
FINALIZE = perl ../Scripts/parlamint2distro.pl -version ${VERSION} -teihandle ${HANDLE-TEI} -anahandle ${HANDLE-ANA} -schema ../Schema -docs Docs

Expand All @@ -136,16 +164,16 @@ nohup:
# Run the `all` target in the background at reduced priority (nice), detached
# from the terminal; its standard output is written to Logs/ParlaMint.2.log.
# $(MAKE) replaces a literal `make` so the sub-make uses the same make program
# and inherits the caller's flags.
.PHONY: nohup2
nohup2:
	nice nohup time $(MAKE) all > Logs/ParlaMint.2.log &

all: final
all: final verts
xall: final verts pack

# Run pack-parlamint.pl for the corpora listed in ${CORPORA}, reading from
# Master and writing to Packed.
# Declared phony: the target is a command, not a file that the recipe creates.
.PHONY: pack
pack:
	perl ../Scripts/pack-parlamint.pl -codes '${CORPORA}' -in Master -out Packed
verts:
perl ../Scripts/join-verts.pl -codes '${CORPORA}' -in Master -out Verts
perl ../Scripts/join-verts.pl -version ${VERSION} -codes '${CORPORA}' -in Master -out Verts
final:
for CORPUS in ${CORPORA}; do \
${FINALIZE} -all -codes $${CORPUS} -in ${SOURCE} -out ${HERE}/Master 2> Logs/ParlaMint-$${CORPUS}.log; \
${FINALIZE} -all -codes $${CORPUS} -in ${SOURCES} -out ${HERE}/Master 2> Logs/ParlaMint-$${CORPUS}.log; \
grep -a -i 'error' Logs/ParlaMint-$${CORPUS}.log > Logs/ParlaMint-$${CORPUS}.error.log; \
grep -a -i 'warn' Logs/ParlaMint-$${CORPUS}.log > Logs/ParlaMint-$${CORPUS}.warn.log; \
echo "$${CORPUS}.warn"; \
Expand All @@ -162,13 +190,13 @@ final:
### Make MTed corpora

# Make distribution with:
FINALIZE-MT = perl ../Scripts/parlamint2distro.pl -version ${VERSION-MT} -anahandle ${HANDLE-MT} -schema ${DOC}/Schema -docs ${HERE}/Docs
FINALIZE-MT = perl ../Scripts/parlamint2distro.pl -version ${VERSION-MT} -anahandle ${HANDLE-MT} -schema ${PARLAMINT}/Schema -docs ${HERE}/Docs

# Targets
mt-nohup:
nice nohup time make mt-all-final > Logs/ParlaMint-en.3.log &
mt-all-final: mt-pack mt-web
mt-xall-final: mt-convert mt-verts mt-pack mt-web
nice nohup time make mt-all-final > Logs/ParlaMint-en.log &
mt-all-final: mt-convert
mt-xall-final: mt-init mt-convert mt-verts mt-pack mt-web

mt-web:
rsync -av Logs/*-en*.log ${WEB}/Logs
Expand Down Expand Up @@ -198,8 +226,8 @@ mt-convert:
for CORPUS in ${CORPORA}; do \
perl ../Scripts/mt-conllu2tei.pl \
${HERE}/Master/ParlaMint-$${CORPUS}.TEI.ana/ParlaMint-$${CORPUS}.ana.xml \
${SOURCE-MT}/ParlaMint-$${CORPUS}-en-notes.tsv \
${SOURCE-MT}/ParlaMint-$${CORPUS}-en.conllu \
${SOURCES-MT}/ParlaMint-$${CORPUS}-en-notes.tsv \
${SOURCES-MT}/ParlaMint-$${CORPUS}-en.conllu \
${TEMP}/ParlaMint-$${CORPUS}-en.TEI.ana 2> Logs/ParlaMint-$${CORPUS}-en.log; \
${FINALIZE-MT} -all -notei -noconll -codes $${CORPUS}-en -in ${TEMP} -out ${HERE}/Master \
2>> Logs/ParlaMint-$${CORPUS}-en.log; \
Expand All @@ -208,6 +236,15 @@ mt-convert:
done;

### Tests for debugging MT processing

# Debug run of the MT conversion for the LV corpus.
# Step 1: mt-conllu2tei.pl is called with the LV annotated root from Master, the
#         LV notes TSV from ${SOURCES-MT}, a test CoNLL-U input from
#         Sources-Sem/Test, and ${TEMP}/ParlaMint-LV-en.TEI.ana as last argument
#         (presumably the output directory -- TODO confirm against the script).
# Step 2: ${FINALIZE-MT} processes LV-en from ${TEMP} into ${HERE}/Test.
.PHONY: mt-test9
mt-test9:
	perl ../Scripts/mt-conllu2tei.pl \
	    ${HERE}/Master/ParlaMint-LV.TEI.ana/ParlaMint-LV.ana.xml \
	    ${SOURCES-MT}/ParlaMint-LV-en-notes.tsv \
	    Sources-Sem/Test/ParlaMint-LV-en.conllu \
	    ${TEMP}/ParlaMint-LV-en.TEI.ana
	${FINALIZE-MT} -all -notei -noconll -codes LV-en -in ${TEMP} -out ${HERE}/Test

mt-test8:
$s -xsl:../Scripts/validate-parlamint.xsl \
${HERE}/Master/ParlaMint-AT-en.TEI.ana/ParlaMint-AT-en.ana.xml
Expand Down Expand Up @@ -237,46 +274,13 @@ mt-test2a:
perl ../Scripts/conllu2tei.pl < Test/0.conllu > Test/0.body.xml
mt-test2:
perl ../Scripts/conllu2tei.pl \
< Source-MT/ParlaMint-LV-en.conllu/2015/ParlaMint-LV_2015-11-12-PT12-329.conllu \
< ${SOURCES-MT}/ParlaMint-LV-en.conllu/2015/ParlaMint-LV_2015-11-12-PT12-329.conllu \
> Test/ParlaMint-LV_2015-11-12-PT12-329.body.xml
xmllint --noout Test/ParlaMint-LV_2015-11-12-PT12-329.body.xml
# Debug run of mt-prepare4mt.xsl on the CZ annotated root from Master.
# The contents of Test/ParlaMint-CZ.tmp are removed first, then the stylesheet
# is run with that directory passed as its outDir parameter.
# $s is defined outside this section (presumably the Saxon XSLT command -- TODO confirm).
.PHONY: mt-test1
mt-test1:
	rm -fr Test/ParlaMint-CZ.tmp/*
	$s outDir=Test/ParlaMint-CZ.tmp -xsl:../Scripts/mt-prepare4mt.xsl ${HERE}/Master/ParlaMint-CZ.TEI.ana/ParlaMint-CZ.ana.xml

# Producing almost XX-en, uses CORPUS variable
mt-prep-nohup:
nohup time make mt-prep-cnv > Logs/ParlaMint-${CORPUS}-mt2tei.log &
mt-prep-cnv:
perl ../Scripts/mt-conllu2tei.pl \
${HERE}/Master/ParlaMint-${CORPUS}.TEI.ana/ParlaMint-${CORPUS}.ana.xml \
${SOURCE-MT}/ParlaMint-${CORPUS}-en-notes.tsv \
${SOURCE-MT}/ParlaMint-${CORPUS}-en.conllu \
Test/ParlaMint-${CORPUS}-en.TEI.ana

#Tests for original corpora
test1:
rm -fr Test/Out/ParlaMint-${CORPUS}*
perl ../Scripts/parlamint2distro.pl -all -codes ${CORPUS} -in Test/In -out Test/Out \
-version 3.0 -teihandle ${HANDLE-TEI} -anahandle ${HANDLE-ANA} -schema ../Schema -docs Docs \

test-factorize:
rm -fr Test/Factorized/ParlaMint-${CORPUS}*
mkdir Test/Factorized || :
$s outDir=Test/Factorized/ParlaMint-${CORPUS}.TEI \
prefix="ParlaMint-${CORPUS}-" \
-xsl:../Scripts/parlamint-factorize-teiHeader.xsl \
${SOURCE}/ParlaMint-${CORPUS}.TEI/ParlaMint-${CORPUS}.xml || :
$s outDir=Test/Factorized/ParlaMint-${CORPUS}.TEI.ana \
prefix="ParlaMint-${CORPUS}-" \
teiRoot=`pwd`"/Test/Factorized/ParlaMint-${CORPUS}.TEI/ParlaMint-${CORPUS}.xml" \
-xsl:../Scripts/parlamint-factorize-teiHeader.xsl \
${SOURCE}/ParlaMint-${CORPUS}.TEI.ana/ParlaMint-${CORPUS}.ana.xml || :

errs1:
grep -i error Logs/*.log | \
grep -v '...suppressing' | grep -v 'Format errors' | grep -v 'Syntax errors' | grep -v 'FAILED'

### Some idea, need to think about it!
#REGIS=at ba be bg cz dk es_ct fr gb gr hr hu is it lv nl no pl pt rs se si tr ua
REGIS=ua
Expand Down
Loading

0 comments on commit 6decd19

Please sign in to comment.