Skip to content

Commit

Permalink
Merge pull request #730 from clarin-eric/devel
Browse files: browse the repository at this point in the history
Devel
  • Loading branch information
matyaskopp authored Aug 25, 2023
2 parents 6decd19 + c6a3fbd commit 0dfbeb7
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 49 deletions.
8 changes: 4 additions & 4 deletions .github/actions/ParlaMintStatus/status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ pwd
cd ParlaMint

changed_files=$(git diff --name-only HEAD HEAD~1)
parla_changed=$(echo "$changed_files"|grep 'Sample/ParlaMint-.*/'|sed -n 's/^Sample\/ParlaMint-\([-A-Z]*\).*.xml$/\1/p'|sort|uniq|tr '\n' ' '|sed 's/ *$//')
parla_changed=$(echo "$changed_files"|grep 'Samples/ParlaMint-.*/'|sed -n 's/^Samples\/ParlaMint-\([-A-Z]*\).*.xml$/\1/p'|sort|uniq|tr '\n' ' '|sed 's/ *$//')
scripts_changed=$(echo "$changed_files"|egrep "^(Schema|Scripts)")
parla_all=$(echo Sample/ParlaMint-*|sed 's/Sample\/ParlaMint-\([-A-Z]*\)/\1/g'|sort)
parla_all=$(echo Samples/ParlaMint-*|sed 's/Samples\/ParlaMint-\([-A-Z]*\)/\1/g'|sort)


parla_process=$(test -z "${parla_changed}" && echo "${parla_all}" || echo "${parla_changed}")
Expand All @@ -13,8 +13,8 @@ parla_process=$(echo "[\"$parla_process\"]"|sed 's/ */","/g'| sed 's/^\[""\]$/[
parla_changed_size=0
for parla in $parla_changed;
do
size=$(find Sample/ParlaMint-$parla -type f -name "ParlaMint-$parla*.xml" -print0 | du -c --block-size=1000000 --files0-from=-|tail -1|cut -f 1)
echo "::notice:: Sample/ParlaMint-$parla size =${size} MB"
size=$(find Samples/ParlaMint-$parla -type f -name "ParlaMint-$parla*.xml" -print0 | du -c --block-size=1000000 --files0-from=-|tail -1|cut -f 1)
echo "::notice:: Samples/ParlaMint-$parla size =${size} MB"
parla_changed_size=$(echo "$parla_changed_size+$size"|bc)
done

Expand Down
2 changes: 1 addition & 1 deletion .github/actions/ParlaMintValidate/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ cd ParlaMint

FAIL=0

DATADIR=Sample
DATADIR=Samples

TESTDIR="SAMPLE/Parla-CLARIN"
mkdir -p $TESTDIR
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/createSample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,27 +54,27 @@ jobs:
DIR="${{env.SAMPLE_DIR}}/$parla"
mkdir $DIR
echo "::notice::New sample files [$parla] TEXT"
java -jar $GITHUB_WORKSPACE/Saxon.jar outDir=$DIR revRespPers='GitHub Action' -xsl:${{env.SAMPLE_SCRIPT}} Sample/ParlaMint-$parla/ParlaMint-$parla.xml
java -jar $GITHUB_WORKSPACE/Saxon.jar outDir=$DIR revRespPers='GitHub Action' -xsl:${{env.SAMPLE_SCRIPT}} Samples/ParlaMint-$parla/ParlaMint-$parla.xml
echo "::notice::New sample files [$parla] ANNOTATED"
if [ -f "Sample/ParlaMint-$parla/ParlaMint-$parla.ana.xml" ] ; then
java -jar $GITHUB_WORKSPACE/Saxon.jar outDir=$DIR revRespPers='GitHub Action' -xsl:${{env.SAMPLE_SCRIPT}} Sample/ParlaMint-$parla/ParlaMint-$parla.ana.xml
if [ -f "Samples/ParlaMint-$parla/ParlaMint-$parla.ana.xml" ] ; then
java -jar $GITHUB_WORKSPACE/Saxon.jar outDir=$DIR revRespPers='GitHub Action' -xsl:${{env.SAMPLE_SCRIPT}} Samples/ParlaMint-$parla/ParlaMint-$parla.ana.xml
else
echo "::warning::skipping annotated conversion - missing corpus root file"
fi
echo "::notice::Move new sample files to Sample/ParlaMint-$parla"
ls $DIR|grep "ParlaMint-$parla[\.\_]"|xargs -I {} mv $DIR/{} Sample/ParlaMint-$parla/
echo "::notice::Move new sample files to Samples/ParlaMint-$parla"
ls $DIR|grep "ParlaMint-$parla[\.\_]"|xargs -I {} mv $DIR/{} Samples/ParlaMint-$parla/
echo "::endgroup::"
done
- name: Remove unused data from repository
run: |
cd $GITHUB_WORKSPACE/ParlaMint
shopt -s globstar
for parla in $(jq -r '.[]' <<< '${{needs.Changes.outputs.parla_changed}}' ); do
for file in $(echo Sample/ParlaMint-$parla/**/ParlaMint-$parla\_*) ; do
for file in $(echo Samples/ParlaMint-$parla/**/ParlaMint-$parla\_*) ; do
echo "testing $file"
xmlfile=$(echo $file|sed -E 's/(-meta\.tsv|\.conllu|\.txt|\.vert)$/.xml/;s/^Sample\/ParlaMint-[^\/]*\///')
cat Sample/ParlaMint-$parla/ParlaMint-$parla{,.ana}.xml | grep -Fq "$xmlfile" || git rm "$file"
xmlfile=$(echo $file|sed -E 's/(-meta\.tsv|\.conllu|\.txt|\.vert)$/.xml/;s/^Samples\/ParlaMint-[^\/]*\///')
cat Samples/ParlaMint-$parla/ParlaMint-$parla{,.ana}.xml | grep -Fq "$xmlfile" || git rm "$file"
done
done
- name: Create Derived files and Validate ${{needs.Changes.outputs.parla_changed}}
Expand All @@ -87,8 +87,8 @@ jobs:
run: |
cd $GITHUB_WORKSPACE/ParlaMint
for parla in $(jq -r '.[]' <<< '${{needs.Changes.outputs.parla_changed}}' ); do
git add Sample/ParlaMint-$parla/ParlaMint-*.{txt,tsv,conllu,vert,xml} || echo "::warning:: $parla suppress fatal: pathspec '<FILE>' did not match any files"
git diff --name-only Sample/ParlaMint-$parla
git add Samples/ParlaMint-$parla/ParlaMint-*.{txt,tsv,conllu,vert,xml} || echo "::warning:: $parla suppress fatal: pathspec '<FILE>' did not match any files"
git diff --name-only Samples/ParlaMint-$parla
done
git status
git commit -a -m "action: generating ParlaMint-${{needs.Changes.outputs.parla_changed}} sample files with #${{github.event.number}}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,28 +40,17 @@
<catDesc xml:lang="en"><term>Right to far-right</term></catDesc>
</category>
<category xml:id="orientation.BT">
<catDesc xml:lang="en"><term>Big tent</term>:
<ref target="https://en.wikipedia.org/wiki/Big_tent">Big tent</ref> refers to politics that are ...
</catDesc>
<catDesc xml:lang="en"><term>Big tent</term>: <ref target="https://en.wikipedia.org/wiki/Big_tent">Big tent</ref> or catch-all refers to political parties that have members covering a broad spectrum of beliefs.</catDesc>
</category>
<category xml:id="orientation.NP">
<catDesc xml:lang="en"><term>Nonpartisanism</term>:
<ref target="https://en.wikipedia.org/wiki/Nonpartisanism">Nonpartisanism</ref> refers to politics that are ...
</catDesc>
<category xml:id="orientation.NP"><catDesc xml:lang="en"><term>Nonpartisanism</term>: <ref target="https://en.wikipedia.org/wiki/Nonpartisanism">Nonpartisanism</ref> refers to a political stance that does not agree with the current political party system.</catDesc>
</category>
<category xml:id="orientation.PP">
<catDesc xml:lang="en"><term>Pirate Party</term>:
<ref target="https://en.wikipedia.org/wiki/Pirate_Party">Pirate Party</ref> refers to politics that are ...
</catDesc>
<category xml:id="orientation.PP"><catDesc xml:lang="en"><term>Pirate Party</term>: <ref target="https://en.wikipedia.org/wiki/Pirate_Party">Pirate Party</ref> refers to political parties that support civil rights, direct democracy, encourage innovation and creativity, free sharing of knowledge, information privacy, free speech, anti-corruption, net neutrality and oppose mass surveillance, censorship and Big Tech.</catDesc>
</category>
<category xml:id="orientation.SI">
<catDesc xml:lang="en"><term>Single Issue Politics</term>:
<ref target="https://en.wikipedia.org/wiki/Single-issue_politics">Single Issue Politics</ref> refers to politics that are ...
</catDesc>
<catDesc xml:lang="en"><term>Single Issue Politics</term>: <ref target="https://en.wikipedia.org/wiki/Single-issue_politics">Single Issue Politics</ref> refers to a political stance that is based on one essential policy area or idea. </catDesc>
</category>
<category xml:id="orientation.SY">
<catDesc xml:lang="en"><term>Syncretic politics</term>:
<ref target="https://en.wikipedia.org/wiki/Syncretic_politics">Syncretic politics</ref> refers to politics that are ...
</catDesc>
<ref target="https://en.wikipedia.org/wiki/Syncretic_politics">Syncretic politics</ref> refers to politics that combine elements from across the conventional left–right political spectrum.</catDesc>
</category>
</taxonomy>
21 changes: 5 additions & 16 deletions Corpora/Taxonomies/ParlaMint-taxonomy-politicalOrientation.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,28 +40,17 @@
<catDesc xml:lang="en"><term>Right to far-right</term></catDesc>
</category>
<category xml:id="orientation.BT">
<catDesc xml:lang="en"><term>Big tent</term>:
<ref target="https://en.wikipedia.org/wiki/Big_tent">Big tent</ref> refers to politics that are ...
</catDesc>
<catDesc xml:lang="en"><term>Big tent</term>: <ref target="https://en.wikipedia.org/wiki/Big_tent">Big tent</ref> or catch-all refers to political parties that have members covering a broad spectrum of beliefs.</catDesc>
</category>
<category xml:id="orientation.NP">
<catDesc xml:lang="en"><term>Nonpartisanism</term>:
<ref target="https://en.wikipedia.org/wiki/Nonpartisanism">Nonpartisanism</ref> refers to politics that are ...
</catDesc>
<category xml:id="orientation.NP"><catDesc xml:lang="en"><term>Nonpartisanism</term>: <ref target="https://en.wikipedia.org/wiki/Nonpartisanism">Nonpartisanism</ref> refers to a political stance that does not agree with the current political party system.</catDesc>
</category>
<category xml:id="orientation.PP">
<catDesc xml:lang="en"><term>Pirate Party</term>:
<ref target="https://en.wikipedia.org/wiki/Pirate_Party">Pirate Party</ref> refers to politics that are ...
</catDesc>
<category xml:id="orientation.PP"><catDesc xml:lang="en"><term>Pirate Party</term>: <ref target="https://en.wikipedia.org/wiki/Pirate_Party">Pirate Party</ref> refers to political parties that support civil rights, direct democracy, encourage innovation and creativity, free sharing of knowledge, information privacy, free speech, anti-corruption, net neutrality and oppose mass surveillance, censorship and Big Tech.</catDesc>
</category>
<category xml:id="orientation.SI">
<catDesc xml:lang="en"><term>Single Issue Politics</term>:
<ref target="https://en.wikipedia.org/wiki/Single-issue_politics">Single Issue Politics</ref> refers to politics that are ...
</catDesc>
<catDesc xml:lang="en"><term>Single Issue Politics</term>: <ref target="https://en.wikipedia.org/wiki/Single-issue_politics">Single Issue Politics</ref> refers to a political stance that is based on one essential policy area or idea. </catDesc>
</category>
<category xml:id="orientation.SY">
<catDesc xml:lang="en"><term>Syncretic politics</term>:
<ref target="https://en.wikipedia.org/wiki/Syncretic_politics">Syncretic politics</ref> refers to politics that are ...
</catDesc>
<ref target="https://en.wikipedia.org/wiki/Syncretic_politics">Syncretic politics</ref> refers to politics that combine elements from across the conventional left–right political spectrum.</catDesc>
</category>
</taxonomy>
5 changes: 3 additions & 2 deletions Scripts/parlamint2release.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -405,12 +405,13 @@
<xsl:template mode="comp" match="tei:note[normalize-space(.) and not(./element())] | tei:desc">
<xsl:variable name="textIn" select="normalize-space(.)"/>
<xsl:variable name="textOut" select="mk:normalize-note($textIn)"/>
<xsl:if test="$textIn != $textOut">
<!-- Remove this message, as there are too many of them -->
<!--xsl:if test="$textIn != $textOut">
<xsl:message select="concat('WARN ', /tei:TEI/@xml:id,
': de-bracketing ',
parent::tei:*/local-name(),'/',local-name(),
' &quot;',$textIn,'&quot;')"/>
</xsl:if>
</xsl:if-->
<xsl:if test="not(normalize-space( replace($textOut, '[^\p{Lu}\p{Lt}\p{Ll}0-9]',' ')))
and not($allowedNotes[. = normalize-space($textOut)])">
<xsl:message select="concat('WARN ', /tei:TEI/@xml:id,
Expand Down

0 comments on commit 0dfbeb7

Please sign in to comment.