Skip to content

Commit

Permalink
Make script to listOrg2tsv (#859).
Browse files Browse the repository at this point in the history
  • Loading branch information
TomazErjavec committed Mar 26, 2024
1 parent 7995677 commit 5ec54e2
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 15 deletions.
28 changes: 13 additions & 15 deletions Build/Makefile
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
# Ad-hoc target: runs the command held in ${vca} on every *.xml file located one
# directory level below Sources-TEI/ParlaMint-DK.TEI.ana/.
# NOTE(review): ${vca} is not defined in this excerpt - presumably a validation command; confirm.
# NOTE(review): this appears to be the first rule of the Makefile, so a bare `make`
# would presumably run it unless a default goal is set elsewhere - confirm.
bug:
${vca} Sources-TEI/ParlaMint-DK.TEI.ana/*/*.xml

############### Makefile for making a distributable version of the ParlaMint TEI, TEI.ana, -en.TEI.ana corpora
#### Makefile for making a distribution of the ParlaMint corpora
#### Variables give the corpora, version, handle, paths and scripts to use
#### make nohup starts make all and saves the log in Logs/

#### For LREV paper:
#Make TSV and LaTeX tables
stats:
# Make the overview TSV and LaTeX tables (for the LREV paper).
# Runs the command held in $s six times on Distro/ParlaMint.xml: each of the three
# parlamint2cnt-*.xsl stylesheets is applied once with mode=tsv and once with mode=tex,
# and standard output is redirected to the matching Metadata/ParlaMint-*-stats.tsv / .tex file.
# NOTE(review): $s is not defined in this excerpt - presumably the XSLT processor invocation; confirm.
# NOTE(review): the target name "geneate-stats" looks like a typo for "generate-stats";
# renaming it would change what users type on the command line.
# NOTE(review): the output files are named "participDesc" while the stylesheet is
# "particDesc" - verify that this difference is intended.
geneate-stats:
$s mode=tsv -xsl:../Scripts/parlamint2cnt-overview.xsl Distro/ParlaMint.xml > Metadata/ParlaMint-overview-stats.tsv
$s mode=tex -xsl:../Scripts/parlamint2cnt-overview.xsl Distro/ParlaMint.xml > Metadata/ParlaMint-overview-stats.tex
$s mode=tsv -xsl:../Scripts/parlamint2cnt-particDesc.xsl Distro/ParlaMint.xml > Metadata/ParlaMint-participDesc-stats.tsv
$s mode=tex -xsl:../Scripts/parlamint2cnt-particDesc.xsl Distro/ParlaMint.xml > Metadata/ParlaMint-participDesc-stats.tex
$s mode=tsv -xsl:../Scripts/parlamint2cnt-speeches.xsl Distro/ParlaMint.xml > Metadata/ParlaMint-speeches-stats.tsv
$s mode=tex -xsl:../Scripts/parlamint2cnt-speeches.xsl Distro/ParlaMint.xml > Metadata/ParlaMint-speeches-stats.tex

#Extract the parties and persons into TSV files
#Not tested yet for 4.1!
## Generate TSV files with party information on the basis of the corpus root file.
## Runs the command held in $s with ${RUN}/parlamint2tbl-parties.xsl on ${DATA}/ParlaMint.xml,
## passing path=${DATA} and outDir=tmp on the command line; standard error is
## redirected to ParlaMint_parties.log.
## NOTE(review): DATA and RUN are not defined in this excerpt - confirm they are set before use.
generate-parties:
$s path=${DATA} outDir=tmp -xsl:${RUN}/parlamint2tbl-parties.xsl \
${DATA}/ParlaMint.xml 2> ParlaMint_parties.log
extract2tsv:
## Generate TSV files with information on organisations
## For every code in ${CORPORA}, Scripts/listOrg-tei2tsv.xsl is applied twice to
## Distro/ParlaMint-<code>.TEI/ParlaMint-<code>.xml:
##   out-lang=xx  ->  Metadata/ParlaMint-<code>-listOrg.tsv
##   out-lang=en  ->  Metadata/ParlaMint-<code>-listOrg-en.tsv
## $${CORPUS} is the shell loop variable ($$ keeps Make from expanding it); it is
## distinct from the Make variable CORPUS.
## NOTE(review): the commands inside the loop are separated by `;`, so a failing run
## presumably does not stop the remaining iterations - confirm this best-effort
## behaviour is intended.
generate-orgs:
for CORPUS in ${CORPORA}; do \
$s out-lang=xx -xsl:Scripts/listOrg-tei2tsv.xsl Distro/ParlaMint-$${CORPUS}.TEI/ParlaMint-$${CORPUS}.xml \
> Metadata/ParlaMint-$${CORPUS}-listOrg.tsv ; \
$s out-lang=en -xsl:Scripts/listOrg-tei2tsv.xsl Distro/ParlaMint-$${CORPUS}.TEI/ParlaMint-$${CORPUS}.xml \
> Metadata/ParlaMint-$${CORPUS}-listOrg-en.tsv ; \
done;
generate-persons:
$s path=../Build/Distro outDir=../Build/Metadata -xsl:../Scripts/parlamint2tbl-parties.xsl Distro/ParlaMint.xml
for CORPUS in ${CORPORA}; do \
$s -xsl:../Scripts/parlamint2tbl-persons.xsl Distro/ParlaMint-$${CORPUS}.TEI/ParlaMint-$${CORPUS}.xml \
Expand All @@ -38,13 +36,13 @@ gen-all-persons:

### COMPLETE SET OF CORPORA
### CORPORA is the list of corpus codes that the per-corpus loops iterate over.
### The full list is kept commented out; the active assignment selects what is processed.
#CORPORA=AT BA BE BG CZ DK EE ES ES-CT ES-GA ES-PV FI FR GB GR HR HU IS IT LV NL NO PL PT RS SE SI TR UA
# NOTE(review): CORPORA is assigned twice in a row; with both lines present the later
# value (HU) is the one in effect. Presumably these are the removed and added lines of
# the diff - confirm.
CORPORA=CS
CORPORA=HU

####### MTed CORPORA. Used only for make pack!
MT-CORPORA = AT-en BA-en BE-en BG-en CZ-en DK-en EE-en ES-en ES-CT-en ES-GA-en ES-PV-en FI-en FR-en GB-en GR-en HR-en HU-en IS-en IT-en LV-en NL-en NO-en PL-en PT-en RS-en SE-en SI-en TR-en UA-en

# Used in targets that run only for one corpus
# NOTE(review): CORPUS is also assigned twice; the later value (HU) is the one in effect.
CORPUS=LV
CORPUS=HU

# Absolute paths are needed; otherwise there are problems with XSLT.
# The value is computed once (:=) by running realpath on the parent directory and
# deleting newlines from its output. The trailing comment is attached directly to the
# closing parenthesis, so no whitespace is appended to the value.
PARLAMINT := $(shell realpath .. | tr -d '\n')# get real absolute path to ParlaMint directory
Expand Down
107 changes: 107 additions & 0 deletions Build/Scripts/listOrg-tei2tsv.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<?xml version="1.0"?>
<!-- Output information on ParlaMint organisations as a TSV file -->
<!-- Expects ParlaMint-XX.(ana).xml as input -->
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xi="http://www.w3.org/2001/XInclude"
xmlns:tei="http://www.tei-c.org/ns/1.0"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:et="http://nl.ijs.si/et"
xmlns="http://www.tei-c.org/ns/1.0"
exclude-result-prefixes="#all"
version="2.0">

<xsl:import href="parlamint-lib.xsl"/>

<xsl:output method="text"/>

<!-- Country or region code, cut out of the @xml:id of the top-level element:
     the two capital letters following "ParlaMint-", optionally extended by a
     hyphen and one to three capital letters or digits. If the pattern does not
     match, replace() returns the @xml:id unchanged. -->
<xsl:variable name="country"
              select="replace(tei:*/@xml:id, '.*ParlaMint-([A-Z]{2}(-[A-Z0-9]{1,3})?).*', '$1')"/>

<!-- Language of the corpus, read from @xml:lang of the top-level element.
     Declared as xs:string, so exactly one value is required. -->
<xsl:variable name="lang" as="xs:string" select="/tei:*/@xml:lang"/>

<!-- Fallback rules: a TEI element without a more specific template only passes
     processing on to its children, and text nodes produce no output. -->
<xsl:template match="tei:*">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<xsl:template match="text()"/>

<!-- Entry point. Every teiHeader is first processed in mode "expand" (with the
     corpus language passed as parameter "lang") and the result is kept in a
     temporary tree; each listOrg found in that tree is then converted.
     NOTE(review): mode "expand" is not defined in this stylesheet - presumably it
     comes from the imported parlamint-lib.xsl; confirm. -->
<xsl:template match="/">
  <xsl:variable name="expanded-header">
    <xsl:apply-templates select="//tei:teiHeader" mode="expand">
      <xsl:with-param name="lang" select="$lang"/>
    </xsl:apply-templates>
  </xsl:variable>
  <xsl:apply-templates select="$expanded-header//tei:listOrg"/>
</xsl:template>

<!-- Reports progress via xsl:message, writes the tab-separated header line and
     then one row for every org that is a descendant of this listOrg. -->
<xsl:template match="tei:listOrg">
  <xsl:message select="concat('INFO: Converting ', @xml:id,
                              ' (', $country, '/', $lang, ') to metadata TSV')"/>
  <!-- Column titles, in output order -->
  <xsl:variable name="columns"
                select="('Country', 'orgType', 'orgID', 'AbbrName', 'FullName',
                         'From', 'To', 'Orientation-LR', 'Wikipedia', 'CHES-ID')"/>
  <xsl:value-of select="string-join($columns, '&#9;')"/>
  <xsl:text>&#10;</xsl:text>
  <xsl:apply-templates select=".//tei:org"/>
</xsl:template>

<!-- Writes one TSV row for an organisation, with the columns
     Country, orgType (@role), orgID (@xml:id), AbbrName, FullName, From, To,
     Orientation-LR, Wikipedia and CHES-ID.
     NOTE(review): $out-lang, et:tsv-value and the named templates orgName and
     party-orientation are not defined in this stylesheet - presumably they come
     from the imported parlamint-lib.xsl; confirm. -->
<xsl:template match="tei:org">
  <!-- Abbreviated name: orgName called with full = "abb" -->
  <xsl:variable name="abbr">
    <xsl:call-template name="orgName">
      <xsl:with-param name="org" select="."/>
      <xsl:with-param name="full">abb</xsl:with-param>
    </xsl:call-template>
  </xsl:variable>
  <!-- Full name: orgName called with full = "yes" -->
  <xsl:variable name="long">
    <xsl:call-template name="orgName">
      <xsl:with-param name="org" select="."/>
      <xsl:with-param name="full">yes</xsl:with-param>
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="orientation">
    <xsl:call-template name="party-orientation">
      <xsl:with-param name="party" select="."/>
    </xsl:call-template>
  </xsl:variable>

  <!-- Wikipedia links given as idno: either tagged with the wanted language or
       containing "/<language>." in the URL -->
  <xsl:variable name="wiki-idnos"
                select="tei:idno[@type = 'URI'][@subtype = 'wikimedia'][contains(., 'wikipedia')]"/>
  <xsl:variable name="wiki-en"
                select="$wiki-idnos[@xml:lang = 'en' or contains(., '/en.')]"/>
  <xsl:variable name="wiki-xx"
                select="$wiki-idnos[@xml:lang = $lang or contains(., concat('/', $lang, '.'))]"/>
  <!-- Take the idno for the requested output language; otherwise fall back to
       the @source of a Wikipedia state (empty if there is none).
       NOTE(review): if several nodes match, value-of joins them with a space. -->
  <xsl:variable name="wiki-url">
    <xsl:value-of select="if ($out-lang = 'en' and $wiki-en) then $wiki-en
                          else if ($out-lang = 'xx' and $wiki-xx) then $wiki-xx
                          else .//tei:state[@type = 'Wikipedia']/@source"/>
  </xsl:variable>

  <!-- The row itself: each value is followed by a tab, the last one by a newline -->
  <xsl:value-of select="$country"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="@role"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="@xml:id"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="et:tsv-value($abbr)"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="et:tsv-value($long)"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="et:tsv-value(tei:event[tei:label = 'existence']/@from)"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="et:tsv-value(tei:event[tei:label = 'existence']/@to)"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="et:tsv-value($orientation)"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="et:tsv-value($wiki-url)"/><xsl:text>&#9;</xsl:text>
  <xsl:value-of select="et:tsv-value(tei:state[@type='CHES']/@key)"/>
  <xsl:text>&#10;</xsl:text>
</xsl:template>

</xsl:stylesheet>

0 comments on commit 5ec54e2

Please sign in to comment.