Skip to content

Commit

Permalink
action: generating ParlaMint-[DK] sample files with #755
Browse files Browse the repository at this point in the history
  • Loading branch information
matyaskopp committed Sep 12, 2023
1 parent d892803 commit 03d9cc8
Show file tree
Hide file tree
Showing 14 changed files with 3,838 additions and 88,468 deletions.
97 changes: 70 additions & 27 deletions Samples/ParlaMint-DK/ParlaMint-DK.ana.xml
Original file line number Diff line number Diff line change
@@ -1,22 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?>
<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xml:lang="da" xml:id="ParlaMint-DK.ana">
<teiCorpus xmlns="http://www.tei-c.org/ns/1.0"
xml:lang="da"
xml:id="ParlaMint-DK.ana">
<teiHeader>
<fileDesc>
<titleStmt>
<title type="main" xml:lang="en">Danish parliamentary corpus ParlaMint-DK [ParlaMint.ana]</title>
<title type="main" xml:lang="da">Det danske korpus ParlaMint-DK [ParlaMint.ana]</title>
<title type="main" xml:lang="en">Danish parliamentary corpus ParlaMint-DK [ParlaMint.ana SAMPLE]</title>
<title type="main" xml:lang="da">Det danske korpus ParlaMint-DK [ParlaMint.ana SAMPLE]</title>
<title type="sub" xml:lang="en">Hansards of the Danish Parliament, Folketinget, session 20141 to 20211 (2014-2022)</title>
<title type="sub" xml:lang="da">Referater fra folketingssalen, folketingsårene 20141 til 20211 (2014-2022)</title>
<meeting xml:lang="en" n="20141" corresp="#FT" ana="#parla.uni #parla.session #FT.20141">Session 20141</meeting>
<meeting xml:lang="en" n="20142" corresp="#FT" ana="#parla.uni #parla.session #FT.20142">Session 20142</meeting>
<meeting xml:lang="en" n="20151" corresp="#FT" ana="#parla.uni #parla.session #FT.20151">Session 20151</meeting>
<meeting xml:lang="en" n="20161" corresp="#FT" ana="#parla.uni #parla.session #FT.20161">Session 20161</meeting>
<meeting xml:lang="en" n="20171" corresp="#FT" ana="#parla.uni #parla.session #FT.20171">Session 20171</meeting>
<meeting xml:lang="en" n="20181" corresp="#FT" ana="#parla.uni #parla.session #FT.20181">Session 20181</meeting>
<meeting xml:lang="en" n="20182" corresp="#FT" ana="#parla.uni #parla.session #FT.20182">Session 20182</meeting>
<meeting xml:lang="en" n="20191" corresp="#FT" ana="#parla.uni #parla.session #FT.20191">Session 20191</meeting>
<meeting xml:lang="en" n="20201" corresp="#FT" ana="#parla.uni #parla.session #FT.20201">Session 20201</meeting>
<meeting xml:lang="en" n="20211" corresp="#FT" ana="#parla.uni #parla.session #FT.20211">Session 20211</meeting>
<meeting xml:lang="en"
n="20141"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20141">Session 20141</meeting>
<meeting xml:lang="en"
n="20142"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20142">Session 20142</meeting>
<meeting xml:lang="en"
n="20151"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20151">Session 20151</meeting>
<meeting xml:lang="en"
n="20161"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20161">Session 20161</meeting>
<meeting xml:lang="en"
n="20171"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20171">Session 20171</meeting>
<meeting xml:lang="en"
n="20181"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20181">Session 20181</meeting>
<meeting xml:lang="en"
n="20182"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20182">Session 20182</meeting>
<meeting xml:lang="en"
n="20191"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20191">Session 20191</meeting>
<meeting xml:lang="en"
n="20201"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20201">Session 20201</meeting>
<meeting xml:lang="en"
n="20211"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20211">Session 20211</meeting>
<respStmt>
<persName>Bart Jongejan</persName>
<persName>Dorte Haltrup Hansen</persName>
Expand All @@ -37,7 +69,7 @@
<editionStmt>
<edition>3.1</edition>
</editionStmt>
<extent>
<extent><!--These numbers do not reflect the size of the sample!-->
<measure unit="speeches" quantity="398610" xml:lang="en">398,610 speeches</measure>
<measure unit="words" quantity="40797597" xml:lang="en">40,797,597 words</measure>
</extent>
Expand All @@ -51,7 +83,7 @@
<licence>http://creativecommons.org/licenses/by/4.0/</licence>
<p xml:lang="en">This work is licensed under the <ref target="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ref>.</p>
</availability>
<date when="2023-06-13">2023-06-13</date>
<date when="2023-09-12">2023-09-12</date>
</publicationStmt>
<sourceDesc>
<bibl>
Expand All @@ -64,7 +96,8 @@
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="en"><ref target="https://www.clarin.eu/content/parlamint">ParlaMint</ref> is a project that aims to (1) create a multilingual set of comparable corpora of parliamentary proceedings uniformly encoded according to the <ref target="https://clarin-eric.github.io/ParlaMint/">ParlaMint encoding guidelines</ref>, covering the period from 2015 to mid-2022; (2) add linguistic annotations to the corpora and machine-translate them to English; (3) make the corpora available through concordancers; and (4) build use cases in Political Sciences and Digital Humanities based on the corpus data.</p>
<p xml:lang="en">
<ref target="https://www.clarin.eu/content/parlamint">ParlaMint</ref> is a project that aims to (1) create a multilingual set of comparable corpora of parliamentary proceedings uniformly encoded according to the <ref target="https://clarin-eric.github.io/ParlaMint/">ParlaMint encoding guidelines</ref>, covering the period from 2015 to mid-2022; (2) add linguistic annotations to the corpora and machine-translate them to English; (3) make the corpora available through concordancers; and (4) build use cases in Political Sciences and Digital Humanities based on the corpus data.</p>
</projectDesc>
<editorialDecl>
<correction>
Expand All @@ -83,7 +116,7 @@
<p xml:lang="en">The texts are segmented into utterances (speeches) and segments (corresponding to one utterance).</p>
</segmentation>
</editorialDecl>
<tagsDecl>
<tagsDecl><!--These numbers do not reflect the size of the sample!-->
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="body" occurs="947"/>
<tagUsage gi="div" occurs="14302"/>
Expand All @@ -101,11 +134,16 @@
</namespace>
</tagsDecl>
<classDecl>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-taxonomy-parla.legislature.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-taxonomy-speaker_types.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-taxonomy-subcorpus.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-taxonomy-NER.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-taxonomy-UD-SYN.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-taxonomy-parla.legislature.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-taxonomy-speaker_types.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-taxonomy-subcorpus.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-taxonomy-NER.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-taxonomy-UD-SYN.ana.xml"/>
</classDecl>
<listPrefixDef>
<prefixDef ident="ud-syn" matchPattern="(.+)" replacementPattern="#$1">
Expand Down Expand Up @@ -145,7 +183,8 @@
</textClass>
<particDesc>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-DK-listOrg.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-DK-listPerson.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-DK-listPerson.xml"/>
</particDesc>
<langUsage>
<language ident="da" xml:lang="da">Dansk</language>
Expand All @@ -154,9 +193,13 @@
<language ident="en" xml:lang="en">English</language>
</langUsage>
</profileDesc>
<revisionDesc>
<change when="2023-09-12">
<name>GitHub Action</name>: Made sample.</change>
</revisionDesc>
</teiHeader>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-DK_2014-10-07-20141-M1.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-DK_2015-01-13-20141-M40.ana.xml"/>

<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-DK_2014-10-07-20141-M1.ana.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-DK_2015-01-13-20141-M40.ana.xml"/>
</teiCorpus>

88 changes: 63 additions & 25 deletions Samples/ParlaMint-DK/ParlaMint-DK.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,50 @@
<teiHeader>
<fileDesc>
<titleStmt>
<title type="main" xml:lang="en">Danish parliamentary corpus ParlaMint-DK [ParlaMint]</title>
<title type="main" xml:lang="da">Det danske korpus ParlaMint-DK [ParlaMint]</title>
<title type="main" xml:lang="en">Danish parliamentary corpus ParlaMint-DK [ParlaMint SAMPLE]</title>
<title type="main" xml:lang="da">Det danske korpus ParlaMint-DK [ParlaMint SAMPLE]</title>
<title type="sub" xml:lang="en">Hansards of the Danish Parliament, Folketinget, session 20141 to 20211 (2014-2022)</title>
<title type="sub" xml:lang="da">Referater fra folketingssalen, folketingsårene 20141 til 20211 (2014-2022)</title>
<meeting xml:lang="en" n="20141" corresp="#FT" ana="#parla.uni #parla.session #FT.20141">Session 20141</meeting>
<meeting xml:lang="en" n="20142" corresp="#FT" ana="#parla.uni #parla.session #FT.20142">Session 20142</meeting>
<meeting xml:lang="en" n="20151" corresp="#FT" ana="#parla.uni #parla.session #FT.20151">Session 20151</meeting>
<meeting xml:lang="en" n="20161" corresp="#FT" ana="#parla.uni #parla.session #FT.20161">Session 20161</meeting>
<meeting xml:lang="en" n="20171" corresp="#FT" ana="#parla.uni #parla.session #FT.20171">Session 20171</meeting>
<meeting xml:lang="en" n="20181" corresp="#FT" ana="#parla.uni #parla.session #FT.20181">Session 20181</meeting>
<meeting xml:lang="en" n="20182" corresp="#FT" ana="#parla.uni #parla.session #FT.20182">Session 20182</meeting>
<meeting xml:lang="en" n="20191" corresp="#FT" ana="#parla.uni #parla.session #FT.20191">Session 20191</meeting>
<meeting xml:lang="en" n="20201" corresp="#FT" ana="#parla.uni #parla.session #FT.20201">Session 20201</meeting>
<meeting xml:lang="en" n="20211" corresp="#FT" ana="#parla.uni #parla.session #FT.20211">Session 20211</meeting>
<meeting xml:lang="en"
n="20141"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20141">Session 20141</meeting>
<meeting xml:lang="en"
n="20142"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20142">Session 20142</meeting>
<meeting xml:lang="en"
n="20151"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20151">Session 20151</meeting>
<meeting xml:lang="en"
n="20161"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20161">Session 20161</meeting>
<meeting xml:lang="en"
n="20171"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20171">Session 20171</meeting>
<meeting xml:lang="en"
n="20181"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20181">Session 20181</meeting>
<meeting xml:lang="en"
n="20182"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20182">Session 20182</meeting>
<meeting xml:lang="en"
n="20191"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20191">Session 20191</meeting>
<meeting xml:lang="en"
n="20201"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20201">Session 20201</meeting>
<meeting xml:lang="en"
n="20211"
corresp="#FT"
ana="#parla.uni #parla.session #FT.20211">Session 20211</meeting>
<respStmt>
<persName>Bart Jongejan</persName>
<persName>Dorte Haltrup Hansen</persName>
Expand All @@ -36,7 +66,7 @@
<editionStmt>
<edition>3.1</edition>
</editionStmt>
<extent>
<extent><!--These numbers do not reflect the size of the sample!-->
<measure unit="speeches" quantity="398610" xml:lang="en">398,610 speeches</measure>
<measure unit="words" quantity="40797597" xml:lang="en">40,797,597 words</measure>
</extent>
Expand All @@ -50,7 +80,7 @@
<licence>http://creativecommons.org/licenses/by/4.0/</licence>
<p xml:lang="en">This work is licensed under the <ref target="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ref>.</p>
</availability>
<date when="2023-06-13">2023-06-13</date>
<date when="2023-09-12">2023-09-12</date>
</publicationStmt>
<sourceDesc>
<bibl>
Expand All @@ -63,7 +93,8 @@
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="en"><ref target="https://www.clarin.eu/content/parlamint">ParlaMint</ref> is a project that aims to (1) create a multilingual set of comparable corpora of parliamentary proceedings uniformly encoded according to the <ref target="https://clarin-eric.github.io/ParlaMint/">ParlaMint encoding guidelines</ref>, covering the period from 2015 to mid-2022; (2) add linguistic annotations to the corpora and machine-translate them to English; (3) make the corpora available through concordancers; and (4) build use cases in Political Sciences and Digital Humanities based on the corpus data.</p>
<p xml:lang="en">
<ref target="https://www.clarin.eu/content/parlamint">ParlaMint</ref> is a project that aims to (1) create a multilingual set of comparable corpora of parliamentary proceedings uniformly encoded according to the <ref target="https://clarin-eric.github.io/ParlaMint/">ParlaMint encoding guidelines</ref>, covering the period from 2015 to mid-2022; (2) add linguistic annotations to the corpora and machine-translate them to English; (3) make the corpora available through concordancers; and (4) build use cases in Political Sciences and Digital Humanities based on the corpus data.</p>
</projectDesc>
<editorialDecl>
<correction>
Expand All @@ -82,7 +113,7 @@
<p xml:lang="en">The texts are segmented into utterances (speeches) and segments (corresponding to one utterance).</p>
</segmentation>
</editorialDecl>
<tagsDecl>
<tagsDecl><!--These numbers do not reflect the size of the sample!-->
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="body" occurs="947"/>
<tagUsage gi="div" occurs="14302"/>
Expand All @@ -94,9 +125,12 @@
</namespace>
</tagsDecl>
<classDecl>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-taxonomy-parla.legislature.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-taxonomy-speaker_types.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-taxonomy-subcorpus.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-taxonomy-parla.legislature.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-taxonomy-speaker_types.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-taxonomy-subcorpus.xml"/>
</classDecl>
</encodingDesc>
<profileDesc>
Expand All @@ -113,7 +147,8 @@
</textClass>
<particDesc>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-DK-listOrg.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-DK-listPerson.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-DK-listPerson.xml"/>
</particDesc>
<langUsage>
<language ident="da" xml:lang="da">Dansk</language>
Expand All @@ -122,10 +157,13 @@
<language ident="en" xml:lang="en">English</language>
</langUsage>
</profileDesc>
<revisionDesc>
<change when="2023-09-12">
<name>GitHub Action</name>: Made sample.</change>
</revisionDesc>
</teiHeader>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-DK_2014-10-07-20141-M1.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-DK_2015-01-13-20141-M40.xml"/>


<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-DK_2014-10-07-20141-M1.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
href="ParlaMint-DK_2015-01-13-20141-M40.xml"/>
</teiCorpus>

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
ID Title Date Body Term Session Meeting Sitting Agenda Subcorpus Speaker_role Speaker_MP Speaker_Minister Speaker_party Speaker_party_name Party_status Speaker_name Speaker_gender Speaker_birth
ParlaMint-DK_20141007120002 Referat fra folketingssalen, folketingsåret 20141, møde M1 (2014-10-07) endelig version 2014-10-07 Etkammersystem 2014-2015 20141 1 Reference Formand MP notMinister V Venstre Opposition Haarder, Bertel M 1944
ParlaMint-DK_20141007120050 Referat fra folketingssalen, folketingsåret 20141, møde M1 (2014-10-07) endelig version 2014-10-07 Etkammersystem 2014-2015 20141 1 Reference Formand MP notMinister V Venstre Opposition Haarder, Bertel M 1944
ParlaMint-DK_20141007124633 Referat fra folketingssalen, folketingsåret 20141, møde M1 (2014-10-07) endelig version 2014-10-07 Etkammersystem 2014-2015 20141 1 Reference Formand MP notMinister S Socialdemokratiet Coalition Lykketoft, Mogens M 1946
ParlaMint-DK_20141007124638 Referat fra folketingssalen, folketingsåret 20141, møde M1 (2014-10-07) endelig version 2014-10-07 Etkammersystem 2014-2015 20141 1 Reference Formand MP notMinister S Socialdemokratiet Coalition Lykketoft, Mogens M 1946
Loading

0 comments on commit 03d9cc8

Please sign in to comment.