# Skip to content
#
# cor mets
#
# cor mets #20
#
# Workflow file for this run

# Workflow "gtrepo": starts when a version tag is pushed or when it is
# triggered manually (workflow_dispatch).
name: gtrepo
on:
  push:
    tags:
      # Tag filter: "v" followed by three dot-separated numbers, e.g. v1.2.3
      - 'v[0-9]+.[0-9]+.[0-9]+'
  workflow_dispatch:
jobs:
  cli:
    name: makeBagit
    runs-on: ubuntu-latest
    # The job token gets write access to checks and to repository contents;
    # later steps in this job publish a release and push to branches.
    permissions:
      checks: write
      contents: write
    steps:
# Fetch the repository content into the runner workspace.
- name: Git checkout
  uses: actions/checkout@v3
# Installation Styles
# Clone gt-repo-scripts, keep only its scripts directory, remove the clone.
- name: install analyse xsl-styles
  run: |
    git clone https://github.com/tboenig/gt-repo-scripts.git
    mv gt-repo-scripts/scripts scripts/
    rm -r gt-repo-scripts
# Installation drawImage
# Clone page-xml-draw and install it with pip from the checkout.
- name: install drawImage
  run: |
    git clone https://github.com/tboenig/page-xml-draw.git
    cd page-xml-draw/
    pip3 install .
# Installation GT-Labelling Documentation
# Only clones gt-guidelines; nothing is installed from it in this step.
- name: install labeling
  run: |
    git clone https://github.com/tboenig/gt-guidelines.git
# Installation and Directories
# jq is used by a later step to pretty-print the generated metadata JSON.
- name: install jq
  # -y answers apt's confirmation prompt automatically; the runner has no
  # terminal, so an unanswered prompt would abort the install.
  run: sudo apt-get install -y jq
# Build and install OCR-D core from a shallow clone, then delete the clone.
- name: install ocrd
  run: |
    git clone --depth 1 https://github.com/OCR-D/core
    cd core
    sudo make deps-ubuntu
    make install
    cd ..
    rm -rf core
# wget stores the archive under the last URL segment ("download"), which is
# why unzip is called on that name.
# NOTE(review): later steps run saxon-he-10.5.jar, presumably unpacked from
# this archive into the workspace root - confirm.
- name: Download and install saxon
  run: |
    wget https://sourceforge.net/projects/saxon/files/Saxon-HE/10/Java/SaxonHE10-5J.zip/download
    unzip download
# Output directory that is zipped and attached to the release later on.
- name: make ocrdzip_out
  run: mkdir ocrdzip_out
# Output directory for the GitHub Pages content, plus a subfolder for the
# rendered images.
- name: make gh-pages_out
  run: |
    mkdir ghout
    mkdir ghout/drawImg
# Helper scripts taken from the scripts directory installed earlier.
- name: make readme_out
  run: sh scripts/readmefolder.sh
- name: readme.xml file
  run: sh scripts/xreadme.sh
# Transformation and analyzing
# NOTE: despite the step name and id, the command below converts
# METADATA.yml to JSON and writes the result to METADATA.json.
- name: Get SDK Version from config
  id: lookupSdkVersion
  uses: mikefarah/yq@master
  with:
    cmd: yq -o=json METADATA.yml > METADATA.json
# Both steps run the same stylesheet with Saxon; the "output" parameter
# selects what is produced. The stylesheet file is also passed as the
# source document (-s:).
#
# output=METADATA -> ghout/metadata.md
- name: transform METADATA and make GT-Overview
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=METADATA repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \
    -s:scripts/gt-overview_metadata.xsl -o:ghout/metadata.md
  shell: bash
# output=TABLE -> ghout/table.md
- name: make Compressed table view
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=TABLE repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY \
    -s:scripts/gt-overview_metadata.xsl -o:ghout/table.md
  shell: bash
# Three further runs of the same stylesheet, selected via "output".
# repoBase must be spelled $GITHUB_REF_NAME in all caps: shell variable
# names are case-sensitive, and a mixed-case spelling expands to an empty
# string, leaving repoBase blank.
#
# output=OVERVIEW -> ghout/overview.md
- name: detailed table view
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=OVERVIEW repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY \
    -s:scripts/gt-overview_metadata.xsl -o:ghout/overview.md
  shell: bash
# output=METS -> scripts/mets.sh (a generated shell script)
- name: generate mets.sh
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=METS repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY \
    -s:scripts/gt-overview_metadata.xsl -o:scripts/mets.sh
  shell: bash
# output=DrawImages -> scripts/drawimages.sh, executed by a later step
- name: generate drawimages.sh
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=DrawImages repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY \
    -s:scripts/gt-overview_metadata.xsl -o:scripts/drawimages.sh
  # Custom shell template: plain bash, without the extra flags GitHub adds
  # for the built-in "bash" shell.
  shell: bash {0}
# output=METAJSON -> ocrdzip_out/metadata_l.json (intermediate file).
# repoBase must be spelled $GITHUB_REF_NAME in all caps: shell variable
# names are case-sensitive, and a mixed-case spelling expands to an empty
# string, leaving repoBase blank.
- name: generate Metadata JSON file
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=METAJSON repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \
    -s:scripts/gt-overview_metadata.xsl -o:ocrdzip_out/metadata_l.json
  shell: bash
# Pretty-print the intermediate JSON with jq, copy the formatted file to the
# Pages output folder, and remove the intermediate file.
- name: format json file and copy to gh branch
  run: |
    jq '.' ocrdzip_out/metadata_l.json > ocrdzip_out/metadata.json
    cp ocrdzip_out/metadata.json ghout/
    rm ocrdzip_out/metadata_l.json
# Three further runs of the same stylesheet, selected via "output".
# repoBase must be spelled $GITHUB_REF_NAME in all caps: shell variable
# names are case-sensitive, and a mixed-case spelling expands to an empty
# string, leaving repoBase blank.
#
# output=README -> README.md in the workspace root (committed later on)
- name: generate README
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=README repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY \
    -s:scripts/gt-overview_metadata.xsl -o:README.md
  shell: bash
# output=METSvolume -> ocrdzip_out/mets.xml
- name: generate METS Volume File
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=METSvolume repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \
    -s:scripts/gt-overview_metadata.xsl -o:ocrdzip_out/mets.xml
  shell: bash
# output=METSdefault; no -o: target is given for this run.
# NOTE(review): presumably the stylesheet writes its own result files in
# this mode - confirm against gt-overview_metadata.xsl.
- name: delete fileGrp DEFAULT
  run: |
    java -jar saxon-he-10.5.jar -xsl:scripts/gt-overview_metadata.xsl \
    output=METSdefault repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \
    -s:scripts/gt-overview_metadata.xsl
  shell: bash
# Expose metadata.md under the name index.md via a symbolic link in ghout.
- name: Index-link
  run: |
    cd ghout
    ln -s metadata.md index.md
# Bagit
- name: make bagit
  run: sh scripts/data_structure.sh
  shell: bash
# drawImages
# Runs the script that an earlier step generated with output=DrawImages.
- name: make drawimages
  run: |
    sh scripts/drawimages.sh
  shell: bash
# resize images with imagemagick
# mogrify rewrites each PNG in ghout/drawImg in place at 50% size.
- name: resize images
  run: |
    cd ghout/drawImg
    mogrify -resize 50% *.png
# Static assets that belong next to the generated pages.
- name: copy css styles and yml files to ghout
  run: |
    cp scripts/table_hide.css ghout/
    cp scripts/_config.yml ghout/
# Pack the ocrdzip_out folder into bagitDump-v<run number>.zip.
- name: archive the ocrdzip files from ocrdzip_out folder
  uses: thedoctor0/zip-release@master
  with:
    filename: bagitDump-v${{ github.run_number }}.zip
    path: 'ocrdzip_out'
# Publish a release that carries the zip as its artifact; the release notes
# are the HTML snippet given in "body".
- name: Upload Release
  uses: ncipollo/release-action@v1
  with:
    artifacts: './bagitDump-v*.zip'
    artifactContentType: application/zip
    name: Release ${{ github.run_number }}_${{ github.ref_name }}
    body: |
      <dl>
      <dt>Version:</dt>
      <dd>${{ github.ref_name }}</dd>
      <dt>Info:</dt>
      <dd>To use the Ground Truth download the bagitDump-v${{ github.run_number }}.zip file.<br/>
      The zip file contains all ocr-d-bagit files.<br/>
      The Metadata.json file contains metadata for the Ground Truth corpus.<br/>
      The Mets.xml file lists all containing documents/bagits.<br/>
      The bagits correspond to the <a href="https://ocr-d.de/de/spec/ocrd_zip.html">OCR-D Bagit Spec</a>.</dd>
      </dl>
    token: ${{ secrets.GITHUB_TOKEN }}
# Commit the regenerated README.md as the github-actions bot and push the
# current HEAD to main. If there is nothing to commit, the "|| echo" keeps
# the step from failing at the commit command.
- name: Commit README
  run: |
    git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
    git config --local user.name "github-actions[bot]"
    git add README.md
    git commit -m "[Automatic] Update readme files" || echo "Nothing to update"
    git push origin HEAD:main
# Publish the contents of ghout to the gh-pages branch.
- name: Deploy GT-Overview to GitHub Pages 🚀
  uses: JamesIves/github-pages-deploy-action@v4.4.1
  with:
    branch: gh-pages  # Target branch of the deployment.
    folder: ghout  # Folder whose contents are deployed.