From c6c838f2203a440c49bada03bed99b7081db7272 Mon Sep 17 00:00:00 2001 From: Hongxin <5400599+zhx828@users.noreply.github.com> Date: Fri, 13 Aug 2021 11:07:16 -0400 Subject: [PATCH] Split genomic change annotation to a separate file The GN annotation seems unstable sometimes, so we need to be able to rerun it separately --- ...ompare-genomic-change-study-annotation.yml | 93 +++++++++++++++++++ .../workflows/compare-study-annotation.yml | 13 ++- AnnotatorCore.py | 2 +- 3 files changed, 100 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/compare-genomic-change-study-annotation.yml diff --git a/.github/workflows/compare-genomic-change-study-annotation.yml b/.github/workflows/compare-genomic-change-study-annotation.yml new file mode 100644 index 0000000..363979b --- /dev/null +++ b/.github/workflows/compare-genomic-change-study-annotation.yml @@ -0,0 +1,93 @@ +# This workflow will install Python dependencies, run annotation against the master annotation for a particular study + +name: Compare Study Genomic Change Annotation + +on: + push: + branches: + - master +jobs: + build: + if: github.repository == 'oncokb/oncokb-annotator' + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 + pip install -r requirements/common.txt -r requirements/pip3.txt + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Annotate msk_impact_2017 study + id: annotate + env: + ONCOKB_API_TOKEN: ${{ secrets.ONCOKB_BOT_API_TOKEN }} + ONCOKB_OAUTH_TOKEN: ${{ secrets.ONCOKB_OAUTH_TOKEN }} + run: | + git checkout -b compare + + STUDY=msk_impact_2017 + DATAHUB_URL=https://media.githubusercontent.com/media/cBioPortal/datahub/42afc279efb8d9104aba36fa35bad3ec41921949/public/$STUDY + + MUTATION_DATA_NAME=data_mutations_extended.txt + CLINICAL_DATA_NAME=data_clinical_sample.txt + + cd data + curl -s $DATAHUB_URL/$MUTATION_DATA_NAME -O + curl -s $DATAHUB_URL/$CLINICAL_DATA_NAME -O + cd .. + + # create compare folder to add all annotated files + mkdir compare + + PREFIX=oncokb + + OGCMAF="$PREFIX"_genomic_change_$MUTATION_DATA_NAME + + python MafAnnotator.py -i data/$MUTATION_DATA_NAME -o compare/$OGCMAF -c data/$CLINICAL_DATA_NAME -b $ONCOKB_API_TOKEN -q Genomic_Change + + git config user.name oncokb-bot + git config user.email dev.oncokb@gmail.com + + git add . + git commit -m 'add analysis' + + echo "::set-output name=STUDY::$STUDY" + echo "::set-output name=FILE_NAME::$OGCMAF" + + - name: Compare annotation result with the ones from master + id: compare + env: + STUDY: ${{steps.annotate.outputs.STUDY}} + FILE_NAME: ${{steps.annotate.outputs.FILE_NAME}} + ONCOKB_OAUTH_TOKEN: ${{ secrets.ONCOKB_OAUTH_TOKEN }} + run: | + # remove everything under compare folder and replace with the ones from oncokb-data + rm -f compare/*.txt + + cd compare + curl -s -H "Authorization: token ${ONCOKB_OAUTH_TOKEN}" https://api.github.com/repos/knowledgesystems/oncokb-data/contents/annotation/$STUDY | jq -r '.[] | .download_url + " " + .name' | while IFS=' ' read -r downloadurl name; do + if [[ "$name" == "$FILE_NAME" ]]; then + curl -s "$downloadurl" -o $name + fi + done + cd .. 
+ + # compare + CHANGED=$(git diff --name-only HEAD --) + + if [ -n "$CHANGED" ] + then + git diff + exit 1 + fi + diff --git a/.github/workflows/compare-study-annotation.yml b/.github/workflows/compare-study-annotation.yml index daae1df..4758fae 100644 --- a/.github/workflows/compare-study-annotation.yml +++ b/.github/workflows/compare-study-annotation.yml @@ -4,8 +4,8 @@ name: Compare Study Annotation on: push: - branches: [ master ] - + branches: + - master jobs: build: if: github.repository == 'oncokb/oncokb-annotator' @@ -57,9 +57,6 @@ jobs: IMAF=data/$MUTATION_DATA_NAME OMAF=compare/"$PREFIX"_$MUTATION_DATA_NAME - IGCMAF=data/$MUTATION_DATA_NAME - OGCMAF=compare/"$PREFIX"_genomic_change_$MUTATION_DATA_NAME - IC=data/$CLINICAL_DATA_NAME OC=compare/"$PREFIX"_$CLINICAL_DATA_NAME @@ -70,7 +67,6 @@ jobs: OCNA=compare/"$PREFIX"_$CNA_DATA_NAME python MafAnnotator.py -i $IMAF -o $OMAF -c $IC -b $ONCOKB_API_TOKEN - python MafAnnotator.py -i $IGCMAF -o $OGCMAF -c $IC -b $ONCOKB_API_TOKEN -q Genomic_Change python FusionAnnotator.py -i $IF -o $OF -c $IC -b $ONCOKB_API_TOKEN python CnaAnnotator.py -i $ICNA -o $OCNA -c $IC -b $ONCOKB_API_TOKEN python ClinicalDataAnnotator.py -i $IC -o $OC -a $OMAF,$OCNA,$OF @@ -88,13 +84,16 @@ jobs: env: STUDY: ${{steps.annotate.outputs.STUDY}} ONCOKB_OAUTH_TOKEN: ${{ secrets.ONCOKB_OAUTH_TOKEN }} + FIEL_NAME_PREFIX: 'oncokb_data' run: | # remove everything under compare folder and replace wiht the ones from oncokb-data rm -f compare/*.txt cd compare curl -s -H "Authorization: token ${ONCOKB_OAUTH_TOKEN}" https://api.github.com/repos/knowledgesystems/oncokb-data/contents/annotation/$STUDY | jq -r '.[] | .download_url + " " + .name' | while IFS=' ' read -r downloadurl name; do - curl -s "$downloadurl" -o $name + if [[ "$name" == "$FIEL_NAME_PREFIX"* ]]; then + curl -s "$downloadurl" -o $name + fi done cd .. 
diff --git a/AnnotatorCore.py b/AnnotatorCore.py index edd3ec1..cf8227f 100644 --- a/AnnotatorCore.py +++ b/AnnotatorCore.py @@ -158,7 +158,7 @@ class ReferenceGenome(Enum): } POST_QUERIES_THRESHOLD = 1000 -POST_QUERIES_THRESHOLD_GC_HGVSG = 100 +POST_QUERIES_THRESHOLD_GC_HGVSG = 1000 def getOncokbInfo(): ret = ['Files annotated on ' + date.today().strftime('%m/%d/%Y') + "\nOncoKB API URL: "+oncokbapiurl]