Add basic structure for asv performance measurement (#289)

* add basic structure for asv * fix checking last release * attempt fix libmambapy related ci failure * reverted mamba attempted fix * update readme and add PR example --------- Co-authored-by: Luca Marconato <m.lucalmer@gmail.com>
scverse · Aug 16, 2024 · 13e69df · 13e69df
1 parent 69105e0
commit 13e69df
Show file tree

Hide file tree

Showing 13 changed files with 558 additions and 0 deletions.
diff --git a/.github/BENCHMARK_FAIL_TEMPLATE.md b/.github/BENCHMARK_FAIL_TEMPLATE.md
@@ -0,0 +1,13 @@
+---
+title: "{{ env.TITLE }}"
+labels: [bug]
+---
+
+The {{ workflow }} workflow failed on {{ date | date("YYYY-MM-DD HH:mm") }} UTC
+
+The most recent failing benchmark was on {{ env.PLATFORM }} py{{ env.PYTHON }} {{ env.BACKEND }}
+with commit: {{ sha }}
+
+Full run: https://github.com/scverse/napari-spatialdata/actions/runs/{{ env.RUN_ID }}
+
+(This post will be updated if another test fails, as long as this issue remains open.)
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -0,0 +1,194 @@
+# This CI configuration for relative benchmarks is based on the research done
+# for scikit-image's implementation available here:
+# https://github.com/scikit-image/scikit-image/blob/9bdd010a8/.github/workflows/benchmarks.yml#L1
+# Blog post with the rationale: https://labs.quansight.org/blog/2021/08/github-actions-benchmarks/
+
+name: Benchmarks
+
+# Three triggers: a label being added to a pull request, a weekly cron run,
+# and a manual dispatch that compares two user-supplied git references.
+on:
+  pull_request:
+    # which label was added is checked by the job-level `if` further down
+    types: [labeled]
+  schedule:
+    - cron: "6 6 * * 0" # every Sunday at 06:06
+  workflow_dispatch:
+    inputs:
+      # both refs are passed to `asv continuous` by the benchmark step
+      base_ref:
+        description: "Baseline commit or git reference"
+        required: true
+      contender_ref:
+        description: "Contender commit or git reference"
+        required: true
+
+# This is the main configuration section that needs to be fine-tuned to this project's needs
+# All the *_THREADS options are just to make the benchmarks more robust by not using parallelism
+env:
+  OPENBLAS_NUM_THREADS: "1"
+  MKL_NUM_THREADS: "1"
+  OMP_NUM_THREADS: "1"
+  ASV_OPTIONS: "--split --show-stderr --factor 1.5 --attribute timeout=900"
+  # --split -> split final reports in tables
+  # --show-stderr -> print tracebacks if errors occur
+  # --factor 1.5 -> report anomaly if tested timings are beyond 1.5x base timings
+  # --attribute timeout=900 -> override timeout attribute (default=60s) to allow slow tests to run
+  # see https://asv.readthedocs.io/en/stable/commands.html#asv-continuous for more details!
+
+jobs:
+  # Runs `asv continuous` once per matrix entry (Qt and non-Qt benchmarks).
+  benchmark:
+    # Run for scheduled and manual triggers unconditionally; for pull requests only
+    # when the label that was just added is `run-benchmarks`.
+    if: ${{ github.event.label.name == 'run-benchmarks' && github.event_name == 'pull_request' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+    name: ${{ matrix.benchmark-name }}
+    runs-on: ${{ matrix.runs-on }}
+    permissions:
+      contents: read
+      # needed by the "Report Failures as Issue" step, which opens/updates an issue
+      issues: write
+    strategy:
+      # let the other matrix entry finish even if one of them fails
+      fail-fast: false
+      matrix:
+        # NOTE(review): `asv-command` is not referenced by any step in this workflow;
+        # the benchmark step calls `asv continuous` directly.
+        include:
+          - benchmark-name: Qt
+            asv-command: continuous
+            selection-regex: "^benchmark_qt_.*"
+            runs-on: macos-latest
+            # Qt tests run on macOS to avoid using Xvfb business
+            # xvfb makes everything run, but some tests segfault :shrug:
+            # Fortunately, macOS graphics stack does not need xvfb!
+          - benchmark-name: non-Qt
+            asv-command: continuous
+            # negative lookahead: every benchmark module that is not `benchmark_qt_*`
+            selection-regex: "^benchmark_(?!qt_).*"
+            runs-on: ubuntu-latest
+
+    steps:
+      # We need the full repo to avoid this issue
+      # https://github.com/actions/checkout/issues/23
+      # (asv also has to check out the baseline commit, so full history is fetched)
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v5
+        name: Install Python
+        with:
+          python-version: "3.11"
+          # NOTE(review): no `cache:` input is set, so this path presumably has no
+          # effect on its own - confirm whether pip caching was intended
+          cache-dependency-path: pyproject.toml
+
+      # system libraries required by Qt on the runner
+      - uses: tlambert03/setup-qt-libs@v1
+
+      # Install asv with the virtualenv backend, pinned by the benchmark constraints file
+      - name: Setup asv
+        run: python -m pip install "asv[virtualenv]"
+        env:
+          # same constraints file that the benchmark step and test.yml use
+          PIP_CONSTRAINT: benchmarks/benchmark.txt
+
+      # Look up the latest release of this repository; the scheduled run uses it as
+      # the baseline (`target_commitish`) and prints its `tag_name`.
+      - uses: octokit/request-action@v2.x
+        id: latest_release
+        with:
+          route: GET /repos/{owner}/{repo}/releases/latest
+          owner: scverse
+          repo: napari-spatialdata
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # Pick baseline/contender refs according to the trigger, run `asv continuous`
+      # on them, and fail the step when the log reports errors or regressions.
+      - name: Run ${{ matrix.benchmark-name }} benchmarks
+        id: run_benchmark
+        env:
+          # asv will checkout commits, which might contain LFS artifacts; ignore those errors since
+          # they are probably just documentation PNGs not needed here anyway
+          GIT_LFS_SKIP_SMUDGE: 1
+          PIP_CONSTRAINT: ${{ github.workspace }}/benchmarks/benchmark.txt
+          # Free-form event data reaches the script through the environment rather than
+          # being templated into the shell source, so it can never be parsed as shell code
+          HEAD_LABEL: ${{ github.event.pull_request.head.label }}
+          BASE_LABEL: ${{ github.event.pull_request.base.label }}
+          DISPATCH_BASE_REF: ${{ github.event.inputs.base_ref }}
+          DISPATCH_CONTENDER_REF: ${{ github.event.inputs.contender_ref }}
+        run: |
+          set -euxo pipefail
+          # split the space-separated option string into an array for safe expansion
+          read -ra cmd_options <<< "$ASV_OPTIONS"
+
+          # ID this runner
+          asv machine --yes
+
+          if [[ $GITHUB_EVENT_NAME == pull_request ]]; then
+            EVENT_NAME="PR #${{ github.event.pull_request.number }}"
+            BASE_REF=${{ github.event.pull_request.base.sha }}
+            CONTENDER_REF=${GITHUB_SHA}
+            echo "Baseline:  ${BASE_REF} (${BASE_LABEL})"
+            echo "Contender: ${CONTENDER_REF} ($HEAD_LABEL)"
+          elif [[ $GITHUB_EVENT_NAME == schedule ]]; then
+            EVENT_NAME="cronjob"
+            BASE_REF="${{ fromJSON(steps.latest_release.outputs.data).target_commitish }}"
+            CONTENDER_REF="${GITHUB_SHA}"
+            echo "Baseline:  ${BASE_REF} (${{ fromJSON(steps.latest_release.outputs.data).tag_name }})"
+            echo "Contender: ${CONTENDER_REF} (current main)"
+          elif [[ $GITHUB_EVENT_NAME == workflow_dispatch ]]; then
+            EVENT_NAME="manual trigger"
+            BASE_REF="${DISPATCH_BASE_REF}"
+            CONTENDER_REF="${DISPATCH_CONTENDER_REF}"
+            echo "Baseline:  ${BASE_REF} (workflow input)"
+            echo "Contender: ${CONTENDER_REF} (workflow input)"
+          fi
+
+          # make the chosen refs available to the later steps of this job
+          echo "EVENT_NAME=$EVENT_NAME" >> "$GITHUB_ENV"
+          echo "BASE_REF=$BASE_REF" >> "$GITHUB_ENV"
+          echo "CONTENDER_REF=$CONTENDER_REF" >> "$GITHUB_ENV"
+
+          # Run benchmarks for current commit against base
+          # (sed prefixes problem lines with `::error::` so they show up as annotations)
+          asv continuous "${cmd_options[@]}" -b "${{ matrix.selection-regex }}" "${BASE_REF}" "${CONTENDER_REF}" \
+          | sed -E "/Traceback | failed$|PERFORMANCE DECREASED/ s/^/::error:: /" \
+          | tee asv_continuous.log
+
+          # Report and export results for subsequent steps
+          if grep "Traceback \|failed\|PERFORMANCE DECREASED" asv_continuous.log > /dev/null ; then
+              exit 1
+          fi
+
+      # For scheduled/manual runs only: open (or update) an issue built from
+      # .github/BENCHMARK_FAIL_TEMPLATE.md when an earlier step failed.
+      - name: Report Failures as Issue
+        if: ${{ (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && failure() }}
+        uses: JasonEtco/create-an-issue@v2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # the values below are read by the template as `env.*`
+          PLATFORM: ${{ matrix.runs-on }}
+          # must match the `python-version` installed by the setup-python step
+          PYTHON: "3.11"
+          BACKEND: ${{ matrix.benchmark-name }}
+          RUN_ID: ${{ github.run_id }}
+          TITLE: "[test-bot] Benchmark tests failing"
+        with:
+          filename: .github/BENCHMARK_FAIL_TEMPLATE.md
+          update_existing: true
+
+      # Always runs: stores the log and the PR comment text next to the asv results
+      # so that they end up in the uploaded artifact.
+      - name: Add more info to artifact
+        if: always()
+        run: |
+          # The benchmark step may have failed before the results directory was created
+          mkdir -p .asv/results
+          # Copy the full `asv continuous` log, if the benchmark step got far enough to write it
+          if [[ -f asv_continuous.log ]]; then
+            cp asv_continuous.log .asv/results/asv_continuous_${{ matrix.benchmark-name }}.log
+          fi
+          # ensure that even if this isn't a PR, the benchmark_report workflow can run without error
+          touch .asv/results/message_${{ matrix.benchmark-name }}.txt
+
+          # Add the message that might be posted as a comment on the PR
+          # We delegate the actual comment to `benchmarks_report.yml` due to
+          # potential token permissions issues
+          if [[ $GITHUB_EVENT_NAME == pull_request ]]; then
+
+          echo "${{ github.event.pull_request.number }}" > .asv/results/pr_number
+          echo \
+          "The ${{ matrix.benchmark-name }} benchmark run requested by $EVENT_NAME ($CONTENDER_REF vs $BASE_REF) has" \
+          "finished with status '${{ steps.run_benchmark.outcome }}'. See the" \
+          "[CI logs and artifacts](||BENCHMARK_CI_LOGS_URL||) for further details." \
+          > .asv/results/message_${{ matrix.benchmark-name }}.txt
+
+          fi
+
+      # One artifact per matrix entry; the benchmark name suffix keeps the names unique.
+      # The `combine-artifacts` job merges them afterwards.
+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: asv-benchmark-results-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}-${{ matrix.benchmark-name }}
+          path: .asv/results
+
+  # Merges the per-matrix artifacts into a single one. Its name (no benchmark suffix)
+  # is the one that benchmarks_report.yml looks up.
+  combine-artifacts:
+    runs-on: ubuntu-latest
+    needs: benchmark
+    # run even when a benchmark job failed, so that the report workflow still gets data
+    if: always()
+    steps:
+      - name: Download artifact
+        uses: actions/download-artifact@v4
+        with:
+          pattern: asv-benchmark-results*
+          path: asv_result
+          # put the files of all matching artifacts into the same directory
+          merge-multiple: true
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: asv-benchmark-results-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
+          path: asv_result
diff --git a/.github/workflows/benchmarks_report.yml b/.github/workflows/benchmarks_report.yml
@@ -0,0 +1,98 @@
+# Report benchmark results to the PR
+# We need a dual workflow to make sure the token has the needed permissions to post comments
+# See https://stackoverflow.com/a/71683208 for more details
+
+# When this workflow is triggered, it pulls the latest version of this file on
+# the default branch. Changes to this file won't be reflected until after the
+# PR is merged.
+# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#workflow_run
+
+name: "Benchmarks - Report"
+
+# Triggered whenever a run of the "Benchmarks" workflow completes
+on:
+  workflow_run:
+    workflows: [Benchmarks]
+    types:
+      - completed
+
+# write access is used to comment on the PR and to remove its label
+permissions:
+  pull-requests: write
+  issues: write
+
+jobs:
+  # Fetches the combined benchmark artifact of the triggering run and reports it on the PR.
+  download:
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Download artifact"
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // list every artifact attached to the workflow run that triggered this one
+            let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
+               owner: context.repo.owner,
+               repo: context.repo.repo,
+               run_id: context.payload.workflow_run.id,
+            });
+            // same naming scheme as the "Upload artifact" step of the combine-artifacts job
+            let artifactName = `asv-benchmark-results-${context.payload.workflow_run.id}-${context.payload.workflow_run.run_number}-${context.payload.workflow_run.run_attempt}`
+            console.log(`Artifact name: ${artifactName}`);
+            console.log(`All artifacts: ${JSON.stringify(allArtifacts.data.artifacts)}`);
+            let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
+              return artifact.name == artifactName
+            })[0];
+            // fail the step when the triggering run produced no combined artifact
+            if (matchArtifact === undefined) {
+              throw TypeError('Build Artifact not found!');
+            }
+            let download = await github.rest.actions.downloadArtifact({
+               owner: context.repo.owner,
+               repo: context.repo.repo,
+               artifact_id: matchArtifact.id,
+               archive_format: 'zip',
+            });
+            // store the archive in the workspace for the following steps
+            let fs = require('fs');
+            fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/asv_results.zip`, Buffer.from(download.data));
+
+      - name: Unzip and prepare data
+        run: |
+          unzip asv_results.zip
+          # combine the Qt and non-Qt messages
+          cat message_Qt.txt message_non-Qt.txt > message.txt
+
+      # The benchmark workflow writes a placeholder because it delegates the link to
+      # this workflow; substitute the URL of the triggering run here.
+      - name: Replace URLs
+        run: |
+          sed -i 's@||BENCHMARK_CI_LOGS_URL||@${{ github.event.workflow_run.html_url }}@g' message.txt
+
+      # Exports PR_NUMBER for the following steps when the artifact contains a `pr_number` file.
+      - name: Collect PR number if available
+        run: |
+          # pr_number comes from an artifact produced by a pull-request run, so it is
+          # untrusted: only a purely numeric value may be appended to $GITHUB_ENV,
+          # otherwise embedded newlines could define arbitrary environment variables
+          if [[ -f pr_number ]]; then
+            pr_number="$(cat pr_number)"
+            if [[ "$pr_number" =~ ^[0-9]+$ ]]; then
+              echo "PR_NUMBER=$pr_number" >> "$GITHUB_ENV"
+            else
+              echo "::error::pr_number does not contain a plain integer; refusing to export it"
+              exit 1
+            fi
+          fi
+
+      # Only runs when the previous step exported PR_NUMBER
+      - name: "Comment on PR"
+        if: env.PR_NUMBER != ''
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            let fs = require('fs');
+            let issue_number = Number(process.env.PR_NUMBER);
+            // message.txt holds the combined Qt / non-Qt status messages
+            let body = fs.readFileSync('message.txt', 'utf8');
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: issue_number,
+              body: body,
+            });
+
+      # Drop the trigger label so that adding it again starts a new benchmark run
+      - name: "Remove run-benchmarks label"
+        if: env.PR_NUMBER != ''
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            let issue_number = Number(process.env.PR_NUMBER);
+            await github.rest.issues.removeLabel({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: issue_number,
+              name: 'run-benchmarks',
+            });
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -69,3 +69,60 @@ jobs:
         with:
           name: plotting-results
           path: /home/runner/work/napari-spatialdata/napari-spatialdata/tests/plots/*
+
+  # Smoke test: checks that the benchmark suite still runs on this commit and on
+  # the latest release (see the two `asv run` steps of this job).
+  test_benchmarks:
+    name: test benchmarks
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    env:
+      GIT_LFS_SKIP_SMUDGE: 1
+    steps:
+      # full history, since asv checks out other commits
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v5
+        with:
+          # quoted so that YAML keeps the version as a string instead of a float
+          python-version: "3.11"
+          cache-dependency-path: pyproject.toml
+
+      - uses: tlambert03/setup-qt-libs@v1
+
+      - uses: octokit/request-action@v2.x
+        # Query the latest release of this repository; its `target_commitish` is the
+        # reference benchmarked by the "Run benchmarks latest release" step
+        id: latest_release
+        with:
+          route: GET /repos/{owner}/{repo}/releases/latest
+          owner: scverse
+          repo: napari-spatialdata
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # asv with the virtualenv backend, pinned through the benchmark constraints file
+      - name: install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install "asv[virtualenv]"
+        env:
+          PIP_CONSTRAINT: benchmarks/benchmark.txt
+
+      # record this runner's machine information without prompting
+      - name: asv machine
+        run: asv machine --yes
+
+      # Both steps run through headless-gui so that the Qt benchmarks have a display.
+      # `<rev>^!` is git notation for "this commit only"; `--quick` keeps the run short.
+      - name: Run benchmarks PR
+        uses: aganders3/headless-gui@v2
+        with:
+          run: |
+            asv run --show-stderr --quick  --attribute timeout=300 HEAD^!
+        env:
+          PR: 1 # prevents asv from running very compute-intensive benchmarks
+          PIP_CONSTRAINT: ${{ github.workspace }}/benchmarks/benchmark.txt
+
+      - name: Run benchmarks latest release
+        # here we check if the benchmark on the latest release is not broken
+        uses: aganders3/headless-gui@v2
+        with:
+          run: |
+            asv run --show-stderr --quick  --attribute timeout=300  ${{ fromJSON(steps.latest_release.outputs.data).target_commitish }}^!
+        env:
+          PR: 1 # prevents asv from running very compute-intensive benchmarks
+          PIP_CONSTRAINT: ${{ github.workspace }}/benchmarks/benchmark.txt
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+.asv
 data/
 
 # Byte-compiled / optimized / DLL files