From 12137a3f86074c43e88ea05f391893f07d8c1a50 Mon Sep 17 00:00:00 2001
From: Bill Teng <bteng@tenstorrent.com>
Date: Thu, 3 Oct 2024 19:57:01 +0000
Subject: [PATCH] #0: add T3k profiler and TG/TGG model perf workflows to new
 pipeline select workflows

---
 .github/workflows/pipeline-select-t3k.yaml    | 10 +++
 .github/workflows/pipeline-select.yaml        | 18 +++++
 .../workflows/t3000-profiler-tests-impl.yaml  | 46 +++++++++++
 .github/workflows/t3000-profiler-tests.yaml   | 41 +---------
 .../workflows/tg-model-perf-tests-impl.yaml   | 79 +++++++++++++++++++
 .github/workflows/tg-model-perf-tests.yaml    | 74 +----------------
 .../workflows/tgg-model-perf-tests-impl.yaml  | 79 +++++++++++++++++++
 .github/workflows/tgg-model-perf-tests.yaml   | 74 +----------------
 8 files changed, 238 insertions(+), 183 deletions(-)
 create mode 100644 .github/workflows/t3000-profiler-tests-impl.yaml
 create mode 100644 .github/workflows/tg-model-perf-tests-impl.yaml
 create mode 100644 .github/workflows/tgg-model-perf-tests-impl.yaml

diff --git a/.github/workflows/pipeline-select-t3k.yaml b/.github/workflows/pipeline-select-t3k.yaml
index ddd2947319cd..3df726bdd6e9 100644
--- a/.github/workflows/pipeline-select-t3k.yaml
+++ b/.github/workflows/pipeline-select-t3k.yaml
@@ -36,6 +36,11 @@ on:
         required: false
         type: boolean
         default: false
+      t3000-profiler:  # opt-in toggle; the t3000-profiler-tests job runs only when this is true
+        description: "T3000 profiler tests (requires tracy build)"
+        required: false
+        type: boolean
+        default: false
 
 run-name: ${{ inputs.description }}
 jobs:
@@ -70,3 +75,8 @@ jobs:
     secrets: inherit
     uses: ./.github/workflows/t3000-model-perf-tests-impl.yaml
     if: ${{ inputs.t3000-model-perf }}
+  t3000-profiler-tests:
+    needs: build-artifact  # NOTE(review): the impl downloads TTMetal_build_<arch>_profiler; confirm build-artifact uploads that name (tracy build) - TODO confirm
+    secrets: inherit  # the impl's Slack failure report reads secrets.SLACK_WEBHOOK_URL
+    uses: ./.github/workflows/t3000-profiler-tests-impl.yaml
+    if: ${{ inputs.t3000-profiler }}  # skipped unless the t3000-profiler input is true
diff --git a/.github/workflows/pipeline-select.yaml b/.github/workflows/pipeline-select.yaml
index 8f991bb0c5c0..62d3ce08cd0e 100644
--- a/.github/workflows/pipeline-select.yaml
+++ b/.github/workflows/pipeline-select.yaml
@@ -41,6 +41,10 @@ on:
         required: false
         type: boolean
         default: false
+      tgg-model-perf:  # opt-in toggle; the tgg-model-perf-tests job runs only when this is true
+        required: false
+        type: boolean
+        default: false
       tg-unit:
         required: false
         type: boolean
@@ -49,6 +53,10 @@ on:
         required: false
         type: boolean
         default: false
+      tg-model-perf:  # opt-in toggle; the tg-model-perf-tests job runs only when this is true
+        required: false
+        type: boolean
+        default: false
 
 run-name: ${{ inputs.description }}
 jobs:
@@ -88,6 +96,11 @@ jobs:
     secrets: inherit
     uses: ./.github/workflows/tgg-frequent-tests-impl.yaml
     if: ${{ inputs.tgg-frequent }}
+  tgg-model-perf-tests:
+    needs: build-artifact  # the impl downloads TTMetal_build_<arch>; presumably uploaded by build-artifact - confirm
+    secrets: inherit
+    uses: ./.github/workflows/tgg-model-perf-tests-impl.yaml  # reusable workflow holding the actual test steps
+    if: ${{ inputs.tgg-model-perf }}  # skipped unless the tgg-model-perf input is true
   tg-unit-tests:
     needs: build-artifact
     secrets: inherit
@@ -98,3 +111,8 @@ jobs:
     secrets: inherit
     uses: ./.github/workflows/tg-frequent-tests-impl.yaml
     if: ${{ inputs.tg-frequent }}
+  tg-model-perf-tests:
+    needs: build-artifact  # the impl downloads TTMetal_build_<arch>; presumably uploaded by build-artifact - confirm
+    secrets: inherit
+    uses: ./.github/workflows/tg-model-perf-tests-impl.yaml  # reusable workflow holding the actual test steps
+    if: ${{ inputs.tg-model-perf }}  # skipped unless the tg-model-perf input is true
diff --git a/.github/workflows/t3000-profiler-tests-impl.yaml b/.github/workflows/t3000-profiler-tests-impl.yaml
new file mode 100644
index 000000000000..571ac1628e37
--- /dev/null
+++ b/.github/workflows/t3000-profiler-tests-impl.yaml
@@ -0,0 +1,46 @@
+name: "[internal] T3000 profiler tests impl"
+
+on:
+  workflow_call:  # reusable only: started from a caller workflow's `uses:`
+
+jobs:
+  t3000-profiler-tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        test-group: [
+          {
+            name: "T3000 profiler tests",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"],
+            cmd: './tests/scripts/run_profiler_regressions.sh'
+          },
+        ]
+    name: ${{ matrix.test-group.name }}
+    env:
+      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
+      ARCH_NAME: ${{ matrix.test-group.arch }}
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+    environment: dev
+    runs-on: ${{ matrix.test-group.runs-on }}
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
+      - name: Set up dynamic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
+      - uses: actions/download-artifact@v4
+        with:  # the "_profiler" suffix must match the artifact name uploaded by the caller's build job
+          name: TTMetal_build_${{ matrix.test-group.arch }}_profiler
+      - name: Extract files
+        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
+      - uses: ./.github/actions/install-python-deps
+      - name: Run profiler regression tests  # runs the matrix entry's cmd so the matrix is the single source of truth
+        timeout-minutes: 30
+        run: |
+          ${{ matrix.test-group.cmd }}
+      - uses: ./.github/actions/slack-report
+        if: ${{ failure() }}  # report only when an earlier step failed
+        with:
+          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
+          owner: U03BJ1L3LUQ # Mo Memarian
diff --git a/.github/workflows/t3000-profiler-tests.yaml b/.github/workflows/t3000-profiler-tests.yaml
index b63ecedf2131..ccc9dda28762 100644
--- a/.github/workflows/t3000-profiler-tests.yaml
+++ b/.github/workflows/t3000-profiler-tests.yaml
@@ -15,42 +15,5 @@ jobs:
     secrets: inherit
   t3000-profiler-tests:
     needs: build-artifact-profiler
-    strategy:
-      fail-fast: false
-      matrix:
-        test-group: [
-          {
-            name: "T3000 profiler tests",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"],
-            cmd: './tests/scripts/run_profiler_regressions.sh'
-          },
-        ]
-    name: ${{ matrix.test-group.name }}
-    env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
-      ARCH_NAME: ${{ matrix.test-group.arch }}
-      LOGURU_LEVEL: INFO
-      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
-    environment: dev
-    runs-on: ${{ matrix.test-group.runs-on }}
-    steps:
-      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
-      - name: Set up dynamic env vars for build
-        run: |
-          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
-        with:
-          name: TTMetal_build_${{ matrix.test-group.arch }}_profiler
-      - name: Extract files
-        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
-      - uses: ./.github/actions/install-python-deps
-      - name: Run profiler regression tests
-        timeout-minutes: 30
-        run: |
-          ./tests/scripts/run_profiler_regressions.sh
-      - uses: ./.github/actions/slack-report
-        if: ${{ failure() }}
-        with:
-          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
-          owner: U03BJ1L3LUQ # Mo Memarian
+    secrets: inherit
+    uses: ./.github/workflows/t3000-profiler-tests-impl.yaml
diff --git a/.github/workflows/tg-model-perf-tests-impl.yaml b/.github/workflows/tg-model-perf-tests-impl.yaml
new file mode 100644
index 000000000000..dd10b6109a9c
--- /dev/null
+++ b/.github/workflows/tg-model-perf-tests-impl.yaml
@@ -0,0 +1,79 @@
+name: "[internal] TG model perf tests impl"
+
+on:
+  workflow_call:  # reusable only: started from a caller workflow's `uses:`
+
+jobs:
+  tg-model-perf-tests:
+    strategy:
+      fail-fast: false  # one failing matrix entry does not cancel the other
+      matrix:
+        test-group: [
+          {
+            name: "TG LLM model perf tests",
+            model-type: "LLM",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
+            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_tg_device --dispatch-mode ""'
+          },
+          {
+            name: "TG CNN model perf tests",
+            model-type: "CNN",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
+            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tg_device --dispatch-mode ""'
+          },
+        ]
+    name: ${{ matrix.test-group.name }}
+    env:
+      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
+      ARCH_NAME: ${{ matrix.test-group.arch }}
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+    environment: dev
+    runs-on: ${{ matrix.test-group.runs-on }}
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
+      - name: Enable performance mode  # undone by the final "Disable performance mode" step
+        run: |
+          sudo cpupower frequency-set -g performance
+      - name: Ensure weka mount is active
+        run: |
+          sudo systemctl restart mnt-MLPerf.mount
+          sudo /etc/rc.local
+          ls -al /mnt/MLPerf/bit_error_tests
+      - name: Set up dynamic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
+          echo "PYTHONPATH=$(pwd)" >> "$GITHUB_ENV"
+      - uses: actions/download-artifact@v4
+        with:  # build artifact is expected to come from a job the caller ran earlier in the same workflow run
+          name: TTMetal_build_${{ matrix.test-group.arch }}
+      - name: Extract files
+        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
+      - uses: ./.github/actions/install-python-deps
+      - name: Run model perf regression tests
+        timeout-minutes: 60
+        run: |
+          source ${{ github.workspace }}/python_env/bin/activate
+          cd "$TT_METAL_HOME"
+          export PYTHONPATH="$TT_METAL_HOME"
+          ${{ matrix.test-group.cmd }}
+      - name: Check perf report exists
+        id: check-perf-report
+        if: ${{ !cancelled() }}  # still runs after a failed test step so a partial report can be uploaded
+        run: |
+          ls -hal
+          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
+          ls -hal "$PERF_REPORT_FILENAME"
+          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
+      - name: Upload perf report
+        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
+        uses: actions/upload-artifact@v4
+        with:  # v4 rejects duplicate artifact names within a run; the literal "tg" suffix replaces an undefined matrix field that rendered empty
+          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-tg
+          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
+      - name: Disable performance mode
+        if: always()  # restore the CPU governor even when earlier steps failed or were cancelled
+        run: |
+          sudo cpupower frequency-set -g ondemand
diff --git a/.github/workflows/tg-model-perf-tests.yaml b/.github/workflows/tg-model-perf-tests.yaml
index 0dd1580e3715..a813b7636021 100644
--- a/.github/workflows/tg-model-perf-tests.yaml
+++ b/.github/workflows/tg-model-perf-tests.yaml
@@ -13,75 +13,5 @@ jobs:
     secrets: inherit
   tg-model-perf-tests:
     needs: build-artifact
-    strategy:
-      fail-fast: false
-      matrix:
-        test-group: [
-          {
-            name: "TG LLM model perf tests",
-            model-type: "LLM",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
-            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_tg_device --dispatch-mode ""'
-          },
-          {
-            name: "TG CNN model perf tests",
-            model-type: "CNN",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
-            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tg_device --dispatch-mode ""'
-          },
-        ]
-    name: ${{ matrix.test-group.name }}
-    env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
-      ARCH_NAME: ${{ matrix.test-group.arch }}
-      LOGURU_LEVEL: INFO
-      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
-    environment: dev
-    runs-on: ${{ matrix.test-group.runs-on }}
-    steps:
-      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
-      - name: Enable performance mode
-        run: |
-          sudo cpupower frequency-set -g performance
-      - name: Ensure weka mount is active
-        run: |
-          sudo systemctl restart mnt-MLPerf.mount
-          sudo /etc/rc.local
-          ls -al /mnt/MLPerf/bit_error_tests
-      - name: Set up dynamic env vars for build
-        run: |
-          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
-          echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
-        with:
-          name: TTMetal_build_${{ matrix.test-group.arch }}
-      - name: Extract files
-        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
-      - uses: ./.github/actions/install-python-deps
-      - name: Run model perf regression tests
-        timeout-minutes: 60
-        run: |
-          source ${{ github.workspace }}/python_env/bin/activate
-          cd $TT_METAL_HOME
-          export PYTHONPATH=$TT_METAL_HOME
-          ${{ matrix.test-group.cmd }}
-      - name: Check perf report exists
-        id: check-perf-report
-        if: ${{ !cancelled() }}
-        run: |
-          ls -hal
-          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
-          ls -hal $PERF_REPORT_FILENAME
-          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
-      - name: Upload perf report
-        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.machine-type }}
-          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
-      - name: Disable performance mode
-        if: always()
-        run: |
-          sudo cpupower frequency-set -g ondemand
+    secrets: inherit
+    uses: ./.github/workflows/tg-model-perf-tests-impl.yaml
diff --git a/.github/workflows/tgg-model-perf-tests-impl.yaml b/.github/workflows/tgg-model-perf-tests-impl.yaml
new file mode 100644
index 000000000000..f3d44f2e2ba5
--- /dev/null
+++ b/.github/workflows/tgg-model-perf-tests-impl.yaml
@@ -0,0 +1,79 @@
+name: "[internal] TGG model perf tests impl"
+
+on:
+  workflow_call:  # reusable only: started from a caller workflow's `uses:`
+
+jobs:
+  tgg-model-perf-tests:
+    strategy:
+      fail-fast: false  # one failing matrix entry does not cancel the other
+      matrix:
+        test-group: [
+          {
+            name: "TGG LLM model perf tests",
+            model-type: "LLM",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-tgg", "in-service", "bare-metal", "pipeline-perf"],
+            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_tgg_device --dispatch-mode ""'
+          },
+          {
+            name: "TGG CNN model perf tests",
+            model-type: "CNN",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-tgg", "in-service", "bare-metal", "pipeline-perf"],
+            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tgg_device --dispatch-mode ""'
+          },
+        ]
+    name: ${{ matrix.test-group.name }}
+    env:
+      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
+      ARCH_NAME: ${{ matrix.test-group.arch }}
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+    environment: dev
+    runs-on: ${{ matrix.test-group.runs-on }}
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
+      - name: Enable performance mode  # undone by the final "Disable performance mode" step
+        run: |
+          sudo cpupower frequency-set -g performance
+      - name: Ensure weka mount is active
+        run: |
+          sudo systemctl restart mnt-MLPerf.mount
+          sudo /etc/rc.local
+          ls -al /mnt/MLPerf/bit_error_tests
+      - name: Set up dynamic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
+          echo "PYTHONPATH=$(pwd)" >> "$GITHUB_ENV"
+      - uses: actions/download-artifact@v4
+        with:  # build artifact is expected to come from a job the caller ran earlier in the same workflow run
+          name: TTMetal_build_${{ matrix.test-group.arch }}
+      - name: Extract files
+        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
+      - uses: ./.github/actions/install-python-deps
+      - name: Run model perf regression tests
+        timeout-minutes: 60
+        run: |
+          source ${{ github.workspace }}/python_env/bin/activate
+          cd "$TT_METAL_HOME"
+          export PYTHONPATH="$TT_METAL_HOME"
+          ${{ matrix.test-group.cmd }}
+      - name: Check perf report exists
+        id: check-perf-report
+        if: ${{ !cancelled() }}  # still runs after a failed test step so a partial report can be uploaded
+        run: |
+          ls -hal
+          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
+          ls -hal "$PERF_REPORT_FILENAME"
+          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
+      - name: Upload perf report
+        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
+        uses: actions/upload-artifact@v4
+        with:  # v4 rejects duplicate artifact names within a run; the literal "tgg" suffix replaces an undefined matrix field that rendered empty
+          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-tgg
+          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
+      - name: Disable performance mode
+        if: always()  # restore the CPU governor even when earlier steps failed or were cancelled
+        run: |
+          sudo cpupower frequency-set -g ondemand
diff --git a/.github/workflows/tgg-model-perf-tests.yaml b/.github/workflows/tgg-model-perf-tests.yaml
index 259fb3fa7b7b..c65fc7408d6b 100644
--- a/.github/workflows/tgg-model-perf-tests.yaml
+++ b/.github/workflows/tgg-model-perf-tests.yaml
@@ -13,75 +13,5 @@ jobs:
     secrets: inherit
   tgg-model-perf-tests:
     needs: build-artifact
-    strategy:
-      fail-fast: false
-      matrix:
-        test-group: [
-          {
-            name: "TGG LLM model perf tests",
-            model-type: "LLM",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-tgg", "in-service", "bare-metal", "pipeline-perf"],
-            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_tgg_device --dispatch-mode ""'
-          },
-          {
-            name: "TGG CNN model perf tests",
-            model-type: "CNN",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-tgg", "in-service", "bare-metal", "pipeline-perf"],
-            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tgg_device --dispatch-mode ""'
-          },
-        ]
-    name: ${{ matrix.test-group.name }}
-    env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
-      ARCH_NAME: ${{ matrix.test-group.arch }}
-      LOGURU_LEVEL: INFO
-      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
-    environment: dev
-    runs-on: ${{ matrix.test-group.runs-on }}
-    steps:
-      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
-      - name: Enable performance mode
-        run: |
-          sudo cpupower frequency-set -g performance
-      - name: Ensure weka mount is active
-        run: |
-          sudo systemctl restart mnt-MLPerf.mount
-          sudo /etc/rc.local
-          ls -al /mnt/MLPerf/bit_error_tests
-      - name: Set up dynamic env vars for build
-        run: |
-          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
-          echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
-        with:
-          name: TTMetal_build_${{ matrix.test-group.arch }}
-      - name: Extract files
-        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
-      - uses: ./.github/actions/install-python-deps
-      - name: Run model perf regression tests
-        timeout-minutes: 60
-        run: |
-          source ${{ github.workspace }}/python_env/bin/activate
-          cd $TT_METAL_HOME
-          export PYTHONPATH=$TT_METAL_HOME
-          ${{ matrix.test-group.cmd }}
-      - name: Check perf report exists
-        id: check-perf-report
-        if: ${{ !cancelled() }}
-        run: |
-          ls -hal
-          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
-          ls -hal $PERF_REPORT_FILENAME
-          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
-      - name: Upload perf report
-        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.machine-type }}
-          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
-      - name: Disable performance mode
-        if: always()
-        run: |
-          sudo cpupower frequency-set -g ondemand
+    secrets: inherit
+    uses: ./.github/workflows/tgg-model-perf-tests-impl.yaml