From dfd8986f38b99c19bcd3042be85ba5b21cafa798 Mon Sep 17 00:00:00 2001 From: Tapasvi Patel Date: Thu, 16 May 2024 19:13:33 +0000 Subject: [PATCH] #8558: Refactor t3000, tg and tgg pipelines, workflows and run test scripts --- ...-device-build-and-unit-tests-frequent.yaml | 39 ----- .../workflows/run-profiler-regression.yaml | 2 - .github/workflows/t3000-demo-tests.yaml | 54 +++++++ .github/workflows/t3000-frequent-tests.yaml | 53 +++++++ ...odels.yaml => t3000-model-perf-tests.yaml} | 54 ++++--- .github/workflows/t3000-profiler-tests.yaml | 43 ++++++ .github/workflows/t3000-unit-tests.yaml | 53 +++++++ .github/workflows/tg-unit-tests.yaml | 25 +-- .github/workflows/tgg-unit-tests.yaml | 23 +-- .../demos/falcon7b/tests/test_perf_falcon.py | 2 +- .../mixtral8x7b/tests/test_mixtral_perf.py | 2 +- pytest.ini | 2 +- .../multi_chip/run_end_to_end_demos.sh | 23 --- .../run_frequent_regressions_multi_device.sh | 35 ----- ...re_post_commit_regressions_multi_device.sh | 49 ------ .../multi_chip/run_unstable_multi_device.sh | 13 -- tests/scripts/run_performance.sh | 29 ---- tests/scripts/run_tests.sh | 141 +++++++++-------- tests/scripts/t3000/run_t3000_demo_tests.sh | 46 ++++++ .../scripts/t3000/run_t3000_frequent_tests.sh | 86 +++++++++++ .../t3000/run_t3000_model_perf_tests.sh | 95 ++++++++++++ tests/scripts/t3000/run_t3000_unit_tests.sh | 146 ++++++++++++++++++ .../tg/run_pre_post_commit_regressions_tg.sh | 17 -- tests/scripts/tg/run_tg_unit_tests.sh | 27 ++++ .../run_pre_post_commit_regressions_tgg.sh | 17 -- tests/scripts/tgg/run_tgg_unit_tests.sh | 27 ++++ 26 files changed, 769 insertions(+), 334 deletions(-) delete mode 100644 .github/workflows/multi-device-build-and-unit-tests-frequent.yaml create mode 100644 .github/workflows/t3000-demo-tests.yaml create mode 100644 .github/workflows/t3000-frequent-tests.yaml rename .github/workflows/{multi-device-perf-models.yaml => t3000-model-perf-tests.yaml} (53%) create mode 100644 .github/workflows/t3000-profiler-tests.yaml 
create mode 100644 .github/workflows/t3000-unit-tests.yaml delete mode 100755 tests/scripts/multi_chip/run_end_to_end_demos.sh delete mode 100755 tests/scripts/multi_chip/run_frequent_regressions_multi_device.sh delete mode 100755 tests/scripts/multi_chip/run_pre_post_commit_regressions_multi_device.sh delete mode 100644 tests/scripts/multi_chip/run_unstable_multi_device.sh create mode 100755 tests/scripts/t3000/run_t3000_demo_tests.sh create mode 100755 tests/scripts/t3000/run_t3000_frequent_tests.sh create mode 100644 tests/scripts/t3000/run_t3000_model_perf_tests.sh create mode 100755 tests/scripts/t3000/run_t3000_unit_tests.sh delete mode 100755 tests/scripts/tg/run_pre_post_commit_regressions_tg.sh create mode 100755 tests/scripts/tg/run_tg_unit_tests.sh delete mode 100755 tests/scripts/tgg/run_pre_post_commit_regressions_tgg.sh create mode 100755 tests/scripts/tgg/run_tgg_unit_tests.sh diff --git a/.github/workflows/multi-device-build-and-unit-tests-frequent.yaml b/.github/workflows/multi-device-build-and-unit-tests-frequent.yaml deleted file mode 100644 index bab243b876f0..000000000000 --- a/.github/workflows/multi-device-build-and-unit-tests-frequent.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: "Nightly multi-chip tests" - -on: - workflow_dispatch: - schedule: - - cron: "0 */8 * * *" # This cron schedule runs the workflow every 8 hours - -jobs: - multi-chip-nightly: - strategy: - # Do not fail-fast because we need to ensure all tests go to completion - # so we try not to get hanging machines - fail-fast: false - matrix: - runner-info: [ - # N300 2x4 - {name: "n300-2x4", arch: wormhole_b0, runs-on: ["wormhole_b0", "multi-chip-num-pcie-4", "multi-chip-num-chips-8"]}, - ] - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.runner-info.arch }} - environment: dev - runs-on: ${{ matrix.runner-info.runs-on }} - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Set up dyanmic env vars 
for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - name: Build tt-metal and libs - run: | - PYTHON_ENV_DIR=$(pwd)/build/python_env ./build_metal.sh - - name: Build tt-metal CPP tests - run: cmake --build build --target tests -- -j`nproc` - - name: Run frequent regression tests - timeout-minutes: 60 - run: | - source build/python_env/bin/activate - export PYTHONPATH=$TT_METAL_HOME - ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type frequent_multi_device --dispatch-mode "" diff --git a/.github/workflows/run-profiler-regression.yaml b/.github/workflows/run-profiler-regression.yaml index f975e54429e4..fbedb2b0d1f7 100644 --- a/.github/workflows/run-profiler-regression.yaml +++ b/.github/workflows/run-profiler-regression.yaml @@ -18,8 +18,6 @@ jobs: {arch: wormhole_b0, runs-on: ["wormhole_b0", "multi-chip-num-pcie-1", "multi-chip-num-chips-1"]}, # N300 {arch: wormhole_b0, runs-on: ["wormhole_b0", "multi-chip-num-pcie-1", "multi-chip-num-chips-2"]}, - # N300 2x4 - {name: "n300-2x4", arch: wormhole_b0, runs-on: ["wormhole_b0", "multi-chip-num-pcie-4", "multi-chip-num-chips-8"]}, ] env: TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} diff --git a/.github/workflows/t3000-demo-tests.yaml b/.github/workflows/t3000-demo-tests.yaml new file mode 100644 index 000000000000..4ec223efa742 --- /dev/null +++ b/.github/workflows/t3000-demo-tests.yaml @@ -0,0 +1,54 @@ +name: "[T3K] T3000 demo tests" + +on: + push: + branches: + - tpatel/issue-8558 + workflow_dispatch: + schedule: + - cron: '0 0 * * *' # This cron schedule runs the workflow every day at 12am UTC + +jobs: + build-artifact: + uses: ./.github/workflows/build-artifact.yaml + secrets: inherit + t3000-demo-tests: + needs: build-artifact + strategy: + fail-fast: false + matrix: + test-group: [ + { + name: "T3000 demo tests", + arch: wormhole_b0, + runs-on: [arch-wormhole_b0, "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf"], + cmd: './tests/scripts/run_tests.sh --tt-arch 
wormhole_b0 --pipeline-type demos_t3000_device --dispatch-mode ""' + }, + ] + name: ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ${{ matrix.test-group.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + # ${{ matrix.test-group.name }} tests + - name: Run demo regression tests + timeout-minutes: 180 + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + ${{ matrix.test-group.cmd }} diff --git a/.github/workflows/t3000-frequent-tests.yaml b/.github/workflows/t3000-frequent-tests.yaml new file mode 100644 index 000000000000..ab101d20c03c --- /dev/null +++ b/.github/workflows/t3000-frequent-tests.yaml @@ -0,0 +1,53 @@ +name: "[T3K] T3000 frequent tests" + on: + push: + branches: + - tpatel/issue-8558 + workflow_dispatch: + schedule: + - cron: "0 */8 * * *" # This cron schedule runs the workflow every 8 hours + jobs: + build-artifact: + uses: ./.github/workflows/build-artifact.yaml + secrets: inherit + t3000-frequent-tests: + needs: build-artifact + strategy: + fail-fast: false + matrix: + test-group: [ + { + name: "T3000 frequent tests", + arch: wormhole_b0, + runs-on: [arch-wormhole_b0, "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-functional"], + cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_t3000_device --dispatch-mode ""' + }, + ] + name: ${{ 
matrix.test-group.name }} ${{ matrix.test-group.arch }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ${{ matrix.test-group.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run frequent regression tests + timeout-minutes: 60 + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + ${{ matrix.test-group.cmd }} diff --git a/.github/workflows/multi-device-perf-models.yaml b/.github/workflows/t3000-model-perf-tests.yaml similarity index 53% rename from .github/workflows/multi-device-perf-models.yaml rename to .github/workflows/t3000-model-perf-tests.yaml index 59903f7fd055..421453298b94 100644 --- a/.github/workflows/multi-device-perf-models.yaml +++ b/.github/workflows/t3000-model-perf-tests.yaml @@ -1,35 +1,49 @@ -name: "Multi-Nebula model perf regressions and output report" +name: "[T3K] T3000 model perf tests" on: + push: + branches: + - tpatel/issue-8558 workflow_dispatch: schedule: - cron: "0 */12 * * *" # This cron schedule runs the workflow every 12 hours jobs: - multi-device-models-perf: + build-artifact: + uses: ./.github/workflows/build-artifact.yaml + secrets: inherit + t3000-model-perf-tests: + needs: build-artifact strategy: - # Do not fail-fast because we need to ensure all tests go to completion - # so we try not to get hanging machines fail-fast: false matrix: - runner-info: [ - # N300 2x4 - # NOTE: Never use arch-wormhole_b0 tags, however we're 
using it here because this machine is used by devs during the day - # We don't want other CI runs to interrupt dev flows. However, we need to fix this once we have more 2x4 machines dedicated to CI - {name: "n300-2x4", arch: wormhole_b0, runs-on: ["perf-t3000", "arch-wormhole_b0", "multi-chip-num-pcie-4", "multi-chip-num-chips-8"], machine-type: "bare_metal"}, + test-group: [ + { + name: "T3000 LLM model perf tests", + model-type: "LLM", + arch: wormhole_b0, + runs-on: [arch-wormhole_b0, "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf"], + cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_t3000_device --dispatch-mode ""' + }, + { + name: "T3000 CNN model perf tests", + model-type: "CNN", + arch: wormhole_b0, + runs-on: [arch-wormhole_b0, "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf"], + cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_t3000_device --dispatch-mode ""' + }, ] - model-type: [llm_javelin, cnn_javelin, other] - name: "${{ matrix.model-type }} ${{ matrix.runner-info.arch }}" + name: ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }} env: TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.runner-info.arch }} + ARCH_NAME: ${{ matrix.test-group.arch }} LOGURU_LEVEL: INFO - TTNN_CONFIG_OVERRIDES: '{"enable_fast_runtime_mode": true}' + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib environment: dev - runs-on: ${{ matrix.runner-info.runs-on }} + runs-on: ${{ matrix.test-group.runs-on }} steps: - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Enable Performance mode + - name: Enable performance mode run: | sudo cpupower frequency-set -g performance - name: Ensure weka mount is active @@ -43,11 +57,13 @@ jobs: echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV - name: Build tt-metal and libs run: PYTHON_ENV_DIR=$(pwd)/build/python_env ./build_metal.sh - - name: Run 
performance regressions + - name: Run model perf regression tests timeout-minutes: 60 run: | - source build/python_env/bin/activate - ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.model-type }}_models_performance_${{ matrix.runner-info.machine-type }}_multi_device + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + ${{ matrix.test-group.cmd }} - name: Check perf report exists id: check-perf-report if: ${{ !cancelled() }} @@ -62,7 +78,7 @@ jobs: with: name: perf-report-csv-${{ matrix.model-type }}-${{ matrix.runner-info.arch }}-${{ matrix.runner-info.machine-type }} path: "${{ steps.check-perf-report.outputs.perf_report_filename }}" - - name: Disable Performance mode + - name: Disable performance mode if: always() run: | sudo cpupower frequency-set -g ondemand diff --git a/.github/workflows/t3000-profiler-tests.yaml b/.github/workflows/t3000-profiler-tests.yaml new file mode 100644 index 000000000000..bab96e2ce8a8 --- /dev/null +++ b/.github/workflows/t3000-profiler-tests.yaml @@ -0,0 +1,43 @@ +name: "[T3K] T3000 profiler tests" + on: + push: + branches: + - tpatel/issue-8558 + workflow_dispatch: + workflow_call: + schedule: + - cron: "0 */8 * * *" # This cron schedule runs the workflow every 8 hours + jobs: + t3000-profiler-tests: + strategy: + fail-fast: false + matrix: + test-group: [ + { + name: "T3000 profiler tests", + arch: wormhole_b0, + runs-on: [arch-wormhole_b0, "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf"], + cmd: './tests/scripts/run_profiler_regressions.sh' + }, + ] + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ${{ matrix.test-group.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for 
build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - name: Build tt-metal and libs + run: | + ./scripts/build_scripts/build_with_profiler_opt.sh + - name: Run profiler regression tests + timeout-minutes: 30 + run: | + ./tests/scripts/run_profiler_regressions.sh diff --git a/.github/workflows/t3000-unit-tests.yaml b/.github/workflows/t3000-unit-tests.yaml new file mode 100644 index 000000000000..59369d045fbe --- /dev/null +++ b/.github/workflows/t3000-unit-tests.yaml @@ -0,0 +1,53 @@ +name: "[T3K] T3000 unit tests" + on: + push: + branches: + - tpatel/issue-8558 + workflow_dispatch: + schedule: + - cron: "0 */3 * * *" # This cron schedule runs the workflow every 3 hours + jobs: + build-artifact: + uses: ./.github/workflows/build-artifact.yaml + secrets: inherit + t3000-unit-tests: + needs: build-artifact + strategy: + fail-fast: false + matrix: + test-group: [ + { + name: "T3000 unit tests", + arch: wormhole_b0, + runs-on: [arch-wormhole_b0, "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-functional"], + cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_t3000_device --dispatch-mode ""' + }, + ] + name: ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ${{ matrix.test-group.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run unit regression tests + timeout-minutes: 120 + run: | + source ${{ github.workspace 
}}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + ${{ matrix.test-group.cmd }} diff --git a/.github/workflows/tg-unit-tests.yaml b/.github/workflows/tg-unit-tests.yaml index 89dcec33fd9f..4848fa0d4783 100644 --- a/.github/workflows/tg-unit-tests.yaml +++ b/.github/workflows/tg-unit-tests.yaml @@ -3,7 +3,7 @@ name: "[TG] TG unit tests" on: push: branches: - - galaxy/main + - tpatel/issue-8558 schedule: - cron: '0 0 * * *' # Runs every day at 12am UTC workflow_dispatch: @@ -13,25 +13,26 @@ jobs: uses: ./.github/workflows/build-artifact.yaml secrets: inherit TG-tests: + needs: build-artifact strategy: - # Do not fail-fast because we need to ensure all tests go to completion - # so we try not to get hanging machines fail-fast: false matrix: - runner-info: [ - # TG - {arch: wormhole_b0, runs-on: ["config-tg", "in-service"]}, - ] test-group: [ - {name: "TG Unit Tests", cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type post_commit_tg --dispatch-mode ""'}, + { + name: "TG unit tests", + arch: wormhole_b0, + runs-on: [arch-wormhole_b0, "config-tg", "in-service", "runner-test", "bare-metal", "pipeline-functional"], + cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_tg_device --dispatch-mode ""' + }, + ] - name: ${{ matrix.test-group.name }} ${{ matrix.runner-info.arch }} ${{ matrix.runner-info.name }} + name: ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }} env: TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.runner-info.arch }} + ARCH_NAME: ${{ matrix.test-group.arch }} LOGURU_LEVEL: INFO LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - runs-on: ${{ matrix.runner-info.runs-on }} + environment: dev + runs-on: ${{ matrix.test-group.runs-on }} steps: - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - name: Set up dynamic env vars for build @@ -43,7 +44,7 @@ jobs: - name: Extract files run: tar -xvf ttm_${{ 
matrix.runner-info.arch }}.tar - uses: ./.github/actions/install-python-deps - - name: ${{ matrix.test-group.name }} tests + - name: Run unit regression tests timeout-minutes: 45 run: | source ${{ github.workspace }}/python_env/bin/activate diff --git a/.github/workflows/tgg-unit-tests.yaml b/.github/workflows/tgg-unit-tests.yaml index eb3d6c23d182..131b90bc0497 100644 --- a/.github/workflows/tgg-unit-tests.yaml +++ b/.github/workflows/tgg-unit-tests.yaml @@ -1,6 +1,9 @@ name: "[TGG] TGG unit tests" on: + push: + branches: + - tpatel/issue-8558 workflow_dispatch: schedule: - cron: '0 0 * * *' # This cron schedule runs the workflow every day at 12am UTC @@ -10,22 +13,22 @@ jobs: uses: ./.github/workflows/build-artifact.yaml secrets: inherit TGG-tests: + needs: build-artifact strategy: - # Do not fail-fast because we need to ensure all tests go to completion - # so we try not to get hanging machines fail-fast: false matrix: - runner-info: [ - # TGG - {arch: wormhole_b0, runs-on: ["config-tgg", "in-service"]}, - ] test-group: [ - {name: "TGG Unit Tests", cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type post_commit_tgg --dispatch-mode ""'}, + { + name: "TGG unit tests", + arch: wormhole_b0, + runs-on: [arch-wormhole_b0, "config-tgg", "in-service", "runner-test", "bare-metal", "pipeline-functional"], + cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_tgg_device --dispatch-mode ""' + }, + ] - name: ${{ matrix.test-group.name }} ${{ matrix.runner-info.arch }} ${{ matrix.runner-info.name }} + name: ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }} env: TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.runner-info.arch }} + ARCH_NAME: ${{ matrix.test-group.arch }} LOGURU_LEVEL: INFO LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib runs-on: ${{ matrix.runner-info.runs-on }} @@ -40,7 +43,7 @@ jobs: - name: Extract files run: tar -xvf ttm_${{ matrix.runner-info.arch }}.tar - uses: 
./.github/actions/install-python-deps - - name: ${{ matrix.test-group.name }} tests + - name: Run unit regression tests timeout-minutes: 45 run: | source ${{ github.workspace }}/python_env/bin/activate diff --git a/models/demos/falcon7b/tests/test_perf_falcon.py b/models/demos/falcon7b/tests/test_perf_falcon.py index 7e036c3b2391..0765ed994123 100644 --- a/models/demos/falcon7b/tests/test_perf_falcon.py +++ b/models/demos/falcon7b/tests/test_perf_falcon.py @@ -584,7 +584,7 @@ def test_perf_wh_bare_metal( async_mode, ) - @pytest.mark.models_performance_bare_metal_multi_device + @pytest.mark.model_perf_t3000 @pytest.mark.parametrize( "llm_mode, num_devices, num_layers, batch, seq_len, kv_cache_len, model_config_str, expected_output_pcc, expected_k_cache_pcc, expected_v_cache_pcc, expected_inference_time, async_mode", ( diff --git a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py index 2fc1a67e1803..77cf593e60a4 100644 --- a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py +++ b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py @@ -36,7 +36,7 @@ def forward(self, x): return self.emb(x) -@pytest.mark.models_performance_bare_metal_multi_device +@pytest.mark.model_perf_t3000 @pytest.mark.parametrize( "generation_start_pos, expected_compile_time, expected_inference_time", ( diff --git a/pytest.ini b/pytest.ini index 10ee11deb4c3..b1f2bdbc22c0 100644 --- a/pytest.ini +++ b/pytest.ini @@ -13,4 +13,4 @@ markers = models_performance_bare_metal: mark model silicon tests for performance on bare metal models_performance_virtual_machine: mark model silicon tests for performance on virtual_machine models_device_performance_bare_metal: mark model silicon tests for device performance on bare metal - models_performance_bare_metal_multi_device: mark model silicon tests for performance on multi-chip bare metal + model_perf_t3000: mark model silicon tests for performance on t3000 bare metal diff --git 
a/tests/scripts/multi_chip/run_end_to_end_demos.sh b/tests/scripts/multi_chip/run_end_to_end_demos.sh deleted file mode 100755 index f15ee8d92563..000000000000 --- a/tests/scripts/multi_chip/run_end_to_end_demos.sh +++ /dev/null @@ -1,23 +0,0 @@ - -#/bin/bash - -set -eo pipefail - -if [[ -z "$TT_METAL_HOME" ]]; then - echo "Must provide TT_METAL_HOME in environment" 1>&2 - exit 1 -fi - -if [[ -z "$ARCH_NAME" ]]; then - echo "Must provide ARCH_NAME in environment" 1>&2 - exit 1 -fi - -cd $TT_METAL_HOME -export PYTHONPATH=$TT_METAL_HOME - -# Falcon40B prefill 60 layer end to end with 10 loops; we need 8x8 grid size -WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py - -# Falcon40B end to end demo (prefill + decode) -WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_5_loops.py diff --git a/tests/scripts/multi_chip/run_frequent_regressions_multi_device.sh b/tests/scripts/multi_chip/run_frequent_regressions_multi_device.sh deleted file mode 100755 index ac7e4fd1f128..000000000000 --- a/tests/scripts/multi_chip/run_frequent_regressions_multi_device.sh +++ /dev/null @@ -1,35 +0,0 @@ - -#/bin/bash - -set -eo pipefail - -if [[ -z "$TT_METAL_HOME" ]]; then - echo "Must provide TT_METAL_HOME in environment" 1>&2 - exit 1 -fi - -if [[ -z "$ARCH_NAME" ]]; then - echo "Must provide ARCH_NAME in environment" 1>&2 - exit 1 -fi - -cd $TT_METAL_HOME -export PYTHONPATH=$TT_METAL_HOME - -pytest tests/ttnn/unit_tests/test_multi_device.py - -pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_bidirectional_bandwidth_microbenchmark.py -pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_ring_latency_microbenchmark.py - -# Llama2_70b related cached files and tests (the test should parse env variables similar to these) -export 
LLAMA_CKPT_DIR=/mnt/MLPerf/tt_dnn-models/llama-2/llama-2-70b-repacked/ -export LLAMA_TOKENIZER_PATH=/mnt/MLPerf/tt_dnn-models/llama-2/tokenizer.model -export LLAMA_CACHE_PATH=/mnt/MLPerf/tt_dnn-models/llama-2/llama-data-cache/weights-cache-2 - -pytest models/demos/t3000/llama2_70b/tests/test_llama_mlp_t3000.py -pytest models/demos/t3000/llama2_70b/tests/test_llama_attention_t3000.py -pytest models/demos/t3000/llama2_70b/tests/test_llama_decoder_t3000.py -pytest models/demos/t3000/llama2_70b/tests/test_llama_model_t3000.py - -# Mistral8x7b 8 chip decode model test (env flags set inside the test) -pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[10-1-pcc] diff --git a/tests/scripts/multi_chip/run_pre_post_commit_regressions_multi_device.sh b/tests/scripts/multi_chip/run_pre_post_commit_regressions_multi_device.sh deleted file mode 100755 index a2081e36d58c..000000000000 --- a/tests/scripts/multi_chip/run_pre_post_commit_regressions_multi_device.sh +++ /dev/null @@ -1,49 +0,0 @@ - -#/bin/bash - -set -eo pipefail - -if [[ -z "$TT_METAL_HOME" ]]; then - echo "Must provide TT_METAL_HOME in environment" 1>&2 - exit 1 -fi - -if [[ -z "$ARCH_NAME" ]]; then - echo "Must provide ARCH_NAME in environment" 1>&2 - exit 1 -fi - -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectSendAllConnectedChips" -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsSendInterleavedBufferAllConnectedChips" -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectRingGatherAllChips" -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsInterleavedRingGatherAllChips" - -TT_METAL_ENABLE_REMOTE_CHIP=1 ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueSingleCardFixture.*" -./build/test/tt_metal/unit_tests_fast_dispatch 
--gtest_filter="CommandQueueMultiDeviceFixture.*" -./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="DPrintFixture.*:WatcherFixture.*" -pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k post_commit - -# ttnn multi-chip apis unit tests -pytest tests/ttnn/unit_tests/test_multi_device.py - -# Falcon40b unit tests; prefill required 8x8 grids -WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_mlp.py -WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_attention.py -WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_decoder.py -WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_causallm.py -WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_1_layer_t3000.py - -# Mistral8x7b 8 chip decode tests (env flags set inside the tests) -pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_attention.py -pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_mlp.py -pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_rms_norm.py -pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_embedding.py -pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_moe.py -pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_decoder.py -pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[1-1-pcc] - -# Falcon7B data parallel tests -pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_mlp.py -pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_attention.py -pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_decoder.py -pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_causallm.py diff --git a/tests/scripts/multi_chip/run_unstable_multi_device.sh 
b/tests/scripts/multi_chip/run_unstable_multi_device.sh deleted file mode 100644 index bd08c570b9b1..000000000000 --- a/tests/scripts/multi_chip/run_unstable_multi_device.sh +++ /dev/null @@ -1,13 +0,0 @@ -#/bin/bash - -set -eo pipefail - -if [[ -z "$TT_METAL_HOME" ]]; then - echo "Must provide TT_METAL_HOME in environment" 1>&2 - exit 1 -fi - -if [[ -z "$ARCH_NAME" ]]; then - echo "Must provide ARCH_NAME in environment" 1>&2 - exit 1 -fi diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh index 92b6e086ded3..0b0a0692c967 100755 --- a/tests/scripts/run_performance.sh +++ b/tests/scripts/run_performance.sh @@ -43,19 +43,6 @@ run_perf_models_llm_javelin() { env python models/perf/merge_perf_results.py } -run_perf_models_llm_javelin_multi_device() { - local tt_arch=$1 - local test_marker=$2 - - env pytest models/demos/falcon7b/tests -m $test_marker - - # Mistral8x7b env flags are set inside the tests - env pytest models/demos/t3000/mixtral8x7b/tests -m $test_marker - - ## Merge all the generated reports - env python models/perf/merge_perf_results.py -} - run_perf_models_cnn_javelin() { local tt_arch=$1 local test_marker=$2 @@ -70,16 +57,6 @@ run_perf_models_cnn_javelin() { env python models/perf/merge_perf_results.py } -run_perf_models_cnn_javelin_multi_device() { - local tt_arch=$1 - local test_marker=$2 - - # Add tests here - - ## Merge all the generated reports - env python models/perf/merge_perf_results.py -} - run_device_perf_models() { local test_marker=$1 @@ -153,8 +130,6 @@ main() { test_marker="models_performance_virtual_machine" elif [[ "$pipeline_type" == *"device_performance_bare_metal"* ]]; then test_marker="models_device_performance_bare_metal" - elif [[ "$pipeline_type" == *"_bare_metal_multi_device"* ]]; then - test_marker="models_performance_bare_metal_multi_device" elif [[ "$pipeline_type" == *"_bare_metal"* ]]; then test_marker="models_performance_bare_metal" else @@ -165,12 +140,8 @@ main() { if [[ "$pipeline_type" == 
*"device_performance"* ]]; then run_device_perf_models "$test_marker" run_device_perf_ops "$test_marker" - elif [[ "$pipeline_type" == "llm_javelin_models_performance_bare_metal_multi_device" ]]; then - run_perf_models_llm_javelin_multi_device "$tt_arch" "$test_marker" elif [[ "$pipeline_type" == "llm_javelin_models_performance"* ]]; then run_perf_models_llm_javelin "$tt_arch" "$test_marker" - elif [[ "$pipeline_type" == "cnn_javelin_models_performance_bare_metal_multi_device" ]]; then - run_perf_models_cnn_javelin_multi_device "$tt_arch" "$test_marker" elif [[ "$pipeline_type" == "cnn_javelin_models_performance"* ]]; then run_perf_models_cnn_javelin "$tt_arch" "$test_marker" elif [[ "$pipeline_type" == *"other_models_performance"* ]]; then diff --git a/tests/scripts/run_tests.sh b/tests/scripts/run_tests.sh index 818cf3d6327d..37580d883098 100755 --- a/tests/scripts/run_tests.sh +++ b/tests/scripts/run_tests.sh @@ -102,42 +102,6 @@ run_frequent_api_pipeline_tests() { fi } -# Run frequent multi device pipeline tests - these are the t3000 + 4xn300 tests -run_frequent_multi_device_pipeline_tests() { - local tt_arch=$1 - local pipeline_type=$2 - local dispatch_mode=$3 - - ./tests/scripts/multi_chip/run_frequent_regressions_multi_device.sh -} - -# Run end to end demos - these are the t3000 + 4xn300 tests -run_end_to_end_demos_multi_device() { - local tt_arch=$1 - local pipeline_type=$2 - local dispatch_mode=$3 - - ./tests/scripts/multi_chip/run_end_to_end_demos.sh -} - -# Run post commit TG tests - these are 4xn150 + galaxy tests -run_post_commit_tg_pipeline_tests() { - local tt_arch=$1 - local pipeline_type=$2 - local dispatch_mode=$3 - - ./tests/scripts/tg/run_pre_post_commit_regressions_tg.sh -} - -# Run post commit TGG tests - these are 8xn150 + 2xgalaxy tests -run_post_commit_tgg_pipeline_tests() { - local tt_arch=$1 - local pipeline_type=$2 - local dispatch_mode=$3 - - ./tests/scripts/tgg/run_pre_post_commit_regressions_tgg.sh -} - run_models_performance() { 
local tt_arch=$1 local pipeline_type=$2 @@ -160,14 +124,6 @@ run_models_performance_bare_metal_pipeline_tests() { run_models_performance "$tt_arch" "$pipeline_type" } -run_models_performance_bare_metal_multi_device_pipeline_tests() { - local tt_arch=$1 - local pipeline_type=$2 - local dispatch_mode=$3 - - run_models_performance_multi_device "$tt_arch" "$pipeline_type" -} - run_models_performance_virtual_machine_pipeline_tests() { local tt_arch=$1 local pipeline_type=$2 @@ -203,14 +159,6 @@ run_stress_post_commit_pipeline_tests() { done } -run_post_commit_multi_device_pipeline_tests() { - local tt_arch=$1 - local pipeline_type=$2 - local dispatch_mode=$3 - - ./tests/scripts/multi_chip/run_pre_post_commit_regressions_multi_device.sh -} - run_post_commit_multi_device_unstable_pipeline_tests() { local tt_arch=$1 local pipeline_type=$2 @@ -239,6 +187,66 @@ run_ttnn_sweeps_pipeline_tests() { ./tests/scripts/run_ttnn_sweeps.sh } +##########################T3000########################## +# Run t3000 unit tests +unit_t3000_device() { + local tt_arch=$1 + local pipeline_type=$2 + local dispatch_mode=$3 + + ./tests/scripts/t3000/run_t3000_unit_tests.sh +} + +# Run t3000 frequent tests +frequent_t3000_device() { + local tt_arch=$1 + local pipeline_type=$2 + local dispatch_mode=$3 + + ./tests/scripts/t3000/run_t3000_frequent_tests.sh +} + +# Run t3000 demo tests +demos_t3000_device() { + local tt_arch=$1 + local pipeline_type=$2 + local dispatch_mode=$3 + + ./tests/scripts/t3000/run_t3000_demo_tests.sh +} + +# Run t3000 model perf tests +model_perf_t3000_device() { + local tt_arch=$1 + local pipeline_type=$2 + local dispatch_mode=$3 + + ./tests/scripts/t3000/run_t3000_model_perf_tests.sh --pipeline-type "$pipeline_type" +} +##########################T3000########################## + +##########################TG########################## +# Run TG unit tests +unit_tg_device() { + local tt_arch=$1 + local pipeline_type=$2 + local dispatch_mode=$3 + + 
./tests/scripts/tg/run_tg_unit_tests.sh +} +##########################TG########################## + +##########################TGG########################## +# Run TGG unit tests +unit_tgg_device() { + local tt_arch=$1 + local pipeline_type=$2 + local dispatch_mode=$3 + + ./tests/scripts/tgg/run_tgg_unit_tests.sh +} +##########################TGG########################## + run_pipeline_tests() { local tt_arch=$1 local pipeline_type=$2 @@ -257,28 +265,29 @@ run_pipeline_tests() { run_eager_package_end_to_end_pipeline_tests "$tt_arch" "$pipeline_type" elif [[ $pipeline_type == *"models_performance_bare_metal" || $pipeline_type == "models_device_performance_bare_metal" ]]; then run_models_performance_bare_metal_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" - elif [[ $pipeline_type == *"models_performance_bare_metal_multi_device" ]]; then - run_models_performance_bare_metal_multi_device_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" elif [[ $pipeline_type == "models_performance_virtual_machine" ]]; then run_models_performance_virtual_machine_pipeline_tests "$tt_arch" "$pipeline_type" elif [[ $pipeline_type == "stress_post_commit" ]]; then run_stress_post_commit_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" - elif [[ $pipeline_type == "post_commit_multi_device" ]]; then - run_post_commit_multi_device_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" - elif [[ $pipeline_type == "post_commit_multi_device_unstable" ]]; then - run_post_commit_multi_device_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" - elif [[ $pipeline_type == "frequent_multi_device" ]]; then - run_frequent_multi_device_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" - elif [[ $pipeline_type == "end_to_end_demos_multi_device" ]]; then - run_end_to_end_demos_multi_device "$tt_arch" "$pipeline_type" "$dispatch_mode" - elif [[ $pipeline_type == "post_commit_tg" ]]; then - run_post_commit_tg_pipeline_tests "$tt_arch" 
"$pipeline_type" "$dispatch_mode" - elif [[ $pipeline_type == "post_commit_tgg" ]]; then - run_post_commit_tgg_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" elif [[ $pipeline_type == "microbenchmarks" ]]; then run_microbenchmarks_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" elif [[ $pipeline_type == "ttnn_sweeps" ]]; then run_ttnn_sweeps_pipeline_tests "$tt_arch" "$pipeline_type" "$dispatch_mode" + # T3000 pipelines + elif [[ $pipeline_type == "unit_t3000_device" ]]; then + unit_t3000_device "$tt_arch" "$pipeline_type" "$dispatch_mode" + elif [[ $pipeline_type == "frequent_t3000_device" ]]; then + frequent_t3000_device "$tt_arch" "$pipeline_type" "$dispatch_mode" + elif [[ $pipeline_type == "demos_t3000_device" ]]; then + demos_t3000_device "$tt_arch" "$pipeline_type" "$dispatch_mode" + elif [[ $pipeline_type == *"model_perf_t3000_device" ]]; then + model_perf_t3000_device "$tt_arch" "$pipeline_type" "$dispatch_mode" + # TG pipelines + elif [[ $pipeline_type == "unit_tg_device" ]]; then + unit_tg_device "$tt_arch" "$pipeline_type" "$dispatch_mode" + # TGG pipelines + elif [[ $pipeline_type == "unit_tgg_device" ]]; then + unit_tgg_device "$tt_arch" "$pipeline_type" "$dispatch_mode" else echo "Unknown pipeline: $pipeline_type" exit 1 diff --git a/tests/scripts/t3000/run_t3000_demo_tests.sh b/tests/scripts/t3000/run_t3000_demo_tests.sh new file mode 100755 index 000000000000..816844f977d9 --- /dev/null +++ b/tests/scripts/t3000/run_t3000_demo_tests.sh @@ -0,0 +1,46 @@ + +#/bin/bash +set -eo pipefail + +run_t3000_falcon40b_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_falcon40b_tests" + + # Falcon40B prefill 60 layer end to end with 10 loops; we need 8x8 grid size + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py + + # Falcon40B end to end demo (prefill + decode) + 
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_5_loops.py + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_falcon40b_tests $duration seconds to complete" +} + +run_t3000_tests() { + # Run falcon40b tests + run_t3000_falcon40b_tests +} + +main() { + if [[ -z "$TT_METAL_HOME" ]]; then + echo "Must provide TT_METAL_HOME in environment" 1>&2 + exit 1 + fi + + if [[ -z "$ARCH_NAME" ]]; then + echo "Must provide ARCH_NAME in environment" 1>&2 + exit 1 + fi + + # Run all tests + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + + run_t3000_tests +} + +main "$@" diff --git a/tests/scripts/t3000/run_t3000_frequent_tests.sh b/tests/scripts/t3000/run_t3000_frequent_tests.sh new file mode 100755 index 000000000000..ee97712c0ffd --- /dev/null +++ b/tests/scripts/t3000/run_t3000_frequent_tests.sh @@ -0,0 +1,86 @@ + +#!/bin/bash +set -eo pipefail + +run_t3000_ethernet_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_ethernet_tests" + + pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_bidirectional_bandwidth_microbenchmark.py + pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_ring_latency_microbenchmark.py + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_ethernet_tests $duration seconds to complete" +} + +run_t3000_llama2() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_llama2" + + # Llama2_70b related cached files and tests (the test should parse env variables similar to these) + export LLAMA_CKPT_DIR=/mnt/MLPerf/tt_dnn-models/llama-2/llama-2-70b-repacked/ + export LLAMA_TOKENIZER_PATH=/mnt/MLPerf/tt_dnn-models/llama-2/tokenizer.model + export LLAMA_CACHE_PATH=/mnt/MLPerf/tt_dnn-models/llama-2/llama-data-cache/weights-cache-2 + + pytest 
models/demos/t3000/llama2_70b/tests/test_llama_mlp_t3000.py + pytest models/demos/t3000/llama2_70b/tests/test_llama_attention_t3000.py + pytest models/demos/t3000/llama2_70b/tests/test_llama_decoder_t3000.py + pytest models/demos/t3000/llama2_70b/tests/test_llama_model_t3000.py + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_llama2 $duration seconds to complete" +} + +run_t3000_mistral_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_mistral_tests" + + # Mistral8x7b 8 chip decode model test (env flags set inside the test) + pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[10-1-pcc] + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_mistral_tests $duration seconds to complete" +} + +run_t3000_tests() { + # Run ethernet tests + run_t3000_ethernet_tests + + # Run llama2-70b tests + run_t3000_llama2 + + # Run mistral tests + run_t3000_mistral_tests +} + +main() { + if [[ -z "$TT_METAL_HOME" ]]; then + echo "Must provide TT_METAL_HOME in environment" 1>&2 + exit 1 + fi + + if [[ -z "$ARCH_NAME" ]]; then + echo "Must provide ARCH_NAME in environment" 1>&2 + exit 1 + fi + + # Run all tests + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + + run_t3000_tests +} + +main "$@" diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh new file mode 100755 index 000000000000..0d26aec170cc --- /dev/null +++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh @@ -0,0 +1,95 @@ + +#!/bin/bash +set -eo pipefail + +run_t3000_falcon7b_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_falcon7b_tests" + + env pytest models/demos/falcon7b/tests -m "model_perf_t3000" + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_falcon7b_tests 
$duration seconds to complete" +} + +run_t3000_mistral_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_mistral_tests" + + env pytest models/demos/t3000/mixtral8x7b/tests -m "model_perf_t3000" + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_mistral_tests $duration seconds to complete" +} + +run_t3000_llm_tests() { + # Run falcon7b tests + run_t3000_falcon7b_tests + + # Run mistral tests + run_t3000_mistral_tests + + # Merge all the generated reports + env python models/perf/merge_perf_results.py +} + +run_t3000_cnn_tests() { + : # Write tests here +} + +main() { + # Parse the arguments + while [[ $# -gt 0 ]]; do + case $1 in + --pipeline-type) + pipeline_type=$2 + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac + shift + done + + if [[ -z "$TT_METAL_HOME" ]]; then + echo "Must provide TT_METAL_HOME in environment" 1>&2 + exit 1 + fi + + if [[ -z "$ARCH_NAME" ]]; then + echo "Must provide ARCH_NAME in environment" 1>&2 + exit 1 + fi + + if [[ -z "$pipeline_type" ]]; then + echo "--pipeline-type cannot be empty" 1>&2 + exit 1 + fi + + # Run all tests + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + + if [[ "$pipeline_type" == "llm_model_perf_t3000_device" ]]; then + run_t3000_llm_tests + elif [[ "$pipeline_type" == "cnn_model_perf_t3000_device" ]]; then + run_t3000_cnn_tests + else + echo "$pipeline_type is invalid (supported: [llm_model_perf_t3000_device, cnn_model_perf_t3000_device])" 1>&2 + exit 1 + fi + + # All requested tests already ran via the dispatch above +} + +main "$@" diff --git a/tests/scripts/t3000/run_t3000_unit_tests.sh b/tests/scripts/t3000/run_t3000_unit_tests.sh new file mode 100755 index 000000000000..89e220fcefde --- /dev/null +++ b/tests/scripts/t3000/run_t3000_unit_tests.sh @@ -0,0 +1,146 @@ + +#!/bin/bash +set -eo pipefail + +run_t3000_ttmetal_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_ttmetal_tests" + + 
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectSendAllConnectedChips" + TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsSendInterleavedBufferAllConnectedChips" + TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectRingGatherAllChips" + TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsInterleavedRingGatherAllChips" + TT_METAL_ENABLE_REMOTE_CHIP=1 ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueSingleCardFixture.*" + ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueMultiDeviceFixture.*" + ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="DPrintFixture.*:WatcherFixture.*" + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_ttmetal_tests $duration seconds to complete" +} + +run_t3000_tteager_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_tteager_tests" + + pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k post_commit + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_tteager_tests $duration seconds to complete" +} + +run_t3000_ttnn_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_ttnn_tests" + + pytest tests/ttnn/unit_tests/test_multi_device.py + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_ttnn_tests $duration seconds to complete" +} + +run_t3000_falcon7b_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_falcon7b_tests" + + pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_mlp.py + pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_attention.py 
+ pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_decoder.py + pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_causallm.py + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_falcon7b_tests $duration seconds to complete" +} + +run_t3000_falcon40b_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_falcon40b_tests" + + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_mlp.py + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_attention.py + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_decoder.py + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_causallm.py + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_1_layer_t3000.py + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_falcon40b_tests $duration seconds to complete" +} + +run_t3000_mistral_tests() { + # Record the start time + start_time=$(date +%s) + + echo "Running run_t3000_mistral_tests" + + pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_attention.py + pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_mlp.py + pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_rms_norm.py + pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_embedding.py + pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_moe.py + pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_decoder.py + pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[1-1-pcc] + + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "run_t3000_mistral_tests $duration seconds to 
complete" +} + +run_t3000_tests() { + # Run ttmetal tests + run_t3000_ttmetal_tests + + # Run tteager tests + run_t3000_tteager_tests + + # Run ttnn tests + run_t3000_ttnn_tests + + # Run falcon7b tests + run_t3000_falcon7b_tests + + # Run falcon40b tests + run_t3000_falcon40b_tests + + # Run mistral tests + run_t3000_mistral_tests +} + +main() { + if [[ -z "$TT_METAL_HOME" ]]; then + echo "Must provide TT_METAL_HOME in environment" 1>&2 + exit 1 + fi + + if [[ -z "$ARCH_NAME" ]]; then + echo "Must provide ARCH_NAME in environment" 1>&2 + exit 1 + fi + + # Run all tests + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + + run_t3000_tests +} + +main "$@" diff --git a/tests/scripts/tg/run_pre_post_commit_regressions_tg.sh b/tests/scripts/tg/run_pre_post_commit_regressions_tg.sh deleted file mode 100755 index 3d8f32fdf8ef..000000000000 --- a/tests/scripts/tg/run_pre_post_commit_regressions_tg.sh +++ /dev/null @@ -1,17 +0,0 @@ - -#/bin/bash - -set -eo pipefail - -if [[ -z "$TT_METAL_HOME" ]]; then - echo "Must provide TT_METAL_HOME in environment" 1>&2 - exit 1 -fi - -if [[ -z "$ARCH_NAME" ]]; then - echo "Must provide ARCH_NAME in environment" 1>&2 - exit 1 -fi - -# Write tests here! -echo "Fill me!" 
diff --git a/tests/scripts/tg/run_tg_unit_tests.sh b/tests/scripts/tg/run_tg_unit_tests.sh new file mode 100755 index 000000000000..dce82391b6e2 --- /dev/null +++ b/tests/scripts/tg/run_tg_unit_tests.sh @@ -0,0 +1,27 @@ + +#!/bin/bash +set -eo pipefail + +run_tg_tests() { + : # Write tests here +} + +main() { + if [[ -z "$TT_METAL_HOME" ]]; then + echo "Must provide TT_METAL_HOME in environment" 1>&2 + exit 1 + fi + + if [[ -z "$ARCH_NAME" ]]; then + echo "Must provide ARCH_NAME in environment" 1>&2 + exit 1 + fi + + # Run all tests + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + + run_tg_tests +} + +main "$@" diff --git a/tests/scripts/tgg/run_pre_post_commit_regressions_tgg.sh b/tests/scripts/tgg/run_pre_post_commit_regressions_tgg.sh deleted file mode 100755 index 3d8f32fdf8ef..000000000000 --- a/tests/scripts/tgg/run_pre_post_commit_regressions_tgg.sh +++ /dev/null @@ -1,17 +0,0 @@ - -#/bin/bash - -set -eo pipefail - -if [[ -z "$TT_METAL_HOME" ]]; then - echo "Must provide TT_METAL_HOME in environment" 1>&2 - exit 1 -fi - -if [[ -z "$ARCH_NAME" ]]; then - echo "Must provide ARCH_NAME in environment" 1>&2 - exit 1 -fi - -# Write tests here! -echo "Fill me!" diff --git a/tests/scripts/tgg/run_tgg_unit_tests.sh b/tests/scripts/tgg/run_tgg_unit_tests.sh new file mode 100755 index 000000000000..3f7d3b73496a --- /dev/null +++ b/tests/scripts/tgg/run_tgg_unit_tests.sh @@ -0,0 +1,27 @@ + +#!/bin/bash +set -eo pipefail + +run_tgg_tests() { + : # Write tests here +} + +main() { + if [[ -z "$TT_METAL_HOME" ]]; then + echo "Must provide TT_METAL_HOME in environment" 1>&2 + exit 1 + fi + + if [[ -z "$ARCH_NAME" ]]; then + echo "Must provide ARCH_NAME in environment" 1>&2 + exit 1 + fi + + # Run all tests + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + + run_tgg_tests +} + +main "$@"