From ed03f77abc6096d1ec4708d56e0eda08ba4f22da Mon Sep 17 00:00:00 2001 From: Aswinmcw Date: Fri, 8 Nov 2024 10:32:40 +0000 Subject: [PATCH] Add reduce_scatter t3k perf to pipeline --- .../t3000-model-perf-tests-impl.yaml | 5 +++-- .../t3000/run_t3000_model_perf_tests.sh | 20 +++++++++++++++++++ .../operations/ccl/perf/perf_csv.py | 9 ++++++++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/.github/workflows/t3000-model-perf-tests-impl.yaml b/.github/workflows/t3000-model-perf-tests-impl.yaml index c104d01fbaa..dbc8b679231 100644 --- a/.github/workflows/t3000-model-perf-tests-impl.yaml +++ b/.github/workflows/t3000-model-perf-tests-impl.yaml @@ -22,7 +22,8 @@ jobs: { name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, # Miguel Tairum { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic { name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho - { name: "t3k CCL all_gather perf tests", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar + { name: "t3k CCL all_gather perf tests", model: "all_gather", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar + { name: "t3k CCL reduce_scatter perf tests", model: "reduce_scatter", arch: wormhole_b0, cmd: run_t3000_ccl_reduce_scatter_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run? 
] name: ${{ matrix.test-group.name }} @@ -80,7 +81,7 @@ jobs: run: | TODAY=$(date +%Y_%m_%d) PERF_REPORT_FILENAME_MODELS="Models_Perf_${TODAY}.csv" - PERF_REPORT_FILENAME_CCL="CCL_Perf_${TODAY}.csv" + PERF_REPORT_FILENAME_CCL="CCL_${{ matrix.test-group.model }}_Perf_${TODAY}.csv" if [ "${{ matrix.test-group.tracy }}" == "true" ]; then if [ -f "$PERF_REPORT_FILENAME_CCL" ]; then echo "Found CCL Perf report: $PERF_REPORT_FILENAME_CCL" diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh index 19a54d710b1..02ec0d8c541 100755 --- a/tests/scripts/t3000/run_t3000_model_perf_tests.sh +++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh @@ -161,6 +161,25 @@ run_t3000_ccl_all_gather_perf_tests() { fi } +run_t3000_ccl_reduce_scatter_perf_tests() { + # Record the start time + fail=0 + start_time=$(date +%s) + + echo "LOG_METAL: Running run_t3000_ccl_reduce_scatter_perf_tests" + + tests/ttnn/unit_tests/operations/ccl/perf/run_reduce_scatter_profile.sh -t t3000 + fail=$((fail + $?)) # add the exit status numerically; a plain += on a non-integer variable appends digits ("0139"), which [[ -ne ]] then parses as an invalid octal constant, masking the failure
+ + # Record the end time + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "LOG_METAL: run_t3000_ccl_reduce_scatter_perf_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi +} + run_t3000_llm_tests() { # Run falcon7b tests run_t3000_falcon7b_tests @@ -195,6 +214,7 @@ run_t3000_cnn_tests() { run_t3000_ccl_tests() { # Run ccl performance tests run_t3000_ccl_all_gather_perf_tests + run_t3000_ccl_reduce_scatter_perf_tests } diff --git a/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py b/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py index 3d5cc2aaeb5..569f608c48b 100644 --- a/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py +++ b/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py @@ -214,9 +214,16 @@ def calculate_bandwidth(row): averages_data.append(group_data) averages_df = pd.DataFrame(averages_data) + op_code = averages_df.iloc[0]["OP CODE"] today = time.strftime("%Y_%m_%d") - ccl_perf_file_path = f"CCL_Perf_{today}.csv" + if op_code == "AllGather": + ccl_perf_file_path = f"CCL_all_gather_Perf_{today}.csv" + elif op_code == "ReduceScatter": + ccl_perf_file_path = f"CCL_reduce_scatter_Perf_{today}.csv" + else: + ccl_perf_file_path = f"CCL_Perf_{today}.csv" + os.rename(file_path, ccl_perf_file_path) averages_df.to_csv(ccl_perf_file_path, index=False)