Skip to content

Commit

Permalink
Add reduce_scatter t3k perf to pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Aswinmcw committed Nov 14, 2024
1 parent 14a6f6f commit ed03f77
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 3 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/t3000-model-perf-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ jobs:
{ name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, # Miguel Tairum
{ name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic
{ name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho
{ name: "t3k CCL all_gather perf tests", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
{ name: "t3k CCL all_gather perf tests", model: "all_gather", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
{ name: "t3k CCL reduce_scatter perf tests", model: "reduce_scatter", arch: wormhole_b0, cmd: run_t3000_ccl_reduce_scatter_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
#{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run?
]
name: ${{ matrix.test-group.name }}
Expand Down Expand Up @@ -80,7 +81,7 @@ jobs:
run: |
TODAY=$(date +%Y_%m_%d)
PERF_REPORT_FILENAME_MODELS="Models_Perf_${TODAY}.csv"
PERF_REPORT_FILENAME_CCL="CCL_Perf_${TODAY}.csv"
PERF_REPORT_FILENAME_CCL="CCL_${{ matrix.test-group.model }}_Perf_${TODAY}.csv"
if [ "${{ matrix.test-group.tracy }}" == "true" ]; then
if [ -f "$PERF_REPORT_FILENAME_CCL" ]; then
echo "Found CCL Perf report: $PERF_REPORT_FILENAME_CCL"
Expand Down
20 changes: 20 additions & 0 deletions tests/scripts/t3000/run_t3000_model_perf_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,25 @@ run_t3000_ccl_all_gather_perf_tests() {
fi
}

#######################################
# Runs the T3000 CCL reduce_scatter perf profiling script and logs how long
# the run took.
# Globals:   none written (all bookkeeping variables are local)
# Arguments: none
# Outputs:   "LOG_METAL:" start and duration lines on stdout
# Returns:   0 when the profiling script succeeds; otherwise terminates the
#            calling shell with `exit 1`
#######################################
run_t3000_ccl_reduce_scatter_perf_tests() {
    local fail=0
    local start_time end_time duration

    # Record the start time
    start_time=$(date +%s)

    echo "LOG_METAL: Running run_t3000_ccl_reduce_scatter_perf_tests"

    tests/ttnn/unit_tests/operations/ccl/perf/run_reduce_scatter_profile.sh -t t3000
    # Add the status numerically. A plain `fail+=$?` on a non-integer variable
    # concatenates strings ("0" + "139" -> "0139"); values containing the digits
    # 8 or 9 are then rejected as invalid octal by the numeric check below, and
    # the failure would be silently treated as success.
    fail=$((fail + $?))

    # Record the end time
    end_time=$(date +%s)
    duration=$((end_time - start_time))
    echo "LOG_METAL: run_t3000_ccl_reduce_scatter_perf_tests $duration seconds to complete"
    if (( fail != 0 )); then
        exit 1
    fi
}

run_t3000_llm_tests() {
# Run falcon7b tests
run_t3000_falcon7b_tests
Expand Down Expand Up @@ -195,6 +214,7 @@ run_t3000_cnn_tests() {
# Runs every T3000 CCL performance test function, one after another, in the
# order listed below. The function's status is that of the last test run.
run_t3000_ccl_tests() {
    local ccl_perf_test
    for ccl_perf_test in \
        run_t3000_ccl_all_gather_perf_tests \
        run_t3000_ccl_reduce_scatter_perf_tests; do
        "$ccl_perf_test"
    done
}

Expand Down
9 changes: 8 additions & 1 deletion tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,16 @@ def calculate_bandwidth(row):
averages_data.append(group_data)

averages_df = pd.DataFrame(averages_data)
op_code = averages_df.iloc[0]["OP CODE"]

today = time.strftime("%Y_%m_%d")
ccl_perf_file_path = f"CCL_Perf_{today}.csv"
if op_code == "AllGather":
ccl_perf_file_path = f"CCL_all_gather_Perf_{today}.csv"
elif op_code == "ReduceScatter":
ccl_perf_file_path = f"CCL_reduce_scatter_Perf_{today}.csv"
else:
ccl_perf_file_path = f"CCL_Perf_{today}.csv"

os.rename(file_path, ccl_perf_file_path)

averages_df.to_csv(ccl_perf_file_path, index=False)
Expand Down

0 comments on commit ed03f77

Please sign in to comment.