Skip to content

ttnn - Run sweeps #1413

ttnn - Run sweeps

ttnn - Run sweeps #1413

name: 'ttnn - Run sweeps'

# Two triggers: a manual dispatch that lets the caller pick one sweep module
# (or everything), and a nightly schedule.
on:
  workflow_dispatch:
    inputs:
      sweep_name:
        description: 'Which sweep module to run?'
        type: choice
        required: true
        # This sentinel string is compared verbatim in the steps' `if:`
        # conditions; keep the default, the first option, and those
        # conditions in sync.
        default: 'ALL SWEEPS (Nightly)'
        options:
          - 'ALL SWEEPS (Nightly)'
          - add
          # ccl.* modules
          - ccl.line_all_gather
          - ccl.all_gather_n300
          - ccl.all_gather_n300_focused
          # eltwise.* modules (unary / binary / backward / composite / ternary;
          # the groups are partly interleaved, order is preserved as-is)
          - eltwise.unary.abs.abs_pytorch2
          - eltwise.unary.relu.relu
          - eltwise.unary.relu.relu_pytorch2
          - eltwise.unary.gelu.gelu
          - eltwise.unary.gelu.gelu_pytorch2
          - eltwise.unary.hardsigmoid.hardsigmoid_pytorch2
          - eltwise.unary.leaky_relu.leaky_relu_pytorch2
          - eltwise.unary.abs.abs
          - eltwise.unary.cos.cos
          - eltwise.unary.cos.cos_pytorch2
          - eltwise.unary.sin.sin
          - eltwise.unary.sin.sin_pytorch2
          - eltwise.unary.tril.tril_pytorch2
          - eltwise.unary.clamp.clamp
          - eltwise.unary.clip.clip
          - eltwise.unary.cbrt.cbrt
          - eltwise.unary.rsub.rsub
          - eltwise.unary.rsub.rsub_pytorch2
          - eltwise.unary.rsqrt.rsqrt_pytorch2
          - eltwise.unary.rdiv.rdiv
          - eltwise.unary.frac.frac
          - eltwise.unary.ceil.ceil
          - eltwise.unary.ceil.ceil_pytorch2
          - eltwise.unary.trunc.trunc
          - eltwise.unary.floor.floor
          - eltwise.unary.floor.floor_pytorch2
          - eltwise.unary.clone.clone
          - eltwise.unary.elu.elu
          - eltwise.unary.elu.elu_pytorch2
          - eltwise.unary.erfc.erfc
          - eltwise.unary.exp.exp
          - eltwise.unary.exp.exp_pytorch2
          - eltwise.unary.exp2.exp2
          - eltwise.unary.expm1.expm1
          - eltwise.unary.tanh.tanh
          - eltwise.unary.tanh.tanh_pytorch2
          - eltwise.unary.sign.sign
          - eltwise.unary.rad2deg.rad2deg
          - eltwise.unary.deg2rad.deg2rad
          - eltwise.unary.relu6.relu6
          - eltwise.unary.log.log
          - eltwise.unary.log.log_pytorch2
          - eltwise.unary.log1p.log1p
          - eltwise.unary.log2.log2
          - eltwise.unary.log10.log10
          - eltwise.unary.bitwise.bitwise_and
          - eltwise.unary.bitwise.bitwise_left_shift
          - eltwise.unary.bitwise.bitwise_not
          - eltwise.unary.bitwise.bitwise_not_pytorch2
          - eltwise.unary.bitwise.bitwise_or
          - eltwise.unary.bitwise.bitwise_right_shift
          - eltwise.unary.bitwise.bitwise_xor
          - eltwise.unary.log_sigmoid.log_sigmoid
          - eltwise.unary.logical_not.logical_not_
          - eltwise.unary.logical_not.logical_not
          - eltwise.unary.logical_not.logical_not_output
          - eltwise.unary.erf.erf
          - eltwise.unary.erfinv.erfinv
          - eltwise.unary.i0.i0
          - eltwise.unary.silu.silu
          - eltwise.unary.silu.silu_pytorch2
          - eltwise.unary.glu.glu
          - eltwise.unary.sigmoid.sigmoid
          - eltwise.unary.sigmoid.sigmoid_pytorch2
          - eltwise.unary.sigmoid_accurate.sigmoid_accurate
          - eltwise.unary.tril.tril
          - eltwise.unary.triu.triu
          - eltwise.unary.normalize_hw.normalize_hw
          - eltwise.unary.normalize_global.normalize_global
          - eltwise.unary.heaviside.heaviside
          - eltwise.unary.hardtanh.hardtanh
          - eltwise.unary.hardswish.hardswish
          - eltwise.unary.hardsigmoid.hardsigmoid
          - eltwise.unary.hardshrink.hardshrink
          - eltwise.unary.softmax.softmax
          - eltwise.unary.identity.identity
          - eltwise.unary.neg.neg
          - eltwise.unary.sinh.sinh
          - eltwise.unary.relu_min.relu_min
          - eltwise.unary.relu_max.relu_max
          - eltwise.unary.softplus.softplus
          - eltwise.unary_backward.acos_bw
          - eltwise.unary_backward.acosh_bw
          - eltwise.unary_backward.atan_bw
          - eltwise.unary_backward.cos_bw
          - eltwise.unary_backward.frac_bw
          - eltwise.unary_backward.i0_bw
          - eltwise.unary_backward.rad2deg_bw
          - eltwise.unary_backward.relu_bw
          - eltwise.unary_backward.rsqrt_bw
          - eltwise.unary_backward.sigmoid_bw
          - eltwise.unary_backward.tan_bw
          - eltwise.unary_backward.trunc_bw
          - eltwise.unary_backward.clamp_bw.clamp_bw
          - eltwise.unary_backward.hardtanh_bw.hardtanh_bw
          - eltwise.unary_backward.mul_bw.mul_bw
          - eltwise.unary_backward.softplus_bw.softplus_bw
          - eltwise.unary_backward.threshold_bw.threshold_bw
          - eltwise.unary_backward.div_bw.div_bw
          - eltwise.unary_backward.log_bw.log_bw
          - eltwise.unary_backward.relu6_bw.relu6_bw
          - eltwise.unary_backward.log10_bw.log10_bw
          - eltwise.unary_backward.abs_bw.abs_bw
          - eltwise.unary_backward.sinh_bw.sinh_bw
          - eltwise.unary_backward.sin_bw.sin_bw
          - eltwise.unary_backward.square_bw.square_bw
          - eltwise.unary.lgamma
          - eltwise.unary.logit
          - eltwise.unary.mish
          - eltwise.unary.multigammaln
          - eltwise.unary.isfinite
          - eltwise.unary.isinf
          - eltwise.unary.isnan
          - eltwise.unary.isneginf
          - eltwise.unary.isposinf
          - eltwise.binary.add.add_all_pytorch2
          - eltwise.unary.gtz.gtz
          - eltwise.unary.ltz.ltz
          - eltwise.unary.gez.gez
          - eltwise.unary.lez.lez
          - eltwise.unary.nez.nez
          - eltwise.binary.subtract.subtract
          - eltwise.binary.multiply.multiply
          - eltwise.binary.multiply.mul_tensor_pytorch2
          - eltwise.binary.div.div
          - eltwise.binary.div_no_nan.div_no_nan
          - eltwise.binary.logical_or.logical_or_
          - eltwise.binary.logical_or.logical_or
          - eltwise.binary.logical_or.logical_or_output
          - eltwise.binary.logical_xor.logical_xor_
          - eltwise.binary.logical_xor.logical_xor
          - eltwise.binary.logical_and.logical_and_
          - eltwise.binary.logical_and.logical_and
          - eltwise.binary.logical_and.logical_and_output
          - eltwise.binary.polyval.polyval
          - eltwise.binary.remainder.remainder
          - eltwise.binary.squared_difference.squared_difference
          - eltwise.binary.squared_difference_output.squared_difference_output
          - eltwise.binary.remainder.remainder_scalar_pytorch2
          - eltwise.binary.bcast.bcast_h_sharded
          - eltwise.binary.bcast.bcast
          - eltwise.binary.eq.eq_scalar_pytorch2
          - eltwise.binary.gt.gt_scalar_pytorch2
          - eltwise.binary.le.le_tensor_pytorch2
          - eltwise.binary.fmod.fmod
          - eltwise.binary.floor_divide.floor_divide_pytorch2
          - eltwise.binary.logaddexp.logaddexp
          - eltwise.binary.ldexp.ldexp
          - eltwise.binary.hypot.hypot
          - eltwise.binary.xlogy.xlogy
          - eltwise.binary_backward.embedding_bw.embedding_bw
          - eltwise.binary_backward.addalpha_bw.addalpha_bw
          - eltwise.binary_backward.subalpha_bw.subalpha_bw
          - eltwise.binary_backward.xlogy_bw.xlogy_bw
          - eltwise.binary_backward.hypot_bw.hypot_bw
          - eltwise.composite.binary.addalpha.addalpha
          - eltwise.composite.binary.subalpha.subalpha
          - eltwise.composite.binary.minimum.minimum
          - eltwise.composite.binary.minimum.minimum_pytorch2
          - eltwise.composite.binary.maximum.maximum
          - eltwise.composite.binary.maximum.maximum_pytorch2
          - eltwise.composite.binary.pow.pow_pytorch2
          - eltwise.composite.binary.pow.pow_scalar_pytorch2
          - eltwise.composite.binary.pow.pow_tensor_pytorch2
          - eltwise.ternary.addcmul.addcmul
          - eltwise.ternary.addcdiv.addcdiv
          - eltwise.ternary.mac.mac
          - eltwise.ternary.lerp
          - eltwise.ternary.where.where
          - eltwise.ternary.where.where_pytorch2
          # matmul.* modules
          - matmul.full.matmul_default_block_sharded
          - matmul.full.matmul_default_height_sharded
          - matmul.full.matmul_default_interleaved
          - matmul.full.matmul_default_width_sharded
          - matmul.short.matmul_create_program_config
          - matmul.short.matmul_default_sharded
          - matmul.short.matmul_default
          - matmul.short.matmul_user_program_config_mcast_1d
          - matmul.short.matmul_user_program_config_mcast_2d
          - matmul.short.matmul_user_program_config
          - matmul.short.matmul
          # data_movement.* modules
          - data_movement.concat.concat_interleaved_n_tensors
          - data_movement.concat.concat_interleaved
          - data_movement.concat.concat_sharded
          - data_movement.concat.concat_pytorch2
          - data_movement.slice.slice_pytorch2_rm
          - data_movement.slice.slice_pytorch2_tiled
          - data_movement.permute.permute
          - data_movement.permute.permute_pytorch2_tiled
          - data_movement.permute.permute_pytorch2_rm
          - data_movement.transpose.transpose_pytorch2
          - data_movement.transpose.transpose_interleaved
          - data_movement.transpose.t_pytorch2
          - data_movement.copy.copy
          - data_movement.expand.expand_pytorch2
          - data_movement.fill.fill_pytorch2
          - data_movement.index_select.index_select_pytorch2
          - data_movement.split.split_with_sizes_pytorch2
          - data_movement.repeat.repeat
  schedule:
    # Nightly run at 21:00 (9:00pm) UTC.
    - cron: '0 21 * * *'
jobs:
  # Calls the repository's reusable build workflow, passing the caller's
  # secrets through. The other jobs wait on it via `needs`.
  build-artifact:
    uses: ./.github/workflows/build-artifact.yaml
    secrets: inherit

  # Runs tests/sweep_framework/parameter_generator.py once on a build runner:
  # for a single module when one was picked at dispatch time, otherwise with
  # no --module-name filter (scheduled runs and the 'ALL SWEEPS (Nightly)'
  # choice).
  ttnn-generate-sweeps:
    needs: build-artifact
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: wormhole_b0
      ELASTIC_USERNAME: ${{ secrets.SWEEPS_ELASTIC_USERNAME }}
      ELASTIC_PASSWORD: ${{ secrets.SWEEPS_ELASTIC_PASSWORD }}
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    environment: dev
    timeout-minutes: 30
    runs-on: [build, in-service]
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      - name: Set up dynamic env vars for build
        # Exports the checkout directory as TT_METAL_HOME for all later steps.
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
      - uses: ./.github/actions/prepare-metal-run
        with:
          arch: wormhole_b0
      - name: Run ttnn sweeps generation (single sweep)
        if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.sweep_name != 'ALL SWEEPS (Nightly)' }}
        env:
          # Hand the dispatch input to the script through the environment
          # rather than expanding the expression inside the script text, so the
          # value is never parsed as shell syntax.
          SWEEP_NAME: ${{ github.event.inputs.sweep_name }}
        run: |
          source "${{ github.workspace }}/python_env/bin/activate"
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          python tests/sweep_framework/parameter_generator.py --module-name "$SWEEP_NAME" --elastic cloud --tag ci-main --explicit
      - name: Run ttnn sweeps generation (all sweeps)
        if: ${{ github.event_name == 'schedule' || github.event.inputs.sweep_name == 'ALL SWEEPS (Nightly)' }}
        run: |
          source "${{ github.workspace }}/python_env/bin/activate"
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          python tests/sweep_framework/parameter_generator.py --elastic cloud --tag ci-main --explicit

  # Runs tests/sweep_framework/runner.py on each hardware group in the matrix
  # after generation has finished. Mirrors the generation job's single-module
  # vs. all-sweeps split; the all-sweeps path adds `--suite-name nightly`.
  ttnn-run-sweeps:
    needs: ttnn-generate-sweeps
    strategy:
      # Do not fail-fast because we need to ensure all tests go to completion
      # so we try not to get hanging machines
      fail-fast: false
      matrix:
        # One entry per hardware target. `arch` feeds ARCH_NAME and the
        # prepare-metal-run action, `runs-on` selects the runner labels, and
        # `tt-smi-cmd` is exported as TT_SMI_RESET_COMMAND.
        test-group:
          - name: "Grayskull E150 Sweeps"
            arch: grayskull
            runs-on: ["cloud-virtual-machine", "E150", "in-service"]
            tt-smi-cmd: "tt-smi-metal -r 0"
          - name: "Wormhole N150 Sweeps"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N150", "in-service"]
            tt-smi-cmd: "tt-smi-metal -r 0"
          - name: "Wormhole N300 Sweeps"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N300", "in-service"]
            tt-smi-cmd: "tt-smi-metal -r 0"
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.test-group.arch }}
      ELASTIC_USERNAME: ${{ secrets.SWEEPS_ELASTIC_USERNAME }}
      ELASTIC_PASSWORD: ${{ secrets.SWEEPS_ELASTIC_PASSWORD }}
      TT_SMI_RESET_COMMAND: ${{ matrix.test-group.tt-smi-cmd }}
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    environment: dev
    timeout-minutes: 720
    runs-on: ${{ matrix.test-group.runs-on }}
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      - name: Set up dynamic env vars for build
        # Exports the checkout directory as TT_METAL_HOME for all later steps.
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
      - uses: ./.github/actions/prepare-metal-run
        with:
          arch: ${{ matrix.test-group.arch }}
      - name: Run ttnn sweeps (single sweep)
        if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.sweep_name != 'ALL SWEEPS (Nightly)' }}
        env:
          # Hand the dispatch input to the script through the environment
          # rather than expanding the expression inside the script text, so the
          # value is never parsed as shell syntax.
          SWEEP_NAME: ${{ github.event.inputs.sweep_name }}
        run: |
          source "${{ github.workspace }}/python_env/bin/activate"
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          python tests/sweep_framework/runner.py --module-name "$SWEEP_NAME" --elastic cloud --tag ci-main
      - name: Run ttnn sweeps (all sweeps, nightly)
        if: ${{ github.event_name == 'schedule' || github.event.inputs.sweep_name == 'ALL SWEEPS (Nightly)' }}
        run: |
          source "${{ github.workspace }}/python_env/bin/activate"
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          python tests/sweep_framework/runner.py --elastic cloud --tag ci-main --suite-name nightly