From a1f1070ec2548654b0e08eb7aff71aea5dce11cb Mon Sep 17 00:00:00 2001 From: ngrujic Date: Mon, 25 Nov 2024 11:00:28 +0000 Subject: [PATCH] #11512: Add sweeps for lgamma, logit, mish, and multigammaln sharded --- .github/workflows/ttnn-run-sweeps.yaml | 12 +- .../sweep_utils/sharding_utils.py | 9 +- .../eltwise/unary/{ => lgamma}/lgamma.py | 0 .../eltwise/unary/lgamma/lgamma_sharded.py | 137 +++++++++++++++++ .../sweeps/eltwise/unary/{ => logit}/logit.py | 0 .../eltwise/unary/logit/logit_sharded.py | 139 ++++++++++++++++++ .../sweeps/eltwise/unary/{ => mish}/mish.py | 0 .../sweeps/eltwise/unary/mish/mish_sharded.py | 139 ++++++++++++++++++ .../unary/{ => multigammaln}/multigammaln.py | 0 9 files changed, 428 insertions(+), 8 deletions(-) rename tests/sweep_framework/sweeps/eltwise/unary/{ => lgamma}/lgamma.py (100%) create mode 100644 tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma_sharded.py rename tests/sweep_framework/sweeps/eltwise/unary/{ => logit}/logit.py (100%) create mode 100644 tests/sweep_framework/sweeps/eltwise/unary/logit/logit_sharded.py rename tests/sweep_framework/sweeps/eltwise/unary/{ => mish}/mish.py (100%) create mode 100644 tests/sweep_framework/sweeps/eltwise/unary/mish/mish_sharded.py rename tests/sweep_framework/sweeps/eltwise/unary/{ => multigammaln}/multigammaln.py (100%) diff --git a/.github/workflows/ttnn-run-sweeps.yaml b/.github/workflows/ttnn-run-sweeps.yaml index 442348e8e42..156b2859b71 100644 --- a/.github/workflows/ttnn-run-sweeps.yaml +++ b/.github/workflows/ttnn-run-sweeps.yaml @@ -172,10 +172,14 @@ on: - eltwise.binary_complex.add_bw.add_bw - eltwise.binary_complex.sub_bw.sub_bw - eltwise.binary_complex.mul_bw.mul_bw - - eltwise.unary.lgamma - - eltwise.unary.logit - - eltwise.unary.mish - - eltwise.unary.multigammaln + - eltwise.unary.lgamma.lgamma + - eltwise.unary.lgamma.lgamma_sharded + - eltwise.unary.logit.logit + - eltwise.unary.logit.logit_sharded + - eltwise.unary.mish.mish + - eltwise.unary.mish.mish_sharded 
+ - eltwise.unary.multigammaln.multigammaln + - eltwise.unary.multigammaln.multigammaln_sharded - eltwise.unary.isfinite.isfinite - eltwise.unary.isfinite.isfinite_sharded - eltwise.unary.isinf.isinf diff --git a/tests/sweep_framework/sweep_utils/sharding_utils.py b/tests/sweep_framework/sweep_utils/sharding_utils.py index 22979144c22..c17a70118ea 100644 --- a/tests/sweep_framework/sweep_utils/sharding_utils.py +++ b/tests/sweep_framework/sweep_utils/sharding_utils.py @@ -56,14 +56,15 @@ def gen_sharded_spec_unary(num_shapes, y, x, max_tensor_size=4 * 1024 * 1024): elif sharding_strategy == "BLOCK": min_shard_size_y = 32 * y min_shard_size_x = 32 * x - mul_x = random.randint(1, 10) - mul_y = random.randint(1, 64 // mul_x) input_shape = random.choice( - _gen_reshape_args_from_volume(mul_y * min_shard_size_y, step=1, out_dims=rank - 1) + _gen_reshape_args_from_volume( + max_tensor_size // (min_shard_size_x * min_shard_size_y), step=1, out_dims=rank + ) ) input_shape = list(input_shape["reshape_dims"]) - input_shape.append(mul_x * min_shard_size_x) + input_shape[-1] *= min_shard_size_y + input_shape[-2] *= min_shard_size_x elif sharding_strategy == "WIDTH" or sharding_strategy == "HEIGHT": # if shard_width % total_cores != 0: raise RuntimeError("Invalid sharding core_grid") diff --git a/tests/sweep_framework/sweeps/eltwise/unary/lgamma.py b/tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma.py similarity index 100% rename from tests/sweep_framework/sweeps/eltwise/unary/lgamma.py rename to tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma.py diff --git a/tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma_sharded.py b/tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma_sharded.py new file mode 100644 index 00000000000..9966e3e0220 --- /dev/null +++ b/tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma_sharded.py @@ -0,0 +1,137 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. 
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import json
+import torch
+import random
+import ttnn
+import math
+from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm, get_device_grid_size
+from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf, gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 120
+Y, X = get_device_grid_size()
+
+random.seed(0)
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_spec": gen_sharded_spec_unary(16, Y, X, max_tensor_size=2 * 1024 * 1024),
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    """Decide whether a generated test vector should be skipped.
+
+    Only tiled inputs are exercised, so every ROW_MAJOR_LAYOUT vector is
+    invalidated (this also covers bfloat8_b, which needs a tiled layout).
+    """
+    _, _, _, _, input_layout = test_vector["input_spec"].values()
+
+    if input_layout == "ROW_MAJOR_LAYOUT":
+        return True, "Input to eltwise unary must be tilized"
+
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_spec,
+    input_a_dtype,
+    *,
+    device,
+) -> list:
+    """Run ttnn.lgamma on one sharded input and compare it with torch.lgamma.
+
+    input_spec is decoded by parse_sharding_spec (shape, sharding strategy,
+    shard orientation, shard-shape flag, layout); input_a_dtype is the device
+    tensor dtype. Returns [check_with_pcc result (0.999), op call duration].
+    """
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    input_shape, sharding_strategy, shard_orientation, tensor_hw_as_shard_shape, input_layout = parse_sharding_spec(
+        input_spec
+    )
+
+    if input_layout == ttnn.ROW_MAJOR_LAYOUT:
+        input_shape = sanitize_shape_rm(input_shape)
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+    torch_output_tensor = torch.lgamma(torch_input_tensor_a)
+
+    sharded_config = ttnn.create_sharded_memory_config(
+        shape=input_shape,
+        core_grid=ttnn.CoreGrid(y=Y, x=X),
+        strategy=sharding_strategy,
+        orientation=shard_orientation,
+        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
+    )
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=sharded_config,
+    )
+
+    start_time = start_measuring_time()
+    output_tensor = ttnn.lgamma(input_tensor_a, memory_config=sharded_config)
+    e2e_perf = stop_measuring_time(start_time)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    return [check_with_pcc(torch_output_tensor, output_tensor, 0.999), e2e_perf]
+
+
+# Run sweeps locally
+# from tests.sweep_framework.framework.permutations import *
+
+# start_time = start_measuring_time()
+# for suite in parameters.keys():
+#     device_id = 0
+#     device = ttnn.open_device(device_id=device_id)
+#     suite_vectors = list(permutations(parameters[suite]))
+#     print(len(suite_vectors))
+#     for vector in suite_vectors:
+#         invalidate_res = invalidate_vector(vector)
+#         if invalidate_res[0]:
+#             print(f"Invalidated: {invalidate_res[1]}")
+#             continue
+#         try:
+#             passed, _ = run(**vector, device=device)
+#             # if passed[0] != True:
+#             #     print(passed)
+#         except Exception as e:
+#             print(e)
+
+#         # break
+
+#     ttnn.close_device(device)
+
+# e2e_perf = stop_measuring_time(start_time)
+# print(f"time {e2e_perf / 1000000000}s")
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/logit.py b/tests/sweep_framework/sweeps/eltwise/unary/logit/logit.py
similarity index 100%
rename from tests/sweep_framework/sweeps/eltwise/unary/logit.py
rename to tests/sweep_framework/sweeps/eltwise/unary/logit/logit.py
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/logit/logit_sharded.py b/tests/sweep_framework/sweeps/eltwise/unary/logit/logit_sharded.py
new file mode 100644
index 00000000000..a8eb2e4be74
--- /dev/null
+++ b/tests/sweep_framework/sweeps/eltwise/unary/logit/logit_sharded.py
@@ -0,0 +1,139 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import json
+import torch
+import random
+import ttnn
+import math
+from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm, get_device_grid_size
+from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf, gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 120
+Y, X = get_device_grid_size()
+
+random.seed(0)
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_spec": gen_sharded_spec_unary(16, Y, X, max_tensor_size=1 * 1024 * 1024),
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "eps": [0, 10e-6, 10e-4, 10e-2, 10e-1],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    """Decide whether a generated test vector should be skipped.
+
+    Only tiled inputs are exercised, so every ROW_MAJOR_LAYOUT vector is
+    invalidated (this also covers bfloat8_b, which needs a tiled layout).
+    """
+    _, _, _, _, input_layout = test_vector["input_spec"].values()
+
+    if input_layout == "ROW_MAJOR_LAYOUT":
+        return True, "Input to eltwise unary must be tilized"
+
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_spec,
+    input_a_dtype,
+    eps,
+    *,
+    device,
+) -> list:
+    """Run ttnn.logit on one sharded input and compare it with torch.logit.
+
+    input_spec is decoded by parse_sharding_spec; eps is forwarded to both
+    implementations. Returns [check_with_pcc result (0.999), op call duration].
+    """
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    input_shape, sharding_strategy, shard_orientation, tensor_hw_as_shard_shape, input_layout = parse_sharding_spec(
+        input_spec
+    )
+
+    if input_layout == ttnn.ROW_MAJOR_LAYOUT:
+        input_shape = sanitize_shape_rm(input_shape)
+
+    # NOTE(review): torch.logit clamps to [eps, 1 - eps], so most draws from [-100, 100] saturate; confirm range.
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+    torch_output_tensor = torch.logit(torch_input_tensor_a, eps)
+
+    sharded_config = ttnn.create_sharded_memory_config(
+        shape=input_shape,
+        core_grid=ttnn.CoreGrid(y=Y, x=X),
+        strategy=sharding_strategy,
+        orientation=shard_orientation,
+        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
+    )
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=sharded_config,
+    )
+
+    start_time = start_measuring_time()
+    output_tensor = ttnn.logit(input_tensor_a, eps=eps, memory_config=sharded_config)
+    e2e_perf = stop_measuring_time(start_time)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    return [check_with_pcc(torch_output_tensor, output_tensor, 0.999), e2e_perf]
+
+
+# Run sweeps locally. The loop only executes when this file is run as a script, so importing
+# the module does not open a device or run every vector.
+if __name__ == "__main__":
+    from tests.sweep_framework.framework.permutations import permutations
+
+    start_time = start_measuring_time()
+    for suite in parameters:
+        device = ttnn.open_device(device_id=0)
+        try:
+            suite_vectors = list(permutations(parameters[suite]))
+            print(len(suite_vectors))
+            for vector in suite_vectors:
+                invalidate_res = invalidate_vector(vector)
+                if invalidate_res[0]:
+                    print(f"Invalidated: {invalidate_res[1]}")
+                    continue
+                try:
+                    passed, _ = run(**vector, device=device)
+                    print(passed)
+                except Exception as e:
+                    # Best-effort local run: report the failure and continue with the next vector.
+                    print(e)
+        finally:
+            ttnn.close_device(device)
+
+    e2e_perf = stop_measuring_time(start_time)
+    print(f"time {e2e_perf / 1000000000}s")
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/mish.py b/tests/sweep_framework/sweeps/eltwise/unary/mish/mish.py
similarity index 100%
rename from tests/sweep_framework/sweeps/eltwise/unary/mish.py
rename to tests/sweep_framework/sweeps/eltwise/unary/mish/mish.py
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/mish/mish_sharded.py b/tests/sweep_framework/sweeps/eltwise/unary/mish/mish_sharded.py
new file mode 100644
index 00000000000..727a982133b
--- /dev/null
+++ b/tests/sweep_framework/sweeps/eltwise/unary/mish/mish_sharded.py
@@ -0,0 +1,139 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import json
+import torch
+import random
+import ttnn
+import math
+from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm, get_device_grid_size
+from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf, gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 120
+Y, X = get_device_grid_size()
+
+random.seed(0)
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_spec": gen_sharded_spec_unary(16, Y, X),
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    """Decide whether a generated test vector should be skipped.
+
+    Only tiled inputs are exercised, so every ROW_MAJOR_LAYOUT vector is
+    invalidated (this also covers bfloat8_b, which needs a tiled layout).
+    """
+    _, _, _, _, input_layout = test_vector["input_spec"].values()
+
+    if input_layout == "ROW_MAJOR_LAYOUT":
+        return True, "Input to eltwise unary must be tilized"
+
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_spec,
+    input_a_dtype,
+    *,
+    device,
+) -> list:
+    """Run ttnn.mish on one sharded input and compare it with its golden function.
+
+    input_spec is decoded by parse_sharding_spec (shape, sharding strategy,
+    shard orientation, shard-shape flag, layout); input_a_dtype is the device
+    tensor dtype. Returns [check_with_pcc result (0.999), op call duration].
+    """
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    input_shape, sharding_strategy, shard_orientation, tensor_hw_as_shard_shape, input_layout = parse_sharding_spec(
+        input_spec
+    )
+
+    if input_layout == ttnn.ROW_MAJOR_LAYOUT:
+        input_shape = sanitize_shape_rm(input_shape)
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+
+    golden_function = ttnn.get_golden_function(ttnn.mish)
+    torch_output_tensor = golden_function(torch_input_tensor_a)
+
+    sharded_config = ttnn.create_sharded_memory_config(
+        shape=input_shape,
+        core_grid=ttnn.CoreGrid(y=Y, x=X),
+        strategy=sharding_strategy,
+        orientation=shard_orientation,
+        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
+    )
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=sharded_config,
+    )
+
+    start_time = start_measuring_time()
+    output_tensor = ttnn.mish(input_tensor_a, memory_config=sharded_config)
+    e2e_perf = stop_measuring_time(start_time)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    return [check_with_pcc(torch_output_tensor, output_tensor, 0.999), e2e_perf]
+
+
+# Run sweeps locally. The loop only executes when this file is run as a script, so importing
+# the module does not open a device or run every vector.
+if __name__ == "__main__":
+    from tests.sweep_framework.framework.permutations import permutations
+
+    start_time = start_measuring_time()
+    for suite in parameters:
+        device = ttnn.open_device(device_id=0)
+        try:
+            suite_vectors = list(permutations(parameters[suite]))
+            print(len(suite_vectors))
+            for vector in suite_vectors:
+                invalidate_res = invalidate_vector(vector)
+                if invalidate_res[0]:
+                    print(f"Invalidated: {invalidate_res[1]}")
+                    continue
+                try:
+                    passed, _ = run(**vector, device=device)
+                    print(passed)
+                except Exception as e:
+                    # Best-effort local run: report the failure and continue with the next vector.
+                    print(e)
+        finally:
+            ttnn.close_device(device)
+
+    e2e_perf = stop_measuring_time(start_time)
+    print(f"time {e2e_perf / 1000000000}s")
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/multigammaln.py b/tests/sweep_framework/sweeps/eltwise/unary/multigammaln/multigammaln.py
similarity index 100%
rename from tests/sweep_framework/sweeps/eltwise/unary/multigammaln.py
rename to tests/sweep_framework/sweeps/eltwise/unary/multigammaln/multigammaln.py