From a1f1070ec2548654b0e08eb7aff71aea5dce11cb Mon Sep 17 00:00:00 2001
From: ngrujic <ngrujic@tenstorrent.com>
Date: Mon, 25 Nov 2024 11:00:28 +0000
Subject: [PATCH] #11512: Add sweeps for lgamma, logit, mish, and multigammaln
 sharded

---
 .github/workflows/ttnn-run-sweeps.yaml        |  12 +-
 .../sweep_utils/sharding_utils.py             |   9 +-
 .../eltwise/unary/{ => lgamma}/lgamma.py      |   0
 .../eltwise/unary/lgamma/lgamma_sharded.py    | 137 +++++++++++++++++
 .../sweeps/eltwise/unary/{ => logit}/logit.py |   0
 .../eltwise/unary/logit/logit_sharded.py      | 139 ++++++++++++++++++
 .../sweeps/eltwise/unary/{ => mish}/mish.py   |   0
 .../sweeps/eltwise/unary/mish/mish_sharded.py | 139 ++++++++++++++++++
 .../unary/{ => multigammaln}/multigammaln.py  |   0
 9 files changed, 428 insertions(+), 8 deletions(-)
 rename tests/sweep_framework/sweeps/eltwise/unary/{ => lgamma}/lgamma.py (100%)
 create mode 100644 tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma_sharded.py
 rename tests/sweep_framework/sweeps/eltwise/unary/{ => logit}/logit.py (100%)
 create mode 100644 tests/sweep_framework/sweeps/eltwise/unary/logit/logit_sharded.py
 rename tests/sweep_framework/sweeps/eltwise/unary/{ => mish}/mish.py (100%)
 create mode 100644 tests/sweep_framework/sweeps/eltwise/unary/mish/mish_sharded.py
 rename tests/sweep_framework/sweeps/eltwise/unary/{ => multigammaln}/multigammaln.py (100%)

diff --git a/.github/workflows/ttnn-run-sweeps.yaml b/.github/workflows/ttnn-run-sweeps.yaml
index 442348e8e42..156b2859b71 100644
--- a/.github/workflows/ttnn-run-sweeps.yaml
+++ b/.github/workflows/ttnn-run-sweeps.yaml
@@ -172,10 +172,14 @@ on:
           - eltwise.binary_complex.add_bw.add_bw
           - eltwise.binary_complex.sub_bw.sub_bw
           - eltwise.binary_complex.mul_bw.mul_bw
-          - eltwise.unary.lgamma
-          - eltwise.unary.logit
-          - eltwise.unary.mish
-          - eltwise.unary.multigammaln
+          - eltwise.unary.lgamma.lgamma
+          - eltwise.unary.lgamma.lgamma_sharded
+          - eltwise.unary.logit.logit
+          - eltwise.unary.logit.logit_sharded
+          - eltwise.unary.mish.mish
+          - eltwise.unary.mish.mish_sharded
+          - eltwise.unary.multigammaln.multigammaln
+          - eltwise.unary.multigammaln.multigammaln_sharded
           - eltwise.unary.isfinite.isfinite
           - eltwise.unary.isfinite.isfinite_sharded
           - eltwise.unary.isinf.isinf
diff --git a/tests/sweep_framework/sweep_utils/sharding_utils.py b/tests/sweep_framework/sweep_utils/sharding_utils.py
index 22979144c22..c17a70118ea 100644
--- a/tests/sweep_framework/sweep_utils/sharding_utils.py
+++ b/tests/sweep_framework/sweep_utils/sharding_utils.py
@@ -56,14 +56,15 @@ def gen_sharded_spec_unary(num_shapes, y, x, max_tensor_size=4 * 1024 * 1024):
             elif sharding_strategy == "BLOCK":
                 min_shard_size_y = 32 * y
                 min_shard_size_x = 32 * x
-                mul_x = random.randint(1, 10)
-                mul_y = random.randint(1, 64 // mul_x)
 
                 input_shape = random.choice(
-                    _gen_reshape_args_from_volume(mul_y * min_shard_size_y, step=1, out_dims=rank - 1)
+                    _gen_reshape_args_from_volume(
+                        max_tensor_size // (min_shard_size_x * min_shard_size_y), step=1, out_dims=rank
+                    )
                 )
                 input_shape = list(input_shape["reshape_dims"])
-                input_shape.append(mul_x * min_shard_size_x)
+                input_shape[-1] *= min_shard_size_y
+                input_shape[-2] *= min_shard_size_x
 
             elif sharding_strategy == "WIDTH" or sharding_strategy == "HEIGHT":
                 # if shard_width % total_cores != 0: raise RuntimeError("Invalid sharding core_grid")
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/lgamma.py b/tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma.py
similarity index 100%
rename from tests/sweep_framework/sweeps/eltwise/unary/lgamma.py
rename to tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma.py
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma_sharded.py b/tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma_sharded.py
new file mode 100644
index 00000000000..9966e3e0220
--- /dev/null
+++ b/tests/sweep_framework/sweeps/eltwise/unary/lgamma/lgamma_sharded.py
@@ -0,0 +1,137 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import json
+import torch
+import random
+import ttnn
+import math
+from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm, get_device_grid_size
+from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf, gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 120
+Y, X = get_device_grid_size()
+
+random.seed(0)
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_spec": gen_sharded_spec_unary(16, Y, X, max_tensor_size=2 * 1024 * 1024),
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    """Reject test vectors whose input layout is row-major.
+
+    Returns ``(True, reason)`` for an invalid vector and ``(False, None)`` otherwise.
+    """
+    # Only the layout (the last value stored in the input spec) decides validity here.
+    *_, input_layout = test_vector["input_spec"].values()
+
+    if input_layout == "ROW_MAJOR_LAYOUT":
+        # The former bfloat8_b-specific check sat behind this return and could never fire.
+        return True, "Input to eltwise unary must be tilized"
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_spec,
+    input_a_dtype,
+    *,
+    device,
+) -> list:
+    """Run ttnn.lgamma on one sharded input and compare it with torch.lgamma.
+
+    Returns ``[pcc_result, e2e_perf]``: the check_with_pcc result and the time measured around the op call.
+    """
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    input_shape, sharding_strategy, shard_orientation, tensor_hw_as_shard_shape, input_layout = parse_sharding_spec(
+        input_spec
+    )
+
+    if input_layout == ttnn.ROW_MAJOR_LAYOUT:
+        input_shape = sanitize_shape_rm(input_shape)
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+    torch_output_tensor = torch.lgamma(torch_input_tensor_a)
+
+    sharded_config = ttnn.create_sharded_memory_config(
+        shape=input_shape,
+        core_grid=ttnn.CoreGrid(y=Y, x=X),
+        strategy=sharding_strategy,
+        orientation=shard_orientation,
+        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
+    )
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=sharded_config,
+    )
+
+    start_time = start_measuring_time()
+    output_tensor = ttnn.lgamma(input_tensor_a, memory_config=sharded_config)
+    e2e_perf = stop_measuring_time(start_time)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    # Compute the PCC check once and return that result instead of recomputing it.
+    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
+    return [pcc, e2e_perf]
+
+
+# Run sweeps locally
+# from tests.sweep_framework.framework.permutations import *
+
+# start_time = start_measuring_time()
+# for suite in parameters.keys():
+#     device_id = 0
+#     device = ttnn.open_device(device_id=device_id)
+#     suite_vectors = list(permutations(parameters[suite]))
+#     print(len(suite_vectors))
+#     for vector in suite_vectors:
+#         invalidate_res = invalidate_vector(vector)
+#         if invalidate_res[0]:
+#             print(f"Invalidated: {invalidate_res[1]}")
+#             continue
+#         try:
+#             passed, _ = run(**vector, device=device)
+#             # if passed[0] != True:
+#             #     print(passed)
+#         except Exception as e:
+#             print(e)
+
+#         # break
+
+#     ttnn.close_device(device)
+
+# e2e_perf = stop_measuring_time(start_time)
+# print(f"time {e2e_perf / 1000000000}s")
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/logit.py b/tests/sweep_framework/sweeps/eltwise/unary/logit/logit.py
similarity index 100%
rename from tests/sweep_framework/sweeps/eltwise/unary/logit.py
rename to tests/sweep_framework/sweeps/eltwise/unary/logit/logit.py
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/logit/logit_sharded.py b/tests/sweep_framework/sweeps/eltwise/unary/logit/logit_sharded.py
new file mode 100644
index 00000000000..a8eb2e4be74
--- /dev/null
+++ b/tests/sweep_framework/sweeps/eltwise/unary/logit/logit_sharded.py
@@ -0,0 +1,139 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import json
+import torch
+import random
+import ttnn
+import math
+from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm, get_device_grid_size
+from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf, gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 120
+Y, X = get_device_grid_size()
+
+random.seed(0)
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_spec": gen_sharded_spec_unary(16, Y, X, max_tensor_size=1 * 1024 * 1024),
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "eps": [0, 10e-6, 10e-4, 10e-2, 10e-1],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    """Reject test vectors whose input layout is row-major.
+
+    Returns ``(True, reason)`` for an invalid vector and ``(False, None)`` otherwise.
+    """
+    # Only the layout (the last value stored in the input spec) decides validity here.
+    *_, input_layout = test_vector["input_spec"].values()
+
+    if input_layout == "ROW_MAJOR_LAYOUT":
+        # The former bfloat8_b-specific check sat behind this return and could never fire.
+        return True, "Input to eltwise unary must be tilized"
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_spec,
+    input_a_dtype,
+    eps,
+    *,
+    device,
+) -> list:
+    """Run ttnn.logit on one sharded input and compare it with torch.logit for the same eps.
+
+    Returns ``[pcc_result, e2e_perf]``: the check_with_pcc result and the time measured around the op call.
+    """
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    input_shape, sharding_strategy, shard_orientation, tensor_hw_as_shard_shape, input_layout = parse_sharding_spec(
+        input_spec
+    )
+
+    if input_layout == ttnn.ROW_MAJOR_LAYOUT:
+        input_shape = sanitize_shape_rm(input_shape)
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+    torch_output_tensor = torch.logit(torch_input_tensor_a, eps)
+
+    sharded_config = ttnn.create_sharded_memory_config(
+        shape=input_shape,
+        core_grid=ttnn.CoreGrid(y=Y, x=X),
+        strategy=sharding_strategy,
+        orientation=shard_orientation,
+        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
+    )
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=sharded_config,
+    )
+
+    start_time = start_measuring_time()
+    output_tensor = ttnn.logit(input_tensor_a, eps=eps, memory_config=sharded_config)
+    e2e_perf = stop_measuring_time(start_time)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    # Compute the PCC check once and return that result; per-vector debug prints were dropped.
+    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
+    return [pcc, e2e_perf]
+
+
+# Run sweeps locally
+if __name__ == "__main__":
+    # Local debugging driver. It is guarded so that merely importing this
+    # module (for example to read `parameters` or call `run`) does not open a
+    # device and execute every vector as an import side effect.
+    from tests.sweep_framework.framework.permutations import *
+
+    start_time = start_measuring_time()
+    for suite in parameters.keys():
+        device_id = 0
+        device = ttnn.open_device(device_id=device_id)
+        suite_vectors = list(permutations(parameters[suite]))
+        print(len(suite_vectors))
+        for vector in suite_vectors:
+            invalidate_res = invalidate_vector(vector)
+            if invalidate_res[0]:
+                print(f"Invalidated: {invalidate_res[1]}")
+                continue
+            try:
+                passed, _ = run(**vector, device=device)
+            except Exception as e:
+                print(e)
+
+        ttnn.close_device(device)
+
+    e2e_perf = stop_measuring_time(start_time)
+    print(f"time {e2e_perf / 1000000000}s")
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/mish.py b/tests/sweep_framework/sweeps/eltwise/unary/mish/mish.py
similarity index 100%
rename from tests/sweep_framework/sweeps/eltwise/unary/mish.py
rename to tests/sweep_framework/sweeps/eltwise/unary/mish/mish.py
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/mish/mish_sharded.py b/tests/sweep_framework/sweeps/eltwise/unary/mish/mish_sharded.py
new file mode 100644
index 00000000000..727a982133b
--- /dev/null
+++ b/tests/sweep_framework/sweeps/eltwise/unary/mish/mish_sharded.py
@@ -0,0 +1,139 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import json
+import torch
+import random
+import ttnn
+import math
+from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm, get_device_grid_size
+from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf, gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 120
+Y, X = get_device_grid_size()
+
+random.seed(0)
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_spec": gen_sharded_spec_unary(16, Y, X),
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    """Reject test vectors whose input layout is row-major.
+
+    Returns ``(True, reason)`` for an invalid vector and ``(False, None)`` otherwise.
+    """
+    # Only the layout (the last value stored in the input spec) decides validity here.
+    *_, input_layout = test_vector["input_spec"].values()
+
+    if input_layout == "ROW_MAJOR_LAYOUT":
+        # The former bfloat8_b-specific check sat behind this return and could never fire.
+        return True, "Input to eltwise unary must be tilized"
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_spec,
+    input_a_dtype,
+    *,
+    device,
+) -> list:
+    """Run ttnn.mish on one sharded input and compare it with the golden function registered for ttnn.mish.
+
+    Returns ``[pcc_result, e2e_perf]``: the check_with_pcc result and the time measured around the op call.
+    """
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    input_shape, sharding_strategy, shard_orientation, tensor_hw_as_shard_shape, input_layout = parse_sharding_spec(
+        input_spec
+    )
+
+    if input_layout == ttnn.ROW_MAJOR_LAYOUT:
+        input_shape = sanitize_shape_rm(input_shape)
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+
+    golden_function = ttnn.get_golden_function(ttnn.mish)
+    torch_output_tensor = golden_function(torch_input_tensor_a)
+
+    sharded_config = ttnn.create_sharded_memory_config(
+        shape=input_shape,
+        core_grid=ttnn.CoreGrid(y=Y, x=X),
+        strategy=sharding_strategy,
+        orientation=shard_orientation,
+        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
+    )
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=sharded_config,
+    )
+
+    start_time = start_measuring_time()
+    output_tensor = ttnn.mish(input_tensor_a, memory_config=sharded_config)
+    e2e_perf = stop_measuring_time(start_time)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    # Compute the PCC check once and return that result; per-vector debug prints were dropped.
+    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
+    return [pcc, e2e_perf]
+
+
+# Run sweeps locally
+if __name__ == "__main__":
+    # Local debugging driver. It is guarded so that merely importing this
+    # module (for example to read `parameters` or call `run`) does not open a
+    # device and execute every vector as an import side effect.
+    from tests.sweep_framework.framework.permutations import *
+
+    start_time = start_measuring_time()
+    for suite in parameters.keys():
+        device_id = 0
+        device = ttnn.open_device(device_id=device_id)
+        suite_vectors = list(permutations(parameters[suite]))
+        print(len(suite_vectors))
+        for vector in suite_vectors:
+            invalidate_res = invalidate_vector(vector)
+            if invalidate_res[0]:
+                print(f"Invalidated: {invalidate_res[1]}")
+                continue
+            try:
+                passed, _ = run(**vector, device=device)
+            except Exception as e:
+                print(e)
+
+        ttnn.close_device(device)
+
+    e2e_perf = stop_measuring_time(start_time)
+    print(f"time {e2e_perf / 1000000000}s")
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/multigammaln.py b/tests/sweep_framework/sweeps/eltwise/unary/multigammaln/multigammaln.py
similarity index 100%
rename from tests/sweep_framework/sweeps/eltwise/unary/multigammaln.py
rename to tests/sweep_framework/sweeps/eltwise/unary/multigammaln/multigammaln.py