Add unary sharded sweeps #15300

Status: Open (wants to merge 22 commits into base: main)

Commits (22)
8c3b52e
#11512: Add tril_sharded sweep
amalbasaTT Nov 7, 2024
d673337
#11512: Add triu_sharded sweep, refactor triu sweep
amalbasaTT Nov 7, 2024
f61e131
#11512: Refactor remainder_unary sharded, tril_sharded and triu_shard…
amalbasaTT Nov 8, 2024
8dcab60
#11512: Fix sharded sweeps generation function
amalbasaTT Nov 8, 2024
17442d5
#11512: Add sweeps for sharded unary ops: hardsigmoid, hardswish and …
amalbasaTT Nov 13, 2024
def6cb9
#11512: Refactoring unary sharded ops sweeps
amalbasaTT Nov 18, 2024
a9fcfd8
11512: Add some unary sharded ops sweeps
amalbasaTT Nov 21, 2024
0c90967
#11512: Refactoring triu and tril sharded, creating sharding_utils.py
amalbasaTT Nov 21, 2024
a3940e2
#11512: Refactoring
amalbasaTT Nov 22, 2024
af89e8f
#11512: Minor fixes
amalbasaTT Nov 22, 2024
c5354e6
#11512 Ad sharded sweeps to ttnn-run-sweeps.yaml
amalbasaTT Nov 22, 2024
c74ced7
#11512: Refactor hardtanh.py sweep
amalbasaTT Nov 22, 2024
6a384b2
#11512: Minor fix
amalbasaTT Nov 22, 2024
e85182e
#11512: Hardsigmod sweep minor fix
amalbasaTT Nov 25, 2024
651c54e
#11512: Hardtanh sweep minor fix
amalbasaTT Nov 25, 2024
7852b6a
#11512: Refactoring
amalbasaTT Nov 26, 2024
9002e9c
#11512: Add heaviside sharded sweep, modify existing heaviside sweep
amalbasaTT Nov 26, 2024
012c593
#11512: Add heaviside shard to ttnn-run-sweeps
amalbasaTT Nov 26, 2024
7230861
#11512: Refactor heaviside_sharded sweep
amalbasaTT Nov 27, 2024
da507a0
#11512: Modifying invalidate_vector functions in sharded sweeps
amalbasaTT Nov 27, 2024
c16e3c0
#11512: Add new sharding function to existing sharded sweeps
amalbasaTT Nov 29, 2024
75906f9
#11512: Update heaviside sharded to use new sharding funciton
amalbasaTT Nov 29, 2024
7 changes: 7 additions & 0 deletions .github/workflows/ttnn-run-sweeps.yaml
@@ -93,14 +93,21 @@ on:
- eltwise.unary.sigmoid.sigmoid_pytorch2
- eltwise.unary.sigmoid_accurate.sigmoid_accurate
- eltwise.unary.tril.tril
- eltwise.unary.tril.tril_sharded
- eltwise.unary.triu.triu
- eltwise.unary.triu.triu_sharded
- eltwise.unary.normalize_hw.normalize_hw
- eltwise.unary.normalize_global.normalize_global
- eltwise.unary.heaviside.heaviside
- eltwise.unary.heaviside.heaviside_sharded
- eltwise.unary.hardtanh.hardtanh
- eltwise.unary.hardtanh.hardtanh_sharded
- eltwise.unary.hardswish.hardswish
- eltwise.unary.hardswish.hardswish_sharded
- eltwise.unary.hardsigmoid.hardsigmoid
- eltwise.unary.hardsigmoid.hardsigmoid_sharded
- eltwise.unary.hardshrink.hardshrink
- eltwise.unary.hardshrink.hardshrink_sharded
- eltwise.unary.softmax.softmax
- eltwise.unary.identity.identity
- eltwise.unary.neg.neg
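The seven new *_sharded entries above match the "7 additions" in this hunk, one per new sweep module. As a rough illustration of how such a module might build its parameter space from the generator added below in sharding_utils.py (the suite name, sample counts, dtype, and layout here are illustrative assumptions, not values taken from this PR):

```python
import ttnn

from tests.sweep_framework.sweep_utils.sharding_utils import gen_unary_sharded_spec

# Hypothetical sweep parameter space; keys and values are illustrative only.
parameters = {
    "nightly": {
        "input_spec": list(gen_unary_sharded_spec(4, 2, "ROW_MAJOR", "BLOCK")),
        "input_a_dtype": [ttnn.bfloat16],
        "input_layout": [ttnn.TILE_LAYOUT],
    },
}
```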
282 changes: 282 additions & 0 deletions tests/sweep_framework/sweep_utils/sharding_utils.py
@@ -0,0 +1,282 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0


import torch
import random
import ttnn
import math

from tests.sweep_framework.sweep_utils.utils import get_device_grid_size
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import (
    gen_func_with_cast_tt,
    _gen_reshape_args_from_volume,
    _get_factors,
)


Y, X = get_device_grid_size()


def divup(a, b):
    return (a + b - 1) // b


def roundup(a, b):
    result = divup(a, b) * b
    return result
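# Quick illustrative sanity check of the two helpers above (not part of this
# PR's diff): divup(50, 32) == 2, roundup(50, 32) == 64, and roundup(64, 32)
# stays 64 because 64 is already a multiple of 32.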


def gen_unary_sharded_spec(
    num_shapes,
    num_core_samples,
    shard_orientation,
    sharding_strategy,
    shard_height_mul_of_32=True,
    max_tensor_size_per_core=256 * 256,
):
    assert sharding_strategy in ["BLOCK", "WIDTH", "HEIGHT", "TENSOR_HW"]

    assert shard_orientation in ["COL_MAJOR", "ROW_MAJOR"]

    for i in range(num_core_samples):
        y = random.randint(1, Y)
        x = random.randint(1, X)
        max_tensor_size = y * x * max_tensor_size_per_core
        for j in range(num_shapes):
            for rank in [2, 3, 4]:
                if sharding_strategy == "TENSOR_HW":
                    min_tensor_height = 32
                    min_tensor_width = 32
                    max_tensor_height = int(math.sqrt(max_tensor_size_per_core))
                    max_tensor_width = int(math.sqrt(max_tensor_size_per_core))
                    interval = 32

                    tensor_height = random.randrange(min_tensor_height, max_tensor_height + 1, interval)
                    tensor_width = random.randrange(min_tensor_width, max_tensor_width + 1, interval)
                    input_shape = [tensor_height, tensor_width]
                    if rank != 2:
                        rest_volume = random.randint(1, max_tensor_size // (tensor_height * tensor_width))
                        rest_dims = random.choice(_gen_reshape_args_from_volume(rest_volume, step=1, out_dims=rank - 2))
                        rest_dims = list(rest_dims["reshape_dims"])
                        # Prepend the extra dims so the shape has the requested rank.
                        input_shape = rest_dims + input_shape

                elif sharding_strategy == "BLOCK":
                    if shard_orientation == "ROW_MAJOR":
                        if not shard_height_mul_of_32:
                            min_pre_sharded_height = 32 * y
                        else:
                            min_pre_sharded_height = 1
                        min_pre_sharded_width = 32 * x
                        max_pre_sharded_height = int(math.sqrt(max_tensor_size_per_core)) * y
                        max_pre_sharded_width = int(math.sqrt(max_tensor_size_per_core)) * x
                        interval_height = 32 * y
                        interval_width = 32 * x
                    else:
                        if not shard_height_mul_of_32:
                            min_pre_sharded_height = 32 * x
                        else:
                            min_pre_sharded_height = 1
                        min_pre_sharded_width = 32 * y
                        max_pre_sharded_height = int(math.sqrt(max_tensor_size_per_core)) * x
                        max_pre_sharded_width = int(math.sqrt(max_tensor_size_per_core)) * y
                        interval_height = 32 * x
                        interval_width = 32 * y

                    pre_sharded_height = random.randrange(
                        min_pre_sharded_height, max_pre_sharded_height + 1, interval_height
                    )
                    pre_sharded_width = random.randrange(
                        min_pre_sharded_width, max_pre_sharded_width + 1, interval_width
                    )

                    if shard_height_mul_of_32:
                        # Tensor height could grow beyond the maximum allowed when padding it
                        # to be a multiple of total_num_cores * 32.
                        height_round_up = 32 * y if shard_orientation == "ROW_MAJOR" else 32 * x
                        width_round_up = 32 * x if shard_orientation == "ROW_MAJOR" else 32 * y
                        while roundup(pre_sharded_height, height_round_up) > max_pre_sharded_height:
                            pre_sharded_height = random.randrange(
                                min_pre_sharded_height, max_pre_sharded_height + 1, interval_height
                            )
                        while roundup(pre_sharded_width, width_round_up) > max_pre_sharded_width:
                            pre_sharded_width = random.randrange(
                                min_pre_sharded_width, max_pre_sharded_width + 1, interval_width
                            )

                    input_shape = random.choice(
                        _gen_reshape_args_from_volume(pre_sharded_height, step=1, out_dims=rank - 1)
                    )
                    input_shape = list(input_shape["reshape_dims"])
                    input_shape.append(pre_sharded_width)

                elif sharding_strategy == "HEIGHT":
                    if not shard_height_mul_of_32:
                        min_pre_sharded_height = 32 * y * x
                        interval = 32 * y * x
                    else:
                        min_pre_sharded_height = 1
                        interval = 1
                    min_pre_sharded_width = 32

                    max_pre_sharded_height = int(math.sqrt(max_tensor_size))
                    max_pre_sharded_width = max_pre_sharded_height

                    pre_sharded_width = random.randrange(min_pre_sharded_width, max_pre_sharded_width + 1, 32)

                    pre_sharded_height = random.randrange(min_pre_sharded_height, max_pre_sharded_height + 1, interval)
                    if shard_height_mul_of_32:
                        # Tensor height could grow beyond the maximum allowed when padding it
                        # to be a multiple of total_num_cores * 32.
                        while roundup(pre_sharded_height, y * x * 32) > max_tensor_size // pre_sharded_width:
                            pre_sharded_height = random.randrange(
                                min_pre_sharded_height, max_pre_sharded_height + 1, interval
                            )

                    input_shape = random.choice(
                        _gen_reshape_args_from_volume(pre_sharded_height, step=1, out_dims=rank - 1)
                    )
                    input_shape = list(input_shape["reshape_dims"])
                    input_shape.append(pre_sharded_width)
                else:
                    if not shard_height_mul_of_32:
                        min_pre_sharded_height = 32
                        interval = 32
                    else:
                        min_pre_sharded_height = 1
                        interval = 1

                    min_pre_sharded_width = 32 * y * x
                    max_pre_sharded_height = int(math.sqrt(max_tensor_size))
                    max_pre_sharded_width = max_pre_sharded_height

                    pre_sharded_height = random.randrange(min_pre_sharded_height, max_pre_sharded_height + 1, interval)
                    if shard_height_mul_of_32:
                        # Tensor height could grow beyond the maximum allowed when padding it
                        # to be a multiple of total_num_cores * 32.
                        while roundup(pre_sharded_height, y * x * 32) > max_pre_sharded_height:
                            pre_sharded_height = random.randrange(
                                min_pre_sharded_height, max_pre_sharded_height + 1, interval
                            )
                    pre_sharded_width = random.randrange(min_pre_sharded_width, max_pre_sharded_width + 1, 32 * y * x)

                    input_shape = random.choice(
                        _gen_reshape_args_from_volume(pre_sharded_height, step=1, out_dims=rank - 1)
                    )
                    input_shape = list(input_shape["reshape_dims"])
                    input_shape.append(pre_sharded_width)

                yield {
                    "input_shape": input_shape,
                    "core_grid_size": (y, x),
                    "sharding_strategy": sharding_strategy,
                    "shard_orientation": shard_orientation,
                    "shard_height_mul_of_32": shard_height_mul_of_32,
                }


def parse_sharding_spec(input_spec):
    input_shape = input_spec["input_shape"]
    sharding_strategy = input_spec["sharding_strategy"]
    shard_orientation = input_spec["shard_orientation"]
    core_grid_size = input_spec["core_grid_size"]
    shard_height_mul_of_32 = input_spec["shard_height_mul_of_32"]

    assert sharding_strategy in ["HEIGHT", "WIDTH", "BLOCK", "TENSOR_HW"]

    tensor_hw_as_shard_shape = False

    if sharding_strategy == "HEIGHT":
        sharding_strategy = ttnn.ShardStrategy.HEIGHT
    elif sharding_strategy == "WIDTH":
        sharding_strategy = ttnn.ShardStrategy.WIDTH
    elif sharding_strategy == "BLOCK":
        sharding_strategy = ttnn.ShardStrategy.BLOCK
    else:
        sharding_strategy = ttnn.ShardStrategy.BLOCK
        tensor_hw_as_shard_shape = True

    if shard_orientation == "COL_MAJOR":
        shard_orientation = ttnn.ShardOrientation.COL_MAJOR
    else:
        shard_orientation = ttnn.ShardOrientation.ROW_MAJOR

    return (
        input_shape,
        core_grid_size,
        shard_orientation,
        sharding_strategy,
        tensor_hw_as_shard_shape,
        shard_height_mul_of_32,
    )


def invalidate_vector_sharding(
    input_shape, input_layout, core_grid_size, sharding_strategy, shard_orientation, tensor_hw_as_shard_shape
):
    y, x = core_grid_size
    pre_sharded_height = math.prod(input_shape[:-1])
    pre_sharded_width = input_shape[-1]

    if not tensor_hw_as_shard_shape:
        if sharding_strategy == ttnn.ShardStrategy.BLOCK:
            if shard_orientation == ttnn.ShardOrientation.ROW_MAJOR:
                if pre_sharded_height % y != 0:
                    return (
                        True,
                        "Prod of all dimensions except the innermost must be divisible by the y coordinate of the core grid when using block sharding",
                    )
                if pre_sharded_width % x != 0:
                    return (
                        True,
                        "Innermost dimension must be divisible by the x coordinate of the core grid when using block sharding",
                    )
                if (pre_sharded_height // y) % 32 != 0:
                    return True, "Shard height must be a multiple of input tile size"
                if (pre_sharded_width // x) % 32 != 0:
                    return True, "Shard width must be a multiple of input tile size"
            else:
                if pre_sharded_height % x != 0:
                    return (
                        True,
                        "Prod of all dimensions except the innermost must be divisible by the x coordinate of the core grid when using block sharding",
                    )
                if pre_sharded_width % y != 0:
                    return (
                        True,
                        "Innermost dimension must be divisible by the y coordinate of the core grid when using block sharding",
                    )
                if (pre_sharded_height // x) % 32 != 0:
                    return True, "Shard height must be a multiple of input tile size"
                if (pre_sharded_width // y) % 32 != 0:
                    return True, "Shard width must be a multiple of input tile size"

        elif sharding_strategy == ttnn.ShardStrategy.WIDTH:
            if pre_sharded_width % (y * x) != 0:
                return True, "Last dimension must be divisible by the total number of cores when using width sharding"
            if pre_sharded_height % 32 != 0:
                return True, "Shard height must be a multiple of input tile size"
            if (pre_sharded_width // (x * y)) % 32 != 0:
                return True, "Shard width must be a multiple of input tile size"

        else:
            if pre_sharded_height % (y * x) != 0:
                return (
                    True,
                    "Prod of all dimensions except the innermost must be divisible by the total number of cores when using height sharding",
                )
            if (pre_sharded_height // (x * y)) % 32 != 0:
                return True, "Shard height must be a multiple of input tile size"
            if pre_sharded_width % 32 != 0:
                return True, "Shard width must be a multiple of input tile size"

    else:
        if input_shape[-2] % 32 != 0 or input_shape[-1] % 32 != 0:
            return (
                True,
                "Last two dimensions must be multiples of tile size when using tensor height and width as shard shape",
            )
        if input_layout == ttnn.ROW_MAJOR_LAYOUT and (input_shape[-1] % input_shape[-2] != 0):
            return True, "Physical size <width, height> must be a multiple of page size <1, width>"

    return False, ""
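Taken together, the three helpers above define the generate, parse, and validate flow that the new *_sharded sweeps share. The sketch below shows how a sweep's run step might wire them into an actual op call. It is an illustration only, not code from this PR: the choice of hardtanh, the dtype and layout, and the device handling are assumptions, and the usual PCC comparison against a torch reference is omitted.

```python
import torch
import ttnn

from tests.sweep_framework.sweep_utils.sharding_utils import (
    gen_unary_sharded_spec,
    parse_sharding_spec,
    invalidate_vector_sharding,
)


def run_one_sharded_case(device):
    # Draw a single test vector from the generator (ROW_MAJOR block sharding here).
    input_spec = next(gen_unary_sharded_spec(1, 1, "ROW_MAJOR", "BLOCK"))
    (
        input_shape,
        core_grid_size,
        shard_orientation,
        sharding_strategy,
        tensor_hw_as_shard_shape,
        shard_height_mul_of_32,
    ) = parse_sharding_spec(input_spec)
    y, x = core_grid_size

    # Skip vectors the sweep would mark invalid.
    invalid, reason = invalidate_vector_sharding(
        input_shape, ttnn.TILE_LAYOUT, core_grid_size, sharding_strategy, shard_orientation, tensor_hw_as_shard_shape
    )
    if invalid:
        return reason

    sharded_config = ttnn.create_sharded_memory_config(
        shape=input_shape,
        core_grid=ttnn.CoreGrid(y=y, x=x),
        strategy=sharding_strategy,
        orientation=shard_orientation,
        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
    )

    torch_input = torch.randn(input_shape, dtype=torch.float32)
    tt_input = ttnn.from_torch(
        torch_input,
        dtype=ttnn.bfloat16,
        layout=ttnn.TILE_LAYOUT,
        device=device,
        memory_config=sharded_config,
    )
    tt_output = ttnn.hardtanh(tt_input, memory_config=sharded_config)
    return ttnn.to_torch(tt_output)
```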
43 changes: 43 additions & 0 deletions tests/sweep_framework/sweep_utils/utils.py
@@ -3,6 +3,7 @@
# SPDX-License-Identifier: Apache-2.0


import os
import random
from loguru import logger
from itertools import product
@@ -247,3 +248,45 @@ def complex_from_torch(torch_tensor, dtype, layout, memory_config, device):
        memory_config=memory_config,
    )
    return ttnn.complex_tensor(tt_real, tt_imag)


def get_device_grid_size():
    device_name = os.environ.get("ARCH_NAME", os.environ.get("TT_ARCH_NAME", "default")).lower()
    assert device_name in ["wormhole_b0", "grayskull"]
    if device_name == "grayskull":
        y, x = 9, 12
    else:
        y, x = 8, 8

    return y, x


def get_sharded_config(input_shape, sharding_strategy, device_grid_size, shard_orientation):
    assert sharding_strategy in ["block", "width", "height", "tensor_hw"]
    assert shard_orientation in ["col_major", "row_major"]

    if shard_orientation == "col_major":
        orientation = ttnn.ShardOrientation.COL_MAJOR
    else:
        orientation = ttnn.ShardOrientation.ROW_MAJOR

    if sharding_strategy == "block":
        strategy = ttnn.ShardStrategy.BLOCK
    elif sharding_strategy == "width":
        strategy = ttnn.ShardStrategy.WIDTH
    elif sharding_strategy == "height":
        strategy = ttnn.ShardStrategy.HEIGHT
    else:
        strategy = ttnn.ShardStrategy.BLOCK

    tensor_hw_as_shard_shape = sharding_strategy == "tensor_hw"

    sharded_config = ttnn.create_sharded_memory_config(
Review comment (Contributor): can you replace with ttnn.create_sharded_memory_config_?
        shape=input_shape,
        core_grid=device_grid_size,
        strategy=strategy,
        orientation=orientation,
        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
    )

    return sharded_config
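For reference, a minimal sketch of how a sweep might call the new helper, assuming a ttnn.CoreGrid is an acceptable core_grid argument; the shape and grid values below are made up for illustration:

```python
import ttnn

from tests.sweep_framework.sweep_utils.utils import get_device_grid_size, get_sharded_config

# Pick the grid for the current architecture (reads ARCH_NAME / TT_ARCH_NAME).
y, x = get_device_grid_size()

# Build a block-sharded memory config for an illustrative 4D shape.
memory_config = get_sharded_config(
    input_shape=[1, 2, 32 * y, 32 * x],
    sharding_strategy="block",
    device_grid_size=ttnn.CoreGrid(y=y, x=x),
    shard_orientation="row_major",
)
```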