#15099: add tests for forge transpose case
#12705: enable odd inner-dim RM bfloat16 untilize and add some unit tests
#14227: add tests for odd inner-dim RM bfloat16 transpose
#13749: unaligned RM transpose fix
- also add more failing pt 2.0 cases
sjameelTT committed Nov 22, 2024
1 parent e7cd350 commit b36462c
Showing 4 changed files with 110 additions and 8 deletions.
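For context on the diff below: the change enabled by #12705 is that a row-major bfloat16 tensor with an odd inner dimension can now be tilized and untilized on device. The sketch below is illustrative only and is not part of this commit; it assumes a local device opened with ttnn.open_device, and reuses the same from_torch / to_layout / to_torch calls exercised by the new tests (the 18 x 65 shape mirrors one of the cases parametrized in test_to_layout_device).

import torch
import ttnn

# Illustrative reproduction of the odd inner-dim row-major case (not part of this diff).
device = ttnn.open_device(device_id=0)  # assumes a local Tenstorrent device is available

torch_input = torch.rand((18, 65), dtype=torch.bfloat16)  # inner dim 65 is odd

# Start in row-major on device, convert to tile layout and back.
tt_input = ttnn.from_torch(torch_input, device=device, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
tt_tiled = ttnn.to_layout(tt_input, layout=ttnn.TILE_LAYOUT)
tt_row_major = ttnn.to_layout(tt_tiled, layout=ttnn.ROW_MAJOR_LAYOUT)  # untilize path exercised by the new tests

torch_output = ttnn.to_torch(tt_row_major)
assert torch.equal(torch_input, torch_output)  # values should round-trip unchanged

ttnn.close_device(device)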
@@ -14,6 +14,7 @@
shape_wh = [
[[1, 1, 32, 32]], # Single core
[[3, 1, 320, 384]], # Multi core
[[1, 1024, 5, 1280]], # Non page-aligned
]


@@ -9,7 +9,7 @@
import ttnn

from loguru import logger
from models.utility_functions import is_grayskull, is_blackhole
from models.utility_functions import is_grayskull, is_blackhole, torch_random
from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import comp_pcc, comp_equal
from models.utility_functions import skip_for_grayskull, skip_for_blackhole
from tests.ttnn.utils_for_testing import assert_with_pcc
@@ -644,7 +644,7 @@ def test_transpose_bfloat8_b(device, shape, swap_dims):
)
@pytest.mark.parametrize(
"shape",
[(1, 32, 12, 100), (1, 12, 32, 100), (1, 35, 7, 7), (1, 1, 1, 1)],
[(1, 32, 12, 100), (1, 12, 32, 100), (1, 35, 7, 7), (1, 1, 1, 1), (1, 12, 32, 100)],
)
def test_transpose_hc(dtype, shape, device):
if is_grayskull() and dtype == ttnn.float32:
@@ -691,15 +691,22 @@ def test_transpose_2D(dtype, shape, layout, device):
)
@pytest.mark.parametrize(
"shape",
[[32, 1, 32], [32, 1, 12], [1, 1, 35], [1, 16, 32], [2, 34, 8]],
[[32, 1, 32], [32, 1, 12], [1, 1, 35], [1, 16, 32], [2, 34, 8], (32, 12, 100), (6, 33, 34)],
)
@pytest.mark.parametrize(
"layout",
[ttnn.TILE_LAYOUT],
)
@pytest.mark.parametrize(
"dims",
[[0, 1], [0, 2], [2, 1], [-3, -2], [-3, -1], [-2, -1]],
[
[0, 1],
[0, 2],
[2, 1],
[-3, -2],
[-3, -1],
[-2, -1],
],
)
def test_transpose_3D(dtype, shape, layout, dims, device):
torch.manual_seed(2005)
@@ -750,14 +757,14 @@ def test_transpose_4d_wh_tile(shape, device):
@pytest.mark.parametrize(
"config",
[
[[64, 4, 49, 32], [-2, -1], ttnn.ROW_MAJOR_LAYOUT], # Page size must be divisible by sizeof(uint32_t)
[[1, 1370, 1, 3, 1280], [0, -2], ttnn.TILE_LAYOUT], # untilize doesn't work with 4D
[[12, 3], [0, 1], ttnn.ROW_MAJOR_LAYOUT], # need tensor for this one
[[1, 50, 1, 3, 768], [0, -2], ttnn.TILE_LAYOUT], # untilize doesn't work with 4D
[[21843, 768], [0, 1], ttnn.ROW_MAJOR_LAYOUT], # circular buffer overflow
],
)
@pytest.mark.parametrize("memory_config", [ttnn.L1_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG])
def test_transpose_failures(config, memory_config, device):
pytest.skip("Failures to fix after #13217 and #13005 are in - 5D, HC PCC issue and unaligned RM tensor")
pytest.skip("Failing pytorch 2.0 trace sweeps")
torch.manual_seed(2005)
torch_input = torch.randn(config[0], dtype=torch.bfloat16)
torch_output = torch_input.transpose(config[1][0], config[1][1])
@@ -793,6 +800,8 @@ def test_transpose_failures(config, memory_config, device):
[[1, 9, 8, 14], [1, 2], ttnn.ROW_MAJOR_LAYOUT], # unaligned RM that falls back to tiled
[[1, 9, 8, 2], [1, 2], ttnn.ROW_MAJOR_LAYOUT], # unaligned RM that falls back to tiled
[[1, 2, 8, 2], [1, 2], ttnn.ROW_MAJOR_LAYOUT], # unaligned RM that falls back to tiled
[[64, 4, 49, 32], [-2, -1], ttnn.ROW_MAJOR_LAYOUT], # Page size must be divisible by sizeof(uint32_t)
[[12, 3], [0, 1], ttnn.ROW_MAJOR_LAYOUT], # need tensor for this one
[
[1, 8, 4096, 40],
[1, 2],
@@ -943,3 +952,62 @@ def test_transpose_unpadded(shape, dims, layout, dtype, pad_value, device):
assert ttnn.to_torch(a) == float("-inf")
tt_output = ttnn.to_torch(tt_output)
assert_with_pcc(torch_output, tt_output, 0.9999)


@pytest.mark.parametrize("b", [1])
@pytest.mark.parametrize("h", [18])
@pytest.mark.parametrize("w", [65])
@pytest.mark.parametrize("dim0", [1])
@pytest.mark.parametrize("dim1", [2])
def test_transpose_forge_llama(device, b, h, w, dim0, dim1):
torch.manual_seed(2005)

torch_input_tensor = torch_random((b, h, w), -0.1, 0.1, dtype=torch.bfloat16)
torch_output_tensor = torch_input_tensor.transpose(dim0, dim1)

input_tensor = ttnn.to_device(ttnn.from_torch(torch_input_tensor), device, memory_config=ttnn.DRAM_MEMORY_CONFIG)
input_tensor = ttnn.to_layout(input_tensor, layout=ttnn.TILE_LAYOUT)
output_tensor = ttnn.transpose(input_tensor, dim0, dim1, memory_config=ttnn.DRAM_MEMORY_CONFIG)
output_tensor = ttnn.from_device(output_tensor)
output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.ROW_MAJOR_LAYOUT)
output_tensor = ttnn.to_torch(output_tensor)

assert_with_pcc(torch_output_tensor, output_tensor)


@pytest.mark.parametrize("b", [1])
@pytest.mark.parametrize("h", [2])
@pytest.mark.parametrize("w", [3])
@pytest.mark.parametrize("dim0", [-1])
@pytest.mark.parametrize("dim1", [-2])
def test_transpose_forge_basic(device, b, h, w, dim0, dim1):
torch.manual_seed(2005)
torch_input_tensor = torch_random((1, b, h, w), -0.1, 0.1, dtype=torch.bfloat16)
torch_output_tensor = torch_input_tensor.transpose(dim0, dim1)
input_tensor = ttnn.to_device(ttnn.from_torch(torch_input_tensor), device, memory_config=ttnn.DRAM_MEMORY_CONFIG)
input_tensor = ttnn.to_layout(input_tensor, layout=ttnn.TILE_LAYOUT)
output_tensor = ttnn.transpose(input_tensor, dim0, dim1, memory_config=ttnn.DRAM_MEMORY_CONFIG)
output_tensor = ttnn.from_device(output_tensor)
output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.ROW_MAJOR_LAYOUT)
output_tensor = ttnn.to_torch(output_tensor)

assert_with_pcc(torch_output_tensor, output_tensor)


@pytest.mark.parametrize("b", [6])
@pytest.mark.parametrize("h", [33])
@pytest.mark.parametrize("w", [34])
@pytest.mark.parametrize("dim0", [1])
@pytest.mark.parametrize("dim1", [0])
def test_transpose_forge_hc(device, b, h, w, dim0, dim1):
torch.manual_seed(2005)
torch_input_tensor = torch_random((1, b, h, w), -0.1, 0.1, dtype=torch.bfloat16)
torch_output_tensor = torch_input_tensor.transpose(dim0, dim1)
input_tensor = ttnn.to_device(ttnn.from_torch(torch_input_tensor), device, memory_config=ttnn.DRAM_MEMORY_CONFIG)
input_tensor = ttnn.to_layout(input_tensor, layout=ttnn.TILE_LAYOUT)
output_tensor = ttnn.transpose(input_tensor, dim0, dim1, memory_config=ttnn.DRAM_MEMORY_CONFIG)
output_tensor = ttnn.from_device(output_tensor)
output_tensor = ttnn.to_layout(output_tensor, layout=ttnn.ROW_MAJOR_LAYOUT)
output_tensor = ttnn.to_torch(output_tensor)

assert_with_pcc(torch_output_tensor, output_tensor)
34 changes: 34 additions & 0 deletions tests/ttnn/unit_tests/test_to_layout.py
@@ -10,6 +10,7 @@
import ttnn

from tests.ttnn.utils_for_testing import assert_with_pcc, check_with_pcc_without_tensor_printout
from models.utility_functions import is_grayskull, is_blackhole, torch_random


@pytest.mark.parametrize("height", [32, 30])
@@ -125,3 +126,36 @@ def test_untilize_with_unpadding_W_16(device, in_dtype, use_multicore, use_pack_
passing, pcc_msg = check_with_pcc_without_tensor_printout(torch_input, output_torch)
logger.info(pcc_msg)
assert passing


@pytest.mark.parametrize("h", [1, 18, 65])
@pytest.mark.parametrize("w", [1, 15, 17, 29, 33, 49, 63, 65])
@pytest.mark.parametrize("input_layout", [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT])
@pytest.mark.parametrize("output_layout", [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT])
def test_to_layout_device(device, h, w, input_layout, output_layout):
torch.manual_seed(2005)
torch_input_tensor = torch_random((h, w), -0.1, 0.1, dtype=torch.bfloat16)
input_tensor = ttnn.from_torch(torch_input_tensor, device=device, dtype=ttnn.bfloat16, layout=input_layout)
new_layout_tensor = ttnn.to_layout(input_tensor, layout=output_layout)
torch_brought_back = ttnn.to_torch(new_layout_tensor)

assert_with_pcc(torch_input_tensor, torch_brought_back)


@pytest.mark.parametrize("device_params", [{"l1_small_size": 16384}], indirect=True)
def test_to_layout_unet_shallow(device, use_program_cache):
torch_input = torch.rand([1, 1, 337920, 1])
input = ttnn.from_torch(torch_input, dtype=ttnn.bfloat16)

input = ttnn.to_layout(input, ttnn.TILE_LAYOUT)
input = ttnn.to_device(input, device)

sharded_memory_config = ttnn.create_sharded_memory_config(
[1, 1, 337920, 32], ttnn.CoreGrid(x=8, y=8), ttnn.ShardStrategy.HEIGHT
)
input = ttnn.to_memory_config(input, sharded_memory_config)
input = ttnn.to_memory_config(input, ttnn.L1_MEMORY_CONFIG)

input = ttnn.to_layout(input, ttnn.ROW_MAJOR_LAYOUT) # This fails
torch_output = ttnn.to_torch(input)
assert_with_pcc(torch_input, torch_output)
@@ -17,7 +17,6 @@ void UntilizeWithUnpadding::validate(const std::vector<Tensor>& input_tensors) c
TT_FATAL(input_tensor_a.get_layout() == Layout::TILE, "Can only untilize tile major data");

TT_FATAL(input_tensor_a.volume() % tt::constants::TILE_HW == 0, "Error");
TT_FATAL(((this->output_tensor_end[-1] + 1) % 2 == 0), "Can only unpad to row major tensor of even width");

if (input_tensor_a.memory_config().is_sharded()) {
if (input_tensor_a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED) {
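The TT_FATAL removed above is the validation that previously forced untilize-with-unpadding to an even row-major output width; the new odd inner-dim tests earlier in this commit now cover that path instead. As a hedged sketch (not taken from this commit, with a shape chosen only for illustration), a call of the kind that used to trip this check goes through ttnn.to_layout:

import torch
import ttnn

device = ttnn.open_device(device_id=0)  # assumes a local Tenstorrent device is available

# Tile-layout bfloat16 tensor whose logical width (49) is odd; converting back to
# row-major untilizes with unpadding, the path whose even-width check was removed here.
torch_input = torch.rand((1, 1, 64, 49), dtype=torch.bfloat16)
tt_input = ttnn.from_torch(torch_input, device=device, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT)

tt_output = ttnn.to_layout(tt_input, layout=ttnn.ROW_MAJOR_LAYOUT)
torch_output = ttnn.to_torch(tt_output)
assert torch_output.shape == torch_input.shape

ttnn.close_device(device)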
