
Commit

#0: remove test code
yugaoTT committed Nov 29, 2024
1 parent c5cfaa0 commit 0d77265
Showing 1 changed file with 0 additions and 142 deletions.
@@ -2,13 +2,6 @@

# SPDX-License-Identifier: Apache-2.0

import math
import torch
import ttnn
import pytest
from tqdm import tqdm


import pytest
from loguru import logger
from models.utility_functions import is_wormhole_b0, is_grayskull, is_blackhole, skip_for_wormhole_b0
@@ -27,141 +20,6 @@
)


@pytest.mark.timeout(300)
def test_dram_sharded_matmul(device, use_program_cache, reset_seeds):
    # Model configuration
    dim = 4096
    vocab_size = 128256
    split_size = vocab_size // 2

    # Create dummy input
    batch_size = 1
    seq_len = 1
    x = torch.randn(batch_size, seq_len, dim)

    # Create dummy weights and split them
    output_weight = torch.randn(vocab_size, dim)
    output_weight_1 = output_weight[:split_size]
    output_weight_2 = output_weight[split_size:]

    # Perform PyTorch matmul for comparison
    reference_output = torch.matmul(x, output_weight.t())

    # Configure memory layout for output_weight (for both parts)
    def create_output_mem_config(size):
        # Calculate padded size to ensure it's divisible by (32 * 12)
        padded_size = math.ceil(size / (32 * 12)) * (32 * 12)
        if padded_size != size:
            print(f"Original size: {size}, Padded size: {padded_size}")
        shard_spec = ttnn.ShardSpec(
            ttnn.CoreRangeSet(
                {
                    ttnn.CoreRange(
                        ttnn.CoreCoord(0, 0),
                        ttnn.CoreCoord(device.dram_grid_size().x - 1, device.dram_grid_size().y - 1),
                    )
                }
            ),
            (4096, padded_size // 12),
            ttnn.ShardOrientation.ROW_MAJOR,
            False,
        )
        return ttnn.MemoryConfig(ttnn.TensorMemoryLayout.WIDTH_SHARDED, ttnn.BufferType.DRAM, shard_spec)

    # Convert output_weight parts to ttnn tensors
    output_weight_ttnn_1 = ttnn.as_tensor(
        output_weight_1.permute(1, 0),
        device=device,
        memory_config=create_output_mem_config(split_size),
        layout=ttnn.TILE_LAYOUT,
        dtype=ttnn.bfloat8_b,
    )
    output_weight_ttnn_2 = ttnn.as_tensor(
        output_weight_2.permute(1, 0),
        device=device,
        memory_config=create_output_mem_config(split_size),
        layout=ttnn.TILE_LAYOUT,
        dtype=ttnn.bfloat8_b,
    )

    # Convert input to ttnn tensor
    x_ttnn = ttnn.from_torch(
        x,
        device=device,
        dtype=ttnn.bfloat16,
        layout=ttnn.TILE_LAYOUT,
        memory_config=ttnn.create_sharded_memory_config(
            (32, 4096 // 64),  # Shard shape: [32, 64] -> 1 shard per core
            ttnn.CoreGrid(y=8, x=8),
            ttnn.ShardStrategy.WIDTH,
            ttnn.ShardOrientation.ROW_MAJOR,
            use_height_and_width_as_shard_shape=True,
        ),
    )

    # Configure compute kernel
    compute_kernel_config = ttnn.WormholeComputeKernelConfig(
        math_fidelity=ttnn.MathFidelity.HiFi2,
        math_approx_mode=False,
        fp32_dest_acc_en=False,
        packer_l1_acc=True,
    )

    # Configure program
    program_config = ttnn.MatmulMultiCoreReuseMultiCastDRAMShardedProgramConfig(
        in0_block_w=1,
        per_core_M=1,
        per_core_N=32,  # 128256 / 2 / tile_size / core_count
        fused_activation=None,
    )

    for i in tqdm(range(100)):
        # Run the linear layers
        output_1 = ttnn.linear(
            x_ttnn,
            output_weight_ttnn_1,
            compute_kernel_config=compute_kernel_config,
            program_config=program_config,
            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
            dtype=ttnn.bfloat8_b,
        )
        output_2 = ttnn.linear(
            x_ttnn,
            output_weight_ttnn_2,
            compute_kernel_config=compute_kernel_config,
            program_config=program_config,
            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
            dtype=ttnn.bfloat8_b,
        )

    output_1 = ttnn.sharded_to_interleaved(output_1)
    output_2 = ttnn.sharded_to_interleaved(output_2)

    # Concatenate the outputs
    output = ttnn.concat([output_1, output_2], dim=-1)

    # Convert output back to PyTorch tensor
    ttnn_output_torch = ttnn.to_torch(output)

    # Assertions
    assert ttnn_output_torch.shape == (
        batch_size,
        seq_len,
        vocab_size,
    ), f"Expected output shape {(batch_size, seq_len, vocab_size)}, but got {ttnn_output_torch.shape}"
    assert not torch.isnan(ttnn_output_torch).any(), "Output contains NaN values"
    assert not torch.isinf(ttnn_output_torch).any(), "Output contains infinite values"

    print("Output shape:", ttnn_output_torch.shape)
    print("TTNN output sample:", ttnn_output_torch[0, 0, :10].tolist())  # Print first 10 elements of the TTNN output
    print(
        "Reference output sample:", reference_output[0, 0, :10].tolist()
    )  # Print first 10 elements of the reference output

    # Compare TTNN output with PyTorch matmul
    pcc = ttnn.pearson_correlation_coefficient(ttnn_output_torch.flatten(), reference_output.flatten())


def find_max_subblock(out_block_h, out_block_w):
max_product = 0
best_h = 1
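For reference, the shard-width arithmetic inside create_output_mem_config in the removed test works out as follows. This is a minimal sketch under the assumption that 32 is the tile width and 12 matches the number of DRAM banks spanned by the shard spec (the code divides the padded width by 12 and spans the core range returned by device.dram_grid_size()):

import math

size = 128256 // 2  # split_size from the removed test
padded_size = math.ceil(size / (32 * 12)) * (32 * 12)
assert padded_size == 64128  # 64128 is already a multiple of 384, so no padding is added
shard_width = padded_size // 12
assert shard_width == 5344  # 167 tiles of width 32 per DRAM shard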

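Note that the removed test computed a PCC against the PyTorch reference on its last line but never asserted on it. For context, a minimal standalone sketch of how such a comparison is typically closed out, in plain PyTorch; the helper name and the 0.99 threshold are illustrative assumptions, not taken from the original test:

import torch

def pearson_cc(a: torch.Tensor, b: torch.Tensor) -> float:
    # Pearson correlation coefficient between two flattened tensors.
    a = a.flatten().to(torch.float32)
    b = b.flatten().to(torch.float32)
    a = a - a.mean()
    b = b - b.mean()
    return float((a @ b) / (a.norm() * b.norm() + 1e-12))

# Hypothetical usage against the tensors from the removed test:
# assert pearson_cc(ttnn_output_torch, reference_output) > 0.99, "PCC below threshold"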