#11646: Replace tt_lib in bert files
mcw-anasuya committed Aug 30, 2024
1 parent 0b801a5 commit e76df69
Showing 12 changed files with 255 additions and 270 deletions.
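
The changes visible below follow a single pattern: every tt_lib (ttl) name is swapped for its ttnn equivalent, and explicitly constructed interleaved memory configs collapse to ttnn's module-level shorthands. A rough sketch of the mapping, inferred only from the hunks shown on this page and not an exhaustive list for the whole commit:

# Sketch of the tt_lib -> ttnn renames visible in the hunks below.
# Keys are old tt_lib spellings, values are the ttnn spellings that replace them.
TT_LIB_TO_TTNN = {
    "ttl.tensor.Tensor": "ttnn.Tensor",
    "ttl.tensor.Layout.ROW_MAJOR": "ttnn.ROW_MAJOR_LAYOUT",
    "ttl.tensor.Layout.TILE": "ttnn.TILE_LAYOUT",
    "ttl.tensor.DataType.BFLOAT16": "ttnn.bfloat16",
    "ttl.tensor.DataType.BFLOAT8_B": "ttnn.bfloat8_b",
    "ttl.tensor.BufferType.L1": "ttnn.BufferType.L1",
    "ttl.tensor.CoreCoord": "ttnn.CoreCoord",
    # Interleaved memory configs become the ttnn shorthands:
    "ttl.tensor.MemoryConfig(INTERLEAVED, DRAM)": "ttnn.DRAM_MEMORY_CONFIG",
    "ttl.tensor.MemoryConfig(INTERLEAVED, L1)": "ttnn.L1_MEMORY_CONFIG",
}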
@@ -6,7 +6,6 @@

import torch

import tt_lib as ttl

from tt_lib.utils import (
pad_weight,
@@ -21,8 +20,6 @@
def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_config):
torch.manual_seed(1234)

tensor = ttl.tensor

epsf = 1e-2

test_dims = ((batch, 1, 384, 1024),)
@@ -40,22 +37,22 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c
if test_id >= 1:
gamma = torch.rand(1, 1, 1, W) * 2 - 1
gammah32 = gamma.reshape([1, 1, -1, 32])
ttgamma = tensor.Tensor(
ttgamma = ttnn.Tensor(
gammah32.reshape(-1).tolist(),
gammah32.shape,
dtype,
tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
device,
in0_mem_config,
)
if test_id >= 2:
beta = torch.rand(1, 1, 1, W) * 2.0 - 1.1
betah32 = beta.reshape([1, 1, -1, 32])
ttbeta = tensor.Tensor(
ttbeta = ttnn.Tensor(
betah32.reshape(-1).tolist(),
betah32.shape,
dtype,
tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
device,
in0_mem_config,
)
@@ -66,19 +63,19 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c
if test_id < 3:
y *= 0.0 # zero out the y to exclude x+y from reference calculation

ttx = tensor.Tensor(
ttx = ttnn.Tensor(
tilize_to_list(x),
[N, C, H, W],
dtype,
tensor.Layout.TILE,
ttnn.TILE_LAYOUT,
device,
in0_mem_config,
)
tty = tensor.Tensor(
tty = ttnn.Tensor(
tilize_to_list(y),
[N, C, H, W],
dtype,
tensor.Layout.TILE,
ttnn.TILE_LAYOUT,
device,
in0_mem_config,
)
@@ -129,22 +126,22 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c
@pytest.mark.parametrize(
"out_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["out_DRAM", "out_L1"],
)
@pytest.mark.parametrize(
"in0_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["in0_DRAM", "in0_L1"],
)
@pytest.mark.parametrize(
"dtype",
(ttl.tensor.DataType.BFLOAT16,),
(ttnn.bfloat16,),
ids=["BFLOAT16"],
)
@pytest.mark.parametrize(
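For readability, here is a minimal, hypothetical reconstruction of the gamma-tensor setup from the hunk above as standalone code. The helper name make_gamma_tensor and the default memory config are illustrative, and device is assumed to be an already-open ttnn device supplied by the test fixture.

import torch
import ttnn

def make_gamma_tensor(device, mem_config=ttnn.DRAM_MEMORY_CONFIG, W=1024):
    # Mirrors the updated test: build gamma on host, reshape to 32-wide rows,
    # then construct a row-major ttnn.Tensor directly on the device.
    gamma = torch.rand(1, 1, 1, W) * 2 - 1
    gammah32 = gamma.reshape([1, 1, -1, 32])
    return ttnn.Tensor(
        gammah32.reshape(-1).tolist(),  # flattened host data
        gammah32.shape,                 # logical shape
        ttnn.bfloat16,                  # was ttl.tensor.DataType.BFLOAT16
        ttnn.ROW_MAJOR_LAYOUT,          # was tensor.Layout.ROW_MAJOR
        device,
        mem_config,                     # was an explicit ttl MemoryConfig
    )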
(next changed file)
@@ -6,7 +6,6 @@


import ttnn
import tt_lib as ttl
from models.utility_functions import (
comp_pcc,
)
@@ -25,17 +24,17 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config,
A = torch.randn(a_shape)

a_t = (
ttl.tensor.Tensor(
ttnn.Tensor(
A.flatten().tolist(),
a_shape,
dtype,
ttl.tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
)
.to(ttl.tensor.Layout.TILE)
.to(ttnn.TILE_LAYOUT)
.to(device, in0_mem_config)
)

out = ttnn.experimental.concatenate_heads(a_t, ttl.tensor.CoreCoord(12, 9), memory_config=out_mem_config)
out = ttnn.experimental.concatenate_heads(a_t, ttnn.CoreCoord(12, 9), memory_config=out_mem_config)

# Check memory of inputs and outputs
assert a_t.memory_config().buffer_type == in0_mem_config.buffer_type
@@ -45,7 +44,7 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config,
logger.debug(f"out: {out.memory_config().buffer_type} and {out.get_dtype()}")

assert out.get_legacy_shape() == [batch, 1, 384, 1024]
tt_host_rm_out = out.cpu().to(ttl.tensor.Layout.ROW_MAJOR)
tt_host_rm_out = out.cpu().to(ttnn.ROW_MAJOR_LAYOUT)
pyt_got_back_rm_out = tt_host_rm_out.to_torch()

ref_out = torch.transpose(A, -3, -2).reshape([batch, 1, 384, 1024])
@@ -61,22 +60,22 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config,
@pytest.mark.parametrize(
"out_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["out_DRAM", "out_L1"],
)
@pytest.mark.parametrize(
"in0_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["in0_DRAM", "in0_L1"],
)
@pytest.mark.parametrize(
"dtype",
(ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16),
(ttnn.bfloat8_b, ttnn.bfloat16),
ids=["BFLOAT8_B", "BFLOAT16"],
)
@pytest.mark.parametrize(
@@ -93,19 +92,19 @@ def test_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config,


def test_bert_large_concatenate_heads_with_program_cache(device, use_program_cache):
dtype = ttl.tensor.DataType.BFLOAT8_B
mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM)
dtype = ttnn.bfloat8_b
mem_config = ttnn.DRAM_MEMORY_CONFIG
for _ in range(2):
run_bert_large_concatenate_heads_test(device, 9, dtype, mem_config, mem_config)
dummy_shape = [1, 1, 32, 32]
py_dummy_tensor = torch.randn(dummy_shape)
tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config)
tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1)
mem_config = ttnn.L1_MEMORY_CONFIG
for _ in range(2):
run_bert_large_concatenate_heads_test(device, 9, dtype, mem_config, mem_config)
dummy_shape = [1, 1, 32, 32]
py_dummy_tensor = torch.randn(dummy_shape)
tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config)
tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

assert device.num_program_cache_entries() == 2
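
The parametrization in this file (and the previous one) now passes ttnn.DRAM_MEMORY_CONFIG and ttnn.L1_MEMORY_CONFIG directly instead of constructing interleaved MemoryConfig objects by hand. Below is a stripped-down, hypothetical sketch of that parametrization outside the real test, only to show the shape of the new decorators; it is not part of the commit.

import pytest
import ttnn

MEM_CONFIGS = (ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG)

@pytest.mark.parametrize("out_mem_config", MEM_CONFIGS, ids=["out_DRAM", "out_L1"])
@pytest.mark.parametrize("in0_mem_config", MEM_CONFIGS, ids=["in0_DRAM", "in0_L1"])
@pytest.mark.parametrize("dtype", (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"])
def test_mem_config_matrix(dtype, in0_mem_config, out_mem_config):
    # Placeholder body: the real tests forward these values into the
    # run_bert_large_* helpers shown in the diff; here we only check the wiring.
    assert in0_mem_config in MEM_CONFIGS and out_mem_config in MEM_CONFIGS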
(next changed file)
@@ -8,7 +8,6 @@

import numpy as np

import tt_lib as ttl
import ttnn
from models.utility_functions import (
comp_pcc,
@@ -32,12 +31,9 @@ def run_bert_large_ff1_matmul_test(
pytest.skip(f"Grid size {compute_grid_size} is not supported")

if (
dtype == ttl.tensor.DataType.BFLOAT16
and out_mem_config.buffer_type == ttl.tensor.BufferType.L1
and (
in0_mem_config.buffer_type == ttl.tensor.BufferType.L1
or in1_mem_config.buffer_type == ttl.tensor.BufferType.L1
)
dtype == ttnn.bfloat16
and out_mem_config.buffer_type == ttnn.BufferType.L1
and (in0_mem_config.buffer_type == ttnn.BufferType.L1 or in1_mem_config.buffer_type == ttnn.BufferType.L1)
):
pytest.skip("Skipping test since these tensors won't fit on device!")

@@ -52,36 +48,36 @@ def run_bert_large_ff1_matmul_test(
BIAS = torch.randint(-20, 20, bias_shape, dtype=torch.float)

a_t = (
ttl.tensor.Tensor(
ttnn.Tensor(
A.flatten().tolist(),
a_shape,
dtype,
ttl.tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
)
.to(ttl.tensor.Layout.TILE)
.to(ttnn.TILE_LAYOUT)
.to(device, in0_mem_config)
)
b_t = (
ttl.tensor.Tensor(
ttnn.Tensor(
B.flatten().tolist(),
b_shape,
dtype,
ttl.tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
)
.to(ttl.tensor.Layout.TILE)
.to(ttnn.TILE_LAYOUT)
.to(device, in1_mem_config)
)

if bias_mem_config is not None:
bias_t = (
ttl.tensor.Tensor(
ttnn.Tensor(
BIAS.flatten().tolist(),
bias_shape,
dtype,
ttl.tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
)
.pad(bias_pad_shape, [0, 0, 0, 0], 0)
.to(ttl.tensor.Layout.TILE)
.to(ttnn.TILE_LAYOUT)
.to(device, bias_mem_config)
)
else:
@@ -107,7 +103,7 @@ def run_bert_large_ff1_matmul_test(
logger.debug(f"out is on: {t2.memory_config().buffer_type}")

assert t2.get_legacy_shape() == [9, 1, 384, 4096]
tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR)
tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT)
pyt_got_back_rm = tt_host_rm.to_torch()

ref_bmm = torch.matmul(A, B)
@@ -133,39 +129,39 @@ def run_bert_large_ff1_matmul_test(
@pytest.mark.parametrize(
"out_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["out_DRAM", "out_L1"],
)
@pytest.mark.parametrize(
"bias_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
None,
),
ids=["bias_DRAM", "bias_L1", "bias_None"],
)
@pytest.mark.parametrize(
"in1_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["in1_DRAM", "in1_L1"],
)
@pytest.mark.parametrize(
"in0_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["in0_DRAM", "in0_L1"],
)
@pytest.mark.parametrize(
"dtype",
(ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16),
(ttnn.bfloat8_b, ttnn.bfloat16),
ids=["BFLOAT8_B", "BFLOAT16"],
)
def test_bert_large_ff1_matmul_test(
@@ -190,8 +186,8 @@ def test_bert_large_ff1_matmul_test(


def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache):
dtype = ttl.tensor.DataType.BFLOAT8_B
mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM)
dtype = ttnn.bfloat8_b
mem_config = ttnn.DRAM_MEMORY_CONFIG
for _ in range(2):
run_bert_large_ff1_matmul_test(
device,
@@ -204,9 +200,9 @@ def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache):
)
dummy_shape = [1, 1, 32, 32]
py_dummy_tensor = torch.randn(dummy_shape)
tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config)
tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1)
mem_config = ttnn.L1_MEMORY_CONFIG
for _ in range(2):
run_bert_large_ff1_matmul_test(
device,
@@ -219,6 +215,6 @@ def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache):
)
dummy_shape = [1, 1, 32, 32]
py_dummy_tensor = torch.randn(dummy_shape)
tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config)
tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

assert device.num_program_cache_entries() == 2
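
Finally, the skip condition at the top of the ff1 matmul test keeps its logic but is now written against ttnn enums. A minimal sketch of that check pulled out as a helper: the helper name is hypothetical, while the comparison itself is taken from the hunk above.

import pytest
import ttnn

def skip_if_wont_fit(dtype, in0_mem_config, in1_mem_config, out_mem_config):
    # BFLOAT16 activations with an L1-resident output and at least one
    # L1-resident input are assumed not to fit on device, so skip up front.
    if (
        dtype == ttnn.bfloat16
        and out_mem_config.buffer_type == ttnn.BufferType.L1
        and (
            in0_mem_config.buffer_type == ttnn.BufferType.L1
            or in1_mem_config.buffer_type == ttnn.BufferType.L1
        )
    ):
        pytest.skip("Skipping test since these tensors won't fit on device!")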