#11646: Replace tt_lib in bert files
mcw-anasuya committed Aug 30, 2024
1 parent 0b801a5 commit e76df69
Showing 12 changed files with 255 additions and 270 deletions.
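
The changes visible below follow a single pattern: every tt_lib (ttl) name is swapped for its ttnn equivalent, and explicitly constructed interleaved memory configs collapse to ttnn's module-level shorthands. A rough sketch of the mapping, inferred only from the hunks shown on this page and not an exhaustive list for the whole commit:

# Sketch of the tt_lib -> ttnn renames visible in the hunks below.
# Keys are old tt_lib spellings, values are the ttnn spellings that replace them.
TT_LIB_TO_TTNN = {
    "ttl.tensor.Tensor": "ttnn.Tensor",
    "ttl.tensor.Layout.ROW_MAJOR": "ttnn.ROW_MAJOR_LAYOUT",
    "ttl.tensor.Layout.TILE": "ttnn.TILE_LAYOUT",
    "ttl.tensor.DataType.BFLOAT16": "ttnn.bfloat16",
    "ttl.tensor.DataType.BFLOAT8_B": "ttnn.bfloat8_b",
    "ttl.tensor.BufferType.L1": "ttnn.BufferType.L1",
    "ttl.tensor.CoreCoord": "ttnn.CoreCoord",
    # Interleaved memory configs become the ttnn shorthands:
    "ttl.tensor.MemoryConfig(INTERLEAVED, DRAM)": "ttnn.DRAM_MEMORY_CONFIG",
    "ttl.tensor.MemoryConfig(INTERLEAVED, L1)": "ttnn.L1_MEMORY_CONFIG",
}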
@@ -6,7 +6,6 @@

import torch

import tt_lib as ttl

from tt_lib.utils import (
pad_weight,
@@ -21,8 +20,6 @@
def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_config):
torch.manual_seed(1234)

tensor = ttl.tensor

epsf = 1e-2

test_dims = ((batch, 1, 384, 1024),)
@@ -40,22 +37,22 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c
if test_id >= 1:
gamma = torch.rand(1, 1, 1, W) * 2 - 1
gammah32 = gamma.reshape([1, 1, -1, 32])
ttgamma = tensor.Tensor(
ttgamma = ttnn.Tensor(
gammah32.reshape(-1).tolist(),
gammah32.shape,
dtype,
tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
device,
in0_mem_config,
)
if test_id >= 2:
beta = torch.rand(1, 1, 1, W) * 2.0 - 1.1
betah32 = beta.reshape([1, 1, -1, 32])
ttbeta = tensor.Tensor(
ttbeta = ttnn.Tensor(
betah32.reshape(-1).tolist(),
betah32.shape,
dtype,
tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
device,
in0_mem_config,
)
@@ -66,19 +63,19 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c
if test_id < 3:
y *= 0.0 # zero out the y to exclude x+y from reference calculation

ttx = tensor.Tensor(
ttx = ttnn.Tensor(
tilize_to_list(x),
[N, C, H, W],
dtype,
tensor.Layout.TILE,
ttnn.TILE_LAYOUT,
device,
in0_mem_config,
)
tty = tensor.Tensor(
tty = ttnn.Tensor(
tilize_to_list(y),
[N, C, H, W],
dtype,
tensor.Layout.TILE,
ttnn.TILE_LAYOUT,
device,
in0_mem_config,
)
@@ -129,22 +126,22 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c
@pytest.mark.parametrize(
"out_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["out_DRAM", "out_L1"],
)
@pytest.mark.parametrize(
"in0_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["in0_DRAM", "in0_L1"],
)
@pytest.mark.parametrize(
"dtype",
(ttl.tensor.DataType.BFLOAT16,),
(ttnn.bfloat16,),
ids=["BFLOAT16"],
)
@pytest.mark.parametrize(
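For readability, here is a minimal, hypothetical reconstruction of the gamma-tensor setup from the hunk above as standalone code. The helper name make_gamma_tensor and the default memory config are illustrative, and device is assumed to be an already-open ttnn device supplied by the test fixture.

import torch
import ttnn

def make_gamma_tensor(device, mem_config=ttnn.DRAM_MEMORY_CONFIG, W=1024):
    # Mirrors the updated test: build gamma on host, reshape to 32-wide rows,
    # then construct a row-major ttnn.Tensor directly on the device.
    gamma = torch.rand(1, 1, 1, W) * 2 - 1
    gammah32 = gamma.reshape([1, 1, -1, 32])
    return ttnn.Tensor(
        gammah32.reshape(-1).tolist(),  # flattened host data
        gammah32.shape,                 # logical shape
        ttnn.bfloat16,                  # was ttl.tensor.DataType.BFLOAT16
        ttnn.ROW_MAJOR_LAYOUT,          # was tensor.Layout.ROW_MAJOR
        device,
        mem_config,                     # was an explicit ttl MemoryConfig
    )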
(next changed file)
@@ -6,7 +6,6 @@


import ttnn
import tt_lib as ttl
from models.utility_functions import (
comp_pcc,
)
@@ -25,17 +24,17 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config,
A = torch.randn(a_shape)

a_t = (
ttl.tensor.Tensor(
ttnn.Tensor(
A.flatten().tolist(),
a_shape,
dtype,
ttl.tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
)
.to(ttl.tensor.Layout.TILE)
.to(ttnn.TILE_LAYOUT)
.to(device, in0_mem_config)
)

out = ttnn.experimental.concatenate_heads(a_t, ttl.tensor.CoreCoord(12, 9), memory_config=out_mem_config)
out = ttnn.experimental.concatenate_heads(a_t, ttnn.CoreCoord(12, 9), memory_config=out_mem_config)

# Check memory of inputs and outputs
assert a_t.memory_config().buffer_type == in0_mem_config.buffer_type
@@ -45,7 +44,7 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config,
logger.debug(f"out: {out.memory_config().buffer_type} and {out.get_dtype()}")

assert out.get_legacy_shape() == [batch, 1, 384, 1024]
tt_host_rm_out = out.cpu().to(ttl.tensor.Layout.ROW_MAJOR)
tt_host_rm_out = out.cpu().to(ttnn.ROW_MAJOR_LAYOUT)
pyt_got_back_rm_out = tt_host_rm_out.to_torch()

ref_out = torch.transpose(A, -3, -2).reshape([batch, 1, 384, 1024])
@@ -61,22 +60,22 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config,
@pytest.mark.parametrize(
"out_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["out_DRAM", "out_L1"],
)
@pytest.mark.parametrize(
"in0_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["in0_DRAM", "in0_L1"],
)
@pytest.mark.parametrize(
"dtype",
(ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16),
(ttnn.bfloat8_b, ttnn.bfloat16),
ids=["BFLOAT8_B", "BFLOAT16"],
)
@pytest.mark.parametrize(
@@ -93,19 +92,19 @@ def test_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config,


def test_bert_large_concatenate_heads_with_program_cache(device, use_program_cache):
dtype = ttl.tensor.DataType.BFLOAT8_B
mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM)
dtype = ttnn.bfloat8_b
mem_config = ttnn.DRAM_MEMORY_CONFIG
for _ in range(2):
run_bert_large_concatenate_heads_test(device, 9, dtype, mem_config, mem_config)
dummy_shape = [1, 1, 32, 32]
py_dummy_tensor = torch.randn(dummy_shape)
tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config)
tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1)
mem_config = ttnn.L1_MEMORY_CONFIG
for _ in range(2):
run_bert_large_concatenate_heads_test(device, 9, dtype, mem_config, mem_config)
dummy_shape = [1, 1, 32, 32]
py_dummy_tensor = torch.randn(dummy_shape)
tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config)
tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

assert device.num_program_cache_entries() == 2
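
The parametrization in this file (and the previous one) now passes ttnn.DRAM_MEMORY_CONFIG and ttnn.L1_MEMORY_CONFIG directly instead of constructing interleaved MemoryConfig objects by hand. Below is a stripped-down, hypothetical sketch of that parametrization outside the real test, only to show the shape of the new decorators; it is not part of the commit.

import pytest
import ttnn

MEM_CONFIGS = (ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG)

@pytest.mark.parametrize("out_mem_config", MEM_CONFIGS, ids=["out_DRAM", "out_L1"])
@pytest.mark.parametrize("in0_mem_config", MEM_CONFIGS, ids=["in0_DRAM", "in0_L1"])
@pytest.mark.parametrize("dtype", (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"])
def test_mem_config_matrix(dtype, in0_mem_config, out_mem_config):
    # Placeholder body: the real tests forward these values into the
    # run_bert_large_* helpers shown in the diff; here we only check the wiring.
    assert in0_mem_config in MEM_CONFIGS and out_mem_config in MEM_CONFIGS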
(next changed file)
@@ -8,7 +8,6 @@

import numpy as np

import tt_lib as ttl
import ttnn
from models.utility_functions import (
comp_pcc,
@@ -32,12 +31,9 @@ def run_bert_large_ff1_matmul_test(
pytest.skip(f"Grid size {compute_grid_size} is not supported")

if (
dtype == ttl.tensor.DataType.BFLOAT16
and out_mem_config.buffer_type == ttl.tensor.BufferType.L1
and (
in0_mem_config.buffer_type == ttl.tensor.BufferType.L1
or in1_mem_config.buffer_type == ttl.tensor.BufferType.L1
)
dtype == ttnn.bfloat16
and out_mem_config.buffer_type == ttnn.BufferType.L1
and (in0_mem_config.buffer_type == ttnn.BufferType.L1 or in1_mem_config.buffer_type == ttnn.BufferType.L1)
):
pytest.skip("Skipping test since these tensors won't fit on device!")

@@ -52,36 +48,36 @@ def run_bert_large_ff1_matmul_test(
BIAS = torch.randint(-20, 20, bias_shape, dtype=torch.float)

a_t = (
ttl.tensor.Tensor(
ttnn.Tensor(
A.flatten().tolist(),
a_shape,
dtype,
ttl.tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
)
.to(ttl.tensor.Layout.TILE)
.to(ttnn.TILE_LAYOUT)
.to(device, in0_mem_config)
)
b_t = (
ttl.tensor.Tensor(
ttnn.Tensor(
B.flatten().tolist(),
b_shape,
dtype,
ttl.tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
)
.to(ttl.tensor.Layout.TILE)
.to(ttnn.TILE_LAYOUT)
.to(device, in1_mem_config)
)

if bias_mem_config is not None:
bias_t = (
ttl.tensor.Tensor(
ttnn.Tensor(
BIAS.flatten().tolist(),
bias_shape,
dtype,
ttl.tensor.Layout.ROW_MAJOR,
ttnn.ROW_MAJOR_LAYOUT,
)
.pad(bias_pad_shape, [0, 0, 0, 0], 0)
.to(ttl.tensor.Layout.TILE)
.to(ttnn.TILE_LAYOUT)
.to(device, bias_mem_config)
)
else:
@@ -107,7 +103,7 @@ def run_bert_large_ff1_matmul_test(
logger.debug(f"out is on: {t2.memory_config().buffer_type}")

assert t2.get_legacy_shape() == [9, 1, 384, 4096]
tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR)
tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT)
pyt_got_back_rm = tt_host_rm.to_torch()

ref_bmm = torch.matmul(A, B)
@@ -133,39 +129,39 @@ def run_bert_large_ff1_matmul_test(
@pytest.mark.parametrize(
"out_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["out_DRAM", "out_L1"],
)
@pytest.mark.parametrize(
"bias_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
None,
),
ids=["bias_DRAM", "bias_L1", "bias_None"],
)
@pytest.mark.parametrize(
"in1_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["in1_DRAM", "in1_L1"],
)
@pytest.mark.parametrize(
"in0_mem_config",
(
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
ttnn.DRAM_MEMORY_CONFIG,
ttnn.L1_MEMORY_CONFIG,
),
ids=["in0_DRAM", "in0_L1"],
)
@pytest.mark.parametrize(
"dtype",
(ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16),
(ttnn.bfloat8_b, ttnn.bfloat16),
ids=["BFLOAT8_B", "BFLOAT16"],
)
def test_bert_large_ff1_matmul_test(
@@ -190,8 +186,8 @@ def test_bert_large_ff1_matmul_test(


def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache):
dtype = ttl.tensor.DataType.BFLOAT8_B
mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM)
dtype = ttnn.bfloat8_b
mem_config = ttnn.DRAM_MEMORY_CONFIG
for _ in range(2):
run_bert_large_ff1_matmul_test(
device,
@@ -204,9 +200,9 @@ def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache):
)
dummy_shape = [1, 1, 32, 32]
py_dummy_tensor = torch.randn(dummy_shape)
tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config)
tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1)
mem_config = ttnn.L1_MEMORY_CONFIG
for _ in range(2):
run_bert_large_ff1_matmul_test(
device,
@@ -219,6 +215,6 @@ def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache):
)
dummy_shape = [1, 1, 32, 32]
py_dummy_tensor = torch.randn(dummy_shape)
tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config)
tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

assert device.num_program_cache_entries() == 2
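
Finally, the skip condition at the top of the ff1 matmul test keeps its logic but is now written against ttnn enums. A minimal sketch of that check pulled out as a helper: the helper name is hypothetical, while the comparison itself is taken from the hunk above.

import pytest
import ttnn

def skip_if_wont_fit(dtype, in0_mem_config, in1_mem_config, out_mem_config):
    # BFLOAT16 activations with an L1-resident output and at least one
    # L1-resident input are assumed not to fit on device, so skip up front.
    if (
        dtype == ttnn.bfloat16
        and out_mem_config.buffer_type == ttnn.BufferType.L1
        and (
            in0_mem_config.buffer_type == ttnn.BufferType.L1
            or in1_mem_config.buffer_type == ttnn.BufferType.L1
        )
    ):
        pytest.skip("Skipping test since these tensors won't fit on device!")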