#13541: Conv2d enable shallow convs in auto shard
Enable shallow convs in the auto-shard codepath. Shallow convs use less L1 memory, so more of the torch-trace tests pass.

Decouple shallow convs from the split reader so that shallow convs can be enabled in a more generic way.
Pavle Josipovic authored and pavlejosipovic committed Oct 25, 2024
1 parent e966f77 commit 0194005
Showing 10 changed files with 134 additions and 277 deletions.
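Apart from the model and sweep updates, most of the hunks below make the same mechanical change at the parallel-config call sites: instead of handing the helper a device handle, the callers now pass the compute grid explicitly via `device.compute_with_storage_grid_size()`. A minimal sketch of the new call pattern follows; only the keyword arguments visible in these hunks are shown, and the helper name `determine_parallel_config` is an assumption (the first line of the call is not part of the excerpt), so treat this as illustrative rather than a drop-in signature.

```python
import ttnn

# Sketch of the call-site change repeated in the hunks below. Only the keyword
# arguments visible in this excerpt are shown; the helper name
# (determine_parallel_config) is an assumption, not confirmed by the diff.
def output_parallel_config(device, out_h, out_w, out_c):
    return ttnn._ttnn.operations.conv2d.determine_parallel_config(
        output_height=out_h,
        output_width=out_w,
        output_channels=out_c,
        # was: device=device
        compute_grid_size=device.compute_with_storage_grid_size(),
        block_shard_orientation=ttnn.ShardOrientation.ROW_MAJOR,
        is_out_tiled=True,
    )
```

Passing the grid size instead of the device handle keeps the helper's inputs explicit; every touched call site obtains it from `device.compute_with_storage_grid_size()`.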
@@ -635,13 +635,6 @@ def __init__(
width=self.conv1_output_width,
in_channels=self.conv1_input_channels,
out_channels=self.conv1_output_channels,
- kernel_size=[self.conv1_kernel_size[0], self.conv1_kernel_size[1]],
- stride=[self.conv1_stride[0], self.conv1_stride[1]],
- padding=[self.conv1_padding[0], self.conv1_padding[1]],
- dilation=[1, 1],
- groups=1,
- weights_width=self.conv1_weight_tensor.shape[3],
- input_width=self.conv1_input_width,
)

def __del__(self):
models/experimental/functional_unet/tt/unet_shallow_ttnn.py (6 changes: 3 additions & 3 deletions)
@@ -266,7 +266,7 @@ def __init__(
output_height=self.conv2.input_height,
output_width=self.conv2.input_width,
output_channels=self.conv1.out_channels,
- device=device,
+ compute_grid_size=device.compute_with_storage_grid_size(),
block_shard_orientation=ttnn.ShardOrientation.ROW_MAJOR,
is_out_tiled=True,
)
@@ -320,7 +320,7 @@ def __init__(
output_height=self.conv2.input_height,
output_width=self.conv2.input_width,
output_channels=self.conv1.out_channels,
- device=device,
+ compute_grid_size=device.compute_with_storage_grid_size(),
block_shard_orientation=ttnn.ShardOrientation.ROW_MAJOR,
is_out_tiled=True,
)
@@ -448,7 +448,7 @@ def __init__(self, parameters: ParameterDict, device, mesh_mapper=None) -> None:
output_height=self.bnc2.input_height,
output_width=self.bnc2.input_width,
output_channels=self.bnc.out_channels,
- device=device,
+ compute_grid_size=device.compute_with_storage_grid_size(),
block_shard_orientation=ttnn.ShardOrientation.ROW_MAJOR,
is_out_tiled=True,
)
tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py (10 changes: 1 addition & 9 deletions)
@@ -20,6 +20,7 @@
# Contains following params
# [batch_size, output_channels, input_channels, input_height, input_width, kernel_height, kernel_width, stride_x, stride_y, pad_x, pad_y, groups, bias, dilation]
[1, 32, 1, 28, 28, 3, 3, 1, 1, 0, 0, 1, True, 1],
[1, 100, 100, 14, 14, 3, 3, 1, 1, 1, 1, 100, False, 1],
[1, 1008, 1008, 14, 14, 3, 3, 2, 2, 1, 1, 21, False, 1],
[1, 1008, 1008, 7, 7, 3, 3, 1, 1, 1, 1, 21, False, 1],
[1, 1024, 1024, 10, 10, 3, 3, 1, 1, 1, 1, 1024, False, 1],
@@ -454,25 +455,16 @@ def test_conv2d_localrun(device, input_spec):
# [batch_size, output_channels, input_channels, input_height, input_width, kernel_height, kernel_width, stride_x, stride_y, pad_x, pad_y, groups, bias, dilation]
# Input is 32MB and maps to MM on 64 cores; we need to avoid sharding this tensor and use DRAM interleaved directly with MM
[1, 256, 1024, 128, 128, 1, 1, 1, 1, 0, 0, 1, False, 1], # 6
[1, 1024, 1024, 19, 19, 1, 1, 1, 1, 0, 0, 1, True, 1], # 9
[1, 2048, 1024, 7, 7, 1, 1, 1, 1, 0, 0, 1, True, 1], # 11
[1, 1056, 1056, 48, 48, 3, 3, 1, 1, 1, 1, 4, False, 1], # 14
[1, 1056, 1056, 96, 96, 3, 3, 2, 2, 1, 1, 4, False, 1], # 15
[1, 192, 192, 99, 99, 5, 5, 2, 2, 0, 0, 192, False, 1], # 100
[1, 2520, 2520, 14, 14, 3, 3, 2, 2, 1, 1, 15, False, 1], # 141
[1, 2904, 2904, 24, 24, 3, 3, 1, 1, 1, 1, 11, False, 1], # 170
[1, 2904, 2904, 48, 48, 3, 3, 2, 2, 1, 1, 11, False, 1], # 171
[1, 1024, 3, 224, 224, 16, 16, 16, 16, 0, 0, 1, True, 1], # 172
[1, 1024, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, True, 1], # 173
[1, 768, 3, 224, 224, 16, 16, 16, 16, 0, 0, 1, True, 1], # 181
[1, 768, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, False, 1], # 182
[1, 768, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, True, 1], # 183
[1, 32, 3, 299, 299, 3, 3, 2, 2, 0, 0, 1, False, 1], # 192
[1, 32, 3, 381, 381, 3, 3, 2, 2, 0, 0, 1, False, 1], # 197
[1, 768, 3, 384, 512, 32, 32, 32, 32, 0, 0, 1, True, 1], # 199
[1, 192, 3, 512, 672, 16, 16, 16, 16, 0, 0, 1, True, 1], # 202
[1, 1280, 3, 518, 518, 14, 14, 14, 14, 0, 0, 1, True, 1], # 203
[1, 64, 3, 720, 1280, 7, 7, 2, 2, 3, 3, 1, False, 1], # 204
[1, 64, 3, 800, 1088, 7, 7, 2, 2, 3, 3, 1, False, 1], # 205
[1, 336, 336, 112, 112, 3, 3, 2, 2, 1, 1, 2, False, 1], # 241
[1, 336, 336, 48, 48, 5, 5, 1, 1, 2, 2, 336, False, 1], # 245
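Each sweep entry above is a flat list whose field order is given only by the comment at the top of the file. As a self-contained illustration (not part of the sweep framework; the `Conv2dSpec` and `is_shallow` names and the channel threshold are assumptions), here is one way such a row could be unpacked and the shallow cases this commit targets flagged:

```python
from dataclasses import dataclass

@dataclass
class Conv2dSpec:
    # Field order matches the comment in conv2d_short_sweep.py:
    # [batch_size, output_channels, input_channels, input_height, input_width,
    #  kernel_height, kernel_width, stride_x, stride_y, pad_x, pad_y,
    #  groups, bias, dilation]
    batch_size: int
    output_channels: int
    input_channels: int
    input_height: int
    input_width: int
    kernel_height: int
    kernel_width: int
    stride_x: int
    stride_y: int
    pad_x: int
    pad_y: int
    groups: int
    bias: bool
    dilation: int

def is_shallow(spec: Conv2dSpec, max_channels: int = 4) -> bool:
    # "Shallow" means very few input channels (e.g. RGB stems with
    # input_channels == 3); the threshold here is an illustrative assumption.
    return spec.input_channels <= max_channels

spec = Conv2dSpec(*[1, 32, 3, 299, 299, 3, 3, 2, 2, 0, 0, 1, False, 1])
print(is_shallow(spec))  # True
```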
@@ -132,7 +132,7 @@ def run(
output_height=out_h,
output_width=out_w,
output_channels=in_c,
- device=device,
+ compute_grid_size=device.compute_with_storage_grid_size(),
is_out_tiled=False,
)
sharded_memory_config = ttnn._ttnn.operations.conv2d.create_sharded_memory_config_from_parallel_config(
tests/ttnn/unit_tests/operations/test_maxpool2d.py (4 changes: 2 additions & 2 deletions)
@@ -107,7 +107,7 @@ def run_max_pool(
output_height=out_h,
output_width=out_w,
output_channels=in_c,
- device=device,
+ compute_grid_size=device.compute_with_storage_grid_size(),
block_shard_orientation=ttnn.ShardOrientation.ROW_MAJOR,
is_out_tiled=False,
)
@@ -632,7 +632,7 @@ def test_pool_core_nondivis(
output_height=out_h,
output_width=out_w,
output_channels=in_c,
- device=device,
+ compute_grid_size=device.compute_with_storage_grid_size(),
block_shard_orientation=ttnn.ShardOrientation.ROW_MAJOR,
is_out_tiled=True,
)