#0: Removed CreateComputeKernelConfig
sankarmanoj-tt committed Nov 29, 2024
1 parent 997073e · commit ad8ca49
Showing 29 changed files with 174 additions and 106 deletions.
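Every change in this commit follows the same mechanical pattern: the removed ttnn.CreateComputeKernelConfig(...) constructor is replaced by ttnn.init_device_compute_kernel_config(...), which takes the device architecture as a new first positional argument and otherwise accepts the same keyword arguments. A minimal before/after sketch of the migration (keyword values are illustrative, copied from the convnet_mnist diff below; device is assumed to be an open ttnn device handle):

# Before this commit (removed API):
compute_config = ttnn.CreateComputeKernelConfig(
    math_fidelity=ttnn.MathFidelity.LoFi,
    math_approx_mode=True,
    fp32_dest_acc_en=False,
)

# After this commit: same knobs, with the target architecture supplied up front.
compute_config = ttnn.init_device_compute_kernel_config(
    device.arch(),
    math_fidelity=ttnn.MathFidelity.LoFi,
    math_approx_mode=True,
    fp32_dest_acc_en=False,
)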
3 changes: 2 additions & 1 deletion models/demos/convnet_mnist/tt/convnet_mnist.py
@@ -27,7 +27,8 @@ def convnet_mnist(
deallocate_activation=True,
reallocate_halo_output=True,
)
- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=ttnn.MathFidelity.LoFi,
math_approx_mode=True,
fp32_dest_acc_en=False,
3 changes: 2 additions & 1 deletion models/demos/llama3/tt/multimodal/llama_conv2d_patch.py
@@ -79,7 +79,8 @@ def __init__(
mesh_mapper=ttnn.ReplicateTensorToMesh(self.mesh_device),
)

- self.compute_kernel_config = ttnn.CreateComputeKernelConfig(
+ self.compute_kernel_config = ttnn.init_device_compute_kernel_config(
+     mesh_device.arch(),
math_fidelity=ttnn.MathFidelity.HiFi2,
math_approx_mode=True,
fp32_dest_acc_en=True,
3 changes: 2 additions & 1 deletion models/demos/segformer/tt/common.py
@@ -50,7 +50,8 @@ def __call__(self, device, input_tensor):
enable_act_double_buffer=True,
enable_split_reader=False,
)
- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=ttnn.MathFidelity.LoFi,
math_approx_mode=True,
fp32_dest_acc_en=False,
[next changed file — filename not captured]
@@ -188,7 +188,9 @@ def run_downsample_if_req(
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -236,7 +238,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -302,7 +306,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -329,7 +335,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -567,7 +575,9 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -878,7 +888,9 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
[next changed file — filename not captured]
@@ -192,7 +192,8 @@ def run_downsample_if_req(
enable_split_reader=enable_split_reader,
enable_subblock_padding=enable_subblock_padding,
),
- compute_config=ttnn.CreateComputeKernelConfig(
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=packer_l1_accum_enabled,
),
@@ -251,7 +252,8 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
),
- compute_config=ttnn.CreateComputeKernelConfig(
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=packer_l1_acc,
),
@@ -341,7 +343,8 @@ def __call__(
enable_split_reader=enable_split_reader,
enable_subblock_padding=enable_subblock_padding,
),
- compute_config=ttnn.CreateComputeKernelConfig(
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=packer_l1_acc,
),
@@ -386,7 +389,8 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
),
- compute_config=ttnn.CreateComputeKernelConfig(
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=packer_l1_acc,
),
@@ -588,7 +592,8 @@ def __init__(
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
reshard_if_not_optimal=False,
)
- self.conv1_compute_config = ttnn.CreateComputeKernelConfig(
+ self.conv1_compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=True if whb0_and_b16 else False,
)
[next changed file — filename not captured]
@@ -185,7 +185,9 @@ def run_downsample_if_req(
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -231,7 +233,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -295,7 +299,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -322,7 +328,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -537,7 +545,9 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -840,7 +850,9 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
[next changed file — filename not captured]
@@ -187,7 +187,9 @@ def run_downsample_if_req(
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -233,7 +235,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -298,7 +302,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -325,7 +331,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -562,7 +570,9 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -893,7 +903,9 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
[next changed file — filename not captured]
@@ -187,7 +187,9 @@ def run_downsample_if_req(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -239,7 +241,9 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -347,7 +351,9 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -375,7 +381,9 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -603,7 +611,9 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -936,7 +946,9 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
6 changes: 4 additions & 2 deletions models/demos/vgg/tt/ttnn_vgg.py
@@ -102,7 +102,8 @@ def ttnn_vgg16(
reshard_if_not_optimal=True,
enable_weights_double_buffer=True,
)
- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=model_config["MATH_FIDELITY"],
math_approx_mode=True,
fp32_dest_acc_en=False,
@@ -227,7 +228,8 @@ def ttnn_vgg11(
),
enable_weights_double_buffer=True,
)
- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=model_config["MATH_FIDELITY"],
math_approx_mode=True,
fp32_dest_acc_en=True,
3 changes: 2 additions & 1 deletion models/demos/wormhole/mamba/tt/mamba_conv.py
@@ -58,7 +58,8 @@ def prepare_conv_config(self):
input_channels_alignment=32,
deallocate_activation=True,
)
- self.conv1d_compute_config = ttnn.CreateComputeKernelConfig(
+ self.conv1d_compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.config.math_fidelity,
)

[next changed file — filename not captured]
@@ -136,7 +136,8 @@ def __call__(
if hidden_states.memory_config() != self.input_memory_config:
hidden_states = ttnn.to_memory_config(hidden_states, self.input_memory_config)

- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     self.device.arch(),
math_fidelity=ttnn.MathFidelity.LoFi,
math_approx_mode=True,
fp32_dest_acc_en=True,
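The change is identical at every call site; the only per-site variation is where the architecture comes from (device.arch(), self.device.arch(), or mesh_device.arch() for multi-device models). A model that wants to avoid threading the arch through every call could wrap the helper once — make_compute_config below is a hypothetical convenience for illustration only, not part of this commit or of the ttnn API:

def make_compute_config(device, **kwargs):
    # Hypothetical wrapper (illustration only): supplies the architecture
    # automatically and forwards the usual compute-kernel keyword knobs.
    return ttnn.init_device_compute_kernel_config(device.arch(), **kwargs)

compute_config = make_compute_config(device, math_fidelity=ttnn.MathFidelity.LoFi)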