#0: Removed CreateComputeKernelConfig
sankarmanoj-tt committed Nov 29, 2024
1 parent 997073e · commit ad8ca49
Showing 29 changed files with 174 additions and 106 deletions.
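Every change in this commit follows the same mechanical pattern: the removed ttnn.CreateComputeKernelConfig(...) constructor is replaced by ttnn.init_device_compute_kernel_config(...), which takes the device architecture as a new first positional argument and otherwise accepts the same keyword arguments. A minimal before/after sketch of the migration (keyword values are illustrative, copied from the convnet_mnist diff below; device is assumed to be an open ttnn device handle):

# Before this commit (removed API):
compute_config = ttnn.CreateComputeKernelConfig(
    math_fidelity=ttnn.MathFidelity.LoFi,
    math_approx_mode=True,
    fp32_dest_acc_en=False,
)

# After this commit: same knobs, with the target architecture supplied up front.
compute_config = ttnn.init_device_compute_kernel_config(
    device.arch(),
    math_fidelity=ttnn.MathFidelity.LoFi,
    math_approx_mode=True,
    fp32_dest_acc_en=False,
)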
3 changes: 2 additions & 1 deletion models/demos/convnet_mnist/tt/convnet_mnist.py
@@ -27,7 +27,8 @@ def convnet_mnist(
deallocate_activation=True,
reallocate_halo_output=True,
)
- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=ttnn.MathFidelity.LoFi,
math_approx_mode=True,
fp32_dest_acc_en=False,
3 changes: 2 additions & 1 deletion models/demos/llama3/tt/multimodal/llama_conv2d_patch.py
@@ -79,7 +79,8 @@ def __init__(
mesh_mapper=ttnn.ReplicateTensorToMesh(self.mesh_device),
)

- self.compute_kernel_config = ttnn.CreateComputeKernelConfig(
+ self.compute_kernel_config = ttnn.init_device_compute_kernel_config(
+     mesh_device.arch(),
math_fidelity=ttnn.MathFidelity.HiFi2,
math_approx_mode=True,
fp32_dest_acc_en=True,
3 changes: 2 additions & 1 deletion models/demos/segformer/tt/common.py
@@ -50,7 +50,8 @@ def __call__(self, device, input_tensor):
enable_act_double_buffer=True,
enable_split_reader=False,
)
- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=ttnn.MathFidelity.LoFi,
math_approx_mode=True,
fp32_dest_acc_en=False,
[next changed file — filename not captured]
@@ -188,7 +188,9 @@ def run_downsample_if_req(
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -236,7 +238,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -302,7 +306,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -329,7 +335,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -567,7 +575,9 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -878,7 +888,9 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
[next changed file — filename not captured]
@@ -192,7 +192,8 @@ def run_downsample_if_req(
enable_split_reader=enable_split_reader,
enable_subblock_padding=enable_subblock_padding,
),
- compute_config=ttnn.CreateComputeKernelConfig(
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=packer_l1_accum_enabled,
),
@@ -251,7 +252,8 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
),
- compute_config=ttnn.CreateComputeKernelConfig(
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=packer_l1_acc,
),
@@ -341,7 +343,8 @@ def __call__(
enable_split_reader=enable_split_reader,
enable_subblock_padding=enable_subblock_padding,
),
- compute_config=ttnn.CreateComputeKernelConfig(
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=packer_l1_acc,
),
@@ -386,7 +389,8 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
),
- compute_config=ttnn.CreateComputeKernelConfig(
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=packer_l1_acc,
),
@@ -588,7 +592,8 @@ def __init__(
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
reshard_if_not_optimal=False,
)
- self.conv1_compute_config = ttnn.CreateComputeKernelConfig(
+ self.conv1_compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.model_config["MATH_FIDELITY"],
packer_l1_acc=True if whb0_and_b16 else False,
)
[next changed file — filename not captured]
@@ -185,7 +185,9 @@ def run_downsample_if_req(
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -231,7 +233,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -295,7 +299,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -322,7 +328,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -537,7 +545,9 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -840,7 +850,9 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
[next changed file — filename not captured]
@@ -187,7 +187,9 @@ def run_downsample_if_req(
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -233,7 +235,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -298,7 +302,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -325,7 +331,9 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -562,7 +570,9 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -893,7 +903,9 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
[next changed file — filename not captured]
@@ -187,7 +187,9 @@ def run_downsample_if_req(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -239,7 +241,9 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -347,7 +351,9 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -375,7 +381,9 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)

@@ -603,7 +611,9 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -936,7 +946,9 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
- compute_config=ttnn.CreateComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
+ compute_config=ttnn.init_device_compute_kernel_config(
+     device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
+ ),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
6 changes: 4 additions & 2 deletions models/demos/vgg/tt/ttnn_vgg.py
@@ -102,7 +102,8 @@ def ttnn_vgg16(
reshard_if_not_optimal=True,
enable_weights_double_buffer=True,
)
- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=model_config["MATH_FIDELITY"],
math_approx_mode=True,
fp32_dest_acc_en=False,
@@ -227,7 +228,8 @@ def ttnn_vgg11(
),
enable_weights_double_buffer=True,
)
- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=model_config["MATH_FIDELITY"],
math_approx_mode=True,
fp32_dest_acc_en=True,
3 changes: 2 additions & 1 deletion models/demos/wormhole/mamba/tt/mamba_conv.py
@@ -58,7 +58,8 @@ def prepare_conv_config(self):
input_channels_alignment=32,
deallocate_activation=True,
)
- self.conv1d_compute_config = ttnn.CreateComputeKernelConfig(
+ self.conv1d_compute_config = ttnn.init_device_compute_kernel_config(
+     device.arch(),
math_fidelity=self.config.math_fidelity,
)

[next changed file — filename not captured]
@@ -136,7 +136,8 @@ def __call__(
if hidden_states.memory_config() != self.input_memory_config:
hidden_states = ttnn.to_memory_config(hidden_states, self.input_memory_config)

- compute_config = ttnn.CreateComputeKernelConfig(
+ compute_config = ttnn.init_device_compute_kernel_config(
+     self.device.arch(),
math_fidelity=ttnn.MathFidelity.LoFi,
math_approx_mode=True,
fp32_dest_acc_en=True,
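The change is identical at every call site; the only per-site variation is where the architecture comes from (device.arch(), self.device.arch(), or mesh_device.arch() for multi-device models). A model that wants to avoid threading the arch through every call could wrap the helper once — make_compute_config below is a hypothetical convenience for illustration only, not part of this commit or of the ttnn API:

def make_compute_config(device, **kwargs):
    # Hypothetical wrapper (illustration only): supplies the architecture
    # automatically and forwards the usual compute-kernel keyword knobs.
    return ttnn.init_device_compute_kernel_config(device.arch(), **kwargs)

compute_config = make_compute_config(device, math_fidelity=ttnn.MathFidelity.LoFi)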