From dab971c965affe4f76d9b7553ee25c1ed201c973 Mon Sep 17 00:00:00 2001
From: mtairum <mtairum@tenstorrent.com>
Date: Thu, 21 Nov 2024 17:51:31 +0000
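Subject: [PATCH] #0: Remove debug code to speed up demo

Drop the per-iteration debug block that converted the decode input,
the layer-0 K/V cache and the rotary cos/sin matrices to torch tensors
with ttnn.to_torch and compared every batch entry against entry 0,
printing any mismatch. Also remove the two TODO comments above the
get_rot_mats call that follows begin_trace_capture.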
---
 models/demos/llama3/demo/demo.py | 47 --------------------------------
 1 file changed, 47 deletions(-)

diff --git a/models/demos/llama3/demo/demo.py b/models/demos/llama3/demo/demo.py
index de80ee2961d6..0ce7216353c3 100644
--- a/models/demos/llama3/demo/demo.py
+++ b/models/demos/llama3/demo/demo.py
@@ -449,8 +449,6 @@ def run_llama3_demo(user_input, single_layer, mesh_device, instruct_mode, is_ci_
         trace_id = ttnn.begin_trace_capture(mesh_device, cq_id=0)
 
         decode_input = ttnn.unsqueeze_to_4D(tt_embd(tt_out_tok))
-        # TODO Miguel: I think the problem is here, not updating the get rot mats
-        # The problem is that the get_rot_mats is using embedding that ends up on the host.
         rot_mats = rope_setup.get_rot_mats(rot_mat_idxs)
         tt_out = tt_model(
             decode_input,
@@ -532,51 +530,6 @@ def run_llama3_demo(user_input, single_layer, mesh_device, instruct_mode, is_ci_
             )[0, 0, 0, :batch_size]
             ttnn.record_event(1, write_event)
 
-            # TODO Miguel Remove
-            print("==== ITERATION", iteration, "====")
-            # Check input
-            input_torch = ttnn.to_torch(decode_input, mesh_composer=ttnn.ConcatMeshToTensor(mesh_device, dim=3))
-            for i in range(batch_size):
-                input_equal = torch.eq(input_torch[:, :, 0, :], input_torch[:, :, i, :]).all()
-                if not input_equal:
-                    print("Batch", i, "input not equal")
-
-            # Check output
-            for i in range(batch_size):
-                out_equal = torch.eq(tt_output_torch[0], tt_output_torch[i])
-                if not out_equal:
-                    print("Batch", i, "output not equal")
-
-            # Check KV cache [Mismatch]
-            k_cache = ttnn.to_torch(
-                tt_model.layers[0].attention.layer_past[0], mesh_composer=ttnn.ConcatMeshToTensor(mesh_device, dim=1)
-            )
-            v_cache = ttnn.to_torch(
-                tt_model.layers[0].attention.layer_past[1], mesh_composer=ttnn.ConcatMeshToTensor(mesh_device, dim=1)
-            )
-            for i in range(batch_size):
-                k_equal = torch.eq(k_cache[0, :, :, :], k_cache[i, :, :, :]).all()
-                v_equal = torch.eq(v_cache[0, :, :, :], v_cache[i, :, :, :]).all()
-                if not k_equal:
-                    print("Batch", i, "k_cache not equal")
-                    # print(f"PCC = {comp_pcc(k_cache[0,:,:,:], k_cache[i,:,:,:])}")
-                if not v_equal:
-                    print("Batch", i, "v_cache not equal")
-                    # print(f"PCC = {comp_pcc(v_cache[0,:,:,:], v_cache[i,:,:,:])}")
-
-            # Check rot mats [All equal]
-            cos_out = ttnn.to_torch(rot_mats[0], mesh_composer=ttnn.ConcatMeshToTensor(mesh_device, dim=0))[0, :, :, :]
-            sin_out = ttnn.to_torch(rot_mats[1], mesh_composer=ttnn.ConcatMeshToTensor(mesh_device, dim=0))[0, :, :, :]
-
-            for i in range(batch_size):
-                cos_equal = torch.eq(cos_out[0, :, :], cos_out[i, :, :]).all()
-                sin_equal = torch.eq(sin_out[0, :, :], sin_out[i, :, :]).all()
-                if not cos_equal:
-                    print("Batch", i, "cos not equal")
-                if not sin_equal:
-                    print("Batch", i, "sin not equal")
-            ###########
-
             # Save output token to print out later
             for user in range(batch_size):
                 user_tok = tt_output_torch[user].tolist()