diff --git a/models/demos/llama3/demo/demo.py b/models/demos/llama3/demo/demo.py
index 975427aacc2..a2f443f3712 100644
--- a/models/demos/llama3/demo/demo.py
+++ b/models/demos/llama3/demo/demo.py
@@ -277,8 +277,7 @@ def run_llama3_demo(user_input, single_layer, mesh_device, instruct_mode, is_ci_
     profiler.end("loading_weights_to_device")
     logger.info("Finished loading weights to device.")
 
-    # TODO Change this back to 100
-    max_generated_tokens = 20  # Maximum number of tokens to generate per user
+    max_generated_tokens = 100  # Maximum number of tokens to generate per user
     num_tokens_generated_decode = []
 
     logger.info("Starting inference...")
@@ -422,7 +421,6 @@ def run_llama3_demo(user_input, single_layer, mesh_device, instruct_mode, is_ci_
     # Get cos/sin matrices for the current position of each user
     rot_mats = rope_setup.get_rot_mats(current_pos_padded)
     rot_mat_idxs = rope_setup.get_rot_idxs(current_pos_padded)
 
-    # Compile
     logger.info(f"Compiling model trace...")
     decode_input = ttnn.unsqueeze_to_4D(tt_embd(tt_out_tok))
diff --git a/models/demos/llama3/tests/test_llama_attention.py b/models/demos/llama3/tests/test_llama_attention.py
index f3c50eb40be..a7bb88dc2d5 100644
--- a/models/demos/llama3/tests/test_llama_attention.py
+++ b/models/demos/llama3/tests/test_llama_attention.py
@@ -34,8 +34,14 @@
 )
 @pytest.mark.parametrize(
     "paged_attention",
-    (True, False),
-    ids=("paged_attention", "non_paged_attention"),
+    (
+        True,
+        # False,
+    ),
+    ids=(
+        "paged_attention",
+        # "non_paged_attention",
+    ),
 )
 @pytest.mark.parametrize(
     "paged_attention_params",
@@ -43,7 +49,7 @@
 )
 @pytest.mark.parametrize(
     "batch_size",
-    (32,),  # TODO Miguel: should we include batch==1 in the unit tests as well?
+    (1,),
 )
 @pytest.mark.parametrize(
     "max_seq_len",
diff --git a/models/demos/llama3/tests/test_llama_perf.py b/models/demos/llama3/tests/test_llama_perf.py
index 55dd13f7aa3..24daaa38f18 100644
--- a/models/demos/llama3/tests/test_llama_perf.py
+++ b/models/demos/llama3/tests/test_llama_perf.py
@@ -36,15 +36,6 @@
         (1024, 30),
     ),
 )
-@pytest.mark.parametrize(
-    "mesh_device",
-    [
-        {"N150": (1, 1), "N300": (1, 2), "T3K": (1, 8), "TG": (8, 4)}.get(
-            os.environ.get("FAKE_DEVICE"), len(ttnn.get_device_ids())
-        )
-    ],
-    indirect=True,
-)
 @pytest.mark.parametrize(
     "paged_attention",
     (
diff --git a/models/demos/llama3/tt/llama_common.py b/models/demos/llama3/tt/llama_common.py
index 4ca08fbcc43..43ca95bbe74 100644
--- a/models/demos/llama3/tt/llama_common.py
+++ b/models/demos/llama3/tt/llama_common.py
@@ -18,7 +18,7 @@ def forward(self, x):
 
 # Default configuration for Paged Attention
 class PagedAttentionConfig:
-    def __init__(self, block_size=64, max_num_blocks=2048):
+    def __init__(self, block_size=32, max_num_blocks=1024):
         self.block_size = block_size
         self.max_num_blocks = max_num_blocks
 
diff --git a/models/demos/llama3/tt/model_config.py b/models/demos/llama3/tt/model_config.py
index 35c0a7bbf36..5ddb79f166c 100644
--- a/models/demos/llama3/tt/model_config.py
+++ b/models/demos/llama3/tt/model_config.py
@@ -25,8 +25,8 @@
 
 # TODO: Miguel: Remove from here. I've added this to llama common instead, and each test should define their own values
 class PagedAttentionConfig:
-    block_size = 64
-    max_num_blocks = 2048
+    block_size = 32
+    max_num_blocks = 1024
 
 
 class TtModelArgs:
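
The diff moves PagedAttentionConfig into llama_common.py and, per the TODO left in model_config.py, each test is expected to define its own values rather than rely on the shared defaults. A minimal sketch of what that could look like in a test, assuming only the constructor shown in this diff; the import path follows the usual repo-root module layout, and the max_cached_tokens derivation is illustrative, not part of the change:

from models.demos.llama3.tt.llama_common import PagedAttentionConfig

# Test-specific values instead of the new, smaller defaults
# (block_size=32, max_num_blocks=1024 after this change).
paged_attention_config = PagedAttentionConfig(block_size=64, max_num_blocks=2048)

# Total KV-cache capacity in tokens implied by this configuration (illustrative).
max_cached_tokens = paged_attention_config.block_size * paged_attention_config.max_num_blocks
assert max_cached_tokens == 131072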