From d9c5fa408101e7f11609cfdfdd0f02b14ba6c578 Mon Sep 17 00:00:00 2001
From: mtairum
Date: Fri, 15 Nov 2024 19:10:03 +0000
Subject: [PATCH] #14474: Fix OOM issues for Llama3 tests on CI

---
 models/demos/llama3/tt/model_config.py  | 21 ++++++++++++++-------
 tests/scripts/run_python_model_tests.sh | 13 ++++++-------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/models/demos/llama3/tt/model_config.py b/models/demos/llama3/tt/model_config.py
index d8e8bf7c4fe3..33d3c8b9f67f 100644
--- a/models/demos/llama3/tt/model_config.py
+++ b/models/demos/llama3/tt/model_config.py
@@ -139,13 +139,20 @@ def __init__(self, mesh_device, instruct=False, dummy_weights=False, max_batch_s
         # Reduce full 128k context length for combinations with memory constraints
         # Currently: n150 8b and t3k 70b with 8b/8b/8b MLPs
         # Default folder location for weights and cached files
-        # TODO Generalize for all llama3 weights
-        is_8b = self.dim == 4096 and self.n_layers == 32
-        is_70b = self.dim == 8192 and self.n_layers == 80
-        if self.num_devices == 1 and is_8b or is_70b:
-            self.max_seq_len = 8192 * 4  # 32k
-            self.kv_seq_len = 8192 * 4  # 32k
-            self.sliding_window = 8192 * 4  # 32k
+        # FIXME: Set the max cache size according to the target model, architecture, and test type.
+        if (
+            self.num_devices <= 2
+        ):  # For 1-chip or 2-chip devices, limit the seq len to 16k to avoid OOM on N150/N300 CI tests
+            self.max_seq_len = 1024 * 16
+            self.kv_seq_len = 1024 * 16
+            self.sliding_window = 1024 * 16
+
+        if (
+            self.n_layers == 1
+        ):  # When running a single layer, reduce the seq len to 128, since we won't be decoding many iterations
+            self.max_seq_len = 128
+            self.kv_seq_len = 128
+            self.sliding_window = 128
 
         # Some consumers like SentencePiece only accept str not Path for files
         self.model_base_path = Path(self.DEFAULT_CKPT_DIR)
diff --git a/tests/scripts/run_python_model_tests.sh b/tests/scripts/run_python_model_tests.sh
index 09aca8be7695..3b17ca573123 100755
--- a/tests/scripts/run_python_model_tests.sh
+++ b/tests/scripts/run_python_model_tests.sh
@@ -49,13 +49,12 @@ run_python_model_tests_wormhole_b0() {
     llama1b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/
     # Llama3.2-3B
     llama3b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/
-    # Llama3.2-11B (#Skip: Weights too big for single-chip ci VM)
+    # Llama3.2-11B
    llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/
-    # FIXME Issue #14474
 
-    # Run all Llama3 tests for 8B, 1B, and 3B weights - dummy weights with tight PCC check
-    # for llama_dir in "$llama8b" "$llama1b" "$llama3b"; do
-    #    LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick" ; fail+=$?
-    #    echo "LOG_METAL: Llama3 tests for $llama_dir completed"
-    # done
+    # Run all Llama3 tests for 1B, 3B, 8B, and 11B weights - dummy weights with tight PCC check
+    for llama_dir in "$llama1b" "$llama3b" "$llama8b" "$llama11b"; do
+        LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick" ; fail+=$?
+        echo "LOG_METAL: Llama3 tests for $llama_dir completed"
+    done
 }
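
Each iteration of the new loop is a plain pytest invocation, so a single model can be checked locally before the full CI sweep. A minimal sketch for the 1B weights, assuming the /mnt/MLPerf weights mount and the wormhole_b0 dispatch YAML referenced by the script are available:

    # Quick Llama3 model test for one set of weights (Llama3.2-1B shown)
    LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/ \
    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml \
    pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick"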