From d9c5fa408101e7f11609cfdfdd0f02b14ba6c578 Mon Sep 17 00:00:00 2001
From: mtairum
Date: Fri, 15 Nov 2024 19:10:03 +0000
Subject: [PATCH] #14474: Fix OOM issues for Llama3 tests on CI

---
 models/demos/llama3/tt/model_config.py  | 21 ++++++++++++++-------
 tests/scripts/run_python_model_tests.sh | 13 ++++++-------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/models/demos/llama3/tt/model_config.py b/models/demos/llama3/tt/model_config.py
index d8e8bf7c4fe3..33d3c8b9f67f 100644
--- a/models/demos/llama3/tt/model_config.py
+++ b/models/demos/llama3/tt/model_config.py
@@ -139,13 +139,20 @@ def __init__(self, mesh_device, instruct=False, dummy_weights=False, max_batch_s
         # Reduce full 128k context length for combinations with memory constraints
         # Currently: n150 8b and t3k 70b with 8b/8b/8b MLPs
         # Default folder location for weights and cached files
-        # TODO Generalize for all llama3 weights
-        is_8b = self.dim == 4096 and self.n_layers == 32
-        is_70b = self.dim == 8192 and self.n_layers == 80
-        if self.num_devices == 1 and is_8b or is_70b:
-            self.max_seq_len = 8192 * 4  # 32k
-            self.kv_seq_len = 8192 * 4  # 32k
-            self.sliding_window = 8192 * 4  # 32k
+        # FIXME: Set the max cache size according to the target model, architecture, and test type.
+        if (
+            self.num_devices <= 2
+        ):  # For 1-chip or 2-chip devices, limit the seq len to 16k to avoid OOM on N150/N300 CI tests
+            self.max_seq_len = 1024 * 16
+            self.kv_seq_len = 1024 * 16
+            self.sliding_window = 1024 * 16
+
+        if (
+            self.n_layers == 1
+        ):  # When running a single layer, reduce the seq len to 128, since we won't be decoding many iterations
+            self.max_seq_len = 128
+            self.kv_seq_len = 128
+            self.sliding_window = 128
 
         # Some consumers like SentencePiece only accept str not Path for files
         self.model_base_path = Path(self.DEFAULT_CKPT_DIR)
diff --git a/tests/scripts/run_python_model_tests.sh b/tests/scripts/run_python_model_tests.sh
index 09aca8be7695..3b17ca573123 100755
--- a/tests/scripts/run_python_model_tests.sh
+++ b/tests/scripts/run_python_model_tests.sh
@@ -49,13 +49,12 @@ run_python_model_tests_wormhole_b0() {
     llama1b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/
     # Llama3.2-3B
     llama3b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/
-    # Llama3.2-11B (#Skip: Weights too big for single-chip ci VM)
+    # Llama3.2-11B
    llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/
-    # FIXME Issue #14474
 
-    # Run all Llama3 tests for 8B, 1B, and 3B weights - dummy weights with tight PCC check
-    # for llama_dir in "$llama8b" "$llama1b" "$llama3b"; do
-    #    LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick" ; fail+=$?
-    #    echo "LOG_METAL: Llama3 tests for $llama_dir completed"
-    # done
+    # Run all Llama3 tests for 1B, 3B, 8B, and 11B weights - dummy weights with tight PCC check
+    for llama_dir in "$llama1b" "$llama3b" "$llama8b" "$llama11b"; do
+        LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick" ; fail+=$?
+        echo "LOG_METAL: Llama3 tests for $llama_dir completed"
+    done
 }
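
Each iteration of the new loop is a plain pytest invocation, so a single model can be checked locally before the full CI sweep. A minimal sketch for the 1B weights, assuming the /mnt/MLPerf weights mount and the wormhole_b0 dispatch YAML referenced by the script are available:

    # Quick Llama3 model test for one set of weights (Llama3.2-1B shown)
    LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/ \
    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml \
    pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick"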