#14474: Fix OoO issues for Llama3 tests on CI
mtairum committed Nov 15, 2024
1 parent c9b9db0 commit d9c5fa4
Showing 2 changed files with 19 additions and 13 deletions.
models/demos/llama3/tt/model_config.py (21 changes: 14 additions & 7 deletions)
@@ -139,13 +139,20 @@ def __init__(self, mesh_device, instruct=False, dummy_weights=False, max_batch_s
         # Reduce full 128k context length for combinations with memory constraints
         # Currently: n150 8b and t3k 70b with 8b/8b/8b MLPs
         # Default folder location for weights and cached files
         # TODO Generalize for all llama3 weights
-        is_8b = self.dim == 4096 and self.n_layers == 32
-        is_70b = self.dim == 8192 and self.n_layers == 80
-        if self.num_devices == 1 and is_8b or is_70b:
-            self.max_seq_len = 8192 * 4  # 32k
-            self.kv_seq_len = 8192 * 4  # 32k
-            self.sliding_window = 8192 * 4  # 32k
+        # FIXME: Setup the max cache size accordingly depending on the target model, architecture and test type.
+        if (
+            self.num_devices <= 2
+        ):  # for 1-chip or 2-chip devices limit the seqlen to 16K (to avoid OoO on N150/N300 CI tests)
+            self.max_seq_len = 1024 * 16
+            self.kv_seq_len = 1024 * 16
+            self.sliding_window = 1024 * 16
+
+        if (
+            self.n_layers == 1
+        ):  # When running a single layer just reduce the seq len to 128, since we won't be decoding that many iterations
+            self.max_seq_len = 128
+            self.kv_seq_len = 128
+            self.sliding_window = 128
 
         # Some consumers like SentencePiece only accept str not Path for files
         self.model_base_path = Path(self.DEFAULT_CKPT_DIR)
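
A note on the removed condition: since `and` binds tighter than `or` in Python, `self.num_devices == 1 and is_8b or is_70b` parsed as `(self.num_devices == 1 and is_8b) or is_70b`. That matches the old comment's intent (n150 8b plus any 70b run), but it reads ambiguously without parentheses; the replacement keys only on `num_devices` and `n_layers`. Below is a minimal, self-contained sketch of the new capping logic. `SeqLenConfig` and `apply_ci_caps` are hypothetical stand-ins for the real `ModelArgs.__init__` code, carrying only the fields the caps touch.

from dataclasses import dataclass


@dataclass
class SeqLenConfig:
    """Hypothetical stand-in for ModelArgs; only the fields the caps touch."""

    num_devices: int
    n_layers: int
    max_seq_len: int = 8192 * 16  # 128k full context by default
    kv_seq_len: int = 8192 * 16
    sliding_window: int = 8192 * 16

    def apply_ci_caps(self) -> None:
        # 1-chip (N150) and 2-chip (N300) devices get a 16K cap so the
        # CI tests stay within memory limits.
        if self.num_devices <= 2:
            self.max_seq_len = 1024 * 16
            self.kv_seq_len = 1024 * 16
            self.sliding_window = 1024 * 16

        # Single-layer test runs decode only a handful of iterations,
        # so 128 tokens of context is plenty.
        if self.n_layers == 1:
            self.max_seq_len = 128
            self.kv_seq_len = 128
            self.sliding_window = 128


# An N300 (2-chip) full-model config lands on the 16K cap:
cfg = SeqLenConfig(num_devices=2, n_layers=32)
cfg.apply_ci_caps()
assert cfg.max_seq_len == 1024 * 16

# A single-layer smoke test drops all the way to 128:
cfg = SeqLenConfig(num_devices=1, n_layers=1)
cfg.apply_ci_caps()
assert cfg.sliding_window == 128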
tests/scripts/run_python_model_tests.sh (11 changes: 5 additions & 6 deletions)
@@ -49,13 +49,12 @@ run_python_model_tests_wormhole_b0() {
     llama1b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/
     # Llama3.2-3B
     llama3b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/
-    # Llama3.2-11B (#Skip: Weights too big for single-chip ci VM)
+    # Llama3.2-11B
     llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/
 
-    # FIXME Issue #14474
     # Run all Llama3 tests for 8B, 1B, and 3B weights - dummy weights with tight PCC check
-    # for llama_dir in "$llama8b" "$llama1b" "$llama3b"; do
-    #   LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick" ; fail+=$?
-    #   echo "LOG_METAL: Llama3 tests for $llama_dir completed"
-    # done
+    for llama_dir in "$llama1b" "$llama3b" "$llama8b" "$llama11b"; do
+        LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick" ; fail+=$?
+        echo "LOG_METAL: Llama3 tests for $llama_dir completed"
+    done
 }
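
The re-enabled loop now exercises the 1B, 3B, 8B, and 11B weights in sequence. As a rough illustration only, here is a Python rendering of the same loop (the pytest invocation and environment variables are copied from the script; the 8B path is defined above the excerpted hunk, so it is omitted here rather than guessed):

import os
import subprocess
import sys

# Weight directories as they appear in the script above; add the 8B path
# from the part of the script not shown in this hunk.
LLAMA_DIRS = [
    "/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/",
    "/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/",
    "/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/",
]

fail = 0
for llama_dir in LLAMA_DIRS:
    # Mirror the script: LLAMA_DIR selects the weights, WH_ARCH_YAML the
    # dispatch config, and -k "quick" picks the fast test variants.
    env = dict(os.environ, LLAMA_DIR=llama_dir, WH_ARCH_YAML="wormhole_b0_80_arch_eth_dispatch.yaml")
    result = subprocess.run(
        ["pytest", "-n", "auto", "models/demos/llama3/tests/test_llama_model.py", "-k", "quick"],
        env=env,
    )
    fail += result.returncode
    print(f"LOG_METAL: Llama3 tests for {llama_dir} completed")

sys.exit(1 if fail else 0)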
