#0: Update debug max seqlen

tenstorrent · Nov 25, 2024 · 50362ec
1 parent dab971c
commit 50362ec
Showing 1 changed file with 1 addition and 2 deletions.
diff --git a/models/demos/llama3/demo/demo.py b/models/demos/llama3/demo/demo.py
@@ -198,9 +198,8 @@ def run_llama3_demo(user_input, single_layer, mesh_device, instruct_mode, is_ci_
     model_args = TtModelArgs(mesh_device, instruct=instruct_mode, max_batch_size=batch_size)
     tokenizer = Tokenizer(model_args.tokenizer_path)
 
-    # TODO Miguel: Setup max sequence length depending on the model being used to actually fit on device
     # Reduce max seq len and KV cache seq_len params to speed up the test
-    model_args.max_seq_len = 512
+    model_args.max_seq_len = 1024  # TODO REVERT: Miguel: Setup max sequence length depending on the model being used to actually fit on device
     model_args.kv_seq_len = model_args.max_seq_len
 
     if single_layer: