Skip to content

Commit

Permalink
#0: Update debug max seqlen
Browse files Browse the repository at this point in the history
  • Loading branch information
mtairum committed Nov 25, 2024
1 parent dab971c commit 50362ec
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions models/demos/llama3/demo/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,8 @@ def run_llama3_demo(user_input, single_layer, mesh_device, instruct_mode, is_ci_
model_args = TtModelArgs(mesh_device, instruct=instruct_mode, max_batch_size=batch_size)
tokenizer = Tokenizer(model_args.tokenizer_path)

- # TODO Miguel: Setup max sequence length depending on the model being used to actually fit on device
# Reduce max seq len and KV cache seq_len params to speed up the test
- model_args.max_seq_len = 512
+ model_args.max_seq_len = 1024 # TODO REVERT: Miguel: Setup max sequence length depending on the model being used to actually fit on device
model_args.kv_seq_len = model_args.max_seq_len

if single_layer:
Expand Down

0 comments on commit 50362ec

Please sign in to comment.