Llama3.2-Vision: Add reference submodule and tests #14051

Merged: 21 commits from llama32-vision into main, Oct 29, 2024 (diff below shown from 16 of the 21 commits).

Commits:
bab4a1d — #13368: Move repeat interleave to xattn cache generation. (cglagovichTT, Oct 18, 2024)
ee48aaa — #0: Clean up demo, enable arbitrary padding for multimodal text sequence (cglagovichTT, Oct 21, 2024)
29ae070 — #13368: Add llama_models Meta reference for Llama3.2 as a submodule (cglagovichTT, Oct 21, 2024)
303be06 — #13368: Change reference imports to use new submodule (cglagovichTT, Oct 21, 2024)
ef329cd — #13368: Clean up comments after pushing repeat_interleave into xattn_… (cglagovichTT, Oct 21, 2024)
27fa6d5 — #13368: Clean up vision tests. Unify assertions and pcc checks. Fix L… (cglagovichTT, Oct 21, 2024)
f11162c — #13368: Fix LM head splits calculation (cglagovichTT, Oct 22, 2024)
f64f65a — #13368: For all vision tests, get model-specific parameters from mode… (cglagovichTT, Oct 23, 2024)
3a39037 — #13368: Fixup tests (cglagovichTT, Oct 23, 2024)
8689d40 — #13368: Add vision tests to unit, frequent, and demo (cglagovichTT, Oct 23, 2024)
80db0b8 — #13368: Relaxed 11B perf estimate to avoid error in CI (mtairum, Oct 22, 2024)
dd10a03 — #0: Added Llama-models python requirements (cglagovichTT, Oct 23, 2024)
2c5ff7f — #13368: Fixup mesh_device when not passed FAKE_DEVICE (cglagovichTT, Oct 23, 2024)
4e79091 — Merge branch 'main' into llama32-vision (cglagovichTT, Oct 24, 2024)
698ac12 — #0: Merge branch 'main' into llama32-vision (cglagovichTT, Oct 25, 2024)
7eef19b — #0: Merge branch 'main' into llama32-vision (cglagovichTT, Oct 28, 2024)
bc99440 — #13368: Remove llama-specific packages from requirements-dev.txt (cglagovichTT, Oct 28, 2024)
d59f132 — Merge branch 'main' into llama32-vision (cglagovichTT, Oct 28, 2024)
df3e545 — #13368: Remove llama_models as submodule. Move its install to llama3 … (cglagovichTT, Oct 29, 2024)
3807f76 — #13368: Fix resource path in multimodal demos. (cglagovichTT, Oct 29, 2024)
2a0eb87 — Merge branch 'main' into llama32-vision (cglagovichTT, Oct 29, 2024)
1 change: 1 addition & 0 deletions .github/workflows/t3000-demo-tests-impl.yaml
@@ -12,6 +12,7 @@ jobs:
   { name: "t3k_falcon40b_tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 50, owner_id: U053W15B6JF}, #Djordje Ivanovic
   { name: "t3k_llama3_70b_tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
   { name: "t3k_llama3_tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 30, owner_id: U03PUAKE719}, # Miguel Tairum
+  { name: "t3k_llama3_vision_tests", arch: wormhole_b0, cmd: run_t3000_llama3_vision_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
   { name: "t3k_falcon7b_tests", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 90, owner_id: U05RWH3QUPM}, #Salar Hosseini
   { name: "t3k_mixtral_tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 50, owner_id: U03PUAKE719}, # Miguel Tairum
 ]
2 changes: 2 additions & 0 deletions .github/workflows/t3000-frequent-tests-impl.yaml
@@ -15,6 +15,8 @@ jobs:
   { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 120, owner_id: U04S2UV6L8N}, #Sofija Jovic
   { name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
   { name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
+  { name: "t3k llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
+  { name: "t3k n300 mesh llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
   { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
   { name: "t3k resnet tests", arch: wormhole_b0, cmd: run_t3000_resnet_tests, timeout: 30, owner_id: U013121KDH9}, #Austin Ho
 ]
2 changes: 2 additions & 0 deletions .github/workflows/t3000-unit-tests-impl.yaml
@@ -15,6 +15,8 @@ jobs:
   { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 30, owner_id: U053W15B6JF}, #Djordje Ivanovic
   { name: "t3k llama3-small tests", arch: wormhole_b0, cmd: run_t3000_llama3-small_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
   { name: "t3k llama3.2-11b tests", arch: wormhole_b0, cmd: run_t3000_llama3.2-11b_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
+  { name: "t3k llama3.2-11b-vision tests", arch: wormhole_b0, cmd: run_t3000_llama3.2-11b-vision_unit_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
+  { name: "t3k n300 mesh llama3.2-11b-vision tests", arch: wormhole_b0, cmd: run_t3000_spoof_n300_llama3.2-11b-vision_unit_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
   { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
   { name: "t3k grok tests", arch: wormhole_b0, cmd: run_t3000_grok_tests, timeout: 30, owner_id: U03HY7MK4BT}, #Mark O'Connor
   { name: "t3k unet shallow tests", arch: wormhole_b0, cmd: run_t3000_unet_shallow_tests, timeout: 30, owner_id: U06ECNVR0EN}, #Evan Smal
3 changes: 3 additions & 0 deletions .gitmodules
@@ -28,3 +28,6 @@
 [submodule "tt_metal/third_party/tt_llk_blackhole"]
 	path = tt_metal/third_party/tt_llk_blackhole
 	url = https://github.com/tenstorrent/tt-llk-bh.git
+[submodule "models/demos/llama3/reference/llama_models"]
+	path = models/demos/llama3/reference/llama_models
+	url = https://github.com/tenstorrent/llama-models.git
108 changes: 108 additions & 0 deletions models/demos/llama3/demo/multimodal_demo_chat.py
@@ -0,0 +1,108 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
# SPDX-License-Identifier: Apache-2.0

from pathlib import Path
from typing import Optional
from loguru import logger

from PIL import Image as PIL_Image
from termcolor import cprint

from models.demos.llama3.demo.multimodal_demo_text import create_multimodal_model
import models.demos.llama3.reference.llama_models.models.llama3.reference_impl.generation as llama_reference_generation

from models.demos.llama3.reference.llama_models.models.llama3.api.datatypes import ImageMedia, UserMessage

THIS_DIR = Path(__file__).parent.parent.resolve() / "reference/llama_models/models/scripts/"

import torch
import pytest
import os
import ttnn


@pytest.mark.parametrize(
    "mesh_device",
    [
        {"N150": (1, 1), "N300": (1, 2), "T3K": (1, 8), "TG": (8, 4)}.get(
            os.environ.get("FAKE_DEVICE"), len(ttnn.get_device_ids())
        )
    ],
    indirect=True,
)
@pytest.mark.parametrize(
    "target",
    ("tt", "cpu"),
)
@pytest.mark.parametrize(
    "warmup_iters",
    (0, 1),
)
def test_llama_multimodal_demo_chat(
    mesh_device,
    target,
    warmup_iters,
    temperature: float = 0.5,
    top_p: float = 0.9,
    max_seq_len: int = 512,
    max_batch_size: int = 4,
    max_gen_len: Optional[int] = 200,
    model_parallel_size: Optional[int] = None,
):
    mesh_device.enable_program_cache()
    mesh_device.enable_async(True)
    ckpt_dir = os.environ["LLAMA_DIR"]
    tokenizer_path = str(Path(ckpt_dir) / "tokenizer.model")

    logger.info(f"Creating reference model from checkpoint in '{ckpt_dir}'")
    generator = llama_reference_generation.Llama.build(
        ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=max_seq_len,
        max_batch_size=max_batch_size,
        model_parallel_size=model_parallel_size,
    )

    if target == "tt":
        logger.info(f"Creating TT model on {len(mesh_device.get_devices())} devices")
        model = create_multimodal_model(generator.args, mesh_device)
        generator.model = model

    # image understanding
    dialogs = []
    with open(THIS_DIR / "resources/dog.jpg", "rb") as f:
        img = PIL_Image.open(f).convert("RGB")

    dialogs = [
        [
            UserMessage(
                content=[
                    ImageMedia(image=img),
                    "Describe this image in two sentences",
                ],
            )
        ],
    ]
    # text only
    dialogs += [
        [UserMessage(content="what is the recipe of mayonnaise in two sentences?")],
    ]

    print(f"Running chat completion on {target}")
    for _ in range(warmup_iters + 1):
        for dialog in dialogs:
            result = generator.chat_completion(
                dialog,
                max_gen_len=max_gen_len,
                temperature=temperature,
                top_p=top_p,
            )

            for msg in dialog:
                print(f"{msg.role.capitalize()}: {msg.content}\n")

            out_message = result.generation
            print(f"> {out_message.role.capitalize()}: {out_message.content}")
            for t in out_message.tool_calls:
                print(f"  Tool call: {t.tool_name} ({t.arguments})")
            print("\n==================================\n")
66 changes: 30 additions & 36 deletions models/demos/llama3/demo/multimodal_demo_text.py
@@ -1,26 +1,18 @@
 # SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

 # SPDX-License-Identifier: Apache-2.0

 from pathlib import Path
 from typing import Optional
 from loguru import logger

 from PIL import Image as PIL_Image
 from termcolor import cprint

-import importlib
-
-llama_reference_generation = importlib.import_module(
-    "models.demos.t3000.llama2_70b.reference.llama-models.models.llama3.reference_impl.generation"
-)
-
-# Must import from reference for formatter to understand type of ImageMedia
-datatypes = importlib.import_module("models.demos.t3000.llama2_70b.reference.llama-models.models.llama3.api.datatypes")
-ImageMedia = datatypes.ImageMedia
-
-# THIS_DIR = Path(__file__).parent.resolve()
-# TODO: Generalize not to cglagovich home :)
-THIS_DIR = Path("/home/cglagovich/tt-metal/models/demos/t3000/llama2_70b/reference/llama-models/models/scripts/")
+import models.demos.llama3.reference.llama_models.models.llama3.reference_impl.generation as llama_reference_generation
+
+from models.demos.llama3.reference.llama_models.models.llama3.api.datatypes import ImageMedia
+
+THIS_DIR = Path(__file__).parent.parent.resolve() / "reference/llama_models/models/scripts/"

 import torch
 import pytest
@@ -59,14 +51,19 @@ def create_multimodal_model(model_args, mesh_device, dtype=ttnn.bfloat16):
     "target",
     ("tt", "cpu"),
 )
+@pytest.mark.parametrize(
+    "warmup_iters",
+    (0, 1),
+)
 def test_llama_multimodal_demo_text(
     mesh_device,
     target,
-    temperature: float = 0,
+    warmup_iters,
+    temperature: float = 0.5,
     top_p: float = 0.9,
     max_seq_len: int = 512,
     max_batch_size: int = 4,
-    max_gen_len: Optional[int] = None,
+    max_gen_len: Optional[int] = 200,
     model_parallel_size: Optional[int] = None,
 ):
     mesh_device.enable_program_cache()
@@ -96,33 +93,30 @@ def test_llama_multimodal_demo_text(

     with open(THIS_DIR / "resources/ocr_image.jpeg", "rb") as f:
         ocr_image = PIL_Image.open(f).convert("RGB")
-    # with open(THIS_DIR / "resources/clutter.jpeg", "rb") as f:
-    #     clutter = PIL_Image.open(f).convert("RGB")
+
+    with open(THIS_DIR / "resources/clutter.jpeg", "rb") as f:
+        clutter = PIL_Image.open(f).convert("RGB")

     interleaved_contents = [
         # text only
-        # "The color of the sky is blue but sometimes it can also be",
+        "The color of the sky is blue but sometimes it can also be",
         # image understanding
-        # [
-        #     ImageMedia(image=img),
-        #     "If I had to write a haiku for this one",
-        # ],
+        [ImageMedia(image=img), "If I had to write a haiku for this one"],
+        [ImageMedia(image=img2), "Counting the number of individual spaghetti strands in this image"],
         [ImageMedia(image=ocr_image), "The full text in this image is as follows"],
-        # [
-        #     ImageMedia(image=clutter),
-        #     "The count of vases, books, and miscellaneous items in this image is",
-        # ]
+        [ImageMedia(image=clutter), "The count of vases, books, and miscellaneous items in this image is"],
     ]

     print(f"Running text completion on {target}")
-    for content in interleaved_contents:
-        result = generator.text_completion(
-            content,
-            max_gen_len=max_gen_len,
-            temperature=temperature,
-            top_p=top_p,
-        )
-
-        cprint(f"{content}", end="")
-        cprint(f"{result.generation}", color="yellow")
-        print("\n==================================\n")
+    for _ in range(warmup_iters + 1):
+        for content in interleaved_contents:
+            result = generator.text_completion(
+                content,
+                max_gen_len=max_gen_len,
+                temperature=temperature,
+                top_p=top_p,
+            )
+
+            cprint(f"{content}", end="")
+            cprint(f"{result.generation}", color="yellow")
+            print("\n==================================\n")
25 changes: 23 additions & 2 deletions models/demos/llama3/lt
@@ -184,7 +184,14 @@ def main(stdscr):
     commands = parse_list(command_input, allow_space=False)

     # Generate combinations (reordered)
-    combinations = [(c, m, d) for c in commands for m in models for d in devices]
+    # combinations = [(c, m, d) for c in commands for m in models for d in devices]
+    combinations = [
+        (c, m, d)
+        for c in commands
+        for m in models
+        for d in devices
+        if not (m in ["11b", "11b-b"] and d == "n150")
+    ]

     # Create output entries
     for command, model, device in combinations:
@@ -230,7 +237,7 @@
             else:
                 # Ignore enter key when exiting
                 continue
-        elif c == curses.KEY_BACKSPACE or c == 127 or c == ord("x"):
+        elif c == curses.KEY_BACKSPACE or c == 127:
             if current_line < len(input_fields):
                 current_field = current_line
                 # Remove last character from current field
@@ -506,6 +513,19 @@ def run_entry_command(entry, screen_lock, output_entries, screen_needs_update):
     "model": "pytest models/demos/llama3/tests/test_llama_model.py::test_llama_model_inference[wormhole_b0-True-mesh_device0-full]",
     "model-prefill": "pytest models/demos/llama3/tests/test_llama_model_prefill.py::test_llama_model_inference[wormhole_b0-True-mesh_device0-4096]",
     "model-quick": "pytest models/demos/llama3/tests/test_llama_model.py -k quick",
+    "vision-mlp": "pytest models/demos/llama3/tests/multimodal/test_llama_image_mlp.py",
+    "vision-attn": "pytest models/demos/llama3/tests/multimodal/test_llama_image_attention.py",
+    "vision-block": "pytest models/demos/llama3/tests/multimodal/test_llama_image_block.py",
+    "vision-xfmr": "pytest models/demos/llama3/tests/multimodal/test_llama_image_transformer.py",
+    "vision-xattn": "pytest models/demos/llama3/tests/multimodal/test_llama_cross_attention.py",
+    "vision-xblock": "pytest models/demos/llama3/tests/multimodal/test_llama_cross_block.py",
+    "vision-conv": "pytest models/demos/llama3/tests/multimodal/test_llama_conv2d_patch.py",
+    "vision-class": "pytest models/demos/llama3/tests/multimodal/test_llama_class_embedding.py",
+    "vision-tile-pos": "pytest models/demos/llama3/tests/multimodal/test_llama_tile_position_embedding.py",
+    "vision-pos": "pytest models/demos/llama3/tests/multimodal/test_llama_positional_embedding.py",
+    "vision-encoder": "pytest models/demos/llama3/tests/multimodal/test_llama_vision_encoder.py",
+    "vision-text-xfmr": "pytest models/demos/llama3/tests/multimodal/test_llama_cross_attention_transformer_text.py",
+    "vision-vision-xfmr": "pytest models/demos/llama3/tests/multimodal/test_llama_cross_attention_transformer_vision.py",
 }

 # Check if the command is a shortcut and replace it if necessary
@@ -657,6 +677,7 @@ def get_llama_dir(model):
     "3b": os.environ.get("LLAMA_32_3B_DIR", "/proj_sw/user_dev/llama32-data/Llama3.2-3B-Instruct"),
     "8b": os.environ.get("LLAMA_31_8B_DIR", "/proj_sw/user_dev/llama31-8b-data/Meta-Llama-3.1-8B-Instruct"),
     "11b": os.environ.get("LLAMA_32_11B_DIR", "/proj_sw/user_dev/llama32-data/Llama3.2-11B-Vision-Instruct"),
+    "11b-b": os.environ.get("LLAMA_32_11B_BASE_DIR", "/proj_sw/user_dev/llama32-data/Llama3.2-11B-Vision"),
 }.get(model.lower(), "")

 if not llama_dir or not os.path.exists(llama_dir):
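Note: the weight-directory lookup in `lt` follows an env-override-with-default pattern: each model key maps to an environment variable that, when unset, falls back to a shared path under /proj_sw. A condensed sketch with just the two vision entries (the LLAMA_DIRS table shape and the error handling below are illustrative):

```python
import os

# Env override and default path per model key (subset of the table in `lt`).
LLAMA_DIRS = {
    "11b": ("LLAMA_32_11B_DIR", "/proj_sw/user_dev/llama32-data/Llama3.2-11B-Vision-Instruct"),
    "11b-b": ("LLAMA_32_11B_BASE_DIR", "/proj_sw/user_dev/llama32-data/Llama3.2-11B-Vision"),
}


def get_llama_dir(model: str) -> str:
    # Resolve the env override first, then fall back to the shared default.
    env_var, default = LLAMA_DIRS.get(model.lower(), ("", ""))
    llama_dir = os.environ.get(env_var, default) if env_var else ""
    if not llama_dir or not os.path.exists(llama_dir):
        raise FileNotFoundError(f"No weights found for '{model}'; set {env_var or 'the model dir env var'}")
    return llama_dir
```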
1 change: 1 addition & 0 deletions models/demos/llama3/reference/llama_models
Review comment (Collaborator):
Given that this submodule has a method to install the dependencies, can you dynamically install it whenever these tests are run? We should not be including everything under the sun in this repo.

Reply (Contributor, Author):
I resolved this by dynamically installing it in the t3k test scripts.
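For reference, a minimal sketch of the dynamic-install idea; the actual resolution lives in the t3k test shell scripts, so this Python helper and its name are hypothetical:

```python
import subprocess
import sys


def ensure_llama_models(spec: str = "git+https://github.com/tenstorrent/llama-models.git") -> None:
    """Hypothetical helper: install the llama-models reference package on
    demand before the vision tests run, instead of vendoring it in-repo."""
    try:
        import llama_models  # noqa: F401  # already installed, nothing to do
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", spec])
```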

Submodule llama_models added at c217d3
models/demos/llama3/tests/multimodal/test_llama_class_embedding.py
@@ -3,11 +3,9 @@
 # SPDX-License-Identifier: Apache-2.0

 ##### Python imports #####
-import math
 import pytest
 from loguru import logger
 import os
-import itertools

 ##### PyTorch imports #####
 import torch
@@ -56,18 +54,16 @@ def forward(self, x):
 @pytest.mark.parametrize(
     "mesh_device",
     [
-        {"N150": (1, 1), "N300": (1, 2), "T3K": (2, 4), "TG": (8, 4)}.get(
+        {"N150": (1, 1), "N300": (1, 2), "T3K": (1, 8), "TG": (8, 4)}.get(
             os.environ.get("FAKE_DEVICE"), len(ttnn.get_device_ids())
         )
     ],
     indirect=True,
 )
 @pytest.mark.parametrize(
-    "input_shape",
+    "bsz, num_concurrent_media, num_chunks",
     [
-        ((1, 4, 4, 1024, 1280)),
-        ((1, 4, 4, 1024 + 1, 1280)),
-        ((1, 4, 4, 1032, 1280)),
+        ((1, 4, 4)),
     ],
 )
 @pytest.mark.parametrize(
@@ -81,12 +77,14 @@ def test_llama_class_embedding_inference(
     use_program_cache,
     reset_seeds,
     # Input params
-    input_shape,
+    bsz,
+    num_concurrent_media,
+    num_chunks,
     layout,
     ensure_gc,
 ):
     dtype = ttnn.bfloat16
-    pcc = 0.9999
+    pcc_required = 0.9999

     mesh_device.enable_async(True)
@@ -97,13 +95,8 @@
         k[len(first_layer_prefix) :]: v for k, v in state_dict.items() if (k.startswith(first_layer_prefix))
     }

-    (
-        bsz,
-        num_concurrent_media,
-        num_chunks,
-        ntok,
-        dim,
-    ) = input_shape
+    ntok = nearest_32(model_args.vision_chunk_ntok)
+    dim = model_args.vision_dim

     ##### Prepare inputs #####
     input_tensor = torch.randn(bsz * num_concurrent_media * num_chunks, ntok, dim)
@@ -145,12 +138,8 @@
     # Only select output from one device
     tt_output_torch = tt_output_torch[..., :dim].view(reference_output.shape)

-    passing, pcc_message = comp_pcc(reference_output, tt_output_torch, pcc)
+    passing, pcc_message = comp_pcc(reference_output, tt_output_torch, pcc_required)

     logger.info(comp_allclose(reference_output, tt_output_torch))
     logger.info(f"PCC: {pcc_message}")
-    if passing:
-        logger.info(f"Llama_ClassEmbedding Passed!")
-    else:
-        logger.warning(f"Llama_ClassEmbedding Failed!")
-    assert passing, f"PCC value is lower than {pcc} for some of the outputs. Check Warnings!"
+    assert passing, f"PCC value is lower than {pcc_required} for some of the outputs. Check Warnings!"