Add compression tests to internvl2 and phi3v (#999)
* Fix NanoLLava quantization

* Add internvl2 compression tests

* Revert "Fix NanoLLava quantization"

This reverts commit 3eba1de.

* Add phi3 compression tests; fix phi3 preprocessors saving with optimum-cli quantization

* Trigger Tests

* Trigger Tests

* Trigger Tests
nikita-savelyevv authored Nov 16, 2024
1 parent 040ee12 commit ef558f9
Showing 4 changed files with 96 additions and 34 deletions.
9 changes: 4 additions & 5 deletions optimum/commands/export/openvino.py
@@ -21,9 +21,10 @@
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 
 from ...exporters import TasksManager
+from ...exporters.openvino.convert import save_preprocessors
 from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
 from ...intel.utils.modeling_utils import _infer_library_from_model_name_or_path
-from ...utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors
+from ...utils.save_utils import maybe_load_preprocessors
 from ..base import BaseOptimumCLICommand, CommandInfo
 
 
@@ -350,11 +351,9 @@ def run(self):
             )
             model.save_pretrained(self.args.output)
 
-            maybe_save_preprocessors(self.args.model, self.args.output, trust_remote_code=self.args.trust_remote_code)
+            preprocessors = maybe_load_preprocessors(self.args.model, trust_remote_code=self.args.trust_remote_code)
+            save_preprocessors(preprocessors, model.config, self.args.output, self.args.trust_remote_code)
             if not self.args.disable_convert_tokenizer:
-                preprocessors = maybe_load_preprocessors(
-                    self.args.model, trust_remote_code=self.args.trust_remote_code
-                )
                 maybe_convert_tokenizers(library_name, self.args.output, preprocessors=preprocessors, task=task)
         else:
             # TODO : add input shapes
37 changes: 24 additions & 13 deletions optimum/exporters/openvino/convert.py
@@ -21,6 +21,7 @@
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
 
 import onnx
+from transformers import PretrainedConfig
 from transformers.generation import GenerationMixin
 from transformers.utils import is_tf_available, is_torch_available
 
@@ -711,19 +712,7 @@ def export_from_model(
                 f"The generation config will not be saved, saving failed with following error:\n{exception}"
             )
 
-    model_name_or_path = model.config._name_or_path
-    if preprocessors is not None:
-        # phi3-vision processor does not have chat_template attribute that breaks Processor saving on disk
-        if is_transformers_version(">=", "4.45") and model_type == "phi3-v" and len(preprocessors) > 1:
-            if not hasattr(preprocessors[1], "chat_template"):
-                preprocessors[1].chat_template = getattr(preprocessors[0], "chat_template", None)
-        for processor in preprocessors:
-            try:
-                processor.save_pretrained(output)
-            except Exception as ex:
-                logger.error(f"Saving {type(processor)} failed with {ex}")
-    else:
-        maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
+    save_preprocessors(preprocessors, model.config, output, trust_remote_code)
 
     files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_export_configs.keys()]
 
@@ -838,6 +827,28 @@ def export_tokenizer(
         save_model(model, output / file_name.format(suffix))
 
 
+def save_preprocessors(
+    preprocessors: List, config: PretrainedConfig, output: Union[str, Path], trust_remote_code: bool
+):
+    model_name_or_path = config._name_or_path
+    if hasattr(config, "export_model_type"):
+        model_type = config.export_model_type.replace("_", "-")
+    else:
+        model_type = config.model_type.replace("_", "-")
+    if preprocessors is not None:
+        # phi3-vision processor does not have chat_template attribute that breaks Processor saving on disk
+        if is_transformers_version(">=", "4.45") and model_type == "phi3-v" and len(preprocessors) > 1:
+            if not hasattr(preprocessors[1], "chat_template"):
+                preprocessors[1].chat_template = getattr(preprocessors[0], "chat_template", None)
+        for processor in preprocessors:
+            try:
+                processor.save_pretrained(output)
+            except Exception as ex:
+                logger.error(f"Saving {type(processor)} failed with {ex}")
+    else:
+        maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
+
+
 def _add_runtime_options_to_rt_info(model: Model, options: Dict):
     """
     Add runtime optinos
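The refactor above moves the phi3-v chat_template workaround out of export_from_model and into the new save_preprocessors helper, so the optimum-cli path in optimum/commands/export/openvino.py (which previously called maybe_save_preprocessors directly) now applies the same fix when quantizing. A minimal usage sketch of the helper, assuming the import paths shown in the diff above and a placeholder checkpoint id; the tests themselves use tiny random models:

    from pathlib import Path

    from transformers import AutoConfig

    from optimum.exporters.openvino.convert import save_preprocessors
    from optimum.utils.save_utils import maybe_load_preprocessors

    # Placeholder checkpoint id, not the tiny test model from MODEL_NAMES.
    model_id = "microsoft/Phi-3-vision-128k-instruct"
    output_dir = Path("phi3_v_openvino")

    config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
    preprocessors = maybe_load_preprocessors(model_id, trust_remote_code=True)
    # For phi3-v with transformers >= 4.45, the helper copies chat_template from the
    # tokenizer onto the processor (when missing) before calling save_pretrained(),
    # so saving the processor no longer fails.
    save_preprocessors(preprocessors, config, output_dir, trust_remote_code=True)
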
20 changes: 20 additions & 0 deletions tests/openvino/test_exporters_cli.py
@@ -152,6 +152,26 @@ class OVCLIExportTestCase(unittest.TestCase):
             ]
         )
 
+    if is_transformers_version(">=", "4.45.0"):
+        TEST_4BIT_CONFIGURATIONS.extend(
+            [
+                (
+                    "image-text-to-text",
+                    "internvl2",
+                    'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "hessian_input_activation" '
+                    "--dataset contextual --num-samples 1 --trust-remote-code",
+                    {"int8": 6, "int4": 24},
+                ),
+                (
+                    "image-text-to-text",
+                    "phi3_v",
+                    'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "mean_activation_magnitude" '
+                    "--dataset contextual --num-samples 1 --trust-remote-code",
+                    {"int8": 4, "int4": 14},
+                ),
+            ]
+        )
+
     def _openvino_export(self, model_name: str, task: str):
         with TemporaryDirectory() as tmpdir:
             main_export(
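Each new tuple pairs a task, a model type, a string of optimum-cli weight-compression options, and the expected numbers of int8/int4 weight nodes. A rough sketch of the kind of command such an entry expands to, with the flags copied verbatim from the internvl2 entry above; the model id and output directory are placeholders, and the exact harness invocation may differ:

    import subprocess

    # Placeholder ids; the test resolves the real tiny model from MODEL_NAMES["internvl2"].
    model_id = "OpenGVLab/InternVL2-1B"
    output_dir = "internvl2_int4_ov"

    # Option string taken from the new internvl2 test entry above.
    option = (
        'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "hessian_input_activation" '
        "--dataset contextual --num-samples 1 --trust-remote-code"
    )

    subprocess.run(
        f"optimum-cli export openvino --model {model_id} --task image-text-to-text "
        f"--weight-format {option} {output_dir}",
        shell=True,
        check=True,
    )
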
64 changes: 48 additions & 16 deletions tests/openvino/test_quantization.py
@@ -347,23 +347,55 @@ class OVWeightCompressionTest(unittest.TestCase):
         )
 
     if is_transformers_version(">=", "4.45.0"):
-        LOAD_IN_4_BITS_SCOPE.append(
-            (
-                OVModelForVisualCausalLM,
-                "minicpmv",
-                True,
-                dict(
-                    bits=4,
-                    group_size=16,
-                    dataset="contextual",
-                    ratio=0.8,
-                    sensitivity_metric="mean_activation_magnitude",
-                    num_samples=1,
-                    processor=MODEL_NAMES["minicpmv"],
-                    trust_remote_code=True,
-                ),
-                {"int4": 22, "int8": 8},
-            )
-        )
+        LOAD_IN_4_BITS_SCOPE.extend(
+            [
+                (
+                    OVModelForVisualCausalLM,
+                    "minicpmv",
+                    True,
+                    dict(
+                        bits=4,
+                        group_size=16,
+                        dataset="contextual",
+                        ratio=0.8,
+                        sensitivity_metric="mean_activation_magnitude",
+                        num_samples=1,
+                        processor=MODEL_NAMES["minicpmv"],
+                        trust_remote_code=True,
+                    ),
+                    {"int4": 22, "int8": 8},
+                ),
+                (
+                    OVModelForVisualCausalLM,
+                    "internvl2",
+                    True,
+                    dict(
+                        bits=4,
+                        group_size=4,
+                        dataset="contextual",
+                        ratio=0.8,
+                        sensitivity_metric="mean_activation_magnitude",
+                        num_samples=1,
+                        trust_remote_code=True,
+                    ),
+                    {"int4": 22, "int8": 8},
+                ),
+                (
+                    OVModelForVisualCausalLM,
+                    "phi3_v",
+                    True,
+                    dict(
+                        bits=4,
+                        group_size=16,
+                        dataset="contextual",
+                        ratio=0.8,
+                        sensitivity_metric="mean_activation_magnitude",
+                        num_samples=1,
+                        trust_remote_code=True,
+                    ),
+                    {"int4": 14, "int8": 4},
+                ),
+            ]
+        )
 
     SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = [
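The new LOAD_IN_4_BITS_SCOPE entries exercise the same settings through the Python API. A minimal sketch, assuming the dict above maps directly onto OVWeightQuantizationConfig and the usual from_pretrained export flow; the checkpoint id is a placeholder, and the tests additionally verify the resulting int4/int8 node counts:

    from optimum.intel import OVModelForVisualCausalLM, OVWeightQuantizationConfig

    # Placeholder checkpoint id, not the tiny test model from MODEL_NAMES.
    model_id = "OpenGVLab/InternVL2-1B"

    # Mirrors the internvl2 entry added to LOAD_IN_4_BITS_SCOPE above.
    quantization_config = OVWeightQuantizationConfig(
        bits=4,
        group_size=4,
        dataset="contextual",
        ratio=0.8,
        sensitivity_metric="mean_activation_magnitude",
        num_samples=1,
        trust_remote_code=True,
    )

    model = OVModelForVisualCausalLM.from_pretrained(
        model_id,
        export=True,  # convert the PyTorch checkpoint to OpenVINO on the fly
        quantization_config=quantization_config,
        trust_remote_code=True,
    )
    model.save_pretrained("internvl2_int4_ov")
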
