Add compression tests to internvl2 and phi3v (#999)
* Fix NanoLLava quantization

* Add internvl2 compression tests

* Revert "Fix NanoLLava quantization"

This reverts commit 3eba1de.

* Add phi3 compression tests; fix phi3 preprocessors saving with optimum-cli quantization

* Trigger Tests

* Trigger Tests

* Trigger Tests
nikita-savelyevv authored Nov 16, 2024
1 parent 040ee12 commit ef558f9
Showing 4 changed files with 96 additions and 34 deletions.
9 changes: 4 additions & 5 deletions optimum/commands/export/openvino.py
@@ -21,9 +21,10 @@
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 
 from ...exporters import TasksManager
+from ...exporters.openvino.convert import save_preprocessors
 from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
 from ...intel.utils.modeling_utils import _infer_library_from_model_name_or_path
-from ...utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors
+from ...utils.save_utils import maybe_load_preprocessors
 from ..base import BaseOptimumCLICommand, CommandInfo
 
 
@@ -350,11 +351,9 @@ def run(self):
             )
             model.save_pretrained(self.args.output)
 
-            maybe_save_preprocessors(self.args.model, self.args.output, trust_remote_code=self.args.trust_remote_code)
+            preprocessors = maybe_load_preprocessors(self.args.model, trust_remote_code=self.args.trust_remote_code)
+            save_preprocessors(preprocessors, model.config, self.args.output, self.args.trust_remote_code)
             if not self.args.disable_convert_tokenizer:
-                preprocessors = maybe_load_preprocessors(
-                    self.args.model, trust_remote_code=self.args.trust_remote_code
-                )
                 maybe_convert_tokenizers(library_name, self.args.output, preprocessors=preprocessors, task=task)
         else:
             # TODO : add input shapes
37 changes: 24 additions & 13 deletions optimum/exporters/openvino/convert.py
@@ -21,6 +21,7 @@
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
 
 import onnx
+from transformers import PretrainedConfig
 from transformers.generation import GenerationMixin
 from transformers.utils import is_tf_available, is_torch_available
 
@@ -711,19 +712,7 @@ def export_from_model(
                 f"The generation config will not be saved, saving failed with following error:\n{exception}"
             )
 
-    model_name_or_path = model.config._name_or_path
-    if preprocessors is not None:
-        # phi3-vision processor does not have chat_template attribute that breaks Processor saving on disk
-        if is_transformers_version(">=", "4.45") and model_type == "phi3-v" and len(preprocessors) > 1:
-            if not hasattr(preprocessors[1], "chat_template"):
-                preprocessors[1].chat_template = getattr(preprocessors[0], "chat_template", None)
-        for processor in preprocessors:
-            try:
-                processor.save_pretrained(output)
-            except Exception as ex:
-                logger.error(f"Saving {type(processor)} failed with {ex}")
-    else:
-        maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
+    save_preprocessors(preprocessors, model.config, output, trust_remote_code)
 
     files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_export_configs.keys()]
 
@@ -838,6 +827,28 @@ def export_tokenizer(
         save_model(model, output / file_name.format(suffix))
 
 
+def save_preprocessors(
+    preprocessors: List, config: PretrainedConfig, output: Union[str, Path], trust_remote_code: bool
+):
+    model_name_or_path = config._name_or_path
+    if hasattr(config, "export_model_type"):
+        model_type = config.export_model_type.replace("_", "-")
+    else:
+        model_type = config.model_type.replace("_", "-")
+    if preprocessors is not None:
+        # phi3-vision processor does not have chat_template attribute that breaks Processor saving on disk
+        if is_transformers_version(">=", "4.45") and model_type == "phi3-v" and len(preprocessors) > 1:
+            if not hasattr(preprocessors[1], "chat_template"):
+                preprocessors[1].chat_template = getattr(preprocessors[0], "chat_template", None)
+        for processor in preprocessors:
+            try:
+                processor.save_pretrained(output)
+            except Exception as ex:
+                logger.error(f"Saving {type(processor)} failed with {ex}")
+    else:
+        maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
+
+
 def _add_runtime_options_to_rt_info(model: Model, options: Dict):
     """
     Add runtime optinos
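The refactor above moves the phi3-v chat_template workaround out of export_from_model and into the new save_preprocessors helper, so the optimum-cli path in optimum/commands/export/openvino.py (which previously called maybe_save_preprocessors directly) now applies the same fix when quantizing. A minimal usage sketch of the helper, assuming the import paths shown in the diff above and a placeholder checkpoint id; the tests themselves use tiny random models:

    from pathlib import Path

    from transformers import AutoConfig

    from optimum.exporters.openvino.convert import save_preprocessors
    from optimum.utils.save_utils import maybe_load_preprocessors

    # Placeholder checkpoint id, not the tiny test model from MODEL_NAMES.
    model_id = "microsoft/Phi-3-vision-128k-instruct"
    output_dir = Path("phi3_v_openvino")

    config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
    preprocessors = maybe_load_preprocessors(model_id, trust_remote_code=True)
    # For phi3-v with transformers >= 4.45, the helper copies chat_template from the
    # tokenizer onto the processor (when missing) before calling save_pretrained(),
    # so saving the processor no longer fails.
    save_preprocessors(preprocessors, config, output_dir, trust_remote_code=True)
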
20 changes: 20 additions & 0 deletions tests/openvino/test_exporters_cli.py
@@ -152,6 +152,26 @@ class OVCLIExportTestCase(unittest.TestCase):
             ]
         )
 
+    if is_transformers_version(">=", "4.45.0"):
+        TEST_4BIT_CONFIGURATIONS.extend(
+            [
+                (
+                    "image-text-to-text",
+                    "internvl2",
+                    'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "hessian_input_activation" '
+                    "--dataset contextual --num-samples 1 --trust-remote-code",
+                    {"int8": 6, "int4": 24},
+                ),
+                (
+                    "image-text-to-text",
+                    "phi3_v",
+                    'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "mean_activation_magnitude" '
+                    "--dataset contextual --num-samples 1 --trust-remote-code",
+                    {"int8": 4, "int4": 14},
+                ),
+            ]
+        )
+
     def _openvino_export(self, model_name: str, task: str):
         with TemporaryDirectory() as tmpdir:
             main_export(
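Each new tuple pairs a task, a model type, a string of optimum-cli weight-compression options, and the expected numbers of int8/int4 weight nodes. A rough sketch of the kind of command such an entry expands to, with the flags copied verbatim from the internvl2 entry above; the model id and output directory are placeholders, and the exact harness invocation may differ:

    import subprocess

    # Placeholder ids; the test resolves the real tiny model from MODEL_NAMES["internvl2"].
    model_id = "OpenGVLab/InternVL2-1B"
    output_dir = "internvl2_int4_ov"

    # Option string taken from the new internvl2 test entry above.
    option = (
        'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "hessian_input_activation" '
        "--dataset contextual --num-samples 1 --trust-remote-code"
    )

    subprocess.run(
        f"optimum-cli export openvino --model {model_id} --task image-text-to-text "
        f"--weight-format {option} {output_dir}",
        shell=True,
        check=True,
    )
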
64 changes: 48 additions & 16 deletions tests/openvino/test_quantization.py
@@ -347,23 +347,55 @@ class OVWeightCompressionTest(unittest.TestCase):
         )
 
     if is_transformers_version(">=", "4.45.0"):
-        LOAD_IN_4_BITS_SCOPE.append(
-            (
-                OVModelForVisualCausalLM,
-                "minicpmv",
-                True,
-                dict(
-                    bits=4,
-                    group_size=16,
-                    dataset="contextual",
-                    ratio=0.8,
-                    sensitivity_metric="mean_activation_magnitude",
-                    num_samples=1,
-                    processor=MODEL_NAMES["minicpmv"],
-                    trust_remote_code=True,
-                ),
-                {"int4": 22, "int8": 8},
-            )
-        )
+        LOAD_IN_4_BITS_SCOPE.extend(
+            [
+                (
+                    OVModelForVisualCausalLM,
+                    "minicpmv",
+                    True,
+                    dict(
+                        bits=4,
+                        group_size=16,
+                        dataset="contextual",
+                        ratio=0.8,
+                        sensitivity_metric="mean_activation_magnitude",
+                        num_samples=1,
+                        processor=MODEL_NAMES["minicpmv"],
+                        trust_remote_code=True,
+                    ),
+                    {"int4": 22, "int8": 8},
+                ),
+                (
+                    OVModelForVisualCausalLM,
+                    "internvl2",
+                    True,
+                    dict(
+                        bits=4,
+                        group_size=4,
+                        dataset="contextual",
+                        ratio=0.8,
+                        sensitivity_metric="mean_activation_magnitude",
+                        num_samples=1,
+                        trust_remote_code=True,
+                    ),
+                    {"int4": 22, "int8": 8},
+                ),
+                (
+                    OVModelForVisualCausalLM,
+                    "phi3_v",
+                    True,
+                    dict(
+                        bits=4,
+                        group_size=16,
+                        dataset="contextual",
+                        ratio=0.8,
+                        sensitivity_metric="mean_activation_magnitude",
+                        num_samples=1,
+                        trust_remote_code=True,
+                    ),
+                    {"int4": 14, "int8": 4},
+                ),
+            ]
+        )
 
     SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = [
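The new LOAD_IN_4_BITS_SCOPE entries exercise the same settings through the Python API. A minimal sketch, assuming the dict above maps directly onto OVWeightQuantizationConfig and the usual from_pretrained export flow; the checkpoint id is a placeholder, and the tests additionally verify the resulting int4/int8 node counts:

    from optimum.intel import OVModelForVisualCausalLM, OVWeightQuantizationConfig

    # Placeholder checkpoint id, not the tiny test model from MODEL_NAMES.
    model_id = "OpenGVLab/InternVL2-1B"

    # Mirrors the internvl2 entry added to LOAD_IN_4_BITS_SCOPE above.
    quantization_config = OVWeightQuantizationConfig(
        bits=4,
        group_size=4,
        dataset="contextual",
        ratio=0.8,
        sensitivity_metric="mean_activation_magnitude",
        num_samples=1,
        trust_remote_code=True,
    )

    model = OVModelForVisualCausalLM.from_pretrained(
        model_id,
        export=True,  # convert the PyTorch checkpoint to OpenVINO on the fly
        quantization_config=quantization_config,
        trust_remote_code=True,
    )
    model.save_pretrained("internvl2_int4_ov")
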
