sd3 pipeline support (#916)
* WIP: conversion and pipeline base

* Support SD3

* img2img pipeline

* fix model export

* update after migration on new pipeline style

* fix inference issues

* fix missed tokenizer export

* add support in quantization

* Update optimum/intel/openvino/modeling_diffusion.py

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* add tests

* fix tests

* update tests

* Update tests/openvino/utils_tests.py

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* fix tests

* add export tests

* fix cli tests

* use fp32 timesteps

* add flux

* fix after black update

* apply review comments

* compatibility with diffusers 0.31.0

* apply review comments

* Update tests/openvino/test_diffusion.py

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* Update tests/openvino/test_diffusion.py

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

---------

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
eaidova and IlyasMoutawwakil authored Oct 24, 2024
1 parent a432102 commit 86598a6
Showing 17 changed files with 1,001 additions and 110 deletions.
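
For context, here is a minimal usage sketch of what this commit enables. It is a hedged illustration, not part of the diff: the checkpoint id, prompt and step count are placeholders, and export=True follows the usual optimum.intel loading pattern.

# Hedged sketch: load a Stable Diffusion 3 checkpoint through the new
# OVStableDiffusion3Pipeline class introduced by this PR. Checkpoint id, prompt
# and generation arguments are illustrative placeholders.
from optimum.intel import OVStableDiffusion3Pipeline

pipe = OVStableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",  # illustrative checkpoint
    export=True,  # convert the diffusers pipeline to OpenVINO on the fly
)
image = pipe("An astronaut riding a horse on Mars", num_inference_steps=28).images[0]
image.save("sd3_openvino.png")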
4 changes: 4 additions & 0 deletions optimum/commands/export/openvino.py
@@ -318,6 +318,10 @@ def run(self):
from optimum.intel import OVStableDiffusionPipeline

model_cls = OVStableDiffusionPipeline
elif class_name == "StableDiffusion3Pipeline":
from optimum.intel import OVStableDiffusion3Pipeline

model_cls = OVStableDiffusion3Pipeline
else:
raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
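
The branch above routes Stable Diffusion 3 checkpoints to OVStableDiffusion3Pipeline when hybrid quantization is requested from the export CLI. A hedged sketch of the same path driven from Python follows; the checkpoint id, calibration dataset and sample count are illustrative assumptions, and OVWeightQuantizationConfig is the existing optimum.intel weight-quantization config rather than something introduced in this diff.

# Hedged sketch: hybrid quantization of an SD3 pipeline. Providing a calibration
# dataset alongside 8-bit weights is what selects the hybrid mode handled above;
# the checkpoint, dataset name and num_samples are placeholders.
from optimum.intel import OVStableDiffusion3Pipeline, OVWeightQuantizationConfig

quantization_config = OVWeightQuantizationConfig(
    bits=8,
    dataset="conceptual_captions",  # calibration prompts for the hybrid pass
    num_samples=224,
)
pipe = OVStableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    export=True,
    quantization_config=quantization_config,
)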

2 changes: 1 addition & 1 deletion optimum/exporters/openvino/__main__.py
@@ -493,7 +493,7 @@ def maybe_convert_tokenizers(library_name: str, output: Path, model=None, prepro
f"models won't be generated. Exception: {exception}"
)
elif model:
for tokenizer_name in ("tokenizer", "tokenizer_2"):
for tokenizer_name in ("tokenizer", "tokenizer_2", "tokenizer_3"):
tokenizer = getattr(model, tokenizer_name, None)
if tokenizer:
export_tokenizer(tokenizer, output / tokenizer_name, task=task)
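
Stable Diffusion 3 ships a third (T5) tokenizer, so the loop above now also exports tokenizer_3 when the pipeline defines it. A small, hedged sanity check on an exported model directory; the output path is illustrative:

# Hedged sketch: list which tokenizer subfolders were written during export.
# Only the directory layout is assumed; the names mirror the loop above.
from pathlib import Path

output = Path("sd3_openvino")  # illustrative export directory
for name in ("tokenizer", "tokenizer_2", "tokenizer_3"):
    print(f"{name}: {'present' if (output / name).is_dir() else 'absent'}")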
265 changes: 247 additions & 18 deletions optimum/exporters/openvino/convert.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import functools
import gc
import logging
@@ -31,7 +31,12 @@
from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx
from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
from optimum.exporters.utils import _get_submodels_and_export_configs as _default_get_submodels_and_export_configs
from optimum.exporters.utils import (
_get_submodels_and_export_configs as _default_get_submodels_and_export_configs,
)
from optimum.exporters.utils import (
get_diffusion_models_for_export,
)
from optimum.intel.utils.import_utils import (
_nncf_version,
_open_clip_version,
@@ -619,23 +625,27 @@ def export_from_model(
model, library_name, task, preprocessors, custom_export_configs, fn_get_submodels
)

logging.disable(logging.INFO)
export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
model=model,
task=task,
monolith=False,
custom_export_configs=custom_export_configs if custom_export_configs is not None else {},
custom_architecture=custom_architecture,
fn_get_submodels=fn_get_submodels,
preprocessors=preprocessors,
library_name=library_name,
model_kwargs=model_kwargs,
_variant="default",
legacy=False,
exporter="openvino",
stateful=stateful,
)
logging.disable(logging.NOTSET)
if library_name == "diffusers":
export_config, models_and_export_configs = get_diffusion_models_for_export_ext(model, exporter="openvino")
stateful_submodels = False
else:
logging.disable(logging.INFO)
export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
model=model,
task=task,
monolith=False,
custom_export_configs=custom_export_configs if custom_export_configs is not None else {},
custom_architecture=custom_architecture,
fn_get_submodels=fn_get_submodels,
preprocessors=preprocessors,
library_name=library_name,
model_kwargs=model_kwargs,
_variant="default",
legacy=False,
exporter="openvino",
stateful=stateful,
)
logging.disable(logging.NOTSET)

if library_name == "open_clip":
if hasattr(model.config, "save_pretrained"):
@@ -701,6 +711,10 @@ def export_from_model(
if tokenizer_2 is not None:
tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))

tokenizer_3 = getattr(model, "tokenizer_3", None)
if tokenizer_3 is not None:
tokenizer_3.save_pretrained(output.joinpath("tokenizer_3"))

model.save_config(output)

export_models(
@@ -889,3 +903,218 @@ def _get_submodels_and_export_configs(
)
stateful_per_model = [stateful] * len(models_for_export)
return export_config, models_for_export, stateful_per_model


def get_diffusion_models_for_export_ext(
pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
):
try:
from diffusers import (
StableDiffusion3Img2ImgPipeline,
StableDiffusion3InpaintPipeline,
StableDiffusion3Pipeline,
)

is_sd3 = isinstance(
pipeline, (StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline, StableDiffusion3Img2ImgPipeline)
)
except ImportError:
is_sd3 = False

try:
from diffusers import FluxPipeline

is_flux = isinstance(pipeline, FluxPipeline)
except ImportError:
is_flux = False

if not is_sd3 and not is_flux:
return None, get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
if is_sd3:
models_for_export = get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
else:
models_for_export = get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)

return None, models_for_export
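
The dispatcher keeps the return shape of the default diffusers export path: the first element is always None here, and the second maps submodel names to (submodel, export_config) pairs. A hedged sketch of how a caller could inspect that mapping, assuming pipeline is an already loaded diffusers pipeline:

# Hedged sketch: inspect which submodels were selected for export. The pipeline
# object is assumed to be a loaded diffusers pipeline (SD3, Flux or any other).
_, models_and_export_configs = get_diffusion_models_for_export_ext(pipeline, exporter="openvino")
for name, (submodel, export_config) in models_and_export_configs.items():
    print(name, type(submodel).__name__, type(export_config).__name__)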


def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
models_for_export = {}

# Text encoder
text_encoder = getattr(pipeline, "text_encoder", None)
if text_encoder is not None:
text_encoder.config.output_hidden_states = True
text_encoder.text_model.config.output_hidden_states = True
text_encoder_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="clip-text-with-projection",
)
text_encoder_export_config = text_encoder_config_constructor(
pipeline.text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["text_encoder"] = (text_encoder, text_encoder_export_config)

transformer = pipeline.transformer
transformer.config.text_encoder_projection_dim = transformer.config.joint_attention_dim
transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
transformer.config.time_cond_proj_dim = None
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=transformer,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="sd3-transformer",
)
transformer_export_config = export_config_constructor(
pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["transformer"] = (transformer, transformer_export_config)

# VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
vae_encoder = copy.deepcopy(pipeline.vae)
vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters}
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_encoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="vae-encoder",
)
vae_encoder_export_config = vae_config_constructor(
vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)

# VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
vae_decoder = copy.deepcopy(pipeline.vae)
vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample)
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_decoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="vae-decoder",
)
vae_decoder_export_config = vae_config_constructor(
vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)

text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
if text_encoder_2 is not None:
text_encoder_2.config.output_hidden_states = True
text_encoder_2.text_model.config.output_hidden_states = True
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder_2,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="clip-text-with-projection",
)
export_config = export_config_constructor(text_encoder_2.config, int_dtype=int_dtype, float_dtype=float_dtype)
models_for_export["text_encoder_2"] = (text_encoder_2, export_config)

text_encoder_3 = getattr(pipeline, "text_encoder_3", None)
if text_encoder_3 is not None:
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder_3,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="t5-encoder-model",
)
export_config = export_config_constructor(
text_encoder_3.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
)
models_for_export["text_encoder_3"] = (text_encoder_3, export_config)

return models_for_export
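
For reference, a full SD3 checkpoint is expected to produce the following keys from the function above; text_encoder_3 is optional because the T5 encoder can be dropped from the pipeline:

# Submodel keys produced by get_sd3_models_for_export for a full SD3 checkpoint,
# derived from the function above (text_encoder_3 only appears when the pipeline
# ships the T5 encoder).
expected_sd3_submodels = [
    "text_encoder",    # CLIP text encoder, exported with hidden states
    "text_encoder_2",  # second CLIP text encoder, exported with hidden states
    "text_encoder_3",  # optional T5 text encoder
    "transformer",     # SD3 diffusion transformer
    "vae_encoder",
    "vae_decoder",
]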


def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
models_for_export = {}

# Text encoder
text_encoder = getattr(pipeline, "text_encoder", None)
if text_encoder is not None:
text_encoder_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="clip-text-model",
)
text_encoder_export_config = text_encoder_config_constructor(
pipeline.text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["text_encoder"] = (text_encoder, text_encoder_export_config)

transformer = pipeline.transformer
transformer.config.text_encoder_projection_dim = transformer.config.joint_attention_dim
transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
transformer.config.time_cond_proj_dim = None
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=transformer,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="flux-transformer",
)
transformer_export_config = export_config_constructor(
pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["transformer"] = (transformer, transformer_export_config)

# VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
vae_encoder = copy.deepcopy(pipeline.vae)
vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters}
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_encoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="vae-encoder",
)
vae_encoder_export_config = vae_config_constructor(
vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)

# VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
vae_decoder = copy.deepcopy(pipeline.vae)
vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample)
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_decoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="vae-decoder",
)
vae_decoder_export_config = vae_config_constructor(
vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)

text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
if text_encoder_2 is not None:
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder_2,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="t5-encoder-model",
)
export_config = export_config_constructor(
text_encoder_2.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
)
models_for_export["text_encoder_2"] = (text_encoder_2, export_config)

return models_for_export
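
Once the Flux submodels are exported, they are meant to be driven by the matching OpenVINO pipeline class on the modeling side (part of the 17 changed files but not shown in this excerpt). A hedged sketch, assuming that class is exposed as OVFluxPipeline by analogy with OVStableDiffusion3Pipeline; the checkpoint id, prompt and step count are placeholders:

# Hedged sketch: run an exported Flux checkpoint. OVFluxPipeline is assumed to
# follow the naming pattern of OVStableDiffusion3Pipeline; all arguments below
# are illustrative.
from optimum.intel import OVFluxPipeline

pipe = OVFluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", export=True)
image = pipe("A cat holding a sign that says hello", num_inference_steps=4).images[0]
image.save("flux_openvino.png")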