From 86b56d2d2c8d2421dee443c0d1bc1aced3d2dcc7 Mon Sep 17 00:00:00 2001 From: Seunghoon Lee Date: Sat, 16 Dec 2023 14:37:04 +0900 Subject: [PATCH] follow up --- configs/olive/sd_text_encoder.json | 25 ++++++++++++++++++- configs/olive/sd_unet.json | 25 ++++++++++++++++++- configs/olive/sd_vae_decoder.json | 25 ++++++++++++++++++- configs/olive/sd_vae_encoder.json | 25 ++++++++++++++++++- configs/olive/sdxl_text_encoder.json | 25 ++++++++++++++++++- configs/olive/sdxl_text_encoder_2.json | 25 ++++++++++++++++++- configs/olive/sdxl_unet.json | 25 ++++++++++++++++++- configs/olive/sdxl_vae_decoder.json | 23 ++++++++++++++++- configs/olive/sdxl_vae_encoder.json | 23 +++++++++++++++++ configs/onnx/sd_text_encoder.json | 1 + configs/onnx/sd_unet.json | 1 + configs/onnx/sd_vae_decoder.json | 1 + configs/onnx/sd_vae_encoder.json | 1 + configs/onnx/sdxl_text_encoder.json | 1 + configs/onnx/sdxl_text_encoder_2.json | 1 + configs/onnx/sdxl_unet.json | 1 + configs/onnx/sdxl_vae_decoder.json | 1 + configs/onnx/sdxl_vae_encoder.json | 1 + modules/olive.py | 22 +++++++++-------- modules/onnx.py | 34 ++++++++++++++++++-------- modules/processing_diffusers.py | 3 +-- 21 files changed, 259 insertions(+), 30 deletions(-) diff --git a/configs/olive/sd_text_encoder.json b/configs/olive/sd_text_encoder.json index bc301c41b..639d58ddf 100644 --- a/configs/olive/sd_text_encoder.json +++ b/configs/olive/sd_text_encoder.json @@ -38,7 +38,7 @@ } }, "passes": { - "optimize": { + "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", "disable_search": true, "config": { @@ -73,8 +73,31 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/olive/sd_unet.json b/configs/olive/sd_unet.json index a58bc825c..c34b8987c 100644 --- a/configs/olive/sd_unet.json +++ b/configs/olive/sd_unet.json @@ -55,7 +55,7 @@ } }, "passes": { - "optimize": { + "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", "disable_search": true, "config": { @@ -90,8 +90,31 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "unet", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "unet", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/olive/sd_vae_decoder.json b/configs/olive/sd_vae_decoder.json index 6f6b3ae98..e30c8e037 100644 --- a/configs/olive/sd_vae_decoder.json +++ b/configs/olive/sd_vae_decoder.json @@ -45,7 +45,7 @@ } }, "passes": { - "optimize": { + "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", "disable_search": true, "config": { @@ -80,8 +80,31 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/olive/sd_vae_encoder.json b/configs/olive/sd_vae_encoder.json index a2976c147..7f29ca720 100644 --- a/configs/olive/sd_vae_encoder.json +++ b/configs/olive/sd_vae_encoder.json @@ -45,7 +45,7 @@ } }, "passes": { - "optimize": { + "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", "disable_search": true, "config": { @@ -80,8 +80,31 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/olive/sdxl_text_encoder.json b/configs/olive/sdxl_text_encoder.json index c3568e731..e8270e7df 100644 --- a/configs/olive/sdxl_text_encoder.json +++ b/configs/olive/sdxl_text_encoder.json @@ -71,7 +71,7 @@ } }, "passes": { - "optimize": { + "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", "disable_search": true, "config": { @@ -106,8 +106,31 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": true + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": true + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/olive/sdxl_text_encoder_2.json b/configs/olive/sdxl_text_encoder_2.json index 44f95e41b..221d51605 100644 --- a/configs/olive/sdxl_text_encoder_2.json +++ b/configs/olive/sdxl_text_encoder_2.json @@ -111,7 +111,7 @@ } }, "passes": { - "optimize": { + "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", "disable_search": true, "config": { @@ -146,8 +146,31 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": true + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": true + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/olive/sdxl_unet.json b/configs/olive/sdxl_unet.json index 1b1d9b22d..82984a7e6 100644 --- a/configs/olive/sdxl_unet.json +++ b/configs/olive/sdxl_unet.json @@ -61,7 +61,7 @@ } }, "passes": { - "optimize": { + "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", "disable_search": true, "config": { @@ -96,8 +96,31 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "unet", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": true + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "unet", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": true + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/olive/sdxl_vae_decoder.json b/configs/olive/sdxl_vae_decoder.json index 75d6fd737..33b2e549d 100644 --- a/configs/olive/sdxl_vae_decoder.json +++ b/configs/olive/sdxl_vae_decoder.json @@ -51,7 +51,7 @@ } }, "passes": { - "optimize": { + "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", "disable_search": true, "config": { @@ -108,8 +108,29 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "vae", + "opt_level": 0, + "float16": false, + "use_gpu": true + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "vae", + "opt_level": 0, + "float16": false, + "use_gpu": true + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/olive/sdxl_vae_encoder.json b/configs/olive/sdxl_vae_encoder.json index fc2585097..2ab771f6f 100644 --- a/configs/olive/sdxl_vae_encoder.json +++ b/configs/olive/sdxl_vae_encoder.json @@ -86,8 +86,31 @@ "GroupNorm": [0, 1, 2] } } + }, + "optimize_CUDAExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": true + } + }, + "optimize_ROCMExecutionProvider": { + "type": "OrtTransformersOptimization", + "disable_search": true, + "config": { + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": true + } } }, + "pass_flows": [[]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sd_text_encoder.json b/configs/onnx/sd_text_encoder.json index 8ca69c357..b46ca4c8d 100644 --- a/configs/onnx/sd_text_encoder.json +++ b/configs/onnx/sd_text_encoder.json @@ -45,6 +45,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sd_unet.json b/configs/onnx/sd_unet.json index 7b4224223..0110bdf41 100644 --- a/configs/onnx/sd_unet.json +++ b/configs/onnx/sd_unet.json @@ -65,6 +65,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sd_vae_decoder.json b/configs/onnx/sd_vae_decoder.json index 0b10752df..73de99274 100644 --- a/configs/onnx/sd_vae_decoder.json +++ b/configs/onnx/sd_vae_decoder.json @@ -52,6 +52,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sd_vae_encoder.json b/configs/onnx/sd_vae_encoder.json index 899ffe2d2..e2ee06abf 100644 --- a/configs/onnx/sd_vae_encoder.json +++ b/configs/onnx/sd_vae_encoder.json @@ -52,6 +52,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sdxl_text_encoder.json b/configs/onnx/sdxl_text_encoder.json index b1d95a071..1a8afc363 100644 --- a/configs/onnx/sdxl_text_encoder.json +++ b/configs/onnx/sdxl_text_encoder.json @@ -78,6 +78,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sdxl_text_encoder_2.json b/configs/onnx/sdxl_text_encoder_2.json index 1663063a0..6eb520034 100644 --- a/configs/onnx/sdxl_text_encoder_2.json +++ b/configs/onnx/sdxl_text_encoder_2.json @@ -118,6 +118,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sdxl_unet.json b/configs/onnx/sdxl_unet.json index 78864af07..db31a3ff6 100644 --- a/configs/onnx/sdxl_unet.json +++ b/configs/onnx/sdxl_unet.json @@ -71,6 +71,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sdxl_vae_decoder.json b/configs/onnx/sdxl_vae_decoder.json index 9c0092568..ed27fa37c 100644 --- a/configs/onnx/sdxl_vae_decoder.json +++ b/configs/onnx/sdxl_vae_decoder.json @@ -58,6 +58,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/configs/onnx/sdxl_vae_encoder.json b/configs/onnx/sdxl_vae_encoder.json index 1c71e78e4..f048e2508 100644 --- a/configs/onnx/sdxl_vae_encoder.json +++ b/configs/onnx/sdxl_vae_encoder.json @@ -58,6 +58,7 @@ } } }, + "pass_flows": [["convert"]], "engine": { "search_strategy": { "execution_order": "joint", diff --git a/modules/olive.py b/modules/olive.py index b1c504581..2ea9df851 100644 --- a/modules/olive.py +++ b/modules/olive.py @@ -9,7 +9,7 @@ is_sdxl = False width = 512 height = 512 batch_size = 1 -hidden_state_size = 768 +cross_attention_dim = 768 time_ids_size = 5 @@ -87,14 +87,11 @@ def text_encoder_2_data_loader(data_dir, _, *args, **kwargs): def unet_inputs(_, torch_dtype, is_conversion_inputs=False): - # TODO (pavignol): All the multiplications by 2 here are bacause the XL base has 2 text encoders - # For refiner, it should be multiplied by 1 (single text encoder) - if is_sdxl: inputs = { "sample": torch.rand((2 * batch_size, 4, height // 8, width // 8), dtype=torch_dtype), "timestep": torch.rand((1,), dtype=torch_dtype), - "encoder_hidden_states": torch.rand((2 * batch_size, 77, hidden_state_size), dtype=torch_dtype), + "encoder_hidden_states": torch.rand((2 * batch_size, 77, cross_attention_dim), dtype=torch_dtype), } if is_conversion_inputs: @@ -111,20 +108,25 @@ def unet_inputs(_, torch_dtype, is_conversion_inputs=False): inputs = { "sample": torch.rand((batch_size, 4, height // 8, width // 8), dtype=torch_dtype), "timestep": torch.rand((batch_size,), dtype=torch_dtype), - "encoder_hidden_states": torch.rand((batch_size, 77, hidden_state_size), dtype=torch_dtype), - "return_dict": False, + "encoder_hidden_states": torch.rand((batch_size, 77, cross_attention_dim), dtype=torch_dtype), } + # use as kwargs since they won't be in the correct position if passed along with the tuple of inputs + kwargs = { + "return_dict": False, + } if is_conversion_inputs: inputs["additional_inputs"] = { + **kwargs, "added_cond_kwargs": { "text_embeds": torch.rand((1, 1280), dtype=torch_dtype), - "time_ids": torch.rand((1, time_ids_size), dtype=torch_dtype), - } + "time_ids": torch.rand((1, 5), dtype=torch_dtype), + }, } else: + inputs.update(kwargs) inputs["onnx::Concat_4"] = torch.rand((1, 1280), dtype=torch_dtype) - inputs["onnx::Shape_5"] = torch.rand((1, time_ids_size), dtype=torch_dtype) + inputs["onnx::Shape_5"] = torch.rand((1, 5), dtype=torch_dtype) return inputs diff --git a/modules/onnx.py b/modules/onnx.py index 22f0b56c2..b78ef9640 100644 --- a/modules/onnx.py +++ b/modules/onnx.py @@ -4,6 +4,7 @@ import torch import shutil import inspect import importlib +from packaging import version import numpy as np import onnxruntime as ort import diffusers @@ -109,6 +110,9 @@ def load_init_dict(cls: Type[diffusers.DiffusionPipeline], path: os.PathLike): R: Dict[str, Tuple[str]] = {} for k, v in merged: if isinstance(v, list): + if v[0] is None or v[1] is None: + log.debug(f"Skipping {k} while loading init dict of '{path}': {v}") + continue R[k] = v return R @@ -142,9 +146,17 @@ def load_submodels(path: os.PathLike, init_dict: Dict[str, Type], **kwargs): return loaded +def patch_kwargs(cls: Type[diffusers.DiffusionPipeline], kwargs: Dict) -> Dict: + if cls == OnnxStableDiffusionPipeline or cls == OnnxStableDiffusionImg2ImgPipeline or cls == OnnxStableDiffusionInpaintPipeline: + kwargs["safety_checker"] = None + kwargs["requires_safety_checker"] = False + + return kwargs + + def load_pipeline(cls: Type[diffusers.DiffusionPipeline], path: os.PathLike): if os.path.isdir(path): - return cls(**load_submodels(path, load_init_dict(cls, path))) + return cls(**patch_kwargs(cls, load_submodels(path, load_init_dict(cls, path)))) else: return cls.from_single_file(path) @@ -284,8 +296,7 @@ class OnnxRawPipeline(OnnxPipelineBase): if submodel in init_dict: del init_dict[submodel] # already loaded as OnnxRuntimeModel. kwargs.update(load_submodels(in_dir, init_dict)) # load others. - kwargs["safety_checker"] = None - kwargs["requires_safety_checker"] = False + kwargs = patch_kwargs(self.constructor, kwargs) pipeline = self.constructor(**kwargs) pipeline.to_json_file(os.path.join(out_dir, "model_index.json")) @@ -353,11 +364,13 @@ class OnnxRawPipeline(OnnxPipelineBase): with open(os.path.join(sd_configs_path, "olive", f"{'sdxl' if self._is_sdxl else 'sd'}_{submodel}.json"), "r") as config_file: olive_config = json.load(config_file) + pass_key = f"optimize_{shared.opts.onnx_execution_provider}" + olive_config["pass_flows"] = [[pass_key]] olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx")) - olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16 - if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm): - olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True + olive_config["passes"][pass_key]["config"]["float16"] = shared.opts.onnx_olive_float16 olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider] + if (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm) and version.parse(ort.__version__) < version.parse("1.17.0"): + olive_config["passes"][pass_key]["config"]["optimization_options"] = {"enable_skip_group_norm": False} run(olive_config) @@ -388,8 +401,7 @@ class OnnxRawPipeline(OnnxPipelineBase): if submodel in init_dict: del init_dict[submodel] # already loaded as OnnxRuntimeModel. kwargs.update(load_submodels(in_dir, init_dict)) # load others. - kwargs["safety_checker"] = None - kwargs["requires_safety_checker"] = False + kwargs = patch_kwargs(self.constructor, kwargs) pipeline = self.constructor(**kwargs) pipeline.to_json_file(os.path.join(out_dir, "model_index.json")) @@ -416,16 +428,18 @@ class OnnxRawPipeline(OnnxPipelineBase): return None def preprocess(self, width: int, height: int, batch_size: int): + if not shared.cmd_opts.debug: + ort.set_default_logger_severity(3) olive.width = width olive.height = height olive.batch_size = batch_size olive.is_sdxl = self._is_sdxl if olive.is_sdxl: - olive.hidden_state_size = 2048 + olive.cross_attention_dim = 2048 olive.time_ids_size = 6 else: - olive.hidden_state_size = height + 256 + olive.cross_attention_dim = height + 256 olive.time_ids_size = 5 converted_dir = self.convert(self.path if os.path.isdir(self.path) else shared.opts.onnx_temp_dir) diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index 4f4d02df7..74caefd2d 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -7,7 +7,6 @@ import torch import torchvision.transforms.functional as TF import diffusers from modules import shared, devices, processing, sd_samplers, sd_models, images, errors, masking, prompt_parser_diffusers, sd_hijack_hypertile, processing_correction, processing_vae -from modules.olive import OlivePipeline debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None @@ -223,7 +222,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing): generator = [torch.Generator(generator_device).manual_seed(s) for s in p.seeds] prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2) parser = 'Fixed attention' - if shared.opts.prompt_attention != 'Fixed attention' and 'StableDiffusion' in model.__class__.__name__ and not isinstance(model, diffusers.OnnxStableDiffusionPipeline): + if shared.opts.prompt_attention != 'Fixed attention' and 'StableDiffusion' in model.__class__.__name__ and not isinstance(model, OnnxStableDiffusionPipeline): try: prompt_parser_diffusers.encode_prompts(model, p, prompts, negative_prompts, kwargs.get("num_inference_steps", 1), kwargs.pop("clip_skip", None)) parser = shared.opts.prompt_attention