follow up

pull/2784/head
Seunghoon Lee 2023-12-16 14:37:04 +09:00
parent 106a1ea68f
commit 86b56d2d2c
No known key found for this signature in database
GPG Key ID: 436E38F4E70BD152
21 changed files with 259 additions and 30 deletions

View File

@ -38,7 +38,7 @@
}
},
"passes": {
"optimize": {
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
@ -73,8 +73,31 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -55,7 +55,7 @@
}
},
"passes": {
"optimize": {
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
@ -90,8 +90,31 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "unet",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "unet",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -45,7 +45,7 @@
}
},
"passes": {
"optimize": {
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
@ -80,8 +80,31 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "vae",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "vae",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -45,7 +45,7 @@
}
},
"passes": {
"optimize": {
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
@ -80,8 +80,31 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "vae",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "vae",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -71,7 +71,7 @@
}
},
"passes": {
"optimize": {
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
@ -106,8 +106,31 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": true
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": true
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -111,7 +111,7 @@
}
},
"passes": {
"optimize": {
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
@ -146,8 +146,31 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": true
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": true
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -61,7 +61,7 @@
}
},
"passes": {
"optimize": {
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
@ -96,8 +96,31 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "unet",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": true
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "unet",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": true
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -51,7 +51,7 @@
}
},
"passes": {
"optimize": {
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
@ -108,8 +108,29 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "vae",
"opt_level": 0,
"float16": false,
"use_gpu": true
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "vae",
"opt_level": 0,
"float16": false,
"use_gpu": true
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -86,8 +86,31 @@
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "vae",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": true
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "vae",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": true
}
}
},
"pass_flows": [[]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -45,6 +45,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -65,6 +65,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -52,6 +52,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -52,6 +52,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -78,6 +78,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -118,6 +118,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -71,6 +71,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -58,6 +58,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -58,6 +58,7 @@
}
}
},
"pass_flows": [["convert"]],
"engine": {
"search_strategy": {
"execution_order": "joint",

View File

@ -9,7 +9,7 @@ is_sdxl = False
width = 512
height = 512
batch_size = 1
hidden_state_size = 768
cross_attention_dim = 768
time_ids_size = 5
@ -87,14 +87,11 @@ def text_encoder_2_data_loader(data_dir, _, *args, **kwargs):
def unet_inputs(_, torch_dtype, is_conversion_inputs=False):
# TODO (pavignol): All the multiplications by 2 here are because the XL base has 2 text encoders
# For refiner, it should be multiplied by 1 (single text encoder)
if is_sdxl:
inputs = {
"sample": torch.rand((2 * batch_size, 4, height // 8, width // 8), dtype=torch_dtype),
"timestep": torch.rand((1,), dtype=torch_dtype),
"encoder_hidden_states": torch.rand((2 * batch_size, 77, hidden_state_size), dtype=torch_dtype),
"encoder_hidden_states": torch.rand((2 * batch_size, 77, cross_attention_dim), dtype=torch_dtype),
}
if is_conversion_inputs:
@ -111,20 +108,25 @@ def unet_inputs(_, torch_dtype, is_conversion_inputs=False):
inputs = {
"sample": torch.rand((batch_size, 4, height // 8, width // 8), dtype=torch_dtype),
"timestep": torch.rand((batch_size,), dtype=torch_dtype),
"encoder_hidden_states": torch.rand((batch_size, 77, hidden_state_size), dtype=torch_dtype),
"return_dict": False,
"encoder_hidden_states": torch.rand((batch_size, 77, cross_attention_dim), dtype=torch_dtype),
}
# use as kwargs since they won't be in the correct position if passed along with the tuple of inputs
kwargs = {
"return_dict": False,
}
if is_conversion_inputs:
inputs["additional_inputs"] = {
**kwargs,
"added_cond_kwargs": {
"text_embeds": torch.rand((1, 1280), dtype=torch_dtype),
"time_ids": torch.rand((1, time_ids_size), dtype=torch_dtype),
}
"time_ids": torch.rand((1, 5), dtype=torch_dtype),
},
}
else:
inputs.update(kwargs)
inputs["onnx::Concat_4"] = torch.rand((1, 1280), dtype=torch_dtype)
inputs["onnx::Shape_5"] = torch.rand((1, time_ids_size), dtype=torch_dtype)
inputs["onnx::Shape_5"] = torch.rand((1, 5), dtype=torch_dtype)
return inputs

View File

@ -4,6 +4,7 @@ import torch
import shutil
import inspect
import importlib
from packaging import version
import numpy as np
import onnxruntime as ort
import diffusers
@ -109,6 +110,9 @@ def load_init_dict(cls: Type[diffusers.DiffusionPipeline], path: os.PathLike):
R: Dict[str, Tuple[str]] = {}
for k, v in merged:
if isinstance(v, list):
if v[0] is None or v[1] is None:
log.debug(f"Skipping {k} while loading init dict of '{path}': {v}")
continue
R[k] = v
return R
@ -142,9 +146,17 @@ def load_submodels(path: os.PathLike, init_dict: Dict[str, Type], **kwargs):
return loaded
def patch_kwargs(cls: Type[diffusers.DiffusionPipeline], kwargs: Dict) -> Dict:
    """Adjust pipeline constructor kwargs for the ONNX Stable Diffusion pipelines.

    When ``cls`` is one of the ONNX Stable Diffusion text-to-image, img2img or
    inpaint pipeline classes, ``safety_checker`` is set to ``None`` and
    ``requires_safety_checker`` to ``False``. For any other class the kwargs
    are left untouched.

    The dict is modified in place and the same object is returned.
    """
    onnx_sd_pipelines = (
        OnnxStableDiffusionPipeline,
        OnnxStableDiffusionImg2ImgPipeline,
        OnnxStableDiffusionInpaintPipeline,
    )
    if cls not in onnx_sd_pipelines:
        return kwargs

    # These pipelines are built without a safety checker component.
    kwargs.update(safety_checker=None, requires_safety_checker=False)
    return kwargs
def load_pipeline(cls: Type[diffusers.DiffusionPipeline], path: os.PathLike):
if os.path.isdir(path):
return cls(**load_submodels(path, load_init_dict(cls, path)))
return cls(**patch_kwargs(cls, load_submodels(path, load_init_dict(cls, path))))
else:
return cls.from_single_file(path)
@ -284,8 +296,7 @@ class OnnxRawPipeline(OnnxPipelineBase):
if submodel in init_dict:
del init_dict[submodel] # already loaded as OnnxRuntimeModel.
kwargs.update(load_submodels(in_dir, init_dict)) # load others.
kwargs["safety_checker"] = None
kwargs["requires_safety_checker"] = False
kwargs = patch_kwargs(self.constructor, kwargs)
pipeline = self.constructor(**kwargs)
pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
@ -353,11 +364,13 @@ class OnnxRawPipeline(OnnxPipelineBase):
with open(os.path.join(sd_configs_path, "olive", f"{'sdxl' if self._is_sdxl else 'sd'}_{submodel}.json"), "r") as config_file:
olive_config = json.load(config_file)
pass_key = f"optimize_{shared.opts.onnx_execution_provider}"
olive_config["pass_flows"] = [[pass_key]]
olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx"))
olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
olive_config["passes"][pass_key]["config"]["float16"] = shared.opts.onnx_olive_float16
olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
if (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm) and version.parse(ort.__version__) < version.parse("1.17.0"):
olive_config["passes"][pass_key]["config"]["optimization_options"] = {"enable_skip_group_norm": False}
run(olive_config)
@ -388,8 +401,7 @@ class OnnxRawPipeline(OnnxPipelineBase):
if submodel in init_dict:
del init_dict[submodel] # already loaded as OnnxRuntimeModel.
kwargs.update(load_submodels(in_dir, init_dict)) # load others.
kwargs["safety_checker"] = None
kwargs["requires_safety_checker"] = False
kwargs = patch_kwargs(self.constructor, kwargs)
pipeline = self.constructor(**kwargs)
pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
@ -416,16 +428,18 @@ class OnnxRawPipeline(OnnxPipelineBase):
return None
def preprocess(self, width: int, height: int, batch_size: int):
if not shared.cmd_opts.debug:
ort.set_default_logger_severity(3)
olive.width = width
olive.height = height
olive.batch_size = batch_size
olive.is_sdxl = self._is_sdxl
if olive.is_sdxl:
olive.hidden_state_size = 2048
olive.cross_attention_dim = 2048
olive.time_ids_size = 6
else:
olive.hidden_state_size = height + 256
olive.cross_attention_dim = height + 256
olive.time_ids_size = 5
converted_dir = self.convert(self.path if os.path.isdir(self.path) else shared.opts.onnx_temp_dir)

View File

@ -7,7 +7,6 @@ import torch
import torchvision.transforms.functional as TF
import diffusers
from modules import shared, devices, processing, sd_samplers, sd_models, images, errors, masking, prompt_parser_diffusers, sd_hijack_hypertile, processing_correction, processing_vae
from modules.olive import OlivePipeline
debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
@ -223,7 +222,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
generator = [torch.Generator(generator_device).manual_seed(s) for s in p.seeds]
prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2)
parser = 'Fixed attention'
if shared.opts.prompt_attention != 'Fixed attention' and 'StableDiffusion' in model.__class__.__name__ and not isinstance(model, diffusers.OnnxStableDiffusionPipeline):
if shared.opts.prompt_attention != 'Fixed attention' and 'StableDiffusion' in model.__class__.__name__ and not isinstance(model, OnnxStableDiffusionPipeline):
try:
prompt_parser_diffusers.encode_prompts(model, p, prompts, negative_prompts, kwargs.get("num_inference_steps", 1), kwargs.pop("clip_skip", None))
parser = shared.opts.prompt_attention