import os
import sys
import json
import shutil
import tempfile
from abc import ABCMeta
from typing import Type, Tuple, List, Any, Dict, TYPE_CHECKING
import torch
import diffusers

from installer import install
from modules.logger import log
from modules import shared
from modules.paths import sd_configs_path, models_path
from modules.sd_models import CheckpointInfo
if TYPE_CHECKING:
    from modules.processing import StableDiffusionProcessing
from modules.olive_script import config
from modules.onnx_impl import DynamicSessionOptions, TorchCompatibleModule, VAE
from modules.onnx_impl.utils import extract_device, move_inference_session, check_diffusers_cache, check_pipeline_sdxl, check_cache_onnx, load_init_dict, load_submodel, load_submodels, patch_kwargs, load_pipeline, get_base_constructor, get_io_config
from modules.onnx_impl.execution_providers import ExecutionProvider, EP_TO_NAME, get_provider


SUBMODELS_SD = ("text_encoder", "unet", "vae_encoder", "vae_decoder",)
SUBMODELS_SDXL = ("text_encoder", "text_encoder_2", "unet", "vae_encoder", "vae_decoder",)
SUBMODELS_SDXL_REFINER = ("text_encoder_2", "unet", "vae_encoder", "vae_decoder",)
SUBMODELS_LARGE = ("text_encoder_2", "unet",)
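# Note: PipelineBase supplies the torch-compatible plumbing (device moves,
# from_pretrained/from_single_file entry points) shared by the ONNX pipelines;
# OnnxRawPipeline wraps a not-yet-converted checkpoint and, on preprocess(),
# exports its submodels to ONNX (convert) and optionally optimizes them with
# Olive (run_olive) before returning a callable diffusers pipeline.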
class PipelineBase(TorchCompatibleModule, diffusers.DiffusionPipeline, metaclass=ABCMeta):
    model_type: str
    sd_model_hash: str
    sd_checkpoint_info: CheckpointInfo
    sd_model_checkpoint: str

    def __init__(self): # pylint: disable=super-init-not-called
        self.model_type = self.__class__.__name__

    def to(self, *args, **kwargs):
        if self.__class__ == OnnxRawPipeline: # cannot move a pipeline that has not been preprocessed.
            return self

        expected_modules, _ = self._get_signature_keys(self)
        for name in expected_modules:
            if not hasattr(self, name):
                log.warning(f"Pipeline does not have module '{name}'.")
                continue

            module = getattr(self, name)

            if "optimum.onnxruntime" in sys.modules:
                import optimum.onnxruntime
                if isinstance(module, optimum.onnxruntime.modeling_diffusion._ORTDiffusionModelPart): # pylint: disable=protected-access, no-member
                    device = extract_device(args, kwargs)
                    if device is None:
                        return self
                    module.session = move_inference_session(module.session, device)

            if not isinstance(module, diffusers.OnnxRuntimeModel):
                continue

            try:
                setattr(self, name, module.to(*args, **kwargs))
                del module
            except Exception:
                log.debug(f"Component device/dtype conversion failed: module={name} args={args} kwargs={kwargs}")

        return self

    @property
    def components(self):
        return {}

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **_): # pylint: disable=arguments-differ
        return OnnxRawPipeline(
            cls,
            pretrained_model_name_or_path,
        )

    @classmethod
    def from_single_file(cls, pretrained_model_name_or_path, **_):
        return OnnxRawPipeline(
            cls,
            pretrained_model_name_or_path,
        )

    @classmethod
    def from_ckpt(cls, pretrained_model_name_or_path, **_):
        return cls.from_single_file(pretrained_model_name_or_path)


class CallablePipelineBase(PipelineBase):
    vae: VAE

    def __init__(self):
        super().__init__()
        self.vae = VAE(self)


class OnnxRawPipeline(PipelineBase):
    config = {}

    _is_sdxl: bool
    is_refiner: bool
    from_diffusers_cache: bool

    path: os.PathLike
    original_filename: str

    constructor: Type[PipelineBase]
    init_dict: Dict[str, Tuple[str]] = {}

    default_scheduler: Any = None # for Img2Img

    def __init__(self, constructor: Type[PipelineBase], path: os.PathLike): # pylint: disable=super-init-not-called
        self._is_sdxl = check_pipeline_sdxl(constructor)
        self.from_diffusers_cache = check_diffusers_cache(path)
        self.path = path
        self.original_filename = os.path.basename(os.path.dirname(os.path.dirname(path)) if self.from_diffusers_cache else path)

        if os.path.isdir(path):
            self.init_dict = load_init_dict(constructor, path)
            self.default_scheduler = load_submodel(self.path, None, "scheduler", self.init_dict["scheduler"])
        else:
            cls = diffusers.StableDiffusionXLPipeline if self._is_sdxl else diffusers.StableDiffusionPipeline
            try:
                pipeline = cls.from_single_file(path)
                self.default_scheduler = pipeline.scheduler
                path = shared.opts.onnx_temp_dir
                if os.path.isdir(path):
                    shutil.rmtree(path)
                os.mkdir(path)
                pipeline.save_pretrained(path)
                del pipeline
                self.init_dict = load_init_dict(constructor, path)
            except Exception:
                log.error(f'ONNX: Failed to load ONNX pipeline: is_sdxl={self._is_sdxl}')
                log.warning('ONNX: This model cannot be loaded with the selected pipeline. Please check the Diffusers pipeline setting in Compute Settings.')
                return
        if "vae" in self.init_dict:
            del self.init_dict["vae"]

        self.is_refiner = self._is_sdxl and "Img2Img" not in constructor.__name__ and "Img2Img" in diffusers.DiffusionPipeline.load_config(path)["_class_name"]
        self.constructor = constructor
        if self.is_refiner:
            from modules.onnx_impl.pipelines.onnx_stable_diffusion_xl_img2img_pipeline import OnnxStableDiffusionXLImg2ImgPipeline
            self.constructor = OnnxStableDiffusionXLImg2ImgPipeline
        self.model_type = self.constructor.__name__

    def derive_properties(self, pipeline: diffusers.DiffusionPipeline):
        pipeline.sd_model_hash = self.sd_model_hash
        pipeline.sd_checkpoint_info = self.sd_checkpoint_info
        pipeline.sd_model_checkpoint = self.sd_model_checkpoint
        pipeline.scheduler = self.default_scheduler
        return pipeline

    def convert(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLike):
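        """Export the given submodels of this checkpoint to ONNX under out_dir.

        Each submodel is loaded and sampled through modules.olive_script, exported
        with torch.onnx.export (opset 14) into a temporary directory, then re-saved
        into out_dir; large submodels (text_encoder_2, unet) keep their tensors as
        external data in a single weights.pb. Finally, a model_index.json is written
        so the converted directory can be loaded as a diffusers pipeline.
        """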
        install('onnx') # may not be installed yet; checks and installs as needed
        import onnx

        shutil.rmtree("cache", ignore_errors=True)
        shutil.rmtree("footprints", ignore_errors=True)

        if shared.opts.onnx_cache_converted:
            shutil.copytree(
                in_dir, out_dir,
                ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
            )

        from modules import olive_script as script

        for submodel in submodels:
            destination = os.path.join(out_dir, submodel)

            if not os.path.isdir(destination):
                os.mkdir(destination)

            model = getattr(script, f"{submodel}_load")(in_dir)
            sample = getattr(script, f"{submodel}_conversion_inputs")(None)
            with tempfile.TemporaryDirectory(prefix="onnx_conversion") as temp_dir:
                temp_path = os.path.join(temp_dir, "model.onnx")
                torch.onnx.export(
                    model,
                    sample,
                    temp_path,
                    opset_version=14,
                    **get_io_config(submodel, self._is_sdxl),
                )
                model = onnx.load(temp_path)
                onnx.save_model(
                    model,
                    os.path.join(destination, "model.onnx"),
                    save_as_external_data=submodel in SUBMODELS_LARGE,
                    all_tensors_to_one_file=True,
                    location="weights.pb",
                )
            log.info(f"ONNX: Successfully exported converted model: submodel={submodel}")

        kwargs = {}
        init_dict = self.init_dict.copy()
        for submodel in submodels:
            kwargs[submodel] = diffusers.OnnxRuntimeModel.load_model(
                os.path.join(out_dir, submodel, "model.onnx"),
                provider=get_provider(),
            ) if self._is_sdxl else diffusers.OnnxRuntimeModel.from_pretrained(
                os.path.join(out_dir, submodel),
                provider=get_provider(),
            )
            if submodel in init_dict:
                del init_dict[submodel] # already loaded as OnnxRuntimeModel.
        kwargs.update(load_submodels(in_dir, self._is_sdxl, init_dict)) # load the others.

        constructor = get_base_constructor(self.constructor, self.is_refiner)
        kwargs = patch_kwargs(constructor, kwargs)

        pipeline = constructor(**kwargs)
        model_index = json.loads(pipeline.to_json_string())
        del pipeline

        for k, v in init_dict.items(): # copy missing submodels. (ORTStableDiffusionXLPipeline)
            if k not in model_index:
                model_index[k] = v

        with open(os.path.join(out_dir, "model_index.json"), 'w', encoding="utf-8") as file:
            json.dump(model_index, file)

    def run_olive(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLike):
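        """Optimize previously converted ONNX submodels with Olive.

        For each submodel, the matching pass configuration under
        sd_configs_path/olive/{sd,sdxl} is patched for the selected execution
        provider and float16 options, executed via olive.workflows.run, and the
        output of the final pass (resolved through its footprints file) is copied
        into out_dir together with any external-data files. A model_index.json is
        then written, mirroring convert().
        """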
        from olive.model import ONNXModelHandler
        from olive.workflows import run as run_workflows

        shutil.rmtree("cache", ignore_errors=True)
        shutil.rmtree("footprints", ignore_errors=True)

        if shared.opts.olive_cache_optimized:
            shutil.copytree(
                in_dir, out_dir,
                ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
            )

        optimized_model_paths = {}

        for submodel in submodels:
            log.info(f"\nProcessing {submodel}")

            with open(os.path.join(sd_configs_path, "olive", 'sdxl' if self._is_sdxl else 'sd', f"{submodel}.json"), encoding="utf-8") as config_file:
                olive_config: Dict[str, Dict[str, Dict]] = json.load(config_file)

            for flow in olive_config["pass_flows"]:
                for i in range(len(flow)):
                    flow[i] = flow[i].replace("AutoExecutionProvider", shared.opts.onnx_execution_provider)
            olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx"))
            olive_config["systems"]["local_system"]["config"]["accelerators"][0]["device"] = "cpu" if shared.opts.onnx_execution_provider == ExecutionProvider.CPU else "gpu"
            olive_config["systems"]["local_system"]["config"]["accelerators"][0]["execution_providers"] = [shared.opts.onnx_execution_provider]

            for pass_key in olive_config["passes"]:
                if olive_config["passes"][pass_key]["type"] == "OrtTransformersOptimization":
                    float16 = shared.opts.olive_float16 and not (submodel == "vae_encoder" and shared.opts.olive_vae_encoder_float32)
                    olive_config["passes"][pass_key]["config"]["float16"] = float16
                    if not float16:
                        olive_config["passes"][pass_key]["config"]["force_fp16_inputs"] = {}
                    if shared.opts.onnx_execution_provider in (ExecutionProvider.CUDA, ExecutionProvider.ROCm):
                        if float16:
                            olive_config["passes"][pass_key]["config"]["keep_io_types"] = False

            run_workflows(olive_config)

            with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), encoding="utf-8") as footprint_file:
                footprints = json.load(footprint_file)
            processor_final_pass_footprint = None
            for _, footprint in footprints.items():
                if footprint["from_pass"] == olive_config["passes"][olive_config["pass_flows"][-1][-1]]["type"]:
                    processor_final_pass_footprint = footprint

            assert processor_final_pass_footprint, "Failed to optimize model"

            optimized_model_paths[submodel] = ONNXModelHandler(
                **processor_final_pass_footprint["model_config"]["config"]
            ).model_path

            log.info(f"Olive: Successfully processed model: submodel={submodel}")

        for submodel in submodels:
            src_path = optimized_model_paths[submodel]
            src_parent = os.path.dirname(src_path)
            dst_parent = os.path.join(out_dir, submodel)
            dst_path = os.path.join(dst_parent, "model.onnx")
            if not os.path.isdir(dst_parent):
                os.mkdir(dst_parent)
            shutil.copyfile(src_path, dst_path)

            data_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
            if os.path.isfile(data_src_path):
                data_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
                shutil.copyfile(data_src_path, data_dst_path)

            weights_src_path = os.path.join(src_parent, "weights.pb")
            if os.path.isfile(weights_src_path):
                weights_dst_path = os.path.join(dst_parent, "weights.pb")
                shutil.copyfile(weights_src_path, weights_dst_path)
        del optimized_model_paths

        kwargs = {}
        init_dict = self.init_dict.copy()
        for submodel in submodels:
            kwargs[submodel] = diffusers.OnnxRuntimeModel.load_model(
                os.path.join(out_dir, submodel, "model.onnx"),
                provider=get_provider(),
            ) if self._is_sdxl else diffusers.OnnxRuntimeModel.from_pretrained(
                os.path.join(out_dir, submodel),
                provider=get_provider(),
            )
            if submodel in init_dict:
                del init_dict[submodel] # already loaded as OnnxRuntimeModel.
        kwargs.update(load_submodels(in_dir, self._is_sdxl, init_dict)) # load the others.

        constructor = get_base_constructor(self.constructor, self.is_refiner)
        kwargs = patch_kwargs(constructor, kwargs)

        pipeline = constructor(**kwargs)
        model_index = json.loads(pipeline.to_json_string())
        del pipeline

        for k, v in init_dict.items(): # copy missing submodels. (ORTStableDiffusionXLPipeline)
            if k not in model_index:
                model_index[k] = v

        with open(os.path.join(out_dir, "model_index.json"), 'w', encoding="utf-8") as file:
            json.dump(model_index, file)

    def preprocess(self, p: 'StableDiffusionProcessing'):
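        """Turn this raw pipeline into a callable ONNX pipeline for the job `p`.

        Populates the shared olive_script config from `p`, converts the checkpoint
        to ONNX unless a cached conversion exists, optionally runs Olive passes when
        the compile backend is "olive-ai", and returns the loaded pipeline with
        checkpoint properties carried over via derive_properties().
        """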
        disable_classifier_free_guidance = p.cfg_scale < 0.01

        config.from_diffusers_cache = self.from_diffusers_cache
        config.is_sdxl = self._is_sdxl

        config.vae = os.path.join(models_path, "VAE", shared.opts.sd_vae)
        if not os.path.isfile(config.vae):
            del config.vae
        config.vae_sdxl_fp16_fix = self._is_sdxl and shared.opts.diffusers_vae_upcast == "false"

        config.width = p.width
        config.height = p.height
        config.batch_size = p.batch_size

        if self._is_sdxl and not self.is_refiner:
            config.cross_attention_dim = 2048
            config.time_ids_size = 6
        else:
            config.cross_attention_dim = 768
            config.time_ids_size = 5

        if not disable_classifier_free_guidance and "turbo" in str(self.path).lower():
            log.warning("ONNX: It looks like you are trying to run a Turbo model with CFG scale enabled, which will lead to a 'size mismatch' or 'unexpected parameter' error.")

        out_dir = os.path.join(shared.opts.onnx_cached_models_path, self.original_filename)
        if self.from_diffusers_cache and check_cache_onnx(self.path): # if the model is already in ONNX format or was converted before, skip conversion.
            out_dir = self.path
        elif not os.path.isdir(out_dir):
            try:
                self.convert(
                    (SUBMODELS_SDXL_REFINER if self.is_refiner else SUBMODELS_SDXL) if self._is_sdxl else SUBMODELS_SD,
                    self.path if os.path.isdir(self.path) else shared.opts.onnx_temp_dir,
                    out_dir,
                )
            except Exception as e:
                log.error(f"ONNX: Failed to convert model: model='{self.original_filename}', error={e}")
                shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)
                shutil.rmtree(out_dir, ignore_errors=True)
                return

        kwargs = {
            "provider": get_provider(),
        }
        in_dir = out_dir

        if shared.opts.cuda_compile_backend == "olive-ai":
            submodels_for_olive = []

            if "TE" in shared.opts.cuda_compile:
                if not self.is_refiner:
                    submodels_for_olive.append("text_encoder")
                if self._is_sdxl:
                    submodels_for_olive.append("text_encoder_2")
            if "Model" in shared.opts.cuda_compile:
                submodels_for_olive.append("unet")
            if "VAE" in shared.opts.cuda_compile:
                submodels_for_olive.append("vae_encoder")
                submodels_for_olive.append("vae_decoder")

            if len(submodels_for_olive) == 0:
                log.warning("Olive: Skipping olive run.")
            else:
                log.warning("Olive implementation is experimental. It may contain issues and is subject to change at any time.")

                out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{config.width}w-{config.height}h")
                if not os.path.isdir(out_dir): # skip optimization if an optimized model is already cached.
                    if not shared.opts.olive_cache_optimized:
                        out_dir = shared.opts.onnx_temp_dir

                    if p.width != p.height:
                        log.warning("Olive: Different width and height detected. The quality of the result is not guaranteed.")

                    if shared.opts.olive_static_dims:
                        sess_options = DynamicSessionOptions()
                        sess_options.enable_static_dims({
                            "is_sdxl": self._is_sdxl,
                            "is_refiner": self.is_refiner,
                            "hidden_batch_size": p.batch_size if disable_classifier_free_guidance else p.batch_size * 2,
                            "height": p.height,
                            "width": p.width,
                        })
                        kwargs["sess_options"] = sess_options

                    try:
                        self.run_olive(submodels_for_olive, in_dir, out_dir)
                    except Exception as e:
                        log.error(f"Olive: Failed to run olive passes: model='{self.original_filename}', error={e}")
                        shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)
                        shutil.rmtree(out_dir, ignore_errors=True)

        pipeline = self.derive_properties(load_pipeline(self.constructor, out_dir, **kwargs))

        if not shared.opts.onnx_cache_converted and in_dir != self.path:
            shutil.rmtree(in_dir)
        shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)

        return pipeline
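# Usage sketch (illustrative only; `OnnxStableDiffusionPipeline` stands in for a
# concrete PipelineBase subclass, and `p` and `checkpoint_info` come from the
# surrounding application, normally via modules.sd_models):
#
#   raw = OnnxStableDiffusionPipeline.from_single_file("/path/to/model.safetensors")
#   raw.sd_model_hash = checkpoint_info.hash
#   raw.sd_checkpoint_info = checkpoint_info
#   raw.sd_model_checkpoint = checkpoint_info.filename
#   pipeline = raw.preprocess(p)  # convert to ONNX, optionally run Olive, then load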