diff --git a/configs/olive/sd/text_encoder.json b/configs/olive/sd/text_encoder.json index 77722d99a..22299cbc5 100644 --- a/configs/olive/sd/text_encoder.json +++ b/configs/olive/sd/text_encoder.json @@ -16,7 +16,14 @@ "systems": { "local_system": { "type": "LocalSystem", - "config": { "accelerators": ["gpu"] } + "config": { + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] + } } }, "evaluators": { @@ -38,7 +45,6 @@ "passes": { "optimize_CPUExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -49,7 +55,6 @@ }, "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -78,12 +83,13 @@ "group_norm_channels_last": false }, "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + "force_fp16_inputs": { + "GroupNorm": [0, 1, 2] + } } }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -94,7 +100,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -117,17 +122,13 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, + "log_severity_level": 0, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "text_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sd/unet.json b/configs/olive/sd/unet.json index 5e08ef5de..03922652e 100644 --- a/configs/olive/sd/unet.json +++ b/configs/olive/sd/unet.json @@ -33,7 +33,14 @@ "systems": { "local_system": { "type": "LocalSystem", - "config": { "accelerators": ["gpu"] } + "config": { + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] + } } }, "evaluators": { @@ -55,7 +62,6 @@ "passes": { "optimize_CPUExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -66,7 +72,6 @@ }, "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -95,12 +100,13 @@ "group_norm_channels_last": false }, "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + "force_fp16_inputs": { + "GroupNorm": [0, 1, 2] + } } }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -111,7 +117,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -134,17 +139,13 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, + "log_severity_level": 0, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "unet", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sd/vae_decoder.json b/configs/olive/sd/vae_decoder.json index 14d8d34de..8358745c4 100644 --- a/configs/olive/sd/vae_decoder.json +++ b/configs/olive/sd/vae_decoder.json @@ -23,7 +23,14 @@ "systems": { "local_system": { "type": "LocalSystem", - "config": { "accelerators": ["gpu"] } + "config": { + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] + } } }, "evaluators": { @@ -45,7 +52,6 @@ "passes": { "optimize_CPUExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -56,7 +62,6 @@ }, "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -85,12 +90,13 @@ "group_norm_channels_last": false }, "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + "force_fp16_inputs": { + "GroupNorm": [0, 1, 2] + } } }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -101,7 +107,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -124,17 +129,13 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, + "log_severity_level": 0, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "vae_decoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sd/vae_encoder.json b/configs/olive/sd/vae_encoder.json index c5643348c..3ce9091fa 100644 --- a/configs/olive/sd/vae_encoder.json +++ b/configs/olive/sd/vae_encoder.json @@ -23,7 +23,14 @@ "systems": { "local_system": { "type": "LocalSystem", - "config": { "accelerators": ["gpu"] } + "config": { + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] + } } }, "evaluators": { @@ -45,7 +52,6 @@ "passes": { "optimize_CPUExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -56,7 +62,6 @@ }, "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -85,12 +90,13 @@ "group_norm_channels_last": false }, "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + "force_fp16_inputs": { + "GroupNorm": [0, 1, 2] + } } }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -101,7 +107,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -124,17 +129,13 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, + "log_severity_level": 0, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "vae_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/text_encoder.json b/configs/olive/sdxl/text_encoder.json index faf9a6621..164085874 100644 --- a/configs/olive/sdxl/text_encoder.json +++ b/configs/olive/sdxl/text_encoder.json @@ -50,7 +50,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -73,7 +78,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -109,7 +113,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -120,7 +123,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -132,17 +134,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "text_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/text_encoder_2.json b/configs/olive/sdxl/text_encoder_2.json index 5532657f7..aa1342736 100644 --- a/configs/olive/sdxl/text_encoder_2.json +++ b/configs/olive/sdxl/text_encoder_2.json @@ -46,7 +46,7 @@ ], "dynamic_axes": { "input_ids": { "0": "batch_size", "1": "sequence_length" }, - "text_embeds": { "0": "batch_size", "1": "sequence_length" }, + "text_embeds": { "0": "batch_size" }, "last_hidden_state": { "0": "batch_size", "1": "sequence_length" }, "hidden_states.0": { "0": "batch_size", "1": "sequence_length" }, "hidden_states.1": { "0": "batch_size", "1": "sequence_length" }, @@ -90,7 +90,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -113,7 +118,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -149,7 +153,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -160,7 +163,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "clip", "opt_level": 0, @@ -172,17 +174,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "text_encoder_2", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/unet.json b/configs/olive/sdxl/unet.json index 874002a96..28d869023 100644 --- a/configs/olive/sdxl/unet.json +++ b/configs/olive/sdxl/unet.json @@ -40,7 +40,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -63,7 +68,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -99,7 +103,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -110,7 +113,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "unet", "opt_level": 0, @@ -122,17 +124,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "unet", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/vae_decoder.json b/configs/olive/sdxl/vae_decoder.json index 0074e3dd6..1f6823e50 100644 --- a/configs/olive/sdxl/vae_decoder.json +++ b/configs/olive/sdxl/vae_decoder.json @@ -30,7 +30,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -53,7 +58,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -111,7 +115,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -121,7 +124,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -132,17 +134,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "vae_decoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/configs/olive/sdxl/vae_encoder.json b/configs/olive/sdxl/vae_encoder.json index 540c3a9ee..e5d4f844e 100644 --- a/configs/olive/sdxl/vae_encoder.json +++ b/configs/olive/sdxl/vae_encoder.json @@ -30,7 +30,12 @@ "local_system": { "type": "LocalSystem", "config": { - "accelerators": ["gpu"] + "accelerators": [ + { + "device": "gpu", + "execution_providers": ["DmlExecutionProvider"] + } + ] } } }, @@ -53,7 +58,6 @@ "passes": { "optimize_DmlExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -89,7 +93,6 @@ }, "optimize_CUDAExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -100,7 +103,6 @@ }, "optimize_ROCMExecutionProvider": { "type": "OrtTransformersOptimization", - "disable_search": true, "config": { "model_type": "vae", "opt_level": 0, @@ -112,17 +114,12 @@ }, "pass_flows": [["optimize_AutoExecutionProvider"]], "engine": { - "search_strategy": { - "execution_order": "joint", - "search_algorithm": "exhaustive" - }, "evaluator": "common_evaluator", "evaluate_input_model": false, "host": "local_system", "target": "local_system", "cache_dir": "cache", "output_name": "vae_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] + "output_dir": "footprints" } } diff --git a/installer.py b/installer.py index 0216f71e5..2643bad7f 100644 --- a/installer.py +++ b/installer.py @@ -657,6 +657,8 @@ def check_torch(): install('hidet', 'hidet') if opts.get('cuda_compile_backend', '') == 'deep-cache': install('DeepCache') + if opts.get('cuda_compile_backend', '') == 'olive-ai': + install('olive-ai') if opts.get('nncf_compress_weights', False) and not args.use_openvino: install('nncf==2.7.0', 'nncf') if args.profile: diff --git a/modules/loader.py b/modules/loader.py index 6d6d4e043..34039f4f4 100644 --- a/modules/loader.py +++ b/modules/loader.py @@ -44,11 +44,6 @@ import onnxruntime onnxruntime.set_default_logger_severity(3) timer.startup.record("onnx") -# moved to webui.py:initialize() -# from modules.onnx_impl import initialize_olive # pylint: disable=ungrouped-imports -# initialize_olive() -# timer.startup.record("olive") - from fastapi import FastAPI # pylint: disable=W0611,C0411 import gradio # pylint: disable=W0611,C0411 timer.startup.record("gradio") diff --git a/modules/onnx_impl/__init__.py b/modules/onnx_impl/__init__.py index a61e5b50c..7e23e72e6 100644 --- a/modules/onnx_impl/__init__.py +++ b/modules/onnx_impl/__init__.py @@ -1,14 +1,11 @@ -import os -from typing import Any, Dict, Callable, Optional +from typing import Any, Dict, Optional import numpy as np import torch import diffusers import onnxruntime as ort import optimum.onnxruntime - initialized = False -run_olive_workflow = None class DynamicSessionOptions(ort.SessionOptions): @@ -50,6 +47,9 @@ class TorchCompatibleModule: device = torch.device("cpu") dtype = torch.float32 + def named_modules(self): # dummy + return () + def to(self, *_, **__): raise NotImplementedError @@ -84,9 +84,6 @@ class TemporalModule(TorchCompatibleModule): class OnnxRuntimeModel(TorchCompatibleModule, diffusers.OnnxRuntimeModel): config = {} # dummy - def named_modules(self): # dummy - return () - def to(self, *args, **kwargs): from modules.onnx_impl.utils import extract_device, move_inference_session @@ -245,28 +242,6 @@ def initialize_onnx(): initialized = True -def initialize_olive(): - global run_olive_workflow # pylint: disable=global-statement - from installer import installed, log - if not installed('olive-ai', quiet=True) or not installed('onnx', quiet=True): - return - import sys - import importlib - orig_sys_path = sys.path - venv_dir = os.environ.get("VENV_DIR", os.path.join(os.getcwd(), 'venv')) - try: - spec = importlib.util.find_spec('onnxruntime.transformers') - sys.path = [d for d in spec.submodule_search_locations + sys.path if sys.path[1] not in d or venv_dir in d] - from onnxruntime.transformers import convert_generation # pylint: disable=unused-import - spec = importlib.util.find_spec('olive') - sys.path = spec.submodule_search_locations + sys.path - run_olive_workflow = importlib.import_module('olive.workflows').run - except Exception as e: - run_olive_workflow = None - log.error(f'Olive: Failed to load olive-ai: {e}') - sys.path = orig_sys_path - - def install_olive(): from installer import installed, install, log if installed("olive-ai"): diff --git a/modules/onnx_impl/pipelines/__init__.py b/modules/onnx_impl/pipelines/__init__.py index 882eb3c48..bb0d9b010 100644 --- a/modules/onnx_impl/pipelines/__init__.py +++ b/modules/onnx_impl/pipelines/__init__.py @@ -15,7 +15,7 @@ from modules.paths import sd_configs_path, models_path from modules.sd_models import CheckpointInfo from modules.processing import StableDiffusionProcessing from modules.olive_script import config -from modules.onnx_impl import DynamicSessionOptions, TorchCompatibleModule, VAE, run_olive_workflow +from modules.onnx_impl import DynamicSessionOptions, TorchCompatibleModule, VAE from modules.onnx_impl.utils import extract_device, move_inference_session, check_diffusers_cache, check_pipeline_sdxl, check_cache_onnx, load_init_dict, load_submodel, load_submodels, patch_kwargs, load_pipeline, get_base_constructor, get_io_config from modules.onnx_impl.execution_providers import ExecutionProvider, EP_TO_NAME, get_provider @@ -161,7 +161,7 @@ class OnnxRawPipeline(PipelineBase): in_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt") ) - from modules import olive_script as olv + from modules import olive_script as script for submodel in submodels: destination = os.path.join(out_dir, submodel) @@ -169,8 +169,8 @@ class OnnxRawPipeline(PipelineBase): if not os.path.isdir(destination): os.mkdir(destination) - model = getattr(olv, f"{submodel}_load")(in_dir) - sample = getattr(olv, f"{submodel}_conversion_inputs")(None) + model = getattr(script, f"{submodel}_load")(in_dir) + sample = getattr(script, f"{submodel}_conversion_inputs")(None) with tempfile.TemporaryDirectory(prefix="onnx_conversion") as temp_dir: temp_path = os.path.join(temp_dir, "model.onnx") torch.onnx.export( @@ -219,13 +219,8 @@ class OnnxRawPipeline(PipelineBase): json.dump(model_index, file) def run_olive(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLike): - if not shared.cmd_opts.debug: - ort.set_default_logger_severity(4) - - try: - from olive.model import ONNXModel # olive-ai==0.4.0 - except ImportError: - from olive.model import ONNXModelHandler as ONNXModel # olive-ai==0.5.0 + from olive.model import ONNXModelHandler + from olive.workflows import run as run_workflows shutil.rmtree("cache", ignore_errors=True) shutil.rmtree("footprints", ignore_errors=True) @@ -247,19 +242,20 @@ class OnnxRawPipeline(PipelineBase): for i in range(len(flow)): flow[i] = flow[i].replace("AutoExecutionProvider", shared.opts.onnx_execution_provider) olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx")) - olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider] + olive_config["systems"]["local_system"]["config"]["accelerators"][0]["device"] = "cpu" if shared.opts.onnx_execution_provider == ExecutionProvider.CPU else "gpu" # TODO: npu + olive_config["systems"]["local_system"]["config"]["accelerators"][0]["execution_providers"] = [shared.opts.onnx_execution_provider] for pass_key in olive_config["passes"]: if olive_config["passes"][pass_key]["type"] == "OrtTransformersOptimization": float16 = shared.opts.olive_float16 and not (submodel == "vae_encoder" and shared.opts.olive_vae_encoder_float32) olive_config["passes"][pass_key]["config"]["float16"] = float16 + if not float16: + olive_config["passes"][pass_key]["config"]["force_fp16_inputs"] = {} if shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm: - if version.parse(ort.__version__) < version.parse("1.17.0"): - olive_config["passes"][pass_key]["config"]["optimization_options"] = {"enable_skip_group_norm": False} if float16: olive_config["passes"][pass_key]["config"]["keep_io_types"] = False - run_olive_workflow(olive_config) + run_workflows(olive_config) with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r", encoding="utf-8") as footprint_file: footprints = json.load(footprint_file) @@ -270,7 +266,7 @@ class OnnxRawPipeline(PipelineBase): assert processor_final_pass_footprint, "Failed to optimize model" - optimized_model_paths[submodel] = ONNXModel( + optimized_model_paths[submodel] = ONNXModelHandler( **processor_final_pass_footprint["model_config"]["config"] ).model_path @@ -371,53 +367,50 @@ class OnnxRawPipeline(PipelineBase): in_dir = out_dir if shared.opts.cuda_compile_backend == "olive-ai": - if run_olive_workflow is None: - log.warning('Olive: Skipping model compilation because olive-ai was loaded unsuccessfully.') + submodels_for_olive = [] + + if "Text Encoder" in shared.opts.cuda_compile: + if not self.is_refiner: + submodels_for_olive.append("text_encoder") + if self._is_sdxl: + submodels_for_olive.append("text_encoder_2") + if "Model" in shared.opts.cuda_compile: + submodels_for_olive.append("unet") + if "VAE" in shared.opts.cuda_compile: + submodels_for_olive.append("vae_encoder") + submodels_for_olive.append("vae_decoder") + + if len(submodels_for_olive) == 0: + log.warning("Olive: Skipping olive run.") else: - submodels_for_olive = [] + log.warning("Olive implementation is experimental. It contains potentially an issue and is subject to change at any time.") - if "Text Encoder" in shared.opts.cuda_compile: - if not self.is_refiner: - submodels_for_olive.append("text_encoder") - if self._is_sdxl: - submodels_for_olive.append("text_encoder_2") - if "Model" in shared.opts.cuda_compile: - submodels_for_olive.append("unet") - if "VAE" in shared.opts.cuda_compile: - submodels_for_olive.append("vae_encoder") - submodels_for_olive.append("vae_decoder") + out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{config.width}w-{config.height}h") + if not os.path.isdir(out_dir): # check the model is already optimized (cached) + if not shared.opts.olive_cache_optimized: + out_dir = shared.opts.onnx_temp_dir - if len(submodels_for_olive) == 0: - log.warning("Olive: Skipping olive run.") - else: - log.warning("Olive implementation is experimental. It contains potentially an issue and is subject to change at any time.") + if p.width != p.height: + log.warning("Olive: Different width and height are detected. The quality of the result is not guaranteed.") - out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{config.width}w-{config.height}h") - if not os.path.isdir(out_dir): # check the model is already optimized (cached) - if not shared.opts.olive_cache_optimized: - out_dir = shared.opts.onnx_temp_dir + if shared.opts.olive_static_dims: + sess_options = DynamicSessionOptions() + sess_options.enable_static_dims({ + "is_sdxl": self._is_sdxl, + "is_refiner": self.is_refiner, - if p.width != p.height: - log.warning("Olive: Different width and height are detected. The quality of the result is not guaranteed.") + "hidden_batch_size": p.batch_size if disable_classifier_free_guidance else p.batch_size * 2, + "height": p.height, + "width": p.width, + }) + kwargs["sess_options"] = sess_options - if shared.opts.olive_static_dims: - sess_options = DynamicSessionOptions() - sess_options.enable_static_dims({ - "is_sdxl": self._is_sdxl, - "is_refiner": self.is_refiner, - - "hidden_batch_size": p.batch_size if disable_classifier_free_guidance else p.batch_size * 2, - "height": p.height, - "width": p.width, - }) - kwargs["sess_options"] = sess_options - - try: - self.run_olive(submodels_for_olive, in_dir, out_dir) - except Exception as e: - log.error(f"Olive: Failed to run olive passes: model='{self.original_filename}', error={e}") - shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True) - shutil.rmtree(out_dir, ignore_errors=True) + try: + self.run_olive(submodels_for_olive, in_dir, out_dir) + except Exception as e: + log.error(f"Olive: Failed to run olive passes: model='{self.original_filename}', error={e}") + shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True) + shutil.rmtree(out_dir, ignore_errors=True) pipeline = self.derive_properties(load_pipeline(self.constructor, out_dir, **kwargs)) diff --git a/modules/onnx_impl/ui.py b/modules/onnx_impl/ui.py index 5d3496dba..f73e477c4 100644 --- a/modules/onnx_impl/ui.py +++ b/modules/onnx_impl/ui.py @@ -17,7 +17,6 @@ def create_ui(): from modules.shared import log, opts, cmd_opts, refresh_checkpoints from modules.sd_models import checkpoint_tiles, get_closet_checkpoint_match from modules.paths import sd_configs_path - from . import run_olive_workflow from .execution_providers import ExecutionProvider, install_execution_provider from .utils import check_diffusers_cache @@ -39,7 +38,7 @@ def create_ui(): ep_log = gr.HTML("") ep_install.click(fn=install_execution_provider, inputs=[ep_checkbox], outputs=[ep_log]) - if run_olive_workflow is not None: + if opts.cuda_compile_backend == "olive-ai": import olive.passes as olive_passes from olive.hardware.accelerator import AcceleratorSpec, Device accelerator = AcceleratorSpec(accelerator_type=Device.GPU, execution_provider=opts.onnx_execution_provider) @@ -147,7 +146,9 @@ def create_ui(): sd_configs[submodel]["passes"][pass_name]["config"][config_key] = value return listener - for config_key, v in getattr(olive_passes, config_dict["type"], olive_passes.Pass)._default_config(accelerator).items(): # pylint: disable=protected-access + pass_cls = getattr(olive_passes, config_dict["type"], None) + default_config = {} if pass_cls is None else pass_cls._default_config(accelerator) # pylint: disable=protected-access + for config_key, v in default_config.items(): component = None if v.type_ == bool: component = gr.Checkbox @@ -160,7 +161,7 @@ def create_ui(): sd_pass_config_components[submodel][pass_name][config_key] = component component.change(fn=create_pass_config_change_listener(submodel, pass_name, config_key), inputs=component) - pass_type.change(fn=sd_create_change_listener(submodel, "passes", config_key, "type"), inputs=pass_type) # pylint: disable=undefined-loop-variable + pass_type.change(fn=sd_create_change_listener(submodel, "passes", pass_name, "type"), inputs=pass_type) def sd_save(): for k, v in sd_configs.items(): @@ -208,7 +209,9 @@ def create_ui(): sdxl_configs[submodel]["passes"][pass_name]["config"][config_key] = value return listener - for config_key, v in getattr(olive_passes, config_dict["type"], olive_passes.Pass)._default_config(accelerator).items(): # pylint: disable=protected-access + pass_cls = getattr(olive_passes, config_dict["type"], None) + default_config = {} if pass_cls is None else pass_cls._default_config(accelerator) # pylint: disable=protected-access + for config_key, v in default_config.items(): component = None if v.type_ == bool: component = gr.Checkbox diff --git a/webui.py b/webui.py index 6a722cb2b..0f88cf5d1 100644 --- a/webui.py +++ b/webui.py @@ -82,12 +82,6 @@ def initialize(): log.debug('Initializing') check_rollback_vae() - if shared.opts.cuda_compile_backend == "olive-ai": - from modules.onnx_impl import initialize_olive, install_olive - install_olive() - initialize_olive() - timer.startup.record("olive") - modules.sd_samplers.list_samplers() timer.startup.record("samplers")