mirror of https://github.com/vladmandic/automatic
make olive optional
parent
e954695b5a
commit
84f2a9df95
|
|
@ -4,7 +4,7 @@
|
|||
"config": {
|
||||
"model_path": "",
|
||||
"model_loader": "text_encoder_load",
|
||||
"model_script": "modules/onnx.py",
|
||||
"model_script": "modules/olive.py",
|
||||
"io_config": {
|
||||
"input_names": ["input_ids"],
|
||||
"output_names": ["last_hidden_state", "pooler_output"],
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
"type": "latency",
|
||||
"sub_types": [{ "name": "avg" }],
|
||||
"user_config": {
|
||||
"user_script": "modules/onnx.py",
|
||||
"user_script": "modules/olive.py",
|
||||
"dataloader_func": "text_encoder_data_loader",
|
||||
"batch_size": 1
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
"config": {
|
||||
"model_path": "",
|
||||
"model_loader": "unet_load",
|
||||
"model_script": "modules/onnx.py",
|
||||
"model_script": "modules/olive.py",
|
||||
"io_config": {
|
||||
"input_names": [
|
||||
"sample",
|
||||
|
|
@ -46,7 +46,7 @@
|
|||
"type": "latency",
|
||||
"sub_types": [{ "name": "avg" }],
|
||||
"user_config": {
|
||||
"user_script": "modules/onnx.py",
|
||||
"user_script": "modules/olive.py",
|
||||
"dataloader_func": "unet_data_loader",
|
||||
"batch_size": 2
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
"config": {
|
||||
"model_path": "",
|
||||
"model_loader": "vae_decoder_load",
|
||||
"model_script": "modules/onnx.py",
|
||||
"model_script": "modules/olive.py",
|
||||
"io_config": {
|
||||
"input_names": ["latent_sample", "return_dict"],
|
||||
"output_names": ["sample"],
|
||||
|
|
@ -36,7 +36,7 @@
|
|||
"type": "latency",
|
||||
"sub_types": [{ "name": "avg" }],
|
||||
"user_config": {
|
||||
"user_script": "modules/onnx.py",
|
||||
"user_script": "modules/olive.py",
|
||||
"dataloader_func": "vae_decoder_data_loader",
|
||||
"batch_size": 1
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
"config": {
|
||||
"model_path": "",
|
||||
"model_loader": "vae_encoder_load",
|
||||
"model_script": "modules/onnx.py",
|
||||
"model_script": "modules/olive.py",
|
||||
"io_config": {
|
||||
"input_names": ["sample", "return_dict"],
|
||||
"output_names": ["latent_sample"],
|
||||
|
|
@ -36,7 +36,7 @@
|
|||
"type": "latency",
|
||||
"sub_types": [{ "name": "avg" }],
|
||||
"user_config": {
|
||||
"user_script": "modules/onnx.py",
|
||||
"user_script": "modules/olive.py",
|
||||
"dataloader_func": "vae_encoder_data_loader",
|
||||
"batch_size": 1
|
||||
}
|
||||
|
|
|
|||
|
|
@ -583,7 +583,6 @@ def install_packages():
|
|||
install(clip_package, 'clip')
|
||||
invisiblewatermark_package = os.environ.get('INVISIBLEWATERMARK_PACKAGE', "git+https://github.com/patrickvonplaten/invisible-watermark.git@remove_onnxruntime_depedency")
|
||||
install(invisiblewatermark_package, 'invisible-watermark')
|
||||
install('olive-ai', 'olive-ai', ignore=True)
|
||||
install('pi-heif', 'pi_heif', ignore=True)
|
||||
tensorflow_package = os.environ.get('TENSORFLOW_PACKAGE', 'tensorflow==2.13.0')
|
||||
install(tensorflow_package, 'tensorflow-rocm' if 'rocm' in tensorflow_package else 'tensorflow', ignore=True)
|
||||
|
|
@ -731,14 +730,6 @@ def ensure_base_requirements():
|
|||
import rich # pylint: disable=unused-import
|
||||
except ImportError:
|
||||
pass
|
||||
try: # related to: https://github.com/microsoft/Olive/issues/675
|
||||
import olive.workflows # pylint: disable=unused-import
|
||||
except ImportError:
|
||||
install('olive-ai', 'Olive')
|
||||
try:
|
||||
import olive.workflows
|
||||
except ImportError:
|
||||
log.error('Failed to install dependency: olive-ai.')
|
||||
|
||||
|
||||
def install_requirements():
|
||||
|
|
|
|||
|
|
@ -29,8 +29,8 @@ except ModuleNotFoundError:
|
|||
sys.modules["torch._dynamo"] = {} # HACK torch 1.13.1 does not have _dynamo. will be removed.
|
||||
|
||||
|
||||
def init_modules():
|
||||
global parser, args, script_path, extensions_dir # pylint: disable=global-statement
|
||||
def init_args():
|
||||
global parser, args # pylint: disable=global-statement
|
||||
import modules.cmd_args
|
||||
parser = modules.cmd_args.parser
|
||||
installer.add_args(parser)
|
||||
|
|
@ -39,6 +39,10 @@ def init_modules():
|
|||
|
||||
def init_paths():
|
||||
global script_path, extensions_dir # pylint: disable=global-statement
|
||||
try:
|
||||
import olive.workflows # pylint: disable=unused-import
|
||||
except ModuleNotFoundError:
|
||||
pass
|
||||
import modules.paths
|
||||
modules.paths.register_paths()
|
||||
script_path = modules.paths.script_path
|
||||
|
|
|
|||
|
|
@ -0,0 +1,361 @@
|
|||
import os
|
||||
import sys
|
||||
import json
|
||||
import torch
|
||||
import shutil
|
||||
import diffusers
|
||||
from transformers.models.clip.modeling_clip import CLIPTextModel, CLIPTextModelWithProjection
|
||||
from installer import log
|
||||
from modules import shared
|
||||
from modules.paths import sd_configs_path
|
||||
from modules.sd_models import CheckpointInfo
|
||||
from modules.onnx import ExecutionProvider, OnnxStableDiffusionPipeline
|
||||
|
||||
is_available = "olive" in sys.modules  # Olive is not available if it is not loaded at startup.


def enable_olive_onchange():
    """React to a change of the ``onnx_enable_olive`` option.

    Option on: install the ``olive-ai`` package unless the ``olive`` module is
    already loaded. Option off: drop ``olive`` from ``sys.modules``, clear
    ``is_available``, switch the Olive diffusers pipeline setting back to the
    plain ONNX one and uninstall the package.
    """
    global is_available

    if not shared.opts.onnx_enable_olive:
        from installer import pip
        sys.modules.pop("olive", None)
        is_available = False
        if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive':
            shared.opts.diffusers_pipeline = 'ONNX Stable Diffusion'
        pip('uninstall olive-ai --yes --quiet', ignore=True, quiet=True)
        return

    if "olive" in sys.modules:
        log.info("You already have Olive installed. No additional installation is required.")
        return

    from installer import install
    install('olive-ai', 'Olive')
    log.info("Olive is installed. Please restart ui completely to load Olive.")
|
||||
|
||||
# Pipeline components that are optimized one after another.
submodels = (
    "text_encoder",
    "unet",
    "vae_encoder",
    "vae_decoder",
)

# Execution provider -> name fragment used in "<submodel>_<name>_footprints.json".
EP_TO_NAME = {
    ExecutionProvider.CPU: "cpu",
    ExecutionProvider.DirectML: "gpu-dml",
    ExecutionProvider.CUDA: "gpu-?",  # TODO
    ExecutionProvider.ROCm: "gpu-rocm",
    ExecutionProvider.OpenVINO: "?",  # TODO
}
|
||||
|
||||
class OlivePipeline(diffusers.DiffusionPipeline):
    """Holder for a not-yet-optimized pipeline that `optimize` converts with Olive.

    On construction the wrapped pipeline is saved to
    ``shared.opts.olive_temp_dir``; `optimize` later runs Olive on every entry
    of ``submodels`` and returns an ``OnnxStableDiffusionPipeline``.
    """

    sd_model_hash: str
    sd_checkpoint_info: CheckpointInfo
    sd_model_checkpoint: str
    config = {}

    unoptimized: diffusers.DiffusionPipeline
    original_filename: str

    def __init__(self, path, pipeline: diffusers.DiffusionPipeline):
        """Remember the source file name and save ``pipeline`` to the Olive temp dir.

        Args:
            path: Model path; only its base name is kept (used for the cache dir name).
            pipeline: The unoptimized diffusers pipeline to wrap.
        """
        self.original_filename = os.path.basename(path)
        self.unoptimized = pipeline
        del pipeline
        if not os.path.exists(shared.opts.olive_temp_dir):
            os.mkdir(shared.opts.olive_temp_dir)
        self.unoptimized.save_pretrained(shared.opts.olive_temp_dir)

    @staticmethod
    def from_pretrained(pretrained_model_name_or_path, **kwargs):
        """Wrap ``diffusers.DiffusionPipeline.from_pretrained`` in an `OlivePipeline`."""
        return OlivePipeline(pretrained_model_name_or_path, diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs))

    @staticmethod
    def from_single_file(pretrained_model_name_or_path, **kwargs):
        """Wrap ``diffusers.StableDiffusionPipeline.from_single_file`` in an `OlivePipeline`."""
        return OlivePipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs))

    @staticmethod
    def from_ckpt(*args, **kwargs):
        """Alias of `from_single_file` that forwards all arguments unchanged."""
        # Positional arguments are a tuple and must be unpacked with `*`;
        # the previous `**args` raised TypeError on every call.
        return OlivePipeline.from_single_file(*args, **kwargs)

    def to(self, *args, **kwargs):
        """Do nothing; accepts any arguments and returns ``None``."""
        pass

    def optimize(self, width: int, height: int):
        """Optimize every submodel with Olive and build an ONNX pipeline.

        If a directory for this file name and resolution already exists under
        ``shared.opts.olive_cached_models_path`` it is loaded directly.
        Otherwise each entry of ``submodels`` is run through Olive using its
        ``config_<submodel>.json``, the optimized models are assembled into an
        ``OnnxStableDiffusionPipeline`` and, when
        ``shared.opts.onnx_cache_optimized`` is set, copied into that directory.

        Args:
            width: Image width written to ``optimize_config["width"]``.
            height: Image height written to ``optimize_config["height"]``.

        Returns:
            The resulting pipeline, or ``None`` when optimization failed (the
            error is logged and the temp and output directories are removed).
        """
        from olive.workflows import run
        from olive.model import ONNXModel

        if shared.opts.onnx_execution_provider == ExecutionProvider.ROCm:
            from olive.hardware.accelerator import AcceleratorLookup
            AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm)

        if width != height:
            log.warning("Olive received different width and height. The quality of the result is not guaranteed.")

        out_dir = os.path.join(shared.opts.olive_cached_models_path, f"{self.original_filename}-{width}w-{height}h")
        if os.path.isdir(out_dir):
            # A cached result exists for this model and resolution.
            del self.unoptimized
            return OnnxStableDiffusionPipeline.from_pretrained(
                out_dir,
            ).apply(self)

        try:
            if shared.opts.onnx_cache_optimized:
                # Seed the cache dir with everything except the model weights.
                shutil.copytree(
                    shared.opts.olive_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
                )

            optimize_config["width"] = width
            optimize_config["height"] = height

            optimized_model_paths = {}

            for submodel in submodels:
                log.info(f"\nOptimizing {submodel}")

                with open(os.path.join(sd_configs_path, "olive", f"config_{submodel}.json"), "r") as config_file:
                    olive_config = json.load(config_file)
                olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
                if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
                    olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
                olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]

                run(olive_config)

                with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
                    footprints = json.load(footprint_file)
                conversion_footprint = None
                optimizer_footprint = None
                for footprint in footprints.values():
                    if footprint["from_pass"] == "OnnxConversion":
                        conversion_footprint = footprint
                    elif footprint["from_pass"] == "OrtTransformersOptimization":
                        optimizer_footprint = footprint

                # Explicit raise instead of `assert`, which is stripped under `python -O`.
                if not (conversion_footprint and optimizer_footprint):
                    raise RuntimeError("Failed to optimize model")

                optimized_model_paths[submodel] = ONNXModel(
                    **optimizer_footprint["model_config"]["config"]
                ).model_path

                log.info(f"Optimized {submodel}")
            shutil.rmtree(shared.opts.olive_temp_dir)

            components = {
                "tokenizer": self.unoptimized.tokenizer,
                "scheduler": self.unoptimized.scheduler,
                "safety_checker": self.unoptimized.safety_checker if hasattr(self.unoptimized, "safety_checker") else None,
                "feature_extractor": self.unoptimized.feature_extractor,
            }
            del self.unoptimized
            for submodel in submodels:
                components[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
                    os.path.dirname(optimized_model_paths[submodel]),
                )

            pipeline = OnnxStableDiffusionPipeline(
                **components,
                requires_safety_checker=False,
            ).apply(self)
            del components
            if shared.opts.onnx_cache_optimized:
                pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))

                for submodel in submodels:
                    src_path = optimized_model_paths[submodel]
                    src_parent = os.path.dirname(src_path)
                    dst_parent = os.path.join(out_dir, submodel)
                    dst_path = os.path.join(dst_parent, "model.onnx")
                    if not os.path.isdir(dst_parent):
                        os.mkdir(dst_parent)
                    shutil.copyfile(src_path, dst_path)

                    # Copy the "<model>.data" file next to the model when one exists.
                    weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
                    if os.path.isfile(weights_src_path):
                        weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
                        shutil.copyfile(weights_src_path, weights_dst_path)
        except Exception as e:
            log.error(f"Failed to optimize model '{self.original_filename}'.")
            log.error(e)  # for test.
            shutil.rmtree(shared.opts.olive_temp_dir, ignore_errors=True)
            shutil.rmtree(out_dir, ignore_errors=True)
            pipeline = None
        # The "cache" and "footprints" working directories are removed on success and failure.
        shutil.rmtree("cache", ignore_errors=True)
        shutil.rmtree("footprints", ignore_errors=True)
        return pipeline
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
# Shared settings read by the dummy-input builders in this module;
# "width" and "height" are overwritten before Olive runs.
optimize_config = {"is_sdxl": False, "width": 512, "height": 512}
|
||||
|
||||
|
||||
class RandomDataLoader:
    """Latency-only data loader: every item is a freshly built input paired with no label."""

    def __init__(self, create_inputs_func, batchsize, torch_dtype):
        """Store the input factory and the arguments it is called with."""
        self.create_input_func, self.batchsize, self.torch_dtype = create_inputs_func, batchsize, torch_dtype

    def __getitem__(self, idx):
        """Return ``(inputs, None)``; ``idx`` is ignored."""
        sample = self.create_input_func(self.batchsize, self.torch_dtype)
        return sample, None
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# TEXT ENCODER
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def text_encoder_inputs(batchsize, torch_dtype):
    """Build an all-zero dummy ``input_ids`` tensor of shape ``(batchsize, 77)``.

    Returns the bare tensor, or, when ``optimize_config["is_sdxl"]`` is set, a
    dict that also carries ``output_hidden_states=True``.
    """
    token_ids = torch.zeros((batchsize, 77), dtype=torch_dtype)
    if not optimize_config["is_sdxl"]:
        return token_ids
    return {
        "input_ids": token_ids,
        "output_hidden_states": True,
    }
|
||||
|
||||
|
||||
def text_encoder_load(model_name):
    """Load the CLIP text encoder from the ``text_encoder`` subfolder of the Olive temp dir.

    ``model_name`` is unused; the location comes from ``shared.opts.olive_temp_dir``.
    """
    temp_dir = os.path.abspath(shared.opts.olive_temp_dir)
    return CLIPTextModel.from_pretrained(temp_dir, subfolder="text_encoder")
|
||||
|
||||
|
||||
def text_encoder_conversion_inputs(model):
    """Return text-encoder dummy inputs for batch size 1 with int32 ids (``model`` is unused)."""
    batch, ids_dtype = 1, torch.int32
    return text_encoder_inputs(batch, ids_dtype)
|
||||
|
||||
|
||||
def text_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
    """Create a `RandomDataLoader` producing int32 text-encoder inputs; other arguments are ignored."""
    loader = RandomDataLoader(text_encoder_inputs, batchsize, torch.int32)
    return loader
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# TEXT ENCODER 2
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def text_encoder_2_inputs(batchsize, torch_dtype):
    """Build dummy inputs: zero ``input_ids`` of shape ``(batchsize, 77)`` plus ``output_hidden_states=True``."""
    inputs = {}
    inputs["input_ids"] = torch.zeros((batchsize, 77), dtype=torch_dtype)
    inputs["output_hidden_states"] = True
    return inputs
|
||||
|
||||
|
||||
def text_encoder_2_load(model_name):
    """Load the projection text encoder from the ``text_encoder_2`` subfolder of the Olive temp dir.

    ``model_name`` is unused; the location comes from ``shared.opts.olive_temp_dir``.
    """
    temp_dir = os.path.abspath(shared.opts.olive_temp_dir)
    return CLIPTextModelWithProjection.from_pretrained(temp_dir, subfolder="text_encoder_2")
|
||||
|
||||
|
||||
def text_encoder_2_conversion_inputs(model):
    """Return second-text-encoder dummy inputs for batch size 1 with int64 ids (``model`` is unused)."""
    batch, ids_dtype = 1, torch.int64
    return text_encoder_2_inputs(batch, ids_dtype)
|
||||
|
||||
|
||||
def text_encoder_2_data_loader(data_dir, batchsize, *args, **kwargs):
    """Create a `RandomDataLoader` producing int64 second-text-encoder inputs; other arguments are ignored."""
    loader = RandomDataLoader(text_encoder_2_inputs, batchsize, torch.int64)
    return loader
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# UNET
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
    """Build random dummy inputs for the UNet from ``optimize_config``.

    Non-SDXL: ``sample``, ``timestep``, ``encoder_hidden_states`` and
    ``return_dict=False``. SDXL: the batch dimension is doubled and
    ``text_embeds`` / ``time_ids`` are added, nested under
    ``additional_inputs["added_cond_kwargs"]`` when ``is_conversion_inputs``
    is true and as top-level keys otherwise.
    """
    height, width = optimize_config["height"], optimize_config["width"]
    latent_hw = (height // 8, width // 8)

    if not optimize_config["is_sdxl"]:
        return {
            "sample": torch.rand((batchsize, 4, *latent_hw), dtype=torch_dtype),
            "timestep": torch.rand((batchsize,), dtype=torch_dtype),
            "encoder_hidden_states": torch.rand((batchsize, 77, height + 256), dtype=torch_dtype),
            "return_dict": False,
        }

    # TODO (pavignol): All the multiplications by 2 here are because the XL base has 2 text encoders
    # For refiner, it should be multiplied by 1 (single text encoder)
    doubled = 2 * batchsize
    inputs = {
        "sample": torch.rand((doubled, 4, *latent_hw), dtype=torch_dtype),
        "timestep": torch.rand((1,), dtype=torch_dtype),
        "encoder_hidden_states": torch.rand((doubled, 77, height * 2), dtype=torch_dtype),
    }
    added_cond = {
        "text_embeds": torch.rand((doubled, height + 256), dtype=torch_dtype),
        "time_ids": torch.rand((doubled, 6), dtype=torch_dtype),
    }
    if is_conversion_inputs:
        inputs["additional_inputs"] = {"added_cond_kwargs": added_cond}
    else:
        inputs.update(added_cond)
    return inputs
|
||||
|
||||
|
||||
def unet_load(model_name):
    """Load the UNet from the ``unet`` subfolder of the Olive temp dir.

    ``model_name`` is unused; the location comes from ``shared.opts.olive_temp_dir``.
    """
    temp_dir = os.path.abspath(shared.opts.olive_temp_dir)
    return diffusers.UNet2DConditionModel.from_pretrained(temp_dir, subfolder="unet")
|
||||
|
||||
|
||||
def unet_conversion_inputs(model):
    """Return the float32, batch-size-1 UNet conversion inputs as a tuple of values (``model`` is unused)."""
    named_inputs = unet_inputs(1, torch.float32, True)
    return tuple(named_inputs.values())
|
||||
|
||||
|
||||
def unet_data_loader(data_dir, batchsize, *args, **kwargs):
    """Create a `RandomDataLoader` producing float16 UNet inputs; other arguments are ignored."""
    loader = RandomDataLoader(unet_inputs, batchsize, torch.float16)
    return loader
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# VAE ENCODER
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def vae_encoder_inputs(batchsize, torch_dtype):
    """Build a random ``sample`` of shape ``(batchsize, 3, height, width)`` plus ``return_dict=False``."""
    image_shape = (batchsize, 3, optimize_config["height"], optimize_config["width"])
    return {"sample": torch.rand(image_shape, dtype=torch_dtype), "return_dict": False}
|
||||
|
||||
|
||||
def vae_encoder_load(model_name):
    """Load the VAE and make its ``forward`` encode and sample.

    The model is read from ``<olive_temp_dir>/vae``, or from
    ``<olive_temp_dir>/vae_encoder`` when that directory does not exist.
    ``model_name`` is unused.
    """
    vae_dir = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
    if not os.path.isdir(vae_dir):
        vae_dir = vae_dir + "_encoder"
    model = diffusers.AutoencoderKL.from_pretrained(vae_dir)

    def encode_and_sample(sample, return_dict):
        return model.encode(sample, return_dict)[0].sample()

    model.forward = encode_and_sample
    return model
|
||||
|
||||
|
||||
def vae_encoder_conversion_inputs(model):
    """Return the float32, batch-size-1 VAE-encoder inputs as a tuple of values (``model`` is unused)."""
    named_inputs = vae_encoder_inputs(1, torch.float32)
    return tuple(named_inputs.values())
|
||||
|
||||
|
||||
def vae_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
    """Create a `RandomDataLoader` producing float16 VAE-encoder inputs; other arguments are ignored."""
    loader = RandomDataLoader(vae_encoder_inputs, batchsize, torch.float16)
    return loader
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# VAE DECODER
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def vae_decoder_inputs(batchsize, torch_dtype):
    """Build a random ``latent_sample`` of shape ``(batchsize, 4, height // 8, width // 8)`` plus ``return_dict=False``."""
    latent_shape = (batchsize, 4, optimize_config["height"] // 8, optimize_config["width"] // 8)
    return {"latent_sample": torch.rand(latent_shape, dtype=torch_dtype), "return_dict": False}
|
||||
|
||||
|
||||
def vae_decoder_load(model_name):
    """Load the VAE and point its ``forward`` at ``decode``.

    The model is read from ``<olive_temp_dir>/vae``, or from
    ``<olive_temp_dir>/vae_decoder`` when that directory does not exist.
    ``model_name`` is unused.
    """
    vae_dir = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
    if not os.path.isdir(vae_dir):
        vae_dir = vae_dir + "_decoder"
    decoder = diffusers.AutoencoderKL.from_pretrained(vae_dir)
    decoder.forward = decoder.decode
    return decoder
|
||||
|
||||
|
||||
def vae_decoder_conversion_inputs(model):
    """Return the float32, batch-size-1 VAE-decoder inputs as a tuple of values (``model`` is unused)."""
    named_inputs = vae_decoder_inputs(1, torch.float32)
    return tuple(named_inputs.values())
|
||||
|
||||
|
||||
def vae_decoder_data_loader(data_dir, batchsize, *args, **kwargs):
    """Create a `RandomDataLoader` producing float16 VAE-decoder inputs; other arguments are ignored."""
    loader = RandomDataLoader(vae_decoder_inputs, batchsize, torch.float16)
    return loader
|
||||
332
modules/onnx.py
332
modules/onnx.py
|
|
@ -1,17 +1,13 @@
|
|||
import os
|
||||
import json
|
||||
import torch
|
||||
import shutil
|
||||
import importlib
|
||||
import diffusers
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
from enum import Enum
|
||||
from typing import Union, Optional, Callable, List
|
||||
from transformers.models.clip.modeling_clip import CLIPTextModel, CLIPTextModelWithProjection
|
||||
from installer import log
|
||||
from modules import shared
|
||||
from modules.paths import sd_configs_path
|
||||
from modules.sd_models import CheckpointInfo
|
||||
|
||||
class ExecutionProvider(str, Enum):
|
||||
|
|
@ -21,18 +17,8 @@ class ExecutionProvider(str, Enum):
|
|||
ROCm = "ROCMExecutionProvider"
|
||||
OpenVINO = "OpenVINOExecutionProvider"
|
||||
|
||||
# Pipeline components that are optimized one after another.
submodels = (
    "text_encoder",
    "unet",
    "vae_encoder",
    "vae_decoder",
)

# Providers reported by the installed onnxruntime build.
available_execution_providers: List[ExecutionProvider] = ort.get_available_providers()

# Execution provider -> name fragment used in "<submodel>_<name>_footprints.json".
EP_TO_NAME = {
    ExecutionProvider.CPU: "cpu",
    ExecutionProvider.DirectML: "gpu-dml",
    ExecutionProvider.CUDA: "gpu-?",  # TODO
    ExecutionProvider.ROCm: "gpu-rocm",
    ExecutionProvider.OpenVINO: "?",  # TODO
}
|
||||
|
||||
def get_default_execution_provider() -> ExecutionProvider:
|
||||
from modules import devices
|
||||
if devices.backend == "cpu":
|
||||
|
|
@ -43,8 +29,6 @@ def get_default_execution_provider() -> ExecutionProvider:
|
|||
return ExecutionProvider.CUDA
|
||||
elif devices.backend == "rocm":
|
||||
if ExecutionProvider.ROCm in available_execution_providers:
|
||||
from olive.hardware.accelerator import AcceleratorLookup
|
||||
AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm)
|
||||
return ExecutionProvider.ROCm
|
||||
else:
|
||||
log.warning("Currently, there's no pypi release for onnxruntime-rocm. Please download and install .whl file from https://download.onnxruntime.ai/ The inference will be fall back to CPU.")
|
||||
|
|
@ -259,319 +243,3 @@ class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline):
|
|||
return (image, has_nsfw_concept)
|
||||
|
||||
return diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
|
||||
|
||||
class OlivePipeline(diffusers.DiffusionPipeline):
    """Holder for a not-yet-optimized pipeline that `optimize` converts with Olive.

    On construction the wrapped pipeline is saved to
    ``shared.opts.olive_temp_dir``; `optimize` later runs Olive on every entry
    of ``submodels`` and returns an ``OnnxStableDiffusionPipeline``.
    """

    sd_model_hash: str
    sd_checkpoint_info: CheckpointInfo
    sd_model_checkpoint: str
    config = {}

    unoptimized: diffusers.DiffusionPipeline
    original_filename: str

    def __init__(self, path, pipeline: diffusers.DiffusionPipeline):
        """Remember the source file name and save ``pipeline`` to the Olive temp dir.

        Args:
            path: Model path; only its base name is kept (used for the cache dir name).
            pipeline: The unoptimized diffusers pipeline to wrap.
        """
        self.original_filename = os.path.basename(path)
        self.unoptimized = pipeline
        del pipeline
        if not os.path.exists(shared.opts.olive_temp_dir):
            os.mkdir(shared.opts.olive_temp_dir)
        self.unoptimized.save_pretrained(shared.opts.olive_temp_dir)

    @staticmethod
    def from_pretrained(pretrained_model_name_or_path, **kwargs):
        """Wrap ``diffusers.DiffusionPipeline.from_pretrained`` in an `OlivePipeline`."""
        return OlivePipeline(pretrained_model_name_or_path, diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs))

    @staticmethod
    def from_single_file(pretrained_model_name_or_path, **kwargs):
        """Wrap ``diffusers.StableDiffusionPipeline.from_single_file`` in an `OlivePipeline`."""
        return OlivePipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs))

    @staticmethod
    def from_ckpt(*args, **kwargs):
        """Alias of `from_single_file` that forwards all arguments unchanged."""
        # Positional arguments are a tuple and must be unpacked with `*`;
        # the previous `**args` raised TypeError on every call.
        return OlivePipeline.from_single_file(*args, **kwargs)

    def to(self, *args, **kwargs):
        """Do nothing; accepts any arguments and returns ``None``."""
        pass

    def optimize(self, width: int, height: int):
        """Optimize every submodel with Olive and build an ONNX pipeline.

        If a directory for this file name and resolution already exists under
        ``shared.opts.olive_cached_models_path`` it is loaded directly.
        Otherwise each entry of ``submodels`` is run through Olive using its
        ``config_<submodel>.json``, the optimized models are assembled into an
        ``OnnxStableDiffusionPipeline`` and, when
        ``shared.opts.onnx_cache_optimized`` is set, copied into that directory.

        Args:
            width: Image width written to ``optimize_config["width"]``.
            height: Image height written to ``optimize_config["height"]``.

        Returns:
            The resulting pipeline, or ``None`` when optimization failed (the
            error is logged and the temp and output directories are removed).
        """
        from olive.workflows import run
        from olive.model import ONNXModel

        if width != height:
            log.warning("Olive received different width and height. The quality of the result is not guaranteed.")

        out_dir = os.path.join(shared.opts.olive_cached_models_path, f"{self.original_filename}-{width}w-{height}h")
        if os.path.isdir(out_dir):
            # A cached result exists for this model and resolution.
            del self.unoptimized
            return OnnxStableDiffusionPipeline.from_pretrained(
                out_dir,
            ).apply(self)

        try:
            if shared.opts.onnx_cache_optimized:
                # Seed the cache dir with everything except the model weights.
                shutil.copytree(
                    shared.opts.olive_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
                )

            optimize_config["width"] = width
            optimize_config["height"] = height

            optimized_model_paths = {}

            for submodel in submodels:
                log.info(f"\nOptimizing {submodel}")

                with open(os.path.join(sd_configs_path, "olive", f"config_{submodel}.json"), "r") as config_file:
                    olive_config = json.load(config_file)
                # The float16 flag was assigned twice in a row before; once is enough.
                olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
                if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
                    olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
                olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]

                run(olive_config)

                with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
                    footprints = json.load(footprint_file)
                conversion_footprint = None
                optimizer_footprint = None
                for footprint in footprints.values():
                    if footprint["from_pass"] == "OnnxConversion":
                        conversion_footprint = footprint
                    elif footprint["from_pass"] == "OrtTransformersOptimization":
                        optimizer_footprint = footprint

                # Explicit raise instead of `assert`, which is stripped under `python -O`.
                if not (conversion_footprint and optimizer_footprint):
                    raise RuntimeError("Failed to optimize model")

                optimized_model_paths[submodel] = ONNXModel(
                    **optimizer_footprint["model_config"]["config"]
                ).model_path

                log.info(f"Optimized {submodel}")
            shutil.rmtree(shared.opts.olive_temp_dir)

            components = {
                "tokenizer": self.unoptimized.tokenizer,
                "scheduler": self.unoptimized.scheduler,
                "safety_checker": self.unoptimized.safety_checker if hasattr(self.unoptimized, "safety_checker") else None,
                "feature_extractor": self.unoptimized.feature_extractor,
            }
            del self.unoptimized
            for submodel in submodels:
                components[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
                    os.path.dirname(optimized_model_paths[submodel]),
                )

            pipeline = OnnxStableDiffusionPipeline(
                **components,
                requires_safety_checker=False,
            ).apply(self)
            del components
            if shared.opts.onnx_cache_optimized:
                pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))

                for submodel in submodels:
                    src_path = optimized_model_paths[submodel]
                    src_parent = os.path.dirname(src_path)
                    dst_parent = os.path.join(out_dir, submodel)
                    dst_path = os.path.join(dst_parent, "model.onnx")
                    if not os.path.isdir(dst_parent):
                        os.mkdir(dst_parent)
                    shutil.copyfile(src_path, dst_path)

                    # Copy the "<model>.data" file next to the model when one exists.
                    weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
                    if os.path.isfile(weights_src_path):
                        weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
                        shutil.copyfile(weights_src_path, weights_dst_path)
        except Exception as e:
            log.error(f"Failed to optimize model '{self.original_filename}'.")
            log.error(e)  # for test.
            shutil.rmtree(shared.opts.olive_temp_dir, ignore_errors=True)
            shutil.rmtree(out_dir, ignore_errors=True)
            pipeline = None
        # The "cache" and "footprints" working directories are removed on success and failure.
        shutil.rmtree("cache", ignore_errors=True)
        shutil.rmtree("footprints", ignore_errors=True)
        return pipeline
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
# Shared settings read by the dummy-input builders in this module;
# "width" and "height" are overwritten before Olive runs.
optimize_config = {"is_sdxl": False, "width": 512, "height": 512}
|
||||
|
||||
|
||||
class RandomDataLoader:
    """Latency-only data loader: every item is a freshly built input paired with no label."""

    def __init__(self, create_inputs_func, batchsize, torch_dtype):
        """Store the input factory and the arguments it is called with."""
        self.create_input_func, self.batchsize, self.torch_dtype = create_inputs_func, batchsize, torch_dtype

    def __getitem__(self, idx):
        """Return ``(inputs, None)``; ``idx`` is ignored."""
        sample = self.create_input_func(self.batchsize, self.torch_dtype)
        return sample, None
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# TEXT ENCODER
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def text_encoder_inputs(batchsize, torch_dtype):
    """Build an all-zero dummy ``input_ids`` tensor of shape ``(batchsize, 77)``.

    Returns the bare tensor, or, when ``optimize_config["is_sdxl"]`` is set, a
    dict that also carries ``output_hidden_states=True``.
    """
    token_ids = torch.zeros((batchsize, 77), dtype=torch_dtype)
    if not optimize_config["is_sdxl"]:
        return token_ids
    return {
        "input_ids": token_ids,
        "output_hidden_states": True,
    }
|
||||
|
||||
|
||||
def text_encoder_load(model_name):
    """Load the CLIP text encoder from the ``text_encoder`` subfolder of the Olive temp dir.

    ``model_name`` is unused; the location comes from ``shared.opts.olive_temp_dir``.
    """
    temp_dir = os.path.abspath(shared.opts.olive_temp_dir)
    return CLIPTextModel.from_pretrained(temp_dir, subfolder="text_encoder")
|
||||
|
||||
|
||||
def text_encoder_conversion_inputs(model):
    """Return text-encoder dummy inputs for batch size 1 with int32 ids (``model`` is unused)."""
    batch, ids_dtype = 1, torch.int32
    return text_encoder_inputs(batch, ids_dtype)
|
||||
|
||||
|
||||
def text_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
    """Create a `RandomDataLoader` producing int32 text-encoder inputs; other arguments are ignored."""
    loader = RandomDataLoader(text_encoder_inputs, batchsize, torch.int32)
    return loader
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# TEXT ENCODER 2
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def text_encoder_2_inputs(batchsize, torch_dtype):
    """Build dummy inputs: zero ``input_ids`` of shape ``(batchsize, 77)`` plus ``output_hidden_states=True``."""
    inputs = {}
    inputs["input_ids"] = torch.zeros((batchsize, 77), dtype=torch_dtype)
    inputs["output_hidden_states"] = True
    return inputs
|
||||
|
||||
|
||||
def text_encoder_2_load(model_name):
    """Load the projection text encoder from the ``text_encoder_2`` subfolder of the Olive temp dir.

    ``model_name`` is unused; the location comes from ``shared.opts.olive_temp_dir``.
    """
    temp_dir = os.path.abspath(shared.opts.olive_temp_dir)
    return CLIPTextModelWithProjection.from_pretrained(temp_dir, subfolder="text_encoder_2")
|
||||
|
||||
|
||||
def text_encoder_2_conversion_inputs(model):
    """Return second-text-encoder dummy inputs for batch size 1 with int64 ids (``model`` is unused)."""
    batch, ids_dtype = 1, torch.int64
    return text_encoder_2_inputs(batch, ids_dtype)
|
||||
|
||||
|
||||
def text_encoder_2_data_loader(data_dir, batchsize, *args, **kwargs):
    """Create a `RandomDataLoader` producing int64 second-text-encoder inputs; other arguments are ignored."""
    loader = RandomDataLoader(text_encoder_2_inputs, batchsize, torch.int64)
    return loader
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# UNET
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
    """Build random dummy inputs for the UNet from ``optimize_config``.

    Non-SDXL: ``sample``, ``timestep``, ``encoder_hidden_states`` and
    ``return_dict=False``. SDXL: the batch dimension is doubled and
    ``text_embeds`` / ``time_ids`` are added, nested under
    ``additional_inputs["added_cond_kwargs"]`` when ``is_conversion_inputs``
    is true and as top-level keys otherwise.
    """
    height, width = optimize_config["height"], optimize_config["width"]
    latent_hw = (height // 8, width // 8)

    if not optimize_config["is_sdxl"]:
        return {
            "sample": torch.rand((batchsize, 4, *latent_hw), dtype=torch_dtype),
            "timestep": torch.rand((batchsize,), dtype=torch_dtype),
            "encoder_hidden_states": torch.rand((batchsize, 77, height + 256), dtype=torch_dtype),
            "return_dict": False,
        }

    # TODO (pavignol): All the multiplications by 2 here are because the XL base has 2 text encoders
    # For refiner, it should be multiplied by 1 (single text encoder)
    doubled = 2 * batchsize
    inputs = {
        "sample": torch.rand((doubled, 4, *latent_hw), dtype=torch_dtype),
        "timestep": torch.rand((1,), dtype=torch_dtype),
        "encoder_hidden_states": torch.rand((doubled, 77, height * 2), dtype=torch_dtype),
    }
    added_cond = {
        "text_embeds": torch.rand((doubled, height + 256), dtype=torch_dtype),
        "time_ids": torch.rand((doubled, 6), dtype=torch_dtype),
    }
    if is_conversion_inputs:
        inputs["additional_inputs"] = {"added_cond_kwargs": added_cond}
    else:
        inputs.update(added_cond)
    return inputs
|
||||
|
||||
|
||||
def unet_load(model_name):
    """Load the UNet from the ``unet`` subfolder of the Olive temp dir.

    ``model_name`` is unused; the location comes from ``shared.opts.olive_temp_dir``.
    """
    temp_dir = os.path.abspath(shared.opts.olive_temp_dir)
    return diffusers.UNet2DConditionModel.from_pretrained(temp_dir, subfolder="unet")
|
||||
|
||||
|
||||
def unet_conversion_inputs(model):
    """Return the float32, batch-size-1 UNet conversion inputs as a tuple of values (``model`` is unused)."""
    named_inputs = unet_inputs(1, torch.float32, True)
    return tuple(named_inputs.values())
|
||||
|
||||
|
||||
def unet_data_loader(data_dir, batchsize, *args, **kwargs):
    """Return a RandomDataLoader producing float16 UNet inputs.

    ``data_dir`` and any extra positional/keyword arguments are ignored.
    """
    loader = RandomDataLoader(unet_inputs, batchsize, torch.float16)
    return loader
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# VAE ENCODER
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def vae_encoder_inputs(batchsize, torch_dtype):
    """Build random dummy inputs for the VAE encoder.

    The "sample" tensor has shape (batchsize, 3, height, width), with height
    and width read from the module-level ``optimize_config``.
    """
    shape = (batchsize, 3, optimize_config["height"], optimize_config["width"])
    inputs = {"sample": torch.rand(shape, dtype=torch_dtype)}
    inputs["return_dict"] = False
    return inputs
|
||||
|
||||
|
||||
def vae_encoder_load(model_name):
    """Load the VAE and rebind its ``forward`` to encode-and-sample.

    The model is read from ``<olive_temp_dir>/vae``; if that directory does
    not exist, ``<olive_temp_dir>/vae_encoder`` is used instead.
    ``model_name`` is accepted but not used.
    """
    vae_dir = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
    if not os.path.isdir(vae_dir):
        vae_dir = vae_dir + "_encoder"

    vae = diffusers.AutoencoderKL.from_pretrained(vae_dir)

    def encode_and_sample(sample, return_dict):
        # First element of the encode() result is sampled to get the latent.
        return vae.encode(sample, return_dict)[0].sample()

    vae.forward = encode_and_sample
    return vae
|
||||
|
||||
|
||||
def vae_encoder_conversion_inputs(model):
    """Return the VAE encoder dummy inputs as a tuple (batch size 1, float32).

    ``model`` is accepted but not used.
    """
    dummy = vae_encoder_inputs(1, torch.float32)
    return tuple(dummy.values())
|
||||
|
||||
|
||||
def vae_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
    """Return a RandomDataLoader producing float16 VAE encoder inputs.

    ``data_dir`` and any extra positional/keyword arguments are ignored.
    """
    loader = RandomDataLoader(vae_encoder_inputs, batchsize, torch.float16)
    return loader
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# VAE DECODER
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def vae_decoder_inputs(batchsize, torch_dtype):
    """Build random dummy inputs for the VAE decoder.

    The "latent_sample" tensor has shape
    (batchsize, 4, height // 8, width // 8), with height and width read from
    the module-level ``optimize_config``.
    """
    latent_h = optimize_config["height"] // 8
    latent_w = optimize_config["width"] // 8
    inputs = {"latent_sample": torch.rand((batchsize, 4, latent_h, latent_w), dtype=torch_dtype)}
    inputs["return_dict"] = False
    return inputs
|
||||
|
||||
|
||||
def vae_decoder_load(model_name):
    """Load the VAE and rebind its ``forward`` to ``decode``.

    The model is read from ``<olive_temp_dir>/vae``; if that directory does
    not exist, ``<olive_temp_dir>/vae_decoder`` is used instead.
    ``model_name`` is accepted but not used.
    """
    vae_dir = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
    if not os.path.isdir(vae_dir):
        vae_dir = vae_dir + "_decoder"

    vae = diffusers.AutoencoderKL.from_pretrained(vae_dir)
    vae.forward = vae.decode
    return vae
|
||||
|
||||
|
||||
def vae_decoder_conversion_inputs(model):
    """Return the VAE decoder dummy inputs as a tuple (batch size 1, float32).

    ``model`` is accepted but not used.
    """
    dummy = vae_decoder_inputs(1, torch.float32)
    return tuple(dummy.values())
|
||||
|
||||
|
||||
def vae_decoder_data_loader(data_dir, batchsize, *args, **kwargs):
    """Return a RandomDataLoader producing float16 VAE decoder inputs.

    ``data_dir`` and any extra positional/keyword arguments are ignored.
    """
    loader = RandomDataLoader(vae_decoder_inputs, batchsize, torch.float16)
    return loader
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from modules import errors, shared_items, shared_state, cmd_args, theme
|
|||
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
|
||||
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
|
||||
from modules.onnx import available_execution_providers, get_default_execution_provider
|
||||
from modules.olive import enable_olive_onchange
|
||||
import modules.interrogate
|
||||
import modules.memmon
|
||||
import modules.styles
|
||||
|
|
@ -439,8 +440,11 @@ options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
|
|||
|
||||
"onnx_sep": OptionInfo("<h2>ONNX Runtime</h2>", "", gr.HTML),
|
||||
"onnx_execution_provider": OptionInfo(get_default_execution_provider().value, 'Execution Provider', gr.Dropdown, lambda: {"choices": available_execution_providers }),
|
||||
"onnx_olive_float16": OptionInfo(True, 'Use FP16 on Olive optimization (will use FP32 if unchecked)'),
|
||||
"onnx_cache_optimized": OptionInfo(True, 'Cache Olive optimized models'),
|
||||
|
||||
"onnx_olive_sep": OptionInfo("<h3>Olive</h3>", "", gr.HTML),
|
||||
"onnx_enable_olive": OptionInfo(False, 'Enable pipeline for Olive', onchange=enable_olive_onchange),
|
||||
"onnx_olive_float16": OptionInfo(True, 'Olive use FP16 on optimization (will use FP32 if unchecked)'),
|
||||
"onnx_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
|
||||
}))
|
||||
|
||||
options_templates.update(options_section(('system-paths', "System Paths"), {
|
||||
|
|
|
|||
|
|
@ -26,7 +26,8 @@ def list_crossattention():
|
|||
|
||||
def get_pipelines():
|
||||
import diffusers
|
||||
from modules.onnx import OnnxStableDiffusionPipeline, OlivePipeline
|
||||
from modules.onnx import OnnxStableDiffusionPipeline
|
||||
from modules.olive import OlivePipeline, is_available as is_olive_available
|
||||
from installer import log
|
||||
pipelines = { # note: not all pipelines can be used manually as they require prior pipeline next to decoder pipeline
|
||||
'Autodetect': None,
|
||||
|
|
@ -57,6 +58,8 @@ def get_pipelines():
|
|||
except Exception:
|
||||
pipelines['InstaFlow'] = getattr(diffusers, 'StableDiffusionPipeline', None)
|
||||
|
||||
if not is_olive_available:
|
||||
del pipelines['ONNX Stable Diffusion with Olive']
|
||||
for k, v in pipelines.items():
|
||||
if k != 'Autodetect' and v is None:
|
||||
log.error(f'Not available: pipeline={k} diffusers={diffusers.__version__} path={diffusers.__file__}')
|
||||
|
|
|
|||
Loading…
Reference in New Issue