make olive optional

pull/2784/head
Seunghoon Lee 2023-11-01 11:49:25 +09:00
parent e954695b5a
commit 84f2a9df95
10 changed files with 385 additions and 354 deletions

configs/olive/config_text_encoder.json

@@ -4,7 +4,7 @@
"config": {
"model_path": "",
"model_loader": "text_encoder_load",
"model_script": "modules/onnx.py",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["input_ids"],
"output_names": ["last_hidden_state", "pooler_output"],
@@ -29,7 +29,7 @@
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/onnx.py",
"user_script": "modules/olive.py",
"dataloader_func": "text_encoder_data_loader",
"batch_size": 1
}
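This config, and its unet, vae_decoder, and vae_encoder siblings below, now resolve model_script and user_script to the new modules/olive.py. A minimal sketch of how such a config is consumed, assuming olive-ai is installed; the file path is an assumption derived from os.path.join(sd_configs_path, "olive", ...) in modules/olive.py:

import json
from olive.workflows import run  # same entry point modules/olive.py uses

# Load one of the four workflow configs (path is an assumption, see above).
with open("configs/olive/config_text_encoder.json", "r") as config_file:
    olive_config = json.load(config_file)
run(olive_config)  # executes the conversion and optimization passes defined in the config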

configs/olive/config_unet.json

@@ -4,7 +4,7 @@
"config": {
"model_path": "",
"model_loader": "unet_load",
"model_script": "modules/onnx.py",
"model_script": "modules/olive.py",
"io_config": {
"input_names": [
"sample",
@@ -46,7 +46,7 @@
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/onnx.py",
"user_script": "modules/olive.py",
"dataloader_func": "unet_data_loader",
"batch_size": 2
}

configs/olive/config_vae_decoder.json

@@ -4,7 +4,7 @@
"config": {
"model_path": "",
"model_loader": "vae_decoder_load",
"model_script": "modules/onnx.py",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["latent_sample", "return_dict"],
"output_names": ["sample"],
@@ -36,7 +36,7 @@
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/onnx.py",
"user_script": "modules/olive.py",
"dataloader_func": "vae_decoder_data_loader",
"batch_size": 1
}

configs/olive/config_vae_encoder.json

@@ -4,7 +4,7 @@
"config": {
"model_path": "",
"model_loader": "vae_encoder_load",
"model_script": "modules/onnx.py",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["sample", "return_dict"],
"output_names": ["latent_sample"],
@@ -36,7 +36,7 @@
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/onnx.py",
"user_script": "modules/olive.py",
"dataloader_func": "vae_encoder_data_loader",
"batch_size": 1
}

installer.py

@@ -583,7 +583,6 @@ def install_packages():
install(clip_package, 'clip')
invisiblewatermark_package = os.environ.get('INVISIBLEWATERMARK_PACKAGE', "git+https://github.com/patrickvonplaten/invisible-watermark.git@remove_onnxruntime_depedency")
install(invisiblewatermark_package, 'invisible-watermark')
- install('olive-ai', 'olive-ai', ignore=True)
install('pi-heif', 'pi_heif', ignore=True)
tensorflow_package = os.environ.get('TENSORFLOW_PACKAGE', 'tensorflow==2.13.0')
install(tensorflow_package, 'tensorflow-rocm' if 'rocm' in tensorflow_package else 'tensorflow', ignore=True)
@@ -731,14 +730,6 @@ def ensure_base_requirements():
import rich # pylint: disable=unused-import
except ImportError:
pass
- try: # related to: https://github.com/microsoft/Olive/issues/675
- import olive.workflows # pylint: disable=unused-import
- except ImportError:
- install('olive-ai', 'Olive')
- try:
- import olive.workflows
- except ImportError:
- log.error('Failed to install dependency: olive-ai.')
def install_requirements():
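With the unconditional install removed from install_packages() and ensure_base_requirements(), olive-ai is now managed on demand from modules/olive.py (shown below). A sketch of the two installer calls it relies on, copied from that file:

from installer import install, pip

install('olive-ai', 'Olive')  # opt in: runs when the Olive option is enabled
pip('uninstall olive-ai --yes --quiet', ignore=True, quiet=True)  # opt out: runs when it is disabled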

launch.py

@@ -29,8 +29,8 @@ except ModuleNotFoundError:
sys.modules["torch._dynamo"] = {} # HACK torch 1.13.1 does not have _dynamo. will be removed.
- def init_modules():
- global parser, args, script_path, extensions_dir # pylint: disable=global-statement
+ def init_args():
+ global parser, args # pylint: disable=global-statement
import modules.cmd_args
parser = modules.cmd_args.parser
installer.add_args(parser)
@@ -39,6 +39,10 @@ def init_modules():
def init_paths():
global script_path, extensions_dir # pylint: disable=global-statement
+ try:
+ import olive.workflows # pylint: disable=unused-import
+ except ModuleNotFoundError:
+ pass
import modules.paths
modules.paths.register_paths()
script_path = modules.paths.script_path
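The probe added to init_paths() matters because modules/olive.py (below) decides availability purely from sys.modules at import time. A minimal sketch of that handshake, assuming olive-ai may or may not be installed:

import sys

try:
    import olive.workflows  # pylint: disable=unused-import
except ModuleNotFoundError:
    pass  # Olive stays unavailable for this session

is_available = "olive" in sys.modules  # same check modules/olive.py performs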

modules/olive.py (new file, 361 lines)

@@ -0,0 +1,361 @@
import os
import sys
import json
import torch
import shutil
import diffusers
from transformers.models.clip.modeling_clip import CLIPTextModel, CLIPTextModelWithProjection
from installer import log
from modules import shared
from modules.paths import sd_configs_path
from modules.sd_models import CheckpointInfo
from modules.onnx import ExecutionProvider, OnnxStableDiffusionPipeline
is_available = "olive" in sys.modules # Olive is not available if it is not loaded at startup.
def enable_olive_onchange():
if shared.opts.onnx_enable_olive:
if "olive" in sys.modules:
log.info("You already have Olive installed. No additional installation is required.")
return
from installer import install
install('olive-ai', 'Olive')
log.info("Olive is installed. Please restart ui completely to load Olive.")
else:
from installer import pip
global is_available
if "olive" in sys.modules:
del sys.modules["olive"]
is_available = False
if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive':
shared.opts.diffusers_pipeline = 'ONNX Stable Diffusion'
pip('uninstall olive-ai --yes --quiet', ignore=True, quiet=True)
submodels = ("text_encoder", "unet", "vae_encoder", "vae_decoder",)
EP_TO_NAME = {
ExecutionProvider.CPU: "cpu",
ExecutionProvider.DirectML: "gpu-dml",
ExecutionProvider.CUDA: "gpu-?", # TODO
ExecutionProvider.ROCm: "gpu-rocm",
ExecutionProvider.OpenVINO: "?", # TODO
}
class OlivePipeline(diffusers.DiffusionPipeline):
sd_model_hash: str
sd_checkpoint_info: CheckpointInfo
sd_model_checkpoint: str
config = {}
unoptimized: diffusers.DiffusionPipeline
original_filename: str
def __init__(self, path, pipeline: diffusers.DiffusionPipeline):
self.original_filename = os.path.basename(path)
self.unoptimized = pipeline
del pipeline
if not os.path.exists(shared.opts.olive_temp_dir):
os.mkdir(shared.opts.olive_temp_dir)
self.unoptimized.save_pretrained(shared.opts.olive_temp_dir)
@staticmethod
def from_pretrained(pretrained_model_name_or_path, **kwargs):
return OlivePipeline(pretrained_model_name_or_path, diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs))
@staticmethod
def from_single_file(pretrained_model_name_or_path, **kwargs):
return OlivePipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs))
@staticmethod
def from_ckpt(*args, **kwargs):
return OlivePipeline.from_single_file(*args, **kwargs)
def to(self, *args, **kwargs):
pass
def optimize(self, width: int, height: int):
from olive.workflows import run
from olive.model import ONNXModel
if shared.opts.onnx_execution_provider == ExecutionProvider.ROCm:
from olive.hardware.accelerator import AcceleratorLookup
AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm)
if width != height:
log.warning("Olive received different width and height. The quality of the result is not guaranteed.")
out_dir = os.path.join(shared.opts.olive_cached_models_path, f"{self.original_filename}-{width}w-{height}h")
if os.path.isdir(out_dir):
del self.unoptimized
return OnnxStableDiffusionPipeline.from_pretrained(
out_dir,
).apply(self)
try:
if shared.opts.onnx_cache_optimized:
shutil.copytree(
shared.opts.olive_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
)
optimize_config["width"] = width
optimize_config["height"] = height
optimized_model_paths = {}
for submodel in submodels:
log.info(f"\nOptimizing {submodel}")
with open(os.path.join(sd_configs_path, "olive", f"config_{submodel}.json"), "r") as config_file:
olive_config = json.load(config_file)
olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
run(olive_config)
with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
footprints = json.load(footprint_file)
conversion_footprint = None
optimizer_footprint = None
for _, footprint in footprints.items():
if footprint["from_pass"] == "OnnxConversion":
conversion_footprint = footprint
elif footprint["from_pass"] == "OrtTransformersOptimization":
optimizer_footprint = footprint
assert conversion_footprint and optimizer_footprint, "Failed to optimize model"
optimized_model_paths[submodel] = ONNXModel(
**optimizer_footprint["model_config"]["config"]
).model_path
log.info(f"Optimized {submodel}")
shutil.rmtree(shared.opts.olive_temp_dir)
kwargs = {
"tokenizer": self.unoptimized.tokenizer,
"scheduler": self.unoptimized.scheduler,
"safety_checker": self.unoptimized.safety_checker if hasattr(self.unoptimized, "safety_checker") else None,
"feature_extractor": self.unoptimized.feature_extractor,
}
del self.unoptimized
for submodel in submodels:
kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
os.path.dirname(optimized_model_paths[submodel]),
)
pipeline = OnnxStableDiffusionPipeline(
**kwargs,
requires_safety_checker=False,
).apply(self)
del kwargs
if shared.opts.onnx_cache_optimized:
pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
for submodel in submodels:
src_path = optimized_model_paths[submodel]
src_parent = os.path.dirname(src_path)
dst_parent = os.path.join(out_dir, submodel)
dst_path = os.path.join(dst_parent, "model.onnx")
if not os.path.isdir(dst_parent):
os.mkdir(dst_parent)
shutil.copyfile(src_path, dst_path)
weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
if os.path.isfile(weights_src_path):
weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
shutil.copyfile(weights_src_path, weights_dst_path)
except Exception as e:
log.error(f"Failed to optimize model '{self.original_filename}'.")
log.error(e) # for test.
shutil.rmtree(shared.opts.olive_temp_dir, ignore_errors=True)
shutil.rmtree(out_dir, ignore_errors=True)
pipeline = None
shutil.rmtree("cache", ignore_errors=True)
shutil.rmtree("footprints", ignore_errors=True)
return pipeline
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
optimize_config = {
"is_sdxl": False,
"width": 512,
"height": 512,
}
# Helper latency-only dataloader that creates random tensors with no label
class RandomDataLoader:
def __init__(self, create_inputs_func, batchsize, torch_dtype):
self.create_input_func = create_inputs_func
self.batchsize = batchsize
self.torch_dtype = torch_dtype
def __getitem__(self, idx):
label = None
return self.create_input_func(self.batchsize, self.torch_dtype), label
# -----------------------------------------------------------------------------
# TEXT ENCODER
# -----------------------------------------------------------------------------
def text_encoder_inputs(batchsize, torch_dtype):
input_ids = torch.zeros((batchsize, 77), dtype=torch_dtype)
return {
"input_ids": input_ids,
"output_hidden_states": True,
} if optimize_config["is_sdxl"] else input_ids
def text_encoder_load(model_name):
model = CLIPTextModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder")
return model
def text_encoder_conversion_inputs(model):
return text_encoder_inputs(1, torch.int32)
def text_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(text_encoder_inputs, batchsize, torch.int32)
# -----------------------------------------------------------------------------
# TEXT ENCODER 2
# -----------------------------------------------------------------------------
def text_encoder_2_inputs(batchsize, torch_dtype):
return {
"input_ids": torch.zeros((batchsize, 77), dtype=torch_dtype),
"output_hidden_states": True,
}
def text_encoder_2_load(model_name):
model = CLIPTextModelWithProjection.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder_2")
return model
def text_encoder_2_conversion_inputs(model):
return text_encoder_2_inputs(1, torch.int64)
def text_encoder_2_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(text_encoder_2_inputs, batchsize, torch.int64)
# -----------------------------------------------------------------------------
# UNET
# -----------------------------------------------------------------------------
def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
# TODO (pavignol): All the multiplications by 2 here are because the XL base has 2 text encoders
# For refiner, it should be multiplied by 1 (single text encoder)
height = optimize_config["height"]
width = optimize_config["width"]
if optimize_config["is_sdxl"]:
inputs = {
"sample": torch.rand((2 * batchsize, 4, height // 8, width // 8), dtype=torch_dtype),
"timestep": torch.rand((1,), dtype=torch_dtype),
"encoder_hidden_states": torch.rand((2 * batchsize, 77, height * 2), dtype=torch_dtype),
}
if is_conversion_inputs:
inputs["additional_inputs"] = {
"added_cond_kwargs": {
"text_embeds": torch.rand((2 * batchsize, height + 256), dtype=torch_dtype),
"time_ids": torch.rand((2 * batchsize, 6), dtype=torch_dtype),
}
}
else:
inputs["text_embeds"] = torch.rand((2 * batchsize, height + 256), dtype=torch_dtype)
inputs["time_ids"] = torch.rand((2 * batchsize, 6), dtype=torch_dtype)
else:
inputs = {
"sample": torch.rand((batchsize, 4, height // 8, width // 8), dtype=torch_dtype),
"timestep": torch.rand((batchsize,), dtype=torch_dtype),
"encoder_hidden_states": torch.rand((batchsize, 77, height + 256), dtype=torch_dtype),
"return_dict": False,
}
return inputs
def unet_load(model_name):
model = diffusers.UNet2DConditionModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="unet")
return model
def unet_conversion_inputs(model):
return tuple(unet_inputs(1, torch.float32, True).values())
def unet_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(unet_inputs, batchsize, torch.float16)
# -----------------------------------------------------------------------------
# VAE ENCODER
# -----------------------------------------------------------------------------
def vae_encoder_inputs(batchsize, torch_dtype):
return {
"sample": torch.rand((batchsize, 3, optimize_config["height"], optimize_config["width"]), dtype=torch_dtype),
"return_dict": False,
}
def vae_encoder_load(model_name):
source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
if not os.path.isdir(source):
source += "_encoder"
model = diffusers.AutoencoderKL.from_pretrained(source)
model.forward = lambda sample, return_dict: model.encode(sample, return_dict)[0].sample()
return model
def vae_encoder_conversion_inputs(model):
return tuple(vae_encoder_inputs(1, torch.float32).values())
def vae_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(vae_encoder_inputs, batchsize, torch.float16)
# -----------------------------------------------------------------------------
# VAE DECODER
# -----------------------------------------------------------------------------
def vae_decoder_inputs(batchsize, torch_dtype):
return {
"latent_sample": torch.rand((batchsize, 4, optimize_config["height"] // 8, optimize_config["width"] // 8), dtype=torch_dtype),
"return_dict": False,
}
def vae_decoder_load(model_name):
source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
if not os.path.isdir(source):
source += "_decoder"
model = diffusers.AutoencoderKL.from_pretrained(source)
model.forward = model.decode
return model
def vae_decoder_conversion_inputs(model):
return tuple(vae_decoder_inputs(1, torch.float32).values())
def vae_decoder_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(vae_decoder_inputs, batchsize, torch.float16)
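A hedged end-to-end sketch of driving the class above; the checkpoint path is hypothetical, and optimize() returns an OnnxStableDiffusionPipeline, or None on failure:

from modules.olive import OlivePipeline

pipe = OlivePipeline.from_single_file("models/Stable-diffusion/model.safetensors")  # hypothetical path
onnx_pipe = pipe.optimize(512, 512)  # width/height fix the static shapes baked into the ONNX graphs
if onnx_pipe is None:
    raise RuntimeError("Olive optimization failed; see the log for details")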

modules/onnx.py

@@ -1,17 +1,13 @@
import os
import json
import torch
import shutil
import importlib
import diffusers
import numpy as np
import onnxruntime as ort
from enum import Enum
from typing import Union, Optional, Callable, List
from transformers.models.clip.modeling_clip import CLIPTextModel, CLIPTextModelWithProjection
from installer import log
from modules import shared
from modules.paths import sd_configs_path
from modules.sd_models import CheckpointInfo
class ExecutionProvider(str, Enum):
@@ -21,18 +17,8 @@ class ExecutionProvider(str, Enum):
ROCm = "ROCMExecutionProvider"
OpenVINO = "OpenVINOExecutionProvider"
- submodels = ("text_encoder", "unet", "vae_encoder", "vae_decoder",)
available_execution_providers: List[ExecutionProvider] = ort.get_available_providers()
- EP_TO_NAME = {
- ExecutionProvider.CPU: "cpu",
- ExecutionProvider.DirectML: "gpu-dml",
- ExecutionProvider.CUDA: "gpu-?", # TODO
- ExecutionProvider.ROCm: "gpu-rocm",
- ExecutionProvider.OpenVINO: "?", # TODO
- }
def get_default_execution_provider() -> ExecutionProvider:
from modules import devices
if devices.backend == "cpu":
@@ -43,8 +29,6 @@ def get_default_execution_provider() -> ExecutionProvider:
return ExecutionProvider.CUDA
elif devices.backend == "rocm":
if ExecutionProvider.ROCm in available_execution_providers:
- from olive.hardware.accelerator import AcceleratorLookup
- AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm)
return ExecutionProvider.ROCm
else:
log.warning("Currently, there's no pypi release for onnxruntime-rocm. Please download and install .whl file from https://download.onnxruntime.ai/ The inference will be fall back to CPU.")
@@ -259,319 +243,3 @@ class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline):
return (image, has_nsfw_concept)
return diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
class OlivePipeline(diffusers.DiffusionPipeline):
sd_model_hash: str
sd_checkpoint_info: CheckpointInfo
sd_model_checkpoint: str
config = {}
unoptimized: diffusers.DiffusionPipeline
original_filename: str
def __init__(self, path, pipeline: diffusers.DiffusionPipeline):
self.original_filename = os.path.basename(path)
self.unoptimized = pipeline
del pipeline
if not os.path.exists(shared.opts.olive_temp_dir):
os.mkdir(shared.opts.olive_temp_dir)
self.unoptimized.save_pretrained(shared.opts.olive_temp_dir)
@staticmethod
def from_pretrained(pretrained_model_name_or_path, **kwargs):
return OlivePipeline(pretrained_model_name_or_path, diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs))
@staticmethod
def from_single_file(pretrained_model_name_or_path, **kwargs):
return OlivePipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs))
@staticmethod
def from_ckpt(*args, **kwargs):
return OlivePipeline.from_single_file(*args, **kwargs)
def to(self, *args, **kwargs):
pass
def optimize(self, width: int, height: int):
from olive.workflows import run
from olive.model import ONNXModel
if width != height:
log.warning("Olive received different width and height. The quality of the result is not guaranteed.")
out_dir = os.path.join(shared.opts.olive_cached_models_path, f"{self.original_filename}-{width}w-{height}h")
if os.path.isdir(out_dir):
del self.unoptimized
return OnnxStableDiffusionPipeline.from_pretrained(
out_dir,
).apply(self)
try:
if shared.opts.onnx_cache_optimized:
shutil.copytree(
shared.opts.olive_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
)
optimize_config["width"] = width
optimize_config["height"] = height
optimized_model_paths = {}
for submodel in submodels:
log.info(f"\nOptimizing {submodel}")
with open(os.path.join(sd_configs_path, "olive", f"config_{submodel}.json"), "r") as config_file:
olive_config = json.load(config_file)
olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
run(olive_config)
with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
footprints = json.load(footprint_file)
conversion_footprint = None
optimizer_footprint = None
for _, footprint in footprints.items():
if footprint["from_pass"] == "OnnxConversion":
conversion_footprint = footprint
elif footprint["from_pass"] == "OrtTransformersOptimization":
optimizer_footprint = footprint
assert conversion_footprint and optimizer_footprint, "Failed to optimize model"
optimized_model_paths[submodel] = ONNXModel(
**optimizer_footprint["model_config"]["config"]
).model_path
log.info(f"Optimized {submodel}")
shutil.rmtree(shared.opts.olive_temp_dir)
kwargs = {
"tokenizer": self.unoptimized.tokenizer,
"scheduler": self.unoptimized.scheduler,
"safety_checker": self.unoptimized.safety_checker if hasattr(self.unoptimized, "safety_checker") else None,
"feature_extractor": self.unoptimized.feature_extractor,
}
del self.unoptimized
for submodel in submodels:
kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
os.path.dirname(optimized_model_paths[submodel]),
)
pipeline = OnnxStableDiffusionPipeline(
**kwargs,
requires_safety_checker=False,
).apply(self)
del kwargs
if shared.opts.onnx_cache_optimized:
pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
for submodel in submodels:
src_path = optimized_model_paths[submodel]
src_parent = os.path.dirname(src_path)
dst_parent = os.path.join(out_dir, submodel)
dst_path = os.path.join(dst_parent, "model.onnx")
if not os.path.isdir(dst_parent):
os.mkdir(dst_parent)
shutil.copyfile(src_path, dst_path)
weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
if os.path.isfile(weights_src_path):
weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
shutil.copyfile(weights_src_path, weights_dst_path)
except Exception as e:
log.error(f"Failed to optimize model '{self.original_filename}'.")
log.error(e) # for test.
shutil.rmtree(shared.opts.olive_temp_dir, ignore_errors=True)
shutil.rmtree(out_dir, ignore_errors=True)
pipeline = None
shutil.rmtree("cache", ignore_errors=True)
shutil.rmtree("footprints", ignore_errors=True)
return pipeline
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
optimize_config = {
"is_sdxl": False,
"width": 512,
"height": 512,
}
# Helper latency-only dataloader that creates random tensors with no label
class RandomDataLoader:
def __init__(self, create_inputs_func, batchsize, torch_dtype):
self.create_input_func = create_inputs_func
self.batchsize = batchsize
self.torch_dtype = torch_dtype
def __getitem__(self, idx):
label = None
return self.create_input_func(self.batchsize, self.torch_dtype), label
# -----------------------------------------------------------------------------
# TEXT ENCODER
# -----------------------------------------------------------------------------
def text_encoder_inputs(batchsize, torch_dtype):
input_ids = torch.zeros((batchsize, 77), dtype=torch_dtype)
return {
"input_ids": input_ids,
"output_hidden_states": True,
} if optimize_config["is_sdxl"] else input_ids
def text_encoder_load(model_name):
model = CLIPTextModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder")
return model
def text_encoder_conversion_inputs(model):
return text_encoder_inputs(1, torch.int32)
def text_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(text_encoder_inputs, batchsize, torch.int32)
# -----------------------------------------------------------------------------
# TEXT ENCODER 2
# -----------------------------------------------------------------------------
def text_encoder_2_inputs(batchsize, torch_dtype):
return {
"input_ids": torch.zeros((batchsize, 77), dtype=torch_dtype),
"output_hidden_states": True,
}
def text_encoder_2_load(model_name):
model = CLIPTextModelWithProjection.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder_2")
return model
def text_encoder_2_conversion_inputs(model):
return text_encoder_2_inputs(1, torch.int64)
def text_encoder_2_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(text_encoder_2_inputs, batchsize, torch.int64)
# -----------------------------------------------------------------------------
# UNET
# -----------------------------------------------------------------------------
def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
# TODO (pavignol): All the multiplications by 2 here are because the XL base has 2 text encoders
# For refiner, it should be multiplied by 1 (single text encoder)
height = optimize_config["height"]
width = optimize_config["width"]
if optimize_config["is_sdxl"]:
inputs = {
"sample": torch.rand((2 * batchsize, 4, height // 8, width // 8), dtype=torch_dtype),
"timestep": torch.rand((1,), dtype=torch_dtype),
"encoder_hidden_states": torch.rand((2 * batchsize, 77, height * 2), dtype=torch_dtype),
}
if is_conversion_inputs:
inputs["additional_inputs"] = {
"added_cond_kwargs": {
"text_embeds": torch.rand((2 * batchsize, height + 256), dtype=torch_dtype),
"time_ids": torch.rand((2 * batchsize, 6), dtype=torch_dtype),
}
}
else:
inputs["text_embeds"] = torch.rand((2 * batchsize, height + 256), dtype=torch_dtype)
inputs["time_ids"] = torch.rand((2 * batchsize, 6), dtype=torch_dtype)
else:
inputs = {
"sample": torch.rand((batchsize, 4, height // 8, width // 8), dtype=torch_dtype),
"timestep": torch.rand((batchsize,), dtype=torch_dtype),
"encoder_hidden_states": torch.rand((batchsize, 77, height + 256), dtype=torch_dtype),
"return_dict": False,
}
return inputs
def unet_load(model_name):
model = diffusers.UNet2DConditionModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="unet")
return model
def unet_conversion_inputs(model):
return tuple(unet_inputs(1, torch.float32, True).values())
def unet_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(unet_inputs, batchsize, torch.float16)
# -----------------------------------------------------------------------------
# VAE ENCODER
# -----------------------------------------------------------------------------
def vae_encoder_inputs(batchsize, torch_dtype):
return {
"sample": torch.rand((batchsize, 3, optimize_config["height"], optimize_config["width"]), dtype=torch_dtype),
"return_dict": False,
}
def vae_encoder_load(model_name):
source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
if not os.path.isdir(source):
source += "_encoder"
model = diffusers.AutoencoderKL.from_pretrained(source)
model.forward = lambda sample, return_dict: model.encode(sample, return_dict)[0].sample()
return model
def vae_encoder_conversion_inputs(model):
return tuple(vae_encoder_inputs(1, torch.float32).values())
def vae_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(vae_encoder_inputs, batchsize, torch.float16)
# -----------------------------------------------------------------------------
# VAE DECODER
# -----------------------------------------------------------------------------
def vae_decoder_inputs(batchsize, torch_dtype):
return {
"latent_sample": torch.rand((batchsize, 4, optimize_config["height"] // 8, optimize_config["width"] // 8), dtype=torch_dtype),
"return_dict": False,
}
def vae_decoder_load(model_name):
source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
if not os.path.isdir(source):
source += "_decoder"
model = diffusers.AutoencoderKL.from_pretrained(source)
model.forward = model.decode
return model
def vae_decoder_conversion_inputs(model):
return tuple(vae_decoder_inputs(1, torch.float32).values())
def vae_decoder_data_loader(data_dir, batchsize, *args, **kwargs):
return RandomDataLoader(vae_decoder_inputs, batchsize, torch.float16)

modules/shared.py

@@ -17,6 +17,7 @@ from modules import errors, shared_items, shared_state, cmd_args, theme
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx import available_execution_providers, get_default_execution_provider
+ from modules.olive import enable_olive_onchange
import modules.interrogate
import modules.memmon
import modules.styles
@@ -439,8 +440,11 @@ options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
"onnx_sep": OptionInfo("<h2>ONNX Runtime</h2>", "", gr.HTML),
"onnx_execution_provider": OptionInfo(get_default_execution_provider().value, 'Execution Provider', gr.Dropdown, lambda: {"choices": available_execution_providers }),
"onnx_olive_float16": OptionInfo(True, 'Use FP16 on Olive optimization (will use FP32 if unchecked)'),
"onnx_cache_optimized": OptionInfo(True, 'Cache Olive optimized models'),
"onnx_olive_sep": OptionInfo("<h3>Olive</h3>", "", gr.HTML),
"onnx_enable_olive": OptionInfo(False, 'Enable pipeline for Olive', onchange=enable_olive_onchange),
"onnx_olive_float16": OptionInfo(True, 'Olive use FP16 on optimization (will use FP32 if unchecked)'),
"onnx_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
}))
options_templates.update(options_section(('system-paths', "System Paths"), {
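A sketch of what toggling the new checkbox does; the attribute-style opts assignment is an assumption about how shared.opts dispatches changes:

from modules import shared
from modules.olive import enable_olive_onchange

shared.opts.onnx_enable_olive = True  # assumption: opts accepts attribute assignment
enable_olive_onchange()  # installs olive-ai; a full UI restart is still required to load it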

modules/shared_items.py

@@ -26,7 +26,8 @@ def list_crossattention():
def get_pipelines():
import diffusers
- from modules.onnx import OnnxStableDiffusionPipeline, OlivePipeline
+ from modules.onnx import OnnxStableDiffusionPipeline
+ from modules.olive import OlivePipeline, is_available as is_olive_available
from installer import log
pipelines = { # note: not all pipelines can be used manually as they require prior pipeline next to decoder pipeline
'Autodetect': None,
@@ -57,6 +58,8 @@
except Exception:
pipelines['InstaFlow'] = getattr(diffusers, 'StableDiffusionPipeline', None)
+ if not is_olive_available:
+ del pipelines['ONNX Stable Diffusion with Olive']
for k, v in pipelines.items():
if k != 'Autodetect' and v is None:
log.error(f'Not available: pipeline={k} diffusers={diffusers.__version__} path={diffusers.__file__}')