refactor onnx and olive

pull/2784/head
Seunghoon Lee 2023-11-04 23:46:20 +09:00
parent b70258c926
commit 6507491d8f
No known key found for this signature in database
GPG Key ID: 436E38F4E70BD152
27 changed files with 1185 additions and 342 deletions

View File

@ -38,12 +38,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -55,15 +55,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -45,12 +45,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -45,12 +45,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -71,12 +71,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -111,12 +111,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -61,15 +61,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -51,12 +51,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -51,12 +51,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -0,0 +1,62 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "text_encoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["input_ids"],
"output_names": ["last_hidden_state", "pooler_output"],
"dynamic_axes": { "input_ids": { "0": "batch", "1": "sequence" } }
},
"dummy_inputs_func": "text_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "text_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

82
configs/onnx/sd_unet.json Normal file
View File

@ -0,0 +1,82 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "unet_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": [
"sample",
"timestep",
"encoder_hidden_states",
"return_dict"
],
"output_names": ["out_sample"],
"dynamic_axes": {
"sample": {
"0": "unet_sample_batch",
"1": "unet_sample_channels",
"2": "unet_sample_height",
"3": "unet_sample_width"
},
"timestep": { "0": "unet_time_batch" },
"encoder_hidden_states": {
"0": "unet_hidden_batch",
"1": "unet_hidden_sequence"
}
}
},
"dummy_inputs_func": "unet_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "unet_data_loader",
"batch_size": 2
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "unet",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,69 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "vae_decoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["latent_sample", "return_dict"],
"output_names": ["sample"],
"dynamic_axes": {
"latent_sample": {
"0": "batch",
"1": "channels",
"2": "height",
"3": "width"
}
}
},
"dummy_inputs_func": "vae_decoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "vae_decoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "vae_decoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,69 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "vae_encoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["sample", "return_dict"],
"output_names": ["latent_sample"],
"dynamic_axes": {
"sample": {
"0": "batch",
"1": "channels",
"2": "height",
"3": "width"
}
}
},
"dummy_inputs_func": "vae_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "vae_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "vae_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,95 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "text_encoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["input_ids", "output_hidden_states"],
"output_names": [
"last_hidden_state",
"pooler_output",
"hidden_states.0",
"hidden_states.1",
"hidden_states.2",
"hidden_states.3",
"hidden_states.4",
"hidden_states.5",
"hidden_states.6",
"hidden_states.7",
"hidden_states.8",
"hidden_states.9",
"hidden_states.10",
"hidden_states.11",
"hidden_states.12"
],
"dynamic_axes": {
"input_ids": { "0": "batch_size", "1": "sequence_length" },
"last_hidden_state": { "0": "batch_size", "1": "sequence_length" },
"pooler_output": { "0": "batch_size" },
"hidden_states.0": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.1": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.2": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.3": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.4": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.5": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.6": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.7": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.8": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.9": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.10": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.11": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.12": { "0": "batch_size", "1": "sequence_length" }
}
},
"dummy_inputs_func": "text_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "text_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,135 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "text_encoder_2_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["input_ids", "output_hidden_states"],
"output_names": [
"text_embeds",
"last_hidden_state",
"hidden_states.0",
"hidden_states.1",
"hidden_states.2",
"hidden_states.3",
"hidden_states.4",
"hidden_states.5",
"hidden_states.6",
"hidden_states.7",
"hidden_states.8",
"hidden_states.9",
"hidden_states.10",
"hidden_states.11",
"hidden_states.12",
"hidden_states.13",
"hidden_states.14",
"hidden_states.15",
"hidden_states.16",
"hidden_states.17",
"hidden_states.18",
"hidden_states.19",
"hidden_states.20",
"hidden_states.21",
"hidden_states.22",
"hidden_states.23",
"hidden_states.24",
"hidden_states.25",
"hidden_states.26",
"hidden_states.27",
"hidden_states.28",
"hidden_states.29",
"hidden_states.30",
"hidden_states.31",
"hidden_states.32"
],
"dynamic_axes": {
"input_ids": { "0": "batch_size", "1": "sequence_length" },
"text_embeds": { "0": "batch_size", "1": "sequence_length" },
"last_hidden_state": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.0": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.1": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.2": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.3": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.4": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.5": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.6": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.7": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.8": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.9": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.10": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.11": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.12": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.13": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.14": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.15": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.16": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.17": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.18": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.19": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.20": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.21": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.22": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.23": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.24": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.25": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.26": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.27": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.28": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.29": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.30": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.31": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.32": { "0": "batch_size", "1": "sequence_length" }
}
},
"dummy_inputs_func": "text_encoder_2_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "text_encoder_2_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder_2",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,88 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "unet_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": [
"sample",
"timestep",
"encoder_hidden_states",
"text_embeds",
"time_ids"
],
"output_names": ["out_sample"],
"dynamic_axes": {
"sample": {
"0": "unet_sample_batch",
"1": "unet_sample_channels",
"2": "unet_sample_height",
"3": "unet_sample_width"
},
"timestep": { "0": "unet_time_batch" },
"encoder_hidden_states": {
"0": "unet_hidden_batch",
"1": "unet_hidden_sequence"
},
"text_embeds": {
"0": "unet_text_embeds_batch",
"1": "unet_text_embeds_size"
},
"time_ids": { "0": "unet_time_ids_batch", "1": "unet_time_ids_size" }
}
},
"dummy_inputs_func": "unet_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "unet_data_loader",
"batch_size": 2
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "unet",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,75 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "vae_decoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["latent_sample", "return_dict"],
"output_names": ["sample"],
"dynamic_axes": {
"latent_sample": {
"0": "batch_size",
"1": "num_channels_latent",
"2": "height_latent",
"3": "width_latent"
},
"sample": {
"0": "batch_size",
"1": "num_channels",
"2": "height",
"3": "width"
}
}
},
"dummy_inputs_func": "vae_decoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "vae_decoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "vae_decoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,75 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "vae_encoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["sample", "return_dict"],
"output_names": ["latent_sample"],
"dynamic_axes": {
"sample": {
"0": "batch_size",
"1": "num_channels",
"2": "height",
"3": "width"
},
"latent_sample": {
"0": "batch_size",
"1": "num_channels_latent",
"2": "height_latent",
"3": "width_latent"
}
}
},
"dummy_inputs_func": "vae_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "vae_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "vae_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -31,9 +31,8 @@ except ModuleNotFoundError:
def init_olive():
try:
if installer.opts['onnx_enable_olive']:
import olive.workflows # pylint: disable=unused-import
installer.log.debug('Load olive')
import olive.workflows # pylint: disable=unused-import
installer.log.debug('Load olive')
except Exception as e:
installer.log.error(f'Failed to load olive: {e}')

View File

@ -1,202 +1,21 @@
import os
import sys
import json
import torch
import shutil
import diffusers
from transformers.models.clip.modeling_clip import CLIPTextModel, CLIPTextModelWithProjection
from installer import log
from modules import shared
from modules.paths import sd_configs_path
from modules.sd_models import CheckpointInfo
from modules.onnx import ExecutionProvider, get_execution_provider_options
is_available = "olive" in sys.modules # Olive is not available if it is not loaded at startup.
def enable_olive_onchange():
from installer import installed, install, uninstall
if shared.opts.onnx_enable_olive:
if not installed('olive-ai', reload=True, quiet=True):
install('olive-ai', 'olive-ai')
else:
global is_available
is_available = False
if "olive" in sys.modules:
del sys.modules["olive"]
if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive':
shared.opts.diffusers_pipeline = 'ONNX Stable Diffusion'
if installed('olive-ai', reload=True, quiet=True):
uninstall('olive-ai')
is_sdxl = False
submodels = ("text_encoder", "unet", "vae_encoder", "vae_decoder",)
width = 512
height = 512
batch_size = 1
EP_TO_NAME = {
ExecutionProvider.CPU: "cpu",
ExecutionProvider.DirectML: "gpu-dml",
ExecutionProvider.CUDA: "gpu-?", # TODO
ExecutionProvider.ROCm: "gpu-rocm",
ExecutionProvider.OpenVINO: "gpu", # Other devices can use --use-openvino instead of olive
}
class OlivePipeline(diffusers.DiffusionPipeline):
model_type = diffusers.OnnxStableDiffusionPipeline.__name__
sd_model_hash: str
sd_checkpoint_info: CheckpointInfo
sd_model_checkpoint: str
config = {}
unoptimized: diffusers.DiffusionPipeline
original_filename: str
def __init__(self, path, pipeline: diffusers.DiffusionPipeline):
self.original_filename = os.path.basename(path)
self.unoptimized = pipeline
del pipeline
if not os.path.exists(shared.opts.olive_temp_dir):
os.mkdir(shared.opts.olive_temp_dir)
self.unoptimized.save_pretrained(shared.opts.olive_temp_dir)
@staticmethod
def from_pretrained(pretrained_model_name_or_path, **kwargs):
return OlivePipeline(pretrained_model_name_or_path, diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs))
@staticmethod
def from_single_file(pretrained_model_name_or_path, **kwargs):
return OlivePipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs))
@staticmethod
def from_ckpt(*args, **kwargs):
return OlivePipeline.from_single_file(**args, **kwargs)
def derive_properties(self, pipeline: diffusers.OnnxStableDiffusionPipeline):
pipeline.sd_model_hash = self.sd_model_hash
pipeline.sd_checkpoint_info = self.sd_checkpoint_info
pipeline.sd_model_checkpoint = self.sd_model_checkpoint
return pipeline
def to(self, *args, **kwargs):
pass
def optimize(self, width: int, height: int):
from olive.workflows import run
from olive.model import ONNXModel
if shared.opts.onnx_execution_provider == ExecutionProvider.ROCm:
from olive.hardware.accelerator import AcceleratorLookup
AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm)
if width != height:
log.warning("Olive received different width and height. The quality of the result is not guaranteed.")
out_dir = os.path.join(shared.opts.olive_cached_models_path, f"{self.original_filename}-{width}w-{height}h")
if os.path.isdir(out_dir): # already optimized (cached)
del self.unoptimized
return self.derive_properties(
diffusers.OnnxStableDiffusionPipeline.from_pretrained(
out_dir,
)
)
try:
if shared.opts.onnx_cache_optimized:
shutil.copytree(
shared.opts.olive_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
)
optimize_config["width"] = width
optimize_config["height"] = height
optimized_model_paths = {}
for submodel in submodels:
log.info(f"\nOptimizing {submodel}")
with open(os.path.join(sd_configs_path, "olive", f"sd_{submodel}.json"), "r") as config_file:
olive_config = json.load(config_file)
olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
run(olive_config)
with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
footprints = json.load(footprint_file)
conversion_footprint = None
optimizer_footprint = None
for _, footprint in footprints.items():
if footprint["from_pass"] == "OnnxConversion":
conversion_footprint = footprint
elif footprint["from_pass"] == "OrtTransformersOptimization":
optimizer_footprint = footprint
assert conversion_footprint and optimizer_footprint, "Failed to optimize model"
optimized_model_paths[submodel] = ONNXModel(
**optimizer_footprint["model_config"]["config"]
).model_path
log.info(f"Optimized {submodel}")
shutil.rmtree(shared.opts.olive_temp_dir)
kwargs = {
"tokenizer": self.unoptimized.tokenizer,
"scheduler": self.unoptimized.scheduler,
"safety_checker": self.unoptimized.safety_checker if hasattr(self.unoptimized, "safety_checker") else None,
"feature_extractor": self.unoptimized.feature_extractor,
}
del self.unoptimized
for submodel in submodels:
kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
os.path.dirname(optimized_model_paths[submodel]),
provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),),
)
pipeline = self.derive_properties(
diffusers.OnnxStableDiffusionPipeline(
**kwargs,
requires_safety_checker=False,
)
)
del kwargs
if shared.opts.onnx_cache_optimized:
pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
for submodel in submodels:
src_path = optimized_model_paths[submodel]
src_parent = os.path.dirname(src_path)
dst_parent = os.path.join(out_dir, submodel)
dst_path = os.path.join(dst_parent, "model.onnx")
if not os.path.isdir(dst_parent):
os.mkdir(dst_parent)
shutil.copyfile(src_path, dst_path)
weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
if os.path.isfile(weights_src_path):
weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
shutil.copyfile(weights_src_path, weights_dst_path)
except Exception as e:
log.error(f"Failed to optimize model '{self.original_filename}'.")
log.error(e) # for test.
shutil.rmtree(shared.opts.olive_temp_dir, ignore_errors=True)
shutil.rmtree(out_dir, ignore_errors=True)
pipeline = None
shutil.rmtree("cache", ignore_errors=True)
shutil.rmtree("footprints", ignore_errors=True)
return pipeline
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
optimize_config = {
"is_sdxl": False,
"width": 512,
"height": 512,
}
# Helper latency-only dataloader that creates random tensors with no label
class RandomDataLoader:
@ -219,11 +38,11 @@ def text_encoder_inputs(batchsize, torch_dtype):
return {
"input_ids": input_ids,
"output_hidden_states": True,
} if optimize_config["is_sdxl"] else input_ids
} if is_sdxl else input_ids
def text_encoder_load(model_name):
model = CLIPTextModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder")
model = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder")
return model
@ -248,7 +67,7 @@ def text_encoder_2_inputs(batchsize, torch_dtype):
def text_encoder_2_load(model_name):
model = CLIPTextModelWithProjection.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder_2")
model = CLIPTextModelWithProjection.from_pretrained(model_name, subfolder="text_encoder_2")
return model
@ -268,10 +87,8 @@ def text_encoder_2_data_loader(data_dir, batchsize, *args, **kwargs):
def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
# TODO (pavignol): All the multiplications by 2 here are because the XL base has 2 text encoders
# For refiner, it should be multiplied by 1 (single text encoder)
height = optimize_config["height"]
width = optimize_config["width"]
if optimize_config["is_sdxl"]:
if is_sdxl:
inputs = {
"sample": torch.rand((2 * batchsize, 4, height // 8, width // 8), dtype=torch_dtype),
"timestep": torch.rand((1,), dtype=torch_dtype),
@ -281,12 +98,12 @@ def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
if is_conversion_inputs:
inputs["additional_inputs"] = {
"added_cond_kwargs": {
"text_embeds": torch.rand((2 * batchsize, height + 256), dtype=torch_dtype),
"text_embeds": torch.rand((2 * batchsize, 1280), dtype=torch_dtype),
"time_ids": torch.rand((2 * batchsize, 6), dtype=torch_dtype),
}
}
else:
inputs["text_embeds"] = torch.rand((2 * batchsize, height + 256), dtype=torch_dtype)
inputs["text_embeds"] = torch.rand((2 * batchsize, 1280), dtype=torch_dtype)
inputs["time_ids"] = torch.rand((2 * batchsize, 6), dtype=torch_dtype)
else:
inputs = {
@ -296,11 +113,22 @@ def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
"return_dict": False,
}
if is_conversion_inputs:
inputs["additional_inputs"] = {
"added_cond_kwargs": {
"text_embeds": torch.rand((1, 1280), dtype=torch_dtype),
"time_ids": torch.rand((1, 5), dtype=torch_dtype),
}
}
else:
inputs["onnx::Concat_4"] = torch.rand((1, 1280), dtype=torch_dtype)
inputs["onnx::Shape_5"] = torch.rand((1, 5), dtype=torch_dtype)
return inputs
def unet_load(model_name):
model = diffusers.UNet2DConditionModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="unet")
model = diffusers.UNet2DConditionModel.from_pretrained(model_name, subfolder="unet")
return model
@ -319,13 +147,13 @@ def unet_data_loader(data_dir, batchsize, *args, **kwargs):
def vae_encoder_inputs(batchsize, torch_dtype):
return {
"sample": torch.rand((batchsize, 3, optimize_config["height"], optimize_config["width"]), dtype=torch_dtype),
"sample": torch.rand((batchsize, 3, height, width), dtype=torch_dtype),
"return_dict": False,
}
def vae_encoder_load(model_name):
source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
source = os.path.join(model_name, "vae")
if not os.path.isdir(source):
source += "_encoder"
model = diffusers.AutoencoderKL.from_pretrained(source)
@ -348,13 +176,13 @@ def vae_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
def vae_decoder_inputs(batchsize, torch_dtype):
return {
"latent_sample": torch.rand((batchsize, 4, optimize_config["height"] // 8, optimize_config["width"] // 8), dtype=torch_dtype),
"latent_sample": torch.rand((batchsize, 4, height // 8, width // 8), dtype=torch_dtype),
"return_dict": False,
}
def vae_decoder_load(model_name):
source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
source = os.path.join(model_name, "vae")
if not os.path.isdir(source):
source += "_decoder"
model = diffusers.AutoencoderKL.from_pretrained(source)

View File

@ -1,13 +1,18 @@
import os
import json
import torch
import shutil
import importlib
import diffusers
import numpy as np
import onnxruntime as ort
import diffusers
import optimum.onnxruntime
from enum import Enum
from typing import Union, Optional, Callable, List
from abc import ABCMeta
from typing import Any, Dict, Union, Optional, Callable, List
from installer import log
from modules import shared
from modules import shared, olive
from modules.paths import sd_configs_path
from modules.sd_models import CheckpointInfo
class ExecutionProvider(str, Enum):
@ -17,8 +22,17 @@ class ExecutionProvider(str, Enum):
ROCm = "ROCMExecutionProvider"
OpenVINO = "OpenVINOExecutionProvider"
submodels = ("text_encoder", "unet", "vae_encoder", "vae_decoder",)
available_execution_providers: List[ExecutionProvider] = ort.get_available_providers()
EP_TO_NAME = {
ExecutionProvider.CPU: "cpu",
ExecutionProvider.DirectML: "gpu-dml",
ExecutionProvider.CUDA: "gpu-?", # TODO
ExecutionProvider.ROCm: "gpu-rocm",
ExecutionProvider.OpenVINO: "gpu", # Other devices can use --use-openvino instead of olive
}
def get_default_execution_provider() -> ExecutionProvider:
from modules import devices
if devices.backend == "cpu":
@ -64,19 +78,46 @@ class OnnxRuntimeModel(diffusers.OnnxRuntimeModel):
return ()
# OnnxRuntimeModel Hijack.
OnnxRuntimeModel.__module__ = 'diffusers'
diffusers.OnnxRuntimeModel = OnnxRuntimeModel
class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline):
model_type = diffusers.OnnxStableDiffusionPipeline.__name__
class OnnxPipelineBase(diffusers.DiffusionPipeline, metaclass=ABCMeta):
model_type: str
sd_model_hash: str
sd_checkpoint_info: CheckpointInfo
sd_model_checkpoint: str
def __init__(self):
self.model_type = self.__class__.__name__
class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline, OnnxPipelineBase):
def __init__(
self,
vae_encoder: diffusers.OnnxRuntimeModel,
vae_decoder: diffusers.OnnxRuntimeModel,
text_encoder: diffusers.OnnxRuntimeModel,
tokenizer,
unet: diffusers.OnnxRuntimeModel,
scheduler,
safety_checker: diffusers.OnnxRuntimeModel,
feature_extractor,
requires_safety_checker: bool = True
):
super().__init__(vae_encoder, vae_decoder, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor, requires_safety_checker)
@staticmethod
def from_pretrained(pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
kwargs["provider"] = kwargs["provider"] if "provider" in kwargs else (shared.opts.onnx_execution_provider, get_execution_provider_options(),)
init_dict = super(OnnxStableDiffusionPipeline, OnnxStableDiffusionPipeline).extract_init_dict(diffusers.DiffusionPipeline.load_config(pretrained_model_name_or_path), **kwargs)[0]
sess_options = kwargs.get("sess_options", ort.SessionOptions())
provider = kwargs.get("provider", (shared.opts.onnx_execution_provider, get_execution_provider_options(),))
model_config = super(OnnxStableDiffusionPipeline, OnnxStableDiffusionPipeline).extract_init_dict(diffusers.DiffusionPipeline.load_config(pretrained_model_name_or_path))
init_dict = {}
for d in model_config:
if 'unet' in d:
init_dict = d
break
init_kwargs = {}
for k, v in init_dict.items():
if not isinstance(v, list):
@ -90,7 +131,8 @@ class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline):
constructor = getattr(library, constructor_name)
submodel_kwargs = {}
if issubclass(constructor, diffusers.OnnxRuntimeModel):
submodel_kwargs["provider"] = kwargs["provider"]
submodel_kwargs["sess_options"] = sess_options
submodel_kwargs["provider"] = provider
try:
init_kwargs[k] = constructor.from_pretrained(
os.path.join(pretrained_model_name_or_path, k),
@ -244,4 +286,316 @@ class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline):
return diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
# Install the patched subclass as diffusers.OnnxStableDiffusionPipeline module-wide.
diffusers.OnnxStableDiffusionPipeline = OnnxStableDiffusionPipeline
class OnnxStableDiffusionXLPipeline(optimum.onnxruntime.ORTStableDiffusionXLPipeline, OnnxPipelineBase):
    """ONNX SDXL pipeline: optimum's ORTStableDiffusionXLPipeline combined
    with the checkpoint bookkeeping attributes of OnnxPipelineBase."""
    def __init__(
        self,
        vae_decoder_session,
        text_encoder_session,
        unet_session,
        config: Dict[str, Any],
        tokenizer,
        scheduler,
        feature_extractor = None,
        vae_encoder_session = None,
        text_encoder_2_session = None,
        tokenizer_2 = None,
        use_io_binding: bool | None = None,
        model_save_dir = None,
        add_watermarker: bool | None = None
    ):
        # Forward everything positionally, unchanged, to optimum's constructor.
        # NOTE(review): argument order must match the optimum base class — confirm on optimum upgrades.
        super().__init__(vae_decoder_session, text_encoder_session, unet_session, config, tokenizer, scheduler, feature_extractor, vae_encoder_session, text_encoder_2_session, tokenizer_2, use_io_binding, model_save_dir, add_watermarker)
# Present the subclass under optimum's original module/name — presumably so
# serialized configs resolve back to the optimum class name (TODO confirm) —
# then install it as diffusers.OnnxStableDiffusionXLPipeline module-wide.
OnnxStableDiffusionXLPipeline.__module__ = 'optimum.onnxruntime.modeling_diffusion'
OnnxStableDiffusionXLPipeline.__name__ = 'ORTStableDiffusionXLPipeline'
diffusers.OnnxStableDiffusionXLPipeline = OnnxStableDiffusionXLPipeline
class OnnxAutoPipeline(OnnxPipelineBase):
    """Auto-detecting ONNX pipeline wrapper that converts (and optionally
    Olive-optimizes) a model on demand via ``preprocess``.

    Possible Cases:
    1. from .ckpt or .safetensors
    2. from downloaded non-Onnx model
    3. from downloaded Onnx model
    4. from cached converted Onnx model
    5. from cached optimized model
    """
    # Concrete pipeline class used to rebuild the pipeline after conversion
    # or optimization (SD or SDXL, chosen in __init__).
    constructor: Union[diffusers.OnnxStableDiffusionPipeline, diffusers.OnnxStableDiffusionXLPipeline]
    config = {}  # NOTE(review): class-level mutable dict shared across instances — presumably satisfies DiffusionPipeline's config access; confirm
    pipeline: diffusers.DiffusionPipeline  # the currently wrapped pipeline
    original_filename: str  # basename of the source checkpoint/model path

    def __init__(self, path, pipeline: diffusers.DiffusionPipeline):
        """Stage *pipeline* (loaded from *path*) into the ONNX temp dir for conversion."""
        self.original_filename = os.path.basename(path)
        self.pipeline = pipeline
        del pipeline
        # Recreate the temp dir from scratch so stale submodels never leak in.
        if os.path.exists(shared.opts.onnx_temp_dir):
            shutil.rmtree(shared.opts.onnx_temp_dir)
        os.mkdir(shared.opts.onnx_temp_dir)
        # SDXL pipelines are detected by the presence of a second text encoder.
        self.constructor = diffusers.OnnxStableDiffusionXLPipeline if hasattr(self.pipeline, "text_encoder_2") else diffusers.OnnxStableDiffusionPipeline
        self.model_type = self.constructor.__name__
        self.pipeline.save_pretrained(shared.opts.onnx_temp_dir)

    @staticmethod
    def from_pretrained(pretrained_model_name_or_path, **kwargs):
        """Best-effort load: try ONNX SD, then ONNX SDXL, then a generic diffusers pipeline."""
        pipeline = None
        try: # load from Onnx SD model
            pipeline = diffusers.OnnxStableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs)
        except Exception as e:
            log.debug(f"OnnxStableDiffusionPipeline load failed: {e}")  # best-effort: fall through to next loader
        if pipeline is None:
            try: # load from Onnx SDXL model
                pipeline = diffusers.OnnxStableDiffusionXLPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs)
            except Exception as e:
                log.debug(f"OnnxStableDiffusionXLPipeline load failed: {e}")
        if pipeline is None:
            try: # load from non-Onnx model
                pipeline = diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs)
            except Exception as e:
                log.debug(f"DiffusionPipeline load failed: {e}")
        return OnnxAutoPipeline(pretrained_model_name_or_path, pipeline)

    @staticmethod
    def from_single_file(pretrained_model_name_or_path, **kwargs):
        """Load from a single .ckpt/.safetensors checkpoint file."""
        return OnnxAutoPipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs))

    @staticmethod
    def from_ckpt(*args, **kwargs):
        """Deprecated alias of from_single_file."""
        # BUGFIX: was `from_single_file(**args, **kwargs)` — double-star on the
        # positional-args tuple raises TypeError; forward positionals with `*`.
        return OnnxAutoPipeline.from_single_file(*args, **kwargs)

    def derive_properties(self, pipeline: OnnxPipelineBase):
        """Copy checkpoint bookkeeping attributes onto a freshly built pipeline and return it."""
        pipeline.sd_model_hash = self.sd_model_hash
        pipeline.sd_checkpoint_info = self.sd_checkpoint_info
        pipeline.sd_model_checkpoint = self.sd_model_checkpoint
        return pipeline

    def to(self, *args, **kwargs):
        """No-op device move: ONNX sessions are bound to their execution provider.

        Returns self so `pipeline = pipeline.to(device)` chains keep working
        (previously returned None).
        """
        return self

    def convert(self):
        """Convert the staged torch pipeline to ONNX via Olive, reusing the cache when present.

        On any failure, logs the error and removes the temp/output dirs.
        """
        if shared.opts.onnx_execution_provider == ExecutionProvider.ROCm:
            # Olive does not list ROCm under its "gpu" providers by default; register it.
            from olive.hardware.accelerator import AcceleratorLookup
            AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm)
        out_dir = os.path.join(shared.opts.onnx_cached_models_path, self.original_filename)
        if os.path.isdir(out_dir): # already converted (cached)
            self.pipeline = self.derive_properties(
                self.constructor.from_pretrained(
                    out_dir,
                )
            )
            return
        try:
            from olive.workflows import run
            from olive.model import ONNXModel
            shutil.rmtree("cache", ignore_errors=True)
            shutil.rmtree("footprints", ignore_errors=True)
            # Non-ONNX components are carried over to the rebuilt pipeline unchanged.
            kwargs = {
                "tokenizer": self.pipeline.tokenizer,
                "scheduler": self.pipeline.scheduler,
                "safety_checker": self.pipeline.safety_checker if hasattr(self.pipeline, "safety_checker") else None,
                "feature_extractor": self.pipeline.feature_extractor,
            }
            del self.pipeline  # release the torch pipeline before conversion
            if shared.opts.onnx_cache_converted:
                # Copy configs/tokenizer files now; model weights are copied per-submodel below.
                shutil.copytree(
                    shared.opts.onnx_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
                )
            converted_model_paths = {}
            for submodel in submodels:
                log.info(f"\nConverting {submodel}")
                with open(os.path.join(sd_configs_path, "onnx", f"{'sdxl' if olive.is_sdxl else 'sd'}_{submodel}.json"), "r") as config_file:
                    conversion_config = json.load(config_file)
                conversion_config["input_model"]["config"]["model_path"] = os.path.abspath(shared.opts.onnx_temp_dir)
                conversion_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
                run(conversion_config)
                # Olive records results as "footprint" files; pick the OnnxConversion pass output.
                with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
                    footprints = json.load(footprint_file)
                conversion_footprint = None
                for _, footprint in footprints.items():
                    if footprint["from_pass"] == "OnnxConversion":
                        conversion_footprint = footprint
                if conversion_footprint is None:
                    # was `assert` (stripped under -O); raise so the handler below cleans up
                    raise RuntimeError("Failed to convert model")
                converted_model_paths[submodel] = ONNXModel(
                    **conversion_footprint["model_config"]["config"]
                ).model_path
                log.info(f"Converted {submodel}")
            shutil.rmtree(shared.opts.onnx_temp_dir)
            for submodel in submodels:
                kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
                    os.path.dirname(converted_model_paths[submodel]),
                    provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),),
                )
            self.pipeline = self.derive_properties(
                self.constructor(
                    **kwargs,
                    requires_safety_checker=False,
                )
            )
            if shared.opts.onnx_cache_converted:
                self.pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
                for submodel in submodels:
                    src_path = converted_model_paths[submodel]
                    src_parent = os.path.dirname(src_path)
                    dst_parent = os.path.join(out_dir, submodel)
                    dst_path = os.path.join(dst_parent, "model.onnx")
                    if not os.path.isdir(dst_parent):
                        os.mkdir(dst_parent)
                    shutil.copyfile(src_path, dst_path)
                    # Conversion stores external weights as "weights.pb" next to the model.
                    weights_src_path = os.path.join(src_parent, "weights.pb")
                    if os.path.isfile(weights_src_path):
                        weights_dst_path = os.path.join(dst_parent, "weights.pb")
                        shutil.copyfile(weights_src_path, weights_dst_path)
        except Exception as e:
            log.error(f"Failed to convert model '{self.original_filename}'.")
            log.error(e) # for test.
            shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)
            shutil.rmtree(out_dir, ignore_errors=True)

    def optimize(self):
        """Olive-optimize the converted ONNX submodels for the current size/batch, reusing the cache when present.

        On any failure, logs the error and removes the output dir.
        """
        # Pin the dynamic (free) dimensions to the requested generation size;
        # batch is doubled for classifier-free guidance (cond + uncond).
        sess_options = ort.SessionOptions()
        sess_options.add_free_dimension_override_by_name("unet_sample_batch", olive.batch_size * 2)
        sess_options.add_free_dimension_override_by_name("unet_sample_channels", 4)
        sess_options.add_free_dimension_override_by_name("unet_sample_height", olive.height // 8)
        sess_options.add_free_dimension_override_by_name("unet_sample_width", olive.width // 8)
        sess_options.add_free_dimension_override_by_name("unet_time_batch", 1)
        sess_options.add_free_dimension_override_by_name("unet_hidden_batch", olive.batch_size * 2)
        sess_options.add_free_dimension_override_by_name("unet_hidden_sequence", 77)
        if olive.is_sdxl:
            sess_options.add_free_dimension_override_by_name("unet_text_embeds_batch", olive.batch_size * 2)
            sess_options.add_free_dimension_override_by_name("unet_text_embeds_size", 1280)
            sess_options.add_free_dimension_override_by_name("unet_time_ids_batch", olive.batch_size * 2)
            sess_options.add_free_dimension_override_by_name("unet_time_ids_size", 6)
        in_dir = os.path.join(shared.opts.onnx_cached_models_path, self.original_filename)
        out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{olive.width}w-{olive.height}h")
        if os.path.isdir(out_dir): # already optimized (cached)
            self.pipeline = self.derive_properties(
                self.constructor.from_pretrained(
                    out_dir,
                    sess_options=sess_options,
                )
            )
            return
        try:
            from olive.workflows import run
            from olive.model import ONNXModel
            shutil.rmtree("cache", ignore_errors=True)
            shutil.rmtree("footprints", ignore_errors=True)
            # Non-ONNX components are carried over to the rebuilt pipeline unchanged.
            kwargs = {
                "tokenizer": self.pipeline.tokenizer,
                "scheduler": self.pipeline.scheduler,
                "safety_checker": self.pipeline.safety_checker if hasattr(self.pipeline, "safety_checker") else None,
                "feature_extractor": self.pipeline.feature_extractor,
            }
            del self.pipeline  # release the converted pipeline before optimization
            if shared.opts.onnx_cache_optimized:
                shutil.copytree(
                    in_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
                )
            optimized_model_paths = {}
            for submodel in submodels:
                log.info(f"\nOptimizing {submodel}")
                with open(os.path.join(sd_configs_path, "olive", f"{'sdxl' if olive.is_sdxl else 'sd'}_{submodel}.json"), "r") as config_file:
                    olive_config = json.load(config_file)
                olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx"))
                olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
                if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
                    olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
                olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
                run(olive_config)
                # Pick the OrtTransformersOptimization pass output from the footprint file.
                with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
                    footprints = json.load(footprint_file)
                optimizer_footprint = None
                for _, footprint in footprints.items():
                    if footprint["from_pass"] == "OrtTransformersOptimization":
                        optimizer_footprint = footprint
                if optimizer_footprint is None:
                    # was `assert` (stripped under -O); raise so the handler below cleans up
                    raise RuntimeError("Failed to optimize model")
                optimized_model_paths[submodel] = ONNXModel(
                    **optimizer_footprint["model_config"]["config"]
                ).model_path
                log.info(f"Optimized {submodel}")
            for submodel in submodels:
                kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
                    os.path.dirname(optimized_model_paths[submodel]),
                    sess_options=sess_options,
                    provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),),
                )
            self.pipeline = self.derive_properties(
                self.constructor(
                    **kwargs,
                    requires_safety_checker=False,
                )
            )
            if shared.opts.onnx_cache_optimized:
                self.pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
                for submodel in submodels:
                    src_path = optimized_model_paths[submodel]
                    src_parent = os.path.dirname(src_path)
                    dst_parent = os.path.join(out_dir, submodel)
                    dst_path = os.path.join(dst_parent, "model.onnx")
                    if not os.path.isdir(dst_parent):
                        os.mkdir(dst_parent)
                    shutil.copyfile(src_path, dst_path)
                    # Optimization stores external weights as "<model>.onnx.data" next to the model.
                    weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
                    if os.path.isfile(weights_src_path):
                        weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
                        shutil.copyfile(weights_src_path, weights_dst_path)
        except Exception as e:
            log.error(f"Failed to optimize model '{self.original_filename}'.")
            log.error(e) # for test.
            shutil.rmtree(out_dir, ignore_errors=True)

    def preprocess(self, width: int, height: int, batch_size: int):
        """Ensure the pipeline is converted (and Olive-optimized when the Olive
        pipeline is selected) for the requested generation size, and return it."""
        olive.width = width
        olive.height = height
        olive.batch_size = batch_size
        olive.is_sdxl = self.constructor == diffusers.OnnxStableDiffusionXLPipeline
        self.convert()
        if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive':
            if width != height:
                log.warning("Olive detected different width and height. The quality of the result is not guaranteed.")
            self.optimize()
        return self.pipeline

View File

@ -21,8 +21,8 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
orig_pipeline = shared.sd_model
results = []
if isinstance(shared.sd_model, OlivePipeline):
shared.sd_model = shared.sd_model.optimize(p.width, p.height)
if hasattr(shared.sd_model, 'preprocess'):
shared.sd_model = shared.sd_model.preprocess(p.width, p.height, p.batch_size)
def is_txt2img():
return sd_models.get_diffusers_task(shared.sd_model) == sd_models.DiffusersTaskType.TEXT_2_IMAGE

View File

@ -147,7 +147,7 @@ def list_models():
model_list = list(modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"]))
if shared.backend == shared.Backend.DIFFUSERS:
model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir, clear=True)
model_list += modelloader.load_diffusers_models(model_path=shared.opts.olive_sideloaded_models_path, command_path=shared.opts.olive_sideloaded_models_path, clear=False)
model_list += modelloader.load_diffusers_models(model_path=shared.opts.onnx_sideloaded_models_path, command_path=shared.opts.onnx_sideloaded_models_path, clear=False)
for filename in sorted(model_list, key=str.lower):
checkpoint_info = CheckpointInfo(filename)
if checkpoint_info.name is not None:
@ -791,67 +791,39 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
shared.log.debug(f'Diffusers loading: path="{checkpoint_info.path}"')
pipeline, model_type = detect_pipeline(checkpoint_info.path, op)
if 'ONNX' in shared.opts.diffusers_pipeline:
from modules.onnx import get_execution_provider_options
diffusers_load_config['provider'] = (shared.opts.onnx_execution_provider, get_execution_provider_options(),)
if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive':
try:
from modules.onnx import OnnxStableDiffusionPipeline
sd_model = OnnxStableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.olive_sideloaded_models_path)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
shared.log.error(f'Failed loading {op}: {checkpoint_info.path} olive={e}')
return
from modules.onnx import OnnxAutoPipeline
if os.path.isdir(checkpoint_info.path):
sd_model = OnnxAutoPipeline.from_pretrained(checkpoint_info.path)
else:
err1 = None
err2 = None
err3 = None
try: # try autopipeline first, best choice but not all pipelines are available
sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model = OnnxAutoPipeline.from_single_file(checkpoint_info.path)
if sd_model is None and os.path.isdir(checkpoint_info.path):
err1 = None
err2 = None
err3 = None
try: # try autopipeline first, best choice but not all pipelines are available
sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err1 = e
# shared.log.error(f'AutoPipeline: {e}')
try: # try diffusion pipeline next second-best choice, works for most non-linked pipelines
if err1 is not None:
sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err1 = e
try: # try diffusion pipeline next second-best choice, works for most non-linked pipelines
if err1 is not None:
sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err2 = e
try: # try basic pipeline next just in case
if err2 is not None:
sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err3 = e # ignore last error
if err3 is not None:
shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}')
return
if model_type in ['InstaFlow']: # forced pipeline
sd_model = pipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
else:
err1, err2, err3 = None, None, None
try: # 1 - autopipeline, best choice but not all pipelines are available
sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
except Exception as e:
err2 = e
# shared.log.error(f'DiffusionPipeline: {e}')
try: # try basic pipeline next just in case
if err2 is not None:
sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err1 = e
# shared.log.error(f'AutoPipeline: {e}')
try: # 2 - diffusion pipeline, works for most non-linked pipelines
if err1 is not None:
sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err2 = e
# shared.log.error(f'DiffusionPipeline: {e}')
try: # 3 - try basic pipeline just in case
if err2 is not None:
sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err3 = e # ignore last error
shared.log.error(f'StableDiffusionPipeline: {e}')
if err3 is not None:
shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}')
return
except Exception as e:
err3 = e # ignore last error
shared.log.error(f'StableDiffusionPipeline: {e}')
if err3 is not None:
shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}')
return
elif os.path.isfile(checkpoint_info.path) and checkpoint_info.path.lower().endswith('.safetensors'):
# diffusers_load_config["local_files_only"] = True
diffusers_load_config["extract_ema"] = shared.opts.diffusers_extract_ema

View File

@ -17,7 +17,6 @@ from modules import errors, shared_items, shared_state, cmd_args, theme
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx import available_execution_providers, get_default_execution_provider
from modules.olive import enable_olive_onchange
import modules.interrogate
import modules.memmon
import modules.styles
@ -440,9 +439,9 @@ options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
"onnx_sep": OptionInfo("<h2>ONNX Runtime</h2>", "", gr.HTML),
"onnx_execution_provider": OptionInfo(get_default_execution_provider().value, 'Execution Provider', gr.Dropdown, lambda: {"choices": available_execution_providers }),
"onnx_cache_converted": OptionInfo(True, 'Cache converted models'),
"onnx_olive_sep": OptionInfo("<h3>Olive</h3>", "", gr.HTML),
"onnx_enable_olive": OptionInfo(False, 'Enable pipeline for Olive', onchange=enable_olive_onchange),
"onnx_olive_float16": OptionInfo(True, 'Olive use FP16 on optimization (will use FP32 if unchecked)'),
"onnx_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
}))
@ -471,8 +470,8 @@ options_templates.update(options_section(('system-paths', "System Paths"), {
"swinir_models_path": OptionInfo(os.path.join(paths.models_path, 'SwinIR'), "Folder with SwinIR models", folder=True),
"ldsr_models_path": OptionInfo(os.path.join(paths.models_path, 'LDSR'), "Folder with LDSR models", folder=True),
"clip_models_path": OptionInfo(os.path.join(paths.models_path, 'CLIP'), "Folder with CLIP models", folder=True),
"olive_cached_models_path": OptionInfo(os.path.join(paths.models_path, 'Olive', 'cache'), "Folder with olive optimized cached models", folder=True),
"olive_sideloaded_models_path": OptionInfo(os.path.join(paths.models_path, 'Olive', 'sideloaded'), "Folder with olive optimized sideloaded models", folder=True),
"onnx_cached_models_path": OptionInfo(os.path.join(paths.models_path, 'ONNX', 'cache'), "Folder with ONNX cached models", folder=True),
"onnx_sideloaded_models_path": OptionInfo(os.path.join(paths.models_path, 'ONNX', 'sideloaded'), "Folder with ONNX models from huggingface", folder=True),
"other_paths_sep_options": OptionInfo("<h2>Other paths</h2>", "", gr.HTML),
"openvino_cache_path": OptionInfo('cache', "Directory for OpenVINO cache", folder=True),

View File

@ -26,8 +26,7 @@ def list_crossattention():
def get_pipelines():
import diffusers
from modules.onnx import OnnxStableDiffusionPipeline
from modules.olive import OlivePipeline, is_available as is_olive_available
from modules.onnx import OnnxAutoPipeline
from installer import log
pipelines = { # note: not all pipelines can be used manually as they require prior pipeline next to decoder pipeline
'Autodetect': None,
@ -40,8 +39,8 @@ def get_pipelines():
'Stable Diffusion XL Img2Img': getattr(diffusers, 'StableDiffusionXLImg2ImgPipeline', None),
'Stable Diffusion XL Inpaint': getattr(diffusers, 'StableDiffusionXLInpaintPipeline', None),
'Stable Diffusion XL Instruct': getattr(diffusers, 'StableDiffusionXLInstructPix2PixPipeline', None),
'ONNX Stable Diffusion': OnnxStableDiffusionPipeline,
'ONNX Stable Diffusion with Olive': OlivePipeline,
'ONNX Stable Diffusion': OnnxAutoPipeline,
'ONNX Stable Diffusion with Olive': OnnxAutoPipeline,
'Latent Consistency Model': getattr(diffusers, 'LatentConsistencyModelPipeline', None),
'PixArt Alpha': getattr(diffusers, 'PixArtAlphaPipeline', None),
'UniDiffuser': getattr(diffusers, 'UniDiffuserPipeline', None),

View File

@ -373,9 +373,9 @@ def create_ui():
def hf_select(evt: gr.SelectData, data):
return data[evt.index[0]][0]
def hf_download_model(hub_id: str, token, variant, revision, mirror, olive_optimized):
def hf_download_model(hub_id: str, token, variant, revision, mirror, is_onnx, custom_pipeline):
from modules.modelloader import download_diffusers_model
download_diffusers_model(hub_id, cache_dir=opts.olive_sideloaded_models_path if olive_optimized else opts.diffusers_dir, token=token, variant=variant, revision=revision, mirror=mirror)
download_diffusers_model(hub_id, cache_dir=opts.onnx_sideloaded_models_path if is_onnx else opts.diffusers_dir, token=token, variant=variant, revision=revision, mirror=mirror, custom_pipeline=custom_pipeline)
from modules.sd_models import list_models # pylint: disable=W0621
list_models()
log.info(f'Diffuser model downloaded: model="{hub_id}"')
@ -394,7 +394,7 @@ def create_ui():
with gr.Row():
hf_variant = gr.Textbox(opts.cuda_dtype.lower(), label = 'Specify model variant', placeholder='')
hf_revision = gr.Textbox('', label = 'Specify model revision', placeholder='')
hf_olive = gr.Checkbox(False, label = 'Olive optimized')
hf_onnx = gr.Checkbox(False, label = 'ONNX model')
with gr.Row():
hf_token = gr.Textbox('', label='Huggingface token', placeholder='optional access token for private or gated models')
hf_mirror = gr.Textbox('', label='Huggingface mirror', placeholder='optional mirror site for downloads')
@ -411,7 +411,7 @@ def create_ui():
hf_search_text.submit(fn=hf_search, inputs=[hf_search_text], outputs=[hf_results])
hf_search_btn.click(fn=hf_search, inputs=[hf_search_text], outputs=[hf_results])
hf_results.select(fn=hf_select, inputs=[hf_results], outputs=[hf_selected])
hf_download_model_btn.click(fn=hf_download_model, inputs=[hf_selected, hf_token, hf_variant, hf_revision, hf_mirror, hf_olive], outputs=[models_outcome])
hf_download_model_btn.click(fn=hf_download_model, inputs=[hf_selected, hf_token, hf_variant, hf_revision, hf_mirror, hf_onnx, hf_custom_pipeline], outputs=[models_outcome])
with gr.Tab(label="CivitAI"):
data = []

View File

@ -25,6 +25,8 @@ lpips
omegaconf
open-clip-torch
opencv-contrib-python-headless
olive-ai
optimum
piexif
psutil
pyyaml