mirror of https://github.com/vladmandic/automatic
olive-ai 0.5.x support
parent
1db0ccc658
commit
5f5fa99360
|
|
@ -16,7 +16,14 @@
|
|||
"systems": {
|
||||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": { "accelerators": ["gpu"] }
|
||||
"config": {
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
|
|
@ -38,7 +45,6 @@
|
|||
"passes": {
|
||||
"optimize_CPUExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -49,7 +55,6 @@
|
|||
},
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -78,12 +83,13 @@
|
|||
"group_norm_channels_last": false
|
||||
},
|
||||
"force_fp32_ops": ["RandomNormalLike"],
|
||||
"force_fp16_inputs": { "GroupNorm": [0, 1, 2] }
|
||||
"force_fp16_inputs": {
|
||||
"GroupNorm": [0, 1, 2]
|
||||
}
|
||||
}
|
||||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -94,7 +100,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -117,17 +122,13 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"log_severity_level": 0,
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "text_encoder",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,14 @@
|
|||
"systems": {
|
||||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": { "accelerators": ["gpu"] }
|
||||
"config": {
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
|
|
@ -55,7 +62,6 @@
|
|||
"passes": {
|
||||
"optimize_CPUExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -66,7 +72,6 @@
|
|||
},
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "unet",
|
||||
"opt_level": 0,
|
||||
|
|
@ -95,12 +100,13 @@
|
|||
"group_norm_channels_last": false
|
||||
},
|
||||
"force_fp32_ops": ["RandomNormalLike"],
|
||||
"force_fp16_inputs": { "GroupNorm": [0, 1, 2] }
|
||||
"force_fp16_inputs": {
|
||||
"GroupNorm": [0, 1, 2]
|
||||
}
|
||||
}
|
||||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "unet",
|
||||
"opt_level": 0,
|
||||
|
|
@ -111,7 +117,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "unet",
|
||||
"opt_level": 0,
|
||||
|
|
@ -134,17 +139,13 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"log_severity_level": 0,
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "unet",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,14 @@
|
|||
"systems": {
|
||||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": { "accelerators": ["gpu"] }
|
||||
"config": {
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
|
|
@ -45,7 +52,6 @@
|
|||
"passes": {
|
||||
"optimize_CPUExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -56,7 +62,6 @@
|
|||
},
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -85,12 +90,13 @@
|
|||
"group_norm_channels_last": false
|
||||
},
|
||||
"force_fp32_ops": ["RandomNormalLike"],
|
||||
"force_fp16_inputs": { "GroupNorm": [0, 1, 2] }
|
||||
"force_fp16_inputs": {
|
||||
"GroupNorm": [0, 1, 2]
|
||||
}
|
||||
}
|
||||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -101,7 +107,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -124,17 +129,13 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"log_severity_level": 0,
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "vae_decoder",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,14 @@
|
|||
"systems": {
|
||||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": { "accelerators": ["gpu"] }
|
||||
"config": {
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
|
|
@ -45,7 +52,6 @@
|
|||
"passes": {
|
||||
"optimize_CPUExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -56,7 +62,6 @@
|
|||
},
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -85,12 +90,13 @@
|
|||
"group_norm_channels_last": false
|
||||
},
|
||||
"force_fp32_ops": ["RandomNormalLike"],
|
||||
"force_fp16_inputs": { "GroupNorm": [0, 1, 2] }
|
||||
"force_fp16_inputs": {
|
||||
"GroupNorm": [0, 1, 2]
|
||||
}
|
||||
}
|
||||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -101,7 +107,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -124,17 +129,13 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"log_severity_level": 0,
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "vae_encoder",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,12 @@
|
|||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": {
|
||||
"accelerators": ["gpu"]
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -73,7 +78,6 @@
|
|||
"passes": {
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -109,7 +113,6 @@
|
|||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -120,7 +123,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -132,17 +134,12 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "text_encoder",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@
|
|||
],
|
||||
"dynamic_axes": {
|
||||
"input_ids": { "0": "batch_size", "1": "sequence_length" },
|
||||
"text_embeds": { "0": "batch_size", "1": "sequence_length" },
|
||||
"text_embeds": { "0": "batch_size" },
|
||||
"last_hidden_state": { "0": "batch_size", "1": "sequence_length" },
|
||||
"hidden_states.0": { "0": "batch_size", "1": "sequence_length" },
|
||||
"hidden_states.1": { "0": "batch_size", "1": "sequence_length" },
|
||||
|
|
@ -90,7 +90,12 @@
|
|||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": {
|
||||
"accelerators": ["gpu"]
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -113,7 +118,6 @@
|
|||
"passes": {
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -149,7 +153,6 @@
|
|||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -160,7 +163,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "clip",
|
||||
"opt_level": 0,
|
||||
|
|
@ -172,17 +174,12 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "text_encoder_2",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,7 +40,12 @@
|
|||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": {
|
||||
"accelerators": ["gpu"]
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -63,7 +68,6 @@
|
|||
"passes": {
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "unet",
|
||||
"opt_level": 0,
|
||||
|
|
@ -99,7 +103,6 @@
|
|||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "unet",
|
||||
"opt_level": 0,
|
||||
|
|
@ -110,7 +113,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "unet",
|
||||
"opt_level": 0,
|
||||
|
|
@ -122,17 +124,12 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "unet",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,12 @@
|
|||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": {
|
||||
"accelerators": ["gpu"]
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -53,7 +58,6 @@
|
|||
"passes": {
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -111,7 +115,6 @@
|
|||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -121,7 +124,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -132,17 +134,12 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "vae_decoder",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,12 @@
|
|||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": {
|
||||
"accelerators": ["gpu"]
|
||||
"accelerators": [
|
||||
{
|
||||
"device": "gpu",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -53,7 +58,6 @@
|
|||
"passes": {
|
||||
"optimize_DmlExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -89,7 +93,6 @@
|
|||
},
|
||||
"optimize_CUDAExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -100,7 +103,6 @@
|
|||
},
|
||||
"optimize_ROCMExecutionProvider": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "vae",
|
||||
"opt_level": 0,
|
||||
|
|
@ -112,17 +114,12 @@
|
|||
},
|
||||
"pass_flows": [["optimize_AutoExecutionProvider"]],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "exhaustive"
|
||||
},
|
||||
"evaluator": "common_evaluator",
|
||||
"evaluate_input_model": false,
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"cache_dir": "cache",
|
||||
"output_name": "vae_encoder",
|
||||
"output_dir": "footprints",
|
||||
"execution_providers": ["DmlExecutionProvider"]
|
||||
"output_dir": "footprints"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -657,6 +657,8 @@ def check_torch():
|
|||
install('hidet', 'hidet')
|
||||
if opts.get('cuda_compile_backend', '') == 'deep-cache':
|
||||
install('DeepCache')
|
||||
if opts.get('cuda_compile_backend', '') == 'olive-ai':
|
||||
install('olive-ai')
|
||||
if opts.get('nncf_compress_weights', False) and not args.use_openvino:
|
||||
install('nncf==2.7.0', 'nncf')
|
||||
if args.profile:
|
||||
|
|
|
|||
|
|
@ -44,11 +44,6 @@ import onnxruntime
|
|||
onnxruntime.set_default_logger_severity(3)
|
||||
timer.startup.record("onnx")
|
||||
|
||||
# moved to webui.py:initialize()
|
||||
# from modules.onnx_impl import initialize_olive # pylint: disable=ungrouped-imports
|
||||
# initialize_olive()
|
||||
# timer.startup.record("olive")
|
||||
|
||||
from fastapi import FastAPI # pylint: disable=W0611,C0411
|
||||
import gradio # pylint: disable=W0611,C0411
|
||||
timer.startup.record("gradio")
|
||||
|
|
|
|||
|
|
@ -1,14 +1,11 @@
|
|||
import os
|
||||
from typing import Any, Dict, Callable, Optional
|
||||
from typing import Any, Dict, Optional
|
||||
import numpy as np
|
||||
import torch
|
||||
import diffusers
|
||||
import onnxruntime as ort
|
||||
import optimum.onnxruntime
|
||||
|
||||
|
||||
initialized = False
|
||||
run_olive_workflow = None
|
||||
|
||||
|
||||
class DynamicSessionOptions(ort.SessionOptions):
|
||||
|
|
@ -50,6 +47,9 @@ class TorchCompatibleModule:
|
|||
device = torch.device("cpu")
|
||||
dtype = torch.float32
|
||||
|
||||
def named_modules(self): # dummy
|
||||
return ()
|
||||
|
||||
def to(self, *_, **__):
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
@ -84,9 +84,6 @@ class TemporalModule(TorchCompatibleModule):
|
|||
class OnnxRuntimeModel(TorchCompatibleModule, diffusers.OnnxRuntimeModel):
|
||||
config = {} # dummy
|
||||
|
||||
def named_modules(self): # dummy
|
||||
return ()
|
||||
|
||||
def to(self, *args, **kwargs):
|
||||
from modules.onnx_impl.utils import extract_device, move_inference_session
|
||||
|
||||
|
|
@ -245,28 +242,6 @@ def initialize_onnx():
|
|||
initialized = True
|
||||
|
||||
|
||||
def initialize_olive():
|
||||
global run_olive_workflow # pylint: disable=global-statement
|
||||
from installer import installed, log
|
||||
if not installed('olive-ai', quiet=True) or not installed('onnx', quiet=True):
|
||||
return
|
||||
import sys
|
||||
import importlib
|
||||
orig_sys_path = sys.path
|
||||
venv_dir = os.environ.get("VENV_DIR", os.path.join(os.getcwd(), 'venv'))
|
||||
try:
|
||||
spec = importlib.util.find_spec('onnxruntime.transformers')
|
||||
sys.path = [d for d in spec.submodule_search_locations + sys.path if sys.path[1] not in d or venv_dir in d]
|
||||
from onnxruntime.transformers import convert_generation # pylint: disable=unused-import
|
||||
spec = importlib.util.find_spec('olive')
|
||||
sys.path = spec.submodule_search_locations + sys.path
|
||||
run_olive_workflow = importlib.import_module('olive.workflows').run
|
||||
except Exception as e:
|
||||
run_olive_workflow = None
|
||||
log.error(f'Olive: Failed to load olive-ai: {e}')
|
||||
sys.path = orig_sys_path
|
||||
|
||||
|
||||
def install_olive():
|
||||
from installer import installed, install, log
|
||||
if installed("olive-ai"):
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from modules.paths import sd_configs_path, models_path
|
|||
from modules.sd_models import CheckpointInfo
|
||||
from modules.processing import StableDiffusionProcessing
|
||||
from modules.olive_script import config
|
||||
from modules.onnx_impl import DynamicSessionOptions, TorchCompatibleModule, VAE, run_olive_workflow
|
||||
from modules.onnx_impl import DynamicSessionOptions, TorchCompatibleModule, VAE
|
||||
from modules.onnx_impl.utils import extract_device, move_inference_session, check_diffusers_cache, check_pipeline_sdxl, check_cache_onnx, load_init_dict, load_submodel, load_submodels, patch_kwargs, load_pipeline, get_base_constructor, get_io_config
|
||||
from modules.onnx_impl.execution_providers import ExecutionProvider, EP_TO_NAME, get_provider
|
||||
|
||||
|
|
@ -161,7 +161,7 @@ class OnnxRawPipeline(PipelineBase):
|
|||
in_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
|
||||
)
|
||||
|
||||
from modules import olive_script as olv
|
||||
from modules import olive_script as script
|
||||
|
||||
for submodel in submodels:
|
||||
destination = os.path.join(out_dir, submodel)
|
||||
|
|
@ -169,8 +169,8 @@ class OnnxRawPipeline(PipelineBase):
|
|||
if not os.path.isdir(destination):
|
||||
os.mkdir(destination)
|
||||
|
||||
model = getattr(olv, f"{submodel}_load")(in_dir)
|
||||
sample = getattr(olv, f"{submodel}_conversion_inputs")(None)
|
||||
model = getattr(script, f"{submodel}_load")(in_dir)
|
||||
sample = getattr(script, f"{submodel}_conversion_inputs")(None)
|
||||
with tempfile.TemporaryDirectory(prefix="onnx_conversion") as temp_dir:
|
||||
temp_path = os.path.join(temp_dir, "model.onnx")
|
||||
torch.onnx.export(
|
||||
|
|
@ -219,13 +219,8 @@ class OnnxRawPipeline(PipelineBase):
|
|||
json.dump(model_index, file)
|
||||
|
||||
def run_olive(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLike):
|
||||
if not shared.cmd_opts.debug:
|
||||
ort.set_default_logger_severity(4)
|
||||
|
||||
try:
|
||||
from olive.model import ONNXModel # olive-ai==0.4.0
|
||||
except ImportError:
|
||||
from olive.model import ONNXModelHandler as ONNXModel # olive-ai==0.5.0
|
||||
from olive.model import ONNXModelHandler
|
||||
from olive.workflows import run as run_workflows
|
||||
|
||||
shutil.rmtree("cache", ignore_errors=True)
|
||||
shutil.rmtree("footprints", ignore_errors=True)
|
||||
|
|
@ -247,19 +242,20 @@ class OnnxRawPipeline(PipelineBase):
|
|||
for i in range(len(flow)):
|
||||
flow[i] = flow[i].replace("AutoExecutionProvider", shared.opts.onnx_execution_provider)
|
||||
olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx"))
|
||||
olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
|
||||
olive_config["systems"]["local_system"]["config"]["accelerators"][0]["device"] = "cpu" if shared.opts.onnx_execution_provider == ExecutionProvider.CPU else "gpu" # TODO: npu
|
||||
olive_config["systems"]["local_system"]["config"]["accelerators"][0]["execution_providers"] = [shared.opts.onnx_execution_provider]
|
||||
|
||||
for pass_key in olive_config["passes"]:
|
||||
if olive_config["passes"][pass_key]["type"] == "OrtTransformersOptimization":
|
||||
float16 = shared.opts.olive_float16 and not (submodel == "vae_encoder" and shared.opts.olive_vae_encoder_float32)
|
||||
olive_config["passes"][pass_key]["config"]["float16"] = float16
|
||||
if not float16:
|
||||
olive_config["passes"][pass_key]["config"]["force_fp16_inputs"] = {}
|
||||
if shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm:
|
||||
if version.parse(ort.__version__) < version.parse("1.17.0"):
|
||||
olive_config["passes"][pass_key]["config"]["optimization_options"] = {"enable_skip_group_norm": False}
|
||||
if float16:
|
||||
olive_config["passes"][pass_key]["config"]["keep_io_types"] = False
|
||||
|
||||
run_olive_workflow(olive_config)
|
||||
run_workflows(olive_config)
|
||||
|
||||
with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r", encoding="utf-8") as footprint_file:
|
||||
footprints = json.load(footprint_file)
|
||||
|
|
@ -270,7 +266,7 @@ class OnnxRawPipeline(PipelineBase):
|
|||
|
||||
assert processor_final_pass_footprint, "Failed to optimize model"
|
||||
|
||||
optimized_model_paths[submodel] = ONNXModel(
|
||||
optimized_model_paths[submodel] = ONNXModelHandler(
|
||||
**processor_final_pass_footprint["model_config"]["config"]
|
||||
).model_path
|
||||
|
||||
|
|
@ -371,53 +367,50 @@ class OnnxRawPipeline(PipelineBase):
|
|||
in_dir = out_dir
|
||||
|
||||
if shared.opts.cuda_compile_backend == "olive-ai":
|
||||
if run_olive_workflow is None:
|
||||
log.warning('Olive: Skipping model compilation because olive-ai was loaded unsuccessfully.')
|
||||
submodels_for_olive = []
|
||||
|
||||
if "Text Encoder" in shared.opts.cuda_compile:
|
||||
if not self.is_refiner:
|
||||
submodels_for_olive.append("text_encoder")
|
||||
if self._is_sdxl:
|
||||
submodels_for_olive.append("text_encoder_2")
|
||||
if "Model" in shared.opts.cuda_compile:
|
||||
submodels_for_olive.append("unet")
|
||||
if "VAE" in shared.opts.cuda_compile:
|
||||
submodels_for_olive.append("vae_encoder")
|
||||
submodels_for_olive.append("vae_decoder")
|
||||
|
||||
if len(submodels_for_olive) == 0:
|
||||
log.warning("Olive: Skipping olive run.")
|
||||
else:
|
||||
submodels_for_olive = []
|
||||
log.warning("Olive implementation is experimental. It contains potentially an issue and is subject to change at any time.")
|
||||
|
||||
if "Text Encoder" in shared.opts.cuda_compile:
|
||||
if not self.is_refiner:
|
||||
submodels_for_olive.append("text_encoder")
|
||||
if self._is_sdxl:
|
||||
submodels_for_olive.append("text_encoder_2")
|
||||
if "Model" in shared.opts.cuda_compile:
|
||||
submodels_for_olive.append("unet")
|
||||
if "VAE" in shared.opts.cuda_compile:
|
||||
submodels_for_olive.append("vae_encoder")
|
||||
submodels_for_olive.append("vae_decoder")
|
||||
out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{config.width}w-{config.height}h")
|
||||
if not os.path.isdir(out_dir): # check the model is already optimized (cached)
|
||||
if not shared.opts.olive_cache_optimized:
|
||||
out_dir = shared.opts.onnx_temp_dir
|
||||
|
||||
if len(submodels_for_olive) == 0:
|
||||
log.warning("Olive: Skipping olive run.")
|
||||
else:
|
||||
log.warning("Olive implementation is experimental. It contains potentially an issue and is subject to change at any time.")
|
||||
if p.width != p.height:
|
||||
log.warning("Olive: Different width and height are detected. The quality of the result is not guaranteed.")
|
||||
|
||||
out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{config.width}w-{config.height}h")
|
||||
if not os.path.isdir(out_dir): # check the model is already optimized (cached)
|
||||
if not shared.opts.olive_cache_optimized:
|
||||
out_dir = shared.opts.onnx_temp_dir
|
||||
if shared.opts.olive_static_dims:
|
||||
sess_options = DynamicSessionOptions()
|
||||
sess_options.enable_static_dims({
|
||||
"is_sdxl": self._is_sdxl,
|
||||
"is_refiner": self.is_refiner,
|
||||
|
||||
if p.width != p.height:
|
||||
log.warning("Olive: Different width and height are detected. The quality of the result is not guaranteed.")
|
||||
"hidden_batch_size": p.batch_size if disable_classifier_free_guidance else p.batch_size * 2,
|
||||
"height": p.height,
|
||||
"width": p.width,
|
||||
})
|
||||
kwargs["sess_options"] = sess_options
|
||||
|
||||
if shared.opts.olive_static_dims:
|
||||
sess_options = DynamicSessionOptions()
|
||||
sess_options.enable_static_dims({
|
||||
"is_sdxl": self._is_sdxl,
|
||||
"is_refiner": self.is_refiner,
|
||||
|
||||
"hidden_batch_size": p.batch_size if disable_classifier_free_guidance else p.batch_size * 2,
|
||||
"height": p.height,
|
||||
"width": p.width,
|
||||
})
|
||||
kwargs["sess_options"] = sess_options
|
||||
|
||||
try:
|
||||
self.run_olive(submodels_for_olive, in_dir, out_dir)
|
||||
except Exception as e:
|
||||
log.error(f"Olive: Failed to run olive passes: model='{self.original_filename}', error={e}")
|
||||
shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)
|
||||
shutil.rmtree(out_dir, ignore_errors=True)
|
||||
try:
|
||||
self.run_olive(submodels_for_olive, in_dir, out_dir)
|
||||
except Exception as e:
|
||||
log.error(f"Olive: Failed to run olive passes: model='{self.original_filename}', error={e}")
|
||||
shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)
|
||||
shutil.rmtree(out_dir, ignore_errors=True)
|
||||
|
||||
pipeline = self.derive_properties(load_pipeline(self.constructor, out_dir, **kwargs))
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ def create_ui():
|
|||
from modules.shared import log, opts, cmd_opts, refresh_checkpoints
|
||||
from modules.sd_models import checkpoint_tiles, get_closet_checkpoint_match
|
||||
from modules.paths import sd_configs_path
|
||||
from . import run_olive_workflow
|
||||
from .execution_providers import ExecutionProvider, install_execution_provider
|
||||
from .utils import check_diffusers_cache
|
||||
|
||||
|
|
@ -39,7 +38,7 @@ def create_ui():
|
|||
ep_log = gr.HTML("")
|
||||
ep_install.click(fn=install_execution_provider, inputs=[ep_checkbox], outputs=[ep_log])
|
||||
|
||||
if run_olive_workflow is not None:
|
||||
if opts.cuda_compile_backend == "olive-ai":
|
||||
import olive.passes as olive_passes
|
||||
from olive.hardware.accelerator import AcceleratorSpec, Device
|
||||
accelerator = AcceleratorSpec(accelerator_type=Device.GPU, execution_provider=opts.onnx_execution_provider)
|
||||
|
|
@ -147,7 +146,9 @@ def create_ui():
|
|||
sd_configs[submodel]["passes"][pass_name]["config"][config_key] = value
|
||||
return listener
|
||||
|
||||
for config_key, v in getattr(olive_passes, config_dict["type"], olive_passes.Pass)._default_config(accelerator).items(): # pylint: disable=protected-access
|
||||
pass_cls = getattr(olive_passes, config_dict["type"], None)
|
||||
default_config = {} if pass_cls is None else pass_cls._default_config(accelerator) # pylint: disable=protected-access
|
||||
for config_key, v in default_config.items():
|
||||
component = None
|
||||
if v.type_ == bool:
|
||||
component = gr.Checkbox
|
||||
|
|
@ -160,7 +161,7 @@ def create_ui():
|
|||
sd_pass_config_components[submodel][pass_name][config_key] = component
|
||||
component.change(fn=create_pass_config_change_listener(submodel, pass_name, config_key), inputs=component)
|
||||
|
||||
pass_type.change(fn=sd_create_change_listener(submodel, "passes", config_key, "type"), inputs=pass_type) # pylint: disable=undefined-loop-variable
|
||||
pass_type.change(fn=sd_create_change_listener(submodel, "passes", pass_name, "type"), inputs=pass_type)
|
||||
|
||||
def sd_save():
|
||||
for k, v in sd_configs.items():
|
||||
|
|
@ -208,7 +209,9 @@ def create_ui():
|
|||
sdxl_configs[submodel]["passes"][pass_name]["config"][config_key] = value
|
||||
return listener
|
||||
|
||||
for config_key, v in getattr(olive_passes, config_dict["type"], olive_passes.Pass)._default_config(accelerator).items(): # pylint: disable=protected-access
|
||||
pass_cls = getattr(olive_passes, config_dict["type"], None)
|
||||
default_config = {} if pass_cls is None else pass_cls._default_config(accelerator) # pylint: disable=protected-access
|
||||
for config_key, v in default_config.items():
|
||||
component = None
|
||||
if v.type_ == bool:
|
||||
component = gr.Checkbox
|
||||
|
|
|
|||
6
webui.py
6
webui.py
|
|
@ -82,12 +82,6 @@ def initialize():
|
|||
log.debug('Initializing')
|
||||
check_rollback_vae()
|
||||
|
||||
if shared.opts.cuda_compile_backend == "olive-ai":
|
||||
from modules.onnx_impl import initialize_olive, install_olive
|
||||
install_olive()
|
||||
initialize_olive()
|
||||
timer.startup.record("olive")
|
||||
|
||||
modules.sd_samplers.list_samplers()
|
||||
timer.startup.record("samplers")
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue