diff --git a/configs/olive/sd_text_encoder.json b/configs/olive/sd_text_encoder.json index 004cb1393..93f75bd8a 100644 --- a/configs/olive/sd_text_encoder.json +++ b/configs/olive/sd_text_encoder.json @@ -38,12 +38,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/olive/sd_unet.json b/configs/olive/sd_unet.json index a2e40a0ca..4051f69f1 100644 --- a/configs/olive/sd_unet.json +++ b/configs/olive/sd_unet.json @@ -55,15 +55,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14, - "save_as_external_data": true, - "all_tensors_to_one_file": true, - "external_data_name": "weights.pb" - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/olive/sd_vae_decoder.json b/configs/olive/sd_vae_decoder.json index 82b19b54f..eec0bece6 100644 --- a/configs/olive/sd_vae_decoder.json +++ b/configs/olive/sd_vae_decoder.json @@ -45,12 +45,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/olive/sd_vae_encoder.json b/configs/olive/sd_vae_encoder.json index 52022fad6..0307dbe10 100644 --- a/configs/olive/sd_vae_encoder.json +++ b/configs/olive/sd_vae_encoder.json @@ -45,12 +45,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/olive/sdxl_text_encoder.json b/configs/olive/sdxl_text_encoder.json index c55c7f098..7f3a064ae 100644 --- a/configs/olive/sdxl_text_encoder.json +++ b/configs/olive/sdxl_text_encoder.json @@ -71,12 +71,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/olive/sdxl_text_encoder_2.json b/configs/olive/sdxl_text_encoder_2.json index 8e4b5df62..cb935ed42 100644 --- a/configs/olive/sdxl_text_encoder_2.json +++ b/configs/olive/sdxl_text_encoder_2.json @@ -111,12 +111,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/olive/sdxl_unet.json b/configs/olive/sdxl_unet.json index 1f24c1464..e835af72e 100644 --- a/configs/olive/sdxl_unet.json +++ b/configs/olive/sdxl_unet.json @@ -61,15 +61,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14, - "save_as_external_data": true, - "all_tensors_to_one_file": true, - "external_data_name": "weights.pb" - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/olive/sdxl_vae_decoder.json b/configs/olive/sdxl_vae_decoder.json index 065709273..e27a72abe 100644 --- a/configs/olive/sdxl_vae_decoder.json +++ b/configs/olive/sdxl_vae_decoder.json @@ -51,12 +51,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/olive/sdxl_vae_encoder.json b/configs/olive/sdxl_vae_encoder.json index 15ee602b0..2d05dcbd1 100644 --- a/configs/olive/sdxl_vae_encoder.json +++ b/configs/olive/sdxl_vae_encoder.json @@ -51,12 +51,6 @@ } }, "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, "optimize": { "type": "OrtTransformersOptimization", "disable_search": true, diff --git a/configs/onnx/sd_text_encoder.json b/configs/onnx/sd_text_encoder.json new file mode 100644 index 000000000..8ca69c357 --- /dev/null +++ b/configs/onnx/sd_text_encoder.json @@ -0,0 +1,62 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "text_encoder_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": ["input_ids"], + "output_names": ["last_hidden_state", "pooler_output"], + "dynamic_axes": { "input_ids": { "0": "batch", "1": "sequence" } } + }, + "dummy_inputs_func": "text_encoder_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "text_encoder_data_loader", + "batch_size": 1 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14 + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "text_encoder", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/configs/onnx/sd_unet.json b/configs/onnx/sd_unet.json new file mode 100644 index 000000000..7b4224223 --- /dev/null +++ b/configs/onnx/sd_unet.json @@ -0,0 +1,82 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "unet_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": [ + "sample", + "timestep", + "encoder_hidden_states", + "return_dict" + ], + "output_names": ["out_sample"], + "dynamic_axes": { + "sample": { + "0": "unet_sample_batch", + "1": "unet_sample_channels", + "2": "unet_sample_height", + "3": "unet_sample_width" + }, + "timestep": { "0": "unet_time_batch" }, + "encoder_hidden_states": { + "0": "unet_hidden_batch", + "1": "unet_hidden_sequence" + } + } + }, + "dummy_inputs_func": "unet_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "unet_data_loader", + "batch_size": 2 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "external_data_name": "weights.pb" + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "unet", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/configs/onnx/sd_vae_decoder.json b/configs/onnx/sd_vae_decoder.json new file mode 100644 index 000000000..0b10752df --- /dev/null +++ b/configs/onnx/sd_vae_decoder.json @@ -0,0 +1,69 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "vae_decoder_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": ["latent_sample", "return_dict"], + "output_names": ["sample"], + "dynamic_axes": { + "latent_sample": { + "0": "batch", + "1": "channels", + "2": "height", + "3": "width" + } + } + }, + "dummy_inputs_func": "vae_decoder_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "vae_decoder_data_loader", + "batch_size": 1 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14 + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "vae_decoder", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/configs/onnx/sd_vae_encoder.json b/configs/onnx/sd_vae_encoder.json new file mode 100644 index 000000000..899ffe2d2 --- /dev/null +++ b/configs/onnx/sd_vae_encoder.json @@ -0,0 +1,69 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "vae_encoder_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": ["sample", "return_dict"], + "output_names": ["latent_sample"], + "dynamic_axes": { + "sample": { + "0": "batch", + "1": "channels", + "2": "height", + "3": "width" + } + } + }, + "dummy_inputs_func": "vae_encoder_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "vae_encoder_data_loader", + "batch_size": 1 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14 + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "vae_encoder", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/configs/onnx/sdxl_text_encoder.json b/configs/onnx/sdxl_text_encoder.json new file mode 100644 index 000000000..b1d95a071 --- /dev/null +++ b/configs/onnx/sdxl_text_encoder.json @@ -0,0 +1,95 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "text_encoder_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": ["input_ids", "output_hidden_states"], + "output_names": [ + "last_hidden_state", + "pooler_output", + "hidden_states.0", + "hidden_states.1", + "hidden_states.2", + "hidden_states.3", + "hidden_states.4", + "hidden_states.5", + "hidden_states.6", + "hidden_states.7", + "hidden_states.8", + "hidden_states.9", + "hidden_states.10", + "hidden_states.11", + "hidden_states.12" + ], + "dynamic_axes": { + "input_ids": { "0": "batch_size", "1": "sequence_length" }, + "last_hidden_state": { "0": "batch_size", "1": "sequence_length" }, + "pooler_output": { "0": "batch_size" }, + "hidden_states.0": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.1": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.2": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.3": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.4": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.5": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.6": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.7": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.8": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.9": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.10": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.11": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.12": { "0": "batch_size", "1": "sequence_length" } + } + }, + "dummy_inputs_func": "text_encoder_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "text_encoder_data_loader", + "batch_size": 1 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14 + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "text_encoder", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/configs/onnx/sdxl_text_encoder_2.json b/configs/onnx/sdxl_text_encoder_2.json new file mode 100644 index 000000000..1663063a0 --- /dev/null +++ b/configs/onnx/sdxl_text_encoder_2.json @@ -0,0 +1,135 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "text_encoder_2_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": ["input_ids", "output_hidden_states"], + "output_names": [ + "text_embeds", + "last_hidden_state", + "hidden_states.0", + "hidden_states.1", + "hidden_states.2", + "hidden_states.3", + "hidden_states.4", + "hidden_states.5", + "hidden_states.6", + "hidden_states.7", + "hidden_states.8", + "hidden_states.9", + "hidden_states.10", + "hidden_states.11", + "hidden_states.12", + "hidden_states.13", + "hidden_states.14", + "hidden_states.15", + "hidden_states.16", + "hidden_states.17", + "hidden_states.18", + "hidden_states.19", + "hidden_states.20", + "hidden_states.21", + "hidden_states.22", + "hidden_states.23", + "hidden_states.24", + "hidden_states.25", + "hidden_states.26", + "hidden_states.27", + "hidden_states.28", + "hidden_states.29", + "hidden_states.30", + "hidden_states.31", + "hidden_states.32" + ], + "dynamic_axes": { + "input_ids": { "0": "batch_size", "1": "sequence_length" }, + "text_embeds": { "0": "batch_size", "1": "sequence_length" }, + "last_hidden_state": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.0": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.1": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.2": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.3": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.4": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.5": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.6": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.7": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.8": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.9": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.10": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.11": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.12": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.13": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.14": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.15": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.16": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.17": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.18": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.19": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.20": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.21": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.22": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.23": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.24": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.25": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.26": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.27": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.28": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.29": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.30": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.31": { "0": "batch_size", "1": "sequence_length" }, + "hidden_states.32": { "0": "batch_size", "1": "sequence_length" } + } + }, + "dummy_inputs_func": "text_encoder_2_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "text_encoder_2_data_loader", + "batch_size": 1 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14 + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "text_encoder_2", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/configs/onnx/sdxl_unet.json b/configs/onnx/sdxl_unet.json new file mode 100644 index 000000000..78864af07 --- /dev/null +++ b/configs/onnx/sdxl_unet.json @@ -0,0 +1,88 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "unet_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": [ + "sample", + "timestep", + "encoder_hidden_states", + "text_embeds", + "time_ids" + ], + "output_names": ["out_sample"], + "dynamic_axes": { + "sample": { + "0": "unet_sample_batch", + "1": "unet_sample_channels", + "2": "unet_sample_height", + "3": "unet_sample_width" + }, + "timestep": { "0": "unet_time_batch" }, + "encoder_hidden_states": { + "0": "unet_hidden_batch", + "1": "unet_hidden_sequence" + }, + "text_embeds": { + "0": "unet_text_embeds_batch", + "1": "unet_text_embeds_size" + }, + "time_ids": { "0": "unet_time_ids_batch", "1": "unet_time_ids_size" } + } + }, + "dummy_inputs_func": "unet_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "unet_data_loader", + "batch_size": 2 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "external_data_name": "weights.pb" + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "unet", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/configs/onnx/sdxl_vae_decoder.json b/configs/onnx/sdxl_vae_decoder.json new file mode 100644 index 000000000..9c0092568 --- /dev/null +++ b/configs/onnx/sdxl_vae_decoder.json @@ -0,0 +1,75 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "vae_decoder_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": ["latent_sample", "return_dict"], + "output_names": ["sample"], + "dynamic_axes": { + "latent_sample": { + "0": "batch_size", + "1": "num_channels_latent", + "2": "height_latent", + "3": "width_latent" + }, + "sample": { + "0": "batch_size", + "1": "num_channels", + "2": "height", + "3": "width" + } + } + }, + "dummy_inputs_func": "vae_decoder_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "vae_decoder_data_loader", + "batch_size": 1 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14 + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "vae_decoder", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/configs/onnx/sdxl_vae_encoder.json b/configs/onnx/sdxl_vae_encoder.json new file mode 100644 index 000000000..1c71e78e4 --- /dev/null +++ b/configs/onnx/sdxl_vae_encoder.json @@ -0,0 +1,75 @@ +{ + "input_model": { + "type": "PyTorchModel", + "config": { + "model_path": "", + "model_loader": "vae_encoder_load", + "model_script": "modules/olive.py", + "io_config": { + "input_names": ["sample", "return_dict"], + "output_names": ["latent_sample"], + "dynamic_axes": { + "sample": { + "0": "batch_size", + "1": "num_channels", + "2": "height", + "3": "width" + }, + "latent_sample": { + "0": "batch_size", + "1": "num_channels_latent", + "2": "height_latent", + "3": "width_latent" + } + } + }, + "dummy_inputs_func": "vae_encoder_conversion_inputs" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "config": { + "accelerators": ["gpu"] + } + } + }, + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [{ "name": "avg" }], + "user_config": { + "user_script": "modules/olive.py", + "dataloader_func": "vae_encoder_data_loader", + "batch_size": 1 + } + } + ] + } + }, + "passes": { + "convert": { + "type": "OnnxConversion", + "config": { + "target_opset": 14 + } + } + }, + "engine": { + "search_strategy": { + "execution_order": "joint", + "search_algorithm": "exhaustive" + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_name": "vae_encoder", + "output_dir": "footprints", + "execution_providers": ["DmlExecutionProvider"] + } +} diff --git a/launch.py b/launch.py index 9f4d639f8..e6584e146 100755 --- a/launch.py +++ b/launch.py @@ -31,9 +31,8 @@ except ModuleNotFoundError: def init_olive(): try: - if installer.opts['onnx_enable_olive']: - import olive.workflows # pylint: disable=unused-import - installer.log.debug('Load olive') + import olive.workflows # pylint: disable=unused-import + installer.log.debug('Load olive') except Exception as e: installer.log.error(f'Failed to load olive: {e}') diff --git a/modules/olive.py b/modules/olive.py index 64a9d2387..41346295c 100644 --- a/modules/olive.py +++ b/modules/olive.py @@ -1,202 +1,21 @@ import os -import sys -import json import torch -import shutil import diffusers from transformers.models.clip.modeling_clip import CLIPTextModel, CLIPTextModelWithProjection -from installer import log -from modules import shared -from modules.paths import sd_configs_path -from modules.sd_models import CheckpointInfo -from modules.onnx import ExecutionProvider, get_execution_provider_options -is_available = "olive" in sys.modules # Olive is not available if it is not loaded at startup. -def enable_olive_onchange(): - from installer import installed, install, uninstall - if shared.opts.onnx_enable_olive: - if not installed('olive-ai', reload=True, quiet=True): - install('olive-ai', 'olive-ai') - else: - global is_available - is_available = False - if "olive" in sys.modules: - del sys.modules["olive"] - if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive': - shared.opts.diffusers_pipeline = 'ONNX Stable Diffusion' - if installed('olive-ai', reload=True, quiet=True): - uninstall('olive-ai') +is_sdxl = False -submodels = ("text_encoder", "unet", "vae_encoder", "vae_decoder",) +width = 512 +height = 512 +batch_size = 1 -EP_TO_NAME = { - ExecutionProvider.CPU: "cpu", - ExecutionProvider.DirectML: "gpu-dml", - ExecutionProvider.CUDA: "gpu-?", # TODO - ExecutionProvider.ROCm: "gpu-rocm", - ExecutionProvider.OpenVINO: "gpu", # Other devices can use --use-openvino instead of olive -} - -class OlivePipeline(diffusers.DiffusionPipeline): - model_type = diffusers.OnnxStableDiffusionPipeline.__name__ - sd_model_hash: str - sd_checkpoint_info: CheckpointInfo - sd_model_checkpoint: str - config = {} - - unoptimized: diffusers.DiffusionPipeline - original_filename: str - - def __init__(self, path, pipeline: diffusers.DiffusionPipeline): - self.original_filename = os.path.basename(path) - self.unoptimized = pipeline - del pipeline - if not os.path.exists(shared.opts.olive_temp_dir): - os.mkdir(shared.opts.olive_temp_dir) - self.unoptimized.save_pretrained(shared.opts.olive_temp_dir) - - @staticmethod - def from_pretrained(pretrained_model_name_or_path, **kwargs): - return OlivePipeline(pretrained_model_name_or_path, diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs)) - - @staticmethod - def from_single_file(pretrained_model_name_or_path, **kwargs): - return OlivePipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs)) - - @staticmethod - def from_ckpt(*args, **kwargs): - return OlivePipeline.from_single_file(**args, **kwargs) - - def derive_properties(self, pipeline: diffusers.OnnxStableDiffusionPipeline): - pipeline.sd_model_hash = self.sd_model_hash - pipeline.sd_checkpoint_info = self.sd_checkpoint_info - pipeline.sd_model_checkpoint = self.sd_model_checkpoint - return pipeline - - def to(self, *args, **kwargs): - pass - - def optimize(self, width: int, height: int): - from olive.workflows import run - from olive.model import ONNXModel - - if shared.opts.onnx_execution_provider == ExecutionProvider.ROCm: - from olive.hardware.accelerator import AcceleratorLookup - AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm) - - if width != height: - log.warning("Olive received different width and height. The quality of the result is not guaranteed.") - - out_dir = os.path.join(shared.opts.olive_cached_models_path, f"{self.original_filename}-{width}w-{height}h") - if os.path.isdir(out_dir): # already optimized (cached) - del self.unoptimized - return self.derive_properties( - diffusers.OnnxStableDiffusionPipeline.from_pretrained( - out_dir, - ) - ) - - try: - if shared.opts.onnx_cache_optimized: - shutil.copytree( - shared.opts.olive_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt") - ) - - optimize_config["width"] = width - optimize_config["height"] = height - - optimized_model_paths = {} - - for submodel in submodels: - log.info(f"\nOptimizing {submodel}") - - with open(os.path.join(sd_configs_path, "olive", f"sd_{submodel}.json"), "r") as config_file: - olive_config = json.load(config_file) - olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16 - if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm): - olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True - olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider] - - run(olive_config) - - with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file: - footprints = json.load(footprint_file) - conversion_footprint = None - optimizer_footprint = None - for _, footprint in footprints.items(): - if footprint["from_pass"] == "OnnxConversion": - conversion_footprint = footprint - elif footprint["from_pass"] == "OrtTransformersOptimization": - optimizer_footprint = footprint - - assert conversion_footprint and optimizer_footprint, "Failed to optimize model" - - optimized_model_paths[submodel] = ONNXModel( - **optimizer_footprint["model_config"]["config"] - ).model_path - - log.info(f"Optimized {submodel}") - shutil.rmtree(shared.opts.olive_temp_dir) - - kwargs = { - "tokenizer": self.unoptimized.tokenizer, - "scheduler": self.unoptimized.scheduler, - "safety_checker": self.unoptimized.safety_checker if hasattr(self.unoptimized, "safety_checker") else None, - "feature_extractor": self.unoptimized.feature_extractor, - } - del self.unoptimized - for submodel in submodels: - kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained( - os.path.dirname(optimized_model_paths[submodel]), - provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),), - ) - - pipeline = self.derive_properties( - diffusers.OnnxStableDiffusionPipeline( - **kwargs, - requires_safety_checker=False, - ) - ) - del kwargs - if shared.opts.onnx_cache_optimized: - pipeline.to_json_file(os.path.join(out_dir, "model_index.json")) - - for submodel in submodels: - src_path = optimized_model_paths[submodel] - src_parent = os.path.dirname(src_path) - dst_parent = os.path.join(out_dir, submodel) - dst_path = os.path.join(dst_parent, "model.onnx") - if not os.path.isdir(dst_parent): - os.mkdir(dst_parent) - shutil.copyfile(src_path, dst_path) - - weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data")) - if os.path.isfile(weights_src_path): - weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data")) - shutil.copyfile(weights_src_path, weights_dst_path) - except Exception as e: - log.error(f"Failed to optimize model '{self.original_filename}'.") - log.error(e) # for test. - shutil.rmtree(shared.opts.olive_temp_dir, ignore_errors=True) - shutil.rmtree(out_dir, ignore_errors=True) - pipeline = None - shutil.rmtree("cache", ignore_errors=True) - shutil.rmtree("footprints", ignore_errors=True) - return pipeline # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -optimize_config = { - "is_sdxl": False, - - "width": 512, - "height": 512, -} - # Helper latency-only dataloader that creates random tensors with no label class RandomDataLoader: @@ -219,11 +38,11 @@ def text_encoder_inputs(batchsize, torch_dtype): return { "input_ids": input_ids, "output_hidden_states": True, - } if optimize_config["is_sdxl"] else input_ids + } if is_sdxl else input_ids def text_encoder_load(model_name): - model = CLIPTextModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder") + model = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder") return model @@ -248,7 +67,7 @@ def text_encoder_2_inputs(batchsize, torch_dtype): def text_encoder_2_load(model_name): - model = CLIPTextModelWithProjection.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder_2") + model = CLIPTextModelWithProjection.from_pretrained(model_name, subfolder="text_encoder_2") return model @@ -268,10 +87,8 @@ def text_encoder_2_data_loader(data_dir, batchsize, *args, **kwargs): def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False): # TODO (pavignol): All the multiplications by 2 here are bacause the XL base has 2 text encoders # For refiner, it should be multiplied by 1 (single text encoder) - height = optimize_config["height"] - width = optimize_config["width"] - if optimize_config["is_sdxl"]: + if is_sdxl: inputs = { "sample": torch.rand((2 * batchsize, 4, height // 8, width // 8), dtype=torch_dtype), "timestep": torch.rand((1,), dtype=torch_dtype), @@ -281,12 +98,12 @@ def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False): if is_conversion_inputs: inputs["additional_inputs"] = { "added_cond_kwargs": { - "text_embeds": torch.rand((2 * batchsize, height + 256), dtype=torch_dtype), + "text_embeds": torch.rand((2 * batchsize, 1280), dtype=torch_dtype), "time_ids": torch.rand((2 * batchsize, 6), dtype=torch_dtype), } } else: - inputs["text_embeds"] = torch.rand((2 * batchsize, height + 256), dtype=torch_dtype) + inputs["text_embeds"] = torch.rand((2 * batchsize, 1280), dtype=torch_dtype) inputs["time_ids"] = torch.rand((2 * batchsize, 6), dtype=torch_dtype) else: inputs = { @@ -296,11 +113,22 @@ def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False): "return_dict": False, } + if is_conversion_inputs: + inputs["additional_inputs"] = { + "added_cond_kwargs": { + "text_embeds": torch.rand((1, 1280), dtype=torch_dtype), + "time_ids": torch.rand((1, 5), dtype=torch_dtype), + } + } + else: + inputs["onnx::Concat_4"] = torch.rand((1, 1280), dtype=torch_dtype) + inputs["onnx::Shape_5"] = torch.rand((1, 5), dtype=torch_dtype) + return inputs def unet_load(model_name): - model = diffusers.UNet2DConditionModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="unet") + model = diffusers.UNet2DConditionModel.from_pretrained(model_name, subfolder="unet") return model @@ -319,13 +147,13 @@ def unet_data_loader(data_dir, batchsize, *args, **kwargs): def vae_encoder_inputs(batchsize, torch_dtype): return { - "sample": torch.rand((batchsize, 3, optimize_config["height"], optimize_config["width"]), dtype=torch_dtype), + "sample": torch.rand((batchsize, 3, height, width), dtype=torch_dtype), "return_dict": False, } def vae_encoder_load(model_name): - source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae") + source = os.path.join(model_name, "vae") if not os.path.isdir(source): source += "_encoder" model = diffusers.AutoencoderKL.from_pretrained(source) @@ -348,13 +176,13 @@ def vae_encoder_data_loader(data_dir, batchsize, *args, **kwargs): def vae_decoder_inputs(batchsize, torch_dtype): return { - "latent_sample": torch.rand((batchsize, 4, optimize_config["height"] // 8, optimize_config["width"] // 8), dtype=torch_dtype), + "latent_sample": torch.rand((batchsize, 4, height // 8, width // 8), dtype=torch_dtype), "return_dict": False, } def vae_decoder_load(model_name): - source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae") + source = os.path.join(model_name, "vae") if not os.path.isdir(source): source += "_decoder" model = diffusers.AutoencoderKL.from_pretrained(source) diff --git a/modules/onnx.py b/modules/onnx.py index 0ab540c3b..ec58f7b88 100644 --- a/modules/onnx.py +++ b/modules/onnx.py @@ -1,13 +1,18 @@ import os +import json import torch +import shutil import importlib -import diffusers import numpy as np import onnxruntime as ort +import diffusers +import optimum.onnxruntime from enum import Enum -from typing import Union, Optional, Callable, List +from abc import ABCMeta +from typing import Any, Dict, Union, Optional, Callable, List from installer import log -from modules import shared +from modules import shared, olive +from modules.paths import sd_configs_path from modules.sd_models import CheckpointInfo class ExecutionProvider(str, Enum): @@ -17,8 +22,17 @@ class ExecutionProvider(str, Enum): ROCm = "ROCMExecutionProvider" OpenVINO = "OpenVINOExecutionProvider" +submodels = ("text_encoder", "unet", "vae_encoder", "vae_decoder",) available_execution_providers: List[ExecutionProvider] = ort.get_available_providers() +EP_TO_NAME = { + ExecutionProvider.CPU: "cpu", + ExecutionProvider.DirectML: "gpu-dml", + ExecutionProvider.CUDA: "gpu-?", # TODO + ExecutionProvider.ROCm: "gpu-rocm", + ExecutionProvider.OpenVINO: "gpu", # Other devices can use --use-openvino instead of olive +} + def get_default_execution_provider() -> ExecutionProvider: from modules import devices if devices.backend == "cpu": @@ -64,19 +78,46 @@ class OnnxRuntimeModel(diffusers.OnnxRuntimeModel): return () +# OnnxRuntimeModel Hijack. +OnnxRuntimeModel.__module__ = 'diffusers' diffusers.OnnxRuntimeModel = OnnxRuntimeModel -class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline): - model_type = diffusers.OnnxStableDiffusionPipeline.__name__ +class OnnxPipelineBase(diffusers.DiffusionPipeline, metaclass=ABCMeta): + model_type: str sd_model_hash: str sd_checkpoint_info: CheckpointInfo sd_model_checkpoint: str + def __init__(self): + self.model_type = self.__class__.__name__ + + +class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline, OnnxPipelineBase): + def __init__( + self, + vae_encoder: diffusers.OnnxRuntimeModel, + vae_decoder: diffusers.OnnxRuntimeModel, + text_encoder: diffusers.OnnxRuntimeModel, + tokenizer, + unet: diffusers.OnnxRuntimeModel, + scheduler, + safety_checker: diffusers.OnnxRuntimeModel, + feature_extractor, + requires_safety_checker: bool = True + ): + super().__init__(vae_encoder, vae_decoder, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor, requires_safety_checker) + @staticmethod def from_pretrained(pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs): - kwargs["provider"] = kwargs["provider"] if "provider" in kwargs else (shared.opts.onnx_execution_provider, get_execution_provider_options(),) - init_dict = super(OnnxStableDiffusionPipeline, OnnxStableDiffusionPipeline).extract_init_dict(diffusers.DiffusionPipeline.load_config(pretrained_model_name_or_path), **kwargs)[0] + sess_options = kwargs.get("sess_options", ort.SessionOptions()) + provider = kwargs.get("provider", (shared.opts.onnx_execution_provider, get_execution_provider_options(),)) + model_config = super(OnnxStableDiffusionPipeline, OnnxStableDiffusionPipeline).extract_init_dict(diffusers.DiffusionPipeline.load_config(pretrained_model_name_or_path)) + init_dict = {} + for d in model_config: + if 'unet' in d: + init_dict = d + break init_kwargs = {} for k, v in init_dict.items(): if not isinstance(v, list): @@ -90,7 +131,8 @@ class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline): constructor = getattr(library, constructor_name) submodel_kwargs = {} if issubclass(constructor, diffusers.OnnxRuntimeModel): - submodel_kwargs["provider"] = kwargs["provider"] + submodel_kwargs["sess_options"] = sess_options + submodel_kwargs["provider"] = provider try: init_kwargs[k] = constructor.from_pretrained( os.path.join(pretrained_model_name_or_path, k), @@ -244,4 +286,316 @@ class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline): return diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) + diffusers.OnnxStableDiffusionPipeline = OnnxStableDiffusionPipeline + + +class OnnxStableDiffusionXLPipeline(optimum.onnxruntime.ORTStableDiffusionXLPipeline, OnnxPipelineBase): + def __init__( + self, + vae_decoder_session, + text_encoder_session, + unet_session, + config: Dict[str, Any], + tokenizer, + scheduler, + feature_extractor = None, + vae_encoder_session = None, + text_encoder_2_session = None, + tokenizer_2 = None, + use_io_binding: bool | None = None, + model_save_dir = None, + add_watermarker: bool | None = None + ): + super().__init__(vae_decoder_session, text_encoder_session, unet_session, config, tokenizer, scheduler, feature_extractor, vae_encoder_session, text_encoder_2_session, tokenizer_2, use_io_binding, model_save_dir, add_watermarker) + + +OnnxStableDiffusionXLPipeline.__module__ = 'optimum.onnxruntime.modeling_diffusion' +OnnxStableDiffusionXLPipeline.__name__ = 'ORTStableDiffusionXLPipeline' +diffusers.OnnxStableDiffusionXLPipeline = OnnxStableDiffusionXLPipeline + + +class OnnxAutoPipeline(OnnxPipelineBase): + """ + Possible Cases: + 1. from .ckpt or .safetensors + 2. from downloaded non-Onnx model + 3. from downloaded Onnx model + 4. from cached converted Onnx model + 5. from cached optimized model + """ + constructor: Union[diffusers.OnnxStableDiffusionPipeline, diffusers.OnnxStableDiffusionXLPipeline] + config = {} + + pipeline: diffusers.DiffusionPipeline + original_filename: str + + def __init__(self, path, pipeline: diffusers.DiffusionPipeline): + self.original_filename = os.path.basename(path) + self.pipeline = pipeline + del pipeline + if os.path.exists(shared.opts.onnx_temp_dir): + shutil.rmtree(shared.opts.onnx_temp_dir) + os.mkdir(shared.opts.onnx_temp_dir) + self.constructor = diffusers.OnnxStableDiffusionXLPipeline if hasattr(self.pipeline, "text_encoder_2") else diffusers.OnnxStableDiffusionPipeline + self.model_type = self.constructor.__name__ + self.pipeline.save_pretrained(shared.opts.onnx_temp_dir) + + @staticmethod + def from_pretrained(pretrained_model_name_or_path, **kwargs): + pipeline = None + try: # load from Onnx SD model + pipeline = diffusers.OnnxStableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs) + except Exception: + pass + if pipeline is None: + try: # load from Onnx SDXL model + pipeline = diffusers.OnnxStableDiffusionXLPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs) + except Exception: + pass + if pipeline is None: + try: # load from non-Onnx model + pipeline = diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs) + except Exception: + pass + return OnnxAutoPipeline(pretrained_model_name_or_path, pipeline) + + @staticmethod + def from_single_file(pretrained_model_name_or_path, **kwargs): + return OnnxAutoPipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs)) + + @staticmethod + def from_ckpt(*args, **kwargs): + return OnnxAutoPipeline.from_single_file(**args, **kwargs) + + def derive_properties(self, pipeline: OnnxPipelineBase): + pipeline.sd_model_hash = self.sd_model_hash + pipeline.sd_checkpoint_info = self.sd_checkpoint_info + pipeline.sd_model_checkpoint = self.sd_model_checkpoint + return pipeline + + def to(self, *args, **kwargs): + pass + + def convert(self): + if shared.opts.onnx_execution_provider == ExecutionProvider.ROCm: + from olive.hardware.accelerator import AcceleratorLookup + AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm) + + out_dir = os.path.join(shared.opts.onnx_cached_models_path, self.original_filename) + if os.path.isdir(out_dir): # already converted (cached) + self.pipeline = self.derive_properties( + self.constructor.from_pretrained( + out_dir, + ) + ) + return + + try: + from olive.workflows import run + from olive.model import ONNXModel + + shutil.rmtree("cache", ignore_errors=True) + shutil.rmtree("footprints", ignore_errors=True) + + kwargs = { + "tokenizer": self.pipeline.tokenizer, + "scheduler": self.pipeline.scheduler, + "safety_checker": self.pipeline.safety_checker if hasattr(self.pipeline, "safety_checker") else None, + "feature_extractor": self.pipeline.feature_extractor, + } + del self.pipeline + + if shared.opts.onnx_cache_converted: + shutil.copytree( + shared.opts.onnx_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt") + ) + + converted_model_paths = {} + + for submodel in submodels: + log.info(f"\nConverting {submodel}") + + with open(os.path.join(sd_configs_path, "onnx", f"{'sdxl' if olive.is_sdxl else 'sd'}_{submodel}.json"), "r") as config_file: + conversion_config = json.load(config_file) + conversion_config["input_model"]["config"]["model_path"] = os.path.abspath(shared.opts.onnx_temp_dir) + conversion_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider] + + run(conversion_config) + + with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file: + footprints = json.load(footprint_file) + conversion_footprint = None + for _, footprint in footprints.items(): + if footprint["from_pass"] == "OnnxConversion": + conversion_footprint = footprint + + assert conversion_footprint, "Failed to convert model" + + converted_model_paths[submodel] = ONNXModel( + **conversion_footprint["model_config"]["config"] + ).model_path + + log.info(f"Converted {submodel}") + shutil.rmtree(shared.opts.onnx_temp_dir) + + for submodel in submodels: + kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained( + os.path.dirname(converted_model_paths[submodel]), + provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),), + ) + + self.pipeline = self.derive_properties( + self.constructor( + **kwargs, + requires_safety_checker=False, + ) + ) + + if shared.opts.onnx_cache_converted: + self.pipeline.to_json_file(os.path.join(out_dir, "model_index.json")) + + for submodel in submodels: + src_path = converted_model_paths[submodel] + src_parent = os.path.dirname(src_path) + dst_parent = os.path.join(out_dir, submodel) + dst_path = os.path.join(dst_parent, "model.onnx") + if not os.path.isdir(dst_parent): + os.mkdir(dst_parent) + shutil.copyfile(src_path, dst_path) + + weights_src_path = os.path.join(src_parent, "weights.pb") + if os.path.isfile(weights_src_path): + weights_dst_path = os.path.join(dst_parent, "weights.pb") + shutil.copyfile(weights_src_path, weights_dst_path) + except Exception as e: + log.error(f"Failed to convert model '{self.original_filename}'.") + log.error(e) # for test. + shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True) + shutil.rmtree(out_dir, ignore_errors=True) + + def optimize(self): + sess_options = ort.SessionOptions() + sess_options.add_free_dimension_override_by_name("unet_sample_batch", olive.batch_size * 2) + sess_options.add_free_dimension_override_by_name("unet_sample_channels", 4) + sess_options.add_free_dimension_override_by_name("unet_sample_height", olive.height // 8) + sess_options.add_free_dimension_override_by_name("unet_sample_width", olive.width // 8) + sess_options.add_free_dimension_override_by_name("unet_time_batch", 1) + sess_options.add_free_dimension_override_by_name("unet_hidden_batch", olive.batch_size * 2) + sess_options.add_free_dimension_override_by_name("unet_hidden_sequence", 77) + if olive.is_sdxl: + sess_options.add_free_dimension_override_by_name("unet_text_embeds_batch", olive.batch_size * 2) + sess_options.add_free_dimension_override_by_name("unet_text_embeds_size", 1280) + sess_options.add_free_dimension_override_by_name("unet_time_ids_batch", olive.batch_size * 2) + sess_options.add_free_dimension_override_by_name("unet_time_ids_size", 6) + in_dir = os.path.join(shared.opts.onnx_cached_models_path, self.original_filename) + out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{olive.width}w-{olive.height}h") + if os.path.isdir(out_dir): # already optimized (cached) + self.pipeline = self.derive_properties( + self.constructor.from_pretrained( + out_dir, + sess_options=sess_options, + ) + ) + return + + try: + from olive.workflows import run + from olive.model import ONNXModel + + shutil.rmtree("cache", ignore_errors=True) + shutil.rmtree("footprints", ignore_errors=True) + + kwargs = { + "tokenizer": self.pipeline.tokenizer, + "scheduler": self.pipeline.scheduler, + "safety_checker": self.pipeline.safety_checker if hasattr(self.pipeline, "safety_checker") else None, + "feature_extractor": self.pipeline.feature_extractor, + } + del self.pipeline + + if shared.opts.onnx_cache_optimized: + shutil.copytree( + in_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt") + ) + + optimized_model_paths = {} + + for submodel in submodels: + log.info(f"\nOptimizing {submodel}") + + with open(os.path.join(sd_configs_path, "olive", f"{'sdxl' if olive.is_sdxl else 'sd'}_{submodel}.json"), "r") as config_file: + olive_config = json.load(config_file) + olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx")) + olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16 + if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm): + olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True + olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider] + + run(olive_config) + + with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file: + footprints = json.load(footprint_file) + optimizer_footprint = None + for _, footprint in footprints.items(): + if footprint["from_pass"] == "OrtTransformersOptimization": + optimizer_footprint = footprint + + assert optimizer_footprint, "Failed to optimize model" + + optimized_model_paths[submodel] = ONNXModel( + **optimizer_footprint["model_config"]["config"] + ).model_path + + log.info(f"Optimized {submodel}") + + for submodel in submodels: + kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained( + os.path.dirname(optimized_model_paths[submodel]), + sess_options=sess_options, + provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),), + ) + + self.pipeline = self.derive_properties( + self.constructor( + **kwargs, + requires_safety_checker=False, + ) + ) + + if shared.opts.onnx_cache_optimized: + self.pipeline.to_json_file(os.path.join(out_dir, "model_index.json")) + + for submodel in submodels: + src_path = optimized_model_paths[submodel] + src_parent = os.path.dirname(src_path) + dst_parent = os.path.join(out_dir, submodel) + dst_path = os.path.join(dst_parent, "model.onnx") + if not os.path.isdir(dst_parent): + os.mkdir(dst_parent) + shutil.copyfile(src_path, dst_path) + + weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data")) + if os.path.isfile(weights_src_path): + weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data")) + shutil.copyfile(weights_src_path, weights_dst_path) + except Exception as e: + log.error(f"Failed to optimize model '{self.original_filename}'.") + log.error(e) # for test. + shutil.rmtree(out_dir, ignore_errors=True) + + def preprocess(self, width: int, height: int, batch_size: int): + olive.width = width + olive.height = height + olive.batch_size = batch_size + + olive.is_sdxl = self.constructor == diffusers.OnnxStableDiffusionXLPipeline + + self.convert() + + if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive': + if width != height: + log.warning("Olive detected different width and height. The quality of the result is not guaranteed.") + self.optimize() + + return self.pipeline diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index c7c58e0a6..4f4d02df7 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -21,8 +21,8 @@ def process_diffusers(p: processing.StableDiffusionProcessing): orig_pipeline = shared.sd_model results = [] - if isinstance(shared.sd_model, OlivePipeline): - shared.sd_model = shared.sd_model.optimize(p.width, p.height) + if hasattr(shared.sd_model, 'preprocess'): + shared.sd_model = shared.sd_model.preprocess(p.width, p.height, p.batch_size) def is_txt2img(): return sd_models.get_diffusers_task(shared.sd_model) == sd_models.DiffusersTaskType.TEXT_2_IMAGE diff --git a/modules/sd_models.py b/modules/sd_models.py index 8ede576c9..659c40e90 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -147,7 +147,7 @@ def list_models(): model_list = list(modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])) if shared.backend == shared.Backend.DIFFUSERS: model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir, clear=True) - model_list += modelloader.load_diffusers_models(model_path=shared.opts.olive_sideloaded_models_path, command_path=shared.opts.olive_sideloaded_models_path, clear=False) + model_list += modelloader.load_diffusers_models(model_path=shared.opts.onnx_sideloaded_models_path, command_path=shared.opts.onnx_sideloaded_models_path, clear=False) for filename in sorted(model_list, key=str.lower): checkpoint_info = CheckpointInfo(filename) if checkpoint_info.name is not None: @@ -791,67 +791,39 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No shared.log.debug(f'Diffusers loading: path="{checkpoint_info.path}"') pipeline, model_type = detect_pipeline(checkpoint_info.path, op) if 'ONNX' in shared.opts.diffusers_pipeline: - from modules.onnx import get_execution_provider_options - diffusers_load_config['provider'] = (shared.opts.onnx_execution_provider, get_execution_provider_options(),) - if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive': - try: - from modules.onnx import OnnxStableDiffusionPipeline - sd_model = OnnxStableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.olive_sideloaded_models_path) - sd_model.model_type = sd_model.__class__.__name__ - except Exception as e: - shared.log.error(f'Failed loading {op}: {checkpoint_info.path} olive={e}') - return + from modules.onnx import OnnxAutoPipeline + if os.path.isdir(checkpoint_info.path): + sd_model = OnnxAutoPipeline.from_pretrained(checkpoint_info.path) else: - err1 = None - err2 = None - err3 = None - try: # try autopipeline first, best choice but not all pipelines are available - sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) + sd_model = OnnxAutoPipeline.from_single_file(checkpoint_info.path) + + if sd_model is None and os.path.isdir(checkpoint_info.path): + err1 = None + err2 = None + err3 = None + try: # try autopipeline first, best choice but not all pipelines are available + sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) + sd_model.model_type = sd_model.__class__.__name__ + except Exception as e: + err1 = e + # shared.log.error(f'AutoPipeline: {e}') + try: # try diffusion pipeline next second-best choice, works for most non-linked pipelines + if err1 is not None: + sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) sd_model.model_type = sd_model.__class__.__name__ - except Exception as e: - err1 = e - try: # try diffusion pipeline next second-best choice, works for most non-linked pipelines - if err1 is not None: - sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - sd_model.model_type = sd_model.__class__.__name__ - except Exception as e: - err2 = e - try: # try basic pipeline next just in case - if err2 is not None: - sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - sd_model.model_type = sd_model.__class__.__name__ - except Exception as e: - err3 = e # ignore last error - if err3 is not None: - shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}') - return - if model_type in ['InstaFlow']: # forced pipeline - sd_model = pipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - else: - err1, err2, err3 = None, None, None - try: # 1 - autopipeline, best choice but not all pipelines are available - sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) + except Exception as e: + err2 = e + # shared.log.error(f'DiffusionPipeline: {e}') + try: # try basic pipeline next just in case + if err2 is not None: + sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) sd_model.model_type = sd_model.__class__.__name__ - except Exception as e: - err1 = e - # shared.log.error(f'AutoPipeline: {e}') - try: # 2 - diffusion pipeline, works for most non-linked pipelines - if err1 is not None: - sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - sd_model.model_type = sd_model.__class__.__name__ - except Exception as e: - err2 = e - # shared.log.error(f'DiffusionPipeline: {e}') - try: # 3 - try basic pipeline just in case - if err2 is not None: - sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - sd_model.model_type = sd_model.__class__.__name__ - except Exception as e: - err3 = e # ignore last error - shared.log.error(f'StableDiffusionPipeline: {e}') - if err3 is not None: - shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}') - return + except Exception as e: + err3 = e # ignore last error + shared.log.error(f'StableDiffusionPipeline: {e}') + if err3 is not None: + shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}') + return elif os.path.isfile(checkpoint_info.path) and checkpoint_info.path.lower().endswith('.safetensors'): # diffusers_load_config["local_files_only"] = True diffusers_load_config["extract_ema"] = shared.opts.diffusers_extract_ema diff --git a/modules/shared.py b/modules/shared.py index 913e3e7f5..df3763072 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -17,7 +17,6 @@ from modules import errors, shared_items, shared_state, cmd_args, theme from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611 from modules.dml import memory_providers, default_memory_provider, directml_do_hijack from modules.onnx import available_execution_providers, get_default_execution_provider -from modules.olive import enable_olive_onchange import modules.interrogate import modules.memmon import modules.styles @@ -440,9 +439,9 @@ options_templates.update(options_section(('diffusers', "Diffusers Settings"), { "onnx_sep": OptionInfo("