refactor onnx and olive

pull/2784/head
Seunghoon Lee 2023-11-04 23:46:20 +09:00
parent b70258c926
commit 6507491d8f
No known key found for this signature in database
GPG Key ID: 436E38F4E70BD152
27 changed files with 1185 additions and 342 deletions

View File

@ -38,12 +38,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -55,15 +55,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -45,12 +45,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -45,12 +45,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -71,12 +71,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -111,12 +111,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -61,15 +61,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -51,12 +51,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -51,12 +51,6 @@
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,

View File

@ -0,0 +1,62 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "text_encoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["input_ids"],
"output_names": ["last_hidden_state", "pooler_output"],
"dynamic_axes": { "input_ids": { "0": "batch", "1": "sequence" } }
},
"dummy_inputs_func": "text_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "text_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

82
configs/onnx/sd_unet.json Normal file
View File

@ -0,0 +1,82 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "unet_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": [
"sample",
"timestep",
"encoder_hidden_states",
"return_dict"
],
"output_names": ["out_sample"],
"dynamic_axes": {
"sample": {
"0": "unet_sample_batch",
"1": "unet_sample_channels",
"2": "unet_sample_height",
"3": "unet_sample_width"
},
"timestep": { "0": "unet_time_batch" },
"encoder_hidden_states": {
"0": "unet_hidden_batch",
"1": "unet_hidden_sequence"
}
}
},
"dummy_inputs_func": "unet_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "unet_data_loader",
"batch_size": 2
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "unet",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,69 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "vae_decoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["latent_sample", "return_dict"],
"output_names": ["sample"],
"dynamic_axes": {
"latent_sample": {
"0": "batch",
"1": "channels",
"2": "height",
"3": "width"
}
}
},
"dummy_inputs_func": "vae_decoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "vae_decoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "vae_decoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,69 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "vae_encoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["sample", "return_dict"],
"output_names": ["latent_sample"],
"dynamic_axes": {
"sample": {
"0": "batch",
"1": "channels",
"2": "height",
"3": "width"
}
}
},
"dummy_inputs_func": "vae_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "vae_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "vae_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,95 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "text_encoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["input_ids", "output_hidden_states"],
"output_names": [
"last_hidden_state",
"pooler_output",
"hidden_states.0",
"hidden_states.1",
"hidden_states.2",
"hidden_states.3",
"hidden_states.4",
"hidden_states.5",
"hidden_states.6",
"hidden_states.7",
"hidden_states.8",
"hidden_states.9",
"hidden_states.10",
"hidden_states.11",
"hidden_states.12"
],
"dynamic_axes": {
"input_ids": { "0": "batch_size", "1": "sequence_length" },
"last_hidden_state": { "0": "batch_size", "1": "sequence_length" },
"pooler_output": { "0": "batch_size" },
"hidden_states.0": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.1": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.2": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.3": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.4": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.5": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.6": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.7": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.8": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.9": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.10": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.11": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.12": { "0": "batch_size", "1": "sequence_length" }
}
},
"dummy_inputs_func": "text_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "text_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,135 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "text_encoder_2_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["input_ids", "output_hidden_states"],
"output_names": [
"text_embeds",
"last_hidden_state",
"hidden_states.0",
"hidden_states.1",
"hidden_states.2",
"hidden_states.3",
"hidden_states.4",
"hidden_states.5",
"hidden_states.6",
"hidden_states.7",
"hidden_states.8",
"hidden_states.9",
"hidden_states.10",
"hidden_states.11",
"hidden_states.12",
"hidden_states.13",
"hidden_states.14",
"hidden_states.15",
"hidden_states.16",
"hidden_states.17",
"hidden_states.18",
"hidden_states.19",
"hidden_states.20",
"hidden_states.21",
"hidden_states.22",
"hidden_states.23",
"hidden_states.24",
"hidden_states.25",
"hidden_states.26",
"hidden_states.27",
"hidden_states.28",
"hidden_states.29",
"hidden_states.30",
"hidden_states.31",
"hidden_states.32"
],
"dynamic_axes": {
"input_ids": { "0": "batch_size", "1": "sequence_length" },
"text_embeds": { "0": "batch_size", "1": "sequence_length" },
"last_hidden_state": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.0": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.1": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.2": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.3": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.4": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.5": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.6": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.7": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.8": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.9": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.10": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.11": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.12": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.13": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.14": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.15": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.16": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.17": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.18": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.19": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.20": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.21": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.22": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.23": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.24": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.25": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.26": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.27": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.28": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.29": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.30": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.31": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.32": { "0": "batch_size", "1": "sequence_length" }
}
},
"dummy_inputs_func": "text_encoder_2_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "text_encoder_2_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder_2",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,88 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "unet_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": [
"sample",
"timestep",
"encoder_hidden_states",
"text_embeds",
"time_ids"
],
"output_names": ["out_sample"],
"dynamic_axes": {
"sample": {
"0": "unet_sample_batch",
"1": "unet_sample_channels",
"2": "unet_sample_height",
"3": "unet_sample_width"
},
"timestep": { "0": "unet_time_batch" },
"encoder_hidden_states": {
"0": "unet_hidden_batch",
"1": "unet_hidden_sequence"
},
"text_embeds": {
"0": "unet_text_embeds_batch",
"1": "unet_text_embeds_size"
},
"time_ids": { "0": "unet_time_ids_batch", "1": "unet_time_ids_size" }
}
},
"dummy_inputs_func": "unet_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "unet_data_loader",
"batch_size": 2
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "unet",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,75 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "vae_decoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["latent_sample", "return_dict"],
"output_names": ["sample"],
"dynamic_axes": {
"latent_sample": {
"0": "batch_size",
"1": "num_channels_latent",
"2": "height_latent",
"3": "width_latent"
},
"sample": {
"0": "batch_size",
"1": "num_channels",
"2": "height",
"3": "width"
}
}
},
"dummy_inputs_func": "vae_decoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "vae_decoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "vae_decoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -0,0 +1,75 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "vae_encoder_load",
"model_script": "modules/olive.py",
"io_config": {
"input_names": ["sample", "return_dict"],
"output_names": ["latent_sample"],
"dynamic_axes": {
"sample": {
"0": "batch_size",
"1": "num_channels",
"2": "height",
"3": "width"
},
"latent_sample": {
"0": "batch_size",
"1": "num_channels_latent",
"2": "height_latent",
"3": "width_latent"
}
}
},
"dummy_inputs_func": "vae_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive.py",
"dataloader_func": "vae_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "vae_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}

View File

@ -31,9 +31,8 @@ except ModuleNotFoundError:
def init_olive():
try:
if installer.opts['onnx_enable_olive']:
import olive.workflows # pylint: disable=unused-import
installer.log.debug('Load olive')
import olive.workflows # pylint: disable=unused-import
installer.log.debug('Load olive')
except Exception as e:
installer.log.error(f'Failed to load olive: {e}')

View File

@ -1,202 +1,21 @@
import os
import sys
import json
import torch
import shutil
import diffusers
from transformers.models.clip.modeling_clip import CLIPTextModel, CLIPTextModelWithProjection
from installer import log
from modules import shared
from modules.paths import sd_configs_path
from modules.sd_models import CheckpointInfo
from modules.onnx import ExecutionProvider, get_execution_provider_options
is_available = "olive" in sys.modules # Olive is not available if it is not loaded at startup.
def enable_olive_onchange():
from installer import installed, install, uninstall
if shared.opts.onnx_enable_olive:
if not installed('olive-ai', reload=True, quiet=True):
install('olive-ai', 'olive-ai')
else:
global is_available
is_available = False
if "olive" in sys.modules:
del sys.modules["olive"]
if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive':
shared.opts.diffusers_pipeline = 'ONNX Stable Diffusion'
if installed('olive-ai', reload=True, quiet=True):
uninstall('olive-ai')
is_sdxl = False
submodels = ("text_encoder", "unet", "vae_encoder", "vae_decoder",)
width = 512
height = 512
batch_size = 1
EP_TO_NAME = {
ExecutionProvider.CPU: "cpu",
ExecutionProvider.DirectML: "gpu-dml",
ExecutionProvider.CUDA: "gpu-?", # TODO
ExecutionProvider.ROCm: "gpu-rocm",
ExecutionProvider.OpenVINO: "gpu", # Other devices can use --use-openvino instead of olive
}
class OlivePipeline(diffusers.DiffusionPipeline):
model_type = diffusers.OnnxStableDiffusionPipeline.__name__
sd_model_hash: str
sd_checkpoint_info: CheckpointInfo
sd_model_checkpoint: str
config = {}
unoptimized: diffusers.DiffusionPipeline
original_filename: str
def __init__(self, path, pipeline: diffusers.DiffusionPipeline):
self.original_filename = os.path.basename(path)
self.unoptimized = pipeline
del pipeline
if not os.path.exists(shared.opts.olive_temp_dir):
os.mkdir(shared.opts.olive_temp_dir)
self.unoptimized.save_pretrained(shared.opts.olive_temp_dir)
@staticmethod
def from_pretrained(pretrained_model_name_or_path, **kwargs):
return OlivePipeline(pretrained_model_name_or_path, diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs))
@staticmethod
def from_single_file(pretrained_model_name_or_path, **kwargs):
return OlivePipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs))
@staticmethod
def from_ckpt(*args, **kwargs):
return OlivePipeline.from_single_file(**args, **kwargs)
def derive_properties(self, pipeline: diffusers.OnnxStableDiffusionPipeline):
pipeline.sd_model_hash = self.sd_model_hash
pipeline.sd_checkpoint_info = self.sd_checkpoint_info
pipeline.sd_model_checkpoint = self.sd_model_checkpoint
return pipeline
def to(self, *args, **kwargs):
pass
def optimize(self, width: int, height: int):
from olive.workflows import run
from olive.model import ONNXModel
if shared.opts.onnx_execution_provider == ExecutionProvider.ROCm:
from olive.hardware.accelerator import AcceleratorLookup
AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm)
if width != height:
log.warning("Olive received different width and height. The quality of the result is not guaranteed.")
out_dir = os.path.join(shared.opts.olive_cached_models_path, f"{self.original_filename}-{width}w-{height}h")
if os.path.isdir(out_dir): # already optimized (cached)
del self.unoptimized
return self.derive_properties(
diffusers.OnnxStableDiffusionPipeline.from_pretrained(
out_dir,
)
)
try:
if shared.opts.onnx_cache_optimized:
shutil.copytree(
shared.opts.olive_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
)
optimize_config["width"] = width
optimize_config["height"] = height
optimized_model_paths = {}
for submodel in submodels:
log.info(f"\nOptimizing {submodel}")
with open(os.path.join(sd_configs_path, "olive", f"sd_{submodel}.json"), "r") as config_file:
olive_config = json.load(config_file)
olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
run(olive_config)
with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
footprints = json.load(footprint_file)
conversion_footprint = None
optimizer_footprint = None
for _, footprint in footprints.items():
if footprint["from_pass"] == "OnnxConversion":
conversion_footprint = footprint
elif footprint["from_pass"] == "OrtTransformersOptimization":
optimizer_footprint = footprint
assert conversion_footprint and optimizer_footprint, "Failed to optimize model"
optimized_model_paths[submodel] = ONNXModel(
**optimizer_footprint["model_config"]["config"]
).model_path
log.info(f"Optimized {submodel}")
shutil.rmtree(shared.opts.olive_temp_dir)
kwargs = {
"tokenizer": self.unoptimized.tokenizer,
"scheduler": self.unoptimized.scheduler,
"safety_checker": self.unoptimized.safety_checker if hasattr(self.unoptimized, "safety_checker") else None,
"feature_extractor": self.unoptimized.feature_extractor,
}
del self.unoptimized
for submodel in submodels:
kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
os.path.dirname(optimized_model_paths[submodel]),
provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),),
)
pipeline = self.derive_properties(
diffusers.OnnxStableDiffusionPipeline(
**kwargs,
requires_safety_checker=False,
)
)
del kwargs
if shared.opts.onnx_cache_optimized:
pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
for submodel in submodels:
src_path = optimized_model_paths[submodel]
src_parent = os.path.dirname(src_path)
dst_parent = os.path.join(out_dir, submodel)
dst_path = os.path.join(dst_parent, "model.onnx")
if not os.path.isdir(dst_parent):
os.mkdir(dst_parent)
shutil.copyfile(src_path, dst_path)
weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
if os.path.isfile(weights_src_path):
weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
shutil.copyfile(weights_src_path, weights_dst_path)
except Exception as e:
log.error(f"Failed to optimize model '{self.original_filename}'.")
log.error(e) # for test.
shutil.rmtree(shared.opts.olive_temp_dir, ignore_errors=True)
shutil.rmtree(out_dir, ignore_errors=True)
pipeline = None
shutil.rmtree("cache", ignore_errors=True)
shutil.rmtree("footprints", ignore_errors=True)
return pipeline
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
optimize_config = {
"is_sdxl": False,
"width": 512,
"height": 512,
}
# Helper latency-only dataloader that creates random tensors with no label
class RandomDataLoader:
@ -219,11 +38,11 @@ def text_encoder_inputs(batchsize, torch_dtype):
return {
"input_ids": input_ids,
"output_hidden_states": True,
} if optimize_config["is_sdxl"] else input_ids
} if is_sdxl else input_ids
def text_encoder_load(model_name):
model = CLIPTextModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder")
model = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder")
return model
@ -248,7 +67,7 @@ def text_encoder_2_inputs(batchsize, torch_dtype):
def text_encoder_2_load(model_name):
model = CLIPTextModelWithProjection.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="text_encoder_2")
model = CLIPTextModelWithProjection.from_pretrained(model_name, subfolder="text_encoder_2")
return model
@ -268,10 +87,8 @@ def text_encoder_2_data_loader(data_dir, batchsize, *args, **kwargs):
def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
# TODO (pavignol): All the multiplications by 2 here are because the XL base has 2 text encoders
# For refiner, it should be multiplied by 1 (single text encoder)
height = optimize_config["height"]
width = optimize_config["width"]
if optimize_config["is_sdxl"]:
if is_sdxl:
inputs = {
"sample": torch.rand((2 * batchsize, 4, height // 8, width // 8), dtype=torch_dtype),
"timestep": torch.rand((1,), dtype=torch_dtype),
@ -281,12 +98,12 @@ def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
if is_conversion_inputs:
inputs["additional_inputs"] = {
"added_cond_kwargs": {
"text_embeds": torch.rand((2 * batchsize, height + 256), dtype=torch_dtype),
"text_embeds": torch.rand((2 * batchsize, 1280), dtype=torch_dtype),
"time_ids": torch.rand((2 * batchsize, 6), dtype=torch_dtype),
}
}
else:
inputs["text_embeds"] = torch.rand((2 * batchsize, height + 256), dtype=torch_dtype)
inputs["text_embeds"] = torch.rand((2 * batchsize, 1280), dtype=torch_dtype)
inputs["time_ids"] = torch.rand((2 * batchsize, 6), dtype=torch_dtype)
else:
inputs = {
@ -296,11 +113,22 @@ def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
"return_dict": False,
}
if is_conversion_inputs:
inputs["additional_inputs"] = {
"added_cond_kwargs": {
"text_embeds": torch.rand((1, 1280), dtype=torch_dtype),
"time_ids": torch.rand((1, 5), dtype=torch_dtype),
}
}
else:
inputs["onnx::Concat_4"] = torch.rand((1, 1280), dtype=torch_dtype)
inputs["onnx::Shape_5"] = torch.rand((1, 5), dtype=torch_dtype)
return inputs
def unet_load(model_name):
model = diffusers.UNet2DConditionModel.from_pretrained(os.path.abspath(shared.opts.olive_temp_dir), subfolder="unet")
model = diffusers.UNet2DConditionModel.from_pretrained(model_name, subfolder="unet")
return model
@ -319,13 +147,13 @@ def unet_data_loader(data_dir, batchsize, *args, **kwargs):
def vae_encoder_inputs(batchsize, torch_dtype):
return {
"sample": torch.rand((batchsize, 3, optimize_config["height"], optimize_config["width"]), dtype=torch_dtype),
"sample": torch.rand((batchsize, 3, height, width), dtype=torch_dtype),
"return_dict": False,
}
def vae_encoder_load(model_name):
source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
source = os.path.join(model_name, "vae")
if not os.path.isdir(source):
source += "_encoder"
model = diffusers.AutoencoderKL.from_pretrained(source)
@ -348,13 +176,13 @@ def vae_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
def vae_decoder_inputs(batchsize, torch_dtype):
return {
"latent_sample": torch.rand((batchsize, 4, optimize_config["height"] // 8, optimize_config["width"] // 8), dtype=torch_dtype),
"latent_sample": torch.rand((batchsize, 4, height // 8, width // 8), dtype=torch_dtype),
"return_dict": False,
}
def vae_decoder_load(model_name):
source = os.path.join(os.path.abspath(shared.opts.olive_temp_dir), "vae")
source = os.path.join(model_name, "vae")
if not os.path.isdir(source):
source += "_decoder"
model = diffusers.AutoencoderKL.from_pretrained(source)

View File

@ -1,13 +1,18 @@
import os
import json
import torch
import shutil
import importlib
import diffusers
import numpy as np
import onnxruntime as ort
import diffusers
import optimum.onnxruntime
from enum import Enum
from typing import Union, Optional, Callable, List
from abc import ABCMeta
from typing import Any, Dict, Union, Optional, Callable, List
from installer import log
from modules import shared
from modules import shared, olive
from modules.paths import sd_configs_path
from modules.sd_models import CheckpointInfo
class ExecutionProvider(str, Enum):
@ -17,8 +22,17 @@ class ExecutionProvider(str, Enum):
ROCm = "ROCMExecutionProvider"
OpenVINO = "OpenVINOExecutionProvider"
submodels = ("text_encoder", "unet", "vae_encoder", "vae_decoder",)
available_execution_providers: List[ExecutionProvider] = ort.get_available_providers()
EP_TO_NAME = {
ExecutionProvider.CPU: "cpu",
ExecutionProvider.DirectML: "gpu-dml",
ExecutionProvider.CUDA: "gpu-?", # TODO
ExecutionProvider.ROCm: "gpu-rocm",
ExecutionProvider.OpenVINO: "gpu", # Other devices can use --use-openvino instead of olive
}
def get_default_execution_provider() -> ExecutionProvider:
from modules import devices
if devices.backend == "cpu":
@ -64,19 +78,46 @@ class OnnxRuntimeModel(diffusers.OnnxRuntimeModel):
return ()
# OnnxRuntimeModel Hijack.
OnnxRuntimeModel.__module__ = 'diffusers'
diffusers.OnnxRuntimeModel = OnnxRuntimeModel
class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline):
model_type = diffusers.OnnxStableDiffusionPipeline.__name__
class OnnxPipelineBase(diffusers.DiffusionPipeline, metaclass=ABCMeta):
model_type: str
sd_model_hash: str
sd_checkpoint_info: CheckpointInfo
sd_model_checkpoint: str
def __init__(self):
self.model_type = self.__class__.__name__
class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline, OnnxPipelineBase):
def __init__(
self,
vae_encoder: diffusers.OnnxRuntimeModel,
vae_decoder: diffusers.OnnxRuntimeModel,
text_encoder: diffusers.OnnxRuntimeModel,
tokenizer,
unet: diffusers.OnnxRuntimeModel,
scheduler,
safety_checker: diffusers.OnnxRuntimeModel,
feature_extractor,
requires_safety_checker: bool = True
):
super().__init__(vae_encoder, vae_decoder, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor, requires_safety_checker)
@staticmethod
def from_pretrained(pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
kwargs["provider"] = kwargs["provider"] if "provider" in kwargs else (shared.opts.onnx_execution_provider, get_execution_provider_options(),)
init_dict = super(OnnxStableDiffusionPipeline, OnnxStableDiffusionPipeline).extract_init_dict(diffusers.DiffusionPipeline.load_config(pretrained_model_name_or_path), **kwargs)[0]
sess_options = kwargs.get("sess_options", ort.SessionOptions())
provider = kwargs.get("provider", (shared.opts.onnx_execution_provider, get_execution_provider_options(),))
model_config = super(OnnxStableDiffusionPipeline, OnnxStableDiffusionPipeline).extract_init_dict(diffusers.DiffusionPipeline.load_config(pretrained_model_name_or_path))
init_dict = {}
for d in model_config:
if 'unet' in d:
init_dict = d
break
init_kwargs = {}
for k, v in init_dict.items():
if not isinstance(v, list):
@ -90,7 +131,8 @@ class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline):
constructor = getattr(library, constructor_name)
submodel_kwargs = {}
if issubclass(constructor, diffusers.OnnxRuntimeModel):
submodel_kwargs["provider"] = kwargs["provider"]
submodel_kwargs["sess_options"] = sess_options
submodel_kwargs["provider"] = provider
try:
init_kwargs[k] = constructor.from_pretrained(
os.path.join(pretrained_model_name_or_path, k),
@ -244,4 +286,316 @@ class OnnxStableDiffusionPipeline(diffusers.OnnxStableDiffusionPipeline):
return diffusers.pipelines.stable_diffusion.StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
# Install the patched subclass as diffusers.OnnxStableDiffusionPipeline module-wide.
diffusers.OnnxStableDiffusionPipeline = OnnxStableDiffusionPipeline
class OnnxStableDiffusionXLPipeline(optimum.onnxruntime.ORTStableDiffusionXLPipeline, OnnxPipelineBase):
    """ONNX SDXL pipeline: optimum's ORTStableDiffusionXLPipeline combined
    with the checkpoint bookkeeping attributes of OnnxPipelineBase."""
    def __init__(
        self,
        vae_decoder_session,
        text_encoder_session,
        unet_session,
        config: Dict[str, Any],
        tokenizer,
        scheduler,
        feature_extractor = None,
        vae_encoder_session = None,
        text_encoder_2_session = None,
        tokenizer_2 = None,
        use_io_binding: bool | None = None,
        model_save_dir = None,
        add_watermarker: bool | None = None
    ):
        # Forward everything positionally, unchanged, to optimum's constructor.
        # NOTE(review): argument order must match the optimum base class — confirm on optimum upgrades.
        super().__init__(vae_decoder_session, text_encoder_session, unet_session, config, tokenizer, scheduler, feature_extractor, vae_encoder_session, text_encoder_2_session, tokenizer_2, use_io_binding, model_save_dir, add_watermarker)
# Present the subclass under optimum's original module/name — presumably so
# serialized configs resolve back to the optimum class name (TODO confirm) —
# then install it as diffusers.OnnxStableDiffusionXLPipeline module-wide.
OnnxStableDiffusionXLPipeline.__module__ = 'optimum.onnxruntime.modeling_diffusion'
OnnxStableDiffusionXLPipeline.__name__ = 'ORTStableDiffusionXLPipeline'
diffusers.OnnxStableDiffusionXLPipeline = OnnxStableDiffusionXLPipeline
class OnnxAutoPipeline(OnnxPipelineBase):
    """Auto-detecting ONNX pipeline wrapper that converts (and optionally
    Olive-optimizes) a model on demand via ``preprocess``.

    Possible Cases:
    1. from .ckpt or .safetensors
    2. from downloaded non-Onnx model
    3. from downloaded Onnx model
    4. from cached converted Onnx model
    5. from cached optimized model
    """
    # Concrete pipeline class used to rebuild the pipeline after conversion
    # or optimization (SD or SDXL, chosen in __init__).
    constructor: Union[diffusers.OnnxStableDiffusionPipeline, diffusers.OnnxStableDiffusionXLPipeline]
    config = {}  # NOTE(review): class-level mutable dict shared across instances — presumably satisfies DiffusionPipeline's config access; confirm
    pipeline: diffusers.DiffusionPipeline  # the currently wrapped pipeline
    original_filename: str  # basename of the source checkpoint/model path

    def __init__(self, path, pipeline: diffusers.DiffusionPipeline):
        """Stage *pipeline* (loaded from *path*) into the ONNX temp dir for conversion."""
        self.original_filename = os.path.basename(path)
        self.pipeline = pipeline
        del pipeline
        # Recreate the temp dir from scratch so stale submodels never leak in.
        if os.path.exists(shared.opts.onnx_temp_dir):
            shutil.rmtree(shared.opts.onnx_temp_dir)
        os.mkdir(shared.opts.onnx_temp_dir)
        # SDXL pipelines are detected by the presence of a second text encoder.
        self.constructor = diffusers.OnnxStableDiffusionXLPipeline if hasattr(self.pipeline, "text_encoder_2") else diffusers.OnnxStableDiffusionPipeline
        self.model_type = self.constructor.__name__
        self.pipeline.save_pretrained(shared.opts.onnx_temp_dir)

    @staticmethod
    def from_pretrained(pretrained_model_name_or_path, **kwargs):
        """Best-effort load: try ONNX SD, then ONNX SDXL, then a generic diffusers pipeline."""
        pipeline = None
        try: # load from Onnx SD model
            pipeline = diffusers.OnnxStableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs)
        except Exception as e:
            log.debug(f"OnnxStableDiffusionPipeline load failed: {e}")  # best-effort: fall through to next loader
        if pipeline is None:
            try: # load from Onnx SDXL model
                pipeline = diffusers.OnnxStableDiffusionXLPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs)
            except Exception as e:
                log.debug(f"OnnxStableDiffusionXLPipeline load failed: {e}")
        if pipeline is None:
            try: # load from non-Onnx model
                pipeline = diffusers.DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, **kwargs)
            except Exception as e:
                log.debug(f"DiffusionPipeline load failed: {e}")
        return OnnxAutoPipeline(pretrained_model_name_or_path, pipeline)

    @staticmethod
    def from_single_file(pretrained_model_name_or_path, **kwargs):
        """Load from a single .ckpt/.safetensors checkpoint file."""
        return OnnxAutoPipeline(pretrained_model_name_or_path, diffusers.StableDiffusionPipeline.from_single_file(pretrained_model_name_or_path, **kwargs))

    @staticmethod
    def from_ckpt(*args, **kwargs):
        """Deprecated alias of from_single_file."""
        # BUGFIX: was `from_single_file(**args, **kwargs)` — double-star on the
        # positional-args tuple raises TypeError; forward positionals with `*`.
        return OnnxAutoPipeline.from_single_file(*args, **kwargs)

    def derive_properties(self, pipeline: OnnxPipelineBase):
        """Copy checkpoint bookkeeping attributes onto a freshly built pipeline and return it."""
        pipeline.sd_model_hash = self.sd_model_hash
        pipeline.sd_checkpoint_info = self.sd_checkpoint_info
        pipeline.sd_model_checkpoint = self.sd_model_checkpoint
        return pipeline

    def to(self, *args, **kwargs):
        """No-op device move: ONNX sessions are bound to their execution provider.

        Returns self so `pipeline = pipeline.to(device)` chains keep working
        (previously returned None).
        """
        return self

    def convert(self):
        """Convert the staged torch pipeline to ONNX via Olive, reusing the cache when present.

        On any failure, logs the error and removes the temp/output dirs.
        """
        if shared.opts.onnx_execution_provider == ExecutionProvider.ROCm:
            # Olive does not list ROCm under its "gpu" providers by default; register it.
            from olive.hardware.accelerator import AcceleratorLookup
            AcceleratorLookup.EXECUTION_PROVIDERS["gpu"].append(ExecutionProvider.ROCm)
        out_dir = os.path.join(shared.opts.onnx_cached_models_path, self.original_filename)
        if os.path.isdir(out_dir): # already converted (cached)
            self.pipeline = self.derive_properties(
                self.constructor.from_pretrained(
                    out_dir,
                )
            )
            return
        try:
            from olive.workflows import run
            from olive.model import ONNXModel
            shutil.rmtree("cache", ignore_errors=True)
            shutil.rmtree("footprints", ignore_errors=True)
            # Non-ONNX components are carried over to the rebuilt pipeline unchanged.
            kwargs = {
                "tokenizer": self.pipeline.tokenizer,
                "scheduler": self.pipeline.scheduler,
                "safety_checker": self.pipeline.safety_checker if hasattr(self.pipeline, "safety_checker") else None,
                "feature_extractor": self.pipeline.feature_extractor,
            }
            del self.pipeline  # release the torch pipeline before conversion
            if shared.opts.onnx_cache_converted:
                # Copy configs/tokenizer files now; model weights are copied per-submodel below.
                shutil.copytree(
                    shared.opts.onnx_temp_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
                )
            converted_model_paths = {}
            for submodel in submodels:
                log.info(f"\nConverting {submodel}")
                with open(os.path.join(sd_configs_path, "onnx", f"{'sdxl' if olive.is_sdxl else 'sd'}_{submodel}.json"), "r") as config_file:
                    conversion_config = json.load(config_file)
                conversion_config["input_model"]["config"]["model_path"] = os.path.abspath(shared.opts.onnx_temp_dir)
                conversion_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
                run(conversion_config)
                # Olive records results as "footprint" files; pick the OnnxConversion pass output.
                with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
                    footprints = json.load(footprint_file)
                conversion_footprint = None
                for _, footprint in footprints.items():
                    if footprint["from_pass"] == "OnnxConversion":
                        conversion_footprint = footprint
                if conversion_footprint is None:
                    # was `assert` (stripped under -O); raise so the handler below cleans up
                    raise RuntimeError("Failed to convert model")
                converted_model_paths[submodel] = ONNXModel(
                    **conversion_footprint["model_config"]["config"]
                ).model_path
                log.info(f"Converted {submodel}")
            shutil.rmtree(shared.opts.onnx_temp_dir)
            for submodel in submodels:
                kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
                    os.path.dirname(converted_model_paths[submodel]),
                    provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),),
                )
            self.pipeline = self.derive_properties(
                self.constructor(
                    **kwargs,
                    requires_safety_checker=False,
                )
            )
            if shared.opts.onnx_cache_converted:
                self.pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
                for submodel in submodels:
                    src_path = converted_model_paths[submodel]
                    src_parent = os.path.dirname(src_path)
                    dst_parent = os.path.join(out_dir, submodel)
                    dst_path = os.path.join(dst_parent, "model.onnx")
                    if not os.path.isdir(dst_parent):
                        os.mkdir(dst_parent)
                    shutil.copyfile(src_path, dst_path)
                    # Conversion stores external weights as "weights.pb" next to the model.
                    weights_src_path = os.path.join(src_parent, "weights.pb")
                    if os.path.isfile(weights_src_path):
                        weights_dst_path = os.path.join(dst_parent, "weights.pb")
                        shutil.copyfile(weights_src_path, weights_dst_path)
        except Exception as e:
            log.error(f"Failed to convert model '{self.original_filename}'.")
            log.error(e) # for test.
            shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)
            shutil.rmtree(out_dir, ignore_errors=True)

    def optimize(self):
        """Olive-optimize the converted ONNX submodels for the current size/batch, reusing the cache when present.

        On any failure, logs the error and removes the output dir.
        """
        # Pin the dynamic (free) dimensions to the requested generation size;
        # batch is doubled for classifier-free guidance (cond + uncond).
        sess_options = ort.SessionOptions()
        sess_options.add_free_dimension_override_by_name("unet_sample_batch", olive.batch_size * 2)
        sess_options.add_free_dimension_override_by_name("unet_sample_channels", 4)
        sess_options.add_free_dimension_override_by_name("unet_sample_height", olive.height // 8)
        sess_options.add_free_dimension_override_by_name("unet_sample_width", olive.width // 8)
        sess_options.add_free_dimension_override_by_name("unet_time_batch", 1)
        sess_options.add_free_dimension_override_by_name("unet_hidden_batch", olive.batch_size * 2)
        sess_options.add_free_dimension_override_by_name("unet_hidden_sequence", 77)
        if olive.is_sdxl:
            sess_options.add_free_dimension_override_by_name("unet_text_embeds_batch", olive.batch_size * 2)
            sess_options.add_free_dimension_override_by_name("unet_text_embeds_size", 1280)
            sess_options.add_free_dimension_override_by_name("unet_time_ids_batch", olive.batch_size * 2)
            sess_options.add_free_dimension_override_by_name("unet_time_ids_size", 6)
        in_dir = os.path.join(shared.opts.onnx_cached_models_path, self.original_filename)
        out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{olive.width}w-{olive.height}h")
        if os.path.isdir(out_dir): # already optimized (cached)
            self.pipeline = self.derive_properties(
                self.constructor.from_pretrained(
                    out_dir,
                    sess_options=sess_options,
                )
            )
            return
        try:
            from olive.workflows import run
            from olive.model import ONNXModel
            shutil.rmtree("cache", ignore_errors=True)
            shutil.rmtree("footprints", ignore_errors=True)
            # Non-ONNX components are carried over to the rebuilt pipeline unchanged.
            kwargs = {
                "tokenizer": self.pipeline.tokenizer,
                "scheduler": self.pipeline.scheduler,
                "safety_checker": self.pipeline.safety_checker if hasattr(self.pipeline, "safety_checker") else None,
                "feature_extractor": self.pipeline.feature_extractor,
            }
            del self.pipeline  # release the converted pipeline before optimization
            if shared.opts.onnx_cache_optimized:
                shutil.copytree(
                    in_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
                )
            optimized_model_paths = {}
            for submodel in submodels:
                log.info(f"\nOptimizing {submodel}")
                with open(os.path.join(sd_configs_path, "olive", f"{'sdxl' if olive.is_sdxl else 'sd'}_{submodel}.json"), "r") as config_file:
                    olive_config = json.load(config_file)
                olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx"))
                olive_config["passes"]["optimize"]["config"]["float16"] = shared.opts.onnx_olive_float16
                if (submodel == "unet" or "vae" in submodel) and (shared.opts.onnx_execution_provider == ExecutionProvider.CUDA or shared.opts.onnx_execution_provider == ExecutionProvider.ROCm):
                    olive_config["passes"]["optimize"]["config"]["optimization_options"]["group_norm_channels_last"] = True
                olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]
                run(olive_config)
                # Pick the OrtTransformersOptimization pass output from the footprint file.
                with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r") as footprint_file:
                    footprints = json.load(footprint_file)
                optimizer_footprint = None
                for _, footprint in footprints.items():
                    if footprint["from_pass"] == "OrtTransformersOptimization":
                        optimizer_footprint = footprint
                if optimizer_footprint is None:
                    # was `assert` (stripped under -O); raise so the handler below cleans up
                    raise RuntimeError("Failed to optimize model")
                optimized_model_paths[submodel] = ONNXModel(
                    **optimizer_footprint["model_config"]["config"]
                ).model_path
                log.info(f"Optimized {submodel}")
            for submodel in submodels:
                kwargs[submodel] = diffusers.OnnxRuntimeModel.from_pretrained(
                    os.path.dirname(optimized_model_paths[submodel]),
                    sess_options=sess_options,
                    provider=(shared.opts.onnx_execution_provider, get_execution_provider_options(),),
                )
            self.pipeline = self.derive_properties(
                self.constructor(
                    **kwargs,
                    requires_safety_checker=False,
                )
            )
            if shared.opts.onnx_cache_optimized:
                self.pipeline.to_json_file(os.path.join(out_dir, "model_index.json"))
                for submodel in submodels:
                    src_path = optimized_model_paths[submodel]
                    src_parent = os.path.dirname(src_path)
                    dst_parent = os.path.join(out_dir, submodel)
                    dst_path = os.path.join(dst_parent, "model.onnx")
                    if not os.path.isdir(dst_parent):
                        os.mkdir(dst_parent)
                    shutil.copyfile(src_path, dst_path)
                    # Optimization stores external weights as "<model>.onnx.data" next to the model.
                    weights_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
                    if os.path.isfile(weights_src_path):
                        weights_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
                        shutil.copyfile(weights_src_path, weights_dst_path)
        except Exception as e:
            log.error(f"Failed to optimize model '{self.original_filename}'.")
            log.error(e) # for test.
            shutil.rmtree(out_dir, ignore_errors=True)

    def preprocess(self, width: int, height: int, batch_size: int):
        """Ensure the pipeline is converted (and Olive-optimized when the Olive
        pipeline is selected) for the requested generation size, and return it."""
        olive.width = width
        olive.height = height
        olive.batch_size = batch_size
        olive.is_sdxl = self.constructor == diffusers.OnnxStableDiffusionXLPipeline
        self.convert()
        if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive':
            if width != height:
                log.warning("Olive detected different width and height. The quality of the result is not guaranteed.")
            self.optimize()
        return self.pipeline

View File

@ -21,8 +21,8 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
orig_pipeline = shared.sd_model
results = []
if isinstance(shared.sd_model, OlivePipeline):
shared.sd_model = shared.sd_model.optimize(p.width, p.height)
if hasattr(shared.sd_model, 'preprocess'):
shared.sd_model = shared.sd_model.preprocess(p.width, p.height, p.batch_size)
def is_txt2img():
return sd_models.get_diffusers_task(shared.sd_model) == sd_models.DiffusersTaskType.TEXT_2_IMAGE

View File

@ -147,7 +147,7 @@ def list_models():
model_list = list(modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"]))
if shared.backend == shared.Backend.DIFFUSERS:
model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir, clear=True)
model_list += modelloader.load_diffusers_models(model_path=shared.opts.olive_sideloaded_models_path, command_path=shared.opts.olive_sideloaded_models_path, clear=False)
model_list += modelloader.load_diffusers_models(model_path=shared.opts.onnx_sideloaded_models_path, command_path=shared.opts.onnx_sideloaded_models_path, clear=False)
for filename in sorted(model_list, key=str.lower):
checkpoint_info = CheckpointInfo(filename)
if checkpoint_info.name is not None:
@ -791,67 +791,39 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
shared.log.debug(f'Diffusers loading: path="{checkpoint_info.path}"')
pipeline, model_type = detect_pipeline(checkpoint_info.path, op)
if 'ONNX' in shared.opts.diffusers_pipeline:
from modules.onnx import get_execution_provider_options
diffusers_load_config['provider'] = (shared.opts.onnx_execution_provider, get_execution_provider_options(),)
if shared.opts.diffusers_pipeline == 'ONNX Stable Diffusion with Olive':
try:
from modules.onnx import OnnxStableDiffusionPipeline
sd_model = OnnxStableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.olive_sideloaded_models_path)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
shared.log.error(f'Failed loading {op}: {checkpoint_info.path} olive={e}')
return
from modules.onnx import OnnxAutoPipeline
if os.path.isdir(checkpoint_info.path):
sd_model = OnnxAutoPipeline.from_pretrained(checkpoint_info.path)
else:
err1 = None
err2 = None
err3 = None
try: # try autopipeline first, best choice but not all pipelines are available
sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model = OnnxAutoPipeline.from_single_file(checkpoint_info.path)
if sd_model is None and os.path.isdir(checkpoint_info.path):
err1 = None
err2 = None
err3 = None
try: # try autopipeline first, best choice but not all pipelines are available
sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err1 = e
# shared.log.error(f'AutoPipeline: {e}')
try: # try diffusion pipeline next second-best choice, works for most non-linked pipelines
if err1 is not None:
sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err1 = e
try: # try diffusion pipeline next second-best choice, works for most non-linked pipelines
if err1 is not None:
sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err2 = e
try: # try basic pipeline next just in case
if err2 is not None:
sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err3 = e # ignore last error
if err3 is not None:
shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}')
return
if model_type in ['InstaFlow']: # forced pipeline
sd_model = pipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
else:
err1, err2, err3 = None, None, None
try: # 1 - autopipeline, best choice but not all pipelines are available
sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
except Exception as e:
err2 = e
# shared.log.error(f'DiffusionPipeline: {e}')
try: # try basic pipeline next just in case
if err2 is not None:
sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err1 = e
# shared.log.error(f'AutoPipeline: {e}')
try: # 2 - diffusion pipeline, works for most non-linked pipelines
if err1 is not None:
sd_model = diffusers.DiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err2 = e
# shared.log.error(f'DiffusionPipeline: {e}')
try: # 3 - try basic pipeline just in case
if err2 is not None:
sd_model = diffusers.StableDiffusionPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
sd_model.model_type = sd_model.__class__.__name__
except Exception as e:
err3 = e # ignore last error
shared.log.error(f'StableDiffusionPipeline: {e}')
if err3 is not None:
shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}')
return
except Exception as e:
err3 = e # ignore last error
shared.log.error(f'StableDiffusionPipeline: {e}')
if err3 is not None:
shared.log.error(f'Failed loading {op}: {checkpoint_info.path} auto={err1} diffusion={err2}')
return
elif os.path.isfile(checkpoint_info.path) and checkpoint_info.path.lower().endswith('.safetensors'):
# diffusers_load_config["local_files_only"] = True
diffusers_load_config["extract_ema"] = shared.opts.diffusers_extract_ema

View File

@ -17,7 +17,6 @@ from modules import errors, shared_items, shared_state, cmd_args, theme
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx import available_execution_providers, get_default_execution_provider
from modules.olive import enable_olive_onchange
import modules.interrogate
import modules.memmon
import modules.styles
@ -440,9 +439,9 @@ options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
"onnx_sep": OptionInfo("<h2>ONNX Runtime</h2>", "", gr.HTML),
"onnx_execution_provider": OptionInfo(get_default_execution_provider().value, 'Execution Provider', gr.Dropdown, lambda: {"choices": available_execution_providers }),
"onnx_cache_converted": OptionInfo(True, 'Cache converted models'),
"onnx_olive_sep": OptionInfo("<h3>Olive</h3>", "", gr.HTML),
"onnx_enable_olive": OptionInfo(False, 'Enable pipeline for Olive', onchange=enable_olive_onchange),
"onnx_olive_float16": OptionInfo(True, 'Olive use FP16 on optimization (will use FP32 if unchecked)'),
"onnx_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
}))
@ -471,8 +470,8 @@ options_templates.update(options_section(('system-paths', "System Paths"), {
"swinir_models_path": OptionInfo(os.path.join(paths.models_path, 'SwinIR'), "Folder with SwinIR models", folder=True),
"ldsr_models_path": OptionInfo(os.path.join(paths.models_path, 'LDSR'), "Folder with LDSR models", folder=True),
"clip_models_path": OptionInfo(os.path.join(paths.models_path, 'CLIP'), "Folder with CLIP models", folder=True),
"olive_cached_models_path": OptionInfo(os.path.join(paths.models_path, 'Olive', 'cache'), "Folder with olive optimized cached models", folder=True),
"olive_sideloaded_models_path": OptionInfo(os.path.join(paths.models_path, 'Olive', 'sideloaded'), "Folder with olive optimized sideloaded models", folder=True),
"onnx_cached_models_path": OptionInfo(os.path.join(paths.models_path, 'ONNX', 'cache'), "Folder with ONNX cached models", folder=True),
"onnx_sideloaded_models_path": OptionInfo(os.path.join(paths.models_path, 'ONNX', 'sideloaded'), "Folder with ONNX models from huggingface", folder=True),
"other_paths_sep_options": OptionInfo("<h2>Other paths</h2>", "", gr.HTML),
"openvino_cache_path": OptionInfo('cache', "Directory for OpenVINO cache", folder=True),

View File

@ -26,8 +26,7 @@ def list_crossattention():
def get_pipelines():
import diffusers
from modules.onnx import OnnxStableDiffusionPipeline
from modules.olive import OlivePipeline, is_available as is_olive_available
from modules.onnx import OnnxAutoPipeline
from installer import log
pipelines = { # note: not all pipelines can be used manually as they require prior pipeline next to decoder pipeline
'Autodetect': None,
@ -40,8 +39,8 @@ def get_pipelines():
'Stable Diffusion XL Img2Img': getattr(diffusers, 'StableDiffusionXLImg2ImgPipeline', None),
'Stable Diffusion XL Inpaint': getattr(diffusers, 'StableDiffusionXLInpaintPipeline', None),
'Stable Diffusion XL Instruct': getattr(diffusers, 'StableDiffusionXLInstructPix2PixPipeline', None),
'ONNX Stable Diffusion': OnnxStableDiffusionPipeline,
'ONNX Stable Diffusion with Olive': OlivePipeline,
'ONNX Stable Diffusion': OnnxAutoPipeline,
'ONNX Stable Diffusion with Olive': OnnxAutoPipeline,
'Latent Consistency Model': getattr(diffusers, 'LatentConsistencyModelPipeline', None),
'PixArt Alpha': getattr(diffusers, 'PixArtAlphaPipeline', None),
'UniDiffuser': getattr(diffusers, 'UniDiffuserPipeline', None),

View File

@ -373,9 +373,9 @@ def create_ui():
def hf_select(evt: gr.SelectData, data):
return data[evt.index[0]][0]
def hf_download_model(hub_id: str, token, variant, revision, mirror, olive_optimized):
def hf_download_model(hub_id: str, token, variant, revision, mirror, is_onnx, custom_pipeline):
from modules.modelloader import download_diffusers_model
download_diffusers_model(hub_id, cache_dir=opts.olive_sideloaded_models_path if olive_optimized else opts.diffusers_dir, token=token, variant=variant, revision=revision, mirror=mirror)
download_diffusers_model(hub_id, cache_dir=opts.onnx_sideloaded_models_path if is_onnx else opts.diffusers_dir, token=token, variant=variant, revision=revision, mirror=mirror, custom_pipeline=custom_pipeline)
from modules.sd_models import list_models # pylint: disable=W0621
list_models()
log.info(f'Diffuser model downloaded: model="{hub_id}"')
@ -394,7 +394,7 @@ def create_ui():
with gr.Row():
hf_variant = gr.Textbox(opts.cuda_dtype.lower(), label = 'Specify model variant', placeholder='')
hf_revision = gr.Textbox('', label = 'Specify model revision', placeholder='')
hf_olive = gr.Checkbox(False, label = 'Olive optimized')
hf_onnx = gr.Checkbox(False, label = 'ONNX model')
with gr.Row():
hf_token = gr.Textbox('', label='Huggingface token', placeholder='optional access token for private or gated models')
hf_mirror = gr.Textbox('', label='Huggingface mirror', placeholder='optional mirror site for downloads')
@ -411,7 +411,7 @@ def create_ui():
hf_search_text.submit(fn=hf_search, inputs=[hf_search_text], outputs=[hf_results])
hf_search_btn.click(fn=hf_search, inputs=[hf_search_text], outputs=[hf_results])
hf_results.select(fn=hf_select, inputs=[hf_results], outputs=[hf_selected])
hf_download_model_btn.click(fn=hf_download_model, inputs=[hf_selected, hf_token, hf_variant, hf_revision, hf_mirror, hf_olive], outputs=[models_outcome])
hf_download_model_btn.click(fn=hf_download_model, inputs=[hf_selected, hf_token, hf_variant, hf_revision, hf_mirror, hf_onnx, hf_custom_pipeline], outputs=[models_outcome])
with gr.Tab(label="CivitAI"):
data = []

View File

@ -25,6 +25,8 @@ lpips
omegaconf
open-clip-torch
opencv-contrib-python-headless
olive-ai
optimum
piexif
psutil
pyyaml