Merge pull request #4139 from vladmandic/dev

Dev merge
pull/4177/head 2025-08-20
Vladimir Mandic 2025-08-20 14:42:31 -04:00 committed by GitHub
commit 3ec0d35d89
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
150 changed files with 30993 additions and 14628 deletions


@ -204,6 +204,7 @@ disable=abstract-method,
unnecessary-lambda-assigment,
unnecessary-lambda,
unused-wildcard-import,
useless-return,
use-dict-literal,
use-symbolic-message-instead,
useless-suppression,


@ -1,5 +1,56 @@
# Change Log for SD.Next
## Update for 2025-08-20
A quick service release with several important hotfixes, improved localization support, and new **Qwen** model variants...
[ReadMe](https://github.com/vladmandic/automatic/blob/master/README.md) | [ChangeLog](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) | [Docs](https://vladmandic.github.io/sdnext-docs/) | [WiKi](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.com/invite/sd-next-federal-batch-inspectors-1101998836328697867)
- **Models**
- [Qwen-Image-Edit](https://huggingface.co/Qwen/Qwen-Image-Edit)
Image editing using natural language prompting, similar to `Flux.1-Kontext`, but based on the larger 20B `Qwen-Image` model
- [Nunchaku-Qwen-Image](https://huggingface.co/nunchaku-tech/nunchaku-qwen-image)
If you have a compatible NVIDIA GPU, Nunchaku is the fastest quantization engine; it is currently available for Flux.1, SANA and Qwen-Image models
*note*: the release version of `nunchaku==0.3.2` does NOT include Qwen support, so you need to build [nunchaku](https://nunchaku.tech/docs/nunchaku/installation/installation.html) from source
- [SD.Next Model Samples Gallery](https://vladmandic.github.io/sd-samples/compare.html)
- updated with new models
- **Features**
- new *setting -> huggingface -> download method*
default is `rust`, as the new `xet` backend is known to cause issues
- support for `flux.1-kontext` lora
- support for `qwen-image` lora
- new *setting -> quantization -> modules dtype dict*
used to manually override quant types per module
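The new modules-dtype-dict setting maps module names to quantization dtypes. A minimal sketch of how such a per-module override table might be consulted — note this is illustrative only; the names and schema here are hypothetical, not SD.Next's actual settings format:

```python
# Hedged sketch: per-module quantization dtype lookup.
# Schema and dtype names are hypothetical, not SD.Next's actual format.
DEFAULT_DTYPE = 'int8'

def resolve_dtype(module_name: str, overrides: dict) -> str:
    # longest-prefix match, so 'transformer.blocks.0' can override 'transformer'
    best = ''
    for prefix in overrides:
        if module_name.startswith(prefix) and len(prefix) > len(best):
            best = prefix
    return overrides.get(best, DEFAULT_DTYPE)

# example override table: most of the transformer quantized, one block kept in bf16
overrides = {'transformer': 'int8', 'transformer.blocks.0': 'bf16', 'text_encoder': 'fp16'}
```

Modules not covered by any entry fall back to the default quant type.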
- **UI**
- new artwork for reference models in networks
thanks @liutyi
- updated [localization](https://vladmandic.github.io/sdnext-docs/Locale/) for all 8 languages
- localization support for ModernUI
single-click on the locale button rotates to the next locale
double-click resets the locale to `en`
- exclude ModernUI from list of extensions
ModernUI is enabled in settings, not by manually enabling the extension
- **Docs**
- Models and Video pages updated with links to original model repos, model licenses and original release dates
thanks @alerikaisattera
- **Fixes**
nunchaku: use new download links and default to `0.3.2`
nunchaku wheels: <https://huggingface.co/nunchaku-tech/nunchaku/tree/main>
- fix OpenVINO with offloading
- add explicit offload calls on prompt encode
- error reporting on model load failure
- fix torch version checks
- remove extra cache clear
enable explicit sync calls for `rocm` on Windows
note if a restart is needed on initial startup import error
- bypass diffusers-lora-fuse on quantized models
- monkey-patch diffusers to use original weights shape when loading lora
- guard against null prompt
install `hf_transfer` and `hf_xet` when needed
fix cropped network tags in UI
enumerate reference models on startup
don't report errors if the agent scheduler is disabled
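One of the fixes above monkey-patches diffusers during LoRA load. The general wrap-and-restore pattern reduces to the sketch below — shown on a stand-in class, since the exact diffusers internals being patched are not reproduced in this changelog:

```python
# Generic monkey-patch sketch on a stand-in class.
# The real fix wraps a diffusers internal, which is not shown here.
class Loader:                       # stand-in for the patched object
    def load(self, shape):
        return shape

_orig_load = Loader.load            # keep a reference to the original

def patched_load(self, shape):
    shape = tuple(shape)            # e.g. normalize to the original weights shape
    return _orig_load(self, shape)  # delegate to the unpatched implementation

Loader.load = patched_load          # install the patch
```

Keeping a reference to the original lets the patch delegate (or be undone) instead of reimplementing the wrapped behavior.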
## Update for 2025-08-15
### Highlights for 2025-08-15


@ -4,8 +4,10 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
## Future Candidates
- Remote TE
- Unified `CLIPTextModelWithProjection` loader
- Remote TE
- Mobile ModernUI
- [Canvas](https://konvajs.org/)
- [Modular pipelines and guiders](https://github.com/huggingface/diffusers/issues/11915)
- Refactor: Sampler options
- Refactor: [GGUF](https://huggingface.co/docs/diffusers/main/en/quantization/gguf)
@ -40,7 +42,6 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- Remove: CodeFormer
- Remove: GFPGAN
- ModernUI: Lite vs Expert mode
- [Canvas](https://konvajs.org/)
### Future Considerations
- [TensorRT](https://github.com/huggingface/diffusers/pull/11173)


@ -6,10 +6,9 @@ const process = require('process');
const { GoogleGenerativeAI } = require('@google/generative-ai');
const api_key = process.env.GOOGLE_AI_API_KEY;
const model = 'gemini-2.0-flash-exp';
const model = 'gemini-2.5-flash';
const prompt = `
Translate attached JSON from English to {language} using following rules: fields id and label should be preserved from original, field localized should be a translated version of field label and field hint should be translated in-place.
Every JSON entry should have id, label, localized and hint fields. Output should be pure JSON without any additional text. To better match translation, context of the text is related to Stable Diffusion and topic of Generative AI.`;
Translate attached JSON from English to {language} using following rules: fields id, label and reload should be preserved from original, field localized should be a translated version of field label and field hint should be translated in-place. if field is less than 3 characters, do not translate it and keep it as is. Every JSON entry should have id, label, localized, reload and hint fields. Output should be pure JSON without any additional text. To better match translation, context of the text is related to Stable Diffusion and topic of Generative AI.`;
const languages = {
hr: 'Croatian',
de: 'German',
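The updated prompt requires each translated entry to keep `id`, `label` and `reload` unchanged and to skip translating labels shorter than 3 characters. A small post-translation check enforcing those rules could look like this — the rules come from the prompt above; the function itself is illustrative:

```python
# Illustrative validator for the translation rules stated in the prompt:
# id, label and reload preserved; labels under 3 characters left untranslated.
def check_entry(original: dict, translated: dict) -> bool:
    for key in ('id', 'label', 'reload'):
        if translated.get(key) != original.get(key):
            return False
    if len(original.get('label', '')) < 3:
        # short labels must be copied verbatim into the localized field
        return translated.get('localized') == original.get('label')
    return True
```

Running such a check over the model output would catch entries where the translator altered a preserved field.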

@ -1 +1 @@
Subproject commit 574fcf4e8790e6faf3a3a500e4aedf399d0b0e4a
Subproject commit da4ccd4aa75e3b42937674ba23d406a02783df4f

11 file diffs suppressed because they are too large


@ -75,7 +75,7 @@
"skip": true,
"variant": "bf16",
"desc": "Stable Cascade is a diffusion model built upon the Würstchen architecture and its main difference to other models like Stable Diffusion is that it is working at a much smaller latent space. Why is this important? The smaller the latent space, the faster you can run inference and the cheaper the training becomes. How small is the latent space? Stable Diffusion uses a compression factor of 8, resulting in a 1024x1024 image being encoded to 128x128. Stable Cascade achieves a compression factor of 42, meaning that it is possible to encode a 1024x1024 image to 24x24, while maintaining crisp reconstructions. The text-conditional model is then trained in the highly compressed latent space. Previous versions of this architecture, achieved a 16x cost reduction over Stable Diffusion 1.5",
"preview": "stabilityai--stable-cascade.jpg",
"preview": "stabilityai--stable-cascade-lite.jpg",
"extras": "sampler: Default, cfg_scale: 4.0, image_cfg_scale: 1.0"
},
"StabilityAI Stable Diffusion 3 Medium": {
@ -91,7 +91,7 @@
"skip": true,
"variant": "fp16",
"desc": "Stable Diffusion 3.5 Medium is a Multimodal Diffusion Transformer with improvements (MMDiT-X) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.",
"preview": "stabilityai--stable-diffusion-3_5.jpg",
"preview": "stabilityai--stable-diffusion-3_5-medium.jpg",
"extras": "sampler: Default, cfg_scale: 7.0"
},
"StabilityAI Stable Diffusion 3.5 Large": {
@ -99,7 +99,7 @@
"skip": true,
"variant": "fp16",
"desc": "Stable Diffusion 3.5 Large is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.",
"preview": "stabilityai--stable-diffusion-3_5.jpg",
"preview": "stabilityai--stable-diffusion-3_5-large.jpg",
"extras": "sampler: Default, cfg_scale: 7.0"
},
"StabilityAI Stable Diffusion 3.5 Turbo": {
@ -107,7 +107,7 @@
"skip": true,
"variant": "fp16",
"desc": "Stable Diffusion 3.5 Large Turbo is a Multimodal Diffusion Transformer (MMDiT) text-to-image model with Adversarial Diffusion Distillation (ADD) that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency, with a focus on fewer inference steps.",
"preview": "stabilityai--stable-diffusion-3_5.jpg",
"preview": "stabilityai--stable-diffusion-3_5-large-turbo.jpg",
"extras": "sampler: Default, cfg_scale: 7.0"
},
@ -142,21 +142,21 @@
"lodestones Chroma Unlocked HD": {
"path": "lodestones/Chroma1-HD",
"preview": "lodestones--Chroma.jpg",
"preview": "lodestones--Chroma-HD.jpg",
"desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. Its fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and Id love to hear your thoughts! Your input and feedback are really appreciated.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 3.5"
},
"lodestones Chroma Unlocked HD Annealed": {
"path": "vladmandic/chroma-unlocked-v50-annealed",
"preview": "lodestones--Chroma.jpg",
"preview": "lodestones--Chroma-annealed.jpg",
"desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. Its fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and Id love to hear your thoughts! Your input and feedback are really appreciated.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 3.5"
},
"lodestones Chroma Unlocked HD Flash": {
"path": "lodestones/Chroma1-Flash",
"preview": "lodestones--Chroma.jpg",
"preview": "lodestones--Chroma-flash.jpg",
"desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. Its fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and Id love to hear your thoughts! Your input and feedback are really appreciated.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 1.0"
@ -170,7 +170,7 @@
},
"lodestones Chroma Unlocked v48 Detail Calibrated": {
"path": "vladmandic/chroma-unlocked-v48-detail-calibrated",
"preview": "lodestones--Chroma.jpg",
"preview": "lodestones--Chroma-detail.jpg",
"desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. Its fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and Id love to hear your thoughts! Your input and feedback are really appreciated.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 1.0"
@ -183,9 +183,16 @@
"skip": true,
"extras": ""
},
"Qwen-Image-Edit": {
"path": "Qwen/Qwen-Image-Edit",
"preview": "Qwen--Qwen-Image.jpg",
"desc": " Qwen-Image-Edit, the image editing version of Qwen-Image. Built upon our 20B Qwen-Image model, Qwen-Image-Edit successfully extends Qwen-Images unique text rendering capabilities to image editing tasks, enabling precise text editing.",
"skip": true,
"extras": ""
},
"Qwen-Lightning": {
"path": "vladmandic/Qwen-Lightning",
"preview": "Qwen--Qwen-Image.jpg",
"preview": "Qwen-Lightning.jpg",
"desc": " Qwen-Lightning is step-distilled from Qwen-Image to allow for generation in 8 steps.",
"skip": true,
"extras": "steps: 8"
@ -208,7 +215,7 @@
"Wan-AI Wan2.1 1.3B": {
"path": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
"preview": "Wan-AI--Wan2.1.jpg",
"preview": "Wan-AI--Wan2.1-1_3B.jpg",
"desc": "Wan is an advanced and powerful visual generation model developed by Tongyi Lab of Alibaba Group. It can generate videos based on text, images, and other control signals. The Wan2.1 series models are now fully open-source.",
"skip": true,
"extras": "sampler: Default"
@ -222,14 +229,14 @@
},
"Wan-AI Wan2.2 5B": {
"path": "Wan-AI/Wan2.2-TI2V-5B-Diffusers",
"preview": "Wan-AI--Wan2.2.jpg",
"preview": "Wan-AI--Wan2.2_5B.jpg",
"desc": "Wan2.2, offering more powerful capabilities, better performance, and superior visual quality. With Wan2.2, we have focused on incorporating the following technical innovations: MoE Architecture, Data Scalling, Cinematic Aesthetics, Efficient High-Definition Hybrid",
"skip": true,
"extras": "sampler: Default"
},
"Wan-AI Wan2.2 A14B": {
"path": "Wan-AI/Wan2.2-T2V-A14B-Diffusers",
"preview": "Wan-AI--Wan2.2.jpg",
"preview": "Wan2.2-T2V-A14B.jpg",
"desc": "Wan2.2, offering more powerful capabilities, better performance, and superior visual quality. With Wan2.2, we have focused on incorporating the following technical innovations: MoE Architecture, Data Scalling, Cinematic Aesthetics, Efficient High-Definition Hybrid",
"skip": true,
"extras": "sampler: Default"
@ -244,14 +251,14 @@
},
"Freepik F-Lite Texture": {
"path": "Freepik/F-Lite-Texture",
"preview": "Freepik--F-Lite.jpg",
"preview": "Freepik--F-Lite-Texture.jpg",
"desc": "F Lite is a 10B parameter diffusion model created by Freepik and Fal, trained exclusively on copyright-safe and SFW content. The model was trained on Freepik's internal dataset comprising approximately 80 million copyright-safe images, making it the first publicly available model of this scale trained exclusively on legally compliant and SFW content.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 3.5"
},
"Freepik F-Lite 7B": {
"path": "Freepik/F-Lite-7B",
"preview": "Freepik--F-Lite.jpg",
"preview": "Freepik--F-Lite-7B.jpg",
"desc": "F Lite is a 10B parameter diffusion model created by Freepik and Fal, trained exclusively on copyright-safe and SFW content. The model was trained on Freepik's internal dataset comprising approximately 80 million copyright-safe images, making it the first publicly available model of this scale trained exclusively on legally compliant and SFW content.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 3.5"
@ -274,43 +281,43 @@
"NVLabs Sana 1.5 1.6B 1k": {
"path": "Efficient-Large-Model/SANA1.5_1.6B_1024px_diffusers",
"desc": "Sana is an efficient model with scaling of training-time and inference time techniques. SANA-1.5 delivers: efficient model growth from 1.6B Sana-1.0 model to 4.8B, achieving similar or better performance than training from scratch and saving 60% training cost; efficient model depth pruning, slimming any model size as you want; powerful VLM selection based inference scaling, smaller model+inference scaling > larger model.",
"preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
"preview": "Efficient-Large-Model--Sana15_1600M_1024px_diffusers.jpg",
"skip": true
},
"NVLabs Sana 1.5 4.8B 1k": {
"path": "Efficient-Large-Model/SANA1.5_4.8B_1024px_diffusers",
"desc": "Sana is an efficient model with scaling of training-time and inference time techniques. SANA-1.5 delivers: efficient model growth from 1.6B Sana-1.0 model to 4.8B, achieving similar or better performance than training from scratch and saving 60% training cost; efficient model depth pruning, slimming any model size as you want; powerful VLM selection based inference scaling, smaller model+inference scaling > larger model.",
"preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
"preview": "Efficient-Large-Model--Sana15_4800M_1024px_diffusers.jpg",
"skip": true
},
"NVLabs Sana 1.5 1.6B 1k Sprint": {
"path": "Efficient-Large-Model/Sana_Sprint_1.6B_1024px_diffusers",
"desc": "SANA-Sprint is an ultra-efficient diffusion model for text-to-image (T2I) generation, reducing inference steps from 20 to 1-4 while achieving state-of-the-art performance.",
"preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
"preview": "Efficient-Large-Model--Sana15_Sprint_1600M_1024px_diffusers.jpg",
"skip": true
},
"NVLabs Sana 1.0 1.6B 4k": {
"path": "Efficient-Large-Model/Sana_1600M_4Kpx_BF16_diffusers",
"desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.",
"preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
"preview": "Efficient-Large-Model--Sana15_1600M_4Kpx_diffusers.jpg",
"skip": true
},
"NVLabs Sana 1.0 1.6B 2k": {
"path": "Efficient-Large-Model/Sana_1600M_2Kpx_BF16_diffusers",
"desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.",
"preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
"preview": "Efficient-Large-Model--Sana1_1600M_2Kpx_diffusers.jpg",
"skip": true
},
"NVLabs Sana 1.0 1.6B 1k": {
"path": "Efficient-Large-Model/Sana_1600M_1024px_diffusers",
"desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.",
"preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
"preview": "Efficient-Large-Model--Sana1_1600M_1024px_diffusers.jpg",
"skip": true
},
"NVLabs Sana 1.0 0.6B 0.5k": {
"path": "Efficient-Large-Model/Sana_600M_512px_diffusers",
"desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.",
"preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
"preview": "Efficient-Large-Model--Sana1_600M_1024px_diffusers.jpg",
"skip": true
},
@ -323,7 +330,7 @@
"nVidia Cosmos-Predict2 T2I 14B": {
"path": "nvidia/Cosmos-Predict2-14B-Text2Image",
"desc": "Cosmos-Predict2: A family of highly performant pre-trained world foundation models purpose-built for generating physics-aware images, videos and world states for physical AI development.",
"preview": "nvidia--Cosmos-Predict2-2B-Text2Image.jpg",
"preview": "nvidia--Cosmos-Predict2-14B-Text2Image.jpg",
"skip": true
},
@ -350,7 +357,7 @@
"AuraFlow 0.2": {
"path": "fal/AuraFlow-v0.2",
"desc": "AuraFlow v0.2 is the fully open-sourced largest flow-based text-to-image generation model. The model was trained with more compute compared to the previous version, AuraFlow-v0.1",
"preview": "fal--AuraFlow-v0.3.jpg",
"preview": "fal--AuraFlow-v0.2.jpg",
"skip": true
},
@ -404,21 +411,21 @@
"Pixart-Σ Small": {
"path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-512-MS",
"desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.",
"preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg",
"preview": "PixArt-alpha--pixart_sigma_sdxl2-512.jpg",
"skip": true,
"extras": "width: 512, height: 512, sampler: Default, cfg_scale: 2.0"
},
"Pixart-Σ Medium": {
"path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS",
"desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.",
"preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg",
"preview": "PixArt-alpha--pixart_sigma_sdxl2-1024.jpg",
"skip": true,
"extras": "sampler: Default, cfg_scale: 2.0"
},
"Pixart-Σ Large": {
"path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-2K-MS",
"desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.",
"preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg",
"preview": "PixArt-alpha--pixart_sigma_sdxl2-2K.jpg",
"skip": true,
"extras": "sampler: Default, cfg_scale: 2.0"
},
@ -432,19 +439,19 @@
"Tencent HunyuanDiT 1.2 Distilled": {
"path": "Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers-Distilled",
"desc": "Hunyuan-DiT : A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding.",
"preview": "Tencent-Hunyuan--HunyuanDiT-v1.2-Diffusers.jpg",
"preview": "Tencent-Hunyuan--HunyuanDiT-v1.2-Distilled.jpg",
"extras": "sampler: Default, cfg_scale: 2.0"
},
"Tencent HunyuanDiT 1.1": {
"path": "Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers",
"desc": "Hunyuan-DiT : A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding.",
"preview": "Tencent-Hunyuan--HunyuanDiT-v1.2-Diffusers.jpg",
"preview": "Tencent-Hunyuan--HunyuanDiT-v1.1-Diffusers.jpg",
"extras": "sampler: Default, cfg_scale: 2.0"
},
"Tencent HunyuanDiT 1.1 Distilled": {
"path": "Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled",
"desc": "Hunyuan-DiT : A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding.",
"preview": "Tencent-Hunyuan--HunyuanDiT-v1.2-Diffusers.jpg",
"preview": "Tencent-Hunyuan--HunyuanDiT-v1.1-Distilled.jpg",
"extras": "sampler: Default, cfg_scale: 2.0"
},
@ -455,6 +462,7 @@
"skip": true,
"extras": "sampler: Default"
},
"AlphaVLLM Lumina 2": {
"path": "Alpha-VLLM/Lumina-Image-2.0",
"desc": "A Unified and Efficient Image Generative Model. Lumina-Image-2.0 is a 2 billion parameter flow-based diffusion transformer capable of generating images from text descriptions.",
@ -473,21 +481,21 @@
"HiDream-I1 Dev": {
"path": "HiDream-ai/HiDream-I1-Dev",
"desc": "HiDream-I1 is a new open-source image generative foundation model with 17B parameters that achieves state-of-the-art image generation quality within seconds.",
"preview": "HiDream-ai--HiDream-I1-Fast.jpg",
"preview": "HiDream-ai--HiDream-I1-Dev.jpg",
"skip": true,
"extras": "sampler: Default"
},
"HiDream-I1 Full": {
"path": "HiDream-ai/HiDream-I1-Full",
"desc": "HiDream-I1 is a new open-source image generative foundation model with 17B parameters that achieves state-of-the-art image generation quality within seconds.",
"preview": "HiDream-ai--HiDream-I1-Fast.jpg",
"preview": "HiDream-ai--HiDream-I1-Full.jpg",
"skip": true,
"extras": "sampler: Default"
},
"HiDream-E1 Full": {
"path": "HiDream-ai/HiDream-E1-Full",
"desc": "HiDream-E1 is an image editing model built on HiDream-I1.",
"preview": "HiDream-ai--HiDream-I1-Fast.jpg",
"preview": "HiDream-ai--HiDream-E1-Full.jpg",
"skip": true,
"extras": "sampler: Default"
},
@ -547,7 +555,7 @@
"Playground v2.5": {
"path": "playground-v2.5-1024px-aesthetic.fp16.safetensors@https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic/resolve/main/playground-v2.5-1024px-aesthetic.fp16.safetensors?download=true",
"desc": "Playground v2.5 is a diffusion-based text-to-image generative model, and a successor to Playground v2. Playground v2.5 is the state-of-the-art open-source model in aesthetic quality. Our user studies demonstrate that our model outperforms SDXL, Playground v2, PixArt-α, DALL-E 3, and Midjourney 5.2.",
"preview": "playgroundai--playground-v2-1024px-aesthetic.jpg",
"preview": "playgroundai--playground-v2_5-1024px-aesthetic.jpg",
"extras": "sampler: DPM++ 2M EDM"
},
@ -573,13 +581,13 @@
"ShuttleAI Shuttle 3.1 Aesthetic": {
"path": "shuttleai/shuttle-3.1-aesthetic",
"desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition",
"preview": "shuttleai--shuttle-3-diffusion.jpg",
"preview": "shuttleai--shuttle-3_1-aestetic.jpg",
"skip": true
},
"ShuttleAI Shuttle Jaguar": {
"path": "shuttleai/shuttle-jaguar",
"desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition",
"preview": "shuttleai--shuttle-3-diffusion.jpg",
"preview": "shuttleai--shuttle-jaguar.jpg",
"skip": true
},
@ -649,7 +657,7 @@
"DeepFloyd IF Large": {
"path": "DeepFloyd/IF-I-L-v1.0",
"desc": "DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model, that can generate pictures with new state-of-the-art for photorealism and language understanding. The result is a highly efficient model that outperforms current state-of-the-art models, achieving a zero-shot FID-30K score of 6.66 on the COCO dataset. It is modular and composed of frozen text mode and three pixel cascaded diffusion modules, each designed to generate images of increasing resolution: 64x64, 256x256, and 1024x1024.",
"preview": "DeepFloyd--IF-I-M-v1.0.jpg",
"preview": "DeepFloyd--IF-I-L-v1.0.jpg",
"extras": "sampler: Default"
}
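The `extras` strings throughout this metadata file follow an informal comma-separated `key: value` format (e.g. `sampler: Default, cfg_scale: 4.0`). Assuming that informal format, a minimal parser sketch:

```python
# Minimal sketch of a parser for the informal "key: value, key: value"
# extras strings above; the format is inferred from the examples, not a spec.
def parse_extras(extras: str) -> dict:
    out = {}
    for part in extras.split(','):
        key, _, value = part.partition(':')
        if key.strip():
            out[key.strip()] = value.strip()
    return out
```

Values stay as strings here; callers would convert `cfg_scale` and similar fields to numbers as needed.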


@ -107,6 +107,11 @@ def install_traceback(suppress: list = []):
def setup_logging():
from functools import partial, partialmethod
from logging.handlers import RotatingFileHandler
try:
import rich # pylint: disable=unused-import
except Exception:
log.error('Please restart SD.Next so changes take effect')
sys.exit(1)
from rich.theme import Theme
from rich.logging import RichHandler
from rich.console import Console
@ -591,9 +596,12 @@ def check_python(supported_minors=[], experimental_minors=[], reason=None):
# check diffusers version
def check_diffusers():
t_start = time.time()
if args.skip_all or args.skip_git:
if args.skip_all:
return
sha = '58bf2682612bc29b7cdb8a10ba6eee28a024d6d3' # diffusers commit hash
if args.skip_git:
install('diffusers')
return
sha = '4fcd0bc7ebb934a1559d0b516f09534ba22c8a0d' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else -1)
cur = opts.get('diffusers_version', '') if minor > -1 else ''
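`check_diffusers` above pins diffusers to a specific commit hash. Installing a package at a pinned commit is typically expressed as a `git+` URL requirement; a sketch of building such a spec, using standard pip VCS syntax and the sha from the diff (how SD.Next's `install()` helper actually consumes it is not shown in this hunk):

```python
# Sketch: build a pip requirement spec pinned to a git commit.
# Uses standard pip VCS-URL syntax; the installer plumbing is assumed.
def pinned_spec(repo: str, sha: str) -> str:
    return f'git+https://github.com/{repo}@{sha}'

spec = pinned_spec('huggingface/diffusers', '4fcd0bc7ebb934a1559d0b516f09534ba22c8a0d')
# would be passed to e.g. `pip install <spec>` by the installer
```

Pinning to a commit rather than a release lets the project track diffusers features that have merged but not yet shipped.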
@ -1310,7 +1318,6 @@ def install_requirements():
# set environment variables controling the behavior of various libraries
def set_environment():
from modules.paths import models_path
log.debug('Setting environment tuning')
os.environ.setdefault('ACCELERATE', 'True')
os.environ.setdefault('ATTN_PRECISION', 'fp16')
@ -1319,25 +1326,22 @@ def set_environment():
os.environ.setdefault('CUDA_DEVICE_DEFAULT_PERSISTING_L2_CACHE_PERCENTAGE_LIMIT', '0')
os.environ.setdefault('CUDA_LAUNCH_BLOCKING', '0')
os.environ.setdefault('CUDA_MODULE_LOADING', 'LAZY')
os.environ.setdefault('TORCH_CUDNN_V8_API_ENABLED', '1')
os.environ.setdefault('DO_NOT_TRACK', '1')
os.environ.setdefault('FORCE_CUDA', '1')
os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
os.environ.setdefault('HF_HUB_DISABLE_EXPERIMENTAL_WARNING', '1')
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
os.environ.setdefault('K_DIFFUSION_USE_COMPILE', '0')
os.environ.setdefault('KINETO_LOG_LEVEL', '3')
os.environ.setdefault('NUMEXPR_MAX_THREADS', '16')
os.environ.setdefault('PYTHONHTTPSVERIFY', '0')
os.environ.setdefault('SAFETENSORS_FAST_GPU', '1')
os.environ.setdefault('TF_CPP_MIN_LOG_LEVEL', '2')
os.environ.setdefault('TF_ENABLE_ONEDNN_OPTS', '0')
os.environ.setdefault('USE_TORCH', '1')
os.environ.setdefault('TORCH_CUDNN_V8_API_ENABLED', '1')
os.environ.setdefault('TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD', '1')
os.environ.setdefault('UVICORN_TIMEOUT_KEEP_ALIVE', '60')
os.environ.setdefault('KINETO_LOG_LEVEL', '3')
os.environ.setdefault('DO_NOT_TRACK', '1')
os.environ.setdefault('USE_TORCH', '1')
os.environ.setdefault('UV_INDEX_STRATEGY', 'unsafe-any-match')
os.environ.setdefault('UV_NO_BUILD_ISOLATION', '1')
os.environ.setdefault('HF_HUB_CACHE', opts.get('hfcache_dir', os.path.join(models_path, 'huggingface')))
os.environ.setdefault('UVICORN_TIMEOUT_KEEP_ALIVE', '60')
allocator = f'garbage_collection_threshold:{opts.get("torch_gc_threshold", 80)/100:0.2f},max_split_size_mb:512'
if opts.get("torch_malloc", "native") == 'cudaMallocAsync':
allocator += ',backend:cudaMallocAsync'
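The allocator string assembled at the end of `set_environment` presumably feeds PyTorch's CUDA allocator configuration (the env var assignment itself is outside this hunk). A standalone sketch of the same construction:

```python
# Standalone sketch mirroring the allocator-string logic above:
# gc threshold expressed as a fraction, optional cudaMallocAsync backend.
def build_alloc_conf(gc_threshold: int = 80, torch_malloc: str = 'native') -> str:
    conf = f'garbage_collection_threshold:{gc_threshold / 100:0.2f},max_split_size_mb:512'
    if torch_malloc == 'cudaMallocAsync':
        conf += ',backend:cudaMallocAsync'
    return conf
```

The threshold controls how aggressively PyTorch's caching allocator releases cached blocks back to the driver once memory pressure crosses that fraction.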


@ -732,6 +732,17 @@ div#extras_scale_to_tab div.form {
background: var(--background-fill-primary) !important;
}
.tooltip-reload-notice {
margin-top: 0.3em;
}
.tooltip-reload-text {
font-size: var(--text-xs);
font-style: italic;
opacity: 0.75;
display: block;
}
.locale {
background-color: var(--input-background-fill);
color: var(--body-text-color);
@ -1267,7 +1278,7 @@ table.settings-value-table td {
scroll-snap-align: start;
height: var(--card-size);
width: var(--card-size);
contain: strict;
contain: layout style;
}
*.extra-network-cards .card-selected {
@ -1294,6 +1305,7 @@ table.settings-value-table td {
}
.extra-network-cards .card .overlay .reference {
color: beige;
background-color: rgba(0, 0, 0, 0.2);
}


@ -12,18 +12,35 @@ const localeData = {
expandTimeout: null, // New property for expansion timeout
currentElement: null, // Track current element for expansion
};
let localeTimeout = null;
async function cycleLocale() {
log('cycleLocale', localeData.prev, localeData.locale);
const index = allLocales.indexOf(localeData.prev);
localeData.locale = allLocales[(index + 1) % allLocales.length];
clearTimeout(localeTimeout);
localeTimeout = setTimeout(() => {
log('cycleLocale', localeData.prev, localeData.locale);
const index = allLocales.indexOf(localeData.prev);
localeData.locale = allLocales[(index + 1) % allLocales.length];
localeData.btn.innerText = localeData.locale;
// localeData.btn.style.backgroundColor = localeData.locale !== 'en' ? 'var(--primary-500)' : '';
localeData.finished = false;
localeData.data = [];
localeData.prev = localeData.locale;
window.opts.ui_locale = localeData.locale;
setHints(); // eslint-disable-line no-use-before-define
}, 250);
}
async function resetLocale() {
clearTimeout(localeTimeout); // Prevent the single click logic
localeData.locale = 'en';
log('resetLocale', localeData.locale);
const index = allLocales.indexOf(localeData.locale);
localeData.locale = allLocales[(index) % allLocales.length];
localeData.btn.innerText = localeData.locale;
// localeData.btn.style.backgroundColor = localeData.locale !== 'en' ? 'var(--primary-500)' : '';
localeData.finished = false;
localeData.data = [];
localeData.prev = localeData.locale;
window.opts.ui_locale = localeData.locale;
await setHints(); // eslint-disable-line no-use-before-define
setHints(); // eslint-disable-line no-use-before-define
}
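`cycleLocale` rotates through the locale list with a modulo index, and the 250 ms timeout lets a double-click (reset) cancel the pending single-click (rotate). The rotation itself reduces to a modulo step over the list — sketched here in Python with an illustrative locale list, since the actual `allLocales` contents are not shown in this hunk:

```python
# Illustrative locale list; the real allLocales array is defined elsewhere.
LOCALES = ['en', 'hr', 'de', 'es', 'fr', 'it', 'ja', 'ko']

def next_locale(current: str) -> str:
    # same modulo rotation as cycleLocale: advance one step, wrap at the end
    index = LOCALES.index(current)
    return LOCALES[(index + 1) % len(LOCALES)]
```

The modulo wrap is what makes repeated single-clicks cycle back to the first locale instead of running off the end of the list.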
async function tooltipCreate() {
@ -40,6 +57,7 @@ async function tooltipCreate() {
gradioApp().appendChild(localeData.btn);
}
localeData.btn.innerText = localeData.locale;
localeData.btn.ondblclick = resetLocale;
localeData.btn.onclick = cycleLocale;
if (window.opts.tooltips === 'None') localeData.type = 0;
if (window.opts.tooltips === 'Browser default') localeData.type = 1;
@ -104,6 +122,25 @@ async function tooltipShow(e) {
content += `<div class="long-content"><div class="separator"></div>${e.target.dataset.longHint}</div>`;
}
// Add reload notice if needed
if (e.target.dataset.reload) {
const reloadType = e.target.dataset.reload;
let reloadText = '';
if (reloadType === 'model') {
reloadText = 'Requires model reload';
} else if (reloadType === 'server') {
reloadText = 'Requires server restart';
}
if (reloadText) {
content += `
<div class="tooltip-reload-notice">
<div class="separator"></div>
<span class="tooltip-reload-text">${reloadText}</span>
</div>
`;
}
}
localeData.hint.innerHTML = content;
localeData.hint.classList.add('tooltip-show');
@ -237,6 +274,33 @@ async function getLocaleData(desiredLocale = null) {
return json;
}
async function replaceTextContent(el, text) {
if (el.children.length === 1 && el.firstElementChild.classList.contains('mask-icon')) return;
if (el.querySelector('span')) el = el.querySelector('span');
if (el.querySelector('div')) el = el.querySelector('div');
if (el.classList.contains('mask-icon')) return; // skip icon buttons
if (el.dataset.selector) { // replace on rehosted child if exists
el = el.firstElementChild || el.querySelector(el.dataset.selector);
replaceTextContent(el, text);
return;
}
el.textContent = text;
}
async function setHint(el, entry) {
if (localeData.type === 1) {
el.title = entry.hint;
} else if (localeData.type === 2) {
el.dataset.hint = entry.hint;
if (entry.longHint && entry.longHint.length > 0) el.dataset.longHint = entry.longHint;
if (entry.reload && entry.reload.length > 0) el.dataset.reload = entry.reload;
el.addEventListener('mouseover', tooltipShow);
el.addEventListener('mouseout', tooltipHide);
} else {
// tooltips disabled
}
}
async function setHints(analyze = false) {
let json = {};
let overrideData = [];
@ -260,32 +324,21 @@ async function setHints(analyze = false) {
let hints = 0;
const t0 = performance.now();
for (const el of elements) {
// localize elements text
let found;
if (el.dataset.original) found = localeData.data.find((l) => l.label.toLowerCase().trim() === el.dataset.original.toLowerCase().trim());
else found = localeData.data.find((l) => l.label.toLowerCase().trim() === el.textContent.toLowerCase().trim());
if (found?.localized?.length > 0) {
if (!el.dataset.original) el.dataset.original = el.textContent;
localized++;
replaceTextContent(el, found.localized);
} else if (found?.label && !localeData.initial && (localeData.locale === 'en')) { // reset to english
replaceTextContent(el, found.label);
}
// replaceButtonText(el);
// set hints
if (found?.hint?.length > 0) {
hints++;
setHint(el, found);
}
}
localeData.finished = true;
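The locale button's single-click handler advances through `allLocales` with wrap-around indexing, while double-click resets to `en`. A minimal Python sketch of that cycling pattern (illustrative only; the locale list below is hypothetical, not the real one):

```python
# Sketch of the wrap-around locale cycling used by the UI button (hypothetical list).
ALL_LOCALES = ['en', 'de', 'fr', 'ja']

def cycle_locale(current):
    # index() would raise for unknown locales, so fall back to -1 (wraps to 'en')
    index = ALL_LOCALES.index(current) if current in ALL_LOCALES else -1
    return ALL_LOCALES[(index + 1) % len(ALL_LOCALES)]

def reset_locale():
    # double-click handler equivalent: always return to English
    return ALL_LOCALES[0]
```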

Binary image files changed (previews not shown)
View File

@ -200,22 +200,15 @@ diffusers.utils.deprecation_utils.deprecate = deprecate_warn
diffusers.utils.deprecate = deprecate_warn
def patch_torch_version():
if not hasattr(torch, '__version_backup__'):
torch.__version_backup__ = torch.__version__
class VersionString(str): # support both string and tuple for version check
def __ge__(self, version):
if isinstance(version, tuple):
version_tuple = re.findall(r'\d+', torch.__version__.split('+')[0])
version_tuple = tuple(int(x) for x in version_tuple[:3])
return version_tuple >= version
return super().__ge__(version)
patch_torch_version()
torch.__version__ = VersionString(torch.__version__)
errors.log.info(f'Torch: torch=={torch.__version__} torchvision=={torchvision.__version__}')
errors.log.info(f'Packages: diffusers=={diffusers.__version__} transformers=={transformers.__version__} accelerate=={accelerate.__version__} gradio=={gradio.__version__} pydantic=={pydantic.__version__} numpy=={np.__version__}')
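The `VersionString` patch exists because some libraries (bitsandbytes among them) compare `torch.__version__` against a tuple, which raises a `TypeError` on a plain string. A standalone sketch of the same idea, using a made-up version string rather than the real `torch.__version__`:

```python
import re

class VersionString(str):
    """str subclass whose >= comparison also accepts a (major, minor, patch) tuple."""
    def __ge__(self, version):
        if isinstance(version, tuple):
            # strip any local build suffix ('+cu121') before extracting digits
            parts = re.findall(r'\d+', self.split('+')[0])
            return tuple(int(x) for x in parts[:3]) >= version
        return super().__ge__(version)

v = VersionString('2.4.1+cu121')  # hypothetical version, not an actual torch build
```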

View File

@ -314,7 +314,7 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
shared.log.trace(f'Network load: type=LoRA list={shared.sd_model.get_list_adapters()}')
shared.log.trace(f'Network load: type=LoRA active={shared.sd_model.get_active_adapters()}')
shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales)
if shared.opts.lora_fuse_diffusers and not lora_overrides.disable_fuse():
shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # diffusers with fuse uses fixed scale since later apply does the scaling
shared.sd_model.unload_lora_weights()
l.timer.activate += time.time() - t1

View File

@ -47,6 +47,7 @@ force_models_diffusers = [ # forced always
]
force_classes_diffusers = [ # forced always
'FluxKontextPipeline', 'FluxKontextInpaintPipeline',
]
fuse_ignore = [
@ -68,5 +69,10 @@ def get_method(shorthash=''):
else:
return 'native'
def disable_fuse():
if hasattr(shared.sd_model, 'quantization_config'):
return True
if hasattr(shared.sd_model, 'transformer') and hasattr(shared.sd_model.transformer, 'quantization_config'):
return True
return shared.sd_model_type in fuse_ignore
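`check_fuse` was renamed to `disable_fuse` to match its meaning: a truthy result disables LoRA fusing. A standalone sketch of the check with dummy objects (the real function inspects `shared.sd_model`; the model type and ignore list below are made up):

```python
class Dummy:
    pass

def disable_fuse(model, model_type='sdxl', fuse_ignore=('hypothetical-type',)):
    # any quantization config on the model or its transformer disables fusing
    if hasattr(model, 'quantization_config'):
        return True
    if hasattr(model, 'transformer') and hasattr(model.transformer, 'quantization_config'):
        return True
    return model_type in fuse_ignore

plain = Dummy()
quantized = Dummy()
quantized.quantization_config = {'bits': 4}  # made-up config
```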

View File

@ -4,7 +4,7 @@ from installer import log, pip
from modules import devices
ver = '0.3.2'
ok = False
@ -52,7 +52,7 @@ def install_nunchaku():
url = os.environ.get('NUNCHAKU_COMMAND', None)
if url is None:
arch = f'{arch}_' if arch == 'linux' else ''
url = f'https://huggingface.co/nunchaku-tech/nunchaku/resolve/main/nunchaku-{ver}'
url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}{suffix}.whl'
cmd = f'install --upgrade {url}'
# pip install https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-0.2.0+torch2.6-cp311-cp311-linux_x86_64.whl

View File

@ -1,6 +1,8 @@
import os
import re
import sys
import copy
import json
import time
import diffusers
import transformers
@ -137,6 +139,26 @@ def create_sdnq_config(kwargs = None, allow: bool = True, module: str = 'Model',
if weights_dtype is None or weights_dtype == 'none':
return kwargs
sdnq_modules_to_not_convert = [m.strip() for m in re.split(';|,| ', shared.opts.sdnq_modules_to_not_convert) if len(m.strip()) > 1]
if len(sdnq_modules_to_not_convert) > 0:
modules_to_not_convert.extend(sdnq_modules_to_not_convert)
try:
if len(shared.opts.sdnq_modules_dtype_dict) > 2:
sdnq_modules_dtype_dict = shared.opts.sdnq_modules_dtype_dict
if "{" not in sdnq_modules_dtype_dict:
sdnq_modules_dtype_dict = "{" + sdnq_modules_dtype_dict + "}"
sdnq_modules_dtype_dict = json.loads(bytes(sdnq_modules_dtype_dict, 'utf-8'))
for key, value in sdnq_modules_dtype_dict.items():
if isinstance(value, str):
value = [m.strip() for m in re.split(';|,| ', value) if len(m.strip()) > 1]
if key not in modules_dtype_dict.keys():
modules_dtype_dict[key] = value
else:
modules_dtype_dict[key].extend(value)
except Exception as e:
log.warning(f'Quantization: SDNQ failed to parse sdnq_modules_dtype_dict: {e}')
quantization_device, return_device = get_sdnq_devices()
sdnq_config = SDNQConfig(
@ -150,9 +172,9 @@ def create_sdnq_config(kwargs = None, allow: bool = True, module: str = 'Model',
quantization_device=quantization_device,
return_device=return_device,
modules_to_not_convert=modules_to_not_convert,
modules_dtype_dict=modules_dtype_dict.copy(),
)
log.debug(f'Quantization: module="{module}" type=sdnq mode=pre dtype={weights_dtype} matmul={shared.opts.sdnq_use_quantized_matmul} group_size={shared.opts.sdnq_quantize_weights_group_size} quant_conv={shared.opts.sdnq_quantize_conv_layers} matmul_conv={shared.opts.sdnq_use_quantized_matmul_conv} dequantize_fp32={shared.opts.sdnq_dequantize_fp32} quantize_with_gpu={shared.opts.sdnq_quantize_with_gpu} quantization_device={quantization_device} return_device={return_device} device_map={shared.opts.device_map} offload_mode={shared.opts.diffusers_offload_mode} non_blocking={shared.opts.diffusers_offload_nonblocking} modules_to_not_convert={modules_to_not_convert} modules_dtype_dict={modules_dtype_dict}')
if kwargs is None:
return sdnq_config
else:
@ -179,7 +201,7 @@ def check_nunchaku(module: str = ''):
return True
def create_config(kwargs = None, allow: bool = True, module: str = 'Model', modules_to_not_convert: list = [], modules_dtype_dict: dict = {}):
if kwargs is None:
kwargs = {}
kwargs = create_sdnq_config(kwargs, allow=allow, module=module, modules_to_not_convert=modules_to_not_convert, modules_dtype_dict=modules_dtype_dict)
@ -393,8 +415,6 @@ def sdnq_quantize_model(model, op=None, sd_model=None, do_gc: bool = True, weigh
if weights_dtype is None or weights_dtype == 'none':
return model
quantization_device, return_device = get_sdnq_devices()
@ -410,6 +430,26 @@ def sdnq_quantize_model(model, op=None, sd_model=None, do_gc: bool = True, weigh
else:
modules_dtype_dict["minimum_6bit"].extend(["img_mod", "pos_embed", "time_text_embed", "img_in", "txt_in", "norm_out"])
sdnq_modules_to_not_convert = [m.strip() for m in re.split(';|,| ', shared.opts.sdnq_modules_to_not_convert) if len(m.strip()) > 1]
if len(sdnq_modules_to_not_convert) > 0:
modules_to_not_convert.extend(sdnq_modules_to_not_convert)
try:
if len(shared.opts.sdnq_modules_dtype_dict) > 2:
sdnq_modules_dtype_dict = shared.opts.sdnq_modules_dtype_dict
if "{" not in sdnq_modules_dtype_dict:
sdnq_modules_dtype_dict = "{" + sdnq_modules_dtype_dict + "}"
sdnq_modules_dtype_dict = json.loads(bytes(sdnq_modules_dtype_dict, 'utf-8'))
for key, value in sdnq_modules_dtype_dict.items():
if isinstance(value, str):
value = [m.strip() for m in re.split(';|,| ', value) if len(m.strip()) > 1]
if key not in modules_dtype_dict.keys():
modules_dtype_dict[key] = value
else:
modules_dtype_dict[key].extend(value)
except Exception as e:
log.warning(f'Quantization: SDNQ failed to parse sdnq_modules_dtype_dict: {e}')
model.eval()
backup_embeddings = None
if hasattr(model, "get_input_embeddings"):
@ -429,7 +469,7 @@ def sdnq_quantize_model(model, op=None, sd_model=None, do_gc: bool = True, weigh
quantization_device=quantization_device,
return_device=return_device,
modules_to_not_convert=modules_to_not_convert,
modules_dtype_dict=modules_dtype_dict.copy(),
op=op,
)
t1 = time.time()
@ -459,6 +499,8 @@ def sdnq_quantize_model(model, op=None, sd_model=None, do_gc: bool = True, weigh
model = model.to(devices.cpu)
if do_gc:
devices.torch_gc(force=True, reason='sdnq')
log.debug(f'Quantization: module="{op if op is not None else model.__class__}" type=sdnq mode=post dtype={weights_dtype} matmul={shared.opts.sdnq_use_quantized_matmul} group_size={shared.opts.sdnq_quantize_weights_group_size} quant_conv={shared.opts.sdnq_quantize_conv_layers} matmul_conv={shared.opts.sdnq_use_quantized_matmul_conv} dequantize_fp32={shared.opts.sdnq_dequantize_fp32} quantize_with_gpu={shared.opts.sdnq_quantize_with_gpu} quantization_device={quantization_device} return_device={return_device} device_map={shared.opts.device_map} offload_mode={shared.opts.diffusers_offload_mode} non_blocking={shared.opts.diffusers_offload_nonblocking} modules_to_not_convert={modules_to_not_convert} modules_dtype_dict={modules_dtype_dict}')
return model
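The new *modules dtype dict* setting is parsed leniently: braces are optional, and string values are split on `;`, `,`, or spaces before being merged into the defaults. A hedged standalone sketch of that parsing (the option value and defaults below are made up):

```python
import json
import re

def parse_modules_dtype_dict(opt, defaults):
    # merge a user-supplied dtype dict (JSON object, braces optional) into defaults
    merged = {k: list(v) for k, v in defaults.items()}
    if len(opt) <= 2:
        return merged
    if '{' not in opt:
        opt = '{' + opt + '}'
    for key, value in json.loads(opt).items():
        if isinstance(value, str):
            # accept ';', ',' or space as separators; drop empty fragments
            value = [m.strip() for m in re.split(';|,| ', value) if len(m.strip()) > 1]
        if key in merged:
            merged[key].extend(value)
        else:
            merged[key] = value
    return merged

result = parse_modules_dtype_dict('"minimum_6bit": "img_in, txt_in"', {'minimum_6bit': ['norm_out']})
```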

View File

@ -60,6 +60,8 @@ def get_model_type(pipe):
model_type = 'bria'
elif 'Qwen' in name:
model_type = 'qwen'
elif 'NextStep' in name:
model_type = 'nextstep'
# video models
elif "CogVideo" in name:
model_type = 'cogvideo'

View File

@ -27,16 +27,20 @@ def hf_login(token=None):
log.debug('HF login: no token provided')
return False
if os.environ.get('HUGGING_FACE_HUB_TOKEN', None) is not None:
log.warning('HF login: removing existing env variable: HUGGING_FACE_HUB_TOKEN')
os.environ.pop('HUGGING_FACE_HUB_TOKEN', None)
os.unsetenv('HUGGING_FACE_HUB_TOKEN')
if os.environ.get('HF_TOKEN', None) is not None:
log.warning('HF login: removing existing env variable: HF_TOKEN')
os.environ.pop('HF_TOKEN', None)
os.unsetenv('HF_TOKEN')
if loggedin != token:
stdout = io.StringIO()
with contextlib.redirect_stdout(stdout):
try:
hf.logout()
except Exception:
pass
with contextlib.redirect_stdout(stdout):
hf.login(token=token, add_to_git_credential=False, write_permission=False)
os.environ['HF_TOKEN'] = token
text = stdout.getvalue() or ''
obfuscated_token = 'hf_...' + token[-4:]
line = [l for l in text.split('\n') if 'Token' in l]

View File

@ -1,18 +1,57 @@
import os
import gradio as gr
from installer import log, install
from modules.shared import opts
# initialize huggingface environment
def hf_init():
os.environ.setdefault('HF_HUB_DISABLE_EXPERIMENTAL_WARNING', '1')
os.environ.setdefault('HF_HUB_DISABLE_IMPLICIT_TOKEN', '1')
os.environ.setdefault('HUGGINGFACE_HUB_VERBOSITY', 'warning')
os.environ.setdefault('HF_HUB_DISABLE_SYMLINKS_WARNING', '1')
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
os.environ.setdefault('HF_HUB_VERBOSITY', 'warning')
os.environ.setdefault('HF_HUB_DOWNLOAD_TIMEOUT', '60')
os.environ.setdefault('HF_HUB_ETAG_TIMEOUT', '10')
os.environ.setdefault('HF_ENABLE_PARALLEL_LOADING', 'true' if opts.sd_parallel_load else 'false')
os.environ.setdefault('HF_HUB_CACHE', opts.hfcache_dir)
if opts.hf_transfer_mode == 'requests':
os.environ.setdefault('HF_XET_HIGH_PERFORMANCE', 'false')
os.environ.setdefault('HF_HUB_ENABLE_HF_TRANSFER', 'false')
os.environ.setdefault('HF_HUB_DISABLE_XET', 'true')
elif opts.hf_transfer_mode == 'rust':
install('hf_transfer')
import huggingface_hub
huggingface_hub.utils._runtime.is_hf_transfer_available = lambda: True # pylint: disable=protected-access
os.environ.setdefault('HF_XET_HIGH_PERFORMANCE', 'false')
os.environ.setdefault('HF_HUB_ENABLE_HF_TRANSFER', 'true')
os.environ.setdefault('HF_HUB_DISABLE_XET', 'true')
elif opts.hf_transfer_mode == 'xet':
install('hf_xet')
import huggingface_hub
huggingface_hub.utils._runtime.is_xet_available = lambda: True # pylint: disable=protected-access
os.environ.setdefault('HF_XET_HIGH_PERFORMANCE', 'true')
os.environ.setdefault('HF_HUB_ENABLE_HF_TRANSFER', 'true')
os.environ.setdefault('HF_HUB_DISABLE_XET', 'false')
obfuscated_token = None
if len(opts.huggingface_token) > 0 and opts.huggingface_token.startswith('hf_'):
obfuscated_token = 'hf_...' + opts.huggingface_token[-4:]
log.info(f'Huggingface init: transfer={opts.hf_transfer_mode} parallel={opts.sd_parallel_load} direct={opts.diffusers_to_gpu} token="{obfuscated_token}" cache="{opts.hfcache_dir}"')
def hf_check_cache():
prev_default = os.environ.get("SD_HFCACHEDIR", None) or os.path.join(os.path.expanduser('~'), '.cache', 'huggingface', 'hub')
from modules.modelstats import stat
if opts.hfcache_dir != prev_default:
size, _mtime = stat(prev_default)
if size//1024//1024 > 0:
log.warning(f'Cache location changed: previous="{prev_default}" size={size//1024//1024} MB')
size, _mtime = stat(opts.hfcache_dir)
log.debug(f'Huggingface cache: path="{opts.hfcache_dir}" size={size//1024//1024} MB')
def hf_search(keyword):
hf_init()
import huggingface_hub as hf
hf_api = hf.HfApi()
models = hf_api.list_models(model_name=keyword, full=True, library="diffusers", limit=50, sort="downloads", direction=-1)
@ -23,12 +62,11 @@ def hf_search(keyword):
return data
def hf_select(evt, data):
return data[evt.index[0]][0]
def hf_download_model(hub_id: str, token, variant, revision, mirror, custom_pipeline):
hf_init()
from modules.modelloader import download_diffusers_model
download_diffusers_model(hub_id, cache_dir=opts.diffusers_dir, token=token, variant=variant, revision=revision, mirror=mirror, custom_pipeline=custom_pipeline)
from modules.sd_models import list_models # pylint: disable=W0621
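`hf_init` logs the HuggingFace token only in obfuscated form: the `hf_` prefix plus the last four characters. A minimal sketch of that obfuscation, using a made-up token:

```python
def obfuscate_token(token):
    # mirror the init-log behavior: show only the prefix and last four characters
    if token and token.startswith('hf_'):
        return 'hf_...' + token[-4:]
    return None

masked = obfuscate_token('hf_abcdefgh1234')  # made-up token
```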

View File

@ -102,6 +102,8 @@ def analyze():
keys = sd_models.get_signature(shared.sd_model).keys()
model.modules.clear()
for k in keys: # pylint: disable=protected-access
if k.startswith('_'):
continue
component = getattr(shared.sd_model, k, None)
module = Module(k, component)
model.modules.append(module)

View File

@ -2,12 +2,19 @@ from typing import Any, Dict, Optional
import numpy as np
import torch
import diffusers
from installer import log, installed, install
initialized = False
try:
import onnxruntime as ort
except Exception as e:
log.error(f'ONNX import error: {e}')
ort = None
class DynamicSessionOptions(ort.SessionOptions):
config: Optional[Dict] = None
@ -194,7 +201,6 @@ def initialize_onnx():
global initialized # pylint: disable=global-statement
if initialized:
return
from modules import devices
if not installed('onnx', quiet=True):
return
@ -203,53 +209,43 @@ def initialize_onnx():
from .execution_providers import ExecutionProvider, TORCH_DEVICE_TO_EP, available_execution_providers
if devices.backend == "rocm":
TORCH_DEVICE_TO_EP["cuda"] = ExecutionProvider.ROCm
log.debug(f'ONNX: version={ort.__version__}, available={available_execution_providers}')
except Exception as e:
log.error(f'ONNX initialization: {e}')
initialized = True
def initialize_onnx_pipelines():
try: # may fail on onnx import
import onnx # pylint: disable=unused-import
OnnxRuntimeModel.__module__ = 'diffusers' # OnnxRuntimeModel Hijack.
diffusers.OnnxRuntimeModel = OnnxRuntimeModel
from .pipelines.onnx_stable_diffusion_pipeline import OnnxStableDiffusionPipeline
from .pipelines.onnx_stable_diffusion_img2img_pipeline import OnnxStableDiffusionImg2ImgPipeline
from .pipelines.onnx_stable_diffusion_inpaint_pipeline import OnnxStableDiffusionInpaintPipeline
from .pipelines.onnx_stable_diffusion_upscale_pipeline import OnnxStableDiffusionUpscalePipeline
from .pipelines.onnx_stable_diffusion_xl_pipeline import OnnxStableDiffusionXLPipeline
from .pipelines.onnx_stable_diffusion_xl_img2img_pipeline import OnnxStableDiffusionXLImg2ImgPipeline
diffusers.OnnxStableDiffusionPipeline = OnnxStableDiffusionPipeline
diffusers.OnnxStableDiffusionImg2ImgPipeline = OnnxStableDiffusionImg2ImgPipeline
diffusers.OnnxStableDiffusionInpaintPipeline = OnnxStableDiffusionInpaintPipeline
diffusers.OnnxStableDiffusionUpscalePipeline = OnnxStableDiffusionUpscalePipeline
diffusers.OnnxStableDiffusionXLPipeline = OnnxStableDiffusionXLPipeline
diffusers.OnnxStableDiffusionXLImg2ImgPipeline = OnnxStableDiffusionXLImg2ImgPipeline
diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionPipeline
diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionImg2ImgPipeline
diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["onnx-stable-diffusion"] = diffusers.OnnxStableDiffusionInpaintPipeline
diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion-xl"] = diffusers.OnnxStableDiffusionXLPipeline
diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["onnx-stable-diffusion-xl"] = diffusers.OnnxStableDiffusionXLImg2ImgPipeline
diffusers.ORTStableDiffusionXLPipeline = diffusers.OnnxStableDiffusionXLPipeline # Huggingface model compatibility
diffusers.ORTStableDiffusionXLImg2ImgPipeline = diffusers.OnnxStableDiffusionXLImg2ImgPipeline
except Exception as e:
log.error(f'ONNX initialization: {e}')
def install_olive():
from installer import installed, install, log
if installed("olive-ai"):
return
try:

View File

@ -1,7 +1,6 @@
import sys
from enum import Enum
from typing import Tuple, List
from installer import log
from modules import devices
@ -15,7 +14,6 @@ class ExecutionProvider(str, Enum):
OpenVINO = "OpenVINOExecutionProvider"
EP_TO_NAME = {
ExecutionProvider.CPU: "gpu-cpu", # ???
ExecutionProvider.DirectML: "gpu-dml",
@ -33,6 +31,15 @@ TORCH_DEVICE_TO_EP = {
}
try:
import onnxruntime as ort
available_execution_providers: List[ExecutionProvider] = ort.get_available_providers()
except Exception as e:
log.error(f'ONNX import error: {e}')
available_execution_providers = []
ort = None
def get_default_execution_provider() -> ExecutionProvider:
if devices.backend == "cpu":
return ExecutionProvider.CPU

View File

@ -2,6 +2,7 @@ from typing import Optional, Dict, Any
import numpy as np
import torch
import onnxruntime as ort
import optimum.onnxruntime
from modules.onnx_impl.pipelines import CallablePipelineBase
from modules.onnx_impl.pipelines.utils import randn_tensor

Some files were not shown because too many files have changed in this diff Show More