Merge pull request #4089 from vladmandic/dev

merge dev
pull/4094/head 2025-07-31
Vladimir Mandic 2025-07-31 17:34:04 -04:00 committed by GitHub
commit 3b9193803a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
46 changed files with 781 additions and 209 deletions

View File

@ -2,12 +2,30 @@
## Update for 2025-07-31
- **Feature**
- Wan select which stage to run: *first/second/both* with configurable *boundary ratio* when running both stages
- **Models**
- [FLUX.1-Krea-Dev](https://www.krea.ai/blog/flux-krea-open-source-release)
new 12B base model compatible with FLUX.1-Dev from *Black Forest Labs*, designed with opinionated aesthetic preferences in mind
simply select in *networks -> models -> reference*
- [Chroma](https://huggingface.co/lodestones/Chroma)
great model based on FLUX.1 and then redesigned and retrained by *lodestones*
update with latest **v48**, **v48 Detail Calibrated** and **v46 Flash** variants
simply select in *networks -> models -> reference*
- **UI**
- new embedded docs/wiki search!
**Docs** search: fully-local and works in real-time on all document pages
**Wiki** search: uses github api to search online wiki pages
- modernui checkbox/radio styling
- **Offloading**
- changed **default** values for offloading based on detected gpu memory
see [offloading docs](https://vladmandic.github.io/sdnext-docs/Offload/) for details
- new feature to specify which modules to offload always or never
in *settings -> models & loading -> offload always/never*
- new `highvram` profile provides significant performance boost on gpus with more than 24gb
- **Features**
- **Wan** select which stage to run: *first/second/both* with configurable *boundary ratio* when running both stages
in settings -> model options
- prompt parser allow explicit `BOS` and `EOS` tokens in prompt
- **UI**
- modernui checkbox/radio styling
- **Nunchaku** support for *FLUX.1-Fill* and *FLUX.1-Depth* models
- **Fixes**
- fix Wan 2.2-5B I2V workflow
- fix inpaint image metadata
@ -15,6 +33,9 @@
- fix progress bar with refine/detailer
- fix api progress reporting endpoint
- fix openvino backend failing to compile
- fix nunchaku fallback on unsupported model
- api set default script-name
- avoid forced gc and rely on thresholds
- add missing interrogate in output panel
## Update for 2025-07-29

13
TODO.md
View File

@ -4,6 +4,9 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
## Current
- Reset quicksettings
- Gallery: force refresh on delete
## Future Candidates
- [Modular pipelines and guiders](https://github.com/huggingface/diffusers/issues/11915)
@ -26,6 +29,16 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- see <https://github.com/Cschlaefli/automatic>
- blocked by `insightface`
## ModernUI
- Extensions tab: Full CSS
- Models tab:
- Current
- Validate: broken table
- Update: broken table
- CivitAI: redesign downloader
- History
### Under Consideration
- [IPAdapter negative guidance](https://github.com/huggingface/diffusers/discussions/7167)

144
cli/docs.py Executable file
View File

@ -0,0 +1,144 @@
#!/usr/bin/env python
# Standalone CLI utility: relevance search over local markdown docs/wiki pages.
import os
import sys
import time
import logging
# timestamped console logging for all output of this script
logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s')
# module logger shared by the Page/Pages classes below
log = logging.getLogger(__name__)
class Page():
    """A single markdown documentation page with pre-parsed headings for relevance scoring."""

    def __init__(self, fn, full: bool = True):
        """Load and parse the markdown file `fn`.

        full: when False, raw content lines are discarded after heading extraction to save memory.
        """
        self.fn = fn
        self.title = ''
        self.size = 0   # raw content length in characters
        self.mtime = 0  # file modification time, whole seconds since epoch
        self.h1 = []    # level-1 headings: lowercased, space-padded
        self.h2 = []    # level-2 headings
        self.h3 = []    # level-3 headings
        self.lines = [] # all non-trivial lines: lowercased, trailing space appended
        self.read(full=full)

    def read(self, full: bool = True):
        """Parse page title, headings and content lines; errors are logged, never raised."""
        try:
            # title comes from the filename; padded with spaces so whole-word matching works in search()
            self.title = ' ' + os.path.basename(self.fn).replace('.md', '').replace('-', ' ') + ' '
            self.mtime = int(os.path.getmtime(self.fn))
            with open(self.fn, 'r', encoding='utf-8') as f:
                content = f.read()
            self.size = len(content)
            self.lines = [line.strip().lower() + ' ' for line in content.splitlines() if len(line)>1]
            # slicing off only the '#' markers keeps a leading space, preserving word-boundary matching
            self.h1 = [line[1:] for line in self.lines if line.startswith('# ')]
            self.h2 = [line[2:] for line in self.lines if line.startswith('## ')]
            self.h3 = [line[3:] for line in self.lines if line.startswith('### ')]
            if not full:
                self.lines.clear()
        except Exception as e:
            log.error(f'Wiki: page="{self.fn}" {e}')

    def search(self, text) -> float:
        """Return a relevance score in [0.0, 1.0] for `text`.

        Title matches rank highest, then h1/h2/h3 heading matches (word-boundary
        before plain substring), then any content-line match; 0.0 means no match.
        """
        if not text or len(text) < 2:
            return 0.0  # fix: was `return []`, which broke numeric sorting/compares in Pages.search
        text = text.lower()
        title = self.title.lower()
        if text.strip() == title.strip():
            return 1.0
        if title.startswith(f'{text} '):  # NOTE(review): title always begins with a space, so this branch looks unreachable — confirm intent
            return 0.99
        if f' {text} ' in title:
            return 0.98
        if f' {text}' in title:
            return 0.97
        if any(f' {text} ' in h for h in self.h1):
            return 0.89
        if any(f' {text}' in h for h in self.h1):
            return 0.88
        if any(f' {text} ' in h for h in self.h2):
            return 0.79
        if any(f' {text}' in h for h in self.h2):
            return 0.78
        if any(f' {text} ' in h for h in self.h3):
            return 0.69
        if any(f' {text}' in h for h in self.h3):
            return 0.68
        if text in title:
            return 0.59
        if any(text in h for h in self.h1):
            return 0.58
        if any(text in h for h in self.h2):
            return 0.57
        if any(text in h for h in self.h3):
            return 0.56
        if any(text in line for line in self.lines):
            return 0.50
        return 0.0

    def get(self):
        """Return the raw markdown content, or '' on read failure (error is logged)."""
        try:
            with open(self.fn, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            log.error(f'Wiki: page="{self.fn}" {e}')
            return ''

    def __str__(self):
        return f'Page(title="{self.title.strip()}" fn="{self.fn}" mtime={self.mtime} h1={[h.strip() for h in self.h1]} h2={len(self.h2)} h3={len(self.h3)} lines={len(self.lines)} size={self.size})'
class Pages():
    """Collection of markdown pages under a directory, searchable by relevance score."""

    def __init__(self, path: str = 'wiki'):
        """path: directory scanned for *.md files (generalized from the previously hard-coded 'wiki')."""
        self.path = path
        self.time = time.time()  # index creation time
        self.size = 0            # total raw size of all indexed pages
        self.full = None         # whether content lines were kept on last build
        self.pages: list['Page'] = []

    def build(self, full: bool = True):
        """(Re)build the page index from all *.md files directly under self.path."""
        self.pages.clear()
        self.full = full
        with os.scandir(self.path) as entries:
            for entry in entries:
                if entry.is_file() and entry.name.endswith('.md'):
                    page = Page(entry.path, full=full)
                    self.pages.append(page)
        self.size = sum(page.size for page in self.pages)

    def search(self, text: str, topk: int = 10, full: bool = True) -> 'list[tuple[float, Page]]':
        """Return up to `topk` (score, Page) tuples with score > 0, best first.

        Ties in score are broken by page mtime, newest first. Fixed annotation:
        previous `-> list[Page]` did not match the actual (score, page) tuples returned.
        """
        if not text:
            return []
        if len(self.pages) == 0:
            self.build(full=full)  # lazy: index is built on first search
        text = text.lower()
        scores = [page.search(text) for page in self.pages]
        mtimes = [page.mtime for page in self.pages]
        # key uses only (score, mtime) so Page objects are never compared directly
        found = sorted(zip(scores, mtimes, self.pages), key=lambda x: (x[0], x[1]), reverse=True)
        found = [item for item in found if item[0] > 0]
        return [(item[0], item[2]) for item in found][:topk]
index = Pages()  # shared module-level search index


if __name__ == "__main__":
    sys.argv.pop(0)  # drop the script name; remaining args form the search term
    if len(sys.argv) < 1:
        log.error("Usage: python cli/docs.py <search_term>")
        sys.exit(1)  # fix: previously fell through and performed an empty-string search
    text = ' '.join(sys.argv)
    topk = 10
    full = True
    log.info(f'Search: "{text}" topk={topk}, full={full}')
    t0 = time.time()
    results = index.search(text, topk=topk, full=full)
    t1 = time.time()
    log.info(f'Results: pages={len(results)} size={index.size} time={t1-t0:.3f}')
    for score, page in results:
        log.info(f'Score: {score:.2f} {page}')

@ -1 +1 @@
Subproject commit 716b1ee7dc8042ba2a62460425930cf3ab472919
Subproject commit 3e0108fedbec300f72c3ca6e06236419d45eb660

@ -1 +1 @@
Subproject commit 9741e151b01dda2d2697c8ca8a369e50482e976e
Subproject commit 9ee81f7cb6bfb882ecd93228b861ff9d06fb7ec8

View File

@ -165,14 +165,35 @@
"skip": true,
"extras": "sampler: Default, cfg_scale: 3.5"
},
"Black Forest Labs FLUX.1 Krea Dev": {
"path": "black-forest-labs/FLUX.1-Krea-dev",
"preview": "black-forest-labs--FLUX.1-Krea-dev.jpg",
"desc": "FLUX.1 Krea [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 4.5"
},
"lodestones Chroma": {
"path": "lodestones/Chroma",
"lodestones Chroma Unlocked v48": {
"path": "vladmandic/chroma-unlocked-v48",
"preview": "lodestones--Chroma.jpg",
"desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. Its fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and Id love to hear your thoughts! Your input and feedback are really appreciated.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 3.5"
},
"lodestones Chroma Unlocked v48 Detail Calibrated": {
"path": "vladmandic/chroma-unlocked-v48-detail-calibrated",
"preview": "lodestones--Chroma.jpg",
"desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. Its fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and Id love to hear your thoughts! Your input and feedback are really appreciated.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 3.5"
},
"lodestones Chroma Unlocked v48 Flash": {
"path": "vladmandic/chroma-unlocked-v46-flash",
"preview": "lodestones--Chroma.jpg",
"desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. Its fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and Id love to hear your thoughts! Your input and feedback are really appreciated.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 1.0"
},
"Ostris Flex.2 Preview": {
"path": "ostris/Flex.2-preview",

View File

@ -1216,7 +1216,7 @@ def ensure_base_requirements():
update_setuptools()
# used by installer itself so must be installed before requirements
install('rich==14.0.0', 'rich', quiet=True)
install('rich==14.1.0', 'rich', quiet=True)
install('psutil', 'psutil', quiet=True)
install('requests==2.32.3', 'requests', quiet=True)
ts('base', t_start)

View File

@ -79,10 +79,3 @@ async function initChangelog() {
};
search.addEventListener('keyup', searchChangelog);
}
function wikiSearch(txt) {
log('wikiSearch', txt);
const url = `https://github.com/search?q=repo%3Avladmandic%2Fautomatic+${encodeURIComponent(txt)}&type=wikis`;
// window.open(url, '_blank').focus();
return txt;
}

24
javascript/docs.js Normal file
View File

@ -0,0 +1,24 @@
// Most recent search terms requested from the UI; the paired getters expose them back to gradio.
let lastGitHubSearch = '';
let lastDocsSearch = '';

// Record the requested wiki page name and trigger the hidden github markdown button.
async function clickGitHubWikiPage(page) {
  log(`clickGitHubWikiPage: page="${page}"`);
  lastGitHubSearch = page;
  const btn = gradioApp().getElementById('github_md_btn');
  if (btn) {
    btn.click();
  }
}

// Read back the page name stored by clickGitHubWikiPage.
function getGitHubWikiPage() {
  return lastGitHubSearch;
}

// Record the requested docs page name and trigger the hidden docs markdown button.
async function clickDocsPage(page) {
  log(`clickDocsPage: page="${page}"`);
  lastDocsSearch = page;
  const btn = gradioApp().getElementById('docs_md_btn');
  if (btn) {
    btn.click();
  }
}

// Read back the page name stored by clickDocsPage.
function getDocsPage() {
  return lastDocsSearch;
}

View File

@ -1637,7 +1637,7 @@ div:has(>#tab-gallery-folders) {
cursor: cell;
padding: 8px;
background-color: var(--input-background-fill);
border-radius: var(--sd-border-radius);
border-radius: var(--radius-lg);
max-width: 100%;
}
@ -1649,7 +1649,7 @@ div:has(>#tab-gallery-folders) {
display: inline-block;
transition: transform 0.2s ease-in-out;
flex-shrink: 0;
color: var(--sd-input-text-color);
color: var(--block-title-text-color);
}
.gallery-separator-name {
@ -1811,6 +1811,84 @@ div:has(>#tab-gallery-folders) {
border-top-color: var(--primary-300);
}
.docs-search textarea {
height: 1em !important;
resize: none !important
}
.github-result, #docs_result {
max-height: 38vh;
overflow-y: auto;
}
.github-result a {
margin: 0;
padding: 0;
background-color: unset !important;
}
.github-result h3, .github-md h3 {
margin: 0;
padding: 0;
font-size: 1.1em;
}
.github-page {
background-color: var(--background-fill-primary);
margin: 1em 0 0.2em 0;
border-radius: var(--radius-lg);
padding: 4px;
font-size: 1em;
font-weight: 400;
cursor: help;
}
.github-result li {
font-size: 0.9em;
display: ruby;
filter: brightness(0.5);
}
.github-md, .docs-md {
padding: 0.2em;
}
.docs-results {
background-color: var(--sd-group-background-color);
}
.docs-card {
margin: 1em 0;
background-color: var(--background-fill-primary);
cursor: help;
padding: 0.5em;
}
.docs-card-title {
font-size: 1.2em;
line-height: 1.6em;
color: var(--button-primary-background-fill) !important;
}
.docs-card-h1 {
  font-weight: bold;
  font-size: 1.0em; /* was unitless `1.0`, which is invalid for font-size and ignored by browsers */
}
.docs-card-h2 {
  font-size: 1.0em; /* was unitless `1.0` — invalid value */
  max-height: 4em;
  overflow: hidden;
}
.docs-card-footer {
display: flex;
justify-content: space-between;
filter: brightness(0.5);
font-size: 0.9em;
margin-top: 0.2em;
}
@keyframes move {
from {
background-position-x: 0, -40px;

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@ -200,7 +200,7 @@ ReqTxt2Img = PydanticModelGenerator(
{"key": "sampler_index", "type": Union[int, str], "default": 0},
{"key": "sampler_name", "type": str, "default": "Default"},
{"key": "hr_sampler_name", "type": str, "default": "Same as primary"},
{"key": "script_name", "type": Optional[str], "default": "none"},
{"key": "script_name", "type": Optional[str], "default": ""},
{"key": "script_args", "type": list, "default": []},
{"key": "send_images", "type": bool, "default": True},
{"key": "save_images", "type": bool, "default": False},
@ -228,7 +228,7 @@ ReqImg2Img = PydanticModelGenerator(
{"key": "denoising_strength", "type": float, "default": 0.5},
{"key": "mask", "type": Optional[str], "default": None},
{"key": "include_init_images", "type": bool, "default": False, "exclude": True},
{"key": "script_name", "type": Optional[str], "default": "none"},
{"key": "script_name", "type": Optional[str], "default": ""},
{"key": "script_args", "type": list, "default": []},
{"key": "send_images", "type": bool, "default": True},
{"key": "save_images", "type": bool, "default": False},

View File

@ -280,7 +280,7 @@ def set_cuda_memory_limit():
return
try:
from modules.shared import cmd_opts
torch_gc(force=True)
torch_gc(force=True, reason='cuda')
mem = torch.cuda.get_device_properties(device).total_memory
torch.cuda.set_per_process_memory_fraction(float(opts.cuda_mem_fraction), cmd_opts.device_id if cmd_opts.device_id is not None else 0)
log.info(f'Torch memory limit: fraction={opts.cuda_mem_fraction:.2f} limit={round(opts.cuda_mem_fraction * mem / 1024 / 1024)} total={round(mem / 1024 / 1024)}')

View File

@ -176,7 +176,7 @@ def run_modelmerger(id_task, **kwargs): # pylint: disable=unused-argument
created_model = next((ckpt for ckpt in sd_models.checkpoints_list.values() if ckpt.name == filename), None)
if created_model:
created_model.calculate_shorthash()
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='merge')
shared.state.end()
return [*[gr.Dropdown.update(choices=sd_models.checkpoint_titles()) for _ in range(4)], f"Model saved to {output_modelname}"]
@ -248,7 +248,7 @@ def run_model_modules(model_type:str, model_name:str, custom_name:str,
yield from modules_sdxl.merge()
status = modules_sdxl.status
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='merge')
yield msg("modules merge complete")
if modules_sdxl.pipeline is not None:
checkpoint_info = sd_models.CheckpointInfo(filename='None')

View File

@ -183,7 +183,7 @@ def load_model(variant:str=None, pipeline:str=None, text_encoder:str=None, text_
diffusers.loaders.peft._SET_ADAPTER_SCALE_FN_MAPPING['HunyuanVideoTransformer3DModelPacked'] = lambda model_cls, weights: weights # pylint: disable=protected-access
shared.log.info(f'FramePack load: model={shared.sd_model.__class__.__name__} variant="{variant}" type={shared.sd_model_type} time={t1-t0:.2f}')
sd_models.apply_balanced_offload(shared.sd_model)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
except Exception as e:
shared.log.error(f'FramePack load: {e}')

View File

@ -516,7 +516,7 @@ def openvino_fx(subgraph, example_inputs, options=None):
else:
# Delete unused subgraphs
subgraph = subgraph.apply(sd_models.convert_to_faketensors)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='openvino')
# Model is fully supported and already cached. Run the cached OV model directly.
compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, *example_inputs)

View File

@ -632,7 +632,7 @@ def interrogate(question:str='', system_prompt:str=None, prompt:str=None, image:
if shared.opts.interrogate_offload and model is not None:
sd_models.move_model(model, devices.cpu, force=True)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='vqa')
answer = clean(answer, question)
t1 = time.time()
if not quiet:

View File

@ -436,7 +436,7 @@ def sdnq_quantize_model(model, op=None, sd_model=None, do_gc: bool = True, weigh
else:
getattr(sd_model, quant_last_model_name).to(quant_last_model_device)
if do_gc:
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='sdnq')
if shared.cmd_opts.medvram or shared.cmd_opts.lowvram or shared.opts.diffusers_offload_mode != "none":
quant_last_model_name = op
quant_last_model_device = model.device
@ -447,7 +447,7 @@ def sdnq_quantize_model(model, op=None, sd_model=None, do_gc: bool = True, weigh
elif shared.opts.diffusers_offload_mode != "none":
model = model.to(devices.cpu)
if do_gc:
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='sdnq')
return model
@ -465,7 +465,7 @@ def sdnq_quantize_weights(sd_model):
getattr(getattr(sd_model, last_model_names[0]), last_model_names[1]).to(quant_last_model_device)
else:
getattr(sd_model, quant_last_model_name).to(quant_last_model_device)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='sdnq')
quant_last_model_name = None
quant_last_model_device = None
@ -510,7 +510,7 @@ def optimum_quanto_model(model, op=None, sd_model=None, weights=None, activation
getattr(getattr(sd_model, last_model_names[0]), last_model_names[1]).to(quant_last_model_device)
else:
getattr(sd_model, quant_last_model_name).to(quant_last_model_device)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='quanto')
if shared.cmd_opts.medvram or shared.cmd_opts.lowvram or shared.opts.diffusers_offload_mode != "none":
quant_last_model_name = op
quant_last_model_device = model.device
@ -518,7 +518,7 @@ def optimum_quanto_model(model, op=None, sd_model=None, weights=None, activation
quant_last_model_name = None
quant_last_model_device = None
model.to(devices.device)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='quanto')
return model
@ -540,7 +540,7 @@ def optimum_quanto_weights(sd_model):
getattr(getattr(sd_model, last_model_names[0]), last_model_names[1]).to(quant_last_model_device)
else:
getattr(sd_model, quant_last_model_name).to(quant_last_model_device)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='quanto')
quant_last_model_name = None
quant_last_model_device = None
@ -572,7 +572,7 @@ def optimum_quanto_weights(sd_model):
sd_models.move_model(sd_model, devices.cpu)
if hasattr(sd_model, "encode_prompt"):
sd_model.encode_prompt = original_encode_prompt
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='quanto')
t1 = time.time()
log.info(f"Quantization: type=Optimum.quanto time={t1-t0:.2f}")

View File

@ -372,43 +372,44 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
else:
image.info["parameters"] = info
output_images.append(image)
for i, image in enumerate(output_images):
is_grid = len(output_images) == p.batch_size * p.n_iter + 1 and i == 0
# resize after
if p.selected_scale_tab_after == 1:
p.width_after, p.height_after = int(image.width * p.scale_by_after), int(image.height * p.scale_by_after)
if p.resize_mode_after != 0 and p.resize_name_after != 'None' and not is_grid:
image = images.resize_image(p.resize_mode_after, image, p.width_after, p.height_after, p.resize_name_after, context=p.resize_context_after)
# save images
if shared.opts.samples_save and not p.do_not_save_samples and p.outpath_samples is not None:
info = create_infotext(p, p.prompts, p.seeds, p.subseeds, index=i)
if isinstance(image, list):
for img in image:
images.save_image(img, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p) # main save image
else:
images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p) # main save image
if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([shared.opts.save_mask, shared.opts.save_mask_composite, shared.opts.return_mask, shared.opts.return_mask_composite]):
image_mask = p.mask_for_overlay.convert('RGB')
image1 = image.convert('RGBA').convert('RGBa')
image2 = Image.new('RGBa', image.size)
mask = images.resize_image(3, p.mask_for_overlay, image.width, image.height).convert('L')
image_mask_composite = Image.composite(image1, image2, mask).convert('RGBA')
if shared.opts.save_mask:
images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p, suffix="-mask")
if shared.opts.save_mask_composite:
images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p, suffix="-mask-composite")
if shared.opts.return_mask:
output_images.append(image_mask)
if shared.opts.return_mask_composite:
output_images.append(image_mask_composite)
timer.process.record('post')
devices.torch_gc()
del samples
devices.torch_gc()
for i, image in enumerate(output_images):
is_grid = len(output_images) == p.batch_size * p.n_iter + 1 and i == 0
# resize after
if p.selected_scale_tab_after == 1:
p.width_after, p.height_after = int(image.width * p.scale_by_after), int(image.height * p.scale_by_after)
if p.resize_mode_after != 0 and p.resize_name_after != 'None' and not is_grid:
image = images.resize_image(p.resize_mode_after, image, p.width_after, p.height_after, p.resize_name_after, context=p.resize_context_after)
# save images
if shared.opts.samples_save and not p.do_not_save_samples and p.outpath_samples is not None:
info = create_infotext(p, p.prompts, p.seeds, p.subseeds, index=i)
if isinstance(image, list):
for img in image:
images.save_image(img, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p) # main save image
else:
images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p) # main save image
if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([shared.opts.save_mask, shared.opts.save_mask_composite, shared.opts.return_mask, shared.opts.return_mask_composite]):
image_mask = p.mask_for_overlay.convert('RGB')
image1 = image.convert('RGBA').convert('RGBa')
image2 = Image.new('RGBa', image.size)
mask = images.resize_image(3, p.mask_for_overlay, image.width, image.height).convert('L')
image_mask_composite = Image.composite(image1, image2, mask).convert('RGBA')
if shared.opts.save_mask:
images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p, suffix="-mask")
if shared.opts.save_mask_composite:
images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p, suffix="-mask-composite")
if shared.opts.return_mask:
output_images.append(image_mask)
if shared.opts.return_mask_composite:
output_images.append(image_mask_composite)
if shared.cmd_opts.lowvram:
devices.torch_gc(force=True, reason='lowvram')
timer.process.record('post')
if not p.xyz:
if hasattr(shared.sd_model, 'restore_pipeline') and (shared.sd_model.restore_pipeline is not None):
@ -462,5 +463,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
shared.log.debug(f'Processed: timers={timer.process.dct()}')
shared.log.debug(f'Processed: memory={memstats.memory_stats()}')
devices.torch_gc(force=True, reason='final')
if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
devices.torch_gc(force=True, reason='final')
return processed

View File

@ -437,7 +437,7 @@ def get_prompts_with_weights(pipe, prompt: str):
sections += 1
if all_tokens > 0:
avg_weight = avg_weight / all_tokens
shared.log.debug(f'Prompt tokenizer: parser={shared.opts.prompt_attention} len={len(prompt)} sections={sections} tokens={all_tokens} weights={min_weight:.2f}/{avg_weight:.2f}/{max_weight:.2f}')
debug(f'Prompt tokenizer: parser={shared.opts.prompt_attention} len={len(prompt)} sections={sections} tokens={all_tokens} weights={min_weight:.2f}/{avg_weight:.2f}/{max_weight:.2f}')
except Exception:
pass
debug(f'Prompt: weights={texts_and_weights} time={(time.time() - t0):.3f}')

View File

@ -1,2 +1,19 @@
# compatibility with extensions that import scripts directly
from modules import scripts_manager
from modules.scripts_manager import * # noqa: F403 # pylint: disable=wildcard-import
scripts_txt2img = None
scripts_img2img = None
scripts_control = None
scripts_current = None
scripts_postproc = None
def register_runners():
global scripts_txt2img, scripts_img2img, scripts_control, scripts_current, scripts_postproc # pylint: disable=global-statement
scripts_txt2img = scripts_manager.scripts_txt2img
scripts_img2img = scripts_manager.scripts_img2img
scripts_control = scripts_manager.scripts_control
scripts_current = scripts_manager.scripts_current
scripts_postproc = scripts_manager.scripts_postproc

View File

@ -678,7 +678,7 @@ def load_diffuser(checkpoint_info=None, timer=None, op='model', revision=None):
errors.display(e, "Model")
if shared.opts.diffusers_offload_mode != 'balanced':
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
if sd_model is not None:
script_callbacks.model_loaded_callback(sd_model)
@ -1107,14 +1107,14 @@ def unload_model_weights(op='model'):
disable_offload(model_data.sd_model)
move_model(model_data.sd_model, 'meta')
model_data.sd_model = None
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='unload')
shared.log.debug(f'Unload {op}: {memory_stats()} after')
elif (op == 'refiner') and model_data.sd_refiner:
shared.log.debug(f'Current {op}: {memory_stats()}')
disable_offload(model_data.sd_refiner)
move_model(model_data.sd_refiner, 'meta')
model_data.sd_refiner = None
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='unload')
shared.log.debug(f'Unload {op}: {memory_stats()}')

View File

@ -1,4 +1,5 @@
import os
import re
import sys
import time
import inspect
@ -172,12 +173,14 @@ class OffloadHook(accelerate.hooks.ModelHook):
self.min_watermark = shared.opts.diffusers_offload_min_gpu_memory
self.max_watermark = shared.opts.diffusers_offload_max_gpu_memory
self.cpu_watermark = shared.opts.diffusers_offload_max_cpu_memory
self.offload_always = [m.strip() for m in re.split(';|,| ', shared.opts.diffusers_offload_always) if len(m.strip()) > 2]
self.offload_never = [m.strip() for m in re.split(';|,| ', shared.opts.diffusers_offload_never) if len(m.strip()) > 2]
self.gpu = int(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024)
self.cpu = int(shared.cpu_memory * shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024)
self.offload_map = {}
self.param_map = {}
gpu = f'{(shared.gpu_memory * shared.opts.diffusers_offload_min_gpu_memory):.2f}-{(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory):.2f}:{shared.gpu_memory:.2f}'
shared.log.info(f'Offload: type=balanced op=init watermark={self.min_watermark}-{self.max_watermark} gpu={gpu} cpu={shared.cpu_memory:.3f} limit={shared.opts.cuda_mem_fraction:.2f}')
shared.log.info(f'Offload: type=balanced op=init watermark={self.min_watermark}-{self.max_watermark} gpu={gpu} cpu={shared.cpu_memory:.3f} limit={shared.opts.cuda_mem_fraction:.2f} always={self.offload_always} never={self.offload_never}')
self.validate()
super().__init__()
@ -210,10 +213,7 @@ class OffloadHook(accelerate.hooks.ModelHook):
max_memory = { device_index: self.gpu, "cpu": self.cpu }
device_map = getattr(module, "balanced_offload_device_map", None)
if device_map is None or max_memory != getattr(module, "balanced_offload_max_memory", None):
# try:
device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
# except Exception as e:
# shared.log.error(f'Offload: type=balanced module={module.__class__.__name__} {e}')
offload_dir = getattr(module, "offload_dir", os.path.join(shared.opts.accelerate_offload_path, module.__class__.__name__))
if devices.backend == "directml":
keys = device_map.keys()
@ -233,15 +233,22 @@ class OffloadHook(accelerate.hooks.ModelHook):
perc_gpu = used_gpu / shared.gpu_memory
try:
module_size = self.model_size()
module_cls = module.__class__.__name__
prev_gpu = used_gpu
offload_now = perc_gpu > shared.opts.diffusers_offload_min_gpu_memory
if offload_now:
op = 'post:skip'
if module_cls in self.offload_never:
op = 'post:never'
elif module_cls in self.offload_always:
op = 'post:always'
module = module.to(devices.cpu)
used_gpu -= module_size
elif perc_gpu > shared.opts.diffusers_offload_min_gpu_memory:
op = 'post:mem'
module = module.to(devices.cpu)
used_gpu -= module_size
if debug:
cls = module.__class__.__name__
quant = getattr(module, "quantization_method", None)
debug_move(f'Offload: type=balanced op={"post" if offload_now else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={cls} size={module_size:.3f}')
debug_move(f'Offload: type=balanced op={op} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={module_cls} size={module_size:.3f}')
except Exception as e:
if 'out of memory' in str(e):
devices.torch_gc(fast=True, force=True, reason='oom')
@ -311,6 +318,8 @@ def apply_balanced_offload(sd_model=None, exclude=[]):
else:
keys = get_signature(pipe).keys()
keys = [k for k in keys if k not in exclude and not k.startswith('_')]
offload_always = [m.strip() for m in re.split(';|,| ', shared.opts.diffusers_offload_always) if len(m.strip()) > 2]
offload_never = [m.strip() for m in re.split(';|,| ', shared.opts.diffusers_offload_never) if len(m.strip()) > 2]
for module_name, module_size in get_pipe_modules(pipe): # pylint: disable=protected-access
# shared.log.trace(f'Offload: type=balanced op=apply pipe={pipe.__class__.__name__} module={module_name} size={module_size:.3f}')
module = getattr(pipe, module_name, None)
@ -326,16 +335,26 @@ def apply_balanced_offload(sd_model=None, exclude=[]):
perc_gpu = used_gpu / shared.gpu_memory
try:
prev_gpu = used_gpu
offload_now = (perc_gpu > shared.opts.diffusers_offload_min_gpu_memory) and (module.device != devices.cpu)
if offload_now:
module_cls = module.__class__.__name__
op = 'apply:skip'
if module_cls in offload_never:
op = 'apply:never'
elif module_cls in offload_always:
op = 'apply:always'
module = module.to(devices.cpu)
used_gpu -= module_size
cls = module.__class__.__name__
elif perc_gpu > shared.opts.diffusers_offload_min_gpu_memory:
op = 'apply:mem'
module = module.to(devices.cpu)
used_gpu -= module_size
if debug:
quant = getattr(module, "quantization_method", None)
debug_move(f'Offload: type=balanced op={op} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={module_cls} size={module_size:.3f}')
quant = getattr(module, "quantization_method", None)
if not cached:
shared.log.debug(f'Model module={module_name} type={cls} dtype={module.dtype} quant={quant} params={offload_hook_instance.param_map[module_name]:.3f} size={offload_hook_instance.offload_map[module_name]:.3f}')
shared.log.debug(f'Model module={module_name} type={module_cls} dtype={module.dtype} quant={quant} params={offload_hook_instance.param_map[module_name]:.3f} size={offload_hook_instance.offload_map[module_name]:.3f}')
if debug:
debug_move(f'Offload: type=balanced op={"move" if offload_now else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={cls} size={module_size:.3f}')
debug_move(f'Offload: type=balanced op={op} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={module_cls} size={module_size:.3f}')
except Exception as e:
if 'out of memory' in str(e):
devices.torch_gc(fast=True, force=True, reason='oom')

View File

@ -346,7 +346,7 @@ class SDNQQuantizer(DiffusersQuantizer):
def _process_model_after_weight_loading(self, model, **kwargs): # pylint: disable=unused-argument
if shared.opts.diffusers_offload_mode != "none":
model = model.to(devices.cpu)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='sdnq')
return model
def get_cuda_warm_up_factor(self):

View File

@ -8,6 +8,7 @@ import gradio as gr
import diffusers
from modules.json_helpers import readfile, writefile # pylint: disable=W0611
from modules.shared_helpers import listdir, walk_files, html_path, html, req, total_tqdm # pylint: disable=W0611
from modules.shared_defaults import get_default_modes
from modules import errors, devices, shared_items, shared_state, cmd_args, theme, history, files_cache
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
@ -86,8 +87,8 @@ elif cmd_opts.use_directml:
devices.backend = devices.get_backend(cmd_opts)
devices.device = devices.get_optimal_device()
mem_stat = memory_stats()
cpu_memory = mem_stat['ram']['total'] if "ram" in mem_stat else 0
gpu_memory = mem_stat['gpu']['total'] if "gpu" in mem_stat else 0
cpu_memory = round(mem_stat['ram']['total'] if "ram" in mem_stat else 0)
gpu_memory = round(mem_stat['gpu']['total'] if "gpu" in mem_stat else 0)
native = backend == Backend.DIFFUSERS
if not files_cache.do_cache_folders:
log.warning('File cache disabled: ')
@ -129,45 +130,7 @@ def list_samplers():
return modules.sd_samplers.all_samplers
def get_default_modes():
default_offload_mode = "none"
default_diffusers_offload_min_gpu_memory = 0.2
if not (cmd_opts.lowvram or cmd_opts.medvram):
if "gpu" in mem_stat:
if gpu_memory <= 4:
cmd_opts.lowvram = True
default_offload_mode = "sequential"
default_diffusers_offload_min_gpu_memory = 0
log.info(f"Device detect: memory={gpu_memory:.1f} default=sequential optimization=lowvram")
elif gpu_memory <= 12:
cmd_opts.medvram = True # VAE Tiling and other stuff
default_offload_mode = "balanced"
default_diffusers_offload_min_gpu_memory = 0
log.info(f"Device detect: memory={gpu_memory:.1f} default=balanced optimization=medvram")
else:
default_offload_mode = "balanced"
default_diffusers_offload_min_gpu_memory = 0.2
log.info(f"Device detect: memory={gpu_memory:.1f} default=balanced")
elif cmd_opts.medvram:
default_offload_mode = "balanced"
default_diffusers_offload_min_gpu_memory = 0
elif cmd_opts.lowvram:
default_offload_mode = "sequential"
default_diffusers_offload_min_gpu_memory = 0
default_cross_attention = "Scaled-Dot-Product"
if devices.backend == "zluda":
default_sdp_options = ['Flash attention', 'Math attention', 'Dynamic attention']
elif devices.backend in {"rocm", "directml", "cpu", "mps"}:
default_sdp_options = ['Flash attention', 'Memory attention', 'Math attention', 'Dynamic attention']
else:
default_sdp_options = ['Flash attention', 'Memory attention', 'Math attention']
return default_offload_mode, default_diffusers_offload_min_gpu_memory, default_cross_attention, default_sdp_options
startup_offload_mode, startup_diffusers_offload_min_gpu_memory, startup_cross_attention, startup_sdp_options = get_default_modes()
startup_offload_mode, startup_offload_min_gpu, startup_offload_max_gpu, startup_cross_attention, startup_sdp_options, startup_offload_always, startup_offload_never = get_default_modes(cmd_opts=cmd_opts, mem_stat=mem_stat)
options_templates.update(options_section(('sd', "Models & Loading"), {
"sd_backend": OptionInfo('diffusers', "Execution backend", gr.Radio, {"choices": ['diffusers', 'original'], "visible": False }),
@ -179,9 +142,11 @@ options_templates.update(options_section(('sd', "Models & Loading"), {
"offload_sep": OptionInfo("<h2>Model Offloading</h2>", "", gr.HTML),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'group', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(startup_diffusers_offload_min_gpu_memory, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0.1, "maximum": 1, "step": 0.01 }),
"diffusers_offload_min_gpu_memory": OptionInfo(startup_offload_min_gpu, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_gpu_memory": OptionInfo(startup_offload_max_gpu, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0.1, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False }),
"diffusers_offload_always": OptionInfo(startup_offload_always, "Modules to always offload"),
"diffusers_offload_never": OptionInfo(startup_offload_never, "Modules to never offload"),
"advanced_sep": OptionInfo("<h2>Advanced Options</h2>", "", gr.HTML),
"sd_checkpoint_autoload": OptionInfo(True, "Model auto-load on start"),
@ -299,7 +264,7 @@ options_templates.update(options_section(("quantization", "Quantization Settings
"sdnq_quantize_conv_layers": OptionInfo(False, "Quantize convolutional layers", gr.Checkbox),
"sdnq_dequantize_compile": OptionInfo(devices.has_triton(), "Dequantize using torch.compile", gr.Checkbox),
"sdnq_use_quantized_matmul": OptionInfo(False, "Use quantized MatMul", gr.Checkbox),
"sdnq_use_quantized_matmul_conv": OptionInfo(False, "Use quantized MatMul with convolutional layers", gr.Checkbox),
"sdnq_use_quantized_matmul_conv": OptionInfo(False, "Use quantized MatMul with conv", gr.Checkbox),
"sdnq_quantize_with_gpu": OptionInfo(True, "Quantize using GPU", gr.Checkbox),
"sdnq_dequantize_fp32": OptionInfo(False, "Dequantize using full precision", gr.Checkbox),
"sdnq_quantize_shuffle_weights": OptionInfo(False, "Shuffle weights in post mode", gr.Checkbox),
@ -723,13 +688,13 @@ options_templates.update(options_section(('extra_networks', "Networks"), {
"extra_networks_lora_sep": OptionInfo("<h2>LoRA</h2>", "", gr.HTML),
"extra_networks_default_multiplier": OptionInfo(1.0, "Default strength", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}),
"lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info to metadata"),
"lora_fuse_diffusers": OptionInfo(True, "LoRA fuse directly to model"),
"lora_force_reload": OptionInfo(False, "LoRA force reload always"),
"lora_force_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA load using Diffusers method"),
"lora_maybe_diffusers": OptionInfo(False, "LoRA load using Diffusers method for selected models"),
"lora_maybe_diffusers": OptionInfo(False, "LoRA load using Diffusers method for selected models", gr.Checkbox, {"visible": False}),
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(1, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}),
"lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info to metadata"),
"lora_quant": OptionInfo("NF4","LoRA precision when quantized", gr.Radio, {"choices": ["NF4", "FP4"]}),
"extra_networks_styles_sep": OptionInfo("<h2>Styles</h2>", "", gr.HTML),

View File

@ -0,0 +1,56 @@
from installer import log
from modules import devices
def get_default_modes(cmd_opts, mem_stat):
    """Derive startup defaults for offloading and attention from detected hardware.

    Inspects GPU memory (``mem_stat``) and the ``lowvram``/``medvram`` command-line
    flags (``cmd_opts`` may be mutated: the flags are set when a small GPU is
    auto-detected) and returns a 7-tuple:
    (offload_mode, offload_min_gpu, offload_max_gpu, cross_attention,
     sdp_options, offload_always, offload_never).
    """
    offload_mode = "none"
    offload_min_gpu = 0.2
    offload_max_gpu = 0.6
    offload_always = ''  # no modules are force-offloaded by default
    offload_never = ''
    vram = round(mem_stat['gpu']['total']) if "gpu" in mem_stat else 0
    if cmd_opts.lowvram or cmd_opts.medvram:
        # explicit user override: medvram takes precedence when both flags are set
        offload_mode = "balanced" if cmd_opts.medvram else "sequential"
        offload_min_gpu = 0
    elif "gpu" in mem_stat:
        if vram <= 4:
            cmd_opts.lowvram = True
            offload_mode = "sequential"
            offload_min_gpu = 0
            log.info(f"Device detect: memory={vram:.1f} default=sequential optimization=lowvram")
        elif vram <= 12:
            cmd_opts.medvram = True # VAE Tiling and other stuff
            offload_mode = "balanced"
            offload_min_gpu = 0
            log.info(f"Device detect: memory={vram:.1f} default=balanced optimization=medvram")
        elif vram >= 24:
            # high-vram profile: raise the high watermark and keep small modules resident
            offload_mode = "balanced"
            offload_max_gpu = 0.8
            offload_never = ', '.join(['CLIPTextModel', 'CLIPTextModelWithProjection', 'AutoencoderKL'])
            log.info(f"Device detect: memory={vram:.1f} default=balanced optimization=highvram")
        else:
            offload_mode = "balanced"
            log.info(f"Device detect: memory={vram:.1f} default=balanced")
    cross_attention = "Scaled-Dot-Product"
    # backend-specific SDP kernel availability
    if devices.backend == "zluda":
        sdp_options = ['Flash attention', 'Math attention', 'Dynamic attention']
    elif devices.backend in {"rocm", "directml", "cpu", "mps"}:
        sdp_options = ['Flash attention', 'Memory attention', 'Math attention', 'Dynamic attention']
    else:
        sdp_options = ['Flash attention', 'Memory attention', 'Math attention']
    return (
        offload_mode,
        offload_min_gpu,
        offload_max_gpu,
        cross_attention,
        sdp_options,
        offload_always,
        offload_never
    )

View File

@ -123,14 +123,9 @@ def create_ui(startup_timer = None):
timer.startup.record("ui-extensions")
with gr.Blocks(analytics_enabled=False) as info_interface:
with gr.Tabs(elem_id="tabs_info"):
with gr.TabItem("Change log", id="change_log", elem_id="system_tab_changelog"):
from modules import ui_docs
ui_docs.create_ui_logs()
with gr.TabItem("Wiki", id="wiki", elem_id="system_tab_wiki"):
from modules import ui_docs
ui_docs.create_ui_wiki()
from modules import ui_docs
ui_docs.create_ui()
timer.startup.record("ui-info")
with gr.Blocks(analytics_enabled=False) as extensions_interface:
from modules import ui_extensions

View File

@ -1,5 +1,231 @@
import os
import time
import gradio as gr
from modules import ui_symbols, ui_components
from installer import install, log
class Page():
    """Indexed representation of a single markdown docs/wiki page used for local search.

    The title is derived from the file name; headings and lowercased lines are
    extracted on construction so that `search()` can score a query quickly.
    """

    def __init__(self, fn, full: bool = True):
        self.fn = fn
        self.title = ''
        self.size = 0       # raw content length in characters
        self.mtime = 0      # time.struct_time of last modification once read
        self.h1 = []        # level-1 headings, lowercased
        self.h2 = []        # level-2 headings, lowercased
        self.h3 = []        # level-3 headings, lowercased
        self.lines = []     # all non-trivial lines, lowercased (empty when full=False)
        self.read(full=full)

    def read(self, full: bool = True):
        """Parse the markdown file: derive title from the filename, collect headings and lines.

        When `full` is False the per-line index is dropped to save memory
        (only title/heading matching remains possible). Errors are logged, not raised.
        """
        try:
            # title is padded with spaces so word-boundary matching in search() works
            self.title = ' ' + os.path.basename(self.fn).replace('.md', '').replace('-', ' ') + ' '
            self.mtime = time.localtime(os.path.getmtime(self.fn))
            with open(self.fn, 'r', encoding='utf-8') as f:
                content = f.read()
            self.size = len(content)
            self.lines = [line.strip().lower() + ' ' for line in content.splitlines() if len(line) > 1]
            self.h1 = [line[1:] for line in self.lines if line.startswith('# ')]
            self.h2 = [line[2:] for line in self.lines if line.startswith('## ')]
            self.h3 = [line[3:] for line in self.lines if line.startswith('### ')]
            if not full:
                self.lines.clear()
        except Exception as e:
            log.error(f'Search docs: page="{self.fn}" {e}')

    def search(self, text) -> float:
        """Score how well `text` matches this page; 0.0 means no match.

        Scoring ladder: exact/prefix/word title matches > h1 > h2 > h3 >
        substring matches > body-line matches.
        """
        # fix: always return a float; the original returned [] for short queries,
        # which breaks numeric comparisons such as `score > 0` in callers
        if not text or len(text) < 2:
            return 0.0
        text = text.lower()
        title = self.title.lower()  # hoisted: original recomputed this per comparison
        if text.strip() == title.strip():
            return 1.0
        if title.startswith(f'{text} '):
            return 0.99
        if f' {text} ' in title:
            return 0.98
        if f' {text}' in title:
            return 0.97
        if any(f' {text} ' in h for h in self.h1):
            return 0.89
        if any(f' {text}' in h for h in self.h1):
            return 0.88
        if any(f' {text} ' in h for h in self.h2):
            return 0.79
        if any(f' {text}' in h for h in self.h2):
            return 0.78
        if any(f' {text} ' in h for h in self.h3):
            return 0.69
        if any(f' {text}' in h for h in self.h3):
            return 0.68
        if text in title:
            return 0.59
        if any(text in h for h in self.h1):
            return 0.58
        if any(text in h for h in self.h2):
            return 0.57
        if any(text in h for h in self.h3):
            return 0.56
        if any(text in line for line in self.lines):
            return 0.50
        return 0.0

    def get(self):
        """Return the raw file content; logs and returns an error string when unreadable."""
        if self.fn is None or not os.path.exists(self.fn):
            log.error(f'Search docs: page="{self.fn}" does not exist')
            return f'page="{self.fn}" does not exist'
        try:
            with open(self.fn, 'r', encoding='utf-8') as f:
                content = f.read()
            return content
        except Exception as e:
            log.error(f'Search docs: page="{self.fn}" {e}')
            return ''

    def __str__(self):
        return f'Page(title="{self.title.strip()}" fn="{self.fn}" mtime={self.mtime} h1={[h.strip() for h in self.h1]} h2={len(self.h2)} h3={len(self.h3)} lines={len(self.lines)} size={self.size})'
class Pages():
    """Collection of `Page` objects built from the local 'wiki' folder with ranked search."""

    def __init__(self):
        self.time = time.time()           # construction timestamp
        self.size = 0                     # total indexed content size in characters
        self.full = None                  # whether pages were indexed with full line data
        # fix: annotation quoted so it is not evaluated at runtime before Page exists
        self.pages: 'list[Page]' = []

    def build(self, full: bool = True):
        """(Re)scan the 'wiki' folder and index every markdown file found there."""
        self.pages.clear()
        self.full = full
        with os.scandir('wiki') as entries:
            for entry in entries:
                if entry.is_file() and entry.name.endswith('.md'):
                    page = Page(entry.path, full=full)
                    self.pages.append(page)
        self.size = sum(page.size for page in self.pages)

    def search(self, text: str, topk: int = 10, full: bool = True) -> 'list[tuple[float, Page]]':
        """Return up to `topk` (score, Page) tuples ranked by score, newest first on ties.

        fix: return annotation corrected — the original declared `list[Page]`
        but the method returns (score, page) tuples.
        """
        if not text or len(text) < 2:
            return []
        if len(self.pages) == 0:
            self.build(full=full)  # lazy index build on first search
        try:
            text = text.lower()
            scores = [page.search(text) for page in self.pages]
            mtimes = [page.mtime for page in self.pages]
            # explicit key avoids ever comparing Page objects on full ties
            found = sorted(zip(scores, mtimes, self.pages), key=lambda x: (x[0], x[1]), reverse=True)
            found = [item for item in found if item[0] > 0]
            return [(item[0], item[2]) for item in found][:topk]
        except Exception as e:
            log.error(f'Search docs: text="{text}" {e}')
            return []

    def get(self, title: str) -> 'Page':
        """Return the page whose title matches `title` (case-insensitive), or an empty Page."""
        if len(self.pages) == 0:
            self.build(full=self.full)
        for page in self.pages:
            if page.title.lower().strip() == title.lower().strip():
                return page
        return Page('')
index = Pages()
def get_docs_page(page_title: str) -> str:
    """Return the raw markdown content of the indexed docs page matching `page_title`."""
    if not index.pages:  # lazy-build the shared index on first access
        index.build(full=True)
    found = index.get(page_title)
    log.debug(f'Search docs: title="{page_title}" {found}')
    return found.get()
def search_html(pages: list[Page]) -> str:
    """Render (score, Page) search results as clickable HTML cards.

    Entries with a non-positive score are skipped; the page title is embedded in
    the onclick handler so the frontend can open the page via `clickDocsPage`.
    """
    cards = []
    for score, page in pages:
        if score <= 0.0:
            continue
        cards.append(f'''
            <div class="docs-card" onclick="clickDocsPage('{page.title}')">
                <div class="docs-card-title">{page.title.strip()}</div>
                <div class="docs-card-h1">Heading | {' | '.join([h.strip() for h in page.h1])}</div>
                <div class="docs-card-h2"><b>Topics</b> | {' | '.join([h.strip() for h in page.h2])}</div>
                <div class="docs-card-footer">
                    <span class="docs-card-score">Score | {score}</span>
                    <span class="docs-card-mtime">Last modified | {time.strftime('%c', page.mtime)}</span>
                </div>
            </div>''')
    return ''.join(cards)
def search_docs(search_term):
    """Run a local docs search and return the rendered HTML result cards."""
    topk = 10
    full = True
    started = time.time()
    results = index.search(search_term, topk=topk, full=full)
    elapsed = time.time() - started
    log.debug(f'Search results: search="{search_term}" topk={topk}, full={full} pages={len(results)} size={index.size} time={elapsed:.3f}')
    for score, page in results:
        log.trace(f'Search results: score={score:.2f} {page}')
    return search_html(results)
def get_github_page(page):
    """Read a local wiki markdown page by name; returns an error string on failure."""
    fn = os.path.join('wiki', f'{page}.md')
    try:
        with open(fn, 'r', encoding='utf-8') as f:
            content = f.read()
        log.debug(f'Search wiki: page="{page}" size={len(content)}')
    except Exception as e:
        log.error(f'Search wiki: page="{page}" {e}')
        content = f'Error: {e}'
    return content
def search_github(search_term):
    """Search the sdnext GitHub wiki via the github.com search page and return result HTML.

    Scrapes the public HTML search results (no API token), rewrites wiki links into
    clickable divs handled by the frontend, and absolutizes remaining relative links.
    Returns an HTML fragment, a 'No results found' message, or an 'Error: <code>' string.
    """
    import requests
    from urllib.parse import quote
    install('beautifulsoup4')  # lazy install + import: bs4 only needed for this feature
    from bs4 import BeautifulSoup
    url = f'https://github.com/search?q=repo%3Avladmandic%2Fsdnext+{quote(search_term)}&type=wikis'
    res = requests.get(url, timeout=10)
    pages = []  # wiki page names discovered in the results, for the debug log below
    if res.status_code == 200:
        html = res.content
        soup = BeautifulSoup(html, 'html.parser')
        # remove header links (profile hovercards) before rewriting, so they are not converted below
        tags = soup.find_all(attrs={"data-hovercard-url": "/vladmandic/sdnext/hovercard"})
        for tag in tags:
            tag.extract()
        # replace relative links with full links
        tags = soup.find_all('a')
        for tag in tags:
            if tag.has_attr('href'):
                if tag['href'].startswith('/vladmandic/sdnext/wiki/'):
                    # turn wiki links into divs the UI intercepts via clickGitHubWikiPage()
                    page = tag['href'].replace('/vladmandic/sdnext/wiki/', '')
                    tag.name = 'div'
                    tag['class'] = 'github-page'
                    tag['onclick'] = f'clickGitHubWikiPage("{page}")'
                    pages.append(page)
                elif tag['href'].startswith('/'):
                    tag['href'] = 'https://github.com' + tag['href']
        # find result only: keep just the results list, drop the rest of the search page
        # NOTE(review): relies on GitHub's data-testid="results-list" markup — verify if scraping breaks
        result = soup.find(attrs={"data-testid": "results-list"})
        if result is None:
            # NOTE(review): this early return skips the log.debug below — presumably intentional
            return 'No results found'
        html = str(result)
    else:
        html = f'Error: {res.status_code}'
    log.debug(f'Search wiki: code={res.status_code} text="{search_term}" pages={pages}')
    return html
def create_ui_logs():
@ -10,57 +236,49 @@ def create_ui_logs():
return content
with gr.Column():
get_changelog_btn = gr.Button(value='Get changelog', elem_id="get_changelog")
gr.HTML('<a href="https://github.com/vladmandic/sdnext/blob/dev/CHANGELOG.md" style="color: #AAA" target="_blank">&nbsp Open GitHub Changelog</a>')
get_changelog_btn = gr.Button(value='Get Changelog', elem_id="get_changelog")
with gr.Column():
_changelog_search = gr.Textbox(label="Search Changelog", elem_id="changelog_search")
_changelog_search = gr.Textbox(label="Search Changelog", elem_id="changelog_search", elem_classes="docs-search")
_changelog_result = gr.HTML(elem_id="changelog_result")
changelog_markdown = gr.Markdown('', elem_id="changelog_markdown")
get_changelog_btn.click(fn=get_changelog, outputs=[changelog_markdown], show_progress=True)
def create_ui_wiki():
def search_github(search_term):
import requests
from urllib.parse import quote
from installer import install
install('beautifulsoup4')
from bs4 import BeautifulSoup
url = f'https://github.com/search?q=repo%3Avladmandic%2Fautomatic+{quote(search_term)}&type=wikis'
res = requests.get(url, timeout=10)
if res.status_code == 200:
html = res.content
soup = BeautifulSoup(html, 'html.parser')
# remove header links
tags = soup.find_all(attrs={"data-hovercard-url": "/vladmandic/sdnext/hovercard"})
for tag in tags:
tag.extract()
# replace relative links with full links
tags = soup.find_all('a')
for tag in tags:
if tag.has_attr('href') and tag['href'].startswith('/'):
tag['href'] = 'https://github.com' + tag['href']
# find result only
result = soup.find(attrs={"data-testid": "results-list"})
if result is None:
return 'No results found'
html = str(result)
return html
else:
return f'Error: {res.status_code}'
def create_ui_github():
with gr.Row():
gr.HTML('<a href="https://github.com/vladmandic/sdnext/wiki" style="color: #AAA" target="_blank">&nbsp Open GitHub Wiki</a>')
github_search = gr.Textbox(label="Search GitHub Wiki Pages", elem_id="github_search", elem_classes="docs-search")
github_search_btn = ui_components.ToolButton(value=ui_symbols.search, elem_id="github_search_btn")
with gr.Row():
wiki_search = gr.Textbox(label="Search Wiki Pages", elem_id="wiki_search")
wiki_search_btn = ui_components.ToolButton(value=ui_symbols.search, elem_id="wiki_search_btn")
github_result = gr.HTML(elem_id="github_result", value='', elem_classes="github-result")
with gr.Row():
wiki_result = gr.HTML(elem_id="wiki_result", value='')
wiki_search.submit(_js="wikiSearch", fn=search_github, inputs=[wiki_search], outputs=[wiki_result])
wiki_search_btn.click(_js="wikiSearch", fn=search_github, inputs=[wiki_search], outputs=[wiki_result])
github_md_btn = gr.Button(value='html2md', elem_id="github_md_btn", visible=False)
github_md = gr.Markdown(elem_id="github_md", value='', elem_classes="github-md")
github_search.submit(fn=search_github, inputs=[github_search], outputs=[github_result], show_progress=True)
github_search_btn.click(fn=search_github, inputs=[github_search], outputs=[github_result], show_progress=True)
github_md_btn.click(fn=get_github_page, _js='getGitHubWikiPage', inputs=[github_search], outputs=[github_md], show_progress=True)
def create_ui_docs():
    """Build the local Docs search tab: search box, live results, and hidden markdown viewer."""
    # NOTE(review): elem_id values reuse "github_search"/"github_search_btn" — presumably
    # copied from the wiki tab; confirm the duplicate ids are intentional
    with gr.Row():
        search_box = gr.Textbox(label="Search Docs", elem_id="github_search", elem_classes="docs-search")
        search_btn = ui_components.ToolButton(value=ui_symbols.search, elem_id="github_search_btn")
    with gr.Row():
        result_html = gr.HTML(elem_id="docs_result", value='', elem_classes="docs-result")
    with gr.Row():
        md_btn = gr.Button(value='html2md', elem_id="docs_md_btn", visible=False)
        md_view = gr.Markdown(elem_id="docs_md", value='', elem_classes="docs-md")
    # live search: fire on submit, on every keystroke, and on the search button
    for trigger in (search_box.submit, search_box.change, search_btn.click):
        trigger(fn=search_docs, inputs=[search_box], outputs=[result_html], show_progress=False)
    # hidden button invoked from JS to render a selected page as markdown
    md_btn.click(fn=get_docs_page, _js='getDocsPage', inputs=[search_box], outputs=[md_view], show_progress=False)
def create_ui():
    """Build the Info tab group: local Docs search, GitHub Wiki search, and Changelog."""
    tabs = (
        ("Docs", "docs", "system_tab_docs", create_ui_docs),
        ("Wiki", "wiki", "system_tab_wiki", create_ui_github),
        ("Change log", "change_log", "system_tab_changelog", create_ui_logs),
    )
    with gr.Tabs(elem_id="tabs_info"):
        for label, tab_id, elem, builder in tabs:
            with gr.TabItem(label, id=tab_id, elem_id=elem):
                builder()

View File

@ -71,7 +71,7 @@ def generate(*args, **kwargs):
# cleanup memory
shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='video')
# set args
processing.fix_seed(p)

View File

@ -17,5 +17,5 @@ def load_auraflow(checkpoint_info, diffusers_load_config={}):
cache_dir = shared.opts.diffusers_dir,
**diffusers_load_config,
)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -117,7 +117,7 @@ def load_quants(kwargs, repo_id, cache_dir, allow_quant): # pylint: disable=unus
"cache_dir": cache_dir,
}
if 'transformer' not in kwargs and model_quant.check_nunchaku('Model'):
raise NotImplementedError('Nunchaku does not support Chroma Model yet. See https://github.com/mit-han-lab/nunchaku/issues/167')
shared.log.error(f'Load module: quant=Nunchaku module=transformer repo="{repo_id}" unsupported')
if 'transformer' not in kwargs and model_quant.check_quant('Model'):
load_args, quant_args = model_quant.get_dit_args(diffusers_load_config, module='Model', device_map=True, modules_to_not_convert=["distilled_guidance_layer"])
kwargs['transformer'] = diffusers.ChromaTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", **load_args, **quant_args)
@ -187,7 +187,7 @@ def load_chroma(checkpoint_info, diffusers_load_config): # triggered by opts.sd_
# unload current model
sd_models.unload_model_weights()
shared.sd_model = None
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
if shared.opts.teacache_enabled:
from modules import teacache
@ -277,5 +277,5 @@ def load_chroma(checkpoint_info, diffusers_load_config): # triggered by opts.sd_
for k in kwargs.keys():
kwargs[k] = None
sd_hijack_te.init_hijack(pipe)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe, allow_post_quant

View File

@ -118,12 +118,16 @@ def load_quants(kwargs, repo_id, cache_dir, allow_quant): # pylint: disable=unus
import nunchaku
nunchaku_precision = nunchaku.utils.get_precision()
nunchaku_repo = None
if 'kontext' in repo_id.lower():
if 'flux.1-kontext' in repo_id.lower():
nunchaku_repo = f"mit-han-lab/nunchaku-flux.1-kontext-dev/svdq-{nunchaku_precision}_r32-flux.1-kontext-dev.safetensors"
elif 'dev' in repo_id.lower():
elif 'flux.1-dev' in repo_id.lower():
nunchaku_repo = f"mit-han-lab/nunchaku-flux.1-dev/svdq-{nunchaku_precision}_r32-flux.1-dev.safetensors"
elif 'schnell' in repo_id.lower():
elif 'flux.1-schnell' in repo_id.lower():
nunchaku_repo = f"mit-han-lab/nunchaku-flux.1-schnell/svdq-{nunchaku_precision}_r32-flux.1-schnell.safetensors"
elif 'flux.1-fill' in repo_id.lower():
nunchaku_repo = f"mit-han-lab/svdq-fp4-flux.1-fill-dev/svdq-{nunchaku_precision}_r32-flux.1-schnell.safetensors"
elif 'flux.1-depth' in repo_id.lower():
nunchaku_repo = f"mit-han-lab/svdq-int4-flux.1-depth-dev/svdq-{nunchaku_precision}_r32-flux.1-schnell.safetensors"
elif 'shuttle' in repo_id.lower():
nunchaku_repo = f"mit-han-lab/nunchaku-shuttle-jaguar/svdq-{nunchaku_precision}_r32-shuttle-jaguar.safetensors"
else:
@ -220,7 +224,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
# unload current model
sd_models.unload_model_weights()
shared.sd_model = None
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
if shared.opts.teacache_enabled:
from modules import teacache
@ -356,5 +360,5 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
for k in kwargs.keys():
kwargs[k] = None
sd_hijack_te.init_hijack(pipe)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe, allow_post_quant

View File

@ -196,5 +196,5 @@ def load_flux_nf4(checkpoint_info, prequantized: bool = True):
errors.display(e, 'FLUX:')
del original_state_dict
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return transformer, text_encoder_2

View File

@ -23,5 +23,5 @@ def load_kolors(_checkpoint_info, diffusers_load_config={}):
**diffusers_load_config,
)
pipe.vae.config.force_upcast = True
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -14,7 +14,7 @@ def load_lumina(_checkpoint_info, diffusers_load_config={}):
cache_dir = shared.opts.diffusers_dir,
**load_config,
)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe
@ -91,5 +91,5 @@ def load_lumina2(checkpoint_info, diffusers_load_config={}):
)
sd_hijack_te.init_hijack(pipe)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -52,5 +52,5 @@ def load_meissonic(checkpoint_info, diffusers_load_config={}):
diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["meissonic"] = PipelineMeissonic
diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["meissonic"] = PipelineMeissonicImg2Img
diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["meissonic"] = PipelineMeissonicInpaint
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -28,5 +28,5 @@ def load_omnigen(checkpoint_info, diffusers_load_config={}): # pylint: disable=u
**load_config,
)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -45,5 +45,5 @@ def load_omnigen2(checkpoint_info, diffusers_load_config={}): # pylint: disable=
)
pipe.transformer = transformer # for omnigen2 transformer must be loaded after pipeline
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -40,5 +40,5 @@ def load_pixart(checkpoint_info, diffusers_load_config={}):
text_encoder=text_encoder,
**load_args,
)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -88,5 +88,5 @@ def load_sana(checkpoint_info, kwargs={}):
sd_hijack_te.init_hijack(pipe)
t1 = time.time()
shared.log.debug(f'Load model: type=Sana target={devices.dtype} te={pipe.text_encoder.dtype} transformer={pipe.transformer.dtype} vae={pipe.vae.dtype} time={t1-t0:.2f}')
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -124,5 +124,5 @@ def load_sd3(checkpoint_info, cache_dir=None, config=None):
config=config,
**kwargs,
)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
return pipe

View File

@ -155,7 +155,7 @@ def load_cascade_combined(checkpoint_info, diffusers_load_config):
latent_dim_scale=sd_model.decoder_pipe.config.latent_dim_scale,
)
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='load')
shared.log.debug(f'StableCascade combined: {sd_model.__class__.__name__}')
return sd_model

View File

@ -31,7 +31,7 @@ invisible-watermark
pi-heif
# versioned
rich==14.0.0
rich==14.1.0
safetensors==0.5.3
tensordict==0.8.3
peft==0.16.0

View File

@ -228,7 +228,7 @@ class Script(scripts_manager.Script):
shared.sd_model.clip_vision_model = None
shared.sd_model.handler_ante = None
shared.sd_model = shared.sd_model.pipe
devices.torch_gc(force=True)
devices.torch_gc(force=True, reason='pulid')
shared.log.debug(f'PuLID complete: class={shared.sd_model.__class__.__name__} preprocess={self.preprocess:.2f} pipe={"restore" if restore else "cache"}')
return processed

View File

@ -19,6 +19,7 @@ import modules.devices
import modules.sd_checkpoint
import modules.sd_samplers
import modules.scripts_manager
import modules.scripts
import modules.sd_models
import modules.sd_vae
import modules.sd_unet
@ -106,6 +107,7 @@ def initialize():
log.info('Load extensions')
t_timer, t_total = modules.scripts_manager.load_scripts()
modules.scripts.register_runners()
timer.startup.record("extensions")
timer.startup.records["extensions"] = t_total # scripts can reset the time
log.debug(f'Extensions init time: {t_timer.summary()}')

2
wiki

@ -1 +1 @@
Subproject commit 906fb43c528af396fe3dd3da3d556f2aa39f5f44
Subproject commit 79b18f2c5e3438f3f564fd264fdb27bed76b0f72