diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bfc90863..9d1662c5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,30 @@ ## Update for 2025-07-31 -- **Feature** - - Wan select which stage to run: *first/second/both* with configurable *boundary ration* when running both stages +- **Models** + - [FLUX.1-Krea-Dev](https://www.krea.ai/blog/flux-krea-open-source-release) + new 12B base model compatible with FLUX.1-Dev from *Black Forest Labs* with opinionated aesthetics and aesthetic preferences in mind + simply select in *networks -> models -> reference* + - [Chroma](https://huggingface.co/lodestones/Chroma) + great model based on FLUX.1 and then redesigned and retrained by *lodestones* + update with latest **v48**, **v48 Detail Calibrated** and **v46 Flash** variants + simply select in *networks -> models -> reference* +- **UI** + - new embedded docs/wiki search! + **Docs** search: fully-local and works in real-time on all document pages + **Wiki** search: uses github api to search online wiki pages + - modernui checkbox/radio styling +- **Offloading** + - changed **default** values for offloading based on detected gpu memory + see [offloading docs](https://vladmandic.github.io/sdnext-docs/Offload/) for details + - new feature to specify which modules to offload always or never + in *settings -> models & loading -> offload always/never* + - new `highvram` profile provides significant performance boost on gpus with more than 24gb +- **Features** + - **Wan** select which stage to run: *first/second/both* with configurable *boundary ratio* when running both stages in settings -> model options - prompt parser allow explict `BOS` and `EOS` tokens in prompt -- **UI** - - modernui checkbox/radio styling + - **Nunchaku** support for *FLUX.1-Fill* and *FLUX.1-Depth* models - **Fixes** - fix Wan 2.2-5B I2V workflow - fix inpaint image metadata @@ -15,6 +33,9 @@ - fix progress bar with refine/detailer - fix api progress reporting endpoint - fix openvino backend 
failing to compile + - fix nunchaku fallback on unsupported model + - api set default script-name + - avoid forced gc and rely on thresholds - add missing interrogate in output panel ## Update for 2025-07-29 diff --git a/TODO.md b/TODO.md index 94abf20be..c50ae0e51 100644 --- a/TODO.md +++ b/TODO.md @@ -4,6 +4,9 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma ## Current +- Reset quicksettings +- Gallery: force refresh on delete + ## Future Candidates - [Modular pipelines and guiders](https://github.com/huggingface/diffusers/issues/11915) @@ -26,6 +29,16 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma - see - blocked by `insightface` +## ModernUI + +- Extensions tab: Full CSS +- Models tab: + - Current + - Validate: broken table + - Update: broken table + - CivitAI: redesign downloader +- History + ### Under Consideration - [IPAdapter negative guidance](https://github.com/huggingface/diffusers/discussions/7167) diff --git a/cli/docs.py b/cli/docs.py new file mode 100755 index 000000000..11200d206 --- /dev/null +++ b/cli/docs.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +import os +import sys +import time +import logging + + +logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s') +log = logging.getLogger(__name__) + + +class Page(): + def __init__(self, fn, full: bool = True): + self.fn = fn + self.title = '' + self.size = 0 + self.mtime = 0 + self.h1 = [] + self.h2 = [] + self.h3 = [] + self.lines = [] + self.read(full=full) + + def read(self, full: bool = True): + try: + self.title = ' ' + os.path.basename(self.fn).replace('.md', '').replace('-', ' ') + ' ' + self.mtime = int(os.path.getmtime(self.fn)) + with open(self.fn, 'r', encoding='utf-8') as f: + content = f.read() + self.size = len(content) + self.lines = [line.strip().lower() + ' ' for line in content.splitlines() if len(line)>1] + self.h1 = [line[1:] for line in self.lines if line.startswith('# ')] 
+ self.h2 = [line[2:] for line in self.lines if line.startswith('## ')] + self.h3 = [line[3:] for line in self.lines if line.startswith('### ')] + if not full: + self.lines.clear() + except Exception as e: + log.error(f'Wiki: page="{self.fn}" {e}') + + def search(self, text): + if not text or len(text) < 2: + return [] + text = text.lower() + if text.strip() == self.title.lower().strip(): + return 1.0 + if self.title.lower().startswith(f'{text} '): + return 0.99 + if f' {text} ' in self.title.lower(): + return 0.98 + if f' {text}' in self.title.lower(): + return 0.97 + + if any(f' {text} ' in h for h in self.h1): + return 0.89 + if any(f' {text}' in h for h in self.h1): + return 0.88 + + if any(f' {text} ' in h for h in self.h2): + return 0.79 + if any(f' {text}' in h for h in self.h2): + return 0.78 + + if any(f' {text} ' in h for h in self.h3): + return 0.69 + if any(f' {text}' in h for h in self.h3): + return 0.68 + + if f'{text}' in self.title.lower(): + return 0.59 + if any(f'{text}' in h for h in self.h1): + return 0.58 + if any(f'{text}' in h for h in self.h2): + return 0.57 + if any(f'{text}' in h for h in self.h3): + return 0.56 + + if any(text in line for line in self.lines): + return 0.50 + + return 0.0 + + def get(self): + try: + with open(self.fn, 'r', encoding='utf-8') as f: + content = f.read() + return content + except Exception as e: + log.error(f'Wiki: page="{self.fn}" {e}') + return '' + + def __str__(self): + return f'Page(title="{self.title.strip()}" fn="{self.fn}" mtime={self.mtime} h1={[h.strip() for h in self.h1]} h2={len(self.h2)} h3={len(self.h3)} lines={len(self.lines)} size={self.size})' + + +class Pages(): + def __init__(self): + self.time = time.time() + self.size = 0 + self.full = None + self.pages: list[Page] = [] + + def build(self, full: bool = True): + self.pages.clear() + self.full = full + with os.scandir('wiki') as entries: + for entry in entries: + if entry.is_file() and entry.name.endswith('.md'): + page = Page(entry.path, 
full=full) + self.pages.append(page) + self.size = sum(page.size for page in self.pages) + + def search(self, text: str, topk: int = 10, full: bool = True) -> list[Page]: + if not text: + return [] + if len(self.pages) == 0: + self.build(full=full) + text = text.lower() + scores = [page.search(text) for page in self.pages] + mtimes = [page.mtime for page in self.pages] + found = sorted(zip(scores, mtimes, self.pages), key=lambda x: (x[0], x[1]), reverse=True) + found = [item for item in found if item[0] > 0] + return [(item[0], item[2]) for item in found][:topk] + + +index = Pages() + + +if __name__ == "__main__": + sys.argv.pop(0) + if len(sys.argv) < 1: + log.error("Usage: python cli/docs.py ") + text = ' '.join(sys.argv) + topk = 10 + full = True + log.info(f'Search: "{text}" topk={topk}, full={full}') + t0 = time.time() + results = index.search(text, topk=topk, full=full) + t1 = time.time() + log.info(f'Results: pages={len(results)} size={index.size} time={t1-t0:.3f}') + for score, page in results: + log.info(f'Score: {score:.2f} {page}') + # if len(results) > 0: + # log.info('Top result:') + # log.info(results[0][1].get()) diff --git a/extensions-builtin/sd-extension-chainner b/extensions-builtin/sd-extension-chainner index 716b1ee7d..3e0108fed 160000 --- a/extensions-builtin/sd-extension-chainner +++ b/extensions-builtin/sd-extension-chainner @@ -1 +1 @@ -Subproject commit 716b1ee7dc8042ba2a62460425930cf3ab472919 +Subproject commit 3e0108fedbec300f72c3ca6e06236419d45eb660 diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index 9741e151b..9ee81f7cb 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit 9741e151b01dda2d2697c8ca8a369e50482e976e +Subproject commit 9ee81f7cb6bfb882ecd93228b861ff9d06fb7ec8 diff --git a/html/reference.json b/html/reference.json index 8504f822d..4f21017ac 100644 --- a/html/reference.json +++ b/html/reference.json @@ -165,14 +165,35 @@ 
"skip": true, "extras": "sampler: Default, cfg_scale: 3.5" }, + "Black Forest Labs FLUX.1 Krea Dev": { + "path": "black-forest-labs/FLUX.1-Krea-dev", + "preview": "black-forest-labs--FLUX.1-Krea-dev.jpg", + "desc": "FLUX.1 Krea [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.", + "skip": true, + "extras": "sampler: Default, cfg_scale: 4.5" + }, - "lodestones Chroma": { - "path": "lodestones/Chroma", + "lodestones Chroma Unlocked v48": { + "path": "vladmandic/chroma-unlocked-v48", "preview": "lodestones--Chroma.jpg", "desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. It’s fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and I’d love to hear your thoughts! Your input and feedback are really appreciated.", "skip": true, "extras": "sampler: Default, cfg_scale: 3.5" }, + "lodestones Chroma Unlocked v48 Detail Calibrated": { + "path": "vladmandic/chroma-unlocked-v48-detail-calibrated", + "preview": "lodestones--Chroma.jpg", + "desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. It’s fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and I’d love to hear your thoughts! Your input and feedback are really appreciated.", + "skip": true, + "extras": "sampler: Default, cfg_scale: 3.5" + }, + "lodestones Chroma Unlocked v46 Flash": { + "path": "vladmandic/chroma-unlocked-v46-flash", + "preview": "lodestones--Chroma.jpg", + "desc": "Chroma is a 8.9B parameter model based on FLUX.1-schnell. It’s fully Apache 2.0 licensed, ensuring that anyone can use, modify, and build on top of it—no corporate gatekeeping. The model is still training right now, and I’d love to hear your thoughts! 
Your input and feedback are really appreciated.", + "skip": true, + "extras": "sampler: Default, cfg_scale: 1.0" + }, "Ostris Flex.2 Preview": { "path": "ostris/Flex.2-preview", diff --git a/installer.py b/installer.py index 5bf112e5c..3f0b1b7e8 100644 --- a/installer.py +++ b/installer.py @@ -1216,7 +1216,7 @@ def ensure_base_requirements(): update_setuptools() # used by installler itself so must be installed before requirements - install('rich==14.0.0', 'rich', quiet=True) + install('rich==14.1.0', 'rich', quiet=True) install('psutil', 'psutil', quiet=True) install('requests==2.32.3', 'requests', quiet=True) ts('base', t_start) diff --git a/javascript/changelog.js b/javascript/changelog.js index 65dd0ba89..4cd9a11c7 100644 --- a/javascript/changelog.js +++ b/javascript/changelog.js @@ -79,10 +79,3 @@ async function initChangelog() { }; search.addEventListener('keyup', searchChangelog); } - -function wikiSearch(txt) { - log('wikiSearch', txt); - const url = `https://github.com/search?q=repo%3Avladmandic%2Fautomatic+${encodeURIComponent(txt)}&type=wikis`; - // window.open(url, '_blank').focus(); - return txt; -} diff --git a/javascript/docs.js b/javascript/docs.js new file mode 100644 index 000000000..d94068b28 --- /dev/null +++ b/javascript/docs.js @@ -0,0 +1,24 @@ +let lastGitHubSearch = ''; +let lastDocsSearch = ''; + +async function clickGitHubWikiPage(page) { + log(`clickGitHubWikiPage: page="${page}"`); + lastGitHubSearch = page; + const el = gradioApp().getElementById('github_md_btn'); + if (el) el.click(); +} + +function getGitHubWikiPage() { + return lastGitHubSearch; +} + +async function clickDocsPage(page) { + log(`clickDocsPage: page="${page}"`); + lastDocsSearch = page; + const el = gradioApp().getElementById('docs_md_btn'); + if (el) el.click(); +} + +function getDocsPage() { + return lastDocsSearch; +} diff --git a/javascript/sdnext.css b/javascript/sdnext.css index 7eacd1978..3398fa6d8 100644 --- a/javascript/sdnext.css +++ b/javascript/sdnext.css 
@@ -1637,7 +1637,7 @@ div:has(>#tab-gallery-folders) { cursor: cell; padding: 8px; background-color: var(--input-background-fill); - border-radius: var(--sd-border-radius); + border-radius: var(--radius-lg); max-width: 100%; } @@ -1649,7 +1649,7 @@ div:has(>#tab-gallery-folders) { display: inline-block; transition: transform 0.2s ease-in-out; flex-shrink: 0; - color: var(--sd-input-text-color); + color: var(--block-title-text-color); } .gallery-separator-name { @@ -1811,6 +1811,84 @@ div:has(>#tab-gallery-folders) { border-top-color: var(--primary-300); } +.docs-search textarea { + height: 1em !important; + resize: none !important +} + +.github-result, #docs_result { + max-height: 38vh; + overflow-y: auto; +} + +.github-result a { + margin: 0; + padding: 0; + background-color: unset !important; +} + +.github-result h3, .github-md h3 { + margin: 0; + padding: 0; + font-size: 1.1em; +} + +.github-page { + background-color: var(--background-fill-primary); + margin: 1em 0 0.2em 0; + border-radius: var(--radius-lg); + padding: 4px; + font-size: 1em; + font-weight: 400; + cursor: help; +} + +.github-result li { + font-size: 0.9em; + display: ruby; + filter: brightness(0.5); +} + +.github-md, .docs-md { + padding: 0.2em; +} + +.docs-results { + background-color: var(--sd-group-background-color); +} + +.docs-card { + margin: 1em 0; + background-color: var(--background-fill-primary); + cursor: help; + padding: 0.5em; +} + +.docs-card-title { + font-size: 1.2em; + line-height: 1.6em; + color: var(--button-primary-background-fill) !important; +} + +.docs-card-h1 { + font-weight: bold; + font-size: 1.0; +} + +.docs-card-h2 { + font-size: 1.0; + max-height: 4em; + overflow: hidden; +} + +.docs-card-footer { + display: flex; + justify-content: space-between; + filter: brightness(0.5); + font-size: 0.9em; + margin-top: 0.2em; +} + @keyframes move { from { background-position-x: 0, -40px; diff --git a/models/Reference/black-forest-labs--FLUX.1-Krea-dev.jpg 
b/models/Reference/black-forest-labs--FLUX.1-Krea-dev.jpg new file mode 100644 index 000000000..0c227e3c9 Binary files /dev/null and b/models/Reference/black-forest-labs--FLUX.1-Krea-dev.jpg differ diff --git a/modules/api/models.py b/modules/api/models.py index 9feed99df..010467f66 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -200,7 +200,7 @@ ReqTxt2Img = PydanticModelGenerator( {"key": "sampler_index", "type": Union[int, str], "default": 0}, {"key": "sampler_name", "type": str, "default": "Default"}, {"key": "hr_sampler_name", "type": str, "default": "Same as primary"}, - {"key": "script_name", "type": Optional[str], "default": "none"}, + {"key": "script_name", "type": Optional[str], "default": ""}, {"key": "script_args", "type": list, "default": []}, {"key": "send_images", "type": bool, "default": True}, {"key": "save_images", "type": bool, "default": False}, @@ -228,7 +228,7 @@ ReqImg2Img = PydanticModelGenerator( {"key": "denoising_strength", "type": float, "default": 0.5}, {"key": "mask", "type": Optional[str], "default": None}, {"key": "include_init_images", "type": bool, "default": False, "exclude": True}, - {"key": "script_name", "type": Optional[str], "default": "none"}, + {"key": "script_name", "type": Optional[str], "default": ""}, {"key": "script_args", "type": list, "default": []}, {"key": "send_images", "type": bool, "default": True}, {"key": "save_images", "type": bool, "default": False}, diff --git a/modules/devices.py b/modules/devices.py index 8f5dcbec3..e6dbc35ed 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -280,7 +280,7 @@ def set_cuda_memory_limit(): return try: from modules.shared import cmd_opts - torch_gc(force=True) + torch_gc(force=True, reason='cuda') mem = torch.cuda.get_device_properties(device).total_memory torch.cuda.set_per_process_memory_fraction(float(opts.cuda_mem_fraction), cmd_opts.device_id if cmd_opts.device_id is not None else 0) log.info(f'Torch memory limit: 
fraction={opts.cuda_mem_fraction:.2f} limit={round(opts.cuda_mem_fraction * mem / 1024 / 1024)} total={round(mem / 1024 / 1024)}') diff --git a/modules/extras.py b/modules/extras.py index 128ed596e..9d5c706d2 100644 --- a/modules/extras.py +++ b/modules/extras.py @@ -176,7 +176,7 @@ def run_modelmerger(id_task, **kwargs): # pylint: disable=unused-argument created_model = next((ckpt for ckpt in sd_models.checkpoints_list.values() if ckpt.name == filename), None) if created_model: created_model.calculate_shorthash() - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='merge') shared.state.end() return [*[gr.Dropdown.update(choices=sd_models.checkpoint_titles()) for _ in range(4)], f"Model saved to {output_modelname}"] @@ -248,7 +248,7 @@ def run_model_modules(model_type:str, model_name:str, custom_name:str, yield from modules_sdxl.merge() status = modules_sdxl.status - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='merge') yield msg("modules merge complete") if modules_sdxl.pipeline is not None: checkpoint_info = sd_models.CheckpointInfo(filename='None') diff --git a/modules/framepack/framepack_load.py b/modules/framepack/framepack_load.py index 86bcfe32a..3a4fb90a4 100644 --- a/modules/framepack/framepack_load.py +++ b/modules/framepack/framepack_load.py @@ -183,7 +183,7 @@ def load_model(variant:str=None, pipeline:str=None, text_encoder:str=None, text_ diffusers.loaders.peft._SET_ADAPTER_SCALE_FN_MAPPING['HunyuanVideoTransformer3DModelPacked'] = lambda model_cls, weights: weights # pylint: disable=protected-access shared.log.info(f'FramePack load: model={shared.sd_model.__class__.__name__} variant="{variant}" type={shared.sd_model_type} time={t1-t0:.2f}') sd_models.apply_balanced_offload(shared.sd_model) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') except Exception as e: shared.log.error(f'FramePack load: {e}') diff --git a/modules/intel/openvino/__init__.py b/modules/intel/openvino/__init__.py 
index 26ac004e4..8d787d0e0 100644 --- a/modules/intel/openvino/__init__.py +++ b/modules/intel/openvino/__init__.py @@ -516,7 +516,7 @@ def openvino_fx(subgraph, example_inputs, options=None): else: # Delete unused subgraphs subgraph = subgraph.apply(sd_models.convert_to_faketensors) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='openvino') # Model is fully supported and already cached. Run the cached OV model directly. compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, *example_inputs) diff --git a/modules/interrogate/vqa.py b/modules/interrogate/vqa.py index cf3f5b5ee..07f7e7a15 100644 --- a/modules/interrogate/vqa.py +++ b/modules/interrogate/vqa.py @@ -632,7 +632,7 @@ def interrogate(question:str='', system_prompt:str=None, prompt:str=None, image: if shared.opts.interrogate_offload and model is not None: sd_models.move_model(model, devices.cpu, force=True) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='vqa') answer = clean(answer, question) t1 = time.time() if not quiet: diff --git a/modules/model_quant.py b/modules/model_quant.py index 48bacf0a1..5ce926114 100644 --- a/modules/model_quant.py +++ b/modules/model_quant.py @@ -436,7 +436,7 @@ def sdnq_quantize_model(model, op=None, sd_model=None, do_gc: bool = True, weigh else: getattr(sd_model, quant_last_model_name).to(quant_last_model_device) if do_gc: - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='sdnq') if shared.cmd_opts.medvram or shared.cmd_opts.lowvram or shared.opts.diffusers_offload_mode != "none": quant_last_model_name = op quant_last_model_device = model.device @@ -447,7 +447,7 @@ def sdnq_quantize_model(model, op=None, sd_model=None, do_gc: bool = True, weigh elif shared.opts.diffusers_offload_mode != "none": model = model.to(devices.cpu) if do_gc: - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='sdnq') return model @@ -465,7 +465,7 @@ def sdnq_quantize_weights(sd_model): 
getattr(getattr(sd_model, last_model_names[0]), last_model_names[1]).to(quant_last_model_device) else: getattr(sd_model, quant_last_model_name).to(quant_last_model_device) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='sdnq') quant_last_model_name = None quant_last_model_device = None @@ -510,7 +510,7 @@ def optimum_quanto_model(model, op=None, sd_model=None, weights=None, activation getattr(getattr(sd_model, last_model_names[0]), last_model_names[1]).to(quant_last_model_device) else: getattr(sd_model, quant_last_model_name).to(quant_last_model_device) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='quanto') if shared.cmd_opts.medvram or shared.cmd_opts.lowvram or shared.opts.diffusers_offload_mode != "none": quant_last_model_name = op quant_last_model_device = model.device @@ -518,7 +518,7 @@ def optimum_quanto_model(model, op=None, sd_model=None, weights=None, activation quant_last_model_name = None quant_last_model_device = None model.to(devices.device) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='quanto') return model @@ -540,7 +540,7 @@ def optimum_quanto_weights(sd_model): getattr(getattr(sd_model, last_model_names[0]), last_model_names[1]).to(quant_last_model_device) else: getattr(sd_model, quant_last_model_name).to(quant_last_model_device) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='quanto') quant_last_model_name = None quant_last_model_device = None @@ -572,7 +572,7 @@ def optimum_quanto_weights(sd_model): sd_models.move_model(sd_model, devices.cpu) if hasattr(sd_model, "encode_prompt"): sd_model.encode_prompt = original_encode_prompt - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='quanto') t1 = time.time() log.info(f"Quantization: type=Optimum.quanto time={t1-t0:.2f}") diff --git a/modules/processing.py b/modules/processing.py index c574c814e..db138dce0 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -372,43 +372,44 @@ 
def process_images_inner(p: StableDiffusionProcessing) -> Processed: else: image.info["parameters"] = info output_images.append(image) - - for i, image in enumerate(output_images): - is_grid = len(output_images) == p.batch_size * p.n_iter + 1 and i == 0 - # resize after - if p.selected_scale_tab_after == 1: - p.width_after, p.height_after = int(image.width * p.scale_by_after), int(image.height * p.scale_by_after) - if p.resize_mode_after != 0 and p.resize_name_after != 'None' and not is_grid: - image = images.resize_image(p.resize_mode_after, image, p.width_after, p.height_after, p.resize_name_after, context=p.resize_context_after) - - # save images - if shared.opts.samples_save and not p.do_not_save_samples and p.outpath_samples is not None: - info = create_infotext(p, p.prompts, p.seeds, p.subseeds, index=i) - if isinstance(image, list): - for img in image: - images.save_image(img, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p) # main save image - else: - images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p) # main save image - - if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([shared.opts.save_mask, shared.opts.save_mask_composite, shared.opts.return_mask, shared.opts.return_mask_composite]): - image_mask = p.mask_for_overlay.convert('RGB') - image1 = image.convert('RGBA').convert('RGBa') - image2 = Image.new('RGBa', image.size) - mask = images.resize_image(3, p.mask_for_overlay, image.width, image.height).convert('L') - image_mask_composite = Image.composite(image1, image2, mask).convert('RGBA') - if shared.opts.save_mask: - images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p, suffix="-mask") - if shared.opts.save_mask_composite: - images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p, 
suffix="-mask-composite") - if shared.opts.return_mask: - output_images.append(image_mask) - if shared.opts.return_mask_composite: - output_images.append(image_mask_composite) - - timer.process.record('post') + devices.torch_gc() del samples - devices.torch_gc() + for i, image in enumerate(output_images): + is_grid = len(output_images) == p.batch_size * p.n_iter + 1 and i == 0 + # resize after + if p.selected_scale_tab_after == 1: + p.width_after, p.height_after = int(image.width * p.scale_by_after), int(image.height * p.scale_by_after) + if p.resize_mode_after != 0 and p.resize_name_after != 'None' and not is_grid: + image = images.resize_image(p.resize_mode_after, image, p.width_after, p.height_after, p.resize_name_after, context=p.resize_context_after) + + # save images + if shared.opts.samples_save and not p.do_not_save_samples and p.outpath_samples is not None: + info = create_infotext(p, p.prompts, p.seeds, p.subseeds, index=i) + if isinstance(image, list): + for img in image: + images.save_image(img, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p) # main save image + else: + images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p) # main save image + + if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([shared.opts.save_mask, shared.opts.save_mask_composite, shared.opts.return_mask, shared.opts.return_mask_composite]): + image_mask = p.mask_for_overlay.convert('RGB') + image1 = image.convert('RGBA').convert('RGBa') + image2 = Image.new('RGBa', image.size) + mask = images.resize_image(3, p.mask_for_overlay, image.width, image.height).convert('L') + image_mask_composite = Image.composite(image1, image2, mask).convert('RGBA') + if shared.opts.save_mask: + images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p, suffix="-mask") + if shared.opts.save_mask_composite: + 
images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=info, p=p, suffix="-mask-composite") + if shared.opts.return_mask: + output_images.append(image_mask) + if shared.opts.return_mask_composite: + output_images.append(image_mask_composite) + + if shared.cmd_opts.lowvram: + devices.torch_gc(force=True, reason='lowvram') + timer.process.record('post') if not p.xyz: if hasattr(shared.sd_model, 'restore_pipeline') and (shared.sd_model.restore_pipeline is not None): @@ -462,5 +463,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: shared.log.debug(f'Processed: timers={timer.process.dct()}') shared.log.debug(f'Processed: memory={memstats.memory_stats()}') - devices.torch_gc(force=True, reason='final') + if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: + devices.torch_gc(force=True, reason='final') return processed diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index a4f2fd4b6..3f50c40d5 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -437,7 +437,7 @@ def get_prompts_with_weights(pipe, prompt: str): sections += 1 if all_tokens > 0: avg_weight = avg_weight / all_tokens - shared.log.debug(f'Prompt tokenizer: parser={shared.opts.prompt_attention} len={len(prompt)} sections={sections} tokens={all_tokens} weights={min_weight:.2f}/{avg_weight:.2f}/{max_weight:.2f}') + debug(f'Prompt tokenizer: parser={shared.opts.prompt_attention} len={len(prompt)} sections={sections} tokens={all_tokens} weights={min_weight:.2f}/{avg_weight:.2f}/{max_weight:.2f}') except Exception: pass debug(f'Prompt: weights={texts_and_weights} time={(time.time() - t0):.3f}') diff --git a/modules/scripts.py b/modules/scripts.py index 49debeac8..941fa9bc1 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -1,2 +1,19 @@ # compatibility with extensions that import scripts directly +from modules import scripts_manager from 
modules.scripts_manager import * # noqa: F403 # pylint: disable=wildcard-import + + +scripts_txt2img = None +scripts_img2img = None +scripts_control = None +scripts_current = None +scripts_postproc = None + + +def register_runners(): + global scripts_txt2img, scripts_img2img, scripts_control, scripts_current, scripts_postproc # pylint: disable=global-statement + scripts_txt2img = scripts_manager.scripts_txt2img + scripts_img2img = scripts_manager.scripts_img2img + scripts_control = scripts_manager.scripts_control + scripts_current = scripts_manager.scripts_current + scripts_postproc = scripts_manager.scripts_postproc diff --git a/modules/sd_models.py b/modules/sd_models.py index 35db128c1..00b1ecb3e 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -678,7 +678,7 @@ def load_diffuser(checkpoint_info=None, timer=None, op='model', revision=None): errors.display(e, "Model") if shared.opts.diffusers_offload_mode != 'balanced': - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') if sd_model is not None: script_callbacks.model_loaded_callback(sd_model) @@ -1107,14 +1107,14 @@ def unload_model_weights(op='model'): disable_offload(model_data.sd_model) move_model(model_data.sd_model, 'meta') model_data.sd_model = None - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='unload') shared.log.debug(f'Unload {op}: {memory_stats()} after') elif (op == 'refiner') and model_data.sd_refiner: shared.log.debug(f'Current {op}: {memory_stats()}') disable_offload(model_data.sd_refiner) move_model(model_data.sd_refiner, 'meta') model_data.sd_refiner = None - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='unload') shared.log.debug(f'Unload {op}: {memory_stats()}') diff --git a/modules/sd_offload.py b/modules/sd_offload.py index 46aa02ece..69d0c6364 100644 --- a/modules/sd_offload.py +++ b/modules/sd_offload.py @@ -1,4 +1,5 @@ import os +import re import sys import time import inspect @@ -172,12 +173,14 @@ class 
OffloadHook(accelerate.hooks.ModelHook): self.min_watermark = shared.opts.diffusers_offload_min_gpu_memory self.max_watermark = shared.opts.diffusers_offload_max_gpu_memory self.cpu_watermark = shared.opts.diffusers_offload_max_cpu_memory + self.offload_always = [m.strip() for m in re.split(';|,| ', shared.opts.diffusers_offload_always) if len(m.strip()) > 2] + self.offload_never = [m.strip() for m in re.split(';|,| ', shared.opts.diffusers_offload_never) if len(m.strip()) > 2] self.gpu = int(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024) self.cpu = int(shared.cpu_memory * shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024) self.offload_map = {} self.param_map = {} gpu = f'{(shared.gpu_memory * shared.opts.diffusers_offload_min_gpu_memory):.2f}-{(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory):.2f}:{shared.gpu_memory:.2f}' - shared.log.info(f'Offload: type=balanced op=init watermark={self.min_watermark}-{self.max_watermark} gpu={gpu} cpu={shared.cpu_memory:.3f} limit={shared.opts.cuda_mem_fraction:.2f}') + shared.log.info(f'Offload: type=balanced op=init watermark={self.min_watermark}-{self.max_watermark} gpu={gpu} cpu={shared.cpu_memory:.3f} limit={shared.opts.cuda_mem_fraction:.2f} always={self.offload_always} never={self.offload_never}') self.validate() super().__init__() @@ -210,10 +213,7 @@ class OffloadHook(accelerate.hooks.ModelHook): max_memory = { device_index: self.gpu, "cpu": self.cpu } device_map = getattr(module, "balanced_offload_device_map", None) if device_map is None or max_memory != getattr(module, "balanced_offload_max_memory", None): - # try: device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory) - # except Exception as e: - # shared.log.error(f'Offload: type=balanced module={module.__class__.__name__} {e}') offload_dir = getattr(module, "offload_dir", os.path.join(shared.opts.accelerate_offload_path, module.__class__.__name__)) if devices.backend == 
"directml": keys = device_map.keys() @@ -233,15 +233,22 @@ class OffloadHook(accelerate.hooks.ModelHook): perc_gpu = used_gpu / shared.gpu_memory try: module_size = self.model_size() + module_cls = module.__class__.__name__ prev_gpu = used_gpu - offload_now = perc_gpu > shared.opts.diffusers_offload_min_gpu_memory - if offload_now: + op = 'post:skip' + if module_cls in self.offload_never: + op = 'post:never' + elif module_cls in self.offload_always: + op = 'post:always' + module = module.to(devices.cpu) + used_gpu -= module_size + elif perc_gpu > shared.opts.diffusers_offload_min_gpu_memory: + op = 'post:mem' module = module.to(devices.cpu) used_gpu -= module_size if debug: - cls = module.__class__.__name__ quant = getattr(module, "quantization_method", None) - debug_move(f'Offload: type=balanced op={"post" if offload_now else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={cls} size={module_size:.3f}') + debug_move(f'Offload: type=balanced op={op} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={module_cls} size={module_size:.3f}') except Exception as e: if 'out of memory' in str(e): devices.torch_gc(fast=True, force=True, reason='oom') @@ -311,6 +318,8 @@ def apply_balanced_offload(sd_model=None, exclude=[]): else: keys = get_signature(pipe).keys() keys = [k for k in keys if k not in exclude and not k.startswith('_')] + offload_always = [m.strip() for m in re.split(';|,| ', shared.opts.diffusers_offload_always) if len(m.strip()) > 2] + offload_never = [m.strip() for m in re.split(';|,| ', shared.opts.diffusers_offload_never) if len(m.strip()) > 2] for module_name, module_size in get_pipe_modules(pipe): # pylint: disable=protected-access # shared.log.trace(f'Offload: type=balanced op=apply pipe={pipe.__class__.__name__} module={module_name} size={module_size:.3f}') module = 
getattr(pipe, module_name, None) @@ -326,16 +335,26 @@ def apply_balanced_offload(sd_model=None, exclude=[]): perc_gpu = used_gpu / shared.gpu_memory try: prev_gpu = used_gpu - offload_now = (perc_gpu > shared.opts.diffusers_offload_min_gpu_memory) and (module.device != devices.cpu) - if offload_now: + module_cls = module.__class__.__name__ + op = 'apply:skip' + if module_cls in offload_never: + op = 'apply:never' + elif module_cls in offload_always: + op = 'apply:always' module = module.to(devices.cpu) used_gpu -= module_size - cls = module.__class__.__name__ + elif perc_gpu > shared.opts.diffusers_offload_min_gpu_memory: + op = 'apply:mem' + module = module.to(devices.cpu) + used_gpu -= module_size + if debug: + quant = getattr(module, "quantization_method", None) + debug_move(f'Offload: type=balanced op={op} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={module_cls} size={module_size:.3f}') quant = getattr(module, "quantization_method", None) if not cached: - shared.log.debug(f'Model module={module_name} type={cls} dtype={module.dtype} quant={quant} params={offload_hook_instance.param_map[module_name]:.3f} size={offload_hook_instance.offload_map[module_name]:.3f}') + shared.log.debug(f'Model module={module_name} type={module_cls} dtype={module.dtype} quant={quant} params={offload_hook_instance.param_map[module_name]:.3f} size={offload_hook_instance.offload_map[module_name]:.3f}') if debug: - debug_move(f'Offload: type=balanced op={"move" if offload_now else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={cls} size={module_size:.3f}') + debug_move(f'Offload: type=balanced op={op} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} quant={quant} module={module_cls} size={module_size:.3f}') except Exception as 
e: if 'out of memory' in str(e): devices.torch_gc(fast=True, force=True, reason='oom') diff --git a/modules/sdnq/__init__.py b/modules/sdnq/__init__.py index 1a2a8a992..985e99a2a 100644 --- a/modules/sdnq/__init__.py +++ b/modules/sdnq/__init__.py @@ -346,7 +346,7 @@ class SDNQQuantizer(DiffusersQuantizer): def _process_model_after_weight_loading(self, model, **kwargs): # pylint: disable=unused-argument if shared.opts.diffusers_offload_mode != "none": model = model.to(devices.cpu) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='sdnq') return model def get_cuda_warm_up_factor(self): diff --git a/modules/shared.py b/modules/shared.py index f41952cba..61be9d5d0 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -8,6 +8,7 @@ import gradio as gr import diffusers from modules.json_helpers import readfile, writefile # pylint: disable=W0611 from modules.shared_helpers import listdir, walk_files, html_path, html, req, total_tqdm # pylint: disable=W0611 +from modules.shared_defaults import get_default_modes from modules import errors, devices, shared_items, shared_state, cmd_args, theme, history, files_cache from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611 from modules.dml import memory_providers, default_memory_provider, directml_do_hijack @@ -86,8 +87,8 @@ elif cmd_opts.use_directml: devices.backend = devices.get_backend(cmd_opts) devices.device = devices.get_optimal_device() mem_stat = memory_stats() -cpu_memory = mem_stat['ram']['total'] if "ram" in mem_stat else 0 -gpu_memory = mem_stat['gpu']['total'] if "gpu" in mem_stat else 0 +cpu_memory = round(mem_stat['ram']['total'] if "ram" in mem_stat else 0) +gpu_memory = round(mem_stat['gpu']['total'] if "gpu" in mem_stat else 0) native = backend == Backend.DIFFUSERS if not files_cache.do_cache_folders: log.warning('File cache disabled: ') @@ 
-129,45 +130,7 @@ def list_samplers(): return modules.sd_samplers.all_samplers -def get_default_modes(): - default_offload_mode = "none" - default_diffusers_offload_min_gpu_memory = 0.2 - if not (cmd_opts.lowvram or cmd_opts.medvram): - if "gpu" in mem_stat: - if gpu_memory <= 4: - cmd_opts.lowvram = True - default_offload_mode = "sequential" - default_diffusers_offload_min_gpu_memory = 0 - log.info(f"Device detect: memory={gpu_memory:.1f} default=sequential optimization=lowvram") - elif gpu_memory <= 12: - cmd_opts.medvram = True # VAE Tiling and other stuff - default_offload_mode = "balanced" - default_diffusers_offload_min_gpu_memory = 0 - log.info(f"Device detect: memory={gpu_memory:.1f} default=balanced optimization=medvram") - else: - default_offload_mode = "balanced" - default_diffusers_offload_min_gpu_memory = 0.2 - log.info(f"Device detect: memory={gpu_memory:.1f} default=balanced") - elif cmd_opts.medvram: - default_offload_mode = "balanced" - default_diffusers_offload_min_gpu_memory = 0 - elif cmd_opts.lowvram: - default_offload_mode = "sequential" - default_diffusers_offload_min_gpu_memory = 0 - - default_cross_attention = "Scaled-Dot-Product" - - if devices.backend == "zluda": - default_sdp_options = ['Flash attention', 'Math attention', 'Dynamic attention'] - elif devices.backend in {"rocm", "directml", "cpu", "mps"}: - default_sdp_options = ['Flash attention', 'Memory attention', 'Math attention', 'Dynamic attention'] - else: - default_sdp_options = ['Flash attention', 'Memory attention', 'Math attention'] - - return default_offload_mode, default_diffusers_offload_min_gpu_memory, default_cross_attention, default_sdp_options - - -startup_offload_mode, startup_diffusers_offload_min_gpu_memory, startup_cross_attention, startup_sdp_options = get_default_modes() +startup_offload_mode, startup_offload_min_gpu, startup_offload_max_gpu, startup_cross_attention, startup_sdp_options, startup_offload_always, startup_offload_never = 
get_default_modes(cmd_opts=cmd_opts, mem_stat=mem_stat) options_templates.update(options_section(('sd', "Models & Loading"), { "sd_backend": OptionInfo('diffusers', "Execution backend", gr.Radio, {"choices": ['diffusers', 'original'], "visible": False }), @@ -179,9 +142,11 @@ options_templates.update(options_section(('sd', "Models & Loading"), { "offload_sep": OptionInfo("

Model Offloading

", "", gr.HTML), "diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'group', 'model', 'sequential']}), - "diffusers_offload_min_gpu_memory": OptionInfo(startup_diffusers_offload_min_gpu_memory, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }), - "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0.1, "maximum": 1, "step": 0.01 }), + "diffusers_offload_min_gpu_memory": OptionInfo(startup_offload_min_gpu, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }), + "diffusers_offload_max_gpu_memory": OptionInfo(startup_offload_max_gpu, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0.1, "maximum": 1, "step": 0.01 }), "diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False }), + "diffusers_offload_always": OptionInfo(startup_offload_always, "Modules to always offload"), + "diffusers_offload_never": OptionInfo(startup_offload_never, "Modules to never offload"), "advanced_sep": OptionInfo("

Advanced Options

", "", gr.HTML), "sd_checkpoint_autoload": OptionInfo(True, "Model auto-load on start"), @@ -299,7 +264,7 @@ options_templates.update(options_section(("quantization", "Quantization Settings "sdnq_quantize_conv_layers": OptionInfo(False, "Quantize convolutional layers", gr.Checkbox), "sdnq_dequantize_compile": OptionInfo(devices.has_triton(), "Dequantize using torch.compile", gr.Checkbox), "sdnq_use_quantized_matmul": OptionInfo(False, "Use quantized MatMul", gr.Checkbox), - "sdnq_use_quantized_matmul_conv": OptionInfo(False, "Use quantized MatMul with convolutional layers", gr.Checkbox), + "sdnq_use_quantized_matmul_conv": OptionInfo(False, "Use quantized MatMul with conv", gr.Checkbox), "sdnq_quantize_with_gpu": OptionInfo(True, "Quantize using GPU", gr.Checkbox), "sdnq_dequantize_fp32": OptionInfo(False, "Dequantize using full precision", gr.Checkbox), "sdnq_quantize_shuffle_weights": OptionInfo(False, "Shuffle weights in post mode", gr.Checkbox), @@ -723,13 +688,13 @@ options_templates.update(options_section(('extra_networks', "Networks"), { "extra_networks_lora_sep": OptionInfo("

LoRA

", "", gr.HTML), "extra_networks_default_multiplier": OptionInfo(1.0, "Default strength", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}), - "lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info to metadata"), "lora_fuse_diffusers": OptionInfo(True, "LoRA fuse directly to model"), "lora_force_reload": OptionInfo(False, "LoRA force reload always"), "lora_force_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA load using Diffusers method"), - "lora_maybe_diffusers": OptionInfo(False, "LoRA load using Diffusers method for selected models"), + "lora_maybe_diffusers": OptionInfo(False, "LoRA load using Diffusers method for selected models", gr.Checkbox, {"visible": False}), "lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}), "lora_in_memory_limit": OptionInfo(1, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}), + "lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info to metadata"), "lora_quant": OptionInfo("NF4","LoRA precision when quantized", gr.Radio, {"choices": ["NF4", "FP4"]}), "extra_networks_styles_sep": OptionInfo("

Styles

", "", gr.HTML), diff --git a/modules/shared_defaults.py b/modules/shared_defaults.py new file mode 100644 index 000000000..bc7f8ce11 --- /dev/null +++ b/modules/shared_defaults.py @@ -0,0 +1,56 @@ +from installer import log +from modules import devices + + +def get_default_modes(cmd_opts, mem_stat): + default_offload_mode = "none" + default_diffusers_offload_min_gpu_memory = 0.2 + default_diffusers_offload_max_gpu_memory = 0.6 + default_diffusers_offload_always = '' + default_diffusers_offload_never = '' + gpu_memory = round(mem_stat['gpu']['total'] if "gpu" in mem_stat else 0) + if not (cmd_opts.lowvram or cmd_opts.medvram): + if "gpu" in mem_stat: + if gpu_memory <= 4: + cmd_opts.lowvram = True + default_offload_mode = "sequential" + default_diffusers_offload_min_gpu_memory = 0 + log.info(f"Device detect: memory={gpu_memory:.1f} default=sequential optimization=lowvram") + elif gpu_memory <= 12: + cmd_opts.medvram = True # VAE Tiling and other stuff + default_offload_mode = "balanced" + default_diffusers_offload_min_gpu_memory = 0 + log.info(f"Device detect: memory={gpu_memory:.1f} default=balanced optimization=medvram") + elif gpu_memory >= 24: + default_offload_mode = "balanced" + default_diffusers_offload_max_gpu_memory = 0.8 + default_diffusers_offload_never = ', '.join(['CLIPTextModel', 'CLIPTextModelWithProjection', 'AutoencoderKL']) + log.info(f"Device detect: memory={gpu_memory:.1f} default=balanced optimization=highvram") + else: + default_offload_mode = "balanced" + log.info(f"Device detect: memory={gpu_memory:.1f} default=balanced") + elif cmd_opts.medvram: + default_offload_mode = "balanced" + default_diffusers_offload_min_gpu_memory = 0 + elif cmd_opts.lowvram: + default_offload_mode = "sequential" + default_diffusers_offload_min_gpu_memory = 0 + + default_cross_attention = "Scaled-Dot-Product" + + if devices.backend == "zluda": + default_sdp_options = ['Flash attention', 'Math attention', 'Dynamic attention'] + elif devices.backend in {"rocm", 
"directml", "cpu", "mps"}: + default_sdp_options = ['Flash attention', 'Memory attention', 'Math attention', 'Dynamic attention'] + else: + default_sdp_options = ['Flash attention', 'Memory attention', 'Math attention'] + + return ( + default_offload_mode, + default_diffusers_offload_min_gpu_memory, + default_diffusers_offload_max_gpu_memory, + default_cross_attention, + default_sdp_options, + default_diffusers_offload_always, + default_diffusers_offload_never + ) diff --git a/modules/ui.py b/modules/ui.py index f3f942636..d87f02c70 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -123,14 +123,9 @@ def create_ui(startup_timer = None): timer.startup.record("ui-extensions") with gr.Blocks(analytics_enabled=False) as info_interface: - with gr.Tabs(elem_id="tabs_info"): - with gr.TabItem("Change log", id="change_log", elem_id="system_tab_changelog"): - from modules import ui_docs - ui_docs.create_ui_logs() - - with gr.TabItem("Wiki", id="wiki", elem_id="system_tab_wiki"): - from modules import ui_docs - ui_docs.create_ui_wiki() + from modules import ui_docs + ui_docs.create_ui() + timer.startup.record("ui-info") with gr.Blocks(analytics_enabled=False) as extensions_interface: from modules import ui_extensions diff --git a/modules/ui_docs.py b/modules/ui_docs.py index e38ddf466..1b64f946c 100644 --- a/modules/ui_docs.py +++ b/modules/ui_docs.py @@ -1,5 +1,231 @@ +import os +import time import gradio as gr from modules import ui_symbols, ui_components +from installer import install, log + + +class Page(): + def __init__(self, fn, full: bool = True): + self.fn = fn + self.title = '' + self.size = 0 + self.mtime = 0 + self.h1 = [] + self.h2 = [] + self.h3 = [] + self.lines = [] + self.read(full=full) + + def read(self, full: bool = True): + try: + self.title = ' ' + os.path.basename(self.fn).replace('.md', '').replace('-', ' ') + ' ' + self.mtime = time.localtime(os.path.getmtime(self.fn)) + with open(self.fn, 'r', encoding='utf-8') as f: + content = f.read() + self.size 
= len(content) + self.lines = [line.strip().lower() + ' ' for line in content.splitlines() if len(line)>1] + self.h1 = [line[1:] for line in self.lines if line.startswith('# ')] + self.h2 = [line[2:] for line in self.lines if line.startswith('## ')] + self.h3 = [line[3:] for line in self.lines if line.startswith('### ')] + if not full: + self.lines.clear() + except Exception as e: + log.error(f'Search docs: page="{self.fn}" {e}') + + def search(self, text): + if not text or len(text) < 2: + return [] + text = text.lower() + if text.strip() == self.title.lower().strip(): + return 1.0 + if self.title.lower().startswith(f'{text} '): + return 0.99 + if f' {text} ' in self.title.lower(): + return 0.98 + if f' {text}' in self.title.lower(): + return 0.97 + + if any(f' {text} ' in h for h in self.h1): + return 0.89 + if any(f' {text}' in h for h in self.h1): + return 0.88 + + if any(f' {text} ' in h for h in self.h2): + return 0.79 + if any(f' {text}' in h for h in self.h2): + return 0.78 + + if any(f' {text} ' in h for h in self.h3): + return 0.69 + if any(f' {text}' in h for h in self.h3): + return 0.68 + + if f'{text}' in self.title.lower(): + return 0.59 + if any(f'{text}' in h for h in self.h1): + return 0.58 + if any(f'{text}' in h for h in self.h2): + return 0.57 + if any(f'{text}' in h for h in self.h3): + return 0.56 + + if any(text in line for line in self.lines): + return 0.50 + + return 0.0 + + def get(self): + if self.fn is None or not os.path.exists(self.fn): + log.error(f'Search docs: page="{self.fn}" does not exist') + return f'page="{self.fn}" does not exist' + try: + with open(self.fn, 'r', encoding='utf-8') as f: + content = f.read() + return content + except Exception as e: + log.error(f'Search docs: page="{self.fn}" {e}') + return '' + + def __str__(self): + return f'Page(title="{self.title.strip()}" fn="{self.fn}" mtime={self.mtime} h1={[h.strip() for h in self.h1]} h2={len(self.h2)} h3={len(self.h3)} lines={len(self.lines)} size={self.size})' + + 
+class Pages(): + def __init__(self): + self.time = time.time() + self.size = 0 + self.full = None + self.pages: list[Page] = [] + + def build(self, full: bool = True): + self.pages.clear() + self.full = full + with os.scandir('wiki') as entries: + for entry in entries: + if entry.is_file() and entry.name.endswith('.md'): + page = Page(entry.path, full=full) + self.pages.append(page) + self.size = sum(page.size for page in self.pages) + + def search(self, text: str, topk: int = 10, full: bool = True) -> list[Page]: + if not text or len(text) < 2: + return [] + if len(self.pages) == 0: + self.build(full=full) + try: + text = text.lower() + scores = [page.search(text) for page in self.pages] + mtimes = [page.mtime for page in self.pages] + found = sorted(zip(scores, mtimes, self.pages), key=lambda x: (x[0], x[1]), reverse=True) + found = [item for item in found if item[0] > 0] + return [(item[0], item[2]) for item in found][:topk] + except Exception as e: + log.error(f'Search docs: text="{text}" {e}') + return [] + + def get(self, title: str) -> Page: + if len(self.pages) == 0: + self.build(full=self.full) + for page in self.pages: + if page.title.lower().strip() == title.lower().strip(): + return page + return Page('') + + +index = Pages() + + +def get_docs_page(page_title: str) -> str: + if len(index.pages) == 0: + index.build(full=True) + page = index.get(page_title) + log.debug(f'Search docs: title="{page_title}" {page}') + content = page.get() + return content + + +def search_html(pages: list[Page]) -> str: + html = '' + for score, page in pages: + if score > 0.0: + html += f''' +
+
{page.title.strip()}
+
Heading | {' | '.join([h.strip() for h in page.h1])}
+
Topics | {' | '.join([h.strip() for h in page.h2])}
+ +
''' + return html + + +def search_docs(search_term): + topk = 10 + full = True + t0 = time.time() + results = index.search(search_term, topk=topk, full=full) + t1 = time.time() + log.debug(f'Search results: search="{search_term}" topk={topk}, full={full} pages={len(results)} size={index.size} time={t1-t0:.3f}') + for score, page in results: + log.trace(f'Search results: score={score:.2f} {page}') + html = search_html(results) + return html + + +def get_github_page(page): + try: + with open(os.path.join('wiki', f'{page}.md'), 'r', encoding='utf-8') as f: + content = f.read() + log.debug(f'Search wiki: page="{page}" size={len(content)}') + except Exception as e: + log.error(f'Search wiki: page="{page}" {e}') + content = f'Error: {e}' + return content + + +def search_github(search_term): + import requests + from urllib.parse import quote + install('beautifulsoup4') + from bs4 import BeautifulSoup + + url = f'https://github.com/search?q=repo%3Avladmandic%2Fsdnext+{quote(search_term)}&type=wikis' + res = requests.get(url, timeout=10) + pages = [] + if res.status_code == 200: + html = res.content + soup = BeautifulSoup(html, 'html.parser') + + # remove header links + tags = soup.find_all(attrs={"data-hovercard-url": "/vladmandic/sdnext/hovercard"}) + for tag in tags: + tag.extract() + + # replace relative links with full links + tags = soup.find_all('a') + for tag in tags: + if tag.has_attr('href'): + if tag['href'].startswith('/vladmandic/sdnext/wiki/'): + page = tag['href'].replace('/vladmandic/sdnext/wiki/', '') + tag.name = 'div' + tag['class'] = 'github-page' + tag['onclick'] = f'clickGitHubWikiPage("{page}")' + pages.append(page) + elif tag['href'].startswith('/'): + tag['href'] = 'https://github.com' + tag['href'] + + # find result only + result = soup.find(attrs={"data-testid": "results-list"}) + if result is None: + return 'No results found' + html = str(result) + else: + html = f'Error: {res.status_code}' + log.debug(f'Search wiki: code={res.status_code} 
text="{search_term}" pages={pages}') + return html def create_ui_logs(): @@ -10,57 +236,49 @@ def create_ui_logs(): return content with gr.Column(): - get_changelog_btn = gr.Button(value='Get changelog', elem_id="get_changelog") - gr.HTML('  Open GitHub Changelog') + get_changelog_btn = gr.Button(value='Get Changelog', elem_id="get_changelog") with gr.Column(): - _changelog_search = gr.Textbox(label="Search Changelog", elem_id="changelog_search") + _changelog_search = gr.Textbox(label="Search Changelog", elem_id="changelog_search", elem_classes="docs-search") _changelog_result = gr.HTML(elem_id="changelog_result") changelog_markdown = gr.Markdown('', elem_id="changelog_markdown") get_changelog_btn.click(fn=get_changelog, outputs=[changelog_markdown], show_progress=True) -def create_ui_wiki(): - def search_github(search_term): - import requests - from urllib.parse import quote - from installer import install - - install('beautifulsoup4') - from bs4 import BeautifulSoup - - url = f'https://github.com/search?q=repo%3Avladmandic%2Fautomatic+{quote(search_term)}&type=wikis' - res = requests.get(url, timeout=10) - if res.status_code == 200: - html = res.content - soup = BeautifulSoup(html, 'html.parser') - - # remove header links - tags = soup.find_all(attrs={"data-hovercard-url": "/vladmandic/sdnext/hovercard"}) - for tag in tags: - tag.extract() - - # replace relative links with full links - tags = soup.find_all('a') - for tag in tags: - if tag.has_attr('href') and tag['href'].startswith('/'): - tag['href'] = 'https://github.com' + tag['href'] - - # find result only - result = soup.find(attrs={"data-testid": "results-list"}) - if result is None: - return 'No results found' - html = str(result) - return html - else: - return f'Error: {res.status_code}' - +def create_ui_github(): with gr.Row(): - gr.HTML('  Open GitHub Wiki') + github_search = gr.Textbox(label="Search GitHub Wiki Pages", elem_id="github_search", elem_classes="docs-search") + github_search_btn = 
ui_components.ToolButton(value=ui_symbols.search, elem_id="github_search_btn") with gr.Row(): - wiki_result = gr.HTML(elem_id="wiki_result", value='') + github_result = gr.HTML(elem_id="github_result", value='', elem_classes="github-result") with gr.Row(): - wiki_search.submit(_js="wikiSearch", fn=search_github, inputs=[wiki_search], outputs=[wiki_result]) - wiki_search_btn.click(_js="wikiSearch", fn=search_github, inputs=[wiki_search], outputs=[wiki_result]) + github_md_btn = gr.Button(value='html2md', elem_id="github_md_btn", visible=False) + github_md = gr.Markdown(elem_id="github_md", value='', elem_classes="github-md") + github_search.submit(fn=search_github, inputs=[github_search], outputs=[github_result], show_progress=True) + github_search_btn.click(fn=search_github, inputs=[github_search], outputs=[github_result], show_progress=True) + github_md_btn.click(fn=get_github_page, _js='getGitHubWikiPage', inputs=[github_search], outputs=[github_md], show_progress=True) + + +def create_ui_docs(): + with gr.Row(): + docs_search = gr.Textbox(label="Search Docs", elem_id="docs_search", elem_classes="docs-search") + docs_search_btn = ui_components.ToolButton(value=ui_symbols.search, elem_id="docs_search_btn") + with gr.Row(): + docs_result = gr.HTML(elem_id="docs_result", value='', elem_classes="docs-result") + with gr.Row(): + docs_md_btn = gr.Button(value='html2md', elem_id="docs_md_btn", visible=False) + docs_md = gr.Markdown(elem_id="docs_md", value='', elem_classes="docs-md") + docs_search.submit(fn=search_docs, inputs=[docs_search], outputs=[docs_result], show_progress=False) + docs_search.change(fn=search_docs, inputs=[docs_search], outputs=[docs_result], show_progress=False) + docs_search_btn.click(fn=search_docs, inputs=[docs_search], outputs=[docs_result], show_progress=False) + 
docs_md_btn.click(fn=get_docs_page, _js='getDocsPage', inputs=[docs_search], outputs=[docs_md], show_progress=False) + + +def create_ui(): + with gr.Tabs(elem_id="tabs_info"): + with gr.TabItem("Docs", id="docs", elem_id="system_tab_docs"): + create_ui_docs() + with gr.TabItem("Wiki", id="wiki", elem_id="system_tab_wiki"): + create_ui_github() + with gr.TabItem("Change log", id="change_log", elem_id="system_tab_changelog"): + create_ui_logs() diff --git a/modules/video_models/video_run.py b/modules/video_models/video_run.py index 6a3e13ebb..01e3fd387 100644 --- a/modules/video_models/video_run.py +++ b/modules/video_models/video_run.py @@ -71,7 +71,7 @@ def generate(*args, **kwargs): # cleanup memory shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='video') # set args processing.fix_seed(p) diff --git a/pipelines/model_auraflow.py b/pipelines/model_auraflow.py index 3d66c02b3..175ba12bf 100644 --- a/pipelines/model_auraflow.py +++ b/pipelines/model_auraflow.py @@ -17,5 +17,5 @@ def load_auraflow(checkpoint_info, diffusers_load_config={}): cache_dir = shared.opts.diffusers_dir, **diffusers_load_config, ) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git a/pipelines/model_chroma.py b/pipelines/model_chroma.py index f8750782b..4964543ac 100644 --- a/pipelines/model_chroma.py +++ b/pipelines/model_chroma.py @@ -117,7 +117,7 @@ def load_quants(kwargs, repo_id, cache_dir, allow_quant): # pylint: disable=unus "cache_dir": cache_dir, } if 'transformer' not in kwargs and model_quant.check_nunchaku('Model'): - raise NotImplementedError('Nunchaku does not support Chroma Model yet. 
See https://github.com/mit-han-lab/nunchaku/issues/167') + shared.log.error(f'Load module: quant=Nunchaku module=transformer repo="{repo_id}" unsupported') if 'transformer' not in kwargs and model_quant.check_quant('Model'): load_args, quant_args = model_quant.get_dit_args(diffusers_load_config, module='Model', device_map=True, modules_to_not_convert=["distilled_guidance_layer"]) kwargs['transformer'] = diffusers.ChromaTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", **load_args, **quant_args) @@ -187,7 +187,7 @@ def load_chroma(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ # unload current model sd_models.unload_model_weights() shared.sd_model = None - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') if shared.opts.teacache_enabled: from modules import teacache @@ -277,5 +277,5 @@ def load_chroma(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ for k in kwargs.keys(): kwargs[k] = None sd_hijack_te.init_hijack(pipe) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe, allow_post_quant diff --git a/pipelines/model_flux.py b/pipelines/model_flux.py index ea1e1d269..dd113a0d4 100644 --- a/pipelines/model_flux.py +++ b/pipelines/model_flux.py @@ -118,12 +118,16 @@ def load_quants(kwargs, repo_id, cache_dir, allow_quant): # pylint: disable=unus import nunchaku nunchaku_precision = nunchaku.utils.get_precision() nunchaku_repo = None - if 'kontext' in repo_id.lower(): + if 'flux.1-kontext' in repo_id.lower(): nunchaku_repo = f"mit-han-lab/nunchaku-flux.1-kontext-dev/svdq-{nunchaku_precision}_r32-flux.1-kontext-dev.safetensors" - elif 'dev' in repo_id.lower(): + elif 'flux.1-dev' in repo_id.lower(): nunchaku_repo = f"mit-han-lab/nunchaku-flux.1-dev/svdq-{nunchaku_precision}_r32-flux.1-dev.safetensors" - elif 'schnell' in repo_id.lower(): + elif 'flux.1-schnell' in repo_id.lower(): nunchaku_repo = 
f"mit-han-lab/nunchaku-flux.1-schnell/svdq-{nunchaku_precision}_r32-flux.1-schnell.safetensors" + elif 'flux.1-fill' in repo_id.lower(): + nunchaku_repo = f"mit-han-lab/svdq-fp4-flux.1-fill-dev/svdq-{nunchaku_precision}_r32-flux.1-fill-dev.safetensors" + elif 'flux.1-depth' in repo_id.lower(): + nunchaku_repo = f"mit-han-lab/svdq-int4-flux.1-depth-dev/svdq-{nunchaku_precision}_r32-flux.1-depth-dev.safetensors" elif 'shuttle' in repo_id.lower(): nunchaku_repo = f"mit-han-lab/nunchaku-shuttle-jaguar/svdq-{nunchaku_precision}_r32-shuttle-jaguar.safetensors" else: @@ -220,7 +224,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch # unload current model sd_models.unload_model_weights() shared.sd_model = None - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') if shared.opts.teacache_enabled: from modules import teacache @@ -356,5 +360,5 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch for k in kwargs.keys(): kwargs[k] = None sd_hijack_te.init_hijack(pipe) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe, allow_post_quant diff --git a/pipelines/model_flux_nf4.py b/pipelines/model_flux_nf4.py index 2290bacba..94c99e422 100644 --- a/pipelines/model_flux_nf4.py +++ b/pipelines/model_flux_nf4.py @@ -196,5 +196,5 @@ def load_flux_nf4(checkpoint_info, prequantized: bool = True): errors.display(e, 'FLUX:') del original_state_dict - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return transformer, text_encoder_2 diff --git a/pipelines/model_kolors.py b/pipelines/model_kolors.py index b6c35c85a..8add20664 100644 --- a/pipelines/model_kolors.py +++ b/pipelines/model_kolors.py @@ -23,5 +23,5 @@ def load_kolors(_checkpoint_info, diffusers_load_config={}): **diffusers_load_config, ) pipe.vae.config.force_upcast = True - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git 
a/pipelines/model_lumina.py b/pipelines/model_lumina.py index d792740ef..60b681881 100644 --- a/pipelines/model_lumina.py +++ b/pipelines/model_lumina.py @@ -14,7 +14,7 @@ def load_lumina(_checkpoint_info, diffusers_load_config={}): cache_dir = shared.opts.diffusers_dir, **load_config, ) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe @@ -91,5 +91,5 @@ def load_lumina2(checkpoint_info, diffusers_load_config={}): ) sd_hijack_te.init_hijack(pipe) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git a/pipelines/model_meissonic.py b/pipelines/model_meissonic.py index 650035913..30671e350 100644 --- a/pipelines/model_meissonic.py +++ b/pipelines/model_meissonic.py @@ -52,5 +52,5 @@ def load_meissonic(checkpoint_info, diffusers_load_config={}): diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["meissonic"] = PipelineMeissonic diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["meissonic"] = PipelineMeissonicImg2Img diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["meissonic"] = PipelineMeissonicInpaint - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git a/pipelines/model_omnigen.py b/pipelines/model_omnigen.py index 88f379dc8..596fe4dbb 100644 --- a/pipelines/model_omnigen.py +++ b/pipelines/model_omnigen.py @@ -28,5 +28,5 @@ def load_omnigen(checkpoint_info, diffusers_load_config={}): # pylint: disable=u **load_config, ) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git a/pipelines/model_omnigen2.py b/pipelines/model_omnigen2.py index 84b0b828b..94488ae0a 100644 --- a/pipelines/model_omnigen2.py +++ b/pipelines/model_omnigen2.py @@ -45,5 +45,5 @@ def load_omnigen2(checkpoint_info, diffusers_load_config={}): # pylint: disable= ) pipe.transformer = transformer # for omnigen2 transformer must be loaded after pipeline - 
devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git a/pipelines/model_pixart.py b/pipelines/model_pixart.py index c7798109a..254326abc 100644 --- a/pipelines/model_pixart.py +++ b/pipelines/model_pixart.py @@ -40,5 +40,5 @@ def load_pixart(checkpoint_info, diffusers_load_config={}): text_encoder=text_encoder, **load_args, ) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git a/pipelines/model_sana.py b/pipelines/model_sana.py index 0246beefd..1c04ff1e3 100644 --- a/pipelines/model_sana.py +++ b/pipelines/model_sana.py @@ -88,5 +88,5 @@ def load_sana(checkpoint_info, kwargs={}): sd_hijack_te.init_hijack(pipe) t1 = time.time() shared.log.debug(f'Load model: type=Sana target={devices.dtype} te={pipe.text_encoder.dtype} transformer={pipe.transformer.dtype} vae={pipe.vae.dtype} time={t1-t0:.2f}') - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git a/pipelines/model_sd3.py b/pipelines/model_sd3.py index 8b3b6601c..6130ad81c 100644 --- a/pipelines/model_sd3.py +++ b/pipelines/model_sd3.py @@ -124,5 +124,5 @@ def load_sd3(checkpoint_info, cache_dir=None, config=None): config=config, **kwargs, ) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') return pipe diff --git a/pipelines/model_stablecascade.py b/pipelines/model_stablecascade.py index fc143e8e7..fb780c0f2 100644 --- a/pipelines/model_stablecascade.py +++ b/pipelines/model_stablecascade.py @@ -155,7 +155,7 @@ def load_cascade_combined(checkpoint_info, diffusers_load_config): latent_dim_scale=sd_model.decoder_pipe.config.latent_dim_scale, ) - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='load') shared.log.debug(f'StableCascade combined: {sd_model.__class__.__name__}') return sd_model diff --git a/requirements.txt b/requirements.txt index 30b00f06d..c957ed21e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,7 @@ 
invisible-watermark pi-heif # versioned -rich==14.0.0 +rich==14.1.0 safetensors==0.5.3 tensordict==0.8.3 peft==0.16.0 diff --git a/scripts/pulid_ext.py b/scripts/pulid_ext.py index 59ced45b6..7003daf19 100644 --- a/scripts/pulid_ext.py +++ b/scripts/pulid_ext.py @@ -228,7 +228,7 @@ class Script(scripts_manager.Script): shared.sd_model.clip_vision_model = None shared.sd_model.handler_ante = None shared.sd_model = shared.sd_model.pipe - devices.torch_gc(force=True) + devices.torch_gc(force=True, reason='pulid') shared.log.debug(f'PuLID complete: class={shared.sd_model.__class__.__name__} preprocess={self.preprocess:.2f} pipe={"restore" if restore else "cache"}') return processed diff --git a/webui.py b/webui.py index 25132d1fb..22735c6f1 100644 --- a/webui.py +++ b/webui.py @@ -19,6 +19,7 @@ import modules.devices import modules.sd_checkpoint import modules.sd_samplers import modules.scripts_manager +import modules.scripts import modules.sd_models import modules.sd_vae import modules.sd_unet @@ -106,6 +107,7 @@ def initialize(): log.info('Load extensions') t_timer, t_total = modules.scripts_manager.load_scripts() + modules.scripts.register_runners() timer.startup.record("extensions") timer.startup.records["extensions"] = t_total # scripts can reset the time log.debug(f'Extensions init time: {t_timer.summary()}') diff --git a/wiki b/wiki index 906fb43c5..79b18f2c5 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 906fb43c528af396fe3dd3da3d556f2aa39f5f44 +Subproject commit 79b18f2c5e3438f3f564fd264fdb27bed76b0f72