diff --git a/CHANGELOG.md b/CHANGELOG.md index 81298a455..d3f7a0aa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,13 @@ # Change Log for SD.Next -## Update for 2025-04-16 +## Update for 2025-04-18 - **Features** - [Nunchaku](https://github.com/mit-han-lab/nunchaku) inference engine with custom **SVDQuant** 4-bit execution - highly experimental and with limited support, but when it works, its magic: **Flux.1 at 5.90 it/s** *(not sec/it)*! - see [Nunchaku Wiki](https://github.com/vladmandic/sdnext/wiki/Nunchaku) for details + highly experimental and with limited support, but when it works, its magic: **Flux.1 at 6.0 it/s** *(not sec/it)*! + see [Nunchaku Wiki](https://github.com/vladmandic/sdnext/wiki/Nunchaku) for installation guide and list of supported models & features + - [LTXVideo 0.9.6](https://github.com/Lightricks/LTX-Video?tab=readme-ov-file) T2V and I2V + in both standard and distilled variants - [CFG-Zero](https://github.com/WeichenFan/CFG-Zero-star) new guidance method optimized for flow-matching models implemented for **FLUX.1, HiDream-I1, SD3.x, CogView4, HunyuanVideo, WanAI** enable and configure in *settings -> pipeline modifiers -> cfg zero* @@ -14,10 +16,12 @@ - **HiDream** optimized offloading and prompt-encode caching it now works in 12GB VRAM / 26GB RAM! 
- **CogView3** and **CogView4** model loader optimizations + - **Sana** model loader optimizations - add explicit offload after encode prompt configure in *settings -> text encoder -> offload* - **Other** - **HiDream** add HF gated access auth check + - **HiDream** add LLM info to metadata - add **UniPC FlowMatch** scheduler - add **LCM FlowMatch** scheduler - networks: set which networks to skip when scanning civitai @@ -25,10 +29,18 @@ comma-separate list of regex patterns to skip - ui display reference models with subdued color - xyz grid support bool + - do not force gc at end of processing +- **Wiki** + - new Nunchaku page + - updated HiDream, Quantization, NNCF pages - **Fixes** - NNCF with TE-only quant + - Quanto with TE/LLM quant + - HiDream live preview + - SD35 InstantX IP-adapter - **HunyuanVideo-I2V** with latest transformers - trace logging + - xyz grid restore settings ## Update for 2025-04-12 diff --git a/html/reference.json b/html/reference.json index d7ac2cea5..2f75d032c 100644 --- a/html/reference.json +++ b/html/reference.json @@ -428,13 +428,32 @@ "preview": "THUDM--CogView3-Plus-3B.jpg", "skip": true }, + + "ShuttleAI Shuttle 3.0 Diffusion": { + "path": "shuttleai/shuttle-3-diffusion", + "desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition", + "preview": "shuttleai--shuttle-3-diffusion.jpg", + "skip": true + }, + "ShuttleAI Shuttle 3.1 Aesthetic": { + "path": "shuttleai/shuttle-3.1-aesthetic", + "desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. 
When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition", + "preview": "shuttleai--shuttle-3-diffusion.jpg", + "skip": true + }, + "ShuttleAI Shuttle Jaguar": { + "path": "shuttleai/shuttle-jaguar", + "desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition", + "preview": "shuttleai--shuttle-3-diffusion.jpg", + "skip": true + }, + "Meissonic": { "path": "MeissonFlow/Meissonic", "desc": "Meissonic is a non-autoregressive mask image modeling text-to-image synthesis model that can generate high-resolution images. It is designed to run on consumer graphics cards.", "preview": "MeissonFlow--Meissonic.jpg", "skip": true }, - "aMUSEd 256": { "path": "huggingface/amused/amused-256", "skip": true, diff --git a/installer.py b/installer.py index 51f4c97d5..783567c70 100644 --- a/installer.py +++ b/installer.py @@ -571,7 +571,7 @@ def install_cuda(): log.info('CUDA: nVidia toolkit detected') ts('cuda', t_start) if args.use_nightly: - cmd = os.environ.get('TORCH_COMMAND', 'pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 --extra-index-url https://download.pytorch.org/whl/nightly/cu126') + cmd = os.environ.get('TORCH_COMMAND', '--upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 --extra-index-url https://download.pytorch.org/whl/nightly/cu126') else: cmd = os.environ.get('TORCH_COMMAND', 'torch==2.6.0+cu126 torchvision==0.21.0+cu126 --index-url https://download.pytorch.org/whl/cu126') return cmd @@ -646,9 +646,6 @@ def install_rocm_zluda(): if error is None: try: - if device is not None and zluda_installer.get_blaslt_enabled(): - log.debug(f'ROCm hipBLASLt: arch={device.name} 
available={device.blaslt_supported}') - zluda_installer.set_blaslt_enabled(device.blaslt_supported) zluda_installer.load() torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.6.0 torchvision --index-url https://download.pytorch.org/whl/cu118') except Exception as e: diff --git a/modules/ipadapter.py b/modules/ipadapter.py index aa010c33d..a03381a1b 100644 --- a/modules/ipadapter.py +++ b/modules/ipadapter.py @@ -42,7 +42,7 @@ ADAPTERS_SDXL = { } ADAPTERS_SD3 = { 'None': { 'name': 'none', 'repo': 'none', 'subfolder': 'none' }, - 'InstantX Large': { 'name': 'none', 'repo': 'InstantX/SD3.5-Large-IP-Adapter', 'subfolder': 'none', 'revision': 'refs/pr/10' }, + 'InstantX Large': { 'name': 'ip-adapter_diffusers.safetensors', 'repo': 'InstantX/SD3.5-Large-IP-Adapter', 'subfolder': 'none', 'revision': 'refs/pr/10' }, } ADAPTERS_F1 = { 'None': { 'name': 'none', 'repo': 'none', 'subfolder': 'none' }, diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py index a17914683..40982caac 100644 --- a/modules/lora/extra_networks_lora.py +++ b/modules/lora/extra_networks_lora.py @@ -146,7 +146,7 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork): sd_model.loaded_loras = {} key = f'{",".join(include)}:{",".join(exclude)}' loaded = sd_model.loaded_loras.get(key, []) - # shared.log.trace(f'Network load: type=LoRA key="{key}" requested={requested} loaded={loaded}') + debug_log(f'Network load: type=LoRA key="{key}" requested={requested} loaded={loaded}') if len(requested) != len(loaded): sd_model.loaded_loras[key] = requested return True @@ -167,21 +167,24 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork): names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step) requested = self.signature(names, te_multipliers, unet_multipliers) + load_method = lora_overrides.get_method() if debug: import sys fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access - 
debug_log(f'Network load: type=LoRA include={include} exclude={exclude} requested={requested} fn={fn}') + debug_log(f'Network load: type=LoRA include={include} exclude={exclude} method={load_method} requested={requested} fn={fn}') - force_diffusers = lora_overrides.check_override() - if force_diffusers: - has_changed = False # diffusers handle their own loading + if load_method == 'diffusers': + has_changed = False # diffusers handles its own loading if len(exclude) == 0: job = shared.state.job shared.state.job = 'LoRA' lora_load.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load only on first call sd_models.set_diffuser_offload(shared.sd_model, op="model") shared.state.job = job - else: + elif load_method == 'nunchaku': + from modules.lora import lora_nunchaku + has_changed = lora_nunchaku.load_nunchaku(names, unet_multipliers) + else: # native lora_load.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load has_changed = self.changed(requested, include, exclude) if has_changed: @@ -196,11 +199,11 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork): shared.state.job = job debug_log(f'Network load: type=LoRA previous={[n.name for n in l.previously_loaded_networks]} current={[n.name for n in l.loaded_networks]} changed') - if len(l.loaded_networks) > 0 and (len(networks.applied_layers) > 0 or force_diffusers) and step == 0: + if len(l.loaded_networks) > 0 and (len(networks.applied_layers) > 0 or load_method=='diffusers' or load_method=='nunchaku') and step == 0: infotext(p) prompt(p) - if (has_changed or force_diffusers) and len(include) == 0: # print only once - shared.log.info(f'Network load: type=LoRA apply={[n.name for n in l.loaded_networks]} mode={"fuse" if shared.opts.lora_fuse_diffusers else "backup"} te={te_multipliers} unet={unet_multipliers} time={l.timer.summary}') + if has_changed and len(include) == 0: # print only once + shared.log.info(f'Network load: type=LoRA apply={[n.name for n in l.loaded_networks]} 
method={load_method} mode={"fuse" if shared.opts.lora_fuse_diffusers else "backup"} te={te_multipliers} unet={unet_multipliers} time={l.timer.summary}') def deactivate(self, p): if shared.native and len(lora_load.diffuser_loaded) > 0: diff --git a/modules/lora/lora_load.py b/modules/lora/lora_load.py index f67e6b26b..1a38f8787 100644 --- a/modules/lora/lora_load.py +++ b/modules/lora/lora_load.py @@ -115,7 +115,7 @@ def load_safetensors(name, network_on_disk) -> Union[network.Network, None]: if l.debug: shared.log.debug(f'Network load: type=LoRA name="{name}" unmatched={keys_failed_to_match}') else: - shared.log.debug(f'Network load: type=LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)} dtypes={dtypes} direct={shared.opts.lora_fuse_diffusers}') + shared.log.debug(f'Network load: type=LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)} dtypes={dtypes} fuse={shared.opts.lora_fuse_diffusers}') if len(matched_networks) == 0: return None lora_cache[name] = net @@ -205,7 +205,7 @@ def network_download(name): return None -def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None): +def gather_networks(names): networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names] if any(x is None for x in networks_on_disk): list_available_networks() @@ -213,6 +213,11 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non for i in range(len(names)): if names[i].startswith('/'): networks_on_disk[i] = network_download(names[i]) + return networks_on_disk + + +def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None): + networks_on_disk = gather_networks(names) failed_to_load_networks = [] recompile_model, skip_lora_load = maybe_recompile_model(names, te_multipliers) @@ -230,8 +235,11 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non try: if recompile_model: 
shared.compiled_model_state.lora_model.append(f"{name}:{te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier}") - if shared.opts.lora_force_diffusers or lora_overrides.check_override(shorthash): # OpenVINO only works with Diffusers LoRa loading + lora_method = lora_overrides.get_method(shorthash) + if shared.opts.lora_force_diffusers or lora_method == 'diffusers': # OpenVINO only works with Diffusers LoRa loading net = load_diffusers(name, network_on_disk, lora_scale=te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier) + elif lora_method == 'nunchaku': + pass # handled directly from extra_networks_lora.load_nunchaku else: net = load_safetensors(name, network_on_disk) if net is not None: @@ -260,12 +268,12 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non if not skip_lora_load and len(diffuser_loaded) > 0: shared.log.debug(f'Network load: type=LoRA loaded={diffuser_loaded} available={shared.sd_model.get_list_adapters()} active={shared.sd_model.get_active_adapters()} scales={diffuser_scales}') try: - t0 = time.time() + t1 = time.time() shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales) if shared.opts.lora_fuse_diffusers and not lora_overrides.check_fuse(): shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # diffusers with fuse uses fixed scale since later apply does the scaling shared.sd_model.unload_lora_weights() - l.timer.activate += time.time() - t0 + l.timer.activate += time.time() - t1 except Exception as e: shared.log.error(f'Network load: type=LoRA {e}') if l.debug: diff --git a/modules/lora/lora_nunchaku.py b/modules/lora/lora_nunchaku.py new file mode 100644 index 000000000..63a71b46f --- /dev/null +++ b/modules/lora/lora_nunchaku.py @@ -0,0 +1,33 @@ +import time +from modules import shared, errors +from modules.lora import lora_load, lora_common + + 
+previously_loaded = [] # we maintain private state here + + +def load_nunchaku(names, strengths): + global previously_loaded # pylint: disable=global-statement + strengths = [s[0] if isinstance(s, list) else s for s in strengths] + networks = lora_load.gather_networks(names) + networks = [(network, strength) for network, strength in zip(networks, strengths) if network is not None and strength > 0] + loras = [(network.filename, strength) for network, strength in networks] + is_changed = loras != previously_loaded + if not is_changed: + return False + + previously_loaded = loras + try: + t0 = time.time() + from nunchaku.lora.flux.compose import compose_lora + composed_lora = compose_lora(loras) + shared.sd_model.transformer.update_lora_params(composed_lora) + lora_common.loaded_networks = [n[0] for n in networks] # used by infotext + t1 = time.time() + lora_common.timer.load = t1 - t0 + shared.log.debug(f"Network load: type=LoRA method=nunchaku loras={names} strength={strengths} time={t1-t0:.3f}") + except Exception as e: + shared.log.error(f'Network load: type=LoRA method=nunchaku {e}') + if lora_common.debug: + errors.display(e, 'LoRA') + return is_changed diff --git a/modules/lora/lora_overrides.py b/modules/lora/lora_overrides.py index 8cc9c1d17..0e16c1254 100644 --- a/modules/lora/lora_overrides.py +++ b/modules/lora/lora_overrides.py @@ -25,7 +25,7 @@ force_diffusers = [ # forced always '22c8339e7666', # spo-sdxl-10ep ] -force_models = [ # forced always +force_models_diffusers = [ # forced always # 'sd3', 'sc', 'h1', @@ -41,7 +41,7 @@ force_models = [ # forced always 'allegrovideo', ] -force_classes = [ # forced always +force_classes_diffusers = [ # forced always ] fuse_ignore = [ @@ -49,17 +49,19 @@ fuse_ignore = [ ] -def check_override(shorthash=''): - force = False - force = force or (shared.sd_model_type in force_models) - force = force or (shared.sd_model.__class__.__name__ in force_classes) - if len(shorthash) < 4: - return force - force = force or 
(any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False) - force = force or any(x.startswith(shorthash) for x in force_diffusers) - if force and shared.opts.lora_maybe_diffusers: - shared.log.debug('LoRA override: force diffusers') - return force +def get_method(shorthash=''): + use_diffusers = (shared.sd_model_type in force_models_diffusers) or (shared.sd_model.__class__.__name__ in force_classes_diffusers) + if shared.opts.lora_maybe_diffusers and len(shorthash) > 4: + use_diffusers = use_diffusers or any(x.startswith(shorthash) for x in maybe_diffusers) + if shared.opts.lora_force_diffusers and len(shorthash) > 4: + use_diffusers = use_diffusers or any(x.startswith(shorthash) for x in force_diffusers) + use_nunchaku = hasattr(shared.sd_model, 'transformer') and 'Nunchaku' in shared.sd_model.transformer.__class__.__name__ + if use_nunchaku: + return 'nunchaku' + elif use_diffusers: + return 'diffusers' + else: + return 'native' def check_fuse(): return shared.sd_model_type in fuse_ignore diff --git a/modules/mit_nunchaku.py b/modules/mit_nunchaku.py index 06e084e6e..39497cf36 100644 --- a/modules/mit_nunchaku.py +++ b/modules/mit_nunchaku.py @@ -1,7 +1,7 @@ # MIT-Han-Lab Nunchaku: # TODO nunchaku: cache-dir for transformer and t5 loader # TODO nunchaku: batch support -# TODO nunchaku: LoRA support + from installer import log, pip from modules import devices @@ -31,6 +31,7 @@ def install_nunchaku(): if devices.backend is None: return False # too early if not check(): + import os import sys import platform import importlib @@ -51,11 +52,13 @@ def install_nunchaku(): if torch_ver not in ['2.5', '2.6', '2.7', '2.8']: log.error(f'Nunchaku: torch={torch.__version__} unsupported') suffix = 'x86_64' if arch == 'linux' else 'win_amd64' - url = f'https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-{ver}' - url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}_{suffix}.whl' - cmd = f'install --upgrade 
{url}' + cmd = os.environ.get('NUNCHAKU_COMMAND', None) + if cmd is None: + url = f'https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-{ver}' + url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}_{suffix}.whl' + cmd = f'install --upgrade {url}' # pip install https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-0.2.0+torch2.6-cp311-cp311-linux_x86_64.whl - log.debug(f'Nunchaku: url={url}') + log.debug(f'Nunchaku: install="{cmd}"') pip(cmd, ignore=False, uv=False) importlib.reload(pkg_resources) if not check(): diff --git a/modules/model_flux.py b/modules/model_flux.py index 3b123cc20..eabb248f7 100644 --- a/modules/model_flux.py +++ b/modules/model_flux.py @@ -112,11 +112,21 @@ def load_quants(kwargs, repo_id, cache_dir, allow_quant): if 'transformer' not in kwargs and model_quant.check_nunchaku('Transformer'): import nunchaku nunchaku_precision = nunchaku.utils.get_precision() - nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-dev" if 'dev' in repo_id else f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-schnell" - shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} attention={shared.opts.nunchaku_attention}') - kwargs['transformer'] = nunchaku.NunchakuFluxTransformer2dModel.from_pretrained(nunchaku_repo, torch_dtype=devices.dtype) - if shared.opts.nunchaku_attention: - kwargs['transformer'].set_attention_impl("nunchaku-fp16") + nunchaku_repo = None + if 'dev' in repo_id: + nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-dev" + elif 'schnell' in repo_id: + nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-schnell" + elif 'shuttle' in repo_id: + nunchaku_repo = 'mit-han-lab/svdq-fp4-shuttle-jaguar' + else: + shared.log.error(f'Load module: quant=Nunchaku module=transformer repo="{repo_id}" unsupported') + if nunchaku_repo is not None: + shared.log.debug(f'Load module: quant=Nunchaku module=transformer 
repo="{nunchaku_repo}" precision={nunchaku_precision} offload={shared.opts.nunchaku_offload} attention={shared.opts.nunchaku_attention}') + kwargs['transformer'] = nunchaku.NunchakuFluxTransformer2dModel.from_pretrained(nunchaku_repo, offload=shared.opts.nunchaku_offload, torch_dtype=devices.dtype) + kwargs['transformer'].quantization_method = 'SVDQuant' + if shared.opts.nunchaku_attention: + kwargs['transformer'].set_attention_impl("nunchaku-fp16") elif 'transformer' not in kwargs and model_quant.check_quant('Transformer'): quant_args = model_quant.create_config(allow=allow_quant, module='Transformer') if quant_args: diff --git a/modules/model_hidream.py b/modules/model_hidream.py index 358ac1011..948cc6c85 100644 --- a/modules/model_hidream.py +++ b/modules/model_hidream.py @@ -54,7 +54,8 @@ def load_text_encoders(repo_id, diffusers_load_config={}): sd_models.move_model(text_encoder_3, devices.cpu) load_args, quant_args = model_quant.get_dit_args(diffusers_load_config, module='LLM', device_map=True) - shared.log.debug(f'Load model: type=HiDream te4="{shared.opts.model_h1_llama_repo}" quant="{model_quant.get_quant_type(quant_args)}" args={load_args}') + llama_repo = shared.opts.model_h1_llama_repo if shared.opts.model_h1_llama_repo != 'Default' else 'meta-llama/Meta-Llama-3.1-8B-Instruct' + shared.log.debug(f'Load model: type=HiDream te4="{llama_repo}" quant="{model_quant.get_quant_type(quant_args)}" args={load_args}') text_encoder_4 = transformers.LlamaForCausalLM.from_pretrained( shared.opts.model_h1_llama_repo, diff --git a/modules/model_quant.py b/modules/model_quant.py index 99d5d6c9b..8488401a8 100644 --- a/modules/model_quant.py +++ b/modules/model_quant.py @@ -88,9 +88,13 @@ def create_quanto_config(kwargs = None, allow_quanto: bool = True, module: str = load_quanto(silent=True) if optimum_quanto is None: return kwargs - quanto_config = diffusers.QuantoConfig(weights_dtype=shared.opts.quanto_quantization_type) - quanto_config.activations = None # patch so 
it works with transformers - quanto_config.weights = quanto_config.weights_dtype + if module in {'TE', 'LLM'}: + quanto_config = transformers.QuantoConfig(weights=shared.opts.quanto_quantization_type) + quanto_config.weights_dtype = quanto_config.weights + else: + quanto_config = diffusers.QuantoConfig(weights_dtype=shared.opts.quanto_quantization_type) + quanto_config.activations = None # patch so it works with transformers + quanto_config.weights = quanto_config.weights_dtype log.debug(f'Quantization: module="{module}" type=quanto dtype={shared.opts.quanto_quantization_type}') if kwargs is None: return quanto_config @@ -490,8 +494,8 @@ def get_dit_args(load_config:dict={}, module:str=None, device_map:bool=False, al del config['safety_checker'] if 'requires_safety_checker' in config: del config['requires_safety_checker'] - if 'variant' in config: - del config['variant'] + # if 'variant' in config: + # del config['variant'] if device_map: if shared.opts.device_map == 'cpu': config['device_map'] = 'cpu' diff --git a/modules/model_sana.py b/modules/model_sana.py index c2bc39119..d211321fd 100644 --- a/modules/model_sana.py +++ b/modules/model_sana.py @@ -6,15 +6,21 @@ from modules import shared, sd_models, devices, modelloader, model_quant def load_quants(kwargs, repo_id, cache_dir): - quant_args = {} - quant_args = model_quant.create_config() - if not quant_args: - return kwargs - load_args = kwargs.copy() - if 'transformer' not in kwargs and (('Model' in shared.opts.bnb_quantization or 'Model' in shared.opts.torchao_quantization or 'Model' in shared.opts.quanto_quantization) or ('Transformer' in shared.opts.bnb_quantization or 'Transformer' in shared.opts.torchao_quantization or 'Transformer' in shared.opts.quanto_quantization)): - kwargs['transformer'] = diffusers.models.SanaTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", cache_dir=cache_dir, **load_args, **quant_args) - if 'text_encoder' not in kwargs and ('TE' in shared.opts.bnb_quantization 
or 'TE' in shared.opts.torchao_quantization or 'TE' in shared.opts.quanto_quantization): - kwargs['text_encoder'] = transformers.AutoModelForCausalLM.from_pretrained(repo_id, subfolder="text_encoder", cache_dir=cache_dir, **load_args, **quant_args) + kwargs_copy = kwargs.copy() + if model_quant.check_nunchaku('Transformer') and 'Sana_1600M' in repo_id: # only sana-1600m + import nunchaku + nunchaku_precision = nunchaku.utils.get_precision() + nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-sana-1600m" + shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} attention={shared.opts.nunchaku_attention}') + kwargs['transformer'] = nunchaku.NunchakuSanaTransformer2DModel.from_pretrained(nunchaku_repo, torch_dtype=devices.dtype) + elif model_quant.check_quant('Transformer'): + load_args, quant_args = model_quant.get_dit_args(kwargs_copy, module='Transformer') + if quant_args: + kwargs['transformer'] = diffusers.SanaTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", cache_dir=cache_dir, **load_args, **quant_args) + if model_quant.check_quant('TE'): + load_args, quant_args = model_quant.get_dit_args(kwargs_copy, module='TE') + if quant_args: + kwargs['text_encoder'] = transformers.AutoModelForCausalLM.from_pretrained(repo_id, subfolder="text_encoder", cache_dir=cache_dir, **load_args, **quant_args) return kwargs @@ -28,9 +34,9 @@ def load_sana(checkpoint_info, kwargs={}): kwargs.pop('requires_safety_checker', None) kwargs.pop('torch_dtype', None) + # set variant since hf repos are a mess if not repo_id.endswith('_diffusers'): repo_id = f'{repo_id}_diffusers' - if 'Sana_1600M' in repo_id: if devices.dtype == torch.bfloat16 or 'BF16' in repo_id: if 'BF16' not in repo_id: @@ -45,6 +51,7 @@ def load_sana(checkpoint_info, kwargs={}): kwargs = load_quants(kwargs, repo_id, cache_dir=shared.opts.diffusers_dir) shared.log.debug(f'Load model: type=Sana repo="{repo_id}" args={list(kwargs)}') t0 
= time.time() + if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32: kwargs['torch_dtype'] = devices.dtype if 'Sprint' in repo_id: @@ -56,21 +63,31 @@ def load_sana(checkpoint_info, kwargs={}): cache_dir=shared.opts.diffusers_dir, **kwargs, ) - if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32: - if 'transformer' not in kwargs: - pipe.transformer = pipe.transformer.to(dtype=devices.dtype) - if 'text_encoder' not in kwargs: - pipe.text_encoder = pipe.text_encoder.to(dtype=devices.dtype) - pipe.vae = pipe.vae.to(dtype=devices.dtype) - if devices.dtype == torch.float16: - if 'transformer' not in kwargs: - pipe.transformer = pipe.transformer.to(dtype=devices.dtype) - if 'text_encoder' not in kwargs: - pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float32) # gemma2 does not support fp16 - pipe.vae = pipe.vae.to(dtype=torch.float32) # dc-ae often overflows in fp16 - if shared.opts.diffusers_eval: - pipe.text_encoder.eval() - pipe.transformer.eval() + + # only cast if not quant-loaded + try: + if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32: + if 'transformer' not in kwargs: + pipe.transformer = pipe.transformer.to(dtype=devices.dtype) + if 'text_encoder' not in kwargs: + pipe.text_encoder = pipe.text_encoder.to(dtype=devices.dtype) + pipe.vae = pipe.vae.to(dtype=devices.dtype) + if devices.dtype == torch.float16: + if 'transformer' not in kwargs: + pipe.transformer = pipe.transformer.to(dtype=devices.dtype) + if 'text_encoder' not in kwargs: + pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float32) # gemma2 does not support fp16 + pipe.vae = pipe.vae.to(dtype=torch.float32) # dc-ae often overflows in fp16 + except Exception as e: + shared.log.error(f'Load model: type=Sana {e}') + + try: + if shared.opts.diffusers_eval: + pipe.text_encoder.eval() + pipe.transformer.eval() + except Exception: + pass + t1 = time.time() shared.log.debug(f'Load model: type=Sana target={devices.dtype} 
te={pipe.text_encoder.dtype} transformer={pipe.transformer.dtype} vae={pipe.vae.dtype} time={t1-t0:.2f}') devices.torch_gc(force=True) diff --git a/modules/processing.py b/modules/processing.py index 7342ad373..0472a058c 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -502,5 +502,5 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if not p.disable_extra_networks: shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct()} memory={memstats.memory_stats()}') - devices.torch_gc(force=True, reason='final') + devices.torch_gc(force=False, reason='final') return processed diff --git a/modules/processing_args.py b/modules/processing_args.py index d2465bc6c..85be8e4a1 100644 --- a/modules/processing_args.py +++ b/modules/processing_args.py @@ -153,7 +153,7 @@ def set_pipeline_args(p, model, prompts:list, negative_prompts:list, prompts_2:t shared.log.error(f'Prompt parser encode: {e}') if os.environ.get('SD_PROMPT_DEBUG', None) is not None: errors.display(e, 'Prompt parser encode') - timer.process.record('encode', reset=False) + timer.process.record('prompt', reset=False) else: prompt_parser_diffusers.embedder = None diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index 644e0b276..942ef75ba 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -147,9 +147,6 @@ def process_base(p: processing.StableDiffusionProcessing): hidiffusion.unapply() sd_models_compile.check_deepcache(enable=False) - if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: # register used embeddings - p.extra_generation_params['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used) - shared.state.nextjob() return output diff --git a/modules/processing_info.py b/modules/processing_info.py index 4b57d859d..5a2535e7a 100644 --- 
a/modules/processing_info.py +++ b/modules/processing_info.py @@ -7,10 +7,6 @@ from modules.processing_class import StableDiffusionProcessing args = {} # maintain history infotext = '' # maintain history debug = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None -if not shared.native: - from modules import sd_hijack -else: - sd_hijack = None def get_last_args(): @@ -62,11 +58,9 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No "Refiner prompt": p.refiner_prompt if len(p.refiner_prompt) > 0 else None, "Refiner negative": p.refiner_negative if len(p.refiner_negative) > 0 else None, "Styles": "; ".join(p.styles) if p.styles is not None and len(p.styles) > 0 else None, - # sdnext "App": 'SD.Next', "Version": git_commit, "Backend": 'Legacy' if not shared.native else None, - "Pipeline": 'LDM' if not shared.native else None, "Parser": shared.opts.prompt_attention if shared.opts.prompt_attention != 'native' else None, "Comment": comment, "Operations": '; '.join(ops).replace('"', '') if len(p.ops) > 0 else 'none', @@ -77,9 +71,9 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No args["VAE"] = 'TAESD' elif p.vae_type == 'Remote': args["VAE"] = 'Remote' - if shared.opts.add_model_name_to_info and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None: + if getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None: args["Model"] = shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '') - if shared.opts.add_model_hash_to_info and getattr(shared.sd_model, 'sd_model_hash', None) is not None: + if getattr(shared.sd_model, 'sd_model_hash', None) is not None: args["Model hash"] = shared.sd_model.sd_model_hash # native if grid is None and (p.n_iter > 1 or p.batch_size > 1) and index >= 0: @@ -88,8 +82,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No args['Grid'] = grid if 
shared.native: args['Pipeline'] = shared.sd_model.__class__.__name__ - args['TE'] = None if (not shared.opts.add_model_name_to_info or shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'Default') else shared.opts.sd_text_encoder - args['UNet'] = None if (not shared.opts.add_model_name_to_info or shared.opts.sd_unet is None or shared.opts.sd_unet == 'Default') else shared.opts.sd_unet + args['TE'] = None if (shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'Default') else shared.opts.sd_text_encoder + args['UNet'] = None if (shared.opts.sd_unet is None or shared.opts.sd_unet == 'Default') else shared.opts.sd_unet + else: + args['Pipeline'] = 'LDM' if 'txt2img' in p.ops: args["Variation seed"] = all_subseeds[index] if p.subseed_strength > 0 else None args["Variation strength"] = p.subseed_strength if p.subseed_strength > 0 else None @@ -155,11 +151,14 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No args["Detailer negative"] = p.detailer_negative if len(p.detailer_negative) > 0 else None if 'color' in p.ops: args["Color correction"] = True - # embeddings - if sd_hijack is not None and hasattr(sd_hijack.model_hijack, 'embedding_db') and len(sd_hijack.model_hijack.embedding_db.embeddings_used) > 0: # this is for original hijaacked models only, diffusers are handled separately - args["Embeddings"] = ', '.join(sd_hijack.model_hijack.embedding_db.embeddings_used) - # samplers + if shared.opts.token_merging_method == 'ToMe': # tome/todo + args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None + else: + args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None + if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: # register used embeddings + args['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used) + # samplers if getattr(p, 'sampler_name', None) is not None and p.sampler_name.lower() 
!= 'default': args["Sampler eta delta"] = shared.opts.eta_noise_seed_delta if shared.opts.eta_noise_seed_delta != 0 and sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p) else None args["Sampler eta multiplier"] = p.initial_noise_multiplier if getattr(p, 'initial_noise_multiplier', 1.0) != 1.0 else None @@ -177,11 +176,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No args['Sampler range'] = shared.opts.schedulers_timesteps_range if shared.opts.schedulers_timesteps_range != shared.opts.data_labels.get('schedulers_timesteps_range').default else None args['Sampler shift'] = shared.opts.schedulers_shift if shared.opts.schedulers_shift != shared.opts.data_labels.get('schedulers_shift').default else None args['Sampler dynamic shift'] = shared.opts.schedulers_dynamic_shift if shared.opts.schedulers_dynamic_shift != shared.opts.data_labels.get('schedulers_dynamic_shift').default else None - # tome/todo - if shared.opts.token_merging_method == 'ToMe': - args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None - else: - args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None + + # model specific + if shared.sd_model_type == 'h1': + args['LLM'] = None if shared.opts.model_h1_llama_repo == 'Default' else shared.opts.model_h1_llama_repo args.update(p.extra_generation_params) for k, v in args.copy().items(): diff --git a/modules/processing_vae.py b/modules/processing_vae.py index 00eecc092..fd84c9c09 100644 --- a/modules/processing_vae.py +++ b/modules/processing_vae.py @@ -2,7 +2,6 @@ import os import time import numpy as np import torch -import torchvision.transforms.functional as TF from modules import shared, devices, sd_models, sd_vae, sd_vae_taesd, errors @@ -316,6 +315,7 @@ def vae_decode(latents, model, output_type='np', vae_type='Full', width=None, he def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable + import torchvision.transforms.functional as f 
if shared.state.interrupted or shared.state.skipped: return [] if not hasattr(model, 'vae') and hasattr(model, 'pipe'): @@ -323,7 +323,7 @@ def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable if not hasattr(model, 'vae'): shared.log.error('VAE not found in model') return [] - tensor = TF.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae) + tensor = f.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae) if vae_type == 'Full': tensor = tensor * 2 - 1 latents = full_vae_encode(image=tensor, model=shared.sd_model) diff --git a/modules/sd_models.py b/modules/sd_models.py index bed80bdb5..35507dcec 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -947,8 +947,12 @@ def add_noise_pred_to_diffusers_callback(pipe): pipe.prior_pipe._callback_tensor_inputs.append("predicted_image_embedding") # pylint: disable=protected-access elif hasattr(pipe, "scheduler") and "flow" in pipe.scheduler.__class__.__name__.lower(): pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access + elif hasattr(pipe, "scheduler") and hasattr(pipe.scheduler, "config") and getattr(pipe.scheduler.config, "prediction_type", "none") == "flow_prediction": + pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access elif hasattr(pipe, "default_scheduler") and "flow" in pipe.default_scheduler.__class__.__name__.lower(): pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access + elif hasattr(pipe, "default_scheduler") and hasattr(pipe.default_scheduler, "config") and getattr(pipe.default_scheduler.config, "prediction_type", "none") == "flow_prediction": + pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access return pipe diff --git a/modules/sd_offload.py b/modules/sd_offload.py index 9352d4f38..c57165fb5 100644 --- a/modules/sd_offload.py +++ b/modules/sd_offload.py @@ -299,7 +299,7 @@ def 
apply_balanced_offload(sd_model=None, exclude=[]): if device_map and max_memory: module.balanced_offload_device_map = device_map module.balanced_offload_max_memory = max_memory - module.offload_post = shared.sd_model_type in [offload_post] and shared.opts.te_hijack and module_name.startswith("text_encoder") + module.offload_post = shared.sd_model_type in offload_post and shared.opts.te_hijack and module_name.startswith("text_encoder") devices.torch_gc(fast=True, force=True, reason='offload') apply_balanced_offload_to_module(sd_model) diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py index f5bde64bd..3d83cb29d 100644 --- a/modules/sd_samplers_diffusers.py +++ b/modules/sd_samplers_diffusers.py @@ -268,7 +268,7 @@ class DiffusionSampler: if 'shift' in self.config: self.config['shift'] = shared.opts.schedulers_shift if shared.opts.schedulers_shift > 0 else 3 if 'use_dynamic_shifting' in self.config: - self.config['use_dynamic_shifting'] = True if shared.opts.schedulers_shift <= 0 else shared.opts.schedulers_dynamic_shift + self.config['use_dynamic_shifting'] = True if shared.opts.schedulers_shift == 0 else shared.opts.schedulers_dynamic_shift if 'use_beta_sigmas' in self.config and 'sigma_schedule' in self.config: self.config['use_beta_sigmas'] = 'StableDiffusion3' in model.__class__.__name__ if 'rescale_betas_zero_snr' in self.config: diff --git a/modules/shared.py b/modules/shared.py index 601e5ebfa..58e114d1a 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -416,7 +416,7 @@ options_templates.update(options_section(('sd', "Models & Loading"), { options_templates.update(options_section(('model_options', "Models Options"), { "model_sd3_disable_te5": OptionInfo(False, "StableDiffusion3: T5 disable encoder"), - "model_h1_llama_repo": OptionInfo("meta-llama/Meta-Llama-3.1-8B-Instruct", "HiDream: LLama repo", gr.Textbox), + "model_h1_llama_repo": OptionInfo("Default", "HiDream: LLama repo", gr.Textbox), })) 
options_templates.update(options_section(('vae_encoder', "Variable Auto Encoder"), { @@ -552,6 +552,7 @@ options_templates.update(options_section(('quantization', "Quantization Settings "nunchaku_sep": OptionInfo("
<br><h2>Nunchaku Engine</h2>
", "", gr.HTML), "nunchaku_quantization": OptionInfo([], "SVDQuant enabled", gr.CheckboxGroup, {"choices": ["Model", "Transformer", "VAE", "TE", "Video", "LLM", "ControlNet"], "visible": native}), "nunchaku_attention": OptionInfo(False, "Nunchaku attention", gr.Checkbox, {"visible": native}), + "nunchaku_offload": OptionInfo(False, "Nunchaku offloading", gr.Checkbox, {"visible": native}), })) options_templates.update(options_section(('advanced', "Pipeline Modifiers"), { diff --git a/modules/video_models/models_def.py b/modules/video_models/models_def.py index 31c6354f3..0d471cd60 100644 --- a/modules/video_models/models_def.py +++ b/modules/video_models/models_def.py @@ -71,6 +71,30 @@ models = { ], 'LTX Video': [ Model(name='None'), + Model(name='LTXVideo 0.9.6 2B T2V', + url='https://huggingface.co/Lightricks/LTX-Video', + repo='Lightricks/LTX-Video', + repo_cls=diffusers.LTXConditionPipeline, + te_cls=transformers.T5EncoderModel, + dit_cls=diffusers.LTXVideoTransformer3DModel), + Model(name='LTXVideo 0.9.6 2B I2V', + url='https://huggingface.co/Lightricks/LTX-Video', + repo='Lightricks/LTX-Video', + repo_cls=diffusers.LTXConditionPipeline, + te_cls=transformers.T5EncoderModel, + dit_cls=diffusers.LTXVideoTransformer3DModel), + Model(name='LTXVideo 0.9.6 2B T2V Distilled', + url='https://huggingface.co/Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25', + repo='Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25', + repo_cls=diffusers.LTXConditionPipeline, + te_cls=transformers.T5EncoderModel, + dit_cls=diffusers.LTXVideoTransformer3DModel), + Model(name='LTXVideo 0.9.6 2B I2V Distilled', + url='https://huggingface.co/Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25', + repo='Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25', + repo_cls=diffusers.LTXConditionPipeline, + te_cls=transformers.T5EncoderModel, + dit_cls=diffusers.LTXVideoTransformer3DModel), Model(name='LTXVideo 0.9.5 T2V', # https://github.com/huggingface/diffusers/pull/10968 
url='https://huggingface.co/Lightricks/LTX-Video-0.9.5', repo='Lightricks/LTX-Video-0.9.5', diff --git a/modules/zluda_installer.py b/modules/zluda_installer.py index 2f097f707..70a6476a0 100644 --- a/modules/zluda_installer.py +++ b/modules/zluda_installer.py @@ -123,9 +123,14 @@ def load(): core = Core(ctypes.windll.LoadLibrary(os.path.join(path, 'nvcuda.dll'))) ml = ZLUDALibrary(ctypes.windll.LoadLibrary(os.path.join(path, 'nvml.dll'))) is_nightly = core.get_nightly_flag() == 1 - hipBLASLt_enabled = is_nightly and os.path.exists(rocm.blaslt_tensile_libpath) and os.path.exists(os.path.join(rocm.path, "bin", "hipblaslt.dll")) + hipBLASLt_enabled = is_nightly and os.path.exists(rocm.blaslt_tensile_libpath) and os.path.exists(os.path.join(rocm.path, "bin", "hipblaslt.dll")) and default_agent is not None MIOpen_enabled = is_nightly and os.path.exists(os.path.join(rocm.path, "bin", "MIOpen.dll")) + if hipBLASLt_enabled: + if not default_agent.blaslt_supported: + hipBLASLt_enabled = False + log.debug(f'ROCm hipBLASLt: arch={default_agent.name} available={hipBLASLt_enabled}') + for k, v in DLL_MAPPING.items(): if not os.path.exists(os.path.join(path, v)): link_or_copy(os.path.join(path, k), os.path.join(path, v)) diff --git a/scripts/xyz_grid_classes.py b/scripts/xyz_grid_classes.py index 683672319..1e9e9972e 100644 --- a/scripts/xyz_grid_classes.py +++ b/scripts/xyz_grid_classes.py @@ -36,6 +36,7 @@ class SharedSettingsStackHelper(object): freeu_b2 = None freeu_s1 = None freeu_s2 = None + cfgzero_enabled = None schedulers_sigma_adjust = None schedulers_beta_schedule = None schedulers_beta_start = None @@ -53,6 +54,7 @@ class SharedSettingsStackHelper(object): eta_noise_seed_delta = None tome_ratio = None todo_ratio = None + teacache_thresh = None extra_networks_default_multiplier = None disable_weights_auto_swap = None @@ -75,6 +77,7 @@ class SharedSettingsStackHelper(object): self.freeu_b2 = shared.opts.freeu_b2 self.freeu_s1 = shared.opts.freeu_s1 self.freeu_s2 = 
shared.opts.freeu_s2 + self.cfgzero_enabled = shared.opts.cfgzero_enabled self.sd_model_checkpoint = shared.opts.sd_model_checkpoint self.sd_model_refiner = shared.opts.sd_model_refiner self.sd_model_dict = shared.opts.sd_model_dict @@ -83,6 +86,7 @@ class SharedSettingsStackHelper(object): self.sd_text_encoder = shared.opts.sd_text_encoder self.extra_networks_default_multiplier = shared.opts.extra_networks_default_multiplier self.disable_weights_auto_swap = shared.opts.disable_weights_auto_swap + self.teacache_thresh = shared.opts.teacache_thresh shared.opts.data["disable_weights_auto_swap"] = False def __exit__(self, exc_type, exc_value, tb): @@ -100,12 +104,14 @@ class SharedSettingsStackHelper(object): shared.opts.data["schedulers_shift"] = self.schedulers_shift shared.opts.data["scheduler_eta"] = self.scheduler_eta shared.opts.data["eta_noise_seed_delta"] = self.eta_noise_seed_delta + shared.opts.data["cfgzero_enabled"] = self.cfgzero_enabled shared.opts.data["freeu_b1"] = self.freeu_b1 shared.opts.data["freeu_b2"] = self.freeu_b2 shared.opts.data["freeu_s1"] = self.freeu_s1 shared.opts.data["freeu_s2"] = self.freeu_s2 shared.opts.data["tome_ratio"] = self.tome_ratio shared.opts.data["todo_ratio"] = self.todo_ratio + shared.opts.data["teacache_thresh"] = self.teacache_thresh if self.sd_model_checkpoint != shared.opts.sd_model_checkpoint: shared.opts.data["sd_model_checkpoint"] = self.sd_model_checkpoint diff --git a/wiki b/wiki index a985acf8c..7c7a9ffdc 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit a985acf8ca4f8e20c7438f749b4074d37c9df949 +Subproject commit 7c7a9ffdc9cfffa2e4febc05e44dcdfa9c533e56