Merge branch 'dev' into patch-1

pull/3878/head
Vladimir Mandic 2025-04-18 13:45:58 -04:00 committed by GitHub
commit 29b01278a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 252 additions and 108 deletions

View File

@ -1,11 +1,13 @@
# Change Log for SD.Next
## Update for 2025-04-16
## Update for 2025-04-18
- **Features**
- [Nunchaku](https://github.com/mit-han-lab/nunchaku) inference engine with custom **SVDQuant** 4-bit execution
highly experimental and with limited support, but when it works, its magic: **Flux.1 at 5.90 it/s** *(not sec/it)*!
see [Nunchaku Wiki](https://github.com/vladmandic/sdnext/wiki/Nunchaku) for details
highly experimental and with limited support, but when it works, it's magic: **Flux.1 at 6.0 it/s** *(not sec/it)*!
see [Nunchaku Wiki](https://github.com/vladmandic/sdnext/wiki/Nunchaku) for installation guide and list of supported models & features
- [LTXVideo 0.9.6](https://github.com/Lightricks/LTX-Video?tab=readme-ov-file) T2V and I2V
in both standard and distilled variants
- [CFG-Zero](https://github.com/WeichenFan/CFG-Zero-star) new guidance method optimized for flow-matching models
implemented for **FLUX.1, HiDream-I1, SD3.x, CogView4, HunyuanVideo, WanAI**
enable and configure in *settings -> pipeline modifiers -> cfg zero*
@ -14,10 +16,12 @@
- **HiDream** optimized offloading and prompt-encode caching
it now works in 12GB VRAM / 26GB RAM!
- **CogView3** and **CogView4** model loader optimizations
- **Sana** model loader optimizations
- add explicit offload after encode prompt
configure in *settings -> text encoder -> offload*
- **Other**
- **HiDream** add HF gated access auth check
- **HiDream** add LLM info to metadata
- add **UniPC FlowMatch** scheduler
- add **LCM FlowMatch** scheduler
- networks: set which networks to skip when scanning civitai
@ -25,10 +29,18 @@
comma-separate list of regex patterns to skip
- ui display reference models with subdued color
- xyz grid support bool
- do not force gc at end of processing
- **Wiki**
- new Nunchaku page
- updated HiDream, Quantization, NNCF pages
- **Fixes**
- NNCF with TE-only quant
- Quanto with TE/LLM quant
- HiDream live preview
- SD35 InstantX IP-adapter
- **HunyuanVideo-I2V** with latest transformers
- trace logging
- xyz grid restore settings
## Update for 2025-04-12

View File

@ -428,13 +428,32 @@
"preview": "THUDM--CogView3-Plus-3B.jpg",
"skip": true
},
"ShuttleAI Shuttle 3.0 Diffusion": {
"path": "shuttleai/shuttle-3-diffusion",
"desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition",
"preview": "shuttleai--shuttle-3-diffusion.jpg",
"skip": true
},
"ShuttleAI Shuttle 3.1 Aesthetic": {
"path": "shuttleai/shuttle-3.1-aesthetic",
"desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition",
"preview": "shuttleai--shuttle-3-diffusion.jpg",
"skip": true
},
"ShuttleAI Shuttle Jaguar": {
"path": "shuttleai/shuttle-jaguar",
"desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition",
"preview": "shuttleai--shuttle-3-diffusion.jpg",
"skip": true
},
"Meissonic": {
"path": "MeissonFlow/Meissonic",
"desc": "Meissonic is a non-autoregressive mask image modeling text-to-image synthesis model that can generate high-resolution images. It is designed to run on consumer graphics cards.",
"preview": "MeissonFlow--Meissonic.jpg",
"skip": true
},
"aMUSEd 256": {
"path": "huggingface/amused/amused-256",
"skip": true,

View File

@ -571,7 +571,7 @@ def install_cuda():
log.info('CUDA: nVidia toolkit detected')
ts('cuda', t_start)
if args.use_nightly:
cmd = os.environ.get('TORCH_COMMAND', 'pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 --extra-index-url https://download.pytorch.org/whl/nightly/cu126')
cmd = os.environ.get('TORCH_COMMAND', '--upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 --extra-index-url https://download.pytorch.org/whl/nightly/cu126')
else:
cmd = os.environ.get('TORCH_COMMAND', 'torch==2.6.0+cu126 torchvision==0.21.0+cu126 --index-url https://download.pytorch.org/whl/cu126')
return cmd
@ -646,9 +646,6 @@ def install_rocm_zluda():
if error is None:
try:
if device is not None and zluda_installer.get_blaslt_enabled():
log.debug(f'ROCm hipBLASLt: arch={device.name} available={device.blaslt_supported}')
zluda_installer.set_blaslt_enabled(device.blaslt_supported)
zluda_installer.load()
torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.6.0 torchvision --index-url https://download.pytorch.org/whl/cu118')
except Exception as e:

View File

@ -42,7 +42,7 @@ ADAPTERS_SDXL = {
}
ADAPTERS_SD3 = {
'None': { 'name': 'none', 'repo': 'none', 'subfolder': 'none' },
'InstantX Large': { 'name': 'none', 'repo': 'InstantX/SD3.5-Large-IP-Adapter', 'subfolder': 'none', 'revision': 'refs/pr/10' },
'InstantX Large': { 'name': 'ip-adapter_diffusers.safetensors', 'repo': 'InstantX/SD3.5-Large-IP-Adapter', 'subfolder': 'none', 'revision': 'refs/pr/10' },
}
ADAPTERS_F1 = {
'None': { 'name': 'none', 'repo': 'none', 'subfolder': 'none' },

View File

@ -146,7 +146,7 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork):
sd_model.loaded_loras = {}
key = f'{",".join(include)}:{",".join(exclude)}'
loaded = sd_model.loaded_loras.get(key, [])
# shared.log.trace(f'Network load: type=LoRA key="{key}" requested={requested} loaded={loaded}')
debug_log(f'Network load: type=LoRA key="{key}" requested={requested} loaded={loaded}')
if len(requested) != len(loaded):
sd_model.loaded_loras[key] = requested
return True
@ -167,21 +167,24 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork):
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
requested = self.signature(names, te_multipliers, unet_multipliers)
load_method = lora_overrides.get_method()
if debug:
import sys
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
debug_log(f'Network load: type=LoRA include={include} exclude={exclude} requested={requested} fn={fn}')
debug_log(f'Network load: type=LoRA include={include} exclude={exclude} method={load_method} requested={requested} fn={fn}')
force_diffusers = lora_overrides.check_override()
if force_diffusers:
has_changed = False # diffusers handle their own loading
if load_method == 'diffusers':
has_changed = False # diffusers handles its own loading
if len(exclude) == 0:
job = shared.state.job
shared.state.job = 'LoRA'
lora_load.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load only on first call
sd_models.set_diffuser_offload(shared.sd_model, op="model")
shared.state.job = job
else:
elif load_method == 'nunchaku':
from modules.lora import lora_nunchaku
has_changed = lora_nunchaku.load_nunchaku(names, unet_multipliers)
else: # native
lora_load.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
has_changed = self.changed(requested, include, exclude)
if has_changed:
@ -196,11 +199,11 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork):
shared.state.job = job
debug_log(f'Network load: type=LoRA previous={[n.name for n in l.previously_loaded_networks]} current={[n.name for n in l.loaded_networks]} changed')
if len(l.loaded_networks) > 0 and (len(networks.applied_layers) > 0 or force_diffusers) and step == 0:
if len(l.loaded_networks) > 0 and (len(networks.applied_layers) > 0 or load_method=='diffusers' or load_method=='nunchaku') and step == 0:
infotext(p)
prompt(p)
if (has_changed or force_diffusers) and len(include) == 0: # print only once
shared.log.info(f'Network load: type=LoRA apply={[n.name for n in l.loaded_networks]} mode={"fuse" if shared.opts.lora_fuse_diffusers else "backup"} te={te_multipliers} unet={unet_multipliers} time={l.timer.summary}')
if has_changed and len(include) == 0: # print only once
shared.log.info(f'Network load: type=LoRA apply={[n.name for n in l.loaded_networks]} method={load_method} mode={"fuse" if shared.opts.lora_fuse_diffusers else "backup"} te={te_multipliers} unet={unet_multipliers} time={l.timer.summary}')
def deactivate(self, p):
if shared.native and len(lora_load.diffuser_loaded) > 0:

View File

@ -115,7 +115,7 @@ def load_safetensors(name, network_on_disk) -> Union[network.Network, None]:
if l.debug:
shared.log.debug(f'Network load: type=LoRA name="{name}" unmatched={keys_failed_to_match}')
else:
shared.log.debug(f'Network load: type=LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)} dtypes={dtypes} direct={shared.opts.lora_fuse_diffusers}')
shared.log.debug(f'Network load: type=LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)} dtypes={dtypes} fuse={shared.opts.lora_fuse_diffusers}')
if len(matched_networks) == 0:
return None
lora_cache[name] = net
@ -205,7 +205,7 @@ def network_download(name):
return None
def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
def gather_networks(names):
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
@ -213,6 +213,11 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
for i in range(len(names)):
if names[i].startswith('/'):
networks_on_disk[i] = network_download(names[i])
return networks_on_disk
def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
networks_on_disk = gather_networks(names)
failed_to_load_networks = []
recompile_model, skip_lora_load = maybe_recompile_model(names, te_multipliers)
@ -230,8 +235,11 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
try:
if recompile_model:
shared.compiled_model_state.lora_model.append(f"{name}:{te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier}")
if shared.opts.lora_force_diffusers or lora_overrides.check_override(shorthash): # OpenVINO only works with Diffusers LoRa loading
lora_method = lora_overrides.get_method(shorthash)
if shared.opts.lora_force_diffusers or lora_method == 'diffusers': # OpenVINO only works with Diffusers LoRa loading
net = load_diffusers(name, network_on_disk, lora_scale=te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier)
elif lora_method == 'nunchaku':
pass # handled directly from extra_networks_lora.load_nunchaku
else:
net = load_safetensors(name, network_on_disk)
if net is not None:
@ -260,12 +268,12 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
if not skip_lora_load and len(diffuser_loaded) > 0:
shared.log.debug(f'Network load: type=LoRA loaded={diffuser_loaded} available={shared.sd_model.get_list_adapters()} active={shared.sd_model.get_active_adapters()} scales={diffuser_scales}')
try:
t0 = time.time()
t1 = time.time()
shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales)
if shared.opts.lora_fuse_diffusers and not lora_overrides.check_fuse():
shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # diffusers with fuse uses fixed scale since later apply does the scaling
shared.sd_model.unload_lora_weights()
l.timer.activate += time.time() - t0
l.timer.activate += time.time() - t1
except Exception as e:
shared.log.error(f'Network load: type=LoRA {e}')
if l.debug:

View File

@ -0,0 +1,33 @@
import time
from modules import shared, errors
from modules.lora import lora_load, lora_common
previously_loaded = [] # we maintain private state here: (filename, strength) pairs applied on the last call


def load_nunchaku(names, strengths):
    """Apply LoRA networks to a Nunchaku transformer using compose_lora.

    Args:
        names: list of LoRA network names/aliases to load.
        strengths: per-network strengths; an entry may itself be a list,
            in which case only its first element is used.

    Returns:
        True if the set of applied (filename, strength) pairs changed since
        the previous call, False otherwise (no-op).
    """
    global previously_loaded # pylint: disable=global-statement
    strengths = [s[0] if isinstance(s, list) else s for s in strengths]
    networks = lora_load.gather_networks(names)
    # drop networks that failed to resolve or have zero/negative strength
    networks = [(network, strength) for network, strength in zip(networks, strengths) if network is not None and strength > 0]
    loras = [(network.filename, strength) for network, strength in networks]
    is_changed = loras != previously_loaded
    if not is_changed:
        return False
    previously_loaded = loras
    try:
        t0 = time.time()
        from nunchaku.lora.flux.compose import compose_lora # deferred: nunchaku is an optional dependency
        composed_lora = compose_lora(loras)
        shared.sd_model.transformer.update_lora_params(composed_lora)
        lora_common.loaded_networks = [n[0] for n in networks] # used by infotext
        t1 = time.time()
        lora_common.timer.load = t1 - t0
        shared.log.debug(f"Network load: type=LoRA method=nunchaku loras={names} strength={strengths} time={t1-t0:.3f}")
    except Exception as e:
        shared.log.error(f'Network load: type=LoRA method=nunchaku {e}') # fixed: was shared.log.errors, which raises AttributeError
        if lora_common.debug:
            errors.display(e, 'LoRA')
    return is_changed

View File

@ -25,7 +25,7 @@ force_diffusers = [ # forced always
'22c8339e7666', # spo-sdxl-10ep
]
force_models = [ # forced always
force_models_diffusers = [ # forced always
# 'sd3',
'sc',
'h1',
@ -41,7 +41,7 @@ force_models = [ # forced always
'allegrovideo',
]
force_classes = [ # forced always
force_classes_diffusers = [ # forced always
]
fuse_ignore = [
@ -49,17 +49,19 @@ fuse_ignore = [
]
def check_override(shorthash=''):
force = False
force = force or (shared.sd_model_type in force_models)
force = force or (shared.sd_model.__class__.__name__ in force_classes)
if len(shorthash) < 4:
return force
force = force or (any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False)
force = force or any(x.startswith(shorthash) for x in force_diffusers)
if force and shared.opts.lora_maybe_diffusers:
shared.log.debug('LoRA override: force diffusers')
return force
def get_method(shorthash=''):
    """Select the LoRA load method for the current model.

    Returns one of 'nunchaku', 'diffusers', or 'native'. *shorthash* is an
    optional LoRA model hash prefix checked against the override lists;
    hash-based checks are skipped for hashes of 4 chars or fewer.
    """
    # model types/classes that must always use the diffusers LoRA loader
    use_diffusers = (shared.sd_model_type in force_models_diffusers) or (shared.sd_model.__class__.__name__ in force_classes_diffusers)
    if shared.opts.lora_maybe_diffusers and len(shorthash) > 4:
        use_diffusers = use_diffusers or any(x.startswith(shorthash) for x in maybe_diffusers)
    if shared.opts.lora_force_diffusers and len(shorthash) > 4:
        use_diffusers = use_diffusers or any(x.startswith(shorthash) for x in force_diffusers)
    # Nunchaku transformers compose/apply LoRA themselves, overriding everything else
    use_nunchaku = hasattr(shared.sd_model, 'transformer') and 'Nunchaku' in shared.sd_model.transformer.__class__.__name__
    if use_nunchaku:
        return 'nunchaku'
    elif use_diffusers:
        return 'diffusers'
    else:
        return 'native'
def check_fuse():
    """Return True when LoRA fusing must be skipped for the current model type."""
    current_type = shared.sd_model_type
    return any(ignored == current_type for ignored in fuse_ignore)

View File

@ -1,7 +1,7 @@
# MIT-Han-Lab Nunchaku: <https://github.com/mit-han-lab/nunchaku>
# TODO nunchaku: cache-dir for transformer and t5 loader
# TODO nunchaku: batch support
# TODO nunchaku: LoRA support
from installer import log, pip
from modules import devices
@ -31,6 +31,7 @@ def install_nunchaku():
if devices.backend is None:
return False # too early
if not check():
import os
import sys
import platform
import importlib
@ -51,11 +52,13 @@ def install_nunchaku():
if torch_ver not in ['2.5', '2.6', '2.7', '2.8']:
log.error(f'Nunchaku: torch={torch.__version__} unsupported')
suffix = 'x86_64' if arch == 'linux' else 'win_amd64'
url = f'https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-{ver}'
url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}_{suffix}.whl'
cmd = f'install --upgrade {url}'
cmd = os.environ.get('NUNCHAKU_COMMAND', None)
if cmd is None:
url = f'https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-{ver}'
url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}_{suffix}.whl'
cmd = f'install --upgrade {url}'
# pip install https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-0.2.0+torch2.6-cp311-cp311-linux_x86_64.whl
log.debug(f'Nunchaku: url={url}')
log.debug(f'Nunchaku: install="{url}"')
pip(cmd, ignore=False, uv=False)
importlib.reload(pkg_resources)
if not check():

View File

@ -112,11 +112,21 @@ def load_quants(kwargs, repo_id, cache_dir, allow_quant):
if 'transformer' not in kwargs and model_quant.check_nunchaku('Transformer'):
import nunchaku
nunchaku_precision = nunchaku.utils.get_precision()
nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-dev" if 'dev' in repo_id else f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-schnell"
shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} attention={shared.opts.nunchaku_attention}')
kwargs['transformer'] = nunchaku.NunchakuFluxTransformer2dModel.from_pretrained(nunchaku_repo, torch_dtype=devices.dtype)
if shared.opts.nunchaku_attention:
kwargs['transformer'].set_attention_impl("nunchaku-fp16")
nunchaku_repo = None
if 'dev' in repo_id:
nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-dev"
elif 'schnell' in repo_id:
nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-schnell"
elif 'shuttle' in repo_id:
nunchaku_repo = 'mit-han-lab/svdq-fp4-shuttle-jaguar'
else:
shared.log.error(f'Load module: quant=Nunchaku module=transformer repo="{repo_id}" unsupported')
if nunchaku_repo is not None:
shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} offload={shared.opts.nunchaku_offload} attention={shared.opts.nunchaku_attention}')
kwargs['transformer'] = nunchaku.NunchakuFluxTransformer2dModel.from_pretrained(nunchaku_repo, offload=shared.opts.nunchaku_offload, torch_dtype=devices.dtype)
kwargs['transformer'].quantization_method = 'SVDQuant'
if shared.opts.nunchaku_attention:
kwargs['transformer'].set_attention_impl("nunchaku-fp16")
elif 'transformer' not in kwargs and model_quant.check_quant('Transformer'):
quant_args = model_quant.create_config(allow=allow_quant, module='Transformer')
if quant_args:

View File

@ -54,7 +54,8 @@ def load_text_encoders(repo_id, diffusers_load_config={}):
sd_models.move_model(text_encoder_3, devices.cpu)
load_args, quant_args = model_quant.get_dit_args(diffusers_load_config, module='LLM', device_map=True)
shared.log.debug(f'Load model: type=HiDream te4="{shared.opts.model_h1_llama_repo}" quant="{model_quant.get_quant_type(quant_args)}" args={load_args}')
llama_repo = shared.opts.model_h1_llama_repo if shared.opts.model_h1_llama_repo != 'Default' else 'meta-llama/Meta-Llama-3.1-8B-Instruct'
shared.log.debug(f'Load model: type=HiDream te4="{llama_repo}" quant="{model_quant.get_quant_type(quant_args)}" args={load_args}')
text_encoder_4 = transformers.LlamaForCausalLM.from_pretrained(
shared.opts.model_h1_llama_repo,

View File

@ -88,9 +88,13 @@ def create_quanto_config(kwargs = None, allow_quanto: bool = True, module: str =
load_quanto(silent=True)
if optimum_quanto is None:
return kwargs
quanto_config = diffusers.QuantoConfig(weights_dtype=shared.opts.quanto_quantization_type)
quanto_config.activations = None # patch so it works with transformers
quanto_config.weights = quanto_config.weights_dtype
if module in {'TE', 'LLM'}:
quanto_config = transformers.QuantoConfig(weights=shared.opts.quanto_quantization_type)
quanto_config.weights_dtype = quanto_config.weights
else:
quanto_config = diffusers.QuantoConfig(weights_dtype=shared.opts.quanto_quantization_type)
quanto_config.activations = None # patch so it works with transformers
quanto_config.weights = quanto_config.weights_dtype
log.debug(f'Quantization: module="{module}" type=quanto dtype={shared.opts.quanto_quantization_type}')
if kwargs is None:
return quanto_config
@ -490,8 +494,8 @@ def get_dit_args(load_config:dict={}, module:str=None, device_map:bool=False, al
del config['safety_checker']
if 'requires_safety_checker' in config:
del config['requires_safety_checker']
if 'variant' in config:
del config['variant']
# if 'variant' in config:
# del config['variant']
if device_map:
if shared.opts.device_map == 'cpu':
config['device_map'] = 'cpu'

View File

@ -6,15 +6,21 @@ from modules import shared, sd_models, devices, modelloader, model_quant
def load_quants(kwargs, repo_id, cache_dir):
quant_args = {}
quant_args = model_quant.create_config()
if not quant_args:
return kwargs
load_args = kwargs.copy()
if 'transformer' not in kwargs and (('Model' in shared.opts.bnb_quantization or 'Model' in shared.opts.torchao_quantization or 'Model' in shared.opts.quanto_quantization) or ('Transformer' in shared.opts.bnb_quantization or 'Transformer' in shared.opts.torchao_quantization or 'Transformer' in shared.opts.quanto_quantization)):
kwargs['transformer'] = diffusers.models.SanaTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", cache_dir=cache_dir, **load_args, **quant_args)
if 'text_encoder' not in kwargs and ('TE' in shared.opts.bnb_quantization or 'TE' in shared.opts.torchao_quantization or 'TE' in shared.opts.quanto_quantization):
kwargs['text_encoder'] = transformers.AutoModelForCausalLM.from_pretrained(repo_id, subfolder="text_encoder", cache_dir=cache_dir, **load_args, **quant_args)
kwargs_copy = kwargs.copy()
if model_quant.check_nunchaku('Transformer') and 'Sana_1600M' in repo_id: # only sana-1600m
import nunchaku
nunchaku_precision = nunchaku.utils.get_precision()
nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-sana-1600m"
shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} attention={shared.opts.nunchaku_attention}')
kwargs['transformer'] = nunchaku.NunchakuSanaTransformer2DModel.from_pretrained(nunchaku_repo, torch_dtype=devices.dtype)
elif model_quant.check_quant('Transformer'):
load_args, quant_args = model_quant.get_dit_args(kwargs_copy, module='Transformer')
if quant_args:
kwargs['transformer'] = diffusers.SanaTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", cache_dir=cache_dir, **load_args, **quant_args)
if model_quant.check_quant('TE'):
load_args, quant_args = model_quant.get_dit_args(kwargs_copy, module='TE')
if quant_args:
kwargs['text_encoder'] = transformers.AutoModelForCausalLM.from_pretrained(repo_id, subfolder="text_encoder", cache_dir=cache_dir, **load_args, **quant_args)
return kwargs
@ -28,9 +34,9 @@ def load_sana(checkpoint_info, kwargs={}):
kwargs.pop('requires_safety_checker', None)
kwargs.pop('torch_dtype', None)
# set variant since hf repos are a mess
if not repo_id.endswith('_diffusers'):
repo_id = f'{repo_id}_diffusers'
if 'Sana_1600M' in repo_id:
if devices.dtype == torch.bfloat16 or 'BF16' in repo_id:
if 'BF16' not in repo_id:
@ -45,6 +51,7 @@ def load_sana(checkpoint_info, kwargs={}):
kwargs = load_quants(kwargs, repo_id, cache_dir=shared.opts.diffusers_dir)
shared.log.debug(f'Load model: type=Sana repo="{repo_id}" args={list(kwargs)}')
t0 = time.time()
if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32:
kwargs['torch_dtype'] = devices.dtype
if 'Sprint' in repo_id:
@ -56,21 +63,31 @@ def load_sana(checkpoint_info, kwargs={}):
cache_dir=shared.opts.diffusers_dir,
**kwargs,
)
if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32:
if 'transformer' not in kwargs:
pipe.transformer = pipe.transformer.to(dtype=devices.dtype)
if 'text_encoder' not in kwargs:
pipe.text_encoder = pipe.text_encoder.to(dtype=devices.dtype)
pipe.vae = pipe.vae.to(dtype=devices.dtype)
if devices.dtype == torch.float16:
if 'transformer' not in kwargs:
pipe.transformer = pipe.transformer.to(dtype=devices.dtype)
if 'text_encoder' not in kwargs:
pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float32) # gemma2 does not support fp16
pipe.vae = pipe.vae.to(dtype=torch.float32) # dc-ae often overflows in fp16
if shared.opts.diffusers_eval:
pipe.text_encoder.eval()
pipe.transformer.eval()
# only cast if not quant-loaded
try:
if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32:
if 'transformer' not in kwargs:
pipe.transformer = pipe.transformer.to(dtype=devices.dtype)
if 'text_encoder' not in kwargs:
pipe.text_encoder = pipe.text_encoder.to(dtype=devices.dtype)
pipe.vae = pipe.vae.to(dtype=devices.dtype)
if devices.dtype == torch.float16:
if 'transformer' not in kwargs:
pipe.transformer = pipe.transformer.to(dtype=devices.dtype)
if 'text_encoder' not in kwargs:
pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float32) # gemma2 does not support fp16
pipe.vae = pipe.vae.to(dtype=torch.float32) # dc-ae often overflows in fp16
except Exception as e:
shared.log.error(f'Load model: type=Sana {e}')
try:
if shared.opts.diffusers_eval:
pipe.text_encoder.eval()
pipe.transformer.eval()
except Exception:
pass
t1 = time.time()
shared.log.debug(f'Load model: type=Sana target={devices.dtype} te={pipe.text_encoder.dtype} transformer={pipe.transformer.dtype} vae={pipe.vae.dtype} time={t1-t0:.2f}')
devices.torch_gc(force=True)

View File

@ -502,5 +502,5 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if not p.disable_extra_networks:
shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct()} memory={memstats.memory_stats()}')
devices.torch_gc(force=True, reason='final')
devices.torch_gc(force=False, reason='final')
return processed

View File

@ -153,7 +153,7 @@ def set_pipeline_args(p, model, prompts:list, negative_prompts:list, prompts_2:t
shared.log.error(f'Prompt parser encode: {e}')
if os.environ.get('SD_PROMPT_DEBUG', None) is not None:
errors.display(e, 'Prompt parser encode')
timer.process.record('encode', reset=False)
timer.process.record('prompt', reset=False)
else:
prompt_parser_diffusers.embedder = None

View File

@ -147,9 +147,6 @@ def process_base(p: processing.StableDiffusionProcessing):
hidiffusion.unapply()
sd_models_compile.check_deepcache(enable=False)
if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: # register used embeddings
p.extra_generation_params['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used)
shared.state.nextjob()
return output

View File

@ -7,10 +7,6 @@ from modules.processing_class import StableDiffusionProcessing
args = {} # maintain history
infotext = '' # maintain history
debug = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None
if not shared.native:
from modules import sd_hijack
else:
sd_hijack = None
def get_last_args():
@ -62,11 +58,9 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
"Refiner prompt": p.refiner_prompt if len(p.refiner_prompt) > 0 else None,
"Refiner negative": p.refiner_negative if len(p.refiner_negative) > 0 else None,
"Styles": "; ".join(p.styles) if p.styles is not None and len(p.styles) > 0 else None,
# sdnext
"App": 'SD.Next',
"Version": git_commit,
"Backend": 'Legacy' if not shared.native else None,
"Pipeline": 'LDM' if not shared.native else None,
"Parser": shared.opts.prompt_attention if shared.opts.prompt_attention != 'native' else None,
"Comment": comment,
"Operations": '; '.join(ops).replace('"', '') if len(p.ops) > 0 else 'none',
@ -77,9 +71,9 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
args["VAE"] = 'TAESD'
elif p.vae_type == 'Remote':
args["VAE"] = 'Remote'
if shared.opts.add_model_name_to_info and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None:
if getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None:
args["Model"] = shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')
if shared.opts.add_model_hash_to_info and getattr(shared.sd_model, 'sd_model_hash', None) is not None:
if getattr(shared.sd_model, 'sd_model_hash', None) is not None:
args["Model hash"] = shared.sd_model.sd_model_hash
# native
if grid is None and (p.n_iter > 1 or p.batch_size > 1) and index >= 0:
@ -88,8 +82,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
args['Grid'] = grid
if shared.native:
args['Pipeline'] = shared.sd_model.__class__.__name__
args['TE'] = None if (not shared.opts.add_model_name_to_info or shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'Default') else shared.opts.sd_text_encoder
args['UNet'] = None if (not shared.opts.add_model_name_to_info or shared.opts.sd_unet is None or shared.opts.sd_unet == 'Default') else shared.opts.sd_unet
args['TE'] = None if (shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'Default') else shared.opts.sd_text_encoder
args['UNet'] = None if (shared.opts.sd_unet is None or shared.opts.sd_unet == 'Default') else shared.opts.sd_unet
else:
args['Pipeline'] = 'LDM'
if 'txt2img' in p.ops:
args["Variation seed"] = all_subseeds[index] if p.subseed_strength > 0 else None
args["Variation strength"] = p.subseed_strength if p.subseed_strength > 0 else None
@ -155,11 +151,14 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
args["Detailer negative"] = p.detailer_negative if len(p.detailer_negative) > 0 else None
if 'color' in p.ops:
args["Color correction"] = True
# embeddings
if sd_hijack is not None and hasattr(sd_hijack.model_hijack, 'embedding_db') and len(sd_hijack.model_hijack.embedding_db.embeddings_used) > 0: # this is for original hijaacked models only, diffusers are handled separately
args["Embeddings"] = ', '.join(sd_hijack.model_hijack.embedding_db.embeddings_used)
# samplers
if shared.opts.token_merging_method == 'ToMe': # tome/todo
args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None
else:
args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None
if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: # register used embeddings
args['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used)
# samplers
if getattr(p, 'sampler_name', None) is not None and p.sampler_name.lower() != 'default':
args["Sampler eta delta"] = shared.opts.eta_noise_seed_delta if shared.opts.eta_noise_seed_delta != 0 and sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p) else None
args["Sampler eta multiplier"] = p.initial_noise_multiplier if getattr(p, 'initial_noise_multiplier', 1.0) != 1.0 else None
@ -177,11 +176,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
args['Sampler range'] = shared.opts.schedulers_timesteps_range if shared.opts.schedulers_timesteps_range != shared.opts.data_labels.get('schedulers_timesteps_range').default else None
args['Sampler shift'] = shared.opts.schedulers_shift if shared.opts.schedulers_shift != shared.opts.data_labels.get('schedulers_shift').default else None
args['Sampler dynamic shift'] = shared.opts.schedulers_dynamic_shift if shared.opts.schedulers_dynamic_shift != shared.opts.data_labels.get('schedulers_dynamic_shift').default else None
# tome/todo
if shared.opts.token_merging_method == 'ToMe':
args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None
else:
args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None
# model specific
if shared.sd_model_type == 'h1':
args['LLM'] = None if shared.opts.model_h1_llama_repo == 'Default' else shared.opts.model_h1_llama_repo
args.update(p.extra_generation_params)
for k, v in args.copy().items():

View File

@ -2,7 +2,6 @@ import os
import time
import numpy as np
import torch
import torchvision.transforms.functional as TF
from modules import shared, devices, sd_models, sd_vae, sd_vae_taesd, errors
@ -316,6 +315,7 @@ def vae_decode(latents, model, output_type='np', vae_type='Full', width=None, he
def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable
import torchvision.transforms.functional as f
if shared.state.interrupted or shared.state.skipped:
return []
if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
@ -323,7 +323,7 @@ def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable
if not hasattr(model, 'vae'):
shared.log.error('VAE not found in model')
return []
tensor = TF.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae)
tensor = f.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae)
if vae_type == 'Full':
tensor = tensor * 2 - 1
latents = full_vae_encode(image=tensor, model=shared.sd_model)

View File

@ -947,8 +947,12 @@ def add_noise_pred_to_diffusers_callback(pipe):
pipe.prior_pipe._callback_tensor_inputs.append("predicted_image_embedding") # pylint: disable=protected-access
elif hasattr(pipe, "scheduler") and "flow" in pipe.scheduler.__class__.__name__.lower():
pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access
elif hasattr(pipe, "scheduler") and hasattr(pipe.scheduler, "config") and getattr(pipe.scheduler.config, "prediction_type", "none") == "flow_prediction":
pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access
elif hasattr(pipe, "default_scheduler") and "flow" in pipe.default_scheduler.__class__.__name__.lower():
pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access
elif hasattr(pipe, "default_scheduler") and hasattr(pipe.default_scheduler, "config") and getattr(pipe.default_scheduler.config, "prediction_type", "none") == "flow_prediction":
pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access
return pipe

View File

@ -299,7 +299,7 @@ def apply_balanced_offload(sd_model=None, exclude=[]):
if device_map and max_memory:
module.balanced_offload_device_map = device_map
module.balanced_offload_max_memory = max_memory
module.offload_post = shared.sd_model_type in [offload_post] and shared.opts.te_hijack and module_name.startswith("text_encoder")
module.offload_post = shared.sd_model_type in offload_post and shared.opts.te_hijack and module_name.startswith("text_encoder")
devices.torch_gc(fast=True, force=True, reason='offload')
apply_balanced_offload_to_module(sd_model)

View File

@ -268,7 +268,7 @@ class DiffusionSampler:
if 'shift' in self.config:
self.config['shift'] = shared.opts.schedulers_shift if shared.opts.schedulers_shift > 0 else 3
if 'use_dynamic_shifting' in self.config:
self.config['use_dynamic_shifting'] = True if shared.opts.schedulers_shift <= 0 else shared.opts.schedulers_dynamic_shift
self.config['use_dynamic_shifting'] = True if shared.opts.schedulers_shift == 0 else shared.opts.schedulers_dynamic_shift
if 'use_beta_sigmas' in self.config and 'sigma_schedule' in self.config:
self.config['use_beta_sigmas'] = 'StableDiffusion3' in model.__class__.__name__
if 'rescale_betas_zero_snr' in self.config:

View File

@ -416,7 +416,7 @@ options_templates.update(options_section(('sd', "Models & Loading"), {
options_templates.update(options_section(('model_options', "Models Options"), {
"model_sd3_disable_te5": OptionInfo(False, "StableDiffusion3: T5 disable encoder"),
"model_h1_llama_repo": OptionInfo("meta-llama/Meta-Llama-3.1-8B-Instruct", "HiDream: LLama repo", gr.Textbox),
"model_h1_llama_repo": OptionInfo("Default", "HiDream: LLama repo", gr.Textbox),
}))
options_templates.update(options_section(('vae_encoder', "Variable Auto Encoder"), {
@ -552,6 +552,7 @@ options_templates.update(options_section(('quantization', "Quantization Settings
"nunchaku_sep": OptionInfo("<h2>Nunchaku Engine</h2>", "", gr.HTML),
"nunchaku_quantization": OptionInfo([], "SVDQuant enabled", gr.CheckboxGroup, {"choices": ["Model", "Transformer", "VAE", "TE", "Video", "LLM", "ControlNet"], "visible": native}),
"nunchaku_attention": OptionInfo(False, "Nunchaku attention", gr.Checkbox, {"visible": native}),
"nunchaku_offload": OptionInfo(False, "Nunchaku offloading", gr.Checkbox, {"visible": native}),
}))
options_templates.update(options_section(('advanced', "Pipeline Modifiers"), {

View File

@ -71,6 +71,30 @@ models = {
],
'LTX Video': [
Model(name='None'),
Model(name='LTXVideo 0.9.6 2B T2V',
url='https://huggingface.co/Lightricks/LTX-Video',
repo='Lightricks/LTX-Video',
repo_cls=diffusers.LTXConditionPipeline,
te_cls=transformers.T5EncoderModel,
dit_cls=diffusers.LTXVideoTransformer3DModel),
Model(name='LTXVideo 0.9.6 2B I2V',
url='https://huggingface.co/Lightricks/LTX-Video',
repo='Lightricks/LTX-Video',
repo_cls=diffusers.LTXConditionPipeline,
te_cls=transformers.T5EncoderModel,
dit_cls=diffusers.LTXVideoTransformer3DModel),
Model(name='LTXVideo 0.9.6 2B T2V Distilled',
url='https://huggingface.co/Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25',
repo='Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25',
repo_cls=diffusers.LTXConditionPipeline,
te_cls=transformers.T5EncoderModel,
dit_cls=diffusers.LTXVideoTransformer3DModel),
Model(name='LTXVideo 0.9.6 2B I2V Distilled',
url='https://huggingface.co/Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25',
repo='Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25',
repo_cls=diffusers.LTXConditionPipeline,
te_cls=transformers.T5EncoderModel,
dit_cls=diffusers.LTXVideoTransformer3DModel),
Model(name='LTXVideo 0.9.5 T2V', # https://github.com/huggingface/diffusers/pull/10968
url='https://huggingface.co/Lightricks/LTX-Video-0.9.5',
repo='Lightricks/LTX-Video-0.9.5',

View File

@ -123,9 +123,14 @@ def load():
core = Core(ctypes.windll.LoadLibrary(os.path.join(path, 'nvcuda.dll')))
ml = ZLUDALibrary(ctypes.windll.LoadLibrary(os.path.join(path, 'nvml.dll')))
is_nightly = core.get_nightly_flag() == 1
hipBLASLt_enabled = is_nightly and os.path.exists(rocm.blaslt_tensile_libpath) and os.path.exists(os.path.join(rocm.path, "bin", "hipblaslt.dll"))
hipBLASLt_enabled = is_nightly and os.path.exists(rocm.blaslt_tensile_libpath) and os.path.exists(os.path.join(rocm.path, "bin", "hipblaslt.dll")) and default_agent is not None
MIOpen_enabled = is_nightly and os.path.exists(os.path.join(rocm.path, "bin", "MIOpen.dll"))
if hipBLASLt_enabled:
if not default_agent.blaslt_supported:
hipBLASLt_enabled = False
log.debug(f'ROCm hipBLASLt: arch={default_agent.name} available={hipBLASLt_enabled}')
for k, v in DLL_MAPPING.items():
if not os.path.exists(os.path.join(path, v)):
link_or_copy(os.path.join(path, k), os.path.join(path, v))

View File

@ -36,6 +36,7 @@ class SharedSettingsStackHelper(object):
freeu_b2 = None
freeu_s1 = None
freeu_s2 = None
cfgzero_enabled = None
schedulers_sigma_adjust = None
schedulers_beta_schedule = None
schedulers_beta_start = None
@ -53,6 +54,7 @@ class SharedSettingsStackHelper(object):
eta_noise_seed_delta = None
tome_ratio = None
todo_ratio = None
teacache_thresh = None
extra_networks_default_multiplier = None
disable_weights_auto_swap = None
@ -75,6 +77,7 @@ class SharedSettingsStackHelper(object):
self.freeu_b2 = shared.opts.freeu_b2
self.freeu_s1 = shared.opts.freeu_s1
self.freeu_s2 = shared.opts.freeu_s2
self.cfgzero_enabled = shared.opts.cfgzero_enabled
self.sd_model_checkpoint = shared.opts.sd_model_checkpoint
self.sd_model_refiner = shared.opts.sd_model_refiner
self.sd_model_dict = shared.opts.sd_model_dict
@ -83,6 +86,7 @@ class SharedSettingsStackHelper(object):
self.sd_text_encoder = shared.opts.sd_text_encoder
self.extra_networks_default_multiplier = shared.opts.extra_networks_default_multiplier
self.disable_weights_auto_swap = shared.opts.disable_weights_auto_swap
self.teacache_thresh = shared.opts.teacache_thresh
shared.opts.data["disable_weights_auto_swap"] = False
def __exit__(self, exc_type, exc_value, tb):
@ -100,12 +104,14 @@ class SharedSettingsStackHelper(object):
shared.opts.data["schedulers_shift"] = self.schedulers_shift
shared.opts.data["scheduler_eta"] = self.scheduler_eta
shared.opts.data["eta_noise_seed_delta"] = self.eta_noise_seed_delta
shared.opts.data["cfgzero_enabled"] = self.cfgzero_enabled
shared.opts.data["freeu_b1"] = self.freeu_b1
shared.opts.data["freeu_b2"] = self.freeu_b2
shared.opts.data["freeu_s1"] = self.freeu_s1
shared.opts.data["freeu_s2"] = self.freeu_s2
shared.opts.data["tome_ratio"] = self.tome_ratio
shared.opts.data["todo_ratio"] = self.todo_ratio
shared.opts.data["teacache_thresh"] = self.teacache_thresh
if self.sd_model_checkpoint != shared.opts.sd_model_checkpoint:
shared.opts.data["sd_model_checkpoint"] = self.sd_model_checkpoint

2
wiki

@ -1 +1 @@
Subproject commit a985acf8ca4f8e20c7438f749b4074d37c9df949
Subproject commit 7c7a9ffdc9cfffa2e4febc05e44dcdfa9c533e56