Merge branch 'dev' into patch-1

pull/3878/head
Vladimir Mandic 2025-04-18 13:45:58 -04:00 committed by GitHub
commit 29b01278a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 252 additions and 108 deletions

View File

@ -1,11 +1,13 @@
# Change Log for SD.Next
## Update for 2025-04-16
## Update for 2025-04-18
- **Features**
- [Nunchaku](https://github.com/mit-han-lab/nunchaku) inference engine with custom **SVDQuant** 4-bit execution
highly experimental and with limited support, but when it works, its magic: **Flux.1 at 5.90 it/s** *(not sec/it)*!
see [Nunchaku Wiki](https://github.com/vladmandic/sdnext/wiki/Nunchaku) for details
highly experimental and with limited support, but when it works, it's magic: **Flux.1 at 6.0 it/s** *(not sec/it)*!
see [Nunchaku Wiki](https://github.com/vladmandic/sdnext/wiki/Nunchaku) for installation guide and list of supported models & features
- [LTXVideo 0.9.6](https://github.com/Lightricks/LTX-Video?tab=readme-ov-file) T2V and I2V
in both standard and distilled variants
- [CFG-Zero](https://github.com/WeichenFan/CFG-Zero-star) new guidance method optimized for flow-matching models
implemented for **FLUX.1, HiDream-I1, SD3.x, CogView4, HunyuanVideo, WanAI**
enable and configure in *settings -> pipeline modifiers -> cfg zero*
@ -14,10 +16,12 @@
- **HiDream** optimized offloading and prompt-encode caching
it now works in 12GB VRAM / 26GB RAM!
- **CogView3** and **CogView4** model loader optimizations
- **Sana** model loader optimizations
- add explicit offload after encode prompt
configure in *settings -> text encoder -> offload*
- **Other**
- **HiDream** add HF gated access auth check
- **HiDream** add LLM info to metadata
- add **UniPC FlowMatch** scheduler
- add **LCM FlowMatch** scheduler
- networks: set which networks to skip when scanning civitai
@ -25,10 +29,18 @@
comma-separate list of regex patterns to skip
- ui display reference models with subdued color
- xyz grid support bool
- do not force gc at end of processing
- **Wiki**
- new Nunchaku page
- updated HiDream, Quantization, NNCF pages
- **Fixes**
- NNCF with TE-only quant
- Quanto with TE/LLM quant
- HiDream live preview
- SD35 InstantX IP-adapter
- **HunyuanVideo-I2V** with latest transformers
- trace logging
- xyz grid restore settings
## Update for 2025-04-12

View File

@ -428,13 +428,32 @@
"preview": "THUDM--CogView3-Plus-3B.jpg",
"skip": true
},
"ShuttleAI Shuttle 3.0 Diffusion": {
"path": "shuttleai/shuttle-3-diffusion",
"desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition",
"preview": "shuttleai--shuttle-3-diffusion.jpg",
"skip": true
},
"ShuttleAI Shuttle 3.1 Aesthetic": {
"path": "shuttleai/shuttle-3.1-aesthetic",
"desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition",
"preview": "shuttleai--shuttle-3-diffusion.jpg",
"skip": true
},
"ShuttleAI Shuttle Jaguar": {
"path": "shuttleai/shuttle-jaguar",
"desc": "Shuttle uses Flux.1 Schnell as its base. It can produce images similar to Flux Dev or Pro in just 4 steps, and it is licensed under Apache 2. The model was partially de-distilled during training. When used beyond 10 steps, it enters refiner mode enhancing image details without altering the composition",
"preview": "shuttleai--shuttle-3-diffusion.jpg",
"skip": true
},
"Meissonic": {
"path": "MeissonFlow/Meissonic",
"desc": "Meissonic is a non-autoregressive mask image modeling text-to-image synthesis model that can generate high-resolution images. It is designed to run on consumer graphics cards.",
"preview": "MeissonFlow--Meissonic.jpg",
"skip": true
},
"aMUSEd 256": {
"path": "huggingface/amused/amused-256",
"skip": true,

View File

@ -571,7 +571,7 @@ def install_cuda():
log.info('CUDA: nVidia toolkit detected')
ts('cuda', t_start)
if args.use_nightly:
cmd = os.environ.get('TORCH_COMMAND', 'pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 --extra-index-url https://download.pytorch.org/whl/nightly/cu126')
cmd = os.environ.get('TORCH_COMMAND', '--upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 --extra-index-url https://download.pytorch.org/whl/nightly/cu126')
else:
cmd = os.environ.get('TORCH_COMMAND', 'torch==2.6.0+cu126 torchvision==0.21.0+cu126 --index-url https://download.pytorch.org/whl/cu126')
return cmd
@ -646,9 +646,6 @@ def install_rocm_zluda():
if error is None:
try:
if device is not None and zluda_installer.get_blaslt_enabled():
log.debug(f'ROCm hipBLASLt: arch={device.name} available={device.blaslt_supported}')
zluda_installer.set_blaslt_enabled(device.blaslt_supported)
zluda_installer.load()
torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.6.0 torchvision --index-url https://download.pytorch.org/whl/cu118')
except Exception as e:

View File

@ -42,7 +42,7 @@ ADAPTERS_SDXL = {
}
ADAPTERS_SD3 = {
'None': { 'name': 'none', 'repo': 'none', 'subfolder': 'none' },
'InstantX Large': { 'name': 'none', 'repo': 'InstantX/SD3.5-Large-IP-Adapter', 'subfolder': 'none', 'revision': 'refs/pr/10' },
'InstantX Large': { 'name': 'ip-adapter_diffusers.safetensors', 'repo': 'InstantX/SD3.5-Large-IP-Adapter', 'subfolder': 'none', 'revision': 'refs/pr/10' },
}
ADAPTERS_F1 = {
'None': { 'name': 'none', 'repo': 'none', 'subfolder': 'none' },

View File

@ -146,7 +146,7 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork):
sd_model.loaded_loras = {}
key = f'{",".join(include)}:{",".join(exclude)}'
loaded = sd_model.loaded_loras.get(key, [])
# shared.log.trace(f'Network load: type=LoRA key="{key}" requested={requested} loaded={loaded}')
debug_log(f'Network load: type=LoRA key="{key}" requested={requested} loaded={loaded}')
if len(requested) != len(loaded):
sd_model.loaded_loras[key] = requested
return True
@ -167,21 +167,24 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork):
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
requested = self.signature(names, te_multipliers, unet_multipliers)
load_method = lora_overrides.get_method()
if debug:
import sys
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
debug_log(f'Network load: type=LoRA include={include} exclude={exclude} requested={requested} fn={fn}')
debug_log(f'Network load: type=LoRA include={include} exclude={exclude} method={load_method} requested={requested} fn={fn}')
force_diffusers = lora_overrides.check_override()
if force_diffusers:
has_changed = False # diffusers handle their own loading
if load_method == 'diffusers':
has_changed = False # diffusers handles its own loading
if len(exclude) == 0:
job = shared.state.job
shared.state.job = 'LoRA'
lora_load.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load only on first call
sd_models.set_diffuser_offload(shared.sd_model, op="model")
shared.state.job = job
else:
elif load_method == 'nunchaku':
from modules.lora import lora_nunchaku
has_changed = lora_nunchaku.load_nunchaku(names, unet_multipliers)
else: # native
lora_load.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
has_changed = self.changed(requested, include, exclude)
if has_changed:
@ -196,11 +199,11 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork):
shared.state.job = job
debug_log(f'Network load: type=LoRA previous={[n.name for n in l.previously_loaded_networks]} current={[n.name for n in l.loaded_networks]} changed')
if len(l.loaded_networks) > 0 and (len(networks.applied_layers) > 0 or force_diffusers) and step == 0:
if len(l.loaded_networks) > 0 and (len(networks.applied_layers) > 0 or load_method=='diffusers' or load_method=='nunchaku') and step == 0:
infotext(p)
prompt(p)
if (has_changed or force_diffusers) and len(include) == 0: # print only once
shared.log.info(f'Network load: type=LoRA apply={[n.name for n in l.loaded_networks]} mode={"fuse" if shared.opts.lora_fuse_diffusers else "backup"} te={te_multipliers} unet={unet_multipliers} time={l.timer.summary}')
if has_changed and len(include) == 0: # print only once
shared.log.info(f'Network load: type=LoRA apply={[n.name for n in l.loaded_networks]} method={load_method} mode={"fuse" if shared.opts.lora_fuse_diffusers else "backup"} te={te_multipliers} unet={unet_multipliers} time={l.timer.summary}')
def deactivate(self, p):
if shared.native and len(lora_load.diffuser_loaded) > 0:

View File

@ -115,7 +115,7 @@ def load_safetensors(name, network_on_disk) -> Union[network.Network, None]:
if l.debug:
shared.log.debug(f'Network load: type=LoRA name="{name}" unmatched={keys_failed_to_match}')
else:
shared.log.debug(f'Network load: type=LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)} dtypes={dtypes} direct={shared.opts.lora_fuse_diffusers}')
shared.log.debug(f'Network load: type=LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)} dtypes={dtypes} fuse={shared.opts.lora_fuse_diffusers}')
if len(matched_networks) == 0:
return None
lora_cache[name] = net
@ -205,7 +205,7 @@ def network_download(name):
return None
def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
def gather_networks(names):
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
@ -213,6 +213,11 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
for i in range(len(names)):
if names[i].startswith('/'):
networks_on_disk[i] = network_download(names[i])
return networks_on_disk
def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
networks_on_disk = gather_networks(names)
failed_to_load_networks = []
recompile_model, skip_lora_load = maybe_recompile_model(names, te_multipliers)
@ -230,8 +235,11 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
try:
if recompile_model:
shared.compiled_model_state.lora_model.append(f"{name}:{te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier}")
if shared.opts.lora_force_diffusers or lora_overrides.check_override(shorthash): # OpenVINO only works with Diffusers LoRa loading
lora_method = lora_overrides.get_method(shorthash)
if shared.opts.lora_force_diffusers or lora_method == 'diffusers': # OpenVINO only works with Diffusers LoRa loading
net = load_diffusers(name, network_on_disk, lora_scale=te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier)
elif lora_method == 'nunchaku':
pass # handled directly from extra_networks_lora.load_nunchaku
else:
net = load_safetensors(name, network_on_disk)
if net is not None:
@ -260,12 +268,12 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
if not skip_lora_load and len(diffuser_loaded) > 0:
shared.log.debug(f'Network load: type=LoRA loaded={diffuser_loaded} available={shared.sd_model.get_list_adapters()} active={shared.sd_model.get_active_adapters()} scales={diffuser_scales}')
try:
t0 = time.time()
t1 = time.time()
shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales)
if shared.opts.lora_fuse_diffusers and not lora_overrides.check_fuse():
shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # diffusers with fuse uses fixed scale since later apply does the scaling
shared.sd_model.unload_lora_weights()
l.timer.activate += time.time() - t0
l.timer.activate += time.time() - t1
except Exception as e:
shared.log.error(f'Network load: type=LoRA {e}')
if l.debug:

View File

@ -0,0 +1,33 @@
import time
from modules import shared, errors
from modules.lora import lora_load, lora_common
previously_loaded = [] # we maintain private state here: (filename, strength) pairs applied on the last call


def load_nunchaku(names, strengths):
    """Apply LoRA networks to a Nunchaku transformer using compose_lora.

    Args:
        names: list of LoRA network names/aliases to load.
        strengths: per-network strengths; an entry may itself be a list,
            in which case only its first element is used.

    Returns:
        True if the set of applied (filename, strength) pairs changed since
        the previous call, False otherwise (no-op).
    """
    global previously_loaded # pylint: disable=global-statement
    strengths = [s[0] if isinstance(s, list) else s for s in strengths]
    networks = lora_load.gather_networks(names)
    # drop networks that failed to resolve or have zero/negative strength
    networks = [(network, strength) for network, strength in zip(networks, strengths) if network is not None and strength > 0]
    loras = [(network.filename, strength) for network, strength in networks]
    is_changed = loras != previously_loaded
    if not is_changed:
        return False
    previously_loaded = loras
    try:
        t0 = time.time()
        from nunchaku.lora.flux.compose import compose_lora # deferred: nunchaku is an optional dependency
        composed_lora = compose_lora(loras)
        shared.sd_model.transformer.update_lora_params(composed_lora)
        lora_common.loaded_networks = [n[0] for n in networks] # used by infotext
        t1 = time.time()
        lora_common.timer.load = t1 - t0
        shared.log.debug(f"Network load: type=LoRA method=nunchaku loras={names} strength={strengths} time={t1-t0:.3f}")
    except Exception as e:
        shared.log.error(f'Network load: type=LoRA method=nunchaku {e}') # fixed: was shared.log.errors, which raises AttributeError
        if lora_common.debug:
            errors.display(e, 'LoRA')
    return is_changed

View File

@ -25,7 +25,7 @@ force_diffusers = [ # forced always
'22c8339e7666', # spo-sdxl-10ep
]
force_models = [ # forced always
force_models_diffusers = [ # forced always
# 'sd3',
'sc',
'h1',
@ -41,7 +41,7 @@ force_models = [ # forced always
'allegrovideo',
]
force_classes = [ # forced always
force_classes_diffusers = [ # forced always
]
fuse_ignore = [
@ -49,17 +49,19 @@ fuse_ignore = [
]
def check_override(shorthash=''):
force = False
force = force or (shared.sd_model_type in force_models)
force = force or (shared.sd_model.__class__.__name__ in force_classes)
if len(shorthash) < 4:
return force
force = force or (any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False)
force = force or any(x.startswith(shorthash) for x in force_diffusers)
if force and shared.opts.lora_maybe_diffusers:
shared.log.debug('LoRA override: force diffusers')
return force
def get_method(shorthash=''):
    """Select the LoRA load method for the current model.

    Returns one of 'nunchaku', 'diffusers', or 'native'. *shorthash* is an
    optional LoRA model hash prefix checked against the override lists;
    hash-based checks are skipped for hashes of 4 chars or fewer.
    """
    # model types/classes that must always use the diffusers LoRA loader
    use_diffusers = (shared.sd_model_type in force_models_diffusers) or (shared.sd_model.__class__.__name__ in force_classes_diffusers)
    if shared.opts.lora_maybe_diffusers and len(shorthash) > 4:
        use_diffusers = use_diffusers or any(x.startswith(shorthash) for x in maybe_diffusers)
    if shared.opts.lora_force_diffusers and len(shorthash) > 4:
        use_diffusers = use_diffusers or any(x.startswith(shorthash) for x in force_diffusers)
    # Nunchaku transformers compose/apply LoRA themselves, overriding everything else
    use_nunchaku = hasattr(shared.sd_model, 'transformer') and 'Nunchaku' in shared.sd_model.transformer.__class__.__name__
    if use_nunchaku:
        return 'nunchaku'
    elif use_diffusers:
        return 'diffusers'
    else:
        return 'native'
def check_fuse():
    """Return True when LoRA fusing must be skipped for the current model type."""
    current_type = shared.sd_model_type
    return any(ignored == current_type for ignored in fuse_ignore)

View File

@ -1,7 +1,7 @@
# MIT-Han-Lab Nunchaku: <https://github.com/mit-han-lab/nunchaku>
# TODO nunchaku: cache-dir for transformer and t5 loader
# TODO nunchaku: batch support
# TODO nunchaku: LoRA support
from installer import log, pip
from modules import devices
@ -31,6 +31,7 @@ def install_nunchaku():
if devices.backend is None:
return False # too early
if not check():
import os
import sys
import platform
import importlib
@ -51,11 +52,13 @@ def install_nunchaku():
if torch_ver not in ['2.5', '2.6', '2.7', '2.8']:
log.error(f'Nunchaku: torch={torch.__version__} unsupported')
suffix = 'x86_64' if arch == 'linux' else 'win_amd64'
url = f'https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-{ver}'
url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}_{suffix}.whl'
cmd = f'install --upgrade {url}'
cmd = os.environ.get('NUNCHAKU_COMMAND', None)
if cmd is None:
url = f'https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-{ver}'
url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}_{suffix}.whl'
cmd = f'install --upgrade {url}'
# pip install https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-0.2.0+torch2.6-cp311-cp311-linux_x86_64.whl
log.debug(f'Nunchaku: url={url}')
log.debug(f'Nunchaku: install="{url}"')
pip(cmd, ignore=False, uv=False)
importlib.reload(pkg_resources)
if not check():

View File

@ -112,11 +112,21 @@ def load_quants(kwargs, repo_id, cache_dir, allow_quant):
if 'transformer' not in kwargs and model_quant.check_nunchaku('Transformer'):
import nunchaku
nunchaku_precision = nunchaku.utils.get_precision()
nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-dev" if 'dev' in repo_id else f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-schnell"
shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} attention={shared.opts.nunchaku_attention}')
kwargs['transformer'] = nunchaku.NunchakuFluxTransformer2dModel.from_pretrained(nunchaku_repo, torch_dtype=devices.dtype)
if shared.opts.nunchaku_attention:
kwargs['transformer'].set_attention_impl("nunchaku-fp16")
nunchaku_repo = None
if 'dev' in repo_id:
nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-dev"
elif 'schnell' in repo_id:
nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-flux.1-schnell"
elif 'shuttle' in repo_id:
nunchaku_repo = 'mit-han-lab/svdq-fp4-shuttle-jaguar'
else:
shared.log.error(f'Load module: quant=Nunchaku module=transformer repo="{repo_id}" unsupported')
if nunchaku_repo is not None:
shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} offload={shared.opts.nunchaku_offload} attention={shared.opts.nunchaku_attention}')
kwargs['transformer'] = nunchaku.NunchakuFluxTransformer2dModel.from_pretrained(nunchaku_repo, offload=shared.opts.nunchaku_offload, torch_dtype=devices.dtype)
kwargs['transformer'].quantization_method = 'SVDQuant'
if shared.opts.nunchaku_attention:
kwargs['transformer'].set_attention_impl("nunchaku-fp16")
elif 'transformer' not in kwargs and model_quant.check_quant('Transformer'):
quant_args = model_quant.create_config(allow=allow_quant, module='Transformer')
if quant_args:

View File

@ -54,7 +54,8 @@ def load_text_encoders(repo_id, diffusers_load_config={}):
sd_models.move_model(text_encoder_3, devices.cpu)
load_args, quant_args = model_quant.get_dit_args(diffusers_load_config, module='LLM', device_map=True)
shared.log.debug(f'Load model: type=HiDream te4="{shared.opts.model_h1_llama_repo}" quant="{model_quant.get_quant_type(quant_args)}" args={load_args}')
llama_repo = shared.opts.model_h1_llama_repo if shared.opts.model_h1_llama_repo != 'Default' else 'meta-llama/Meta-Llama-3.1-8B-Instruct'
shared.log.debug(f'Load model: type=HiDream te4="{llama_repo}" quant="{model_quant.get_quant_type(quant_args)}" args={load_args}')
text_encoder_4 = transformers.LlamaForCausalLM.from_pretrained(
shared.opts.model_h1_llama_repo,

View File

@ -88,9 +88,13 @@ def create_quanto_config(kwargs = None, allow_quanto: bool = True, module: str =
load_quanto(silent=True)
if optimum_quanto is None:
return kwargs
quanto_config = diffusers.QuantoConfig(weights_dtype=shared.opts.quanto_quantization_type)
quanto_config.activations = None # patch so it works with transformers
quanto_config.weights = quanto_config.weights_dtype
if module in {'TE', 'LLM'}:
quanto_config = transformers.QuantoConfig(weights=shared.opts.quanto_quantization_type)
quanto_config.weights_dtype = quanto_config.weights
else:
quanto_config = diffusers.QuantoConfig(weights_dtype=shared.opts.quanto_quantization_type)
quanto_config.activations = None # patch so it works with transformers
quanto_config.weights = quanto_config.weights_dtype
log.debug(f'Quantization: module="{module}" type=quanto dtype={shared.opts.quanto_quantization_type}')
if kwargs is None:
return quanto_config
@ -490,8 +494,8 @@ def get_dit_args(load_config:dict={}, module:str=None, device_map:bool=False, al
del config['safety_checker']
if 'requires_safety_checker' in config:
del config['requires_safety_checker']
if 'variant' in config:
del config['variant']
# if 'variant' in config:
# del config['variant']
if device_map:
if shared.opts.device_map == 'cpu':
config['device_map'] = 'cpu'

View File

@ -6,15 +6,21 @@ from modules import shared, sd_models, devices, modelloader, model_quant
def load_quants(kwargs, repo_id, cache_dir):
quant_args = {}
quant_args = model_quant.create_config()
if not quant_args:
return kwargs
load_args = kwargs.copy()
if 'transformer' not in kwargs and (('Model' in shared.opts.bnb_quantization or 'Model' in shared.opts.torchao_quantization or 'Model' in shared.opts.quanto_quantization) or ('Transformer' in shared.opts.bnb_quantization or 'Transformer' in shared.opts.torchao_quantization or 'Transformer' in shared.opts.quanto_quantization)):
kwargs['transformer'] = diffusers.models.SanaTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", cache_dir=cache_dir, **load_args, **quant_args)
if 'text_encoder' not in kwargs and ('TE' in shared.opts.bnb_quantization or 'TE' in shared.opts.torchao_quantization or 'TE' in shared.opts.quanto_quantization):
kwargs['text_encoder'] = transformers.AutoModelForCausalLM.from_pretrained(repo_id, subfolder="text_encoder", cache_dir=cache_dir, **load_args, **quant_args)
kwargs_copy = kwargs.copy()
if model_quant.check_nunchaku('Transformer') and 'Sana_1600M' in repo_id: # only sana-1600m
import nunchaku
nunchaku_precision = nunchaku.utils.get_precision()
nunchaku_repo = f"mit-han-lab/svdq-{nunchaku_precision}-sana-1600m"
shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} attention={shared.opts.nunchaku_attention}')
kwargs['transformer'] = nunchaku.NunchakuSanaTransformer2DModel.from_pretrained(nunchaku_repo, torch_dtype=devices.dtype)
elif model_quant.check_quant('Transformer'):
load_args, quant_args = model_quant.get_dit_args(kwargs_copy, module='Transformer')
if quant_args:
kwargs['transformer'] = diffusers.SanaTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", cache_dir=cache_dir, **load_args, **quant_args)
if model_quant.check_quant('TE'):
load_args, quant_args = model_quant.get_dit_args(kwargs_copy, module='TE')
if quant_args:
kwargs['text_encoder'] = transformers.AutoModelForCausalLM.from_pretrained(repo_id, subfolder="text_encoder", cache_dir=cache_dir, **load_args, **quant_args)
return kwargs
@ -28,9 +34,9 @@ def load_sana(checkpoint_info, kwargs={}):
kwargs.pop('requires_safety_checker', None)
kwargs.pop('torch_dtype', None)
# set variant since hf repos are a mess
if not repo_id.endswith('_diffusers'):
repo_id = f'{repo_id}_diffusers'
if 'Sana_1600M' in repo_id:
if devices.dtype == torch.bfloat16 or 'BF16' in repo_id:
if 'BF16' not in repo_id:
@ -45,6 +51,7 @@ def load_sana(checkpoint_info, kwargs={}):
kwargs = load_quants(kwargs, repo_id, cache_dir=shared.opts.diffusers_dir)
shared.log.debug(f'Load model: type=Sana repo="{repo_id}" args={list(kwargs)}')
t0 = time.time()
if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32:
kwargs['torch_dtype'] = devices.dtype
if 'Sprint' in repo_id:
@ -56,21 +63,31 @@ def load_sana(checkpoint_info, kwargs={}):
cache_dir=shared.opts.diffusers_dir,
**kwargs,
)
if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32:
if 'transformer' not in kwargs:
pipe.transformer = pipe.transformer.to(dtype=devices.dtype)
if 'text_encoder' not in kwargs:
pipe.text_encoder = pipe.text_encoder.to(dtype=devices.dtype)
pipe.vae = pipe.vae.to(dtype=devices.dtype)
if devices.dtype == torch.float16:
if 'transformer' not in kwargs:
pipe.transformer = pipe.transformer.to(dtype=devices.dtype)
if 'text_encoder' not in kwargs:
pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float32) # gemma2 does not support fp16
pipe.vae = pipe.vae.to(dtype=torch.float32) # dc-ae often overflows in fp16
if shared.opts.diffusers_eval:
pipe.text_encoder.eval()
pipe.transformer.eval()
# only cast if not quant-loaded
try:
if devices.dtype == torch.bfloat16 or devices.dtype == torch.float32:
if 'transformer' not in kwargs:
pipe.transformer = pipe.transformer.to(dtype=devices.dtype)
if 'text_encoder' not in kwargs:
pipe.text_encoder = pipe.text_encoder.to(dtype=devices.dtype)
pipe.vae = pipe.vae.to(dtype=devices.dtype)
if devices.dtype == torch.float16:
if 'transformer' not in kwargs:
pipe.transformer = pipe.transformer.to(dtype=devices.dtype)
if 'text_encoder' not in kwargs:
pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float32) # gemma2 does not support fp16
pipe.vae = pipe.vae.to(dtype=torch.float32) # dc-ae often overflows in fp16
except Exception as e:
shared.log.error(f'Load model: type=Sana {e}')
try:
if shared.opts.diffusers_eval:
pipe.text_encoder.eval()
pipe.transformer.eval()
except Exception:
pass
t1 = time.time()
shared.log.debug(f'Load model: type=Sana target={devices.dtype} te={pipe.text_encoder.dtype} transformer={pipe.transformer.dtype} vae={pipe.vae.dtype} time={t1-t0:.2f}')
devices.torch_gc(force=True)

View File

@ -502,5 +502,5 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if not p.disable_extra_networks:
shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct()} memory={memstats.memory_stats()}')
devices.torch_gc(force=True, reason='final')
devices.torch_gc(force=False, reason='final')
return processed

View File

@ -153,7 +153,7 @@ def set_pipeline_args(p, model, prompts:list, negative_prompts:list, prompts_2:t
shared.log.error(f'Prompt parser encode: {e}')
if os.environ.get('SD_PROMPT_DEBUG', None) is not None:
errors.display(e, 'Prompt parser encode')
timer.process.record('encode', reset=False)
timer.process.record('prompt', reset=False)
else:
prompt_parser_diffusers.embedder = None

View File

@ -147,9 +147,6 @@ def process_base(p: processing.StableDiffusionProcessing):
hidiffusion.unapply()
sd_models_compile.check_deepcache(enable=False)
if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: # register used embeddings
p.extra_generation_params['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used)
shared.state.nextjob()
return output

View File

@ -7,10 +7,6 @@ from modules.processing_class import StableDiffusionProcessing
args = {} # maintain history
infotext = '' # maintain history
debug = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None
if not shared.native:
from modules import sd_hijack
else:
sd_hijack = None
def get_last_args():
@ -62,11 +58,9 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
"Refiner prompt": p.refiner_prompt if len(p.refiner_prompt) > 0 else None,
"Refiner negative": p.refiner_negative if len(p.refiner_negative) > 0 else None,
"Styles": "; ".join(p.styles) if p.styles is not None and len(p.styles) > 0 else None,
# sdnext
"App": 'SD.Next',
"Version": git_commit,
"Backend": 'Legacy' if not shared.native else None,
"Pipeline": 'LDM' if not shared.native else None,
"Parser": shared.opts.prompt_attention if shared.opts.prompt_attention != 'native' else None,
"Comment": comment,
"Operations": '; '.join(ops).replace('"', '') if len(p.ops) > 0 else 'none',
@ -77,9 +71,9 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
args["VAE"] = 'TAESD'
elif p.vae_type == 'Remote':
args["VAE"] = 'Remote'
if shared.opts.add_model_name_to_info and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None:
if getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None:
args["Model"] = shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')
if shared.opts.add_model_hash_to_info and getattr(shared.sd_model, 'sd_model_hash', None) is not None:
if getattr(shared.sd_model, 'sd_model_hash', None) is not None:
args["Model hash"] = shared.sd_model.sd_model_hash
# native
if grid is None and (p.n_iter > 1 or p.batch_size > 1) and index >= 0:
@ -88,8 +82,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
args['Grid'] = grid
if shared.native:
args['Pipeline'] = shared.sd_model.__class__.__name__
args['TE'] = None if (not shared.opts.add_model_name_to_info or shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'Default') else shared.opts.sd_text_encoder
args['UNet'] = None if (not shared.opts.add_model_name_to_info or shared.opts.sd_unet is None or shared.opts.sd_unet == 'Default') else shared.opts.sd_unet
args['TE'] = None if (shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'Default') else shared.opts.sd_text_encoder
args['UNet'] = None if (shared.opts.sd_unet is None or shared.opts.sd_unet == 'Default') else shared.opts.sd_unet
else:
args['Pipeline'] = 'LDM'
if 'txt2img' in p.ops:
args["Variation seed"] = all_subseeds[index] if p.subseed_strength > 0 else None
args["Variation strength"] = p.subseed_strength if p.subseed_strength > 0 else None
@ -155,11 +151,14 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
args["Detailer negative"] = p.detailer_negative if len(p.detailer_negative) > 0 else None
if 'color' in p.ops:
args["Color correction"] = True
# embeddings
if sd_hijack is not None and hasattr(sd_hijack.model_hijack, 'embedding_db') and len(sd_hijack.model_hijack.embedding_db.embeddings_used) > 0: # this is for original hijaacked models only, diffusers are handled separately
args["Embeddings"] = ', '.join(sd_hijack.model_hijack.embedding_db.embeddings_used)
# samplers
if shared.opts.token_merging_method == 'ToMe': # tome/todo
args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None
else:
args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None
if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: # register used embeddings
args['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used)
# samplers
if getattr(p, 'sampler_name', None) is not None and p.sampler_name.lower() != 'default':
args["Sampler eta delta"] = shared.opts.eta_noise_seed_delta if shared.opts.eta_noise_seed_delta != 0 and sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p) else None
args["Sampler eta multiplier"] = p.initial_noise_multiplier if getattr(p, 'initial_noise_multiplier', 1.0) != 1.0 else None
@ -177,11 +176,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
args['Sampler range'] = shared.opts.schedulers_timesteps_range if shared.opts.schedulers_timesteps_range != shared.opts.data_labels.get('schedulers_timesteps_range').default else None
args['Sampler shift'] = shared.opts.schedulers_shift if shared.opts.schedulers_shift != shared.opts.data_labels.get('schedulers_shift').default else None
args['Sampler dynamic shift'] = shared.opts.schedulers_dynamic_shift if shared.opts.schedulers_dynamic_shift != shared.opts.data_labels.get('schedulers_dynamic_shift').default else None
# tome/todo
if shared.opts.token_merging_method == 'ToMe':
args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None
else:
args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None
# model specific
if shared.sd_model_type == 'h1':
args['LLM'] = None if shared.opts.model_h1_llama_repo == 'Default' else shared.opts.model_h1_llama_repo
args.update(p.extra_generation_params)
for k, v in args.copy().items():

View File

@ -2,7 +2,6 @@ import os
import time
import numpy as np
import torch
import torchvision.transforms.functional as TF
from modules import shared, devices, sd_models, sd_vae, sd_vae_taesd, errors
@ -316,6 +315,7 @@ def vae_decode(latents, model, output_type='np', vae_type='Full', width=None, he
def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable
import torchvision.transforms.functional as f
if shared.state.interrupted or shared.state.skipped:
return []
if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
@ -323,7 +323,7 @@ def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable
if not hasattr(model, 'vae'):
shared.log.error('VAE not found in model')
return []
tensor = TF.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae)
tensor = f.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae)
if vae_type == 'Full':
tensor = tensor * 2 - 1
latents = full_vae_encode(image=tensor, model=shared.sd_model)

View File

@ -947,8 +947,12 @@ def add_noise_pred_to_diffusers_callback(pipe):
pipe.prior_pipe._callback_tensor_inputs.append("predicted_image_embedding") # pylint: disable=protected-access
elif hasattr(pipe, "scheduler") and "flow" in pipe.scheduler.__class__.__name__.lower():
pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access
elif hasattr(pipe, "scheduler") and hasattr(pipe.scheduler, "config") and getattr(pipe.scheduler.config, "prediction_type", "none") == "flow_prediction":
pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access
elif hasattr(pipe, "default_scheduler") and "flow" in pipe.default_scheduler.__class__.__name__.lower():
pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access
elif hasattr(pipe, "default_scheduler") and hasattr(pipe.default_scheduler, "config") and getattr(pipe.default_scheduler.config, "prediction_type", "none") == "flow_prediction":
pipe._callback_tensor_inputs.append("noise_pred") # pylint: disable=protected-access
return pipe

View File

@ -299,7 +299,7 @@ def apply_balanced_offload(sd_model=None, exclude=[]):
if device_map and max_memory:
module.balanced_offload_device_map = device_map
module.balanced_offload_max_memory = max_memory
module.offload_post = shared.sd_model_type in [offload_post] and shared.opts.te_hijack and module_name.startswith("text_encoder")
module.offload_post = shared.sd_model_type in offload_post and shared.opts.te_hijack and module_name.startswith("text_encoder")
devices.torch_gc(fast=True, force=True, reason='offload')
apply_balanced_offload_to_module(sd_model)

View File

@ -268,7 +268,7 @@ class DiffusionSampler:
if 'shift' in self.config:
self.config['shift'] = shared.opts.schedulers_shift if shared.opts.schedulers_shift > 0 else 3
if 'use_dynamic_shifting' in self.config:
self.config['use_dynamic_shifting'] = True if shared.opts.schedulers_shift <= 0 else shared.opts.schedulers_dynamic_shift
self.config['use_dynamic_shifting'] = True if shared.opts.schedulers_shift == 0 else shared.opts.schedulers_dynamic_shift
if 'use_beta_sigmas' in self.config and 'sigma_schedule' in self.config:
self.config['use_beta_sigmas'] = 'StableDiffusion3' in model.__class__.__name__
if 'rescale_betas_zero_snr' in self.config:

View File

@ -416,7 +416,7 @@ options_templates.update(options_section(('sd', "Models & Loading"), {
options_templates.update(options_section(('model_options', "Models Options"), {
"model_sd3_disable_te5": OptionInfo(False, "StableDiffusion3: T5 disable encoder"),
"model_h1_llama_repo": OptionInfo("meta-llama/Meta-Llama-3.1-8B-Instruct", "HiDream: LLama repo", gr.Textbox),
"model_h1_llama_repo": OptionInfo("Default", "HiDream: LLama repo", gr.Textbox),
}))
options_templates.update(options_section(('vae_encoder', "Variable Auto Encoder"), {
@ -552,6 +552,7 @@ options_templates.update(options_section(('quantization', "Quantization Settings
"nunchaku_sep": OptionInfo("<h2>Nunchaku Engine</h2>", "", gr.HTML),
"nunchaku_quantization": OptionInfo([], "SVDQuant enabled", gr.CheckboxGroup, {"choices": ["Model", "Transformer", "VAE", "TE", "Video", "LLM", "ControlNet"], "visible": native}),
"nunchaku_attention": OptionInfo(False, "Nunchaku attention", gr.Checkbox, {"visible": native}),
"nunchaku_offload": OptionInfo(False, "Nunchaku offloading", gr.Checkbox, {"visible": native}),
}))
options_templates.update(options_section(('advanced', "Pipeline Modifiers"), {

View File

@ -71,6 +71,30 @@ models = {
],
'LTX Video': [
Model(name='None'),
Model(name='LTXVideo 0.9.6 2B T2V',
url='https://huggingface.co/Lightricks/LTX-Video',
repo='Lightricks/LTX-Video',
repo_cls=diffusers.LTXConditionPipeline,
te_cls=transformers.T5EncoderModel,
dit_cls=diffusers.LTXVideoTransformer3DModel),
Model(name='LTXVideo 0.9.6 2B I2V',
url='https://huggingface.co/Lightricks/LTX-Video',
repo='Lightricks/LTX-Video',
repo_cls=diffusers.LTXConditionPipeline,
te_cls=transformers.T5EncoderModel,
dit_cls=diffusers.LTXVideoTransformer3DModel),
Model(name='LTXVideo 0.9.6 2B T2V Distilled',
url='https://huggingface.co/Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25',
repo='Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25',
repo_cls=diffusers.LTXConditionPipeline,
te_cls=transformers.T5EncoderModel,
dit_cls=diffusers.LTXVideoTransformer3DModel),
Model(name='LTXVideo 0.9.6 2B I2V Distilled',
url='https://huggingface.co/Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25',
repo='Lightricks/LTX-Video-2B-0.9.6-Distilled-04-25',
repo_cls=diffusers.LTXConditionPipeline,
te_cls=transformers.T5EncoderModel,
dit_cls=diffusers.LTXVideoTransformer3DModel),
Model(name='LTXVideo 0.9.5 T2V', # https://github.com/huggingface/diffusers/pull/10968
url='https://huggingface.co/Lightricks/LTX-Video-0.9.5',
repo='Lightricks/LTX-Video-0.9.5',

View File

@ -123,9 +123,14 @@ def load():
core = Core(ctypes.windll.LoadLibrary(os.path.join(path, 'nvcuda.dll')))
ml = ZLUDALibrary(ctypes.windll.LoadLibrary(os.path.join(path, 'nvml.dll')))
is_nightly = core.get_nightly_flag() == 1
hipBLASLt_enabled = is_nightly and os.path.exists(rocm.blaslt_tensile_libpath) and os.path.exists(os.path.join(rocm.path, "bin", "hipblaslt.dll"))
hipBLASLt_enabled = is_nightly and os.path.exists(rocm.blaslt_tensile_libpath) and os.path.exists(os.path.join(rocm.path, "bin", "hipblaslt.dll")) and default_agent is not None
MIOpen_enabled = is_nightly and os.path.exists(os.path.join(rocm.path, "bin", "MIOpen.dll"))
if hipBLASLt_enabled:
if not default_agent.blaslt_supported:
hipBLASLt_enabled = False
log.debug(f'ROCm hipBLASLt: arch={default_agent.name} available={hipBLASLt_enabled}')
for k, v in DLL_MAPPING.items():
if not os.path.exists(os.path.join(path, v)):
link_or_copy(os.path.join(path, k), os.path.join(path, v))

View File

@ -36,6 +36,7 @@ class SharedSettingsStackHelper(object):
freeu_b2 = None
freeu_s1 = None
freeu_s2 = None
cfgzero_enabled = None
schedulers_sigma_adjust = None
schedulers_beta_schedule = None
schedulers_beta_start = None
@ -53,6 +54,7 @@ class SharedSettingsStackHelper(object):
eta_noise_seed_delta = None
tome_ratio = None
todo_ratio = None
teacache_thresh = None
extra_networks_default_multiplier = None
disable_weights_auto_swap = None
@ -75,6 +77,7 @@ class SharedSettingsStackHelper(object):
self.freeu_b2 = shared.opts.freeu_b2
self.freeu_s1 = shared.opts.freeu_s1
self.freeu_s2 = shared.opts.freeu_s2
self.cfgzero_enabled = shared.opts.cfgzero_enabled
self.sd_model_checkpoint = shared.opts.sd_model_checkpoint
self.sd_model_refiner = shared.opts.sd_model_refiner
self.sd_model_dict = shared.opts.sd_model_dict
@ -83,6 +86,7 @@ class SharedSettingsStackHelper(object):
self.sd_text_encoder = shared.opts.sd_text_encoder
self.extra_networks_default_multiplier = shared.opts.extra_networks_default_multiplier
self.disable_weights_auto_swap = shared.opts.disable_weights_auto_swap
self.teacache_thresh = shared.opts.teacache_thresh
shared.opts.data["disable_weights_auto_swap"] = False
def __exit__(self, exc_type, exc_value, tb):
@ -100,12 +104,14 @@ class SharedSettingsStackHelper(object):
shared.opts.data["schedulers_shift"] = self.schedulers_shift
shared.opts.data["scheduler_eta"] = self.scheduler_eta
shared.opts.data["eta_noise_seed_delta"] = self.eta_noise_seed_delta
shared.opts.data["cfgzero_enabled"] = self.cfgzero_enabled
shared.opts.data["freeu_b1"] = self.freeu_b1
shared.opts.data["freeu_b2"] = self.freeu_b2
shared.opts.data["freeu_s1"] = self.freeu_s1
shared.opts.data["freeu_s2"] = self.freeu_s2
shared.opts.data["tome_ratio"] = self.tome_ratio
shared.opts.data["todo_ratio"] = self.todo_ratio
shared.opts.data["teacache_thresh"] = self.teacache_thresh
if self.sd_model_checkpoint != shared.opts.sd_model_checkpoint:
shared.opts.data["sd_model_checkpoint"] = self.sd_model_checkpoint

2
wiki

@ -1 +1 @@
Subproject commit a985acf8ca4f8e20c7438f749b4074d37c9df949
Subproject commit 7c7a9ffdc9cfffa2e4febc05e44dcdfa9c533e56