diff --git a/data/reference-nunchaku.json b/data/reference-nunchaku.json new file mode 100644 index 000000000..a22be9e91 --- /dev/null +++ b/data/reference-nunchaku.json @@ -0,0 +1,209 @@ +{ + "FLUX.1-Dev Nunchaku SVDQuant": { + "path": "black-forest-labs/FLUX.1-dev", + "subfolder": "nunchaku", + "preview": "black-forest-labs--FLUX.1-dev.jpg", + "desc": "Nunchaku SVDQuant quantization of FLUX.1-dev transformer with INT4 and SVD rank 32", + "skip": true, + "nunchaku": ["Model", "TE"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "FLUX.1-Schnell Nunchaku SVDQuant": { + "path": "black-forest-labs/FLUX.1-schnell", + "subfolder": "nunchaku", + "preview": "black-forest-labs--FLUX.1-schnell.jpg", + "desc": "Nunchaku SVDQuant quantization of FLUX.1-schnell transformer with INT4 and SVD rank 32", + "skip": true, + "nunchaku": ["Model", "TE"], + "tags": "nunchaku", + "extras": "sampler: Default, cfg_scale: 1.0, steps: 4", + "size": 0, + "date": "2025 June" + }, + "FLUX.1-Kontext Nunchaku SVDQuant": { + "path": "black-forest-labs/FLUX.1-Kontext-dev", + "subfolder": "nunchaku", + "preview": "black-forest-labs--FLUX.1-Kontext-dev.jpg", + "desc": "Nunchaku SVDQuant quantization of FLUX.1-Kontext-dev transformer with INT4 and SVD rank 32", + "skip": true, + "nunchaku": ["Model", "TE"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "FLUX.1-Krea Nunchaku SVDQuant": { + "path": "black-forest-labs/FLUX.1-Krea-dev", + "subfolder": "nunchaku", + "preview": "black-forest-labs--FLUX.1-Krea-dev.jpg", + "desc": "Nunchaku SVDQuant quantization of FLUX.1-Krea-dev transformer with INT4 and SVD rank 32", + "skip": true, + "nunchaku": ["Model", "TE"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "FLUX.1-Fill Nunchaku SVDQuant": { + "path": "black-forest-labs/FLUX.1-Fill-dev", + "subfolder": "nunchaku", + "preview": "black-forest-labs--FLUX.1-Fill-dev.jpg", + "desc": "Nunchaku SVDQuant quantization of FLUX.1-Fill-dev transformer for 
inpainting", + "skip": true, + "hidden": true, + "nunchaku": ["Model", "TE"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "FLUX.1-Depth Nunchaku SVDQuant": { + "path": "black-forest-labs/FLUX.1-Depth-dev", + "subfolder": "nunchaku", + "preview": "black-forest-labs--FLUX.1-Depth-dev.jpg", + "desc": "Nunchaku SVDQuant quantization of FLUX.1-Depth-dev transformer for depth-conditioned generation", + "skip": true, + "hidden": true, + "nunchaku": ["Model", "TE"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "Shuttle Jaguar Nunchaku SVDQuant": { + "path": "shuttleai/shuttle-jaguar", + "subfolder": "nunchaku", + "preview": "shuttleai--shuttle-jaguar.jpg", + "desc": "Nunchaku SVDQuant quantization of Shuttle Jaguar transformer", + "skip": true, + "nunchaku": ["Model", "TE"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "Qwen-Image Nunchaku SVDQuant": { + "path": "Qwen/Qwen-Image", + "subfolder": "nunchaku", + "preview": "Qwen--Qwen-Image.jpg", + "desc": "Nunchaku SVDQuant quantization of Qwen-Image transformer with INT4 and SVD rank 128", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "Qwen-Lightning (8-step) Nunchaku SVDQuant": { + "path": "vladmandic/Qwen-Lightning", + "subfolder": "nunchaku", + "preview": "vladmandic--Qwen-Lightning.jpg", + "desc": "Nunchaku SVDQuant quantization of Qwen-Lightning (8-step distilled) transformer with INT4 and SVD rank 128", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "extras": "steps: 8", + "size": 0, + "date": "2025 June" + }, + "Qwen-Lightning (4-step) Nunchaku SVDQuant": { + "path": "vladmandic/Qwen-Lightning", + "subfolder": "nunchaku-4step", + "preview": "vladmandic--Qwen-Lightning.jpg", + "desc": "Nunchaku SVDQuant quantization of Qwen-Lightning (4-step distilled) transformer with INT4 and SVD rank 128", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "extras": "steps: 4", + 
"size": 0, + "date": "2025 June" + }, + "Qwen-Image-Edit Nunchaku SVDQuant": { + "path": "Qwen/Qwen-Image-Edit", + "subfolder": "nunchaku", + "preview": "Qwen--Qwen-Image-Edit.jpg", + "desc": "Nunchaku SVDQuant quantization of Qwen-Image-Edit transformer with INT4 and SVD rank 128", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "Qwen-Lightning-Edit (8-step) Nunchaku SVDQuant": { + "path": "vladmandic/Qwen-Lightning-Edit", + "subfolder": "nunchaku", + "preview": "vladmandic--Qwen-Lightning-Edit.jpg", + "desc": "Nunchaku SVDQuant quantization of Qwen-Lightning-Edit (8-step distilled editing) transformer with INT4 and SVD rank 128", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "extras": "steps: 8", + "size": 0, + "date": "2025 June" + }, + "Qwen-Lightning-Edit (4-step) Nunchaku SVDQuant": { + "path": "vladmandic/Qwen-Lightning-Edit", + "subfolder": "nunchaku-4step", + "preview": "vladmandic--Qwen-Lightning-Edit.jpg", + "desc": "Nunchaku SVDQuant quantization of Qwen-Lightning-Edit (4-step distilled editing) transformer with INT4 and SVD rank 128", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "extras": "steps: 4", + "size": 0, + "date": "2025 June" + }, + "Qwen-Image-Edit-2509 Nunchaku SVDQuant": { + "path": "Qwen/Qwen-Image-Edit-2509", + "subfolder": "nunchaku", + "preview": "Qwen--Qwen-Image-Edit-2509.jpg", + "desc": "Nunchaku SVDQuant quantization of Qwen-Image-Edit-2509 transformer with INT4 and SVD rank 128", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "size": 0, + "date": "2025 September" + }, + "Sana 1.6B 1k Nunchaku SVDQuant": { + "path": "Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers", + "subfolder": "nunchaku", + "preview": "Efficient-Large-Model--Sana_1600M_1024px_BF16_diffusers.jpg", + "desc": "Nunchaku SVDQuant quantization of Sana 1.6B 1024px transformer with INT4 and SVD rank 32", + "skip": true, + "nunchaku": ["Model"], + 
"tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "Z-Image-Turbo Nunchaku SVDQuant": { + "path": "Tongyi-MAI/Z-Image-Turbo", + "subfolder": "nunchaku", + "preview": "Tongyi-MAI--Z-Image-Turbo.jpg", + "desc": "Nunchaku SVDQuant quantization of Z-Image-Turbo transformer with INT4 and SVD rank 128", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "extras": "sampler: Default, cfg_scale: 1.0, steps: 9", + "size": 0, + "date": "2025 June" + }, + "SDXL Base Nunchaku SVDQuant": { + "path": "stabilityai/stable-diffusion-xl-base-1.0", + "subfolder": "nunchaku", + "preview": "stabilityai--stable-diffusion-xl-base-1.0.jpg", + "desc": "Nunchaku SVDQuant quantization of SDXL Base 1.0 UNet with INT4 and SVD rank 32", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "size": 0, + "date": "2025 June" + }, + "SDXL Turbo Nunchaku SVDQuant": { + "path": "stabilityai/sdxl-turbo", + "subfolder": "nunchaku", + "preview": "stabilityai--sdxl-turbo.jpg", + "desc": "Nunchaku SVDQuant quantization of SDXL Turbo UNet with INT4 and SVD rank 32", + "skip": true, + "nunchaku": ["Model"], + "tags": "nunchaku", + "extras": "sampler: Default, cfg_scale: 1.0, steps: 4", + "size": 0, + "date": "2025 June" + } +} diff --git a/html/locale_en.json b/html/locale_en.json index 5eb0b9f89..599bf7de5 100644 --- a/html/locale_en.json +++ b/html/locale_en.json @@ -337,6 +337,8 @@ {"id":"","label":"Model Options","localized":"","reload":"","hint":"Settings related to behavior of specific models"}, {"id":"","label":"Model Offloading","localized":"","reload":"","hint":"Settings related to model offloading and memory management"}, {"id":"","label":"Model Quantization","localized":"","reload":"","hint":"Settings related to model quantization which is used to reduce memory usage"}, + {"id":"","label":"Nunchaku attention","localized":"","reload":"","hint":"Replaces default attention with Nunchaku's custom FP16 attention kernel for faster inference on consumer NVIDIA GPUs.
May improve performance on GPUs whose FP16 tensor-core throughput is higher than their BF16 throughput.

Currently only affects Flux-based models (Dev, Schnell, Kontext, Fill, Depth, etc.). Has no effect on Qwen, SDXL, Sana, or other architectures.

Disabled by default."}, + {"id":"","label":"Nunchaku offloading","localized":"","reload":"","hint":"Enables Nunchaku's own per-block CPU offloading with asynchronous CUDA streams to reduce VRAM usage.
Uses a ping-pong buffer strategy: while one transformer block computes on GPU, the next block preloads from CPU in the background, hiding most of the transfer latency.

Can reduce VRAM usage at the cost of slower inference.
This replaces SD.Next's pipeline offloading for the transformer component.

Only useful on low-VRAM GPUs. If your GPU has enough memory to hold the quantized model (16+ GB), keep this disabled for maximum speed.
Supports Flux and Qwen models. Not supported for SDXL, where this setting is ignored.
Disabled by default."}, {"id":"","label":"Image Metadata","localized":"","reload":"","hint":"Settings related to handling of metadata that is created with generated images"}, {"id":"","label":"Legacy Options","localized":"","reload":"","hint":"Settings related to legacy options - should not be used"}, {"id":"","label":"Restart server","localized":"","reload":"","hint":"Restart server"}, diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js index acb305da8..eaf516ff2 100644 --- a/javascript/extraNetworks.js +++ b/javascript/extraNetworks.js @@ -171,6 +171,12 @@ async function filterExtraNetworksForTab(searchTerm) { .toLowerCase() .includes('quantized') ? '' : 'none'; }); + } else if (searchTerm === 'nunchaku/') { + cards.forEach((elem) => { + elem.style.display = elem.dataset.tags + .toLowerCase() + .includes('nunchaku') ? '' : 'none'; + }); } else if (searchTerm === 'local/') { cards.forEach((elem) => { elem.style.display = elem.dataset.name diff --git a/modules/mit_nunchaku.py b/modules/mit_nunchaku.py index b5e82c1da..6b4e524cc 100644 --- a/modules/mit_nunchaku.py +++ b/modules/mit_nunchaku.py @@ -4,10 +4,27 @@ from installer import log, pip from modules import devices -nunchaku_ver = '1.1.0' +nunchaku_versions = { + '2.5': '1.0.1', + '2.6': '1.0.1', + '2.7': '1.1.0', + '2.8': '1.1.0', + '2.9': '1.1.0', + '2.10': '1.0.2', + '2.11': '1.1.0', +} ok = False +def _expected_ver(): + try: + import torch + torch_ver = '.'.join(torch.__version__.split('+')[0].split('.')[:2]) + return nunchaku_versions.get(torch_ver) + except Exception: + return None + + def check(): global ok # pylint: disable=global-statement if ok: @@ -16,8 +33,9 @@ def check(): import nunchaku import nunchaku.utils from nunchaku import __version__ + expected = _expected_ver() log.info(f'Nunchaku: path={nunchaku.__path__} version={__version__.__version__} precision={nunchaku.utils.get_precision()}') - if __version__.__version__ != nunchaku_ver: + if expected is not None and 
__version__.__version__ != expected: ok = False return False ok = True @@ -49,14 +67,16 @@ def install_nunchaku(): if devices.backend not in ['cuda']: log.error(f'Nunchaku: backend={devices.backend} unsupported') return False - torch_ver = torch.__version__[:3] - if torch_ver not in ['2.5', '2.6', '2.7', '2.8', '2.9', '2.10']: + torch_ver = '.'.join(torch.__version__.split('+')[0].split('.')[:2]) + nunchaku_ver = nunchaku_versions.get(torch_ver) + if nunchaku_ver is None: log.error(f'Nunchaku: torch={torch.__version__} unsupported') + return False suffix = 'x86_64' if arch == 'linux' else 'win_amd64' url = os.environ.get('NUNCHAKU_COMMAND', None) if url is None: arch = f'{arch}_' if arch == 'linux' else '' - url = f'https://huggingface.co/nunchaku-tech/nunchaku/resolve/main/nunchaku-{nunchaku_ver}' + url = f'https://huggingface.co/nunchaku-ai/nunchaku/resolve/main/nunchaku-{nunchaku_ver}' url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}{suffix}.whl' cmd = f'install --upgrade {url}' log.debug(f'Nunchaku: install="{url}"') diff --git a/modules/model_quant.py b/modules/model_quant.py index 1a501be0a..3cad91181 100644 --- a/modules/model_quant.py +++ b/modules/model_quant.py @@ -255,13 +255,25 @@ def check_quant(module: str = ''): def check_nunchaku(module: str = ''): from modules import shared - if module not in shared.opts.nunchaku_quantization: + model_name = getattr(shared.opts, 'sd_model_checkpoint', '') + if '+nunchaku' not in model_name: return False - from modules import mit_nunchaku - mit_nunchaku.install_nunchaku() - if not mit_nunchaku.ok: - return False - return True + base_path = model_name.split('+')[0] + for v in shared.reference_models.values(): + if v.get('path', '') != base_path: + continue + nunchaku_modules = v.get('nunchaku', None) + if nunchaku_modules is None: + continue + if isinstance(nunchaku_modules, bool) and nunchaku_modules: + nunchaku_modules = ['Model', 'TE'] + if not isinstance(nunchaku_modules, list): + continue + if 
module in nunchaku_modules: + from modules import mit_nunchaku + mit_nunchaku.install_nunchaku() + return mit_nunchaku.ok + return False def create_config(kwargs = None, allow: bool = True, module: str = 'Model', modules_to_not_convert: list = None, modules_dtype_dict: dict = None): diff --git a/modules/sd_unet.py b/modules/sd_unet.py index c73ca8dc5..643d1c08e 100644 --- a/modules/sd_unet.py +++ b/modules/sd_unet.py @@ -18,14 +18,15 @@ def load_unet_sdxl_nunchaku(repo_id): shared.log.error(f'Load module: quant=Nunchaku module=unet repo="{repo_id}" low nunchaku version') return None if 'turbo' in repo_id.lower(): - nunchaku_repo = 'nunchaku-tech/nunchaku-sdxl-turbo/svdq-int4_r32-sdxl-turbo.safetensors' + nunchaku_repo = 'nunchaku-ai/nunchaku-sdxl-turbo/svdq-int4_r32-sdxl-turbo.safetensors' else: - nunchaku_repo = 'nunchaku-tech/nunchaku-sdxl/svdq-int4_r32-sdxl.safetensors' + nunchaku_repo = 'nunchaku-ai/nunchaku-sdxl/svdq-int4_r32-sdxl.safetensors' - shared.log.debug(f'Load module: quant=Nunchaku module=unet repo="{nunchaku_repo}" offload={shared.opts.nunchaku_offload}') + if shared.opts.nunchaku_offload: + shared.log.warning('Load module: quant=Nunchaku module=unet offload not supported for SDXL, ignoring') + shared.log.debug(f'Load module: quant=Nunchaku module=unet repo="{nunchaku_repo}"') unet = NunchakuSDXLUNet2DConditionModel.from_pretrained( nunchaku_repo, - offload=shared.opts.nunchaku_offload, torch_dtype=devices.dtype, cache_dir=shared.opts.hfcache_dir, ) diff --git a/modules/shared.py b/modules/shared.py index 91188a825..0d606ab2a 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -281,7 +281,6 @@ options_templates.update(options_section(("quantization", "Model Quantization"), "sdnq_quantize_shuffle_weights": OptionInfo(False, "Shuffle weights in post mode", gr.Checkbox), "nunchaku_sep": OptionInfo("

Nunchaku Engine

", "", gr.HTML), - "nunchaku_quantization": OptionInfo([], "SVDQuant enabled", gr.CheckboxGroup, {"choices": ["Model", "TE"]}), "nunchaku_attention": OptionInfo(False, "Nunchaku attention", gr.Checkbox), "nunchaku_offload": OptionInfo(False, "Nunchaku offloading", gr.Checkbox), @@ -881,6 +880,7 @@ profiler = None import modules.styles prompt_styles = modules.styles.StyleDatabase(opts) reference_models = readfile(os.path.join('data', 'reference.json'), as_type="dict") if opts.extra_network_reference_enable else {} +reference_models.update(readfile(os.path.join('data', 'reference-nunchaku.json'), as_type="dict") if opts.extra_network_reference_enable else {}) cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or (cmd_opts.server_name or False)) and not cmd_opts.insecure log.debug('Initializing: devices') diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py index 3de17896f..d98e7a14b 100644 --- a/modules/ui_extra_networks.py +++ b/modules/ui_extra_networks.py @@ -305,6 +305,7 @@ class ExtraNetworksPage: subdirs['Reference'] = 1 subdirs['Distilled'] = 1 subdirs['Quantized'] = 1 + subdirs['Nunchaku'] = 1 subdirs['Community'] = 1 subdirs['Cloud'] = 1 subdirs[diffusers_base] = 1 @@ -324,6 +325,8 @@ class ExtraNetworksPage: subdirs.move_to_end('Distilled', last=True) if 'Quantized' in subdirs: subdirs.move_to_end('Quantized', last=True) + if 'Nunchaku' in subdirs: + subdirs.move_to_end('Nunchaku', last=True) if 'Community' in subdirs: subdirs.move_to_end('Community', last=True) if 'Cloud' in subdirs: @@ -332,7 +335,7 @@ class ExtraNetworksPage: for subdir in subdirs: if len(subdir) == 0: continue - if subdir in ['All', 'Local', 'Diffusers', 'Reference', 'Distilled', 'Quantized', 'Community', 'Cloud']: + if subdir in ['All', 'Local', 'Diffusers', 'Reference', 'Distilled', 'Quantized', 'Nunchaku', 'Community', 'Cloud']: style = 'network-reference' else: style = 'network-folder' diff --git a/modules/ui_extra_networks_checkpoints.py 
b/modules/ui_extra_networks_checkpoints.py index 9470ec9bb..b179d55c6 100644 --- a/modules/ui_extra_networks_checkpoints.py +++ b/modules/ui_extra_networks_checkpoints.py @@ -3,7 +3,7 @@ import html import json import concurrent from datetime import datetime -from modules import shared, ui_extra_networks, sd_models, modelstats, paths +from modules import shared, ui_extra_networks, sd_models, modelstats, paths, devices from modules.json_helpers import readfile @@ -48,16 +48,21 @@ class ExtraNetworksPageCheckpoints(ui_extra_networks.ExtraNetworksPage): reference_distilled = readfile(os.path.join('data', 'reference-distilled.json'), as_type="dict") reference_community = readfile(os.path.join('data', 'reference-community.json'), as_type="dict") reference_cloud = readfile(os.path.join('data', 'reference-cloud.json'), as_type="dict") + reference_nunchaku = readfile(os.path.join('data', 'reference-nunchaku.json'), as_type="dict") shared.reference_models = {} shared.reference_models.update(reference_base) shared.reference_models.update(reference_quant) shared.reference_models.update(reference_community) shared.reference_models.update(reference_distilled) shared.reference_models.update(reference_cloud) + shared.reference_models.update(reference_nunchaku) for k, v in shared.reference_models.items(): count['total'] += 1 url = v['path'] + if v.get('hidden', False): + count['hidden'] += 1 + continue experimental = v.get('experimental', False) if experimental: if shared.cmd_opts.experimental: @@ -83,6 +88,9 @@ class ExtraNetworksPageCheckpoints(ui_extra_networks.ExtraNetworksPage): path = f'{v.get("path", "")}' tag = v.get('tags', '') + if tag == 'nunchaku' and devices.backend != 'cuda': + count['hidden'] += 1 + continue if tag in count: count[tag] += 1 elif tag != '': diff --git a/pipelines/flux/flux_nunchaku.py b/pipelines/flux/flux_nunchaku.py index 9a737d103..d8761e186 100644 --- a/pipelines/flux/flux_nunchaku.py +++ b/pipelines/flux/flux_nunchaku.py @@ -9,19 +9,19 @@ def 
load_flux_nunchaku(repo_id): if 'srpo' in repo_id.lower(): pass elif 'flux.1-dev' in repo_id.lower(): - nunchaku_repo = f"nunchaku-tech/nunchaku-flux.1-dev/svdq-{nunchaku_precision}_r32-flux.1-dev.safetensors" + nunchaku_repo = f"nunchaku-ai/nunchaku-flux.1-dev/svdq-{nunchaku_precision}_r32-flux.1-dev.safetensors" elif 'flux.1-schnell' in repo_id.lower(): - nunchaku_repo = f"nunchaku-tech/nunchaku-flux.1-schnell/svdq-{nunchaku_precision}_r32-flux.1-schnell.safetensors" + nunchaku_repo = f"nunchaku-ai/nunchaku-flux.1-schnell/svdq-{nunchaku_precision}_r32-flux.1-schnell.safetensors" elif 'flux.1-kontext' in repo_id.lower(): - nunchaku_repo = f"nunchaku-tech/nunchaku-flux.1-kontext-dev/svdq-{nunchaku_precision}_r32-flux.1-kontext-dev.safetensors" + nunchaku_repo = f"nunchaku-ai/nunchaku-flux.1-kontext-dev/svdq-{nunchaku_precision}_r32-flux.1-kontext-dev.safetensors" elif 'flux.1-krea' in repo_id.lower(): - nunchaku_repo = f"nunchaku-tech/nunchaku-flux.1-krea-dev/svdq-{nunchaku_precision}_r32-flux.1-krea-dev.safetensors" + nunchaku_repo = f"nunchaku-ai/nunchaku-flux.1-krea-dev/svdq-{nunchaku_precision}_r32-flux.1-krea-dev.safetensors" elif 'flux.1-fill' in repo_id.lower(): - nunchaku_repo = f"nunchaku-tech/nunchaku-flux.1-fill-dev/svdq-{nunchaku_precision}-flux.1-fill-dev.safetensors" + nunchaku_repo = f"nunchaku-ai/nunchaku-flux.1-fill-dev/svdq-{nunchaku_precision}-flux.1-fill-dev.safetensors" elif 'flux.1-depth' in repo_id.lower(): - nunchaku_repo = f"nunchaku-tech/nunchaku-flux.1-depth-dev/svdq-{nunchaku_precision}-flux.1-depth-dev.safetensors" + nunchaku_repo = f"nunchaku-ai/nunchaku-flux.1-depth-dev/svdq-{nunchaku_precision}-flux.1-depth-dev.safetensors" elif 'shuttle' in repo_id.lower(): - nunchaku_repo = f"nunchaku-tech/nunchaku-shuttle-jaguar/svdq-{nunchaku_precision}-shuttle-jaguar.safetensors" + nunchaku_repo = f"nunchaku-ai/nunchaku-shuttle-jaguar/svdq-{nunchaku_precision}-shuttle-jaguar.safetensors" else: shared.log.error(f'Load module: quant=Nunchaku 
module=transformer repo="{repo_id}" unsupported') if nunchaku_repo is not None: diff --git a/pipelines/generic.py b/pipelines/generic.py index 6ad9a3fd5..3b1b4bc66 100644 --- a/pipelines/generic.py +++ b/pipelines/generic.py @@ -152,7 +152,7 @@ def load_text_encoder(repo_id, cls_name, load_config=None, subfolder="text_encod elif cls_name == transformers.T5EncoderModel and allow_shared and shared.opts.te_shared_t5: if model_quant.check_nunchaku('TE'): import nunchaku - repo_id = 'nunchaku-tech/nunchaku-t5/awq-int4-flux.1-t5xxl.safetensors' + repo_id = 'nunchaku-ai/nunchaku-t5/awq-int4-flux.1-t5xxl.safetensors' cls_name = nunchaku.NunchakuT5EncoderModel shared.log.debug(f'Load model: text_encoder="{repo_id}" cls={cls_name.__name__} quant="SVDQuant" loader={_loader("transformers")}') text_encoder = nunchaku.NunchakuT5EncoderModel.from_pretrained( diff --git a/pipelines/model_qwen.py b/pipelines/model_qwen.py index 3bea5c121..546e755cc 100644 --- a/pipelines/model_qwen.py +++ b/pipelines/model_qwen.py @@ -37,7 +37,7 @@ def load_qwen(checkpoint_info, diffusers_load_config=None): diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["qwen-image"] = diffusers.QwenImageInpaintPipeline if model_quant.check_nunchaku('Model'): - transformer = qwen.load_qwen_nunchaku(repo_id) + transformer = qwen.load_qwen_nunchaku(repo_id, subfolder=repo_subfolder) if 'Qwen-Image-Distill-Full' in repo_id: repo_transformer = repo_id @@ -63,6 +63,8 @@ def load_qwen(checkpoint_info, diffusers_load_config=None): text_encoder = generic.load_text_encoder(repo_te, cls_name=transformers.Qwen2_5_VLForConditionalGeneration, load_config=diffusers_load_config) repo_id, repo_subfolder = qwen.check_qwen_pruning(repo_id, repo_subfolder) + if repo_subfolder is not None and repo_subfolder.startswith('nunchaku'): + repo_subfolder = None pipe = cls_name.from_pretrained( repo_id, transformer=transformer, diff --git a/pipelines/model_sana.py b/pipelines/model_sana.py index 73da28472..8b04fcfb0 100644 
--- a/pipelines/model_sana.py +++ b/pipelines/model_sana.py @@ -9,7 +9,7 @@ def load_quants(kwargs, repo_id, cache_dir): if 'Sana_1600M_1024px' in repo_id and model_quant.check_nunchaku('Model'): # only available model import nunchaku nunchaku_precision = nunchaku.utils.get_precision() - nunchaku_repo = "nunchaku-tech/nunchaku-sana/svdq-int4_r32-sana1.6b.safetensors" + nunchaku_repo = "nunchaku-ai/nunchaku-sana/svdq-int4_r32-sana1.6b.safetensors" shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" precision={nunchaku_precision} attention={shared.opts.nunchaku_attention}') kwargs['transformer'] = nunchaku.NunchakuSanaTransformer2DModel.from_pretrained(nunchaku_repo, torch_dtype=devices.dtype, cache_dir=cache_dir) elif model_quant.check_quant('Model'): diff --git a/pipelines/model_z_image.py b/pipelines/model_z_image.py index 9f8dd51e1..3aa269b48 100644 --- a/pipelines/model_z_image.py +++ b/pipelines/model_z_image.py @@ -8,7 +8,7 @@ def load_nunchaku(): import nunchaku nunchaku_precision = nunchaku.utils.get_precision() nunchaku_rank = 128 - nunchaku_repo = f"nunchaku-tech/nunchaku-z-image-turbo/svdq-{nunchaku_precision}_r{nunchaku_rank}-z-image-turbo.safetensors" + nunchaku_repo = f"nunchaku-ai/nunchaku-z-image-turbo/svdq-{nunchaku_precision}_r{nunchaku_rank}-z-image-turbo.safetensors" shared.log.debug(f'Load module: quant=Nunchaku module=transformer repo="{nunchaku_repo}" attention={shared.opts.nunchaku_attention}') transformer = nunchaku.NunchakuZImageTransformer2DModel.from_pretrained( nunchaku_repo, diff --git a/pipelines/qwen/qwen_nunchaku.py b/pipelines/qwen/qwen_nunchaku.py index 4c89b7b1c..4fd964df3 100644 --- a/pipelines/qwen/qwen_nunchaku.py +++ b/pipelines/qwen/qwen_nunchaku.py @@ -1,11 +1,12 @@ from modules import shared, devices -def load_qwen_nunchaku(repo_id): +def load_qwen_nunchaku(repo_id, subfolder=None): import nunchaku nunchaku_precision = nunchaku.utils.get_precision() nunchaku_repo = None transformer = 
None + four_step = subfolder is not None and '4step' in subfolder try: from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel except Exception: @@ -14,15 +15,21 @@ def load_qwen_nunchaku(repo_id): if 'pruning' in repo_id.lower() or 'distill' in repo_id.lower(): return None elif repo_id.lower().endswith('qwen-image'): - nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image.safetensors" # r32 vs r128 + nunchaku_repo = f"nunchaku-ai/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image.safetensors" elif repo_id.lower().endswith('qwen-lightning'): - nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image-lightningv1.1-8steps.safetensors" # 8-step variant + if four_step: + nunchaku_repo = f"nunchaku-ai/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image-lightningv1.0-4steps.safetensors" + else: + nunchaku_repo = f"nunchaku-ai/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image-lightningv1.1-8steps.safetensors" elif repo_id.lower().endswith('qwen-image-edit-2509'): - nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image-edit-2509/svdq-{nunchaku_precision}_r128-qwen-image-edit-2509.safetensors" # 8-step variant + nunchaku_repo = f"nunchaku-ai/nunchaku-qwen-image-edit-2509/svdq-{nunchaku_precision}_r128-qwen-image-edit-2509.safetensors" elif repo_id.lower().endswith('qwen-image-edit'): - nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image-edit/svdq-{nunchaku_precision}_r128-qwen-image-edit.safetensors" # 8-step variant + nunchaku_repo = f"nunchaku-ai/nunchaku-qwen-image-edit/svdq-{nunchaku_precision}_r128-qwen-image-edit.safetensors" elif repo_id.lower().endswith('qwen-lightning-edit'): - nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image-edit/svdq-{nunchaku_precision}_r128-qwen-image-edit-lightningv1.0-8steps.safetensors" # 8-step variant + if four_step: + nunchaku_repo = 
f"nunchaku-ai/nunchaku-qwen-image-edit/svdq-{nunchaku_precision}_r128-qwen-image-edit-lightningv1.0-4steps.safetensors" + else: + nunchaku_repo = f"nunchaku-ai/nunchaku-qwen-image-edit/svdq-{nunchaku_precision}_r128-qwen-image-edit-lightningv1.0-8steps.safetensors" else: shared.log.error(f'Load module: quant=Nunchaku module=transformer repo="{repo_id}" unsupported') if nunchaku_repo is not None: diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py index aa0358ce1..b2e87c36c 100644 --- a/scripts/flux_tools.py +++ b/scripts/flux_tools.py @@ -25,7 +25,7 @@ class Script(scripts_manager.Script): with gr.Row(): gr.HTML('  Flux.1 Redux
') with gr.Row(): - tool = gr.Dropdown(label='Tool', choices=['None', 'Redux', 'Fill', 'Canny', 'Depth'], value='None') + tool = gr.Dropdown(label='Tool', choices=['None', 'Redux', 'Fill', 'Fill (Nunchaku)', 'Canny', 'Depth', 'Depth (Nunchaku)'], value='None') with gr.Row(): prompt = gr.Slider(label='Redux prompt strength', minimum=0, maximum=2, step=0.01, value=0, visible=False) process = gr.Checkbox(label='Control preprocess input images', value=True, visible=False) @@ -34,8 +34,8 @@ class Script(scripts_manager.Script): def display(tool): return [ gr.update(visible=tool in ['Redux']), - gr.update(visible=tool in ['Canny', 'Depth']), - gr.update(visible=tool in ['Canny', 'Depth']), + gr.update(visible=tool in ['Canny', 'Depth', 'Depth (Nunchaku)']), + gr.update(visible=tool in ['Canny', 'Depth', 'Depth (Nunchaku)']), ] tool.change(fn=display, inputs=[tool], outputs=[prompt, process, strength]) @@ -91,13 +91,15 @@ class Script(scripts_manager.Script): shared.log.debug(f'{title}: tool=Redux unload') redux_pipe = None - if tool == 'Fill': + if tool in ['Fill', 'Fill (Nunchaku)']: # pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16, revision="refs/pr/4").to("cuda") if p.image_mask is None: shared.log.error(f'{title}: tool={tool} no image_mask') return None - if shared.sd_model.__class__.__name__ != 'FluxFillPipeline': - shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Fill-dev" + nunchaku_suffix = '+nunchaku' if tool == 'Fill (Nunchaku)' else '' + checkpoint = f"black-forest-labs/FLUX.1-Fill-dev{nunchaku_suffix}" + if shared.sd_model.__class__.__name__ != 'FluxFillPipeline' or shared.opts.sd_model_checkpoint != checkpoint: + shared.opts.data["sd_model_checkpoint"] = checkpoint sd_models.reload_model_weights(op='model', revision="refs/pr/4") p.task_args['image'] = image p.task_args['mask_image'] = p.image_mask @@ -124,11 +126,13 @@ class Script(scripts_manager.Script): shared.log.debug(f'{title}: 
tool=Canny unload processor') processor_canny = None - if tool == 'Depth': + if tool in ['Depth', 'Depth (Nunchaku)']: # pipe = diffusers.FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda") install('git+https://github.com/huggingface/image_gen_aux.git', 'image_gen_aux') - if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Depth' not in shared.opts.sd_model_checkpoint: - shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Depth-dev" + nunchaku_suffix = '+nunchaku' if tool == 'Depth (Nunchaku)' else '' + checkpoint = f"black-forest-labs/FLUX.1-Depth-dev{nunchaku_suffix}" + if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or shared.opts.sd_model_checkpoint != checkpoint: + shared.opts.data["sd_model_checkpoint"] = checkpoint sd_models.reload_model_weights(op='model', revision="refs/pr/1") if processor_depth is None: from image_gen_aux import DepthPreprocessor