diff --git a/data/reference-nunchaku.json b/data/reference-nunchaku.json
new file mode 100644
index 000000000..a22be9e91
--- /dev/null
+++ b/data/reference-nunchaku.json
@@ -0,0 +1,209 @@
+{
+ "FLUX.1-Dev Nunchaku SVDQuant": {
+ "path": "black-forest-labs/FLUX.1-dev",
+ "subfolder": "nunchaku",
+ "preview": "black-forest-labs--FLUX.1-dev.jpg",
+ "desc": "Nunchaku SVDQuant quantization of FLUX.1-dev transformer with INT4 and SVD rank 32",
+ "skip": true,
+ "nunchaku": ["Model", "TE"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "FLUX.1-Schnell Nunchaku SVDQuant": {
+ "path": "black-forest-labs/FLUX.1-schnell",
+ "subfolder": "nunchaku",
+ "preview": "black-forest-labs--FLUX.1-schnell.jpg",
+ "desc": "Nunchaku SVDQuant quantization of FLUX.1-schnell transformer with INT4 and SVD rank 32",
+ "skip": true,
+ "nunchaku": ["Model", "TE"],
+ "tags": "nunchaku",
+ "extras": "sampler: Default, cfg_scale: 1.0, steps: 4",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "FLUX.1-Kontext Nunchaku SVDQuant": {
+ "path": "black-forest-labs/FLUX.1-Kontext-dev",
+ "subfolder": "nunchaku",
+ "preview": "black-forest-labs--FLUX.1-Kontext-dev.jpg",
+ "desc": "Nunchaku SVDQuant quantization of FLUX.1-Kontext-dev transformer with INT4 and SVD rank 32",
+ "skip": true,
+ "nunchaku": ["Model", "TE"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "FLUX.1-Krea Nunchaku SVDQuant": {
+ "path": "black-forest-labs/FLUX.1-Krea-dev",
+ "subfolder": "nunchaku",
+ "preview": "black-forest-labs--FLUX.1-Krea-dev.jpg",
+ "desc": "Nunchaku SVDQuant quantization of FLUX.1-Krea-dev transformer with INT4 and SVD rank 32",
+ "skip": true,
+ "nunchaku": ["Model", "TE"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "FLUX.1-Fill Nunchaku SVDQuant": {
+ "path": "black-forest-labs/FLUX.1-Fill-dev",
+ "subfolder": "nunchaku",
+ "preview": "black-forest-labs--FLUX.1-Fill-dev.jpg",
+ "desc": "Nunchaku SVDQuant quantization of FLUX.1-Fill-dev transformer for inpainting",
+ "skip": true,
+ "hidden": true,
+ "nunchaku": ["Model", "TE"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "FLUX.1-Depth Nunchaku SVDQuant": {
+ "path": "black-forest-labs/FLUX.1-Depth-dev",
+ "subfolder": "nunchaku",
+ "preview": "black-forest-labs--FLUX.1-Depth-dev.jpg",
+ "desc": "Nunchaku SVDQuant quantization of FLUX.1-Depth-dev transformer for depth-conditioned generation",
+ "skip": true,
+ "hidden": true,
+ "nunchaku": ["Model", "TE"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Shuttle Jaguar Nunchaku SVDQuant": {
+ "path": "shuttleai/shuttle-jaguar",
+ "subfolder": "nunchaku",
+ "preview": "shuttleai--shuttle-jaguar.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Shuttle Jaguar transformer",
+ "skip": true,
+ "nunchaku": ["Model", "TE"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Qwen-Image Nunchaku SVDQuant": {
+ "path": "Qwen/Qwen-Image",
+ "subfolder": "nunchaku",
+ "preview": "Qwen--Qwen-Image.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Qwen-Image transformer with INT4 and SVD rank 128",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Qwen-Lightning (8-step) Nunchaku SVDQuant": {
+ "path": "vladmandic/Qwen-Lightning",
+ "subfolder": "nunchaku",
+ "preview": "vladmandic--Qwen-Lightning.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Qwen-Lightning (8-step distilled) transformer with INT4 and SVD rank 128",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "extras": "steps: 8",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Qwen-Lightning (4-step) Nunchaku SVDQuant": {
+ "path": "vladmandic/Qwen-Lightning",
+ "subfolder": "nunchaku-4step",
+ "preview": "vladmandic--Qwen-Lightning.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Qwen-Lightning (4-step distilled) transformer with INT4 and SVD rank 128",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "extras": "steps: 4",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Qwen-Image-Edit Nunchaku SVDQuant": {
+ "path": "Qwen/Qwen-Image-Edit",
+ "subfolder": "nunchaku",
+ "preview": "Qwen--Qwen-Image-Edit.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Qwen-Image-Edit transformer with INT4 and SVD rank 128",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Qwen-Lightning-Edit (8-step) Nunchaku SVDQuant": {
+ "path": "vladmandic/Qwen-Lightning-Edit",
+ "subfolder": "nunchaku",
+ "preview": "vladmandic--Qwen-Lightning-Edit.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Qwen-Lightning-Edit (8-step distilled editing) transformer with INT4 and SVD rank 128",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "extras": "steps: 8",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Qwen-Lightning-Edit (4-step) Nunchaku SVDQuant": {
+ "path": "vladmandic/Qwen-Lightning-Edit",
+ "subfolder": "nunchaku-4step",
+ "preview": "vladmandic--Qwen-Lightning-Edit.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Qwen-Lightning-Edit (4-step distilled editing) transformer with INT4 and SVD rank 128",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "extras": "steps: 4",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Qwen-Image-Edit-2509 Nunchaku SVDQuant": {
+ "path": "Qwen/Qwen-Image-Edit-2509",
+ "subfolder": "nunchaku",
+ "preview": "Qwen--Qwen-Image-Edit-2509.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Qwen-Image-Edit-2509 transformer with INT4 and SVD rank 128",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 September"
+ },
+ "Sana 1.6B 1k Nunchaku SVDQuant": {
+ "path": "Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers",
+ "subfolder": "nunchaku",
+ "preview": "Efficient-Large-Model--Sana_1600M_1024px_BF16_diffusers.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Sana 1.6B 1024px transformer with INT4 and SVD rank 32",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "Z-Image-Turbo Nunchaku SVDQuant": {
+ "path": "Tongyi-MAI/Z-Image-Turbo",
+ "subfolder": "nunchaku",
+ "preview": "Tongyi-MAI--Z-Image-Turbo.jpg",
+ "desc": "Nunchaku SVDQuant quantization of Z-Image-Turbo transformer with INT4 and SVD rank 128",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "extras": "sampler: Default, cfg_scale: 1.0, steps: 9",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "SDXL Base Nunchaku SVDQuant": {
+ "path": "stabilityai/stable-diffusion-xl-base-1.0",
+ "subfolder": "nunchaku",
+ "preview": "stabilityai--stable-diffusion-xl-base-1.0.jpg",
+ "desc": "Nunchaku SVDQuant quantization of SDXL Base 1.0 UNet with INT4 and SVD rank 32",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "size": 0,
+ "date": "2025 June"
+ },
+ "SDXL Turbo Nunchaku SVDQuant": {
+ "path": "stabilityai/sdxl-turbo",
+ "subfolder": "nunchaku",
+ "preview": "stabilityai--sdxl-turbo.jpg",
+ "desc": "Nunchaku SVDQuant quantization of SDXL Turbo UNet with INT4 and SVD rank 32",
+ "skip": true,
+ "nunchaku": ["Model"],
+ "tags": "nunchaku",
+ "extras": "sampler: Default, cfg_scale: 1.0, steps: 4",
+ "size": 0,
+ "date": "2025 June"
+ }
+}
diff --git a/html/locale_en.json b/html/locale_en.json
index 5eb0b9f89..599bf7de5 100644
--- a/html/locale_en.json
+++ b/html/locale_en.json
@@ -337,6 +337,8 @@
{"id":"","label":"Model Options","localized":"","reload":"","hint":"Settings related to behavior of specific models"},
{"id":"","label":"Model Offloading","localized":"","reload":"","hint":"Settings related to model offloading and memory management"},
{"id":"","label":"Model Quantization","localized":"","reload":"","hint":"Settings related to model quantization which is used to reduce memory usage"},
+ {"id":"","label":"Nunchaku attention","localized":"","reload":"","hint":"Replaces default attention with Nunchaku's custom FP16 attention kernel for faster inference on consumer NVIDIA GPUs.
Might provide performance improvement on GPUs which have higher FP16 tensor cores throughput than BF16.
Currently only affects Flux-based models (Dev, Schnell, Kontext, Fill, Depth, etc.). Has no effect on Qwen, SDXL, Sana, or other architectures.
Disabled by default."},
+ {"id":"","label":"Nunchaku offloading","localized":"","reload":"","hint":"Enables Nunchaku's own per-block CPU offloading with asynchronous CUDA streams to reduce VRAM usage.
Uses a ping-pong buffer strategy: while one transformer block computes on GPU, the next block preloads from CPU in the background, hiding most of the transfer latency.
Can reduce VRAM usage at the cost of slower inference.
This replaces SD.Next's pipeline offloading for the transformer component.
Only useful on low-VRAM GPUs. If your GPU has enough memory to hold the quantized model (16+ GB), keep this disabled for maximum speed.
Supports Flux and Qwen models. Not supported for SDXL where this setting is ignored.
Disabled by default."},
{"id":"","label":"Image Metadata","localized":"","reload":"","hint":"Settings related to handling of metadata that is created with generated images"},
{"id":"","label":"Legacy Options","localized":"","reload":"","hint":"Settings related to legacy options - should not be used"},
{"id":"","label":"Restart server","localized":"","reload":"","hint":"Restart server"},
diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js
index acb305da8..eaf516ff2 100644
--- a/javascript/extraNetworks.js
+++ b/javascript/extraNetworks.js
@@ -171,6 +171,12 @@ async function filterExtraNetworksForTab(searchTerm) {
.toLowerCase()
.includes('quantized') ? '' : 'none';
});
+ } else if (searchTerm === 'nunchaku/') {
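+ // show only cards tagged 'nunchaku' when the virtual 'nunchaku/' folder is searched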
+ cards.forEach((elem) => {
+ elem.style.display = elem.dataset.tags
+ .toLowerCase()
+ .includes('nunchaku') ? '' : 'none';
+ });
} else if (searchTerm === 'local/') {
cards.forEach((elem) => {
elem.style.display = elem.dataset.name
diff --git a/modules/mit_nunchaku.py b/modules/mit_nunchaku.py
index b5e82c1da..6b4e524cc 100644
--- a/modules/mit_nunchaku.py
+++ b/modules/mit_nunchaku.py
@@ -4,10 +4,27 @@ from installer import log, pip
from modules import devices
-nunchaku_ver = '1.1.0'
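+# supported torch releases (major.minor) mapped to the nunchaku wheel version built for them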
+nunchaku_versions = {
+ '2.5': '1.0.1',
+ '2.6': '1.0.1',
+ '2.7': '1.1.0',
+ '2.8': '1.1.0',
+ '2.9': '1.1.0',
+ '2.10': '1.0.2',
+ '2.11': '1.1.0',
+}
ok = False
+def _expected_ver():
+ try:
+ import torch
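+ # e.g. '2.8.0+cu128' -> '2.8': drop the local build suffix, keep major.minor only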
+ torch_ver = '.'.join(torch.__version__.split('+')[0].split('.')[:2])
+ return nunchaku_versions.get(torch_ver)
+ except Exception:
+ return None
+
+
def check():
global ok # pylint: disable=global-statement
if ok:
@@ -16,8 +33,9 @@ def check():
import nunchaku
import nunchaku.utils
from nunchaku import __version__
+ expected = _expected_ver()
log.info(f'Nunchaku: path={nunchaku.__path__} version={__version__.__version__} precision={nunchaku.utils.get_precision()}')
- if __version__.__version__ != nunchaku_ver:
+ if expected is not None and __version__.__version__ != expected:
ok = False
return False
ok = True
@@ -49,14 +67,16 @@ def install_nunchaku():
if devices.backend not in ['cuda']:
log.error(f'Nunchaku: backend={devices.backend} unsupported')
return False
- torch_ver = torch.__version__[:3]
- if torch_ver not in ['2.5', '2.6', '2.7', '2.8', '2.9', '2.10']:
+ torch_ver = '.'.join(torch.__version__.split('+')[0].split('.')[:2])
+ nunchaku_ver = nunchaku_versions.get(torch_ver)
+ if nunchaku_ver is None:
log.error(f'Nunchaku: torch={torch.__version__} unsupported')
+ return False
suffix = 'x86_64' if arch == 'linux' else 'win_amd64'
url = os.environ.get('NUNCHAKU_COMMAND', None)
if url is None:
arch = f'{arch}_' if arch == 'linux' else ''
- url = f'https://huggingface.co/nunchaku-tech/nunchaku/resolve/main/nunchaku-{nunchaku_ver}'
+ url = f'https://huggingface.co/nunchaku-ai/nunchaku/resolve/main/nunchaku-{nunchaku_ver}'
url += f'+torch{torch_ver}-cp{python_ver}-cp{python_ver}-{arch}{suffix}.whl'
cmd = f'install --upgrade {url}'
log.debug(f'Nunchaku: install="{url}"')
diff --git a/modules/model_quant.py b/modules/model_quant.py
index 1a501be0a..3cad91181 100644
--- a/modules/model_quant.py
+++ b/modules/model_quant.py
@@ -255,13 +255,25 @@ def check_quant(module: str = ''):
def check_nunchaku(module: str = ''):
from modules import shared
- if module not in shared.opts.nunchaku_quantization:
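+ # nunchaku is enabled per model: the selected checkpoint name must carry the '+nunchaku' suffix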
+ model_name = getattr(shared.opts, 'sd_model_checkpoint', '')
+ if '+nunchaku' not in model_name:
return False
- from modules import mit_nunchaku
- mit_nunchaku.install_nunchaku()
- if not mit_nunchaku.ok:
- return False
- return True
+ base_path = model_name.split('+')[0]
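+ # look up the reference entry matching the base model and check whether this module ('Model' or 'TE') is listed as nunchaku-enabled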
+ for v in shared.reference_models.values():
+ if v.get('path', '') != base_path:
+ continue
+ nunchaku_modules = v.get('nunchaku', None)
+ if nunchaku_modules is None:
+ continue
+ if isinstance(nunchaku_modules, bool) and nunchaku_modules:
+ nunchaku_modules = ['Model', 'TE']
+ if not isinstance(nunchaku_modules, list):
+ continue
+ if module in nunchaku_modules:
+ from modules import mit_nunchaku
+ mit_nunchaku.install_nunchaku()
+ return mit_nunchaku.ok
+ return False
def create_config(kwargs = None, allow: bool = True, module: str = 'Model', modules_to_not_convert: list = None, modules_dtype_dict: dict = None):
diff --git a/modules/sd_unet.py b/modules/sd_unet.py
index c73ca8dc5..643d1c08e 100644
--- a/modules/sd_unet.py
+++ b/modules/sd_unet.py
@@ -18,14 +18,15 @@ def load_unet_sdxl_nunchaku(repo_id):
shared.log.error(f'Load module: quant=Nunchaku module=unet repo="{repo_id}" low nunchaku version')
return None
if 'turbo' in repo_id.lower():
- nunchaku_repo = 'nunchaku-tech/nunchaku-sdxl-turbo/svdq-int4_r32-sdxl-turbo.safetensors'
+ nunchaku_repo = 'nunchaku-ai/nunchaku-sdxl-turbo/svdq-int4_r32-sdxl-turbo.safetensors'
else:
- nunchaku_repo = 'nunchaku-tech/nunchaku-sdxl/svdq-int4_r32-sdxl.safetensors'
+ nunchaku_repo = 'nunchaku-ai/nunchaku-sdxl/svdq-int4_r32-sdxl.safetensors'
- shared.log.debug(f'Load module: quant=Nunchaku module=unet repo="{nunchaku_repo}" offload={shared.opts.nunchaku_offload}')
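+ # the SDXL nunchaku unet has no per-block offload support, so the offload option is ignored with a warning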
+ if shared.opts.nunchaku_offload:
+ shared.log.warning('Load module: quant=Nunchaku module=unet offload not supported for SDXL, ignoring')
+ shared.log.debug(f'Load module: quant=Nunchaku module=unet repo="{nunchaku_repo}"')
unet = NunchakuSDXLUNet2DConditionModel.from_pretrained(
nunchaku_repo,
- offload=shared.opts.nunchaku_offload,
torch_dtype=devices.dtype,
cache_dir=shared.opts.hfcache_dir,
)
diff --git a/modules/shared.py b/modules/shared.py
index 91188a825..0d606ab2a 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -281,7 +281,6 @@ options_templates.update(options_section(("quantization", "Model Quantization"),
"sdnq_quantize_shuffle_weights": OptionInfo(False, "Shuffle weights in post mode", gr.Checkbox),
"nunchaku_sep": OptionInfo("