diff --git a/html/reference-distilled.json b/html/reference-distilled.json index 123d8e6e4..e77c3b4b6 100644 --- a/html/reference-distilled.json +++ b/html/reference-distilled.json @@ -165,7 +165,7 @@ "Black Forest Labs FLUX.2 Klein 4B": { "path": "black-forest-labs/FLUX.2-klein-4B", "preview": "black-forest-labs--FLUX.2-klein-4B.jpg", - "desc": "FLUX.2-klein-4B is a 4 billion parameter size-distilled version of FLUX.2-dev optimized for consumer GPUs. Achieves sub-second inference with 4 steps while fitting in ~13GB VRAM. Supports both text-to-image generation and multi-reference image editing. Apache 2.0 licensed.", + "desc": "FLUX.2-klein-4B is a 4 billion parameter size-distilled version of FLUX.2-dev optimized for consumer GPUs. Achieves sub-second inference with 4 steps. Supports both text-to-image generation and multi-reference image editing. Apache 2.0 licensed.", "skip": true, "tags": "distilled", "extras": "sampler: Default, cfg_scale: 4.0, steps: 4", @@ -175,7 +175,7 @@ "Black Forest Labs FLUX.2 Klein 9B": { "path": "black-forest-labs/FLUX.2-klein-9B", "preview": "black-forest-labs--FLUX.2-klein-9B.jpg", - "desc": "FLUX.2-klein-9B is a 9 billion parameter size-distilled version of FLUX.2-dev. Higher quality than 4B variant with sub-second inference using 4 steps. Requires ~29GB VRAM. Supports text-to-image and multi-reference editing. Non-commercial license.", + "desc": "FLUX.2-klein-9B is a 9 billion parameter size-distilled version of FLUX.2-dev. Higher quality than 4B variant with sub-second inference using 4 steps. Supports text-to-image and multi-reference editing. Non-commercial license.", "skip": true, "tags": "distilled", "extras": "sampler: Default, cfg_scale: 4.0, steps: 4", diff --git a/html/reference.json b/html/reference.json index 63cd80124..2f1f6562b 100644 --- a/html/reference.json +++ b/html/reference.json @@ -127,7 +127,7 @@ "Black Forest Labs FLUX.2 Klein Base 4B": { "path": "black-forest-labs/FLUX.2-klein-base-4B", "preview": "black-forest-labs--FLUX.2-klein-base-4B.jpg", - "desc": "FLUX.2-klein-base-4B is the undistilled 4 billion parameter base model of FLUX.2-klein. Requires 50 inference steps for full quality but offers flexibility for fine-tuning. Fits in ~13GB VRAM. Supports text-to-image and multi-reference editing. Apache 2.0 licensed.", + "desc": "FLUX.2-klein-base-4B is the undistilled 4 billion parameter base model of FLUX.2-klein. Requires 50 inference steps for full quality but offers flexibility for fine-tuning. Supports text-to-image and multi-reference editing. Apache 2.0 licensed.", "skip": true, "extras": "sampler: Default, cfg_scale: 4.0, steps: 50", "size": 8.5, @@ -136,7 +136,7 @@ "Black Forest Labs FLUX.2 Klein Base 9B": { "path": "black-forest-labs/FLUX.2-klein-base-9B", "preview": "black-forest-labs--FLUX.2-klein-base-9B.jpg", - "desc": "FLUX.2-klein-base-9B is the undistilled 9 billion parameter base model of FLUX.2-klein. Requires 50 inference steps for full quality but offers flexibility for fine-tuning. Requires ~29GB VRAM. Supports text-to-image and multi-reference editing. Non-commercial license.", + "desc": "FLUX.2-klein-base-9B is the undistilled 9 billion parameter base model of FLUX.2-klein. Requires 50 inference steps for full quality but offers flexibility for fine-tuning. Supports text-to-image and multi-reference editing. Non-commercial license.", "skip": true, "extras": "sampler: Default, cfg_scale: 4.0, steps: 50", "size": 18.5, @@ -146,7 +146,7 @@ "Z-Image-Turbo": { "path": "Tongyi-MAI/Z-Image-Turbo", "preview": "Tongyi-MAI--Z-Image-Turbo.jpg", - "desc": "Z-Image-Turbo, a distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It offers sub-second inference latency on enterprise-grade H800 GPUs and fits comfortably within 16G VRAM consumer devices. It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.", + "desc": "Z-Image-Turbo, a distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.", "skip": true, "extras": "sampler: Default, cfg_scale: 1.0, steps: 9", "size": 20.3, diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py index 80cf5de69..1c5dd0dd0 100644 --- a/modules/sd_vae_taesd.py +++ b/modules/sd_vae_taesd.py @@ -12,6 +12,9 @@ import torch from modules import devices, paths, shared +debug = os.environ.get('SD_PREVIEW_DEBUG', None) is not None + + TAESD_MODELS = { 'TAESD 1.3 Mocha Croissant': { 'fn': 'taesd_13_', 'uri': 'https://github.com/madebyollin/taesd/raw/7f572ca629c9b0d3c9f71140e5f501e09f9ea280', 'model': None }, 'TAESD 1.2 Chocolate-Dipped Shortbread': { 'fn': 'taesd_12_', 'uri': 'https://github.com/madebyollin/taesd/raw/8909b44e3befaa0efa79c5791e4fe1c4d4f7884e', 'model': None }, @@ -158,8 +161,9 @@ def decode(latents): dtype = devices.dtype_vae if devices.dtype_vae != torch.bfloat16 else torch.float16 # taesd does not support bf16 tensor = latents.unsqueeze(0) if len(latents.shape) == 3 else latents tensor = tensor.detach().clone().to(devices.device, dtype=dtype) - shared.log.debug(f'Decode: type="taesd" variant="{variant}" input={latents.shape} tensor={tensor.shape}') - # FLUX.2 has 128 latent channels that need reshaping to 32 channels for TAESD + if debug: + shared.log.debug(f'Decode: type="taesd" variant="{variant}" input={latents.shape} tensor={tensor.shape}') + # Fallback: reshape packed 128-channel latents to 32 channels if not already unpacked if variant == 'TAE FLUX.2' and len(tensor.shape) == 4 and tensor.shape[1] == 128: b, _c, h, w = tensor.shape tensor = tensor.reshape(b, 32, h * 2, w * 2) diff --git a/pipelines/model_flux2_klein.py b/pipelines/model_flux2_klein.py index 9b8b05ac8..d810821d9 100644 --- a/pipelines/model_flux2_klein.py +++ b/pipelines/model_flux2_klein.py @@ -16,7 +16,7 @@ def load_flux2_klein(checkpoint_info, diffusers_load_config=None): # Load transformer - Klein uses Flux2Transformer2DModel (same class as Flux2, different size) transformer = generic.load_transformer(repo_id, cls_name=diffusers.Flux2Transformer2DModel, load_config=diffusers_load_config) - # Load text encoder - Klein uses Qwen3 (4B for Klein-4B, 8B for Klein-9B) text_encoder = generic.load_text_encoder(repo_id, cls_name=transformers.Qwen3ForCausalLM, load_config=diffusers_load_config) pipe = diffusers.Flux2KleinPipeline.from_pretrained(