fix: correct comments and cleanup model descriptions

- Fix Klein text encoder comment to specify correct sizes per variant
- Lock TAESD decode logging behind SD_PREVIEW_DEBUG env var
- Fix misleading comment about FLUX.2 128-channel reshape (it is a fallback path)
- Remove VRAM requirements from model descriptions in reference files
pull/4553/head
CalamitousFelicitousness 2026-01-16 03:24:39 +00:00
parent 5e2bc01367
commit eaa8dbcd42
4 changed files with 12 additions and 8 deletions

View File

@ -165,7 +165,7 @@
"Black Forest Labs FLUX.2 Klein 4B": {
"path": "black-forest-labs/FLUX.2-klein-4B",
"preview": "black-forest-labs--FLUX.2-klein-4B.jpg",
"desc": "FLUX.2-klein-4B is a 4 billion parameter size-distilled version of FLUX.2-dev optimized for consumer GPUs. Achieves sub-second inference with 4 steps while fitting in ~13GB VRAM. Supports both text-to-image generation and multi-reference image editing. Apache 2.0 licensed.",
"desc": "FLUX.2-klein-4B is a 4 billion parameter size-distilled version of FLUX.2-dev optimized for consumer GPUs. Achieves sub-second inference with 4 steps. Supports both text-to-image generation and multi-reference image editing. Apache 2.0 licensed.",
"skip": true,
"tags": "distilled",
"extras": "sampler: Default, cfg_scale: 4.0, steps: 4",
@ -175,7 +175,7 @@
"Black Forest Labs FLUX.2 Klein 9B": {
"path": "black-forest-labs/FLUX.2-klein-9B",
"preview": "black-forest-labs--FLUX.2-klein-9B.jpg",
"desc": "FLUX.2-klein-9B is a 9 billion parameter size-distilled version of FLUX.2-dev. Higher quality than 4B variant with sub-second inference using 4 steps. Requires ~29GB VRAM. Supports text-to-image and multi-reference editing. Non-commercial license.",
"desc": "FLUX.2-klein-9B is a 9 billion parameter size-distilled version of FLUX.2-dev. Higher quality than 4B variant with sub-second inference using 4 steps. Supports text-to-image and multi-reference editing. Non-commercial license.",
"skip": true,
"tags": "distilled",
"extras": "sampler: Default, cfg_scale: 4.0, steps: 4",

View File

@ -127,7 +127,7 @@
"Black Forest Labs FLUX.2 Klein Base 4B": {
"path": "black-forest-labs/FLUX.2-klein-base-4B",
"preview": "black-forest-labs--FLUX.2-klein-base-4B.jpg",
"desc": "FLUX.2-klein-base-4B is the undistilled 4 billion parameter base model of FLUX.2-klein. Requires 50 inference steps for full quality but offers flexibility for fine-tuning. Fits in ~13GB VRAM. Supports text-to-image and multi-reference editing. Apache 2.0 licensed.",
"desc": "FLUX.2-klein-base-4B is the undistilled 4 billion parameter base model of FLUX.2-klein. Requires 50 inference steps for full quality but offers flexibility for fine-tuning. Supports text-to-image and multi-reference editing. Apache 2.0 licensed.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 4.0, steps: 50",
"size": 8.5,
@ -136,7 +136,7 @@
"Black Forest Labs FLUX.2 Klein Base 9B": {
"path": "black-forest-labs/FLUX.2-klein-base-9B",
"preview": "black-forest-labs--FLUX.2-klein-base-9B.jpg",
"desc": "FLUX.2-klein-base-9B is the undistilled 9 billion parameter base model of FLUX.2-klein. Requires 50 inference steps for full quality but offers flexibility for fine-tuning. Requires ~29GB VRAM. Supports text-to-image and multi-reference editing. Non-commercial license.",
"desc": "FLUX.2-klein-base-9B is the undistilled 9 billion parameter base model of FLUX.2-klein. Requires 50 inference steps for full quality but offers flexibility for fine-tuning. Supports text-to-image and multi-reference editing. Non-commercial license.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 4.0, steps: 50",
"size": 18.5,
@ -146,7 +146,7 @@
"Z-Image-Turbo": {
"path": "Tongyi-MAI/Z-Image-Turbo",
"preview": "Tongyi-MAI--Z-Image-Turbo.jpg",
"desc": "Z-Image-Turbo, a distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It offers sub-second inference latency on enterprise-grade H800 GPUs and fits comfortably within 16G VRAM consumer devices. It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.",
"desc": "Z-Image-Turbo is a distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.",
"skip": true,
"extras": "sampler: Default, cfg_scale: 1.0, steps: 9",
"size": 20.3,

View File

@ -12,6 +12,9 @@ import torch
from modules import devices, paths, shared
debug = os.environ.get('SD_PREVIEW_DEBUG', None) is not None
TAESD_MODELS = {
'TAESD 1.3 Mocha Croissant': { 'fn': 'taesd_13_', 'uri': 'https://github.com/madebyollin/taesd/raw/7f572ca629c9b0d3c9f71140e5f501e09f9ea280', 'model': None },
'TAESD 1.2 Chocolate-Dipped Shortbread': { 'fn': 'taesd_12_', 'uri': 'https://github.com/madebyollin/taesd/raw/8909b44e3befaa0efa79c5791e4fe1c4d4f7884e', 'model': None },
@ -158,8 +161,9 @@ def decode(latents):
dtype = devices.dtype_vae if devices.dtype_vae != torch.bfloat16 else torch.float16 # taesd does not support bf16
tensor = latents.unsqueeze(0) if len(latents.shape) == 3 else latents
tensor = tensor.detach().clone().to(devices.device, dtype=dtype)
shared.log.debug(f'Decode: type="taesd" variant="{variant}" input={latents.shape} tensor={tensor.shape}')
# FLUX.2 has 128 latent channels that need reshaping to 32 channels for TAESD
if debug:
shared.log.debug(f'Decode: type="taesd" variant="{variant}" input={latents.shape} tensor={tensor.shape}')
# Fallback: reshape packed 128-channel latents to 32 channels if not already unpacked
if variant == 'TAE FLUX.2' and len(tensor.shape) == 4 and tensor.shape[1] == 128:
b, _c, h, w = tensor.shape
tensor = tensor.reshape(b, 32, h * 2, w * 2)

View File

@ -16,7 +16,7 @@ def load_flux2_klein(checkpoint_info, diffusers_load_config=None):
# Load transformer - Klein uses Flux2Transformer2DModel (same class as Flux2, different size)
transformer = generic.load_transformer(repo_id, cls_name=diffusers.Flux2Transformer2DModel, load_config=diffusers_load_config)
# Load text encoder - Klein uses Qwen3ForCausalLM (8B), shared across all Klein variants
# Load text encoder - Klein uses Qwen3 (4B for Klein-4B, 8B for Klein-9B)
text_encoder = generic.load_text_encoder(repo_id, cls_name=transformers.Qwen3ForCausalLM, load_config=diffusers_load_config)
pipe = diffusers.Flux2KleinPipeline.from_pretrained(