add qwen pruning variants

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/4250/head
Vladimir Mandic 2025-10-03 11:36:12 -04:00
parent 54acf1760b
commit a6108dd6df
11 changed files with 81 additions and 18 deletions

View File

@ -1,12 +1,14 @@
# Change Log for SD.Next
## Update for 2025-10-02
## Update for 2025-10-03
- **Models**
- [WAN 2.2 14B VACE](https://huggingface.co/alibaba-pai/Wan2.2-VACE-Fun-A14B)
available for *text-to-image* and *text-to-video* and *image-to-video* workflows
- [Qwen Image Edit 2509](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) and [Nunchaku Qwen Image Edit 2509](https://huggingface.co/nunchaku-tech/nunchaku-qwen-image-edit-2509)
updated version of Qwen Image Edit with improved image consistency
- [Qwen Image Pruning](https://huggingface.co/OPPOer/Qwen-Image-Pruning) and [Qwen Image Edit Pruning](https://huggingface.co/OPPOer/Qwen-Image-Edit-Pruning)
pruned versions of Qwen with 13B params instead of 20B, with some quality tradeoff
- [HiDream E1.1](https://huggingface.co/HiDream-ai/HiDream-E1-1)
updated version of E1 image editing model
- [Tencent FLUX.1 Dev SRPO](https://huggingface.co/tencent/SRPO)
@ -15,7 +17,6 @@
impact of nunchaku engine on unet-based models such as sdxl is much less than on dit-based models, but it's still significantly faster than baseline
note that nunchaku optimized and prequantized unet is replacement for base unet, so its only applicable to base models, not any of finetunes
*how to use*: enable nunchaku in settings -> quantization and then load either sdxl-base or sdxl-base-turbo reference models
*note*: sdxl support for nunchaku is not in released version of `nunchaku==1.0.0`, so you need to build [nunchaku](https://nunchaku.tech/docs/nunchaku/installation/installation.html) from source
- **Features**
- [Cache-DiT](https://github.com/vipshop/cache-dit)
cache-dit is a unified, flexible and training-free cache acceleration framework
@ -74,6 +75,7 @@
- **attention** remove split-attention and add explicitly attention slicing enable/disable option
enable in *settings -> compute settings*
can be combined with sdp, enabling may improve stability when used on iGPU or shared memory systems
- **nunchaku** update to `1.0.1`
- **Experimental**
- `new` command line flag enables new `pydantic` and `albumentations` packages
- **modular pipelines**: enable in *settings -> model options*

View File

@ -183,7 +183,7 @@
"Qwen-Image": {
"path": "Qwen/Qwen-Image",
"preview": "Qwen--Qwen-Image.jpg",
"desc": " Qwen-Image, an image generation foundation model in the Qwen series that achieves significant advances in complex text rendering and precise image editing.",
"desc": "Qwen-Image, an image generation foundation model in the Qwen series that achieves significant advances in complex text rendering and precise image editing.",
"skip": true,
"extras": "",
"size": 56.1,
@ -228,12 +228,39 @@
"Qwen-Image-Lightning-Edit": {
"path": "vladmandic/Qwen-Lightning-Edit",
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
"desc": " Qwen-Lightning-Edit is step-distilled from Qwen-Image-Edit to allow for generation in 8 steps.",
"desc": "Qwen-Lightning-Edit is step-distilled from Qwen-Image-Edit to allow for generation in 8 steps.",
"skip": true,
"extras": "steps: 8",
"size": 56.1,
"date": "2025 August"
},
"Qwen-Image-Pruning": {
"path": "OPPOer/Qwen-Image-Pruning-13B",
"subfolder": "Qwen-Image-13B",
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
"desc": "This open-source project is based on Qwen-Image and has attempted model pruning, removing 20 layers while retaining the weights of 40 layers, resulting in a model size of 13.6B parameters.",
"skip": true,
"size": 56.1,
"date": "2025 Ocotober"
},
"Qwen-Image-Edit-Pruning": {
"path": "OPPOer/Qwen-Image-Edit-Pruning-13B",
"subfolder": "Qwen-Image-Edit-13B",
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
"desc": "This open-source project is based on Qwen-Image-Edit and has attempted model pruning, removing 20 layers while retaining the weights of 40 layers, resulting in a model size of 13.6B parameters.",
"skip": true,
"size": 56.1,
"date": "2025 Ocotober"
},
"Qwen-Image-Edit-2509-Pruning": {
"path": "OPPOer/Qwen-Image-Edit-Pruning-14B",
"subfolder": "Qwen-Image-Edit-2509-14B",
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
"desc": "This open-source project is based on Qwen-Image-Edit and has attempted model pruning, removing 20 layers while retaining the weights of 40 layers, resulting in a model size of 13.6B parameters.",
"skip": true,
"size": 56.1,
"date": "2025 Ocotober"
},
"lodestones Chroma1 HD": {
"path": "lodestones/Chroma1-HD",

View File

@ -4,7 +4,7 @@ from installer import log, pip
from modules import devices
ver = '1.0.0'
ver = '1.0.1'
ok = False

View File

@ -180,6 +180,13 @@ def find_diffuser(name: str, full=False):
if len(repo) > 0:
return [repo[0]['name']]
hf_api = hf.HfApi()
suffix = ''
if len(name) > 3 and name.count('/') > 1:
parts = name.split('/')
name = '/'.join(parts[:2]) # only user/model
suffix = '/'.join(parts[2:]) # subfolder
if len(suffix) > 0:
suffix = '/' + suffix
models = list(hf_api.list_models(model_name=name, library=['diffusers'], full=True, limit=20, sort="downloads", direction=-1))
if len(models) == 0:
models = list(hf_api.list_models(model_name=name, full=True, limit=20, sort="downloads", direction=-1)) # widen search
@ -187,9 +194,9 @@ def find_diffuser(name: str, full=False):
shared.log.debug(f'Search model: repo="{name}" {len(models) > 0}')
if len(models) > 0:
if not full:
return models[0].id
return models[0].id + suffix
else:
return [m.id for m in models]
return [m.id + suffix for m in models]
return None

View File

@ -27,6 +27,7 @@ class CheckpointInfo:
self.hash = sha
self.filename = filename
self.type = ''
self.subfolder = None
relname = filename
app_path = os.path.abspath(paths.script_path)
@ -106,7 +107,7 @@ class CheckpointInfo:
return self.shorthash
def __str__(self):
return f"CheckpointInfo(name={self.name} filename={self.filename} hash={self.shorthash} type={self.type}"
return f'CheckpointInfo(name="{self.name}" filename="{self.filename}" hash={self.shorthash} type={self.type} title="{self.title}" path="{self.path}" subfolder="{self.subfolder}")'
def setup_model():
@ -231,7 +232,12 @@ def get_closet_checkpoint_match(s: str) -> CheckpointInfo:
return checkpoint_info
# huggingface search
if shared.opts.sd_checkpoint_autodownload and s.count('/') == 1:
if shared.opts.sd_checkpoint_autodownload and (s.count('/') == 1 or s.count('/') == 2):
if s.count('/') == 2:
subfolder = '/'.join(s.split('/')[2:]) # subfolder
s = '/'.join(s.split('/')[:2]) # only user/model
else:
subfolder = None
modelloader.hf_login()
found = modelloader.find_diffuser(s, full=True)
if found is None:
@ -241,6 +247,7 @@ def get_closet_checkpoint_match(s: str) -> CheckpointInfo:
if found is not None and len(found) == 1:
checkpoint_info = CheckpointInfo(s)
checkpoint_info.type = 'huggingface'
checkpoint_info.subfolder = subfolder
return checkpoint_info
# civitai search
@ -289,7 +296,7 @@ def select_checkpoint(op='model', sd_model_checkpoint=None):
return None
if model_checkpoint is not None:
if model_checkpoint != 'model.safetensors' and model_checkpoint != 'stabilityai/stable-diffusion-xl-base-1.0':
shared.log.info(f'Load {op}: search="{model_checkpoint}" not found')
shared.log.error(f'Load {op}: search="{model_checkpoint}" not found')
else:
shared.log.info("Selecting first available checkpoint")
else:

View File

@ -48,7 +48,7 @@ def load_transformer(repo_id, cls_name, load_config={}, subfolder="transformer",
**quant_args,
)
else:
shared.log.debug(f'Load model: transformer="{repo_id}" cls={cls_name.__name__} quant="{quant_type}" args={load_args}')
shared.log.debug(f'Load model: transformer="{repo_id}" cls={cls_name.__name__} subfolder={subfolder} quant="{quant_type}" args={load_args}')
if dtype is not None:
load_args['torch_dtype'] = dtype
if subfolder is not None:

View File

@ -1,11 +1,12 @@
import transformers
import diffusers
from modules import shared, devices, sd_models, model_quant, sd_hijack_te, sd_hijack_vae
from pipelines import generic
def load_qwen(checkpoint_info, diffusers_load_config={}):
from pipelines import generic, qwen
repo_id = sd_models.path_to_repo(checkpoint_info)
repo_subfolder = checkpoint_info.subfolder
sd_models.hf_auth_check(checkpoint_info)
transformer = None
@ -29,8 +30,7 @@ def load_qwen(checkpoint_info, diffusers_load_config={}):
diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["qwen-image"] = diffusers.QwenImageInpaintPipeline
if model_quant.check_nunchaku('Model'):
from pipelines.qwen.qwen_nunchaku import load_qwen_nunchaku
transformer = load_qwen_nunchaku(repo_id)
transformer = qwen.load_qwen_nunchaku(repo_id)
if 'Qwen-Image-Distill-Full' in repo_id:
repo_transformer = repo_id
@ -38,7 +38,10 @@ def load_qwen(checkpoint_info, diffusers_load_config={}):
repo_id = 'Qwen/Qwen-Image'
else:
repo_transformer = repo_id
transformer_subfolder = "transformer"
if repo_subfolder is not None:
transformer_subfolder = repo_subfolder + '/transformer'
else:
transformer_subfolder = "transformer"
if transformer is None:
transformer = generic.load_transformer(
@ -52,11 +55,12 @@ def load_qwen(checkpoint_info, diffusers_load_config={}):
repo_te = 'Qwen/Qwen-Image'
text_encoder = generic.load_text_encoder(repo_te, cls_name=transformers.Qwen2_5_VLForConditionalGeneration, load_config=diffusers_load_config)
# NunchakuQwenImagePipeline
repo_id = qwen.check_qwen_pruning(repo_id)
pipe = cls_name.from_pretrained(
repo_id,
transformer=transformer,
text_encoder=text_encoder,
subfolder=repo_subfolder,
cache_dir=shared.opts.diffusers_dir,
**load_args,
)

View File

@ -0,0 +1,2 @@
from pipelines.qwen.qwen_nunchaku import load_qwen_nunchaku
from pipelines.qwen.qwen_pruning import check_qwen_pruning

View File

@ -11,7 +11,9 @@ def load_qwen_nunchaku(repo_id):
except Exception:
shared.log.error(f'Load module: quant=Nunchaku module=transformer repo="{repo_id}" low nunchaku version')
return None
if repo_id.lower().endswith('qwen-image'):
if 'pruning' in repo_id.lower() or 'distill' in repo_id.lower():
return None
elif repo_id.lower().endswith('qwen-image'):
nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image.safetensors" # r32 vs r128
elif repo_id.lower().endswith('qwen-lightning'):
nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image-lightningv1.1-8steps.safetensors" # 8-step variant

View File

@ -0,0 +1,12 @@
def check_qwen_pruning(repo_id):
    """Map a pruned Qwen repo id to its base Qwen repo id.

    Pruned variants (e.g. OPPOer/Qwen-Image-Pruning) share the base model's
    pipeline config, so the pipeline itself is loaded from the original
    Qwen repo while the pruned transformer is loaded separately.

    Args:
        repo_id: huggingface repo id, possibly pointing at a pruned variant.
    Returns:
        The base Qwen repo id for pruned variants; ``repo_id`` unchanged otherwise.
    """
    # fast passthrough for non-pruned repos; checked before the project import
    # so the common path has no side effects
    if 'pruning' not in repo_id.lower():
        return repo_id
    from modules.shared import log  # deferred: avoids import cost/cycles at module load
    if '2509' in repo_id:
        repo_id = "Qwen/Qwen-Image-Edit-2509"
    elif 'edit' in repo_id.lower():  # case-insensitive, consistent with the 'pruning' check above
        repo_id = "Qwen/Qwen-Image-Edit"
    else:
        repo_id = "Qwen/Qwen-Image"
    log.debug(f'Load model: variant=pruning target="{repo_id}"')
    return repo_id

2
wiki

@ -1 +1 @@
Subproject commit f274489e42b97b57ea91b9e94087b8b39820a4ee
Subproject commit dfcb801c592a493d48b7e22a0a46ab39340fd021