add qwen pruning variants

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/4250/head
Vladimir Mandic 2025-10-03 11:36:12 -04:00
parent 54acf1760b
commit a6108dd6df
11 changed files with 81 additions and 18 deletions

View File

@ -1,12 +1,14 @@
# Change Log for SD.Next
## Update for 2025-10-02
## Update for 2025-10-03
- **Models**
- [WAN 2.2 14B VACE](https://huggingface.co/alibaba-pai/Wan2.2-VACE-Fun-A14B)
available for *text-to-image* and *text-to-video* and *image-to-video* workflows
- [Qwen Image Edit 2509](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) and [Nunchaku Qwen Image Edit 2509](https://huggingface.co/nunchaku-tech/nunchaku-qwen-image-edit-2509)
updated version of Qwen Image Edit with improved image consistency
- [Qwen Image Pruning](https://huggingface.co/OPPOer/Qwen-Image-Pruning) and [Qwen Image Edit Pruning](https://huggingface.co/OPPOer/Qwen-Image-Edit-Pruning)
pruned versions of Qwen with 13B params instead of 20B, with some quality tradeoff
- [HiDream E1.1](https://huggingface.co/HiDream-ai/HiDream-E1-1)
updated version of E1 image editing model
- [Tencent FLUX.1 Dev SRPO](https://huggingface.co/tencent/SRPO)
@ -15,7 +17,6 @@
impact of nunchaku engine on unet-based models such as sdxl is much less than on dit-based models, but it's still significantly faster than baseline
note that nunchaku optimized and prequantized unet is replacement for base unet, so its only applicable to base models, not any of finetunes
*how to use*: enable nunchaku in settings -> quantization and then load either sdxl-base or sdxl-base-turbo reference models
*note*: sdxl support for nunchaku is not in released version of `nunchaku==1.0.0`, so you need to build [nunchaku](https://nunchaku.tech/docs/nunchaku/installation/installation.html) from source
- **Features**
- [Cache-DiT](https://github.com/vipshop/cache-dit)
cache-dit is a unified, flexible and training-free cache acceleration framework
@ -74,6 +75,7 @@
- **attention** remove split-attention and add explicitly attention slicing enable/disable option
enable in *settings -> compute settings*
can be combined with sdp, enabling may improve stability when used on iGPU or shared memory systems
- **nunchaku** update to `1.0.1`
- **Experimental**
- `new` command line flag enables new `pydantic` and `albumentations` packages
- **modular pipelines**: enable in *settings -> model options*

View File

@ -183,7 +183,7 @@
"Qwen-Image": {
"path": "Qwen/Qwen-Image",
"preview": "Qwen--Qwen-Image.jpg",
"desc": " Qwen-Image, an image generation foundation model in the Qwen series that achieves significant advances in complex text rendering and precise image editing.",
"desc": "Qwen-Image, an image generation foundation model in the Qwen series that achieves significant advances in complex text rendering and precise image editing.",
"skip": true,
"extras": "",
"size": 56.1,
@ -228,12 +228,39 @@
"Qwen-Image-Lightning-Edit": {
"path": "vladmandic/Qwen-Lightning-Edit",
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
"desc": " Qwen-Lightning-Edit is step-distilled from Qwen-Image-Edit to allow for generation in 8 steps.",
"desc": "Qwen-Lightning-Edit is step-distilled from Qwen-Image-Edit to allow for generation in 8 steps.",
"skip": true,
"extras": "steps: 8",
"size": 56.1,
"date": "2025 August"
},
"Qwen-Image-Pruning": {
"path": "OPPOer/Qwen-Image-Pruning-13B",
"subfolder": "Qwen-Image-13B",
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
"desc": "This open-source project is based on Qwen-Image and has attempted model pruning, removing 20 layers while retaining the weights of 40 layers, resulting in a model size of 13.6B parameters.",
"skip": true,
"size": 56.1,
"date": "2025 Ocotober"
},
"Qwen-Image-Edit-Pruning": {
"path": "OPPOer/Qwen-Image-Edit-Pruning-13B",
"subfolder": "Qwen-Image-Edit-13B",
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
"desc": "This open-source project is based on Qwen-Image-Edit and has attempted model pruning, removing 20 layers while retaining the weights of 40 layers, resulting in a model size of 13.6B parameters.",
"skip": true,
"size": 56.1,
"date": "2025 Ocotober"
},
"Qwen-Image-Edit-2509-Pruning": {
"path": "OPPOer/Qwen-Image-Edit-Pruning-14B",
"subfolder": "Qwen-Image-Edit-2509-14B",
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
"desc": "This open-source project is based on Qwen-Image-Edit and has attempted model pruning, removing 20 layers while retaining the weights of 40 layers, resulting in a model size of 13.6B parameters.",
"skip": true,
"size": 56.1,
"date": "2025 Ocotober"
},
"lodestones Chroma1 HD": {
"path": "lodestones/Chroma1-HD",

View File

@ -4,7 +4,7 @@ from installer import log, pip
from modules import devices
ver = '1.0.0'
ver = '1.0.1'
ok = False

View File

@ -180,6 +180,13 @@ def find_diffuser(name: str, full=False):
if len(repo) > 0:
return [repo[0]['name']]
hf_api = hf.HfApi()
suffix = ''
if len(name) > 3 and name.count('/') > 1:
parts = name.split('/')
name = '/'.join(parts[:2]) # only user/model
suffix = '/'.join(parts[2:]) # subfolder
if len(suffix) > 0:
suffix = '/' + suffix
models = list(hf_api.list_models(model_name=name, library=['diffusers'], full=True, limit=20, sort="downloads", direction=-1))
if len(models) == 0:
models = list(hf_api.list_models(model_name=name, full=True, limit=20, sort="downloads", direction=-1)) # widen search
@ -187,9 +194,9 @@ def find_diffuser(name: str, full=False):
shared.log.debug(f'Search model: repo="{name}" {len(models) > 0}')
if len(models) > 0:
if not full:
return models[0].id
return models[0].id + suffix
else:
return [m.id for m in models]
return [m.id + suffix for m in models]
return None

View File

@ -27,6 +27,7 @@ class CheckpointInfo:
self.hash = sha
self.filename = filename
self.type = ''
self.subfolder = None
relname = filename
app_path = os.path.abspath(paths.script_path)
@ -106,7 +107,7 @@ class CheckpointInfo:
return self.shorthash
def __str__(self):
return f"CheckpointInfo(name={self.name} filename={self.filename} hash={self.shorthash} type={self.type}"
return f'CheckpointInfo(name="{self.name}" filename="{self.filename}" hash={self.shorthash} type={self.type} title="{self.title}" path="{self.path}" subfolder="{self.subfolder}")'
def setup_model():
@ -231,7 +232,12 @@ def get_closet_checkpoint_match(s: str) -> CheckpointInfo:
return checkpoint_info
# huggingface search
if shared.opts.sd_checkpoint_autodownload and s.count('/') == 1:
if shared.opts.sd_checkpoint_autodownload and (s.count('/') == 1 or s.count('/') == 2):
if s.count('/') == 2:
subfolder = '/'.join(s.split('/')[2:]) # subfolder
s = '/'.join(s.split('/')[:2]) # only user/model
else:
subfolder = None
modelloader.hf_login()
found = modelloader.find_diffuser(s, full=True)
if found is None:
@ -241,6 +247,7 @@ def get_closet_checkpoint_match(s: str) -> CheckpointInfo:
if found is not None and len(found) == 1:
checkpoint_info = CheckpointInfo(s)
checkpoint_info.type = 'huggingface'
checkpoint_info.subfolder = subfolder
return checkpoint_info
# civitai search
@ -289,7 +296,7 @@ def select_checkpoint(op='model', sd_model_checkpoint=None):
return None
if model_checkpoint is not None:
if model_checkpoint != 'model.safetensors' and model_checkpoint != 'stabilityai/stable-diffusion-xl-base-1.0':
shared.log.info(f'Load {op}: search="{model_checkpoint}" not found')
shared.log.error(f'Load {op}: search="{model_checkpoint}" not found')
else:
shared.log.info("Selecting first available checkpoint")
else:

View File

@ -48,7 +48,7 @@ def load_transformer(repo_id, cls_name, load_config={}, subfolder="transformer",
**quant_args,
)
else:
shared.log.debug(f'Load model: transformer="{repo_id}" cls={cls_name.__name__} quant="{quant_type}" args={load_args}')
shared.log.debug(f'Load model: transformer="{repo_id}" cls={cls_name.__name__} subfolder={subfolder} quant="{quant_type}" args={load_args}')
if dtype is not None:
load_args['torch_dtype'] = dtype
if subfolder is not None:

View File

@ -1,11 +1,12 @@
import transformers
import diffusers
from modules import shared, devices, sd_models, model_quant, sd_hijack_te, sd_hijack_vae
from pipelines import generic
def load_qwen(checkpoint_info, diffusers_load_config={}):
from pipelines import generic, qwen
repo_id = sd_models.path_to_repo(checkpoint_info)
repo_subfolder = checkpoint_info.subfolder
sd_models.hf_auth_check(checkpoint_info)
transformer = None
@ -29,8 +30,7 @@ def load_qwen(checkpoint_info, diffusers_load_config={}):
diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["qwen-image"] = diffusers.QwenImageInpaintPipeline
if model_quant.check_nunchaku('Model'):
from pipelines.qwen.qwen_nunchaku import load_qwen_nunchaku
transformer = load_qwen_nunchaku(repo_id)
transformer = qwen.load_qwen_nunchaku(repo_id)
if 'Qwen-Image-Distill-Full' in repo_id:
repo_transformer = repo_id
@ -38,7 +38,10 @@ def load_qwen(checkpoint_info, diffusers_load_config={}):
repo_id = 'Qwen/Qwen-Image'
else:
repo_transformer = repo_id
transformer_subfolder = "transformer"
if repo_subfolder is not None:
transformer_subfolder = repo_subfolder + '/transformer'
else:
transformer_subfolder = "transformer"
if transformer is None:
transformer = generic.load_transformer(
@ -52,11 +55,12 @@ def load_qwen(checkpoint_info, diffusers_load_config={}):
repo_te = 'Qwen/Qwen-Image'
text_encoder = generic.load_text_encoder(repo_te, cls_name=transformers.Qwen2_5_VLForConditionalGeneration, load_config=diffusers_load_config)
# NunchakuQwenImagePipeline
repo_id = qwen.check_qwen_pruning(repo_id)
pipe = cls_name.from_pretrained(
repo_id,
transformer=transformer,
text_encoder=text_encoder,
subfolder=repo_subfolder,
cache_dir=shared.opts.diffusers_dir,
**load_args,
)

View File

@ -0,0 +1,2 @@
from pipelines.qwen.qwen_nunchaku import load_qwen_nunchaku
from pipelines.qwen.qwen_pruning import check_qwen_pruning

View File

@ -11,7 +11,9 @@ def load_qwen_nunchaku(repo_id):
except Exception:
shared.log.error(f'Load module: quant=Nunchaku module=transformer repo="{repo_id}" low nunchaku version')
return None
if repo_id.lower().endswith('qwen-image'):
if 'pruning' in repo_id.lower() or 'distill' in repo_id.lower():
return None
elif repo_id.lower().endswith('qwen-image'):
nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image.safetensors" # r32 vs r128
elif repo_id.lower().endswith('qwen-lightning'):
nunchaku_repo = f"nunchaku-tech/nunchaku-qwen-image/svdq-{nunchaku_precision}_r128-qwen-image-lightningv1.1-8steps.safetensors" # 8-step variant

View File

@ -0,0 +1,12 @@
def check_qwen_pruning(repo_id):
    """Map a pruned Qwen repo id to its base Qwen repo id.

    Pruned variants (e.g. OPPOer/Qwen-Image-Pruning) share the base model's
    pipeline config, so the pipeline itself is loaded from the original
    Qwen repo while the pruned transformer is loaded separately.

    Args:
        repo_id: huggingface repo id, possibly pointing at a pruned variant.
    Returns:
        The base Qwen repo id for pruned variants; ``repo_id`` unchanged otherwise.
    """
    # fast passthrough for non-pruned repos; checked before the project import
    # so the common path has no side effects
    if 'pruning' not in repo_id.lower():
        return repo_id
    from modules.shared import log  # deferred: avoids import cost/cycles at module load
    if '2509' in repo_id:
        repo_id = "Qwen/Qwen-Image-Edit-2509"
    elif 'edit' in repo_id.lower():  # case-insensitive, consistent with the 'pruning' check above
        repo_id = "Qwen/Qwen-Image-Edit"
    else:
        repo_id = "Qwen/Qwen-Image"
    log.debug(f'Load model: variant=pruning target="{repo_id}"')
    return repo_id

2
wiki

@ -1 +1 @@
Subproject commit f274489e42b97b57ea91b9e94087b8b39820a4ee
Subproject commit dfcb801c592a493d48b7e22a0a46ab39340fd021