diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f824f175..b57943106 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,16 @@ # Change Log for SD.Next -## Known issues +## Update for 2025-12-26 -- z-image-turbo controlnet device mismatch: -- z-image-turbo safetensors loader: -- kandinsky-image-5 hardcoded cuda: -- peft lora with torch-rocm-windows: +### Highlights for 2025-12-26 -## Update for 2025-12-25 +End of year release with: +- Several new models including highly anticipated **Qwen-Image-Edit 2511** as well as **Qwen-Image-Layered**, **LongCat Image** and **Ovis Image** +- New features including support for **Z-Image** *ControlNets* and *fine-tunes* and **Detailer** segmentation support + +[ReadMe](https://github.com/vladmandic/automatic/blob/master/README.md) | [ChangeLog](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) | [Docs](https://vladmandic.github.io/sdnext-docs/) | [WiKi](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.com/invite/sd-next-federal-batch-inspectors-1101998836328697867) | [Sponsor](https://github.com/sponsors/vladmandic) + +### Details for 2025-12-26 - **Models** - [LongCat Image](https://github.com/meituan-longcat/LongCat-Image) in *Image* and *Image Edit* variants @@ -17,6 +20,8 @@ - [Qwen-Image-Layered](https://huggingface.co/Qwen/Qwen-Image-Layered) in *base* and *pre-quantized* variants Qwen-Image-Layered, a model capable of decomposing an image into multiple RGBA layers *note*: set number of desired output layers in *settings -> model options* + - [Ovis Image 7B](https://huggingface.co/AIDC-AI/Ovis-Image-7B) + Ovis Image is a new text-to-image base model based on Qwen3 text-encoder and optimized for text-rendering - **Features** - Google **Gemini** and **Veo** models support for both *Dev* and *Vertex* access methods see [docs](https://vladmandic.github.io/sdnext-docs/Google-GenAI/) for details @@ -45,6 +50,7 @@ - control input media with non-english locales - handle embeds when on 
meta device - improve offloading when model has manual modules + - fix ui section collapsible state, thanks @awsr ## Update for 2025-12-11 diff --git a/TODO.md b/TODO.md index dda9018b9..626fc7a6e 100644 --- a/TODO.md +++ b/TODO.md @@ -1,5 +1,12 @@ # TODO +## Known issues + +- z-image-turbo controlnet device mismatch: +- z-image-turbo safetensors loader: +- kandinsky-image-5 hardcoded cuda: +- peft lora with torch-rocm-windows: + ## Project Board - diff --git a/extensions-builtin/sd-extension-chainner b/extensions-builtin/sd-extension-chainner index c6dc85eb2..2a7005fbc 160000 --- a/extensions-builtin/sd-extension-chainner +++ b/extensions-builtin/sd-extension-chainner @@ -1 +1 @@ -Subproject commit c6dc85eb28a02bc7af268497b7a5a596770c5d7b +Subproject commit 2a7005fbcf8985644b66121365fa7228a65f34b0 diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index af99fbab2..50f761327 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit af99fbab29e9a424c4e79fa8e4ae194481cb5f75 +Subproject commit 50f7613276ecb37015cc4dd270a033d32d038415 diff --git a/html/reference.json b/html/reference.json index fdb792228..492584ad8 100644 --- a/html/reference.json +++ b/html/reference.json @@ -938,6 +938,16 @@ "date": "2024 January" }, + "AIDC Ovis-Image 7B": { + "path": "AIDC-AI/Ovis-Image-7B", + "skip": true, + "desc": "Built upon Ovis-U1, Ovis-Image is a 7B text-to-image model specifically optimized for high-quality text rendering, designed to operate efficiently under stringent computational constraints.", + "preview": "AIDC-AI--Ovis-Image-7B.jpg", + "size": 23.38, + "date": "2025 December", + "extras": "" + }, + "HDM-XUT 340M Anime": { "path": "KBlueLeaf/HDM-xut-340M-anime", "skip": true, diff --git a/installer.py b/installer.py index 29b84e203..94346883e 100644 --- a/installer.py +++ b/installer.py @@ -648,7 +648,7 @@ def check_diffusers(): t_start = time.time() if args.skip_all: return
- sha = '52766e6a6939ac6e74375bde5e19c5e0b90d24c1' # diffusers commit hash + sha = 'f6b6a7181eb44f0120b29cd897c129275f366c2a' # diffusers commit hash # if args.use_rocm or args.use_zluda or args.use_directml: # sha = '043ab2520f6a19fce78e6e060a68dbc947edb9f9' # lock diffusers versions for now pkg = pkg_resources.working_set.by_key.get('diffusers', None) diff --git a/modules/modeldata.py b/modules/modeldata.py index 1c97530b4..29ab6b659 100644 --- a/modules/modeldata.py +++ b/modules/modeldata.py @@ -78,6 +78,8 @@ def get_model_type(pipe): model_type = 'prx' elif 'LongCat' in name: model_type = 'longcat' + elif 'Ovis-Image' in name: + model_type = 'ovis' # video models elif "CogVideo" in name: model_type = 'cogvideo' diff --git a/modules/sd_detect.py b/modules/sd_detect.py index 7772fb990..2143e34b3 100644 --- a/modules/sd_detect.py +++ b/modules/sd_detect.py @@ -141,6 +141,8 @@ def guess_by_name(fn, current_guess): new_guess = 'Z-Image' elif 'longcat-image' in fn.lower(): new_guess = 'LongCat' + elif 'ovis-image' in fn.lower(): + new_guess = 'Ovis-Image' if debug_load: shared.log.trace(f'Autodetect: method=name file="{fn}" previous="{current_guess}" current="{new_guess}"') return new_guess or current_guess diff --git a/modules/sd_models.py b/modules/sd_models.py index 724aae574..da0888431 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -479,6 +479,10 @@ def load_diffuser_force(detected_model_type, checkpoint_info, diffusers_load_con from pipelines.model_longcat import load_longcat sd_model = load_longcat(checkpoint_info, diffusers_load_config) allow_post_quant = False + elif model_type in ['Ovis-Image']: + from pipelines.model_ovis import load_ovis + sd_model = load_ovis(checkpoint_info, diffusers_load_config) + allow_post_quant = False except Exception as e: shared.log.error(f'Load {op}: path="{checkpoint_info.path}" {e}') if debug_load: diff --git a/pipelines/model_ovis.py b/pipelines/model_ovis.py new file mode 100644 index 000000000..9dfa0f599 --- 
/dev/null +++ b/pipelines/model_ovis.py @@ -0,0 +1,36 @@ +import transformers +import diffusers +from modules import shared, devices, sd_models, model_quant, sd_hijack_te +from pipelines import generic + + +def load_ovis(checkpoint_info, diffusers_load_config=None): + if diffusers_load_config is None: + diffusers_load_config = {} + repo_id = sd_models.path_to_repo(checkpoint_info) + sd_models.hf_auth_check(checkpoint_info) + + load_args, _quant_args = model_quant.get_dit_args(diffusers_load_config, allow_quant=False) + shared.log.debug(f'Load model: type=OvisImage repo="{repo_id}" config={diffusers_load_config} offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype} args={diffusers_load_config}') + + transformer = generic.load_transformer(repo_id, cls_name=diffusers.OvisImageTransformer2DModel, load_config=diffusers_load_config) + text_encoder = generic.load_text_encoder(repo_id, cls_name=transformers.Qwen3Model, load_config=diffusers_load_config) + + pipe = diffusers.OvisImagePipeline.from_pretrained( + repo_id, + cache_dir=shared.opts.diffusers_dir, + transformer=transformer, + text_encoder=text_encoder, + **load_args, + ) + + pipe.task_args = { + 'output_type': 'np', + } + + del transformer + del text_encoder + sd_hijack_te.init_hijack(pipe) + + devices.torch_gc(force=True, reason='load') + return pipe diff --git a/pipelines/model_z_image.py b/pipelines/model_z_image.py index 1f2a18bfd..a839715e8 100644 --- a/pipelines/model_z_image.py +++ b/pipelines/model_z_image.py @@ -11,7 +11,7 @@ def load_z_image(checkpoint_info, diffusers_load_config=None): sd_models.hf_auth_check(checkpoint_info) load_args, _quant_args = model_quant.get_dit_args(diffusers_load_config, allow_quant=False) - shared.log.debug(f'Load model: type=Z-Image repo="{repo_id}" config={diffusers_load_config} offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype} args={diffusers_load_config}') + shared.log.debug(f'Load model: type=ZImage repo="{repo_id}" 
config={diffusers_load_config} offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype} args={diffusers_load_config}') transformer = generic.load_transformer(repo_id, cls_name=diffusers.ZImageTransformer2DModel, load_config=diffusers_load_config) text_encoder = generic.load_text_encoder(repo_id, cls_name=transformers.Qwen3ForCausalLM, load_config=diffusers_load_config)