diff --git a/CHANGELOG.md b/CHANGELOG.md index a9d243672..1e993ba6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ # Change Log for SD.Next -## Update for 2024-10-15 +## Update for 2024-10-16 -### Highlights for 2024-10-15 +### Highlights for 2024-10-16 - **Reprocess**: New workflow options that allow you to generate at lower quality and then reprocess at higher quality for select images only or generate without hires/refine and then reprocess with hires/refine @@ -24,8 +24,9 @@ Oh, and we've compiled a full table with list of popular text-to-image generative models, their respective parameters and architecture overview: And there are also other goodies like multiple *XYZ grid* improvements, additional *Flux ControlNets*, additional *Interrogate models*, better *LoRA tags* support, and more... +See [changelog](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) for details! -### Details for 2024-10-15 +### Details for 2024-10-16 - **reprocess** - new top-level button: reprocess latent from your history of generated image(s) @@ -130,15 +131,17 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition - see [wiki](https://github.com/vladmandic/automatic/wiki/FLUX#quantization) for details on `gguf` - support for `gguf` binary format for loading unet/transformer component - support for `gguf` binary format for loading t5/text-encoder component: requires transformers pr - - avoid unet load if unchanged + - additional controlnets: [JasperAI](https://huggingface.co/collections/jasperai/flux1-dev-controlnets-66f27f9459d760dcafa32e08) **Depth**, **Upscaler**, **Surface**, thanks @EnragedAntelope + - additional controlnets: [XLabs-AI](https://huggingface.co/XLabs-AI/flux-controlnet-hed-diffusers) **Canny**, **Depth**, **HED** - mark specific unet as unavailable if load failed - fix diffusers local model name parsing - full prompt parser will auto-select `xhinker` for flux models - controlnet support for img2img and inpaint (in addition to previous txt2img controlnet) - allow separate vae load - support for both kohya and onetrainer loras in native load mode for fp16/nf4/fp4, thanks @AI-Casanova + - support for differential diffusion - added native load mode for qint8/qint4 models - - add additional controlnets: [JasperAI](https://huggingface.co/collections/jasperai/flux1-dev-controlnets-66f27f9459d760dcafa32e08) **Depth**, **Upscaler**, **Surface**, thanks @EnragedAntelope + - avoid unet load if unchanged - [CogView 3 Plus](https://huggingface.co/THUDM/CogView3-Plus-3B) - Select from *networks -> models -> reference* diff --git a/html/reference.json b/html/reference.json index aa53be150..8779ef603 100644 --- a/html/reference.json +++ b/html/reference.json @@ -364,6 +364,12 @@ "desc": "DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model, that can generate pictures with new state-of-the-art for photorealism and language understanding. The result is a highly efficient model that outperforms current state-of-the-art models, achieving a zero-shot FID-30K score of 6.66 on the COCO dataset. It is modular and composed of frozen text mode and three pixel cascaded diffusion modules, each designed to generate images of increasing resolution: 64x64, 256x256, and 1024x1024.", "preview": "DeepFloyd--IF-I-M-v1.0.jpg", "extras": "width: 1024, height: 1024, sampler: Default" + }, + "DeepFloyd IF Large": { + "path": "DeepFloyd/IF-I-L-v1.0", + "desc": "DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model, that can generate pictures with new state-of-the-art for photorealism and language understanding. The result is a highly efficient model that outperforms current state-of-the-art models, achieving a zero-shot FID-30K score of 6.66 on the COCO dataset. It is modular and composed of frozen text mode and three pixel cascaded diffusion modules, each designed to generate images of increasing resolution: 64x64, 256x256, and 1024x1024.", + "preview": "DeepFloyd--IF-I-M-v1.0.jpg", + "extras": "width: 1024, height: 1024, sampler: Default" } } diff --git a/installer.py b/installer.py index baeea82d5..66602ca0b 100644 --- a/installer.py +++ b/installer.py @@ -451,7 +451,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None): # check diffusers version def check_diffusers(): - sha = 'a3e8d3f7deed140f57a28d82dd0b5d965bd0fb09' + sha = 'd9029f2c5981a96ab51f8996be620af116b8d743' pkg = pkg_resources.working_set.by_key.get('diffusers', None) minor = int(pkg.version.split('.')[1] if pkg is not None else 0) cur = opts.get('diffusers_version', '') if minor > 0 else '' diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py index d8ed7fe57..236892eb5 100644 --- a/modules/control/units/controlnet.py +++ b/modules/control/units/controlnet.py @@ -75,9 +75,9 @@ predefined_f1 = { "Shakker-Labs Union": 'Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro', "Shakker-Labs Pose": 'Shakker-Labs/FLUX.1-dev-ControlNet-Pose', "Shakker-Labs Depth": 'Shakker-Labs/FLUX.1-dev-ControlNet-Depth', - "XLabs-AI Canny": 'XLabs-AI/flux-controlnet-canny-v3', - "XLabs-AI Depth": 'XLabs-AI/flux-controlnet-depth-v3', - "XLabs-AI HED": 'XLabs-AI/flux-controlnet-hed-v3' + "XLabs-AI Canny": 'XLabs-AI/flux-controlnet-canny-diffusers', + "XLabs-AI Depth": 'XLabs-AI/flux-controlnet-depth-diffusers', + "XLabs-AI HED": 'XLabs-AI/flux-controlnet-hed-diffusers' } models = {} all_models = {} diff --git a/scripts/differential_diffusion.py b/scripts/differential_diffusion.py index 36aa17eca..705242987 100644 --- a/scripts/differential_diffusion.py +++ b/scripts/differential_diffusion.py @@ -9,32 +9,20 @@ sdnext implementation follows after pipeline-end import inspect import hashlib from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import numpy as np -import PIL.Image -import torch from packaging import version -from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer -import torchvision +import PIL.Image +import numpy as np +import torch +import torchvision +from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer from diffusers.image_processor import VaeImageProcessor from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.models.attention_processor import ( - AttnProcessor2_0, - FusedAttnProcessor2_0, - XFormersAttnProcessor, -) +from diffusers.models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor from diffusers.configuration_utils import FrozenDict from diffusers.schedulers import KarrasDiffusionSchedulers -from diffusers.utils import ( - PIL_INTERPOLATION, - logging, - deprecate, - is_accelerate_available, - is_accelerate_version, - replace_example_docstring, -) +from diffusers.utils import PIL_INTERPOLATION, logging, deprecate, is_accelerate_available, is_accelerate_version, replace_example_docstring from diffusers.utils.torch_utils import randn_tensor from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -1921,7 +1909,7 @@ class Script(scripts.Script): def run(self, p: processing.StableDiffusionProcessingImg2Img, enabled, strength, invert, model, image): # pylint: disable=arguments-differ if not enabled: return - if shared.sd_model_type != 'sdxl' and shared.sd_model_type != 'sd': + if shared.sd_model_type not in ['sdxl', 'sd', 'f1']: shared.log.error(f'Differential-diffusion: incorrect base model: {shared.sd_model.__class__.__name__}') return if not hasattr(p, 'init_images') or len(p.init_images) == 0: @@ -1936,6 +1924,8 @@ class Script(scripts.Script): orig_pipeline = shared.sd_model pipe = None try: + # shared.sd_model = diffusers.StableDiffusionPipeline.from_pipe(shared.sd_model, **{ 'custom_pipeline': 'kohya_hires_fix', 'high_res_fix': high_res_fix }) + # from examples.community.pipeline_stable_diffusion_xl_differential_img2img import StableDiffusionXLDifferentialImg2ImgPipeline diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["StableDiffusionXLDiffImg2ImgPipeline"] = StableDiffusionXLDiffImg2ImgPipeline diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["StableDiffusionDiffImg2ImgPipeline"] = StableDiffusionDiffImg2ImgPipeline if shared.sd_model_type == 'sdxl': @@ -1959,6 +1949,9 @@ class Script(scripts.Script): safety_checker=None, requires_safety_checker=False, ) + elif shared.sd_model_type == 'f1': + pipe = diffusers.StableDiffusionPipeline.from_pipe(shared.sd_model, **{ 'custom_pipeline': 'pipeline_flux_differential_img2img' }) + diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["FluxDifferentialImg2ImgPipeline"] = pipe.__class__ sd_models.copy_diffuser_options(pipe, shared.sd_model) sd_models.set_diffuser_options(pipe) p.task_args['image'] = image_init diff --git a/scripts/regional_prompting.py b/scripts/regional_prompting.py index ab1f4a902..cecef747d 100644 --- a/scripts/regional_prompting.py +++ b/scripts/regional_prompting.py @@ -8,6 +8,7 @@ from modules import shared, devices, scripts, processing, sd_models, prompt_pars def hijack_register_modules(self, **kwargs): for name, module in kwargs.items(): + register_dict = None if module is None or isinstance(module, (tuple, list)) and module[0] is None: register_dict = {name: (None, None)} elif isinstance(module, bool): @@ -15,7 +16,8 @@ def hijack_register_modules(self, **kwargs): else: library, class_name = pipeline_utils._fetch_class_library_tuple(module) # pylint: disable=protected-access register_dict = {name: (library, class_name)} - self.register_to_config(**register_dict) + if register_dict is not None: + self.register_to_config(**register_dict) setattr(self, name, module) diff --git a/wiki b/wiki index 1f44cd378..a3d7ec999 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 1f44cd37813595194610e40f4389efb8d998c2ca +Subproject commit a3d7ec999fafc05d75dc5d0ae564f02bc689786f