mirror of https://github.com/vladmandic/automatic
flux extra controlnets and differential diffusion
Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/3490/head
parent
18c0ab7297
commit
e746871962
13
CHANGELOG.md
13
CHANGELOG.md
|
|
@ -1,8 +1,8 @@
|
|||
# Change Log for SD.Next
|
||||
|
||||
## Update for 2024-10-15
|
||||
## Update for 2024-10-16
|
||||
|
||||
### Highlights for 2024-10-15
|
||||
### Highlights for 2024-10-16
|
||||
|
||||
- **Reprocess**: New workflow options that allow you to generate at lower quality and then
|
||||
reprocess at higher quality for select images only or generate without hires/refine and then reprocess with hires/refine
|
||||
|
|
@ -24,8 +24,9 @@
|
|||
Oh, and we've compiled a full table with a list of popular text-to-image generative models, their respective parameters and architecture overview: <https://github.com/vladmandic/automatic/wiki/Models>
|
||||
|
||||
And there are also other goodies like multiple *XYZ grid* improvements, additional *Flux ControlNets*, additional *Interrogate models*, better *LoRA tags* support, and more...
|
||||
See [changelog](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) for details!
|
||||
|
||||
### Details for 2024-10-15
|
||||
### Details for 2024-10-16
|
||||
|
||||
- **reprocess**
|
||||
- new top-level button: reprocess latent from your history of generated image(s)
|
||||
|
|
@ -130,15 +131,17 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition
|
|||
- see [wiki](https://github.com/vladmandic/automatic/wiki/FLUX#quantization) for details on `gguf`
|
||||
- support for `gguf` binary format for loading unet/transformer component
|
||||
- support for `gguf` binary format for loading t5/text-encoder component: requires a `transformers` PR
|
||||
- avoid unet load if unchanged
|
||||
- additional controlnets: [JasperAI](https://huggingface.co/collections/jasperai/flux1-dev-controlnets-66f27f9459d760dcafa32e08) **Depth**, **Upscaler**, **Surface**, thanks @EnragedAntelope
|
||||
- additional controlnets: [XLabs-AI](https://huggingface.co/XLabs-AI/flux-controlnet-hed-diffusers) **Canny**, **Depth**, **HED**
|
||||
- mark specific unet as unavailable if load failed
|
||||
- fix diffusers local model name parsing
|
||||
- full prompt parser will auto-select `xhinker` for flux models
|
||||
- controlnet support for img2img and inpaint (in addition to previous txt2img controlnet)
|
||||
- allow separate vae load
|
||||
- support for both kohya and onetrainer loras in native load mode for fp16/nf4/fp4, thanks @AI-Casanova
|
||||
- support for differential diffusion
|
||||
- added native load mode for qint8/qint4 models
|
||||
- add additional controlnets: [JasperAI](https://huggingface.co/collections/jasperai/flux1-dev-controlnets-66f27f9459d760dcafa32e08) **Depth**, **Upscaler**, **Surface**, thanks @EnragedAntelope
|
||||
- avoid unet load if unchanged
|
||||
|
||||
- [CogView 3 Plus](https://huggingface.co/THUDM/CogView3-Plus-3B)
|
||||
- Select from *networks -> models -> reference*
|
||||
|
|
|
|||
|
|
@ -364,6 +364,12 @@
|
|||
"desc": "DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model, that can generate pictures with new state-of-the-art for photorealism and language understanding. The result is a highly efficient model that outperforms current state-of-the-art models, achieving a zero-shot FID-30K score of 6.66 on the COCO dataset. It is modular and composed of frozen text model and three pixel cascaded diffusion modules, each designed to generate images of increasing resolution: 64x64, 256x256, and 1024x1024.",
|
||||
"preview": "DeepFloyd--IF-I-M-v1.0.jpg",
|
||||
"extras": "width: 1024, height: 1024, sampler: Default"
|
||||
},
|
||||
"DeepFloyd IF Large": {
|
||||
"path": "DeepFloyd/IF-I-L-v1.0",
|
||||
"desc": "DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model, that can generate pictures with new state-of-the-art for photorealism and language understanding. The result is a highly efficient model that outperforms current state-of-the-art models, achieving a zero-shot FID-30K score of 6.66 on the COCO dataset. It is modular and composed of frozen text model and three pixel cascaded diffusion modules, each designed to generate images of increasing resolution: 64x64, 256x256, and 1024x1024.",
|
||||
"preview": "DeepFloyd--IF-I-M-v1.0.jpg",
|
||||
"extras": "width: 1024, height: 1024, sampler: Default"
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -451,7 +451,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
|
|||
|
||||
# check diffusers version
|
||||
def check_diffusers():
|
||||
sha = 'a3e8d3f7deed140f57a28d82dd0b5d965bd0fb09'
|
||||
sha = 'd9029f2c5981a96ab51f8996be620af116b8d743'
|
||||
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
|
||||
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
|
||||
cur = opts.get('diffusers_version', '') if minor > 0 else ''
|
||||
|
|
|
|||
|
|
@ -75,9 +75,9 @@ predefined_f1 = {
|
|||
"Shakker-Labs Union": 'Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro',
|
||||
"Shakker-Labs Pose": 'Shakker-Labs/FLUX.1-dev-ControlNet-Pose',
|
||||
"Shakker-Labs Depth": 'Shakker-Labs/FLUX.1-dev-ControlNet-Depth',
|
||||
"XLabs-AI Canny": 'XLabs-AI/flux-controlnet-canny-v3',
|
||||
"XLabs-AI Depth": 'XLabs-AI/flux-controlnet-depth-v3',
|
||||
"XLabs-AI HED": 'XLabs-AI/flux-controlnet-hed-v3'
|
||||
"XLabs-AI Canny": 'XLabs-AI/flux-controlnet-canny-diffusers',
|
||||
"XLabs-AI Depth": 'XLabs-AI/flux-controlnet-depth-diffusers',
|
||||
"XLabs-AI HED": 'XLabs-AI/flux-controlnet-hed-diffusers'
|
||||
}
|
||||
models = {}
|
||||
all_models = {}
|
||||
|
|
|
|||
|
|
@ -9,32 +9,20 @@ sdnext implementation follows after pipeline-end
|
|||
import inspect
|
||||
import hashlib
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import torch
|
||||
from packaging import version
|
||||
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
|
||||
import torchvision
|
||||
|
||||
import PIL.Image
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchvision
|
||||
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
|
||||
from diffusers.image_processor import VaeImageProcessor
|
||||
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
|
||||
from diffusers.models import AutoencoderKL, UNet2DConditionModel
|
||||
from diffusers.models.attention_processor import (
|
||||
AttnProcessor2_0,
|
||||
FusedAttnProcessor2_0,
|
||||
XFormersAttnProcessor,
|
||||
)
|
||||
from diffusers.models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
|
||||
from diffusers.configuration_utils import FrozenDict
|
||||
from diffusers.schedulers import KarrasDiffusionSchedulers
|
||||
from diffusers.utils import (
|
||||
PIL_INTERPOLATION,
|
||||
logging,
|
||||
deprecate,
|
||||
is_accelerate_available,
|
||||
is_accelerate_version,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from diffusers.utils import PIL_INTERPOLATION, logging, deprecate, is_accelerate_available, is_accelerate_version, replace_example_docstring
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
||||
|
|
@ -1921,7 +1909,7 @@ class Script(scripts.Script):
|
|||
def run(self, p: processing.StableDiffusionProcessingImg2Img, enabled, strength, invert, model, image): # pylint: disable=arguments-differ
|
||||
if not enabled:
|
||||
return
|
||||
if shared.sd_model_type != 'sdxl' and shared.sd_model_type != 'sd':
|
||||
if shared.sd_model_type not in ['sdxl', 'sd', 'f1']:
|
||||
shared.log.error(f'Differential-diffusion: incorrect base model: {shared.sd_model.__class__.__name__}')
|
||||
return
|
||||
if not hasattr(p, 'init_images') or len(p.init_images) == 0:
|
||||
|
|
@ -1936,6 +1924,8 @@ class Script(scripts.Script):
|
|||
orig_pipeline = shared.sd_model
|
||||
pipe = None
|
||||
try:
|
||||
# shared.sd_model = diffusers.StableDiffusionPipeline.from_pipe(shared.sd_model, **{ 'custom_pipeline': 'kohya_hires_fix', 'high_res_fix': high_res_fix })
|
||||
# from examples.community.pipeline_stable_diffusion_xl_differential_img2img import StableDiffusionXLDifferentialImg2ImgPipeline
|
||||
diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["StableDiffusionXLDiffImg2ImgPipeline"] = StableDiffusionXLDiffImg2ImgPipeline
|
||||
diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["StableDiffusionDiffImg2ImgPipeline"] = StableDiffusionDiffImg2ImgPipeline
|
||||
if shared.sd_model_type == 'sdxl':
|
||||
|
|
@ -1959,6 +1949,9 @@ class Script(scripts.Script):
|
|||
safety_checker=None,
|
||||
requires_safety_checker=False,
|
||||
)
|
||||
elif shared.sd_model_type == 'f1':
|
||||
pipe = diffusers.StableDiffusionPipeline.from_pipe(shared.sd_model, **{ 'custom_pipeline': 'pipeline_flux_differential_img2img' })
|
||||
diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["FluxDifferentialImg2ImgPipeline"] = pipe.__class__
|
||||
sd_models.copy_diffuser_options(pipe, shared.sd_model)
|
||||
sd_models.set_diffuser_options(pipe)
|
||||
p.task_args['image'] = image_init
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from modules import shared, devices, scripts, processing, sd_models, prompt_pars
|
|||
|
||||
def hijack_register_modules(self, **kwargs):
|
||||
for name, module in kwargs.items():
|
||||
register_dict = None
|
||||
if module is None or isinstance(module, (tuple, list)) and module[0] is None:
|
||||
register_dict = {name: (None, None)}
|
||||
elif isinstance(module, bool):
|
||||
|
|
@ -15,7 +16,8 @@ def hijack_register_modules(self, **kwargs):
|
|||
else:
|
||||
library, class_name = pipeline_utils._fetch_class_library_tuple(module) # pylint: disable=protected-access
|
||||
register_dict = {name: (library, class_name)}
|
||||
self.register_to_config(**register_dict)
|
||||
if register_dict is not None:
|
||||
self.register_to_config(**register_dict)
|
||||
setattr(self, name, module)
|
||||
|
||||
|
||||
|
|
|
|||
2
wiki
2
wiki
|
|
@ -1 +1 @@
|
|||
Subproject commit 1f44cd37813595194610e40f4389efb8d998c2ca
|
||||
Subproject commit a3d7ec999fafc05d75dc5d0ae564f02bc689786f
|
||||
Loading…
Reference in New Issue