From 4b911ea822e4e0d77f103903f0f49720dad75dfd Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 28 Feb 2024 15:12:05 -0500 Subject: [PATCH] add support for json configs per model component --- CHANGELOG.md | 3 +- ...ground-v2.5-1024px-aesthetic.fp16_vae.json | 43 +++++++++++++++++ modules/processing_diffusers.py | 2 +- modules/processing_vae.py | 15 +++++- modules/sd_models.py | 48 +++++++++++++++++-- modules/sd_samplers_diffusers.py | 2 + requirements.txt | 2 +- 7 files changed, 107 insertions(+), 8 deletions(-) create mode 100644 configs/playground-v2.5-1024px-aesthetic.fp16_vae.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e2e6382a..cee66fbb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ - [Playground v2.5](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic) - new model version from Playground: based on SDXL, but with some cool new concepts - download using networks -> reference - - set sampler to DPM++ 2M EDM or Euler EDM + - set sampler to *DPM++ 2M EDM* or *Euler EDM* (EDM is a new family of samplers) - **Image2Video** - new module for creating videos from images - simply enable from *img2img -> scripts -> image2video* @@ -29,6 +29,7 @@ - default theme updates and additional built-in theme *black-gray* - add **ROCm** 6.0 nightly option to installer, thanks @jicka - support models with their own YAML model config files + - support models with their own JSON per-component config files, for example: `playground-v2.5_vae.json` - **Internal** - remove obsolete textual inversion training code - remove obsolete hypernetworks training code diff --git a/configs/playground-v2.5-1024px-aesthetic.fp16_vae.json b/configs/playground-v2.5-1024px-aesthetic.fp16_vae.json new file mode 100644 index 000000000..f00799d4b --- /dev/null +++ b/configs/playground-v2.5-1024px-aesthetic.fp16_vae.json @@ -0,0 +1,43 @@ +{ + "_class_name": "AutoencoderKL", + "_diffusers_version": "0.27.0.dev0", + "act_fn": "silu", + "block_out_channels": 
[ + 128, + 256, + 512, + 512 + ], + "down_block_types": [ + "DownEncoderBlock2D", + "DownEncoderBlock2D", + "DownEncoderBlock2D", + "DownEncoderBlock2D" + ], + "force_upcast": true, + "in_channels": 3, + "latent_channels": 4, + "layers_per_block": 2, + "norm_num_groups": 32, + "out_channels": 3, + "sample_size": 1024, + "up_block_types": [ + "UpDecoderBlock2D", + "UpDecoderBlock2D", + "UpDecoderBlock2D", + "UpDecoderBlock2D" + ], + "latents_mean": [ + -1.6574, + 1.886, + -1.383, + 2.5155 + ], + "latents_std": [ + 8.4927, + 5.9022, + 6.5498, + 5.2299 + ], + "scaling_factor": 0.5 +} diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index b6d5c4c5e..f4250582d 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -229,7 +229,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing): if 'latents' in possible and getattr(p, "init_latent", None) is not None: args['latents'] = p.init_latent if 'output_type' in possible: - if hasattr(model, 'vae'): + if not hasattr(model, 'vae'): args['output_type'] = 'np' # only set latent if model has vae # stable cascade diff --git a/modules/processing_vae.py b/modules/processing_vae.py index 985bcdac4..793f2aeb8 100644 --- a/modules/processing_vae.py +++ b/modules/processing_vae.py @@ -46,9 +46,20 @@ def full_vae_decode(latents, model): model.upcast_vae() if hasattr(model.vae, "post_quant_conv"): latents = latents.to(next(iter(model.vae.post_quant_conv.parameters())).dtype) - decoded = model.vae.decode(latents / model.vae.config.scaling_factor, return_dict=False)[0] - # Delete PyTorch VAE after OpenVINO compile + # normalize latents + latents_mean = model.vae.config.get("latents_mean", None) + latents_std = model.vae.config.get("latents_std", None) + scaling_factor = model.vae.config.get("scaling_factor", None) + if latents_mean and latents_std: + latents_mean = (torch.tensor(latents_mean).view(1, 4, 1, 1).to(latents.device, latents.dtype)) + latents_std = 
(torch.tensor(latents_std).view(1, 4, 1, 1).to(latents.device, latents.dtype)) + latents = latents * latents_std / scaling_factor + latents_mean + else: + latents = latents / scaling_factor + decoded = model.vae.decode(latents, return_dict=False)[0] + + # delete vae after OpenVINO compile if shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae: shared.compiled_model_state.first_pass_vae = False if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "vae"): diff --git a/modules/sd_models.py b/modules/sd_models.py index 0b7bab037..0ca0774cd 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -780,6 +780,50 @@ def get_load_config(model_file, model_type): return None +def patch_diffuser_config(sd_model, model_file): + def load_config(fn, k): + model_file = os.path.splitext(fn)[0] + cfg_file = f'{model_file}_{k}.json' + try: + if os.path.exists(cfg_file): + with open(cfg_file, 'r', encoding='utf-8') as f: + return json.load(f) + cfg_file = f'{os.path.join(paths.sd_configs_path, os.path.basename(model_file))}_{k}.json' + if os.path.exists(cfg_file): + with open(cfg_file, 'r', encoding='utf-8') as f: + return json.load(f) + except Exception: + pass + return {} + + if sd_model is None: + return sd_model + if hasattr(sd_model, 'unet') and hasattr(sd_model.unet, 'config') and 'inpaint' in model_file.lower(): + if debug_load: + shared.log.debug('Model config patch: type=inpaint') + sd_model.unet.config.in_channels = 9 + if not hasattr(sd_model, '_internal_dict'): + return sd_model + for c in sd_model._internal_dict.keys(): # pylint: disable=protected-access + component = getattr(sd_model, c, None) + if hasattr(component, 'config'): + if debug_load: + shared.log.debug(f'Model config: component={c} config={component.config}') + override = load_config(model_file, c) + updated = {} + for k, v in override.items(): + if k.startswith('_'): + continue + if v != 
component.config.get(k, None): + if hasattr(component.config, '__frozen'): + component.config.__frozen = False # pylint: disable=protected-access + component.config[k] = v + updated[k] = v + if updated and debug_load: + shared.log.debug(f'Model config: component={c} override={updated}') + return sd_model + + def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=None, op='model'): # pylint: disable=unused-argument if shared.cmd_opts.profile: import cProfile @@ -966,9 +1010,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No else: sd_hijack_accelerate.restore_accelerate() sd_model = pipeline.from_single_file(checkpoint_info.path, **diffusers_load_config) - if sd_model is not None and hasattr(sd_model, 'unet') and hasattr(sd_model.unet, 'config') and 'inpainting' in checkpoint_info.path.lower(): - shared.log.debug('Model patch: type=inpaint') - sd_model.unet.config.in_channels = 9 + sd_model = patch_diffuser_config(sd_model, checkpoint_info.path) elif hasattr(pipeline, 'from_ckpt'): sd_model = pipeline.from_ckpt(checkpoint_info.path, **diffusers_load_config) else: diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py index 8299ea567..e463282ae 100644 --- a/modules/sd_samplers_diffusers.py +++ b/modules/sd_samplers_diffusers.py @@ -146,5 +146,7 @@ class DiffusionSampler: if key not in possible: shared.log.warning(f'Sampler: sampler="{name}" config={self.config} invalid={key}') del self.config[key] + # shared.log.debug(f'Sampler: sampler="{name}" config={self.config}') self.sampler = constructor(**self.config) + # shared.log.debug(f'Sampler: class="{self.sampler.__class__.__name__}" config={self.sampler.config}') self.sampler.name = name diff --git a/requirements.txt b/requirements.txt index 4ffec69bd..996f09d40 100644 --- a/requirements.txt +++ b/requirements.txt @@ -55,7 +55,7 @@ pandas protobuf==3.20.3 pytorch_lightning==1.9.4 tokenizers==0.15.2 -transformers==4.37.2 
+transformers==4.38.1 tomesd==0.1.3 urllib3==1.26.18 Pillow==10.2.0