add support for json configs per model component

pull/2932/head^2
Vladimir Mandic 2024-02-28 15:12:05 -05:00
parent db22bd5440
commit 4b911ea822
7 changed files with 107 additions and 8 deletions

View File

@ -10,7 +10,7 @@
- [Playground v2.5](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic)
- new model version from Playground: based on SDXL, but with some cool new concepts
- download using networks -> reference
- set sampler to DPM++ 2M EDM or Euler EDM
- set sampler to *DPM++ 2M EDM* or *Euler EDM* (EDM is a new family of samplers)
- **Image2Video**
- new module for creating videos from images
- simply enable from *img2img -> scripts -> image2video*
@ -29,6 +29,7 @@
- default theme updates and additional built-in theme *black-gray*
- add **ROCm** 6.0 nightly option to installer, thanks @jicka
- support models with their own YAML model config files
- support models with their own JSON per-component config files, for example: `playground-v2.5_vae.config`
- **Internal**
- remove obsolete textual inversion training code
- remove obsolete hypernetworks training code

View File

@ -0,0 +1,43 @@
{
"_class_name": "AutoencoderKL",
"_diffusers_version": "0.27.0.dev0",
"act_fn": "silu",
"block_out_channels": [
128,
256,
512,
512
],
"down_block_types": [
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
"force_upcast": true,
"in_channels": 3,
"latent_channels": 4,
"layers_per_block": 2,
"norm_num_groups": 32,
"out_channels": 3,
"sample_size": 1024,
"up_block_types": [
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D"
],
"latents_mean": [
-1.6574,
1.886,
-1.383,
2.5155
],
"latents_std": [
8.4927,
5.9022,
6.5498,
5.2299
],
"scaling_factor": 0.5
}

View File

@ -229,7 +229,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
if 'latents' in possible and getattr(p, "init_latent", None) is not None:
args['latents'] = p.init_latent
if 'output_type' in possible:
if hasattr(model, 'vae'):
if not hasattr(model, 'vae'):
args['output_type'] = 'np' # only set latent if model has vae
# stable cascade

View File

@ -46,9 +46,20 @@ def full_vae_decode(latents, model):
model.upcast_vae()
if hasattr(model.vae, "post_quant_conv"):
latents = latents.to(next(iter(model.vae.post_quant_conv.parameters())).dtype)
decoded = model.vae.decode(latents / model.vae.config.scaling_factor, return_dict=False)[0]
# Delete PyTorch VAE after OpenVINO compile
# normalize latents
latents_mean = model.vae.config.get("latents_mean", None)
latents_std = model.vae.config.get("latents_std", None)
scaling_factor = model.vae.config.get("scaling_factor", None)
if latents_mean and latents_std:
latents_mean = (torch.tensor(latents_mean).view(1, 4, 1, 1).to(latents.device, latents.dtype))
latents_std = (torch.tensor(latents_std).view(1, 4, 1, 1).to(latents.device, latents.dtype))
latents = latents * latents_std / scaling_factor + latents_mean
else:
latents = latents / scaling_factor
decoded = model.vae.decode(latents, return_dict=False)[0]
# delete vae after OpenVINO compile
if shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae:
shared.compiled_model_state.first_pass_vae = False
if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "vae"):

View File

@ -780,6 +780,50 @@ def get_load_config(model_file, model_type):
return None
def patch_diffuser_config(sd_model, model_file):
    """Apply per-component JSON config overrides to a loaded diffusers pipeline.

    For each component of the pipeline (vae, unet, ...), look for an override
    file named `<model_basename>_<component>.json` next to the model file, or
    under `paths.sd_configs_path`, and merge any differing non-private keys
    into that component's config (e.g. `playground-v2.5_vae.json`).

    Also forces `unet.config.in_channels = 9` for inpainting models detected
    by filename.

    Args:
        sd_model: loaded diffusers pipeline, or None.
        model_file: path to the model checkpoint the pipeline was loaded from.

    Returns:
        The (possibly patched) pipeline; returns the input unchanged when it
        is None or has no `_internal_dict` of components.
    """
    def load_config(fn, k):
        # note: do not shadow the outer `model_file` parameter here
        base = os.path.splitext(fn)[0]
        cfg_file = f'{base}_{k}.json'
        try:
            if os.path.exists(cfg_file):
                with open(cfg_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            # fall back to a config shipped in the configs folder, matched by basename
            cfg_file = f'{os.path.join(paths.sd_configs_path, os.path.basename(base))}_{k}.json'
            if os.path.exists(cfg_file):
                with open(cfg_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
        except Exception:
            # best-effort: a missing/malformed override file must not block model load
            pass
        return {}
    if sd_model is None:
        return sd_model
    # inpainting models need a 9-channel unet input (4 latent + 4 masked-image + 1 mask)
    if hasattr(sd_model, 'unet') and hasattr(sd_model.unet, 'config') and 'inpaint' in model_file.lower():
        if debug_load:
            shared.log.debug('Model config patch: type=inpaint')
        sd_model.unet.config.in_channels = 9
    if not hasattr(sd_model, '_internal_dict'):
        return sd_model
    for c in sd_model._internal_dict.keys():  # pylint: disable=protected-access
        component = getattr(sd_model, c, None)
        if hasattr(component, 'config'):
            if debug_load:
                shared.log.debug(f'Model config: component={c} config={component.config}')
            override = load_config(model_file, c)
            updated = {}
            for k, v in override.items():
                if k.startswith('_'):  # skip private keys such as _class_name
                    continue
                if v != component.config.get(k, None):
                    # diffusers FrozenDict refuses writes; unfreeze before patching
                    if hasattr(component.config, '__frozen'):
                        component.config.__frozen = False  # pylint: disable=protected-access
                    component.config[k] = v
                    updated[k] = v
            if updated and debug_load:
                shared.log.debug(f'Model config: component={c} override={updated}')
    return sd_model
def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=None, op='model'): # pylint: disable=unused-argument
if shared.cmd_opts.profile:
import cProfile
@ -966,9 +1010,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
else:
sd_hijack_accelerate.restore_accelerate()
sd_model = pipeline.from_single_file(checkpoint_info.path, **diffusers_load_config)
if sd_model is not None and hasattr(sd_model, 'unet') and hasattr(sd_model.unet, 'config') and 'inpainting' in checkpoint_info.path.lower():
shared.log.debug('Model patch: type=inpaint')
sd_model.unet.config.in_channels = 9
sd_model = patch_diffuser_config(sd_model, checkpoint_info.path)
elif hasattr(pipeline, 'from_ckpt'):
sd_model = pipeline.from_ckpt(checkpoint_info.path, **diffusers_load_config)
else:

View File

@ -146,5 +146,7 @@ class DiffusionSampler:
if key not in possible:
shared.log.warning(f'Sampler: sampler="{name}" config={self.config} invalid={key}')
del self.config[key]
# shared.log.debug(f'Sampler: sampler="{name}" config={self.config}')
self.sampler = constructor(**self.config)
# shared.log.debug(f'Sampler: class="{self.sampler.__class__.__name__}" config={self.sampler.config}')
self.sampler.name = name

View File

@ -55,7 +55,7 @@ pandas
protobuf==3.20.3
pytorch_lightning==1.9.4
tokenizers==0.15.2
transformers==4.37.2
transformers==4.38.1
tomesd==0.1.3
urllib3==1.26.18
Pillow==10.2.0