add support for json configs per model component

pull/2932/head^2
Vladimir Mandic 2024-02-28 15:12:05 -05:00
parent db22bd5440
commit 4b911ea822
7 changed files with 107 additions and 8 deletions

View File

@ -10,7 +10,7 @@
- [Playground v2.5](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic)
- new model version from Playground: based on SDXL, but with some cool new concepts
- download using networks -> reference
- set sampler to DPM++ 2M EDM or Euler EDM
- set sampler to *DPM++ 2M EDM* or *Euler EDM* (EDM is a new family of samplers)
- **Image2Video**
- new module for creating videos from images
- simply enable from *img2img -> scripts -> image2video*
@ -29,6 +29,7 @@
- default theme updates and additional built-in theme *black-gray*
- add **ROCm** 6.0 nightly option to installer, thanks @jicka
- support models with their own YAML model config files
- support models with their own JSON per-component config files, for example: `playground-v2.5_vae.config`
- **Internal**
- remove obsolete textual inversion training code
- remove obsolete hypernetworks training code

View File

@ -0,0 +1,43 @@
{
"_class_name": "AutoencoderKL",
"_diffusers_version": "0.27.0.dev0",
"act_fn": "silu",
"block_out_channels": [
128,
256,
512,
512
],
"down_block_types": [
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
"force_upcast": true,
"in_channels": 3,
"latent_channels": 4,
"layers_per_block": 2,
"norm_num_groups": 32,
"out_channels": 3,
"sample_size": 1024,
"up_block_types": [
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D"
],
"latents_mean": [
-1.6574,
1.886,
-1.383,
2.5155
],
"latents_std": [
8.4927,
5.9022,
6.5498,
5.2299
],
"scaling_factor": 0.5
}

View File

@ -229,7 +229,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
if 'latents' in possible and getattr(p, "init_latent", None) is not None:
args['latents'] = p.init_latent
if 'output_type' in possible:
if hasattr(model, 'vae'):
if not hasattr(model, 'vae'):
args['output_type'] = 'np' # only set latent if model has vae
# stable cascade

View File

@ -46,9 +46,20 @@ def full_vae_decode(latents, model):
model.upcast_vae()
if hasattr(model.vae, "post_quant_conv"):
latents = latents.to(next(iter(model.vae.post_quant_conv.parameters())).dtype)
decoded = model.vae.decode(latents / model.vae.config.scaling_factor, return_dict=False)[0]
# Delete PyTorch VAE after OpenVINO compile
# normalize latents
latents_mean = model.vae.config.get("latents_mean", None)
latents_std = model.vae.config.get("latents_std", None)
scaling_factor = model.vae.config.get("scaling_factor", None)
if latents_mean and latents_std:
latents_mean = (torch.tensor(latents_mean).view(1, 4, 1, 1).to(latents.device, latents.dtype))
latents_std = (torch.tensor(latents_std).view(1, 4, 1, 1).to(latents.device, latents.dtype))
latents = latents * latents_std / scaling_factor + latents_mean
else:
latents = latents / scaling_factor
decoded = model.vae.decode(latents, return_dict=False)[0]
# delete vae after OpenVINO compile
if shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae:
shared.compiled_model_state.first_pass_vae = False
if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "vae"):

View File

@ -780,6 +780,50 @@ def get_load_config(model_file, model_type):
return None
def patch_diffuser_config(sd_model, model_file):
    """Apply per-component JSON config overrides to a loaded diffusers pipeline.

    For each component of the pipeline (vae, unet, ...), look for an override
    file named `<model_basename>_<component>.json` next to the model file, or
    under `paths.sd_configs_path`, and merge any differing non-private keys
    into that component's config (e.g. `playground-v2.5_vae.json`).

    Also forces `unet.config.in_channels = 9` for inpainting models detected
    by filename.

    Args:
        sd_model: loaded diffusers pipeline, or None.
        model_file: path to the model checkpoint the pipeline was loaded from.

    Returns:
        The (possibly patched) pipeline; returns the input unchanged when it
        is None or has no `_internal_dict` of components.
    """
    def load_config(fn, k):
        # note: do not shadow the outer `model_file` parameter here
        base = os.path.splitext(fn)[0]
        cfg_file = f'{base}_{k}.json'
        try:
            if os.path.exists(cfg_file):
                with open(cfg_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            # fall back to a config shipped in the configs folder, matched by basename
            cfg_file = f'{os.path.join(paths.sd_configs_path, os.path.basename(base))}_{k}.json'
            if os.path.exists(cfg_file):
                with open(cfg_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
        except Exception:
            # best-effort: a missing/malformed override file must not block model load
            pass
        return {}
    if sd_model is None:
        return sd_model
    # inpainting models need a 9-channel unet input (4 latent + 4 masked-image + 1 mask)
    if hasattr(sd_model, 'unet') and hasattr(sd_model.unet, 'config') and 'inpaint' in model_file.lower():
        if debug_load:
            shared.log.debug('Model config patch: type=inpaint')
        sd_model.unet.config.in_channels = 9
    if not hasattr(sd_model, '_internal_dict'):
        return sd_model
    for c in sd_model._internal_dict.keys():  # pylint: disable=protected-access
        component = getattr(sd_model, c, None)
        if hasattr(component, 'config'):
            if debug_load:
                shared.log.debug(f'Model config: component={c} config={component.config}')
            override = load_config(model_file, c)
            updated = {}
            for k, v in override.items():
                if k.startswith('_'):  # skip private keys such as _class_name
                    continue
                if v != component.config.get(k, None):
                    # diffusers FrozenDict refuses writes; unfreeze before patching
                    if hasattr(component.config, '__frozen'):
                        component.config.__frozen = False  # pylint: disable=protected-access
                    component.config[k] = v
                    updated[k] = v
            if updated and debug_load:
                shared.log.debug(f'Model config: component={c} override={updated}')
    return sd_model
def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=None, op='model'): # pylint: disable=unused-argument
if shared.cmd_opts.profile:
import cProfile
@ -966,9 +1010,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
else:
sd_hijack_accelerate.restore_accelerate()
sd_model = pipeline.from_single_file(checkpoint_info.path, **diffusers_load_config)
if sd_model is not None and hasattr(sd_model, 'unet') and hasattr(sd_model.unet, 'config') and 'inpainting' in checkpoint_info.path.lower():
shared.log.debug('Model patch: type=inpaint')
sd_model.unet.config.in_channels = 9
sd_model = patch_diffuser_config(sd_model, checkpoint_info.path)
elif hasattr(pipeline, 'from_ckpt'):
sd_model = pipeline.from_ckpt(checkpoint_info.path, **diffusers_load_config)
else:

View File

@ -146,5 +146,7 @@ class DiffusionSampler:
if key not in possible:
shared.log.warning(f'Sampler: sampler="{name}" config={self.config} invalid={key}')
del self.config[key]
# shared.log.debug(f'Sampler: sampler="{name}" config={self.config}')
self.sampler = constructor(**self.config)
# shared.log.debug(f'Sampler: class="{self.sampler.__class__.__name__}" config={self.sampler.config}')
self.sampler.name = name

View File

@ -55,7 +55,7 @@ pandas
protobuf==3.20.3
pytorch_lightning==1.9.4
tokenizers==0.15.2
transformers==4.37.2
transformers==4.38.1
tomesd==0.1.3
urllib3==1.26.18
Pillow==10.2.0