Fix prompt parser for SDXL and enable offloading

pull/1985/head
Vladimir Mandic 2023-08-10 21:20:56 +00:00
parent 5bcd65d4c2
commit f52249d5a8
9 changed files with 65 additions and 43 deletions

View File

@ -562,18 +562,17 @@
{"id":"","label":"Token merging ratio","localized":"","hint":"Enable redundant token merging via tomesd for speed and memory improvements, 0=disabled"},
{"id":"","label":"Token merging ratio for img2img","localized":"","hint":"Enable redundant token merging for img2img via tomesd for speed and memory improvements, 0=disabled"},
{"id":"","label":"Token merging ratio for hires pass","localized":"","hint":"Enable redundant token merging for hires pass via tomesd for speed and memory improvements, 0=disabled"},
{"id":"","label":"Diffusers allow loading from safetensors files","localized":"","hint":"Allow loading of safetensors files as diffuser models"},
{"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"","hint":""},
{"id":"","label":"Move base model to CPU when using refiner","localized":"","hint":""},
{"id":"","label":"Move refiner model to CPU when not in use","localized":"","hint":""},
{"id":"","label":"Move UNet to CPU while VAE decoding","localized":"","hint":""},
{"id":"","label":"Use model EMA weights when possible","localized":"","hint":""},
{"id":"","label":"Generator device","localized":"","hint":""},
{"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time approximately 10%. Use with Enable Attention slicing for minimal memory consumption"},
{"id":"","label":"Enable model CPU offload","localized":"","hint":"Transferring of entire models to the CPU, negligible impact on inference time while still providing some memory savings. Use with Enable Attention slicing for additional memory savings"},
{"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batch latents one image at a time with limited VRAM. Small performance boost in VAE decode on multi-image batches. Use with Enable Attention slicing"},
{"id":"","label":"Enable VAE tiling","localized":"","hint":"Divide large images into overlapping tiles with limited VRAM. Might result in a minor increase in processing time. Use with Enable Attention Slicing"},
{"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. 10% slower inference times. Greatly reduces memory usage. Best used, period"},
{"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time approximately 10%"},
{"id":"","label":"Enable model CPU offload","localized":"","hint":"Transferring of entire models to the CPU, negligible impact on inference time while still providing some memory savings"},
{"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batch latents one image at a time with limited VRAM. Small performance boost in VAE decode on multi-image batches"},
{"id":"","label":"Enable VAE tiling","localized":"","hint":"Divide large images into overlapping tiles with limited VRAM. Results in a minor increase in processing time"},
{"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. Slower inference times, but greatly reduced memory usage"},
{"id":"","label":"Diffusers model loading variant","localized":"","hint":""},
{"id":"","label":"Diffusers VAE loading variant","localized":"","hint":""},
{"id":"","label":"Diffusers LoRA loading variant","localized":"","hint":"'sequential apply' loads and applies each LoRA in order of appearance, 'merge and apply' loads all LoRAs and merges them in-memory before applying to model, 'diffusers default' uses single LoRA loading method"}

View File

@ -562,7 +562,6 @@
{"id":"","label":"Token merging ratio","localized":"토큰 병합 비율","hint":"속도와 메모리 절감을 위해 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
{"id":"","label":"Token merging ratio for img2img","localized":"이미지➠이미지 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 이미지➠이미지에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
{"id":"","label":"Token merging ratio for hires pass","localized":"텍스트➠이미지 업스케일링(Hires fix) 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 Hires fix에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
{"id":"","label":"Diffusers allow loading from safetensors files","localized":"safetensors 파일에서 로드 허용","hint":"safetensors 파일을 Diffusers 모델로 로드할 수 있게 한다."},
{"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"safetensors 파일에서 로드할 때 사용할 파이프라인 선택","hint":""},
{"id":"","label":"Move base model to CPU when using refiner","localized":"리파이너를 사용 중일 때 base 모델을 CPU로 이동","hint":""},
{"id":"","label":"Move refiner model to CPU when not in use","localized":"사용 중이지 않을 때 리파이너 모델을 CPU로 이동","hint":""},

View File

@ -186,8 +186,6 @@ def install(package, friendly: str = None, ignore: bool = False):
if args.reinstall or args.upgrade:
global quick_allowed # pylint: disable=global-statement
quick_allowed = False
if args.use_ipex and "accelerate==" in package:
package = "accelerate==0.20.3"
if args.reinstall or not installed(package, friendly):
pip(f"install --upgrade {package}", ignore=ignore)

View File

@ -24,10 +24,10 @@ def unload_diffusers_lora():
lora_state['all_loras'].reverse()
lora_state['multiplier'].reverse()
for i, lora_network in enumerate(lora_state['all_loras']):
if shared.opts.diffusers_lora_loader == "merge and apply":
lora_network.restore_from(multiplier=lora_state['multiplier'][i])
if shared.opts.diffusers_lora_loader == "sequential apply":
lora_network.unapply_to()
if shared.opts.diffusers_lora_loader == "merge and apply":
lora_network.restore_from(multiplier=lora_state['multiplier'][i])
if shared.opts.diffusers_lora_loader == "sequential apply":
lora_network.unapply_to()
lora_state['active'] = False
lora_state['loaded'] = 0
lora_state['all_loras'] = []
@ -45,7 +45,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
lora_state['multiplier'].append(strength)
if shared.opts.diffusers_lora_loader == "diffusers default":
pipe.load_lora_weights(lora.filename, cache_dir=shared.opts.diffusers_dir, local_files_only=True, lora_scale=strength)
shared.log.info(f"Diffusers LoRA loaded: {name} {lora_state['multiplier']}")
shared.log.info(f"LoRA loaded: {name} {lora_state['multiplier']}")
else:
from safetensors.torch import load_file
lora_sd = load_file(lora.filename)
@ -61,7 +61,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
lora_network.to(shared.device, dtype=pipe.unet.dtype)
lora_network.apply_to(multiplier=strength)
lora_state['all_loras'].append(lora_network)
shared.log.info(f"Diffusers LoRA loaded: {name} {strength}")
shared.log.info(f"LoRA loaded: {name}:{strength} loader={shared.opts.diffusers_lora_loader}")
except Exception as e:
shared.log.error(f"Diffusers LoRA loading failed: {name} {e}")
@ -332,7 +332,7 @@ def merge_lora_weights(pipe, weights_sd: Dict, multiplier: float = 1.0):
# block weightや学習に対応しない簡易版 / simple version without block weight and training
class LoRANetwork(torch.nn.Module):
class LoRANetwork(torch.nn.Module): # pylint: disable=abstract-method
UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"]
UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"]
TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
@ -350,17 +350,17 @@ class LoRANetwork(torch.nn.Module):
multiplier: float = 1.0,
modules_dim: Optional[Dict[str, int]] = None,
modules_alpha: Optional[Dict[str, int]] = None,
varbose: Optional[bool] = False,
varbose: Optional[bool] = False, # pylint: disable=unused-argument
) -> None:
super().__init__()
self.multiplier = multiplier
shared.log.debug("create LoRA network from weights")
# shared.log.debug("create LoRA network from weights")
# convert SDXL Stability AI's U-Net modules to Diffusers
converted = self.convert_unet_modules(modules_dim, modules_alpha)
if converted:
shared.log.debug(f"converted {converted} Stability AI's U-Net LoRA modules to Diffusers (SDXL)")
shared.log.debug(f"LoRA convert: modules={converted} SDXL SAI/SGM to Diffusers")
# create module instances
def create_modules(
@ -422,18 +422,13 @@ class LoRANetwork(torch.nn.Module):
text_encoder_loras, skipped = create_modules(False, index, text_encoder, LoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE)
self.text_encoder_loras.extend(text_encoder_loras)
skipped_te += skipped
shared.log.debug(f"create LoRA for Text Encoder: {len(self.text_encoder_loras)} modules.")
if len(skipped_te) > 0:
shared.log.debug(f"skipped {len(skipped_te)} modules because of missing weight.")
# extend U-Net target modules to include Conv2d 3x3
target_modules = LoRANetwork.UNET_TARGET_REPLACE_MODULE + LoRANetwork.UNET_TARGET_REPLACE_MODULE_CONV2D_3X3
self.unet_loras: List[LoRAModule]
self.unet_loras, skipped_un = create_modules(True, None, unet, target_modules)
shared.log.debug(f"create LoRA for U-Net: {len(self.unet_loras)} modules.")
if len(skipped_un) > 0:
shared.log.debug(f"skipped {len(skipped_un)} modules because of missing weight.")
shared.log.debug(f"LoRA modules loaded/skipped: te={len(self.text_encoder_loras)}/{len(skipped_te)} unet={len(self.unet_loras)}/skip={len(skipped_un)}")
# assertion
names = set()
@ -480,11 +475,11 @@ class LoRANetwork(torch.nn.Module):
def apply_to(self, multiplier=1.0, apply_text_encoder=True, apply_unet=True):
if apply_text_encoder:
shared.log.debug("enable LoRA for text encoder")
# shared.log.debug("LoRA apply for text encoder")
for lora in self.text_encoder_loras:
lora.apply_to(multiplier)
if apply_unet:
shared.log.debug("enable LoRA for U-Net")
# shared.log.debug("LoRA apply for U-Net")
for lora in self.unet_loras:
lora.apply_to(multiplier)
@ -493,16 +488,14 @@ class LoRANetwork(torch.nn.Module):
lora.unapply_to()
def merge_to(self, multiplier=1.0):
shared.log.debug("merge LoRA weights to original weights")
# shared.log.debug("LoRA merge weights for text encoder")
for lora in tqdm(self.text_encoder_loras + self.unet_loras):
lora.merge_to(multiplier)
shared.log.debug("weights are merged")
def restore_from(self, multiplier=1.0):
shared.log.debug("restore LoRA weights from original weights")
# shared.log.debug("LoRA restore weights")
for lora in tqdm(self.text_encoder_loras + self.unet_loras):
lora.restore_from(multiplier)
shared.log.debug("weights are restored")
def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
# convert SDXL Stability AI's state dict to Diffusers' based state dict
@ -527,4 +520,3 @@ class LoRANetwork(torch.nn.Module):
state_dict[key] = state_dict[key].view(my_state_dict[key].size())
return super().load_state_dict(state_dict, strict)

View File

@ -52,6 +52,24 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
imgs = model.image_processor.postprocess(decoded, output_type=output_type)
return imgs
def fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2):
    """Normalize the four prompt arguments for a diffusers pipeline call.

    Coerces single strings into one-element lists and pads the negative and
    secondary (SDXL) prompt lists by repeating their last entry until they
    match the primary prompt count. ``prompts_2``/``negative_prompts_2`` may
    be ``None`` (non-SDXL pipelines) and are then passed through unchanged.

    Returns the tuple ``(prompts, negative_prompts, prompts_2, negative_prompts_2)``.
    """
    if isinstance(prompts, str):
        prompts = [prompts]
    if isinstance(negative_prompts, str):
        negative_prompts = [negative_prompts]
    # pad to one negative per prompt; fall back to '' so an empty list
    # cannot raise IndexError on negative_prompts[-1]
    while len(negative_prompts) < len(prompts):
        negative_prompts.append(negative_prompts[-1] if negative_prompts else '')
    if isinstance(prompts_2, str):
        prompts_2 = [prompts_2]
    if isinstance(prompts_2, list):
        while len(prompts_2) < len(prompts):
            prompts_2.append(prompts_2[-1] if prompts_2 else '')
    if isinstance(negative_prompts_2, str):
        negative_prompts_2 = [negative_prompts_2]
    # guard both operands: padding against len(prompts_2) when prompts_2 is
    # None raised TypeError in the original version
    if isinstance(negative_prompts_2, list) and isinstance(prompts_2, list):
        while len(negative_prompts_2) < len(prompts_2):
            negative_prompts_2.append(negative_prompts_2[-1] if negative_prompts_2 else '')
    return prompts, negative_prompts, prompts_2, negative_prompts_2
def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: typing.Optional[list]=None, negative_prompts_2: typing.Optional[list]=None, is_refiner: bool=False, **kwargs):
args = {}
@ -64,6 +82,7 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
pooled = None
negative_embed = None
negative_pooled = None
prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2)
if shared.opts.data['prompt_attention'] in {'Compel parser', 'Full parser'}:
prompt_embed, pooled, negative_embed, negative_pooled = prompt_parser_diffusers.compel_encode_prompts(model,
prompts,

View File

@ -47,7 +47,12 @@ def compel_encode_prompts(
negative_embeds = []
negative_pooleds = []
for i in range(len(prompts)):
prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline, prompts[i], negative_prompts[i], prompts_2[i], negative_prompts_2[i], is_refiner, clip_skip)
prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline,
prompts[i],
negative_prompts[i],
prompts_2[i] if prompts_2 is not None else None,
negative_prompts_2[i] if negative_prompts_2 is not None else None,
is_refiner, clip_skip)
prompt_embeds.append(prompt_embed)
positive_pooleds.append(positive_pooled)
negative_embeds.append(negative_embed)

View File

@ -136,12 +136,9 @@ def list_models():
checkpoints_list.clear()
checkpoint_aliases.clear()
ext_filter=[".safetensors"] if shared.opts.sd_disable_ckpt else [".ckpt", ".safetensors"]
model_list = []
if shared.backend == shared.Backend.ORIGINAL or shared.opts.diffusers_allow_safetensors:
model_list += modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
model_list = modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
if shared.backend == shared.Backend.DIFFUSERS:
model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir)
for filename in sorted(model_list, key=str.lower):
checkpoint_info = CheckpointInfo(filename)
if checkpoint_info.name is not None:
@ -844,7 +841,6 @@ def set_diffuser_pipe(pipe, new_pipe_type):
new_pipe = diffusers.AutoPipelineForImage2Image.from_pipe(pipe)
elif new_pipe_type == DiffusersTaskType.INPAINTING:
new_pipe = diffusers.AutoPipelineForInpainting.from_pipe(pipe)
if pipe.__class__ == new_pipe.__class__:
return
@ -1030,20 +1026,35 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model')
shared.log.info(f"Weights loaded in {timer.summary()}")
def disable_offload(sd_model):
    """Strip accelerate CPU-offload hooks from every torch module in the pipeline.

    No-op when the pipeline was never wrapped by accelerate
    (``sd_model.has_accelerate`` falsy). Mutates the pipeline's component
    modules in place; returns None.
    """
    if not sd_model.has_accelerate:
        return
    # deferred import: only needed when hooks actually exist, and it keeps the
    # no-op path working even if accelerate is unavailable
    from accelerate.hooks import remove_hook_from_module
    for _name, model in sd_model.components.items():
        if not isinstance(model, torch.nn.Module):
            continue
        remove_hook_from_module(model, recurse=True)
# Unloads the main model or the refiner ('model'/'dict' vs anything else in op),
# moving weights off-device and releasing references before a forced GC.
# NOTE(review): this span is a diff view that interleaves removed and added
# lines (the duplicated `.to(devices.cpu)` calls below); structure is preserved
# as shown — confirm against the committed file before relying on it.
def unload_model_weights(op='model'):
from modules import sd_hijack
if op == 'model' or op == 'dict':
if model_data.sd_model:
model_data.sd_model.to(devices.cpu)
# ORIGINAL backend: move to CPU and undo hijack; DIFFUSERS backend:
# remove accelerate offload hooks, then park weights on the meta device
if shared.backend == shared.Backend.ORIGINAL:
model_data.sd_model.to(devices.cpu)
sd_hijack.model_hijack.undo_hijack(model_data.sd_model)
else:
disable_offload(model_data.sd_model)
model_data.sd_model.to('meta')
model_data.sd_model = None
shared.log.debug(f'Unload weights {op}: {memory_stats()}')
else:
if model_data.sd_refiner:
model_data.sd_refiner.to(devices.cpu)
model_data.sd_refiner.to('meta')
if shared.backend == shared.Backend.ORIGINAL:
sd_hijack.model_hijack.undo_hijack(model_data.sd_refiner)
else:
# NOTE(review): this passes sd_model in the refiner branch — looks like
# it should be model_data.sd_refiner; verify against upstream
disable_offload(model_data.sd_model)
model_data.sd_refiner = None
shared.log.debug(f'Unload weights {op}: {memory_stats()}')
# force garbage collection / VRAM release after dropping references
devices.torch_gc(force=True)

View File

@ -395,11 +395,10 @@ options_templates.update(options_section(('cuda', "Compute Settings"), {
}))
options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
"diffusers_allow_safetensors": OptionInfo(True, 'Diffusers allow loading from safetensors files'),
"diffusers_pipeline": OptionInfo(pipelines[0], 'Diffusers pipeline', gr.Dropdown, lambda: {"choices": pipelines}),
"diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner"),
"diffusers_move_unet": OptionInfo(False, "Move base model to CPU when using VAE"),
"diffusers_move_refiner": OptionInfo(True, "Move refiner model to CPU when not in use"),
"diffusers_move_unet": OptionInfo(False, "Move UNet to CPU while VAE decoding"),
"diffusers_extract_ema": OptionInfo(True, "Use model EMA weights when possible"),
"diffusers_generator_device": OptionInfo("default", "Generator device", gr.Radio, lambda: {"choices": ["default", "cpu"]}),
"diffusers_seq_cpu_offload": OptionInfo(False, "Enable sequential CPU offload"),

View File

@ -46,7 +46,7 @@ typing-extensions==4.7.1
antlr4-python3-runtime==4.9.3
requests==2.31.0
tqdm==4.65.0
accelerate==0.21.0
accelerate==0.20.3
opencv-python-headless==4.7.0.72
diffusers==0.19.3
einops==0.4.1