diff --git a/html/locale_en.json b/html/locale_en.json
index cbfd34342..c7b5c77f3 100644
--- a/html/locale_en.json
+++ b/html/locale_en.json
@@ -562,18 +562,17 @@
   {"id":"","label":"Token merging ratio","localized":"","hint":"Enable redundant token merging via tomesd for speed and memory improvements, 0=disabled"},
   {"id":"","label":"Token merging ratio for img2img","localized":"","hint":"Enable redundant token merging for img2img via tomesd for speed and memory improvements, 0=disabled"},
   {"id":"","label":"Token merging ratio for hires pass","localized":"","hint":"Enable redundant token merging for hires pass via tomesd for speed and memory improvements, 0=disabled"},
-  {"id":"","label":"Diffusers allow loading from safetensors files","localized":"","hint":"Allow loading of safetensors files as diffuser models"},
   {"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"","hint":""},
   {"id":"","label":"Move base model to CPU when using refiner","localized":"","hint":""},
   {"id":"","label":"Move refiner model to CPU when not in use","localized":"","hint":""},
   {"id":"","label":"Move UNet to CPU while VAE decoding","localized":"","hint":""},
   {"id":"","label":"Use model EMA weights when possible","localized":"","hint":""},
   {"id":"","label":"Generator device","localized":"","hint":""},
-  {"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time approximately 10%. Use with Enable Attention slicing for minimal memory consumption"},
-  {"id":"","label":"Enable model CPU offload","localized":"","hint":"Transferring of entire models to the CPU, negligible impact on inference time while still providing some memory savings. Use with Enable Attention slicing for additional memory savings"},
-  {"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batch latents one image at a time with limited VRAM. Small performance boost in VAE decode on multi-image batches. Use with Enable Attention slicing"},
-  {"id":"","label":"Enable VAE tiling","localized":"","hint":"Divide large images into overlapping tiles with limited VRAM. Might result in a minor increase in processing time. Use with Enable Attention Slicing"},
-  {"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. 10% slower inference times. Greatly reduces memory usage. Best used, period"},
+  {"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time by approximately 10%"},
+  {"id":"","label":"Enable model CPU offload","localized":"","hint":"Transfers entire models to the CPU. Negligible impact on inference time while still providing some memory savings"},
+  {"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batched latents one image at a time to limit VRAM usage. Small performance boost in VAE decode on multi-image batches"},
+  {"id":"","label":"Enable VAE tiling","localized":"","hint":"Divides large images into overlapping tiles to limit VRAM usage. Results in a minor increase in processing time"},
+  {"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. Slower inference times, but greatly reduced memory usage"},
   {"id":"","label":"Diffusers model loading variant","localized":"","hint":""},
   {"id":"","label":"Diffusers VAE loading variant","localized":"","hint":""},
   {"id":"","label":"Diffusers LoRA loading variant","localized":"","hint":"'sequential apply' loads and applies each LoRA in order of appearance, 'merge and apply' loads all LoRAs and merges them in-memory before applying to model, 'diffusers default' uses single LoRA loading method"}
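Note: the trimmed hints above all describe standard diffusers memory optimizations. A minimal sketch of how such toggles typically map onto the real diffusers API; the `enable_*` methods exist on diffusers pipelines, while the helper and every flag name except `diffusers_seq_cpu_offload` (which appears in `modules/shared.py` later in this diff) are illustrative assumptions:

```python
from diffusers import DiffusionPipeline

def apply_memory_opts(pipe: DiffusionPipeline,
                      seq_cpu_offload: bool = False,
                      model_cpu_offload: bool = False,
                      vae_slicing: bool = True,
                      vae_tiling: bool = False,
                      attention_slicing: bool = True) -> None:
    # sequential and model offload are mutually exclusive: pick at most one
    if seq_cpu_offload:
        pipe.enable_sequential_cpu_offload()  # streams weights layer by layer; lowest VRAM, ~10% slower
    elif model_cpu_offload:
        pipe.enable_model_cpu_offload()       # moves whole submodels; some savings, near-zero slowdown
    if vae_slicing:
        pipe.enable_vae_slicing()             # decode batched latents one image at a time
    if vae_tiling:
        pipe.enable_vae_tiling()              # decode large images as overlapping tiles
    if attention_slicing:
        pipe.enable_attention_slicing()       # compute attention in steps; slower but much leaner
```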
diff --git a/html/locale_ko.json b/html/locale_ko.json
index 1dc3600e2..f85119fd0 100644
--- a/html/locale_ko.json
+++ b/html/locale_ko.json
@@ -562,7 +562,6 @@
   {"id":"","label":"Token merging ratio","localized":"토큰 병합 비율","hint":"속도와 메모리 절감을 위해 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
   {"id":"","label":"Token merging ratio for img2img","localized":"이미지➠이미지 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 이미지➠이미지에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
   {"id":"","label":"Token merging ratio for hires pass","localized":"텍스트➠이미지 업스케일링(Hires fix) 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 Hires fix에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
-  {"id":"","label":"Diffusers allow loading from safetensors files","localized":"safetensors 파일에서 로드 허용","hint":"safetensors 파일을 Diffusers 모델로 로드할 수 있게 한다."},
   {"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"safetensors 파일에서 로드할 때 사용할 파이프라인 선택","hint":""},
   {"id":"","label":"Move base model to CPU when using refiner","localized":"리파이너를 사용 중일 때 base 모델을 CPU로 이동","hint":""},
   {"id":"","label":"Move refiner model to CPU when not in use","localized":"사용 중이지 않을 때 리파이너 모델을 CPU로 이동","hint":""},
diff --git a/installer.py b/installer.py
index 1376a5d33..b4bfb7444 100644
--- a/installer.py
+++ b/installer.py
@@ -186,8 +186,6 @@ def install(package, friendly: str = None, ignore: bool = False):
     if args.reinstall or args.upgrade:
         global quick_allowed # pylint: disable=global-statement
         quick_allowed = False
-    if args.use_ipex and "accelerate==" in package:
-        package = "accelerate==0.20.3"
     if args.reinstall or not installed(package, friendly):
         pip(f"install --upgrade {package}", ignore=ignore)
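Note: this removal pairs with the `requirements.txt` change at the end of this diff, which drops accelerate from 0.21.0 to 0.20.3 for every backend, so the IPEX-only override becomes dead code. A runnable illustration of the simplified flow; `installed()` and `pip()` are the installer.py helpers, stubbed here as assumptions so the sketch runs standalone:

```python
def installed(package: str) -> bool:  # stub: the real helper checks pkg==ver specs
    return False

def pip(cmd: str) -> None:  # stub: the real helper shells out to python -m pip
    print(f"pip {cmd}")

def install(package: str, reinstall: bool = False) -> None:
    # accelerate is no longer special-cased for IPEX: requirements.txt now
    # pins accelerate==0.20.3 for all backends
    if reinstall or not installed(package):
        pip(f"install --upgrade {package}")

install("accelerate==0.20.3")  # same pin for CUDA, ROCm and IPEX users alike
```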
diff --git a/modules/lora_diffusers.py b/modules/lora_diffusers.py
index f8ab54dde..b2fe8fc00 100644
--- a/modules/lora_diffusers.py
+++ b/modules/lora_diffusers.py
@@ -24,10 +24,10 @@ def unload_diffusers_lora():
     lora_state['all_loras'].reverse()
     lora_state['multiplier'].reverse()
     for i, lora_network in enumerate(lora_state['all_loras']):
-            if shared.opts.diffusers_lora_loader == "merge and apply":
-                lora_network.restore_from(multiplier=lora_state['multiplier'][i])
-            if shared.opts.diffusers_lora_loader == "sequential apply":
-                lora_network.unapply_to()
+        if shared.opts.diffusers_lora_loader == "merge and apply":
+            lora_network.restore_from(multiplier=lora_state['multiplier'][i])
+        if shared.opts.diffusers_lora_loader == "sequential apply":
+            lora_network.unapply_to()
     lora_state['active'] = False
     lora_state['loaded'] = 0
     lora_state['all_loras'] = []
@@ -45,7 +45,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
         lora_state['multiplier'].append(strength)
         if shared.opts.diffusers_lora_loader == "diffusers default":
            pipe.load_lora_weights(lora.filename, cache_dir=shared.opts.diffusers_dir, local_files_only=True, lora_scale=strength)
-            shared.log.info(f"Diffusers LoRA loaded: {name} {lora_state['multiplier']}")
+            shared.log.info(f"LoRA loaded: {name} {lora_state['multiplier']}")
         else:
             from safetensors.torch import load_file
             lora_sd = load_file(lora.filename)
@@ -61,7 +61,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
             lora_network.to(shared.device, dtype=pipe.unet.dtype)
             lora_network.apply_to(multiplier=strength)
             lora_state['all_loras'].append(lora_network)
-            shared.log.info(f"Diffusers LoRA loaded: {name} {strength}")
+            shared.log.info(f"LoRA loaded: {name}:{strength} loader={shared.opts.diffusers_lora_loader}")
     except Exception as e:
         shared.log.error(f"Diffusers LoRA loading failed: {name} {e}")

@@ -332,7 +332,7 @@ def merge_lora_weights(pipe, weights_sd: Dict, multiplier: float = 1.0):


 # block weightや学習に対応しない簡易版 / simple version without block weight and training
-class LoRANetwork(torch.nn.Module):
+class LoRANetwork(torch.nn.Module): # pylint: disable=abstract-method
     UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"]
     UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"]
     TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
@@ -350,17 +350,17 @@ class LoRANetwork(torch.nn.Module):
         unet,
         multiplier: float = 1.0,
         modules_dim: Optional[Dict[str, int]] = None,
         modules_alpha: Optional[Dict[str, int]] = None,
-        varbose: Optional[bool] = False,
+        varbose: Optional[bool] = False, # pylint: disable=unused-argument
     ) -> None:
         super().__init__()
         self.multiplier = multiplier
-        shared.log.debug("create LoRA network from weights")
+        # shared.log.debug("create LoRA network from weights")

         # convert SDXL Stability AI's U-Net modules to Diffusers
         converted = self.convert_unet_modules(modules_dim, modules_alpha)
         if converted:
-            shared.log.debug(f"converted {converted} Stability AI's U-Net LoRA modules to Diffusers (SDXL)")
+            shared.log.debug(f"LoRA convert: modules={converted} SDXL SAI/SGM to Diffusers")

         # create module instances
         def create_modules(
@@ -422,18 +422,13 @@ class LoRANetwork(torch.nn.Module):
             text_encoder_loras, skipped = create_modules(False, index, text_encoder, LoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE)
             self.text_encoder_loras.extend(text_encoder_loras)
             skipped_te += skipped
-        shared.log.debug(f"create LoRA for Text Encoder: {len(self.text_encoder_loras)} modules.")
-        if len(skipped_te) > 0:
-            shared.log.debug(f"skipped {len(skipped_te)} modules because of missing weight.")

         # extend U-Net target modules to include Conv2d 3x3
         target_modules = LoRANetwork.UNET_TARGET_REPLACE_MODULE + LoRANetwork.UNET_TARGET_REPLACE_MODULE_CONV2D_3X3
         self.unet_loras: List[LoRAModule]
         self.unet_loras, skipped_un = create_modules(True, None, unet, target_modules)
-        shared.log.debug(f"create LoRA for U-Net: {len(self.unet_loras)} modules.")
-        if len(skipped_un) > 0:
-            shared.log.debug(f"skipped {len(skipped_un)} modules because of missing weight.")
+        shared.log.debug(f"LoRA modules loaded/skipped: te={len(self.text_encoder_loras)}/{len(skipped_te)} unet={len(self.unet_loras)}/{len(skipped_un)}")

         # assertion
         names = set()
         for lora in self.text_encoder_loras + self.unet_loras:
@@ -480,11 +475,11 @@ class LoRANetwork(torch.nn.Module):

     def apply_to(self, multiplier=1.0, apply_text_encoder=True, apply_unet=True):
         if apply_text_encoder:
-            shared.log.debug("enable LoRA for text encoder")
+            # shared.log.debug("LoRA apply for text encoder")
             for lora in self.text_encoder_loras:
                 lora.apply_to(multiplier)
         if apply_unet:
-            shared.log.debug("enable LoRA for U-Net")
+            # shared.log.debug("LoRA apply for U-Net")
             for lora in self.unet_loras:
                 lora.apply_to(multiplier)
@@ -493,16 +488,14 @@ class LoRANetwork(torch.nn.Module):
             lora.unapply_to()

     def merge_to(self, multiplier=1.0):
-        shared.log.debug("merge LoRA weights to original weights")
+        # shared.log.debug("LoRA merge weights")
         for lora in tqdm(self.text_encoder_loras + self.unet_loras):
             lora.merge_to(multiplier)
-        shared.log.debug("weights are merged")

     def restore_from(self, multiplier=1.0):
-        shared.log.debug("restore LoRA weights from original weights")
+        # shared.log.debug("LoRA restore weights")
         for lora in tqdm(self.text_encoder_loras + self.unet_loras):
             lora.restore_from(multiplier)
-        shared.log.debug("weights are restored")

     def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
         # convert SDXL Stability AI's state dict to Diffusers' based state dict
@@ -527,4 +520,3 @@ class LoRANetwork(torch.nn.Module):
                     state_dict[key] = state_dict[key].view(my_state_dict[key].size())

         return super().load_state_dict(state_dict, strict)
-
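Note: the three `Diffusers LoRA loading variant` modes map onto distinct `LoRANetwork` code paths in the hunks above. A condensed restatement, assuming `network` is a `LoRANetwork` already built from the safetensors state dict (construction elided); `apply_lora` itself is a hypothetical wrapper, not SD.Next code:

```python
def apply_lora(pipe, network, filename: str, strength: float, loader: str) -> None:
    if loader == "diffusers default":
        # hand the file to diffusers itself; supports a single LoRA at a time
        pipe.load_lora_weights(filename, lora_scale=strength)
    elif loader == "sequential apply":
        # patch each target module in order of appearance; reversible
        network.apply_to(multiplier=strength)   # undo with network.unapply_to()
    elif loader == "merge and apply":
        # bake the LoRA deltas into the original weights in-memory
        network.merge_to(multiplier=strength)   # undo with network.restore_from(multiplier=strength)
```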
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index cf84055d2..66dfbf25f 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -52,6 +52,24 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
         imgs = model.image_processor.postprocess(decoded, output_type=output_type)
         return imgs

+    def fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2):
+        if type(prompts) is str:
+            prompts = [prompts]
+        if type(negative_prompts) is str:
+            negative_prompts = [negative_prompts]
+        while len(negative_prompts) < len(prompts):
+            negative_prompts.append(negative_prompts[-1])
+        if type(prompts_2) is str:
+            prompts_2 = [prompts_2]
+        if type(prompts_2) is list:
+            while len(prompts_2) < len(prompts):
+                prompts_2.append(prompts_2[-1])
+        if type(negative_prompts_2) is str:
+            negative_prompts_2 = [negative_prompts_2]
+        if type(negative_prompts_2) is list:
+            while len(negative_prompts_2) < len(prompts_2):
+                negative_prompts_2.append(negative_prompts_2[-1])
+        return prompts, negative_prompts, prompts_2, negative_prompts_2

     def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: typing.Optional[list]=None, negative_prompts_2: typing.Optional[list]=None, is_refiner: bool=False, **kwargs):
         args = {}
@@ -64,6 +82,7 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
         prompt_embed = None
         pooled = None
         negative_embed = None
         negative_pooled = None
+        prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2)
         if shared.opts.data['prompt_attention'] in {'Compel parser', 'Full parser'}:
             prompt_embed, pooled, negative_embed, negative_pooled = prompt_parser_diffusers.compel_encode_prompts(model, prompts,
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 559845059..eaa542c35 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -47,7 +47,12 @@ def compel_encode_prompts(
     negative_embeds = []
     negative_pooleds = []
     for i in range(len(prompts)):
-        prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline, prompts[i], negative_prompts[i], prompts_2[i], negative_prompts_2[i], is_refiner, clip_skip)
+        prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline,
+            prompts[i],
+            negative_prompts[i],
+            prompts_2[i] if prompts_2 is not None else None,
+            negative_prompts_2[i] if negative_prompts_2 is not None else None,
+            is_refiner, clip_skip)
         prompt_embeds.append(prompt_embed)
         positive_pooleds.append(positive_pooled)
         negative_embeds.append(negative_embed)
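Note: `fix_prompts` normalizes strings to lists and pads the shorter lists by repeating their last entry, so every prompt index is safe to access. A behavior check with the logic copied from the hunk above, trimmed to the first two arguments (the `*_2` handling follows the same pattern):

```python
def fix_prompts(prompts, negative_prompts):
    if type(prompts) is str:
        prompts = [prompts]
    if type(negative_prompts) is str:
        negative_prompts = [negative_prompts]
    while len(negative_prompts) < len(prompts):
        negative_prompts.append(negative_prompts[-1])  # pad by repeating last entry
    return prompts, negative_prompts

print(fix_prompts(["a cat", "a dog"], "blurry"))
# (['a cat', 'a dog'], ['blurry', 'blurry'])
```

Since `prompts_2` and `negative_prompts_2` can legitimately remain `None` after normalization, `compel_encode_prompts` now guards its per-index access with the `if ... is not None` checks shown above.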
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 02a4383d6..48775d2e4 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -136,12 +136,9 @@ def list_models():
     checkpoints_list.clear()
     checkpoint_aliases.clear()
     ext_filter=[".safetensors"] if shared.opts.sd_disable_ckpt else [".ckpt", ".safetensors"]
-    model_list = []
-    if shared.backend == shared.Backend.ORIGINAL or shared.opts.diffusers_allow_safetensors:
-        model_list += modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
+    model_list = modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
     if shared.backend == shared.Backend.DIFFUSERS:
         model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir)
-
     for filename in sorted(model_list, key=str.lower):
         checkpoint_info = CheckpointInfo(filename)
         if checkpoint_info.name is not None:
@@ -844,7 +841,6 @@ def set_diffuser_pipe(pipe, new_pipe_type):
         new_pipe = diffusers.AutoPipelineForImage2Image.from_pipe(pipe)
     elif new_pipe_type == DiffusersTaskType.INPAINTING:
         new_pipe = diffusers.AutoPipelineForInpainting.from_pipe(pipe)
-
     if pipe.__class__ == new_pipe.__class__:
         return

@@ -1030,20 +1026,35 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model'):
     shared.log.info(f"Weights loaded in {timer.summary()}")


+def disable_offload(sd_model):
+    from accelerate.hooks import remove_hook_from_module
+    if not sd_model.has_accelerate:
+        return
+    for _name, model in sd_model.components.items():
+        if not isinstance(model, torch.nn.Module):
+            continue
+        remove_hook_from_module(model, recurse=True)
+
+
 def unload_model_weights(op='model'):
     from modules import sd_hijack
     if op == 'model' or op == 'dict':
         if model_data.sd_model:
-            model_data.sd_model.to(devices.cpu)
             if shared.backend == shared.Backend.ORIGINAL:
+                model_data.sd_model.to(devices.cpu)
                 sd_hijack.model_hijack.undo_hijack(model_data.sd_model)
+            else:
+                disable_offload(model_data.sd_model)
+                model_data.sd_model.to('meta')
             model_data.sd_model = None
             shared.log.debug(f'Unload weights {op}: {memory_stats()}')
     else:
         if model_data.sd_refiner:
-            model_data.sd_refiner.to(devices.cpu)
+            model_data.sd_refiner.to('meta')
             if shared.backend == shared.Backend.ORIGINAL:
                 sd_hijack.model_hijack.undo_hijack(model_data.sd_refiner)
+            else:
+                disable_offload(model_data.sd_refiner)
             model_data.sd_refiner = None
             shared.log.debug(f'Unload weights {op}: {memory_stats()}')
     devices.torch_gc(force=True)
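Note on the new unload path: a minimal demonstration of the two mechanics `disable_offload` and `unload_model_weights` rely on. The `Linear` layer here is a stand-in; in SD.Next the loop walks `sd_model.components`:

```python
import torch
from accelerate.hooks import remove_hook_from_module

net = torch.nn.Linear(4096, 4096)

# 1. strip any accelerate offload hooks first; their forward pre-hooks hold
#    references to the real weights (a no-op here, since none are attached)
remove_hook_from_module(net, recurse=True)

# 2. moving to the meta device drops parameter storage but keeps shape/dtype,
#    releasing RAM/VRAM without walking the module tree manually
net.to("meta")
print(net.weight.device)  # meta
print(net.weight.shape)   # torch.Size([4096, 4096]); metadata survives
```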
diff --git a/modules/shared.py b/modules/shared.py
index b156a4cb5..945f2a27f 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -395,11 +395,10 @@ options_templates.update(options_section(('cuda', "Compute Settings"), {
 }))

 options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
-    "diffusers_allow_safetensors": OptionInfo(True, 'Diffusers allow loading from safetensors files'),
     "diffusers_pipeline": OptionInfo(pipelines[0], 'Diffusers pipeline', gr.Dropdown, lambda: {"choices": pipelines}),
     "diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner"),
+    "diffusers_move_unet": OptionInfo(False, "Move UNet to CPU while VAE decoding"),
     "diffusers_move_refiner": OptionInfo(True, "Move refiner model to CPU when not in use"),
-    "diffusers_move_unet": OptionInfo(False, "Move UNet to CPU while VAE decoding"),
     "diffusers_extract_ema": OptionInfo(True, "Use model EMA weights when possible"),
     "diffusers_generator_device": OptionInfo("default", "Generator device", gr.Radio, lambda: {"choices": ["default", "cpu"]}),
     "diffusers_seq_cpu_offload": OptionInfo(False, "Enable sequential CPU offload"),
diff --git a/requirements.txt b/requirements.txt
index 7dac5b439..55da323bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -46,7 +46,7 @@ typing-extensions==4.7.1
antlr4-python3-runtime==4.9.3
requests==2.31.0
tqdm==4.65.0
-accelerate==0.21.0
+accelerate==0.20.3
opencv-python-headless==4.7.0.72
diffusers==0.19.3
einops==0.4.1
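Closing note on the Diffusers Settings above: the `Generator device` option (`default` or `cpu`) controls where the `torch.Generator` that drives sampling noise lives. A hypothetical helper (`make_generator` is not SD.Next code) showing why `cpu` is offered: CPU-generated noise is device-independent, so seeds reproduce identically across GPU models:

```python
import torch

def make_generator(seed: int, generator_device: str = "default") -> torch.Generator:
    # 'cpu' gives bit-identical noise on any hardware; 'default' keeps the
    # generator on the compute device for a slight convenience/speed edge
    if generator_device == "cpu":
        device = "cpu"
    else:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.Generator(device=device).manual_seed(seed)

g = make_generator(42, generator_device="cpu")
# any diffusers pipeline accepts it: pipe(prompt, generator=g)
```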