Fix prompt parser for SDXL and enable offloading

pull/1985/head
Vladimir Mandic 2023-08-10 21:20:56 +00:00
parent 5bcd65d4c2
commit f52249d5a8
9 changed files with 65 additions and 43 deletions

View File

@ -562,18 +562,17 @@
{"id":"","label":"Token merging ratio","localized":"","hint":"Enable redundant token merging via tomesd for speed and memory improvements, 0=disabled"},
{"id":"","label":"Token merging ratio for img2img","localized":"","hint":"Enable redundant token merging for img2img via tomesd for speed and memory improvements, 0=disabled"},
{"id":"","label":"Token merging ratio for hires pass","localized":"","hint":"Enable redundant token merging for hires pass via tomesd for speed and memory improvements, 0=disabled"},
{"id":"","label":"Diffusers allow loading from safetensors files","localized":"","hint":"Allow loading of safetensors files as diffuser models"},
{"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"","hint":""},
{"id":"","label":"Move base model to CPU when using refiner","localized":"","hint":""},
{"id":"","label":"Move refiner model to CPU when not in use","localized":"","hint":""},
{"id":"","label":"Move UNet to CPU while VAE decoding","localized":"","hint":""},
{"id":"","label":"Use model EMA weights when possible","localized":"","hint":""},
{"id":"","label":"Generator device","localized":"","hint":""},
{"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time approximately 10%. Use with Enable Attention slicing for minimal memory consumption"},
{"id":"","label":"Enable model CPU offload","localized":"","hint":"Transferring of entire models to the CPU, negligible impact on inference time while still providing some memory savings. Use with Enable Attention slicing for additional memory savings"},
{"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batch latents one image at a time with limited VRAM. Small performance boost in VAE decode on multi-image batches. Use with Enable Attention slicing"},
{"id":"","label":"Enable VAE tiling","localized":"","hint":"Divide large images into overlapping tiles with limited VRAM. Might result in a minor increase in processing time. Use with Enable Attention Slicing"},
{"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. 10% slower inference times. Greatly reduces memory usage. Best used, period"},
{"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time approximately 10%"},
{"id":"","label":"Enable model CPU offload","localized":"","hint":"Transferring of entire models to the CPU, negligible impact on inference time while still providing some memory savings"},
{"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batch latents one image at a time with limited VRAM. Small performance boost in VAE decode on multi-image batches"},
{"id":"","label":"Enable VAE tiling","localized":"","hint":"Divide large images into overlapping tiles with limited VRAM. Results in a minor increase in processing time"},
{"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. Slower inference times, but greatly reduced memory usage"},
{"id":"","label":"Diffusers model loading variant","localized":"","hint":""},
{"id":"","label":"Diffusers VAE loading variant","localized":"","hint":""},
{"id":"","label":"Diffusers LoRA loading variant","localized":"","hint":"'sequential apply' loads and applies each LoRA in order of appearance, 'merge and apply' loads all LoRAs and merges them in-memory before applying to model, 'diffusers default' uses single LoRA loading method"}

View File

@ -562,7 +562,6 @@
{"id":"","label":"Token merging ratio","localized":"토큰 병합 비율","hint":"속도와 메모리 절감을 위해 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
{"id":"","label":"Token merging ratio for img2img","localized":"이미지➠이미지 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 이미지➠이미지에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
{"id":"","label":"Token merging ratio for hires pass","localized":"텍스트➠이미지 업스케일링(Hires fix) 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 Hires fix에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
{"id":"","label":"Diffusers allow loading from safetensors files","localized":"safetensors 파일에서 로드 허용","hint":"safetensors 파일을 Diffusers 모델로 로드할 수 있게 한다."},
{"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"safetensors 파일에서 로드할 때 사용할 파이프라인 선택","hint":""},
{"id":"","label":"Move base model to CPU when using refiner","localized":"리파이너를 사용 중일 때 base 모델을 CPU로 이동","hint":""},
{"id":"","label":"Move refiner model to CPU when not in use","localized":"사용 중이지 않을 때 리파이너 모델을 CPU로 이동","hint":""},

View File

@ -186,8 +186,6 @@ def install(package, friendly: str = None, ignore: bool = False):
if args.reinstall or args.upgrade:
global quick_allowed # pylint: disable=global-statement
quick_allowed = False
if args.use_ipex and "accelerate==" in package:
package = "accelerate==0.20.3"
if args.reinstall or not installed(package, friendly):
pip(f"install --upgrade {package}", ignore=ignore)

View File

@ -24,10 +24,10 @@ def unload_diffusers_lora():
lora_state['all_loras'].reverse()
lora_state['multiplier'].reverse()
for i, lora_network in enumerate(lora_state['all_loras']):
if shared.opts.diffusers_lora_loader == "merge and apply":
lora_network.restore_from(multiplier=lora_state['multiplier'][i])
if shared.opts.diffusers_lora_loader == "sequential apply":
lora_network.unapply_to()
if shared.opts.diffusers_lora_loader == "merge and apply":
lora_network.restore_from(multiplier=lora_state['multiplier'][i])
if shared.opts.diffusers_lora_loader == "sequential apply":
lora_network.unapply_to()
lora_state['active'] = False
lora_state['loaded'] = 0
lora_state['all_loras'] = []
@ -45,7 +45,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
lora_state['multiplier'].append(strength)
if shared.opts.diffusers_lora_loader == "diffusers default":
pipe.load_lora_weights(lora.filename, cache_dir=shared.opts.diffusers_dir, local_files_only=True, lora_scale=strength)
shared.log.info(f"Diffusers LoRA loaded: {name} {lora_state['multiplier']}")
shared.log.info(f"LoRA loaded: {name} {lora_state['multiplier']}")
else:
from safetensors.torch import load_file
lora_sd = load_file(lora.filename)
@ -61,7 +61,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
lora_network.to(shared.device, dtype=pipe.unet.dtype)
lora_network.apply_to(multiplier=strength)
lora_state['all_loras'].append(lora_network)
shared.log.info(f"Diffusers LoRA loaded: {name} {strength}")
shared.log.info(f"LoRA loaded: {name}:{strength} loader={shared.opts.diffusers_lora_loader}")
except Exception as e:
shared.log.error(f"Diffusers LoRA loading failed: {name} {e}")
@ -332,7 +332,7 @@ def merge_lora_weights(pipe, weights_sd: Dict, multiplier: float = 1.0):
# block weightや学習に対応しない簡易版 / simple version without block weight and training
class LoRANetwork(torch.nn.Module):
class LoRANetwork(torch.nn.Module): # pylint: disable=abstract-method
UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"]
UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"]
TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
@ -350,17 +350,17 @@ class LoRANetwork(torch.nn.Module):
multiplier: float = 1.0,
modules_dim: Optional[Dict[str, int]] = None,
modules_alpha: Optional[Dict[str, int]] = None,
varbose: Optional[bool] = False,
varbose: Optional[bool] = False, # pylint: disable=unused-argument
) -> None:
super().__init__()
self.multiplier = multiplier
shared.log.debug("create LoRA network from weights")
# shared.log.debug("create LoRA network from weights")
# convert SDXL Stability AI's U-Net modules to Diffusers
converted = self.convert_unet_modules(modules_dim, modules_alpha)
if converted:
shared.log.debug(f"converted {converted} Stability AI's U-Net LoRA modules to Diffusers (SDXL)")
shared.log.debug(f"LoRA convert: modules={converted} SDXL SAI/SGM to Diffusers")
# create module instances
def create_modules(
@ -422,18 +422,13 @@ class LoRANetwork(torch.nn.Module):
text_encoder_loras, skipped = create_modules(False, index, text_encoder, LoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE)
self.text_encoder_loras.extend(text_encoder_loras)
skipped_te += skipped
shared.log.debug(f"create LoRA for Text Encoder: {len(self.text_encoder_loras)} modules.")
if len(skipped_te) > 0:
shared.log.debug(f"skipped {len(skipped_te)} modules because of missing weight.")
# extend U-Net target modules to include Conv2d 3x3
target_modules = LoRANetwork.UNET_TARGET_REPLACE_MODULE + LoRANetwork.UNET_TARGET_REPLACE_MODULE_CONV2D_3X3
self.unet_loras: List[LoRAModule]
self.unet_loras, skipped_un = create_modules(True, None, unet, target_modules)
shared.log.debug(f"create LoRA for U-Net: {len(self.unet_loras)} modules.")
if len(skipped_un) > 0:
shared.log.debug(f"skipped {len(skipped_un)} modules because of missing weight.")
shared.log.debug(f"LoRA modules loaded/skipped: te={len(self.text_encoder_loras)}/{len(skipped_te)} unet={len(self.unet_loras)}/skip={len(skipped_un)}")
# assertion
names = set()
@ -480,11 +475,11 @@ class LoRANetwork(torch.nn.Module):
def apply_to(self, multiplier=1.0, apply_text_encoder=True, apply_unet=True):
if apply_text_encoder:
shared.log.debug("enable LoRA for text encoder")
# shared.log.debug("LoRA apply for text encoder")
for lora in self.text_encoder_loras:
lora.apply_to(multiplier)
if apply_unet:
shared.log.debug("enable LoRA for U-Net")
# shared.log.debug("LoRA apply for U-Net")
for lora in self.unet_loras:
lora.apply_to(multiplier)
@ -493,16 +488,14 @@ class LoRANetwork(torch.nn.Module):
lora.unapply_to()
def merge_to(self, multiplier=1.0):
shared.log.debug("merge LoRA weights to original weights")
# shared.log.debug("LoRA merge weights for text encoder")
for lora in tqdm(self.text_encoder_loras + self.unet_loras):
lora.merge_to(multiplier)
shared.log.debug("weights are merged")
def restore_from(self, multiplier=1.0):
shared.log.debug("restore LoRA weights from original weights")
# shared.log.debug("LoRA restore weights")
for lora in tqdm(self.text_encoder_loras + self.unet_loras):
lora.restore_from(multiplier)
shared.log.debug("weights are restored")
def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
# convert SDXL Stability AI's state dict to Diffusers' based state dict
@ -527,4 +520,3 @@ class LoRANetwork(torch.nn.Module):
state_dict[key] = state_dict[key].view(my_state_dict[key].size())
return super().load_state_dict(state_dict, strict)

View File

@ -52,6 +52,24 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
imgs = model.image_processor.postprocess(decoded, output_type=output_type)
return imgs
def fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2):
    """Normalize the four prompt arguments for a diffusers pipeline call.

    Coerces single strings into one-element lists and pads the negative and
    secondary (SDXL) prompt lists by repeating their last entry until they
    match the primary prompt count. ``prompts_2``/``negative_prompts_2`` may
    be ``None`` (non-SDXL pipelines) and are then passed through unchanged.

    Returns the tuple ``(prompts, negative_prompts, prompts_2, negative_prompts_2)``.
    """
    if isinstance(prompts, str):
        prompts = [prompts]
    if isinstance(negative_prompts, str):
        negative_prompts = [negative_prompts]
    # pad to one negative per prompt; fall back to '' so an empty list
    # cannot raise IndexError on negative_prompts[-1]
    while len(negative_prompts) < len(prompts):
        negative_prompts.append(negative_prompts[-1] if negative_prompts else '')
    if isinstance(prompts_2, str):
        prompts_2 = [prompts_2]
    if isinstance(prompts_2, list):
        while len(prompts_2) < len(prompts):
            prompts_2.append(prompts_2[-1] if prompts_2 else '')
    if isinstance(negative_prompts_2, str):
        negative_prompts_2 = [negative_prompts_2]
    # guard both operands: padding against len(prompts_2) when prompts_2 is
    # None raised TypeError in the original version
    if isinstance(negative_prompts_2, list) and isinstance(prompts_2, list):
        while len(negative_prompts_2) < len(prompts_2):
            negative_prompts_2.append(negative_prompts_2[-1] if negative_prompts_2 else '')
    return prompts, negative_prompts, prompts_2, negative_prompts_2
def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: typing.Optional[list]=None, negative_prompts_2: typing.Optional[list]=None, is_refiner: bool=False, **kwargs):
args = {}
@ -64,6 +82,7 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
pooled = None
negative_embed = None
negative_pooled = None
prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2)
if shared.opts.data['prompt_attention'] in {'Compel parser', 'Full parser'}:
prompt_embed, pooled, negative_embed, negative_pooled = prompt_parser_diffusers.compel_encode_prompts(model,
prompts,

View File

@ -47,7 +47,12 @@ def compel_encode_prompts(
negative_embeds = []
negative_pooleds = []
for i in range(len(prompts)):
prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline, prompts[i], negative_prompts[i], prompts_2[i], negative_prompts_2[i], is_refiner, clip_skip)
prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline,
prompts[i],
negative_prompts[i],
prompts_2[i] if prompts_2 is not None else None,
negative_prompts_2[i] if negative_prompts_2 is not None else None,
is_refiner, clip_skip)
prompt_embeds.append(prompt_embed)
positive_pooleds.append(positive_pooled)
negative_embeds.append(negative_embed)

View File

@ -136,12 +136,9 @@ def list_models():
checkpoints_list.clear()
checkpoint_aliases.clear()
ext_filter=[".safetensors"] if shared.opts.sd_disable_ckpt else [".ckpt", ".safetensors"]
model_list = []
if shared.backend == shared.Backend.ORIGINAL or shared.opts.diffusers_allow_safetensors:
model_list += modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
model_list = modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
if shared.backend == shared.Backend.DIFFUSERS:
model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir)
for filename in sorted(model_list, key=str.lower):
checkpoint_info = CheckpointInfo(filename)
if checkpoint_info.name is not None:
@ -844,7 +841,6 @@ def set_diffuser_pipe(pipe, new_pipe_type):
new_pipe = diffusers.AutoPipelineForImage2Image.from_pipe(pipe)
elif new_pipe_type == DiffusersTaskType.INPAINTING:
new_pipe = diffusers.AutoPipelineForInpainting.from_pipe(pipe)
if pipe.__class__ == new_pipe.__class__:
return
@ -1030,20 +1026,35 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model')
shared.log.info(f"Weights loaded in {timer.summary()}")
def disable_offload(sd_model):
    """Strip accelerate CPU-offload hooks from every torch module in the pipeline.

    No-op when the pipeline was never wrapped by accelerate
    (``sd_model.has_accelerate`` falsy). Mutates the pipeline's component
    modules in place; returns None.
    """
    if not sd_model.has_accelerate:
        return
    # deferred import: only needed when hooks actually exist, and it keeps the
    # no-op path working even if accelerate is unavailable
    from accelerate.hooks import remove_hook_from_module
    for _name, model in sd_model.components.items():
        if not isinstance(model, torch.nn.Module):
            continue
        remove_hook_from_module(model, recurse=True)
# Unloads the main model or the refiner ('model'/'dict' vs anything else in op),
# moving weights off-device and releasing references before a forced GC.
# NOTE(review): this span is a diff view that interleaves removed and added
# lines (the duplicated `.to(devices.cpu)` calls below); structure is preserved
# as shown — confirm against the committed file before relying on it.
def unload_model_weights(op='model'):
from modules import sd_hijack
if op == 'model' or op == 'dict':
if model_data.sd_model:
model_data.sd_model.to(devices.cpu)
# ORIGINAL backend: move to CPU and undo hijack; DIFFUSERS backend:
# remove accelerate offload hooks, then park weights on the meta device
if shared.backend == shared.Backend.ORIGINAL:
model_data.sd_model.to(devices.cpu)
sd_hijack.model_hijack.undo_hijack(model_data.sd_model)
else:
disable_offload(model_data.sd_model)
model_data.sd_model.to('meta')
model_data.sd_model = None
shared.log.debug(f'Unload weights {op}: {memory_stats()}')
else:
if model_data.sd_refiner:
model_data.sd_refiner.to(devices.cpu)
model_data.sd_refiner.to('meta')
if shared.backend == shared.Backend.ORIGINAL:
sd_hijack.model_hijack.undo_hijack(model_data.sd_refiner)
else:
# NOTE(review): this passes sd_model in the refiner branch — looks like
# it should be model_data.sd_refiner; verify against upstream
disable_offload(model_data.sd_model)
model_data.sd_refiner = None
shared.log.debug(f'Unload weights {op}: {memory_stats()}')
# force garbage collection / VRAM release after dropping references
devices.torch_gc(force=True)

View File

@ -395,11 +395,10 @@ options_templates.update(options_section(('cuda', "Compute Settings"), {
}))
options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
"diffusers_allow_safetensors": OptionInfo(True, 'Diffusers allow loading from safetensors files'),
"diffusers_pipeline": OptionInfo(pipelines[0], 'Diffusers pipeline', gr.Dropdown, lambda: {"choices": pipelines}),
"diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner"),
"diffusers_move_unet": OptionInfo(False, "Move base model to CPU when using VAE"),
"diffusers_move_refiner": OptionInfo(True, "Move refiner model to CPU when not in use"),
"diffusers_move_unet": OptionInfo(False, "Move UNet to CPU while VAE decoding"),
"diffusers_extract_ema": OptionInfo(True, "Use model EMA weights when possible"),
"diffusers_generator_device": OptionInfo("default", "Generator device", gr.Radio, lambda: {"choices": ["default", "cpu"]}),
"diffusers_seq_cpu_offload": OptionInfo(False, "Enable sequential CPU offload"),

View File

@ -46,7 +46,7 @@ typing-extensions==4.7.1
antlr4-python3-runtime==4.9.3
requests==2.31.0
tqdm==4.65.0
accelerate==0.21.0
accelerate==0.20.3
opencv-python-headless==4.7.0.72
diffusers==0.19.3
einops==0.4.1