mirror of https://github.com/vladmandic/automatic
fix prompt parser for sdxl and enable offloading
parent
5bcd65d4c2
commit
f52249d5a8
|
|
@ -562,18 +562,17 @@
|
|||
{"id":"","label":"Token merging ratio","localized":"","hint":"Enable redundant token merging via tomesd for speed and memory improvements, 0=disabled"},
|
||||
{"id":"","label":"Token merging ratio for img2img","localized":"","hint":"Enable redundant token merging for img2img via tomesd for speed and memory improvements, 0=disabled"},
|
||||
{"id":"","label":"Token merging ratio for hires pass","localized":"","hint":"Enable redundant token merging for hires pass via tomesd for speed and memory improvements, 0=disabled"},
|
||||
{"id":"","label":"Diffusers allow loading from safetensors files","localized":"","hint":"Allow loading of safetensors files as diffuser models"},
|
||||
{"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"","hint":""},
|
||||
{"id":"","label":"Move base model to CPU when using refiner","localized":"","hint":""},
|
||||
{"id":"","label":"Move refiner model to CPU when not in use","localized":"","hint":""},
|
||||
{"id":"","label":"Move UNet to CPU while VAE decoding","localized":"","hint":""},
|
||||
{"id":"","label":"Use model EMA weights when possible","localized":"","hint":""},
|
||||
{"id":"","label":"Generator device","localized":"","hint":""},
|
||||
{"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time approximately 10%. Use with Enable Attention slicing for minimal memory consumption"},
|
||||
{"id":"","label":"Enable model CPU offload","localized":"","hint":"Transferring of entire models to the CPU, negligible impact on inference time while still providing some memory savings. Use with Enable Attention slicing for additional memory savings"},
|
||||
{"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batch latents one image at a time with limited VRAM. Small performance boost in VAE decode on multi-image batches. Use with Enable Attention slicing"},
|
||||
{"id":"","label":"Enable VAE tiling","localized":"","hint":"Divide large images into overlapping tiles with limited VRAM. Might result in a minor increase in processing time. Use with Enable Attention Slicing"},
|
||||
{"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. 10% slower inference times. Greatly reduces memory usage. Best used, period"},
|
||||
{"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time approximately 10%"},
|
||||
{"id":"","label":"Enable model CPU offload","localized":"","hint":"Transferring of entire models to the CPU, negligible impact on inference time while still providing some memory savings"},
|
||||
{"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batch latents one image at a time with limited VRAM. Small performance boost in VAE decode on multi-image batches"},
|
||||
{"id":"","label":"Enable VAE tiling","localized":"","hint":"Divide large images into overlapping tiles with limited VRAM. Results in a minor increase in processing time"},
|
||||
{"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. Slower inference times, but greatly reduced memory usage"},
|
||||
{"id":"","label":"Diffusers model loading variant","localized":"","hint":""},
|
||||
{"id":"","label":"Diffusers VAE loading variant","localized":"","hint":""},
|
||||
{"id":"","label":"Diffusers LoRA loading variant","localized":"","hint":"'sequential apply' loads and applies each LoRA in order of appearance, 'merge and apply' loads all LoRAs and merges them in-memory before applying to model, 'diffusers default' uses single LoRA loading method"}
|
||||
|
|
|
|||
|
|
@ -562,7 +562,6 @@
|
|||
{"id":"","label":"Token merging ratio","localized":"토큰 병합 비율","hint":"속도와 메모리 절감을 위해 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
|
||||
{"id":"","label":"Token merging ratio for img2img","localized":"이미지➠이미지 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 이미지➠이미지에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
|
||||
{"id":"","label":"Token merging ratio for hires pass","localized":"텍스트➠이미지 업스케일링(Hires fix) 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 Hires fix에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
|
||||
{"id":"","label":"Diffusers allow loading from safetensors files","localized":"safetensors 파일에서 로드 허용","hint":"safetensors 파일을 Diffusers 모델로 로드할 수 있게 한다."},
|
||||
{"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"safetensors 파일에서 로드할 때 사용할 파이프라인 선택","hint":""},
|
||||
{"id":"","label":"Move base model to CPU when using refiner","localized":"리파이너를 사용 중일 때 base 모델을 CPU로 이동","hint":""},
|
||||
{"id":"","label":"Move refiner model to CPU when not in use","localized":"사용 중이지 않을 때 리파이너 모델을 CPU로 이동","hint":""},
|
||||
|
|
|
|||
|
|
@ -186,8 +186,6 @@ def install(package, friendly: str = None, ignore: bool = False):
|
|||
if args.reinstall or args.upgrade:
|
||||
global quick_allowed # pylint: disable=global-statement
|
||||
quick_allowed = False
|
||||
if args.use_ipex and "accelerate==" in package:
|
||||
package = "accelerate==0.20.3"
|
||||
if args.reinstall or not installed(package, friendly):
|
||||
pip(f"install --upgrade {package}", ignore=ignore)
|
||||
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@ def unload_diffusers_lora():
|
|||
lora_state['all_loras'].reverse()
|
||||
lora_state['multiplier'].reverse()
|
||||
for i, lora_network in enumerate(lora_state['all_loras']):
|
||||
if shared.opts.diffusers_lora_loader == "merge and apply":
|
||||
lora_network.restore_from(multiplier=lora_state['multiplier'][i])
|
||||
if shared.opts.diffusers_lora_loader == "sequential apply":
|
||||
lora_network.unapply_to()
|
||||
if shared.opts.diffusers_lora_loader == "merge and apply":
|
||||
lora_network.restore_from(multiplier=lora_state['multiplier'][i])
|
||||
if shared.opts.diffusers_lora_loader == "sequential apply":
|
||||
lora_network.unapply_to()
|
||||
lora_state['active'] = False
|
||||
lora_state['loaded'] = 0
|
||||
lora_state['all_loras'] = []
|
||||
|
|
@ -45,7 +45,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
|
|||
lora_state['multiplier'].append(strength)
|
||||
if shared.opts.diffusers_lora_loader == "diffusers default":
|
||||
pipe.load_lora_weights(lora.filename, cache_dir=shared.opts.diffusers_dir, local_files_only=True, lora_scale=strength)
|
||||
shared.log.info(f"Diffusers LoRA loaded: {name} {lora_state['multiplier']}")
|
||||
shared.log.info(f"LoRA loaded: {name} {lora_state['multiplier']}")
|
||||
else:
|
||||
from safetensors.torch import load_file
|
||||
lora_sd = load_file(lora.filename)
|
||||
|
|
@ -61,7 +61,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
|
|||
lora_network.to(shared.device, dtype=pipe.unet.dtype)
|
||||
lora_network.apply_to(multiplier=strength)
|
||||
lora_state['all_loras'].append(lora_network)
|
||||
shared.log.info(f"Diffusers LoRA loaded: {name} {strength}")
|
||||
shared.log.info(f"LoRA loaded: {name}:{strength} loader={shared.opts.diffusers_lora_loader}")
|
||||
except Exception as e:
|
||||
shared.log.error(f"Diffusers LoRA loading failed: {name} {e}")
|
||||
|
||||
|
|
@ -332,7 +332,7 @@ def merge_lora_weights(pipe, weights_sd: Dict, multiplier: float = 1.0):
|
|||
|
||||
|
||||
# block weightや学習に対応しない簡易版 / simple version without block weight and training
|
||||
class LoRANetwork(torch.nn.Module):
|
||||
class LoRANetwork(torch.nn.Module): # pylint: disable=abstract-method
|
||||
UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"]
|
||||
UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"]
|
||||
TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
|
||||
|
|
@ -350,17 +350,17 @@ class LoRANetwork(torch.nn.Module):
|
|||
multiplier: float = 1.0,
|
||||
modules_dim: Optional[Dict[str, int]] = None,
|
||||
modules_alpha: Optional[Dict[str, int]] = None,
|
||||
varbose: Optional[bool] = False,
|
||||
varbose: Optional[bool] = False, # pylint: disable=unused-argument
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.multiplier = multiplier
|
||||
|
||||
shared.log.debug("create LoRA network from weights")
|
||||
# shared.log.debug("create LoRA network from weights")
|
||||
|
||||
# convert SDXL Stability AI's U-Net modules to Diffusers
|
||||
converted = self.convert_unet_modules(modules_dim, modules_alpha)
|
||||
if converted:
|
||||
shared.log.debug(f"converted {converted} Stability AI's U-Net LoRA modules to Diffusers (SDXL)")
|
||||
shared.log.debug(f"LoRA convert: modules={converted} SDXL SAI/SGM to Diffusers")
|
||||
|
||||
# create module instances
|
||||
def create_modules(
|
||||
|
|
@ -422,18 +422,13 @@ class LoRANetwork(torch.nn.Module):
|
|||
text_encoder_loras, skipped = create_modules(False, index, text_encoder, LoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE)
|
||||
self.text_encoder_loras.extend(text_encoder_loras)
|
||||
skipped_te += skipped
|
||||
shared.log.debug(f"create LoRA for Text Encoder: {len(self.text_encoder_loras)} modules.")
|
||||
if len(skipped_te) > 0:
|
||||
shared.log.debug(f"skipped {len(skipped_te)} modules because of missing weight.")
|
||||
|
||||
# extend U-Net target modules to include Conv2d 3x3
|
||||
target_modules = LoRANetwork.UNET_TARGET_REPLACE_MODULE + LoRANetwork.UNET_TARGET_REPLACE_MODULE_CONV2D_3X3
|
||||
|
||||
self.unet_loras: List[LoRAModule]
|
||||
self.unet_loras, skipped_un = create_modules(True, None, unet, target_modules)
|
||||
shared.log.debug(f"create LoRA for U-Net: {len(self.unet_loras)} modules.")
|
||||
if len(skipped_un) > 0:
|
||||
shared.log.debug(f"skipped {len(skipped_un)} modules because of missing weight.")
|
||||
shared.log.debug(f"LoRA modules loaded/skipped: te={len(self.text_encoder_loras)}/{len(skipped_te)} unet={len(self.unet_loras)}/skip={len(skipped_un)}")
|
||||
|
||||
# assertion
|
||||
names = set()
|
||||
|
|
@ -480,11 +475,11 @@ class LoRANetwork(torch.nn.Module):
|
|||
|
||||
def apply_to(self, multiplier=1.0, apply_text_encoder=True, apply_unet=True):
|
||||
if apply_text_encoder:
|
||||
shared.log.debug("enable LoRA for text encoder")
|
||||
# shared.log.debug("LoRA apply for text encoder")
|
||||
for lora in self.text_encoder_loras:
|
||||
lora.apply_to(multiplier)
|
||||
if apply_unet:
|
||||
shared.log.debug("enable LoRA for U-Net")
|
||||
# shared.log.debug("LoRA apply for U-Net")
|
||||
for lora in self.unet_loras:
|
||||
lora.apply_to(multiplier)
|
||||
|
||||
|
|
@ -493,16 +488,14 @@ class LoRANetwork(torch.nn.Module):
|
|||
lora.unapply_to()
|
||||
|
||||
def merge_to(self, multiplier=1.0):
|
||||
shared.log.debug("merge LoRA weights to original weights")
|
||||
# shared.log.debug("LoRA merge weights for text encoder")
|
||||
for lora in tqdm(self.text_encoder_loras + self.unet_loras):
|
||||
lora.merge_to(multiplier)
|
||||
shared.log.debug("weights are merged")
|
||||
|
||||
def restore_from(self, multiplier=1.0):
|
||||
shared.log.debug("restore LoRA weights from original weights")
|
||||
# shared.log.debug("LoRA restore weights")
|
||||
for lora in tqdm(self.text_encoder_loras + self.unet_loras):
|
||||
lora.restore_from(multiplier)
|
||||
shared.log.debug("weights are restored")
|
||||
|
||||
def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
|
||||
# convert SDXL Stability AI's state dict to Diffusers' based state dict
|
||||
|
|
@ -527,4 +520,3 @@ class LoRANetwork(torch.nn.Module):
|
|||
state_dict[key] = state_dict[key].view(my_state_dict[key].size())
|
||||
|
||||
return super().load_state_dict(state_dict, strict)
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,24 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
|
|||
imgs = model.image_processor.postprocess(decoded, output_type=output_type)
|
||||
return imgs
|
||||
|
||||
def fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2):
    """Normalize the four prompt inputs so they can be indexed in lockstep.

    A bare string is treated as a single prompt and wrapped in a list.
    Lists that are shorter than required are padded by repeating their
    last entry (or an empty prompt when the list is empty).

    Args:
        prompts: primary prompt, as a string or a list of strings.
        negative_prompts: negative prompt, as a string, a list of strings or
            None; padded to the length of ``prompts``.
        prompts_2: optional secondary prompt (string, list or None); when it
            is a list it is padded to the length of ``prompts``.
        negative_prompts_2: optional secondary negative prompt (string, list
            or None); when it is a list it is padded to the length of
            ``prompts_2``, or of ``prompts`` when ``prompts_2`` is None.

    Returns:
        Tuple ``(prompts, negative_prompts, prompts_2, negative_prompts_2)``.
        Values that were None stay None, except ``negative_prompts`` which
        becomes a list of empty prompts. Lists passed in by the caller are
        copied, never modified in place.
    """
    def as_list(value):
        # a bare string is one prompt; lists are copied so padding never leaks back to the caller
        if isinstance(value, str):
            return [value]
        if isinstance(value, list):
            return list(value)
        return value

    def pad(values, target):
        # only lists are padded; None (option not used) is passed through untouched
        if isinstance(values, list) and len(values) < target:
            filler = values[-1] if values else ''  # an empty list has no last entry to repeat
            values.extend([filler] * (target - len(values)))
        return values

    prompts = as_list(prompts)
    count = len(prompts)
    negative_prompts = pad([] if negative_prompts is None else as_list(negative_prompts), count)
    prompts_2 = pad(as_list(prompts_2), count)
    # without a secondary prompt, the secondary negative prompt has to line up with the primary prompts
    target_2 = count if prompts_2 is None else len(prompts_2)
    negative_prompts_2 = pad(as_list(negative_prompts_2), target_2)
    return prompts, negative_prompts, prompts_2, negative_prompts_2
|
||||
|
||||
def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: typing.Optional[list]=None, negative_prompts_2: typing.Optional[list]=None, is_refiner: bool=False, **kwargs):
|
||||
args = {}
|
||||
|
|
@ -64,6 +82,7 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
|
|||
pooled = None
|
||||
negative_embed = None
|
||||
negative_pooled = None
|
||||
prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2)
|
||||
if shared.opts.data['prompt_attention'] in {'Compel parser', 'Full parser'}:
|
||||
prompt_embed, pooled, negative_embed, negative_pooled = prompt_parser_diffusers.compel_encode_prompts(model,
|
||||
prompts,
|
||||
|
|
|
|||
|
|
@ -47,7 +47,12 @@ def compel_encode_prompts(
|
|||
negative_embeds = []
|
||||
negative_pooleds = []
|
||||
for i in range(len(prompts)):
|
||||
prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline, prompts[i], negative_prompts[i], prompts_2[i], negative_prompts_2[i], is_refiner, clip_skip)
|
||||
prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline,
|
||||
prompts[i],
|
||||
negative_prompts[i],
|
||||
prompts_2[i] if prompts_2 is not None else None,
|
||||
negative_prompts_2[i] if negative_prompts_2 is not None else None,
|
||||
is_refiner, clip_skip)
|
||||
prompt_embeds.append(prompt_embed)
|
||||
positive_pooleds.append(positive_pooled)
|
||||
negative_embeds.append(negative_embed)
|
||||
|
|
|
|||
|
|
@ -136,12 +136,9 @@ def list_models():
|
|||
checkpoints_list.clear()
|
||||
checkpoint_aliases.clear()
|
||||
ext_filter=[".safetensors"] if shared.opts.sd_disable_ckpt else [".ckpt", ".safetensors"]
|
||||
model_list = []
|
||||
if shared.backend == shared.Backend.ORIGINAL or shared.opts.diffusers_allow_safetensors:
|
||||
model_list += modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
|
||||
model_list = modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
|
||||
if shared.backend == shared.Backend.DIFFUSERS:
|
||||
model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir)
|
||||
|
||||
for filename in sorted(model_list, key=str.lower):
|
||||
checkpoint_info = CheckpointInfo(filename)
|
||||
if checkpoint_info.name is not None:
|
||||
|
|
@ -844,7 +841,6 @@ def set_diffuser_pipe(pipe, new_pipe_type):
|
|||
new_pipe = diffusers.AutoPipelineForImage2Image.from_pipe(pipe)
|
||||
elif new_pipe_type == DiffusersTaskType.INPAINTING:
|
||||
new_pipe = diffusers.AutoPipelineForInpainting.from_pipe(pipe)
|
||||
|
||||
if pipe.__class__ == new_pipe.__class__:
|
||||
return
|
||||
|
||||
|
|
@ -1030,20 +1026,35 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model')
|
|||
shared.log.info(f"Weights loaded in {timer.summary()}")
|
||||
|
||||
|
||||
def disable_offload(sd_model):
    """Remove accelerate hooks from every torch module of a pipeline.

    Does nothing unless ``sd_model.has_accelerate`` is truthy. Otherwise each
    value in ``sd_model.components`` that is a ``torch.nn.Module`` has its
    hooks removed recursively; other component values are skipped.

    Args:
        sd_model: pipeline object exposing a ``components`` mapping and,
            optionally, a ``has_accelerate`` flag.
            NOTE(review): presumably the flag is set where CPU offload is
            enabled - confirm against the loader.
    """
    # the flag may be missing on pipelines that never had offloading enabled
    if not getattr(sd_model, 'has_accelerate', False):
        return
    # deferred so that accelerate is only imported when there are hooks to remove
    from accelerate.hooks import remove_hook_from_module
    for model in sd_model.components.values():
        if isinstance(model, torch.nn.Module):
            remove_hook_from_module(model, recurse=True)
|
||||
|
||||
|
||||
def unload_model_weights(op='model'):
    """Release the loaded base model or refiner and run garbage collection.

    Args:
        op: ``'model'`` or ``'dict'`` unloads ``model_data.sd_model``; any
            other value unloads ``model_data.sd_refiner``.

    NOTE(review): this body is reconstructed from the post-commit lines of
    the diff; nesting is inferred because the view lost indentation - confirm
    against the real file before applying.
    """
    from modules import sd_hijack
    if op == 'model' or op == 'dict':
        if model_data.sd_model:
            if shared.backend == shared.Backend.ORIGINAL:
                model_data.sd_model.to(devices.cpu)
                sd_hijack.model_hijack.undo_hijack(model_data.sd_model)
            else:
                # hooks are removed before the pipeline is moved to the meta device
                disable_offload(model_data.sd_model)
                model_data.sd_model.to('meta')
            model_data.sd_model = None
            shared.log.debug(f'Unload weights {op}: {memory_stats()}')
    else:
        if model_data.sd_refiner:
            if shared.backend == shared.Backend.ORIGINAL:
                model_data.sd_refiner.to('meta')
                sd_hijack.model_hijack.undo_hijack(model_data.sd_refiner)
            else:
                # must target the refiner itself: the base model may already be
                # unloaded (None) and is not the pipeline being released here
                disable_offload(model_data.sd_refiner)
                model_data.sd_refiner.to('meta')
            model_data.sd_refiner = None
            shared.log.debug(f'Unload weights {op}: {memory_stats()}')
    devices.torch_gc(force=True)
|
||||
|
|
|
|||
|
|
@ -395,11 +395,10 @@ options_templates.update(options_section(('cuda', "Compute Settings"), {
|
|||
}))
|
||||
|
||||
options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
|
||||
"diffusers_allow_safetensors": OptionInfo(True, 'Diffusers allow loading from safetensors files'),
|
||||
"diffusers_pipeline": OptionInfo(pipelines[0], 'Diffusers pipeline', gr.Dropdown, lambda: {"choices": pipelines}),
|
||||
"diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner"),
|
||||
"diffusers_move_unet": OptionInfo(False, "Move base model to CPU when using VAE"),
|
||||
"diffusers_move_refiner": OptionInfo(True, "Move refiner model to CPU when not in use"),
|
||||
"diffusers_move_unet": OptionInfo(False, "Move UNet to CPU while VAE decoding"),
|
||||
"diffusers_extract_ema": OptionInfo(True, "Use model EMA weights when possible"),
|
||||
"diffusers_generator_device": OptionInfo("default", "Generator device", gr.Radio, lambda: {"choices": ["default", "cpu"]}),
|
||||
"diffusers_seq_cpu_offload": OptionInfo(False, "Enable sequential CPU offload"),
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ typing-extensions==4.7.1
|
|||
antlr4-python3-runtime==4.9.3
|
||||
requests==2.31.0
|
||||
tqdm==4.65.0
|
||||
accelerate==0.21.0
|
||||
accelerate==0.20.3
|
||||
opencv-python-headless==4.7.0.72
|
||||
diffusers==0.19.3
|
||||
einops==0.4.1
|
||||
|
|
|
|||
Loading…
Reference in New Issue