from modules import errors

try:
    from diffusers import DiffusionPipeline
except ImportError as e:
    errors.print_error_explanation('RESTART AUTOMATIC1111 COMPLETELY TO FINISH INSTALLING PACKAGES FOR kandinsky-for-automatic1111')

import os
import gc
import torch
import numpy as np
from PIL import Image, ImageOps, ImageFilter
from packaging import version

from modules import processing, shared, script_callbacks, images, devices, scripts, masking, sd_models, generation_parameters_copypaste, sd_vae#, sd_samplers
from modules.processing import Processed, StableDiffusionProcessing
from modules.shared import opts, state
from modules.sd_models import CheckpointInfo
from modules.paths_internal import script_path
#import pdb


class KProcessed(processing.Processed):
    """Processed result for Kandinsky generations; mirrors processing.Processed while skipping SD-specific fields."""
    def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""):
        self.images = images_list
        self.prompt = p.prompt
        self.negative_prompt = p.negative_prompt
        self.seed = int(seed)
        self.subseed = subseed
        self.subseed_strength = p.subseed_strength
        self.info = info
        self.comments = comments
        self.width = p.width
        self.height = p.height
        self.sampler_name = p.sampler_name
        self.cfg_scale = p.cfg_scale
        self.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
        self.steps = p.steps
        self.batch_size = p.batch_size
        self.restore_faces = p.restore_faces
        self.face_restoration_model = opts.face_restoration_model if p.restore_faces else None

        self.sd_model_hash = p.sd_model_hash

        self.seed_resize_from_w = p.seed_resize_from_w
        self.seed_resize_from_h = p.seed_resize_from_h
        self.denoising_strength = getattr(p, 'denoising_strength', None)
        self.extra_generation_params = p.extra_generation_params
        self.index_of_first_image = index_of_first_image
        self.styles = p.styles
        self.job_timestamp = state.job_timestamp
        self.clip_skip = 1
        self.eta = p.eta
        self.ddim_discretize = p.ddim_discretize
        self.s_churn = p.s_churn
        self.s_tmin = p.s_tmin
        self.s_tmax = p.s_tmax
        self.s_noise = p.s_noise
        self.sampler_noise_scheduler_override = p.sampler_noise_scheduler_override

        # Collapse list-valued fields to their first element and normalize seeds to ints.
        self.prompt = self.prompt if not isinstance(self.prompt, list) else self.prompt[0]
        self.negative_prompt = self.negative_prompt if not isinstance(self.negative_prompt, list) else self.negative_prompt[0]
        self.seed = int(self.seed if not isinstance(self.seed, list) else self.seed[0]) if self.seed is not None else -1
        self.subseed = int(self.subseed if not isinstance(self.subseed, list) else self.subseed[0]) if self.subseed is not None else -1
        self.is_using_inpainting_conditioning = p.is_using_inpainting_conditioning

        self.all_prompts = all_prompts or p.all_prompts or [self.prompt]
        self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt]
        self.all_seeds = all_seeds or p.all_seeds or [self.seed]
        self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed]
        self.infotexts = infotexts or [info]


class KandinskyCheckpointInfo(CheckpointInfo):
    """Stand-in CheckpointInfo for the Kandinsky pipelines; uses placeholder hashes since there is no single checkpoint file."""
    def __init__(self, name="kandinsky", filename=None, version="2.1"):
        name += version
        if filename is None:
            filename = name
        self.filename = filename
        self.name = name
        self.name_for_extra = f"{name}_extra"  #os.path.splitext(os.path.basename(filename))[0]
        self.model_name = f"{name}"  #os.path.splitext(name.replace("/", "_").replace("\\", "_"))[0]
        self.hash = "0000000000000000000000000000000000000000000000000000000000000000"  #model_hash(filename)
        self.sha256 = "0000000000000000000000000000000000000000000000000000000000000000"  #hashes.sha256_from_cache(self.filename, "checkpoint/" + name)
        self.shorthash = self.sha256[0:10] if self.sha256 else None
        self.sd_model_hash = self.shorthash
        self.title = name if self.shorthash is None else f'{name} [{self.shorthash}]'
        self.ids = [self.hash, self.model_name, self.title, name, f'{name} [{self.hash}]'] + ([self.shorthash, self.sha256, f'{self.name} [{self.shorthash}]'] if self.shorthash else [])
        self.metadata = {}

    def register(self):
        # Intentionally a no-op: this virtual checkpoint is not added to the global checkpoint list.
        return
        #checkpoints_list[self.title] = self
        # for i in self.ids:
        #     checkpoint_aliases[i] = self

    def calculate_shorthash(self):
        self.sha256 = "0000000000000000000000000000000000000000000000000000000000000000"
        #if self.sha256 is None:
        #    return
        self.shorthash = self.sha256[0:10]
        if self.shorthash not in self.ids:
            self.ids += [self.shorthash, self.sha256, f'{self.name} [{self.shorthash}]']
        #checkpoints_list.pop(self.title)
        self.title = f'{self.name} [{self.shorthash}]'
        #self.register()
        return self.shorthash


def truncate_string(string, max_length=images.max_filename_part_length, encoding='utf-8'):
    # Truncate to at most max_length bytes; decoding with 'ignore' drops any partially cut multi-byte character.
    return string.encode(encoding)[:max_length].decode(encoding, 'ignore')


class AbstractModel():
    """Base class for Kandinsky pipeline wrappers; concrete subclasses implement the encoder/generator hooks below."""
    attention_type = 'auto'  #'max'
    cond_stage_key = "edit"
    cached_image_embeds = {"settings": {}, "embeds": (None, None)}

    def __init__(self, cache_dir="", version="0"):
        self.stages = [1]
        self.models_path = os.path.join(script_path, 'models')
        self.cache_dir = os.path.join(self.models_path, cache_dir)
        self.version = version
        self.sd_checkpoint_info = KandinskyCheckpointInfo(version=self.version)
        self.sd_model_hash = self.sd_checkpoint_info.shorthash

    def load_pipeline(self, pipe_name: str, pipeline: DiffusionPipeline, pretrained_model_name_or_path, move_to_cuda=True, kwargs={}, enable_sequential_cpu_offload=True):
        """Load (or reuse) a diffusers pipeline, cache it on self under pipe_name, and place it on the GPU or enable CPU offload."""
        pipe = getattr(self, pipe_name, None)

        if pipe is None or not isinstance(pipe, pipeline):
            new_kwargs = {
                "pretrained_model_name_or_path": pretrained_model_name_or_path,
                "variant": "fp16",
                "torch_dtype": torch.float16,
                "cache_dir": self.cache_dir,
                "resume_download": True,
                #"local_files_only": True,
                "low_cpu_mem_usage": True
            }
            new_kwargs.update(kwargs)
            kwargs = new_kwargs

            pipe = pipeline.from_pretrained(**kwargs)  #, scheduler=dpm)
            gc.collect()
            devices.torch_gc()

            if move_to_cuda:
                pipe.to("cuda")
            elif enable_sequential_cpu_offload:
                pipe.enable_sequential_cpu_offload()
            #pipe.enable_sequential_cpu_offload()
            pipe.enable_attention_slicing(self.attention_type)
            #pipe.unet.to(memory_format=torch.channels_last)
            setattr(self, pipe_name, pipe)
        elif move_to_cuda:
            pipe.to("cuda")
        elif enable_sequential_cpu_offload:
            pipe.enable_sequential_cpu_offload()

        return pipe

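    # Illustrative sketch only (not an API call verified by this file): a Kandinsky 2.1 subclass
    # would typically load its decoder through load_pipeline, along these lines:
    #
    #   from diffusers import KandinskyPipeline
    #   pipe = self.load_pipeline("pipe", KandinskyPipeline,
    #                             "kandinsky-community/kandinsky-2-1",
    #                             move_to_cuda=False)
    #
    # The pipeline class and model id above are assumptions chosen for illustration.
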
    def create_infotext(self, p: StableDiffusionProcessing, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0):  # pylint: disable=unused-argument
        index = position_in_batch + iteration * p.batch_size

        generation_params = {
            "Steps": p.steps,
            "Sampler": p.sampler_name,
            "CFG scale": p.cfg_scale,
            "Image CFG scale": getattr(p, 'image_cfg_scale', None),
            "Seed": all_seeds[index],
            "Face restoration": (opts.face_restoration_model if p.restore_faces else None),
            "Size": f"{p.width}x{p.height}",
            "Model hash": None,
            "Model": None,
            "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]),
            "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength),
            "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
            "Denoising strength": getattr(p, 'denoising_strength', None),
            "Conditional mask weight": getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) if p.is_using_inpainting_conditioning else None,
            "ENSD": None if opts.eta_noise_seed_delta == 0 else opts.eta_noise_seed_delta,
            "Init image hash": getattr(p, 'init_img_hash', None)
        }
        generation_params.update(p.extra_generation_params)

        generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])

        negative_prompt_text = ("\nNegative prompt: " + p.all_negative_prompts[index]) if p.all_negative_prompts[index] else ""

        return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip()

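    # For reference, the string built above follows the usual AUTOMATIC1111 infotext layout, roughly:
    #
    #   <prompt>
    #   Negative prompt: <negative prompt>
    #   Steps: 30, Sampler: ..., CFG scale: 7, Seed: 1234, Size: 768x768, ...
    #
    # (values shown are placeholders, not output captured from this code)
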
    def next_stage(self):
        pass

    def sd_processing_to_dict_encoder(self, p: StableDiffusionProcessing):
        raise NotImplementedError("sd_processing_to_dict_encoder method not implemented")

    def sd_processing_to_dict_generator(self, p: StableDiffusionProcessing):
        raise NotImplementedError("sd_processing_to_dict_generator method not implemented")

    def load_encoder(self):
        raise NotImplementedError("load_encoder method not implemented")

    def run_encoder(self, prior_settings_dict):
        raise NotImplementedError("run_encoder method not implemented")

    def txt2img(self, p, generation_parameters, b):
        raise NotImplementedError("txt2img method not implemented")

    def img2img(self, p, generation_parameters, b):
        raise NotImplementedError("img2img method not implemented")

    def inpaint(self, p, generation_parameters, b):
        raise NotImplementedError("inpaint method not implemented")

    def encoder_to_cpu(self):
        raise NotImplementedError("encoder_to_cpu method not implemented")

    def main_model_to_cpu(self):
        raise NotImplementedError("main_model_to_cpu method not implemented")

    def cleanup_on_error(self):
        raise NotImplementedError("cleanup_on_error method not implemented")

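    # A concrete wrapper is expected to subclass AbstractModel and fill in the hooks above.
    # Minimal illustrative skeleton (class and pipeline names are assumptions, not part of this file):
    #
    #   class MyKandinskyModel(AbstractModel):
    #       def load_encoder(self):
    #           ...  # load the prior pipeline via self.load_pipeline(...)
    #       def run_encoder(self, prior_settings_dict):
    #           ...  # return (image_embeds, negative_image_embeds)
    #       def txt2img(self, p, generation_parameters, b):
    #           ...  # return a list of PIL images for batch b
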
    def process_images(self, p: StableDiffusionProcessing) -> Processed:
        try:
            state.begin()
            processing.fix_seed(p)
            devices.torch_gc()
            gc.collect()
            torch.cuda.empty_cache()
            torch.backends.cudnn.benchmark = False

            all_result_images = []
            initial_infos = []
            p.all_negative_prompts = [p.negative_prompt] * p.n_iter * p.batch_size
            p.all_prompts = [p.prompt] * p.n_iter * p.batch_size
            p.seed = int(p.seed)
            p.all_seeds = [p.seed + j for j in range(p.n_iter * p.batch_size)]
            # Subseeds are not used here; the seeds are passed in their place.
            initial_info = self.create_infotext(p, p.all_prompts, p.all_seeds, p.all_seeds, iteration=0, position_in_batch=0)

            p.sd_model_hash = self.sd_checkpoint_info.sd_model_hash

            try:
                if version.parse(torch.version.cuda) < version.parse("10.2"):
                    torch.use_deterministic_algorithms(True)
                else:
                    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
            except Exception:
                # torch.version.cuda can be None on CPU-only builds, which makes version.parse fail.
                os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

            if p.init_image is not None:
                p.init_image = p.init_image[0]
                #print(f"{p.init_image.width}, {p.init_image.height}")
                p.init_image = images.flatten(p.init_image, opts.img2img_background_color)

            state.job = "Prior"
            print("Starting Prior")
            if p.batch_size * p.n_iter > 1:
                p.generators = []
                for i in range(p.batch_size * p.n_iter):
                    p.generators.append(torch.Generator().manual_seed(p.seed + i))
            else:
                p.generators = torch.Generator().manual_seed(p.seed)

            # Reuse cached prior embeddings when the prior settings have not changed.
            prior_settings_dict = self.sd_processing_to_dict_encoder(p)

            if self.cached_image_embeds["settings"] == prior_settings_dict and self.cached_image_embeds["embeds"] is not None:
                p.image_embeds, p.negative_image_embeds = self.cached_image_embeds["embeds"]
            else:
                self.load_encoder()

                if state.interrupted:
                    self.encoder_to_cpu()
                    gc.collect()
                    devices.torch_gc()
                    torch.cuda.empty_cache()
                    return KProcessed(p, [], p.seed, initial_info, all_seeds=p.all_seeds)

                p.image_embeds, p.negative_image_embeds = self.run_encoder(prior_settings_dict)

                self.cached_image_embeds["settings"] = prior_settings_dict
                self.cached_image_embeds["embeds"] = (p.image_embeds, p.negative_image_embeds)

                self.encoder_to_cpu()
                devices.torch_gc()
                gc.collect()
                torch.cuda.empty_cache()

            print("Finished Prior")

            generation_parameters = self.sd_processing_to_dict_generator(p)

            state.job_no = p.n_iter * p.batch_size

            if p.init_image is None:
                generate_type = "txt2img"
            elif p.image_mask is None:
                generate_type = "img2img"
            else:
                generate_type = "inpaint"

            result_images = []
            presult_images = []

            for stage in self.stages:
                self.current_stage = stage

                for b in range(p.n_iter):
                    if len(presult_images) > 0:
                        p.init_image = presult_images[b]

                    if state.interrupted:
                        break

                    for batchid in range(p.batch_size):
                        initial_infos.append(self.create_infotext(p, p.all_prompts, p.all_seeds, p.all_seeds, iteration=b, position_in_batch=batchid))

                    state.job = "Generating"
                    if generate_type == "txt2img":
                        result_images = self.txt2img(p, generation_parameters, b)

                    elif generate_type == "img2img":
                        if p.denoising_strength == 0 and self.current_stage == 1:
                            result_images = [p.init_image] * p.batch_size
                        else:
                            result_images = self.img2img(p, generation_parameters, b)

                    else:
                        crop_region = None
                        if not p.inpainting_mask_invert:
                            p.image_mask = ImageOps.invert(p.image_mask)

                        if p.mask_blur > 0:
                            p.image_mask = p.image_mask.filter(ImageFilter.GaussianBlur(p.mask_blur))

                        mask = p.image_mask
                        mask = mask.convert('L')
                        new_init_image = p.init_image

                        if p.inpaint_full_res:
                            mask = ImageOps.invert(mask)
                            crop_region = masking.get_crop_region(np.array(mask), p.inpaint_full_res_padding)
                            crop_region = masking.expand_crop_region(crop_region, p.width, p.height, mask.width, mask.height)
                            x1, y1, x2, y2 = crop_region
                            mask = mask.crop(crop_region)
                            mask = images.resize_image(2, mask, p.width, p.height)
                            p.paste_to = (x1, y1, x2-x1, y2-y1)

                            new_init_image = new_init_image.crop(crop_region)
                            new_init_image = images.resize_image(2, new_init_image, p.width, p.height)
                            mask = ImageOps.invert(mask)
                        else:
                            p.image_mask = images.resize_image(p.resize_mode, p.image_mask, p.width, p.height)
                            np_mask = np.array(p.image_mask)
                            np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
                            mask = Image.fromarray(np_mask)

                        p.new_init_image = new_init_image
                        p.new_mask = mask
                        result_images = self.inpaint(p, generation_parameters, b)

                        if p.inpaint_full_res:
                            for i in range(len(result_images)):
                                paste_loc = p.paste_to
                                x, y, w, h = paste_loc
                                base_image = Image.new('RGBA', (p.init_image.width, p.init_image.height))
                                mask = ImageOps.invert(mask)
                                result_images[i] = images.resize_image(1, result_images[i], w, h)
                                mask = images.resize_image(1, mask, w, h)
                                mask = mask.convert('L')

                                base_image.paste(result_images[i], (x, y), mask=mask)
                                image = p.init_image
                                image = image.convert('RGBA')
                                image.alpha_composite(base_image)
                                # convert() and apply_color_correction() return new images, so assign their results.
                                image = image.convert('RGB')
                                image = processing.apply_color_correction(processing.setup_color_correction(p.init_image), image)
                                result_images[i] = image
                        #else:
                        #    for i in range(len(result_images)):
                        #        base_image = result_images[i]
                        #        base_image = base_image.convert('RGBA')
                        #        mask = ImageOps.invert(mask)
                        #        mask = mask.convert('L')
                        #        base_image.putalpha(mask)

                        #        image = images.resize_image(1, init_image, p.width, p.height)
                        #        image = image.convert('RGBA')
                        #        image.alpha_composite(base_image)
                        #        image.convert('RGB')
                        #        processing.apply_color_correction(processing.setup_color_correction(init_image), image)
                        #        result_images[i] = image

                    for imgid, result_image in enumerate(result_images):
                        images.save_image(result_image, p.outpath_samples, "", p.all_seeds[imgid],
                                          truncate_string(p.prompt[0] if isinstance(p.prompt, list) else p.prompt),
                                          opts.samples_format, info=initial_infos[imgid], p=p)

                    all_result_images.extend(result_images)

                    state.job_no = b * p.batch_size
                    state.current_image = result_images[0]
                    state.nextjob()

                    presult_images = result_images[:]
                    result_images = []

                self.main_model_to_cpu()
                self.next_stage()

            #del pipe
            del p.generators
            gc.collect()
            devices.torch_gc()
            torch.cuda.empty_cache()

            output_images = all_result_images

            # Save Grid
            unwanted_grid_because_of_img_count = len(output_images) < 2 and opts.grid_only_if_multiple
            if (opts.return_grid or opts.grid_save) and not (p.do_not_save_grid or unwanted_grid_because_of_img_count):
                grid = images.image_grid(output_images, p.batch_size)

                if opts.return_grid:
                    text = initial_info
                    if opts.enable_pnginfo:
                        grid.info["parameters"] = text
                    output_images.insert(0, grid)

                if opts.grid_save:
                    images.save_image(grid, p.outpath_grids, "grid",
                                      p.all_seeds[0],
                                      truncate_string(p.all_prompts[0]),
                                      opts.grid_format, info=initial_info,
                                      short_filename=not opts.grid_extended_filename, p=p,
                                      grid=True)

            p.n_iter = 1
            state.end()

            return KProcessed(p, all_result_images, p.seed, initial_info, all_seeds=p.all_seeds)

        except torch.cuda.OutOfMemoryError as e:
            print(e)
        finally:
            self.cleanup_on_error()
            gc.collect()
            devices.torch_gc()
            torch.cuda.empty_cache()
            self.unload()
        return