diff --git a/README.md b/README.md index 1c6ebab..d21f9ef 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Kandinsky For Automatic1111 Extension -Adds a script that run Kandinsky 2.X models. +Adds a script that runs Kandinsky 2.X models. Kandinsky 2.2 can generate larger images, but it is much slower to use with VRAM optimizations. -!!Note!! Progress bar not supported yet. +!!Note!! Progress bar not supported in the UI; view the terminal progress bar instead. ## Troubleshooting * Ignore the warning `Pipelines loaded with torch_dtype=torch.float16 cannot run with cpu device...` the Kandinsky model or prior is being moved to RAM to save VRAM. @@ -14,7 +14,6 @@ Adds a script that run Kandinsky 2.X models. * The real error is probably `CUDA out of memory` above the `AttributeError`. * Solution: In the script section, try reloading the stable diffusion model, and unloading it. - ## Examples The following are non cherry-picked examples, with various settings and resolutions. @@ -25,7 +24,8 @@ The following are non cherry-picked examples, with various settings and resoluti Model: Kandinsky 2.1
Steps: 64
Sampler: Default
-CFG scale: 7
+CFG Scale: 7
+Prior CFG Scale: 7
Seed: 3479955
Size: 1024x1024
Inference Steps: 128
@@ -39,7 +39,8 @@ In English: As the sun sets, the trees whisper, while the river gracefully meand Model: Kandinsky 2.1
Steps: 64
Sampler: Default
-CFG scale: 7
+CFG Scale: 7
+Prior CFG Scale: 7
Seed: 3479955
Size: 768x768
Inference Steps: 128
@@ -52,11 +53,12 @@ Inference Steps: 128
Model: Kandinsky 2.1
Steps: 64
Sampler: Default
-CFG scale: 7
+CFG Scale: 7
+Prior CFG Scale: 7
Seed: 3479955
Size: 1024x1024
Inference Steps: 128
- +

center image @@ -65,7 +67,8 @@ Inference Steps: 128
Model: Kandinsky 2.1
Steps: 64
Sampler: Default
-CFG scale: 7
+CFG Scale: 7
+Prior CFG Scale: 7
Seed: 3479955
Size: 512x512
Inference Steps: 128
@@ -78,7 +81,8 @@ Inference Steps: 128
Model: Kandinsky 2.1
Steps: 64
Sampler: Default
-CFG scale: 3
+CFG Scale: 3
+Prior CFG Scale: 3
Seed: 3479955
Size: 768x768
Inference Steps: 128
@@ -90,7 +94,8 @@ Combine images and/or prompts together. Can be used for style transfer, and comb Model: Kandinsky 2.1
Steps: 64
Sampler: Default
-CFG scale: 7
+CFG Scale: 7
+Prior CFG Scale: 7
Seed: 3479955494
Size: 1536x768
Inference Steps: 128
@@ -129,11 +134,13 @@ Result: * Seeds are somewhat consistent across different resolutions * Changing sampling steps keeps the same image, while changing quality * The seed is not as important as the prompt, the subjects/compositions across seeds are very similar -* It is very easy to "overcook" images with prompts, if this happens remove keywords or reduce CFG scale +* It is very easy to "overcook" images with prompts, if this happens remove keywords or reduce CFG Scale * Negative prompts aren't needed, so "low quality, bad quality..." can be ommited * Short positive prompts are good, too many keywords confuse the ai ## Features +* Kandinsky 2.1 +* Kandinsky 2.2 with VRAM optimizations * Text to image * Batching * Img2img diff --git a/install.py b/install.py index 968bc64..d11a516 100644 --- a/install.py +++ b/install.py @@ -8,67 +8,65 @@ import subprocess, pip # Get diffusers>=0.17.1 to add Kandinsky pipeline support filename = os.path.join(script_path, 'requirements.txt') -target_version = version.parse('0.17.1') -package_name = 'diffusers' +#target_version = version.parse('0.18.2') +#package_name = 'diffusers' +package_names = [('diffusers', version.parse('0.18.2')), ('transformers', version.parse('4.25.1'))] if os.path.isfile(filename): - print(f"Checking {package_name} version in requriments.txt") - with open(filename, 'r') as file: - lines = file.readlines() + for package_name, target_version in package_names: + print(f"Checking {package_name} version in requriments.txt") + with open(filename, 'r') as file: + lines = file.readlines() - corrent_version_in_requirements = True - found_diffusers_line = False - with open(filename, 'w') as file: - for line in lines: - package_equals = "==" - if line.startswith(f'{package_name}==') or line.startswith(f'{package_name}~='): - found_diffusers_line = True - if line.startswith(f'{package_name}~='): - package_equals = "~=" - else: - package_equals = ">=" + corrent_version_in_requirements = True + found_package_line = 
False + with open(filename, 'w') as file: + for line in lines: + package_equals = "==" + if line.startswith(f'{package_name}==') or line.startswith(f'{package_name}~='): + found_package_line = True + if line.startswith(f'{package_name}~='): + package_equals = "~=" + else: + package_equals = ">=" - version_str = line[len(package_name) + 2:] - if version_str != "": - current_version = version.parse(version_str) - print(f"Incompatible {package_name} version {current_version} in requirements.txt") - if current_version < target_version: - corrent_version_in_requirements = False - line = f'{package_name}{package_equals}{target_version}\n' - print(f"Changed {package_name} version to {package_equals}{target_version} in requirements.txt") + version_str = line[len(package_name) + 2:] + if version_str != "": + current_version = version.parse(version_str) + print(f"Incompatible {package_name} version {current_version} in requirements.txt") + if current_version < target_version: + corrent_version_in_requirements = False + line = f'{package_name}{package_equals}{target_version}\n' + print(f"Changed {package_name} version to {package_equals}{target_version} in requirements.txt") - elif line.startswith(f'{package_name}'): - found_diffusers_line = True + elif line.startswith(f'{package_name}'): + found_package_line = True - file.write(line) + file.write(line) - if not found_diffusers_line: - file.write(f'{package_name}>={target_version}\n') + if not found_package_line: + file.write(f'{package_name}>={target_version}\n') - file.write(f'huggingface_hub\n') + if corrent_version_in_requirements: + print(f"Correct {package_name} version in requriments.txt") - if corrent_version_in_requirements: - print(f"Correct {package_name} version in requriments.txt") +print_restart_message = False +for package_name, target_version in package_names: + try: + current_version = version.parse(pkg_resources.get_distribution(package_name).version) + print(f'Current {package_name} version: 
{current_version}') -try: - current_version = version.parse(pkg_resources.get_distribution(package_name).version) - print(f'Current {package_name} version: {current_version}') + if current_version < target_version: + subprocess.run(['pip', 'install', f'{package_name}>={target_version}']) + print(f'{package_name} upgraded to version {target_version}') + print_restart_message = True + else: + print(f'{package_name} is already up to date') - if current_version < target_version: + except pkg_resources.DistributionNotFound: subprocess.run(['pip', 'install', f'{package_name}>={target_version}']) - print(f'{package_name} upgraded to version {target_version}') - errors.print_error_explanation('RESTART AUTOMATIC1111 COMPLETELY TO FINISH INSTALLING PACKAGES FOR kandinsky-for-automatic1111') - else: - print(f'{package_name} is already up to date') + print(f'{package_name} installed with version {target_version}') -except pkg_resources.DistributionNotFound: - subprocess.run(['pip', 'install', f'{package_name}>={target_version}']) - print(f'{package_name} installed with version {target_version}') - -try: - current_version = version.parse(pkg_resources.get_distribution(package_name).version) - print(f'Checking if huggingface_hub is installed') -except pkg_resources.DistributionNotFound: - subprocess.run(['pip', 'install', 'huggingface_hub']) - print(f'huggingface_hub installed') +if print_restart_message: + errors.print_error_explanation('RESTART AUTOMATIC1111 COMPLETELY TO FINISH INSTALLING PACKAGES FOR kandinsky-for-automatic1111') diff --git a/scripts/abstract_model.py b/scripts/abstract_model.py index cd37bf7..8a724fc 100644 --- a/scripts/abstract_model.py +++ b/scripts/abstract_model.py @@ -111,6 +111,7 @@ class AbstractModel(): def __init__(self, cache_dir="", version="0"): self.stages = [1] self.cache_dir = os.path.join(os.path.join(script_path, 'models'), cache_dir) + self.models_path = os.path.join(script_path, 'models') self.version = version self.sd_checkpoint_info = 
KandinskyCheckpointInfo(version=self.version) self.sd_model_hash = self.sd_checkpoint_info.shorthash @@ -429,12 +430,12 @@ class AbstractModel(): return KProcessed(p, all_result_images, p.seed, initial_info, all_seeds=p.all_seeds) - except RuntimeError as re: + except torch.cuda.OutOfMemoryError as re: + print(re) + finally: self.cleanup_on_error() - gc.collect() devices.torch_gc() torch.cuda.empty_cache() - if str(re).startswith('CUDA out of memory.'): - print("OutOfMemoryError: CUDA out of memory.") + self.unload() return diff --git a/scripts/kandinsky.py b/scripts/kandinsky.py index a528934..e616105 100644 --- a/scripts/kandinsky.py +++ b/scripts/kandinsky.py @@ -1,9 +1,12 @@ from modules import errors try: - from diffusers import KandinskyPipeline, KandinskyImg2ImgPipeline, KandinskyPriorPipeline, KandinskyInpaintPipeline + from diffusers import KandinskyPipeline, KandinskyImg2ImgPipeline, KandinskyPriorPipeline, KandinskyInpaintPipeline, KandinskyV22Pipeline, KandinskyV22PriorPipeline + from diffusers.models import UNet2DConditionModel + from transformers import CLIPVisionModelWithProjection except ImportError as e: errors.print_error_explanation('RESTART AUTOMATIC1111 COMPLETELY TO FINISH INSTALLING PACKAGES FOR kandinsky-for-automatic1111') +import os import gc import torch from PIL import Image @@ -15,12 +18,16 @@ sys.path.append('extensions/kandinsky-for-automatic1111/scripts') from abstract_model import AbstractModel #import pdb -class KandinskyModel(AbstractModel): - pipe = None - pipe_prior = None +move_to_cuda=False +class KandinskyModel(AbstractModel): def __init__(self, cache_dir="", version="2.1"): AbstractModel.__init__(self, cache_dir="Kandinsky", version=version) + self.image_encoder = None + self.pipe_prior = None + self.pipe = None + self.unet = None + self.low_vram = True def mix_images(self, p, generation_parameters, b, result_images): if p.extra_image != [] and p.extra_image is not None: @@ -54,16 +61,87 @@ class 
KandinskyModel(AbstractModel): return result_images def load_encoder(self): - self.pipe_prior = self.load_pipeline("pipe_prior", KandinskyPriorPipeline, f"kandinsky-community/kandinsky-{self.version}-prior".replace(".", "-")) + if self.version == "2.1": + if self.pipe_prior is None: + self.pipe_prior = self.load_pipeline("pipe_prior", KandinskyPriorPipeline, f"kandinsky-community/kandinsky-{self.version}-prior".replace(".", "-")) + elif self.version == "2.2": + if self.image_encoder is None: + self.image_encoder = CLIPVisionModelWithProjection.from_pretrained( + 'kandinsky-community/kandinsky-2-2-prior', + subfolder='image_encoder', + cache_dir=os.path.join(self.models_path, "kandinsky22"), + low_cpu_mem_usage=True + # local_files_only=True + ) + + self.image_encoder.to("cpu" if self.low_vram else "cuda") + + self.pipe_prior = KandinskyV22PriorPipeline.from_pretrained( + 'kandinsky-community/kandinsky-2-2-prior', + image_encoder=self.image_encoder, + torch_dtype=torch.float32, + cache_dir=os.path.join(self.models_path, "kandinsky22"), + low_cpu_mem_usage=True + # local_files_only=True + ) + + self.image_encoder.to("cpu" if self.low_vram else "cuda") + + self.unet = UNet2DConditionModel.from_pretrained( + 'kandinsky-community/kandinsky-2-2-decoder', + subfolder='unet', + cache_dir=os.path.join(self.models_path, "kandinsky22"), + torch_dtype=torch.float16, + low_cpu_mem_usage=True + # local_files_only=True + ).half().to("cuda") + + self.pipe = KandinskyV22Pipeline.from_pretrained( + 'kandinsky-community/kandinsky-2-2-decoder', + unet=self.unet, + torch_dtype=torch.float16, + cache_dir=os.path.join(self.models_path, "kandinsky22"), + low_cpu_mem_usage=True + # local_files_only=True + ).to("cuda") def run_encoder(self, prior_settings_dict): + self.main_model_to_cpu() return self.pipe_prior(**prior_settings_dict).to_tuple() def encoder_to_cpu(self): - self.pipe_prior.to("cpu") + pass + #self.image_encoder.to("cpu") + #self.pipe_prior.to("cpu") + #self.pipe.to("cuda") + 
#self.unet.to("cuda") + + def unload(self): + if self.image_encoder is not None: + self.image_encoder.to("cpu") + del self.image_encoder + + if self.pipe_prior is not None: + self.pipe_prior.to("cpu") + del self.pipe_prior + + if self.pipe is not None: + self.pipe.to("cpu") + del self.pipe + + if self.unet is not None: + self.unet.to("cpu") + del self.unet + devices.torch_gc() + gc.collect() + torch.cuda.empty_cache() def main_model_to_cpu(self): - self.pipe.to("cpu") + pass + #self.pipe.to("cpu") + #self.unet.to("cpu") + #self.image_encoder.to("cuda") + #self.pipe_prior.to("cuda") def sd_processing_to_dict_encoder(self, p: StableDiffusionProcessing): torch.manual_seed(0) @@ -77,8 +155,12 @@ class KandinskyModel(AbstractModel): return parameters_dict def sd_processing_to_dict_generator(self, p: StableDiffusionProcessing): - generation_parameters = {"prompt": p.prompt, "negative_prompt": p.negative_prompt, "image_embeds": p.image_embeds, "negative_image_embeds": p.negative_image_embeds, - "height": p.height, "width": p.width, "guidance_scale": p.cfg_scale, "num_inference_steps": p.steps} + if self.version == "2.1": + generation_parameters = {"prompt": p.prompt, "negative_prompt": p.negative_prompt, "image_embeds": p.image_embeds, "negative_image_embeds": p.negative_image_embeds, + "height": p.height, "width": p.width, "guidance_scale": p.cfg_scale, "num_inference_steps": p.steps} + elif self.version == "2.2": + generation_parameters = {"image_embeds": p.image_embeds.half(), "negative_image_embeds": p.negative_image_embeds.half(), + "height": p.height, "width": p.width, "guidance_scale": p.cfg_scale, "num_inference_steps": p.steps} return generation_parameters @@ -90,16 +172,21 @@ class KandinskyModel(AbstractModel): self.encoder_to_cpu() def txt2img(self, p, generation_parameters, b): - self.pipe = self.load_pipeline("pipe", KandinskyPipeline, f"kandinsky-community/kandinsky-{self.version}".replace(".", "-"), move_to_cuda = False) + if self.version == "2.1": + 
self.pipe = self.load_pipeline("pipe", KandinskyPipeline, f"kandinsky-community/kandinsky-{self.version}".replace(".", "-"), move_to_cuda=move_to_cuda) + #else: + # self.unet.to("cuda") + # self.pipe.to("cuda") + result_images = self.pipe(**generation_parameters, num_images_per_prompt=p.batch_size).images return self.mix_images(p, generation_parameters, b, result_images) def img2img(self, p, generation_parameters, b): - self.pipe = self.load_pipeline("pipe", KandinskyImg2ImgPipeline, f"kandinsky-community/kandinsky-{self.version}".replace(".", "-"), move_to_cuda = False) + self.pipe = self.load_pipeline("pipe", KandinskyImg2ImgPipeline, f"kandinsky-community/kandinsky-{self.version}".replace(".", "-"), move_to_cuda=move_to_cuda) result_images = self.pipe(**generation_parameters, num_images_per_prompt=p.batch_size, image=p.init_image, strength=p.denoising_strength).images return self.mix_images(p, generation_parameters, b, result_images) def inpaint(self, p, generation_parameters, b): - self.pipe = self.load_pipeline("pipe", KandinskyInpaintPipeline, f"kandinsky-community/kandinsky-{self.version}-inpaint".replace(".", "-"), move_to_cuda = False) + self.pipe = self.load_pipeline("pipe", KandinskyInpaintPipeline, f"kandinsky-community/kandinsky-{self.version}-inpaint".replace(".", "-"), move_to_cuda=move_to_cuda) result_images = self.pipe(**generation_parameters, num_images_per_prompt=p.batch_size, image=p.new_init_image, mask_image=p.new_mask).images return self.mix_images(p, generation_parameters, b, result_images) diff --git a/scripts/kandinsky_script.py b/scripts/kandinsky_script.py index 86aea24..a5957a9 100644 --- a/scripts/kandinsky_script.py +++ b/scripts/kandinsky_script.py @@ -33,17 +33,18 @@ def reload_model(): def unload_kandinsky_model(): if getattr(shared, "kandinsky_model", None) is not None: - if getattr(shared.kandinsky_model, "pipe_prior", None) is not None: - del shared.kandinsky_model.pipe_prior - devices.torch_gc() - gc.collect() - 
torch.cuda.empty_cache() + getattr(shared, "kandinsky_model", None).unload() + #if getattr(shared.kandinsky_model, "pipe_prior", None) is not None: + # del shared.kandinsky_model.pipe_prior + # devices.torch_gc() + # gc.collect() + # torch.cuda.empty_cache() - if getattr(shared.kandinsky_model, "pipe", None) is not None: - del shared.kandinsky_model.pipe - devices.torch_gc() - gc.collect() - torch.cuda.empty_cache() + #if getattr(shared.kandinsky_model, "pipe", None) is not None: + # del shared.kandinsky_model.pipe + # devices.torch_gc() + # gc.collect() + # torch.cuda.empty_cache() del shared.kandinsky_model print("Unloaded Kandinsky model") @@ -75,9 +76,12 @@ class Script(scripts.Script): unload_k_model.click(unload_kandinsky_model) with gr.Row(): - prior_inference_steps = gr.inputs.Slider(minimum=2, maximum=1024, step=1, label="Prior Inference Steps", default=128) + prior_inference_steps = gr.inputs.Slider(minimum=2, maximum=1024, step=1, label="Prior Inference Steps", default=64) prior_cfg_scale = gr.inputs.Slider(minimum=1, maximum=20, step=0.5, label="Prior CFG Scale", default=4) - model_version = gr.inputs.Dropdown(["2.1", "2.2"], label="Kandinsky Version", default="2.1") + + model_version = gr.inputs.Dropdown(["2.1", "2.2"], label="Kandinsky Version", default="2.2") + gr.Markdown("Kandinsky 2.2 requires much more RAM") + low_vram = gr.inputs.Checkbox(label="Kandinsky 2.2 Low VRAM", default=True) with gr.Accordion("Image Mixing", open=False): with gr.Row(): @@ -85,25 +89,30 @@ class Script(scripts.Script): img2_strength = gr.inputs.Slider(minimum=-2, maximum=2, label="Interpolate Image 2 Strength (image below)", default=0.5) extra_image = gr.inputs.Image() - inputs = [extra_image, prior_inference_steps, prior_cfg_scale, model_version, img1_strength, img2_strength] + inputs = [extra_image, prior_inference_steps, prior_cfg_scale, model_version, img1_strength, img2_strength, low_vram] return inputs - def run(self, p, extra_image, prior_inference_steps, 
prior_cfg_scale, model_version, img1_strength, img2_strength) -> Processed: + def run(self, p, extra_image, prior_inference_steps, prior_cfg_scale, model_version, img1_strength, img2_strength, low_vram) -> Processed: p.extra_image = extra_image p.prior_inference_steps = prior_inference_steps p.prior_cfg_scale = prior_cfg_scale p.img1_strength = img1_strength p.img2_strength = img2_strength - p.sampler_name = "DDIM" + if model_version == "2.1": + p.sampler_name = "DDIM" + elif model_version == "2.2": + p.sampler_name = "DDPM" p.init_image = getattr(p, 'init_images', None) p.extra_generation_params["Prior Inference Steps"] = prior_inference_steps p.extra_generation_params["Prior CFG Scale"] = prior_cfg_scale p.extra_generation_params["Script"] = self.title() + p.extra_generation_params["Kandinsky Version"] = model_version shared.kandinsky_model = getattr(shared, 'kandinsky_model', None) - if shared.kandinsky_model is None or shared.kandinsky_model.version != model_version: + if shared.kandinsky_model is None or shared.kandinsky_model.version != model_version or (model_version == "2.2" and shared.kandinsky_model.low_vram != low_vram): shared.kandinsky_model = KandinskyModel(version=model_version) + shared.kandinsky_model.low_vram = low_vram return shared.kandinsky_model.process_images(p)