Git issue fix

other-model-code-refractor-only
MMqd 2023-07-23 12:05:00 -04:00
parent 939a5f6006
commit 9e8cf35cef
5 changed files with 195 additions and 93 deletions

View File

@ -1,7 +1,7 @@
# Kandinsky For Automatic1111 Extension
Adds a script that runs Kandinsky 2.X models.
Adds a script that runs Kandinsky 2.X models. Kandinsky 2.2 can generate larger images, but it is much slower to use with VRAM optimizations.
<strong>!!Note!!</strong> Progress bar not supported yet.
<strong>!!Note!!</strong> Progress bar not supported, view terminal progress bar instead.
## Troubleshooting
* Ignore the warning `Pipelines loaded with torch_dtype=torch.float16 cannot run with cpu device...` the Kandinsky model or prior is being moved to RAM to save VRAM.
@ -14,7 +14,6 @@ Adds a script that run Kandinsky 2.X models.
* The real error is probably `CUDA out of memory` above the `AttributeError`.
* <strong>Solution:</strong> In the script section, try reloading the stable diffusion model, and unloading it.
## Examples
The following are non-cherry-picked examples, with various settings and resolutions.
@ -25,7 +24,8 @@ The following are non cherry-picked examples, with various settings and resoluti
Model: Kandinsky 2.1<br>
Steps: 64<br>
Sampler: Default<br>
CFG scale: 7<br>
CFG Scale: 7<br>
Prior CFG Scale: 7<br>
Seed: 3479955<br>
Size: 1024x1024<br>
Inference Steps: 128<br>
@ -39,7 +39,8 @@ In English: As the sun sets, the trees whisper, while the river gracefully meand
Model: Kandinsky 2.1<br>
Steps: 64<br>
Sampler: Default<br>
CFG scale: 7<br>
CFG Scale: 7<br>
Prior CFG Scale: 7<br>
Seed: 3479955<br>
Size: 768x768<br>
Inference Steps: 128<br>
@ -52,11 +53,12 @@ Inference Steps: 128<br>
Model: Kandinsky 2.1<br>
Steps: 64<br>
Sampler: Default<br>
CFG scale: 7<br>
CFG Scale: 7<br>
Prior CFG Scale: 7<br>
Seed: 3479955<br>
Size: 1024x1024<br>
Inference Steps: 128<br>
<br>
<p align="center">
<img src="https://github.com/MMqd/kandinsky-for-automatic1111/blob/main/images/spaceship,%20retro,%20realistic,%20high%20quality,%204k.jpg?raw=true" width="25%" alt="center image" />
@ -65,7 +67,8 @@ Inference Steps: 128<br>
Model: Kandinsky 2.1<br>
Steps: 64<br>
Sampler: Default<br>
CFG scale: 7<br>
CFG Scale: 7<br>
Prior CFG Scale: 7<br>
Seed: 3479955<br>
Size: 512x512<br>
Inference Steps: 128<br>
@ -78,7 +81,8 @@ Inference Steps: 128<br>
Model: Kandinsky 2.1<br>
Steps: 64<br>
Sampler: Default<br>
CFG scale: 3<br>
CFG Scale: 3<br>
Prior CFG Scale: 3<br>
Seed: 3479955<br>
Size: 768x768<br>
Inference Steps: 128<br>
@ -90,7 +94,8 @@ Combine images and/or prompts together. Can be used for style transfer, and comb
Model: Kandinsky 2.1<br>
Steps: 64<br>
Sampler: Default<br>
CFG scale: 7<br>
CFG Scale: 7<br>
Prior CFG Scale: 7<br>
Seed: 3479955494<br>
Size: 1536x768<br>
Inference Steps: 128<br>
@ -129,11 +134,13 @@ Result:
* Seeds are somewhat consistent across different resolutions
* Changing sampling steps keeps the same image, while changing quality
* The seed is not as important as the prompt, the subjects/compositions across seeds are very similar
* It is very easy to "overcook" images with prompts, if this happens remove keywords or reduce CFG scale
* It is very easy to "overcook" images with prompts; if this happens, remove keywords or reduce CFG Scale
* Negative prompts aren't needed, so "low quality, bad quality..." can be omitted
* Short positive prompts are good; too many keywords confuse the AI
## Features
* Kandinsky 2.1
* Kandinsky 2.2 with VRAM optimizations
* Text to image
* Batching
* Img2img

View File

@ -8,67 +8,65 @@ import subprocess, pip
# Get diffusers>=0.17.1 to add Kandinsky pipeline support
filename = os.path.join(script_path, 'requirements.txt')
target_version = version.parse('0.17.1')
package_name = 'diffusers'
#target_version = version.parse('0.18.2')
#package_name = 'diffusers'
package_names = [('diffusers', version.parse('0.18.2')), ('transformers', version.parse('4.25.1'))]
if os.path.isfile(filename):
print(f"Checking {package_name} version in requriments.txt")
with open(filename, 'r') as file:
lines = file.readlines()
for package_name, target_version in package_names:
print(f"Checking {package_name} version in requriments.txt")
with open(filename, 'r') as file:
lines = file.readlines()
corrent_version_in_requirements = True
found_diffusers_line = False
with open(filename, 'w') as file:
for line in lines:
package_equals = "=="
if line.startswith(f'{package_name}==') or line.startswith(f'{package_name}~='):
found_diffusers_line = True
if line.startswith(f'{package_name}~='):
package_equals = "~="
else:
package_equals = ">="
corrent_version_in_requirements = True
found_package_line = False
with open(filename, 'w') as file:
for line in lines:
package_equals = "=="
if line.startswith(f'{package_name}==') or line.startswith(f'{package_name}~='):
found_package_line = True
if line.startswith(f'{package_name}~='):
package_equals = "~="
else:
package_equals = ">="
version_str = line[len(package_name) + 2:]
if version_str != "":
current_version = version.parse(version_str)
print(f"Incompatible {package_name} version {current_version} in requirements.txt")
if current_version < target_version:
corrent_version_in_requirements = False
line = f'{package_name}{package_equals}{target_version}\n'
print(f"Changed {package_name} version to {package_equals}{target_version} in requirements.txt")
version_str = line[len(package_name) + 2:]
if version_str != "":
current_version = version.parse(version_str)
print(f"Incompatible {package_name} version {current_version} in requirements.txt")
if current_version < target_version:
corrent_version_in_requirements = False
line = f'{package_name}{package_equals}{target_version}\n'
print(f"Changed {package_name} version to {package_equals}{target_version} in requirements.txt")
elif line.startswith(f'{package_name}'):
found_diffusers_line = True
elif line.startswith(f'{package_name}'):
found_package_line = True
file.write(line)
file.write(line)
if not found_diffusers_line:
file.write(f'{package_name}>={target_version}\n')
if not found_package_line:
file.write(f'{package_name}>={target_version}\n')
file.write(f'huggingface_hub\n')
if corrent_version_in_requirements:
print(f"Correct {package_name} version in requriments.txt")
if corrent_version_in_requirements:
print(f"Correct {package_name} version in requriments.txt")
print_restart_message = False
for package_name, target_version in package_names:
try:
current_version = version.parse(pkg_resources.get_distribution(package_name).version)
print(f'Current {package_name} version: {current_version}')
try:
current_version = version.parse(pkg_resources.get_distribution(package_name).version)
print(f'Current {package_name} version: {current_version}')
if current_version < target_version:
subprocess.run(['pip', 'install', f'{package_name}>={target_version}'])
print(f'{package_name} upgraded to version {target_version}')
print_restart_message = True
else:
print(f'{package_name} is already up to date')
if current_version < target_version:
except pkg_resources.DistributionNotFound:
subprocess.run(['pip', 'install', f'{package_name}>={target_version}'])
print(f'{package_name} upgraded to version {target_version}')
errors.print_error_explanation('RESTART AUTOMATIC1111 COMPLETELY TO FINISH INSTALLING PACKAGES FOR kandinsky-for-automatic1111')
else:
print(f'{package_name} is already up to date')
print(f'{package_name} installed with version {target_version}')
except pkg_resources.DistributionNotFound:
subprocess.run(['pip', 'install', f'{package_name}>={target_version}'])
print(f'{package_name} installed with version {target_version}')
try:
current_version = version.parse(pkg_resources.get_distribution(package_name).version)
print(f'Checking if huggingface_hub is installed')
except pkg_resources.DistributionNotFound:
subprocess.run(['pip', 'install', 'huggingface_hub'])
print(f'huggingface_hub installed')
if print_restart_message:
errors.print_error_explanation('RESTART AUTOMATIC1111 COMPLETELY TO FINISH INSTALLING PACKAGES FOR kandinsky-for-automatic1111')

View File

@ -111,6 +111,7 @@ class AbstractModel():
def __init__(self, cache_dir="", version="0"):
self.stages = [1]
self.cache_dir = os.path.join(os.path.join(script_path, 'models'), cache_dir)
self.models_path = os.path.join(script_path, 'models')
self.version = version
self.sd_checkpoint_info = KandinskyCheckpointInfo(version=self.version)
self.sd_model_hash = self.sd_checkpoint_info.shorthash
@ -429,12 +430,12 @@ class AbstractModel():
return KProcessed(p, all_result_images, p.seed, initial_info, all_seeds=p.all_seeds)
except RuntimeError as re:
except torch.cuda.OutOfMemoryError as re:
print(re)
finally:
self.cleanup_on_error()
gc.collect()
devices.torch_gc()
torch.cuda.empty_cache()
if str(re).startswith('CUDA out of memory.'):
print("OutOfMemoryError: CUDA out of memory.")
self.unload()
return

View File

@ -1,9 +1,12 @@
from modules import errors
try:
from diffusers import KandinskyPipeline, KandinskyImg2ImgPipeline, KandinskyPriorPipeline, KandinskyInpaintPipeline
from diffusers import KandinskyPipeline, KandinskyImg2ImgPipeline, KandinskyPriorPipeline, KandinskyInpaintPipeline, KandinskyV22Pipeline, KandinskyV22PriorPipeline
from diffusers.models import UNet2DConditionModel
from transformers import CLIPVisionModelWithProjection
except ImportError as e:
errors.print_error_explanation('RESTART AUTOMATIC1111 COMPLETELY TO FINISH INSTALLING PACKAGES FOR kandinsky-for-automatic1111')
import os
import gc
import torch
from PIL import Image
@ -15,12 +18,16 @@ sys.path.append('extensions/kandinsky-for-automatic1111/scripts')
from abstract_model import AbstractModel
#import pdb
class KandinskyModel(AbstractModel):
pipe = None
pipe_prior = None
move_to_cuda=False
class KandinskyModel(AbstractModel):
def __init__(self, cache_dir="", version="2.1"):
AbstractModel.__init__(self, cache_dir="Kandinsky", version=version)
self.image_encoder = None
self.pipe_prior = None
self.pipe = None
self.unet = None
self.low_vram = True
def mix_images(self, p, generation_parameters, b, result_images):
if p.extra_image != [] and p.extra_image is not None:
@ -54,16 +61,87 @@ class KandinskyModel(AbstractModel):
return result_images
def load_encoder(self):
self.pipe_prior = self.load_pipeline("pipe_prior", KandinskyPriorPipeline, f"kandinsky-community/kandinsky-{self.version}-prior".replace(".", "-"))
if self.version == "2.1":
if self.pipe_prior is None:
self.pipe_prior = self.load_pipeline("pipe_prior", KandinskyPriorPipeline, f"kandinsky-community/kandinsky-{self.version}-prior".replace(".", "-"))
elif self.version == "2.2":
if self.image_encoder is None:
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(
'kandinsky-community/kandinsky-2-2-prior',
subfolder='image_encoder',
cache_dir=os.path.join(self.models_path, "kandinsky22"),
low_cpu_mem_usage=True
# local_files_only=True
)
self.image_encoder.to("cpu" if self.low_vram else "cuda")
self.pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
'kandinsky-community/kandinsky-2-2-prior',
image_encoder=self.image_encoder,
torch_dtype=torch.float32,
cache_dir=os.path.join(self.models_path, "kandinsky22"),
low_cpu_mem_usage=True
# local_files_only=True
)
self.image_encoder.to("cpu" if self.low_vram else "cuda")
self.unet = UNet2DConditionModel.from_pretrained(
'kandinsky-community/kandinsky-2-2-decoder',
subfolder='unet',
cache_dir=os.path.join(self.models_path, "kandinsky22"),
torch_dtype=torch.float16,
low_cpu_mem_usage=True
# local_files_only=True
).half().to("cuda")
self.pipe = KandinskyV22Pipeline.from_pretrained(
'kandinsky-community/kandinsky-2-2-decoder',
unet=self.unet,
torch_dtype=torch.float16,
cache_dir=os.path.join(self.models_path, "kandinsky22"),
low_cpu_mem_usage=True
# local_files_only=True
).to("cuda")
def run_encoder(self, prior_settings_dict):
self.main_model_to_cpu()
return self.pipe_prior(**prior_settings_dict).to_tuple()
def encoder_to_cpu(self):
self.pipe_prior.to("cpu")
pass
#self.image_encoder.to("cpu")
#self.pipe_prior.to("cpu")
#self.pipe.to("cuda")
#self.unet.to("cuda")
def unload(self):
if self.image_encoder is not None:
self.image_encoder.to("cpu")
del self.image_encoder
if self.pipe_prior is not None:
self.pipe_prior.to("cpu")
del self.pipe_prior
if self.pipe is not None:
self.pipe.to("cpu")
del self.pipe
if self.unet is not None:
self.unet.to("cpu")
del self.unet
devices.torch_gc()
gc.collect()
torch.cuda.empty_cache()
def main_model_to_cpu(self):
self.pipe.to("cpu")
pass
#self.pipe.to("cpu")
#self.unet.to("cpu")
#self.image_encoder.to("cuda")
#self.pipe_prior.to("cuda")
def sd_processing_to_dict_encoder(self, p: StableDiffusionProcessing):
torch.manual_seed(0)
@ -77,8 +155,12 @@ class KandinskyModel(AbstractModel):
return parameters_dict
def sd_processing_to_dict_generator(self, p: StableDiffusionProcessing):
generation_parameters = {"prompt": p.prompt, "negative_prompt": p.negative_prompt, "image_embeds": p.image_embeds, "negative_image_embeds": p.negative_image_embeds,
"height": p.height, "width": p.width, "guidance_scale": p.cfg_scale, "num_inference_steps": p.steps}
if self.version == "2.1":
generation_parameters = {"prompt": p.prompt, "negative_prompt": p.negative_prompt, "image_embeds": p.image_embeds, "negative_image_embeds": p.negative_image_embeds,
"height": p.height, "width": p.width, "guidance_scale": p.cfg_scale, "num_inference_steps": p.steps}
elif self.version == "2.2":
generation_parameters = {"image_embeds": p.image_embeds.half(), "negative_image_embeds": p.negative_image_embeds.half(),
"height": p.height, "width": p.width, "guidance_scale": p.cfg_scale, "num_inference_steps": p.steps}
return generation_parameters
@ -90,16 +172,21 @@ class KandinskyModel(AbstractModel):
self.encoder_to_cpu()
def txt2img(self, p, generation_parameters, b):
self.pipe = self.load_pipeline("pipe", KandinskyPipeline, f"kandinsky-community/kandinsky-{self.version}".replace(".", "-"), move_to_cuda = False)
if self.version == "2.1":
self.pipe = self.load_pipeline("pipe", KandinskyPipeline, f"kandinsky-community/kandinsky-{self.version}".replace(".", "-"), move_to_cuda=move_to_cuda)
#else:
# self.unet.to("cuda")
# self.pipe.to("cuda")
result_images = self.pipe(**generation_parameters, num_images_per_prompt=p.batch_size).images
return self.mix_images(p, generation_parameters, b, result_images)
def img2img(self, p, generation_parameters, b):
self.pipe = self.load_pipeline("pipe", KandinskyImg2ImgPipeline, f"kandinsky-community/kandinsky-{self.version}".replace(".", "-"), move_to_cuda = False)
self.pipe = self.load_pipeline("pipe", KandinskyImg2ImgPipeline, f"kandinsky-community/kandinsky-{self.version}".replace(".", "-"), move_to_cuda=move_to_cuda)
result_images = self.pipe(**generation_parameters, num_images_per_prompt=p.batch_size, image=p.init_image, strength=p.denoising_strength).images
return self.mix_images(p, generation_parameters, b, result_images)
def inpaint(self, p, generation_parameters, b):
self.pipe = self.load_pipeline("pipe", KandinskyInpaintPipeline, f"kandinsky-community/kandinsky-{self.version}-inpaint".replace(".", "-"), move_to_cuda = False)
self.pipe = self.load_pipeline("pipe", KandinskyInpaintPipeline, f"kandinsky-community/kandinsky-{self.version}-inpaint".replace(".", "-"), move_to_cuda=move_to_cuda)
result_images = self.pipe(**generation_parameters, num_images_per_prompt=p.batch_size, image=p.new_init_image, mask_image=p.new_mask).images
return self.mix_images(p, generation_parameters, b, result_images)

View File

@ -33,17 +33,18 @@ def reload_model():
def unload_kandinsky_model():
if getattr(shared, "kandinsky_model", None) is not None:
if getattr(shared.kandinsky_model, "pipe_prior", None) is not None:
del shared.kandinsky_model.pipe_prior
devices.torch_gc()
gc.collect()
torch.cuda.empty_cache()
getattr(shared, "kandinsky_model", None).unload()
#if getattr(shared.kandinsky_model, "pipe_prior", None) is not None:
# del shared.kandinsky_model.pipe_prior
# devices.torch_gc()
# gc.collect()
# torch.cuda.empty_cache()
if getattr(shared.kandinsky_model, "pipe", None) is not None:
del shared.kandinsky_model.pipe
devices.torch_gc()
gc.collect()
torch.cuda.empty_cache()
#if getattr(shared.kandinsky_model, "pipe", None) is not None:
# del shared.kandinsky_model.pipe
# devices.torch_gc()
# gc.collect()
# torch.cuda.empty_cache()
del shared.kandinsky_model
print("Unloaded Kandinsky model")
@ -75,9 +76,12 @@ class Script(scripts.Script):
unload_k_model.click(unload_kandinsky_model)
with gr.Row():
prior_inference_steps = gr.inputs.Slider(minimum=2, maximum=1024, step=1, label="Prior Inference Steps", default=128)
prior_inference_steps = gr.inputs.Slider(minimum=2, maximum=1024, step=1, label="Prior Inference Steps", default=64)
prior_cfg_scale = gr.inputs.Slider(minimum=1, maximum=20, step=0.5, label="Prior CFG Scale", default=4)
model_version = gr.inputs.Dropdown(["2.1", "2.2"], label="Kandinsky Version", default="2.1")
model_version = gr.inputs.Dropdown(["2.1", "2.2"], label="Kandinsky Version", default="2.2")
gr.Markdown("Kandinsky 2.2 requires much more RAM")
low_vram = gr.inputs.Checkbox(label="Kandinsky 2.2 Low VRAM", default=True)
with gr.Accordion("Image Mixing", open=False):
with gr.Row():
@ -85,25 +89,30 @@ class Script(scripts.Script):
img2_strength = gr.inputs.Slider(minimum=-2, maximum=2, label="Interpolate Image 2 Strength (image below)", default=0.5)
extra_image = gr.inputs.Image()
inputs = [extra_image, prior_inference_steps, prior_cfg_scale, model_version, img1_strength, img2_strength]
inputs = [extra_image, prior_inference_steps, prior_cfg_scale, model_version, img1_strength, img2_strength, low_vram]
return inputs
def run(self, p, extra_image, prior_inference_steps, prior_cfg_scale, model_version, img1_strength, img2_strength) -> Processed:
def run(self, p, extra_image, prior_inference_steps, prior_cfg_scale, model_version, img1_strength, img2_strength, low_vram) -> Processed:
p.extra_image = extra_image
p.prior_inference_steps = prior_inference_steps
p.prior_cfg_scale = prior_cfg_scale
p.img1_strength = img1_strength
p.img2_strength = img2_strength
p.sampler_name = "DDIM"
if model_version == "2.1":
p.sampler_name = "DDIM"
elif model_version == "2.2":
p.sampler_name = "DDPM"
p.init_image = getattr(p, 'init_images', None)
p.extra_generation_params["Prior Inference Steps"] = prior_inference_steps
p.extra_generation_params["Prior CFG Scale"] = prior_cfg_scale
p.extra_generation_params["Script"] = self.title()
p.extra_generation_params["Kandinsky Version"] = model_version
shared.kandinsky_model = getattr(shared, 'kandinsky_model', None)
if shared.kandinsky_model is None or shared.kandinsky_model.version != model_version:
if shared.kandinsky_model is None or shared.kandinsky_model.version != model_version or (model_version == "2.2" and shared.kandinsky_model.low_vram != low_vram):
shared.kandinsky_model = KandinskyModel(version=model_version)
shared.kandinsky_model.low_vram = low_vram
return shared.kandinsky_model.process_images(p)