minimum functionality - v0.1

video and outpaint options added (Using mask_blur>0 will cause a conflict with frame interpolation)
2023-04-12 15:51:11 +04:00 · 2023-04-12 15:51:11 +04:00 · b4cea33d0d
parent 787777a932
commit b4cea33d0d
1 changed files with 215 additions and 111 deletions
--- a/scripts/inifnite-zoom.py
+++ b/scripts/inifnite-zoom.py
@ -1,33 +1,36 @@
 import sys
 import os
 import time
+
 basedir = os.getcwd()
-sys.path.extend(basedir + '/extensions/infinite-zoom-sd-webui/')
+sys.path.extend(basedir + "/extensions/infinite-zoom-sd-webui/")
 import numpy as np
 import gradio as gr
 from PIL import Image

-from iz_helpers.image import shrink_and_paste_on_blank
-from iz_helpers.video import write_video
+from iz_helpers import shrink_and_paste_on_blank, write_video
 from webui import wrap_gradio_gpu_call
 from modules import script_callbacks
 import modules.shared as shared
-import modules.scripts as scripts
-from modules.processing import process_images, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img
-from modules.ui import create_output_panel, plaintext_to_html, wrap_gradio_call
+from modules.processing import (
+    process_images,
+    StableDiffusionProcessingTxt2Img,
+    StableDiffusionProcessingImg2Img,
+)

+from modules.ui import create_output_panel, plaintext_to_html

-output_path = basedir + '/extensions/infinite-zoom-sd-webui/out'
+output_path = basedir + "/extensions/infinite-zoom-sd-webui/out"
 default_prompt = "A psychedelic jungle with trees that have glowing, fractal-like patterns, Simon stalenhag poster 1920s style, street level view, hyper futuristic, 8k resolution, hyper realistic"
 default_negative_prompt = "frames, borderline, text, character, duplicate, error, out of frame, watermark, low quality, ugly, deformed, blur"


 def renderTxt2Img(prompt, negative_prompt, sampler, steps, cfg_scale, width, height):
-    processetd = None
+    processed = None
    p = StableDiffusionProcessingTxt2Img(
        sd_model=shared.sd_model,
-        outpath_samples=output_path,
-        outpath_grids=output_path,
+        outpath_samples=shared.opts.outdir_txt2img_samples,
+        outpath_grids=shared.opts.outdir_txt2img_grids,
        prompt=prompt,
        negative_prompt=negative_prompt,
        # seed=-1,
@ -38,20 +41,31 @@ def renderTxt2Img(prompt, negative_prompt, sampler, steps, cfg_scale, width, hei
        width=width,
        height=height,
    )
-    # script_runner = scripts.scripts_img2img
-    # p.scripts = script_runner
-    # shared.state.begin()
    processed = process_images(p)
-    # shared.state.end()
    return processed


-def renderImg2Img(prompt, negative_prompt, sampler, steps, cfg_scale, width, height, init_image, mask_image):
-    processetd = None
+def renderImg2Img(
+    prompt,
+    negative_prompt,
+    sampler,
+    steps,
+    cfg_scale,
+    width,
+    height,
+    init_image,
+    mask_image,
+    inpainting_denoising_strength,
+    inpainting_mask_blur,
+    inpainting_fill_mode,
+    inpainting_full_res,
+    inpainting_padding,
+):
+    processed = None
    p = StableDiffusionProcessingImg2Img(
        sd_model=shared.sd_model,
-        outpath_samples=output_path,
-        outpath_grids=output_path,
+        outpath_samples=shared.opts.outdir_img2img_samples,
+        outpath_grids=shared.opts.outdir_img2img_grids,
        prompt=prompt,
        negative_prompt=negative_prompt,
        # seed=-1,
@ -62,13 +76,16 @@ def renderImg2Img(prompt, negative_prompt, sampler, steps, cfg_scale, width, hei
        width=width,
        height=height,
        init_images=[init_image],
-        mask=mask_image
+        denoising_strength=inpainting_denoising_strength,
+        mask_blur=inpainting_mask_blur,
+        inpainting_fill=inpainting_fill_mode,
+        inpaint_full_res=inpainting_full_res,
+        inpaint_full_res_padding=inpainting_padding,
+        mask=mask_image,
    )
-    # script_runner = scripts.scripts_txt2img
-    # p.scripts = script_runner
-    # shared.state.begin()
+    # p.latent_mask = Image.new("RGB", (p.width, p.height), "white")
+
    processed = process_images(p)
-    # shared.state.end()
    return processed


@ -78,7 +95,16 @@ def create_zoom(
    num_outpainting_steps,
    guidance_scale,
    num_inference_steps,
-    custom_init_image
+    custom_init_image,
+    video_frame_rate,
+    video_zoom_mode,
+    video_start_frame_dupe_amount,
+    video_last_frame_dupe_amount,
+    inpainting_denoising_strength,
+    inpainting_mask_blur,
+    inpainting_fill_mode,
+    inpainting_full_res,
+    inpainting_padding,
 ):
    prompts = {}
    for x in prompts_array:
@ -94,15 +120,23 @@ def create_zoom(
    height = 512
    current_image = Image.new(mode="RGBA", size=(height, width))
    mask_image = np.array(current_image)[:, :, 3]
-    mask_image = Image.fromarray(255-mask_image).convert("RGB")
+    mask_image = Image.fromarray(255 - mask_image).convert("RGB")
    current_image = current_image.convert("RGB")

-    if (custom_init_image):
+    if custom_init_image:
        current_image = custom_init_image.resize(
-            (width, height), resample=Image.LANCZOS)
+            (width, height), resample=Image.LANCZOS
+        )
    else:
-        processed = renderTxt2Img(prompts[min(k for k in prompts.keys() if k >= 0)],
-                                  negative_prompt, "Euler a", num_inference_steps, guidance_scale, width, height)
+        processed = renderTxt2Img(
+            prompts[min(k for k in prompts.keys() if k >= 0)],
+            negative_prompt,
+            "Euler a",
+            num_inference_steps,
+            guidance_scale,
+            width,
+            height,
+        )
        current_image = processed.images[0]
    mask_width = 128
    num_interpol_frames = 30
@ -110,8 +144,7 @@ def create_zoom(
    all_frames = []
    all_frames.append(current_image)
    for i in range(num_outpainting_steps):
-    #     print('Outpaint step: ' + str(i+1) +
-    #           ' / ' + str(num_outpainting_steps))
+        print("Outpaint step: " + str(i + 1) + " / " + str(num_outpainting_steps))

        prev_image_fix = current_image

@ -121,21 +154,26 @@ def create_zoom(

        # create mask (black image with white mask_width width edges)
        mask_image = np.array(current_image)[:, :, 3]
-        mask_image = Image.fromarray(255-mask_image).convert("RGB")
+        mask_image = Image.fromarray(255 - mask_image).convert("RGB")

        # inpainting step
        current_image = current_image.convert("RGB")
-        # images = pipe(prompt=prompts[max(k for k in prompts.keys() if k <= i)],
-        #               negative_prompt=negative_prompt,
-        #               image=current_image,
-        #               guidance_scale=guidance_scale,
-        #               height=height,
-        #               width=width,
-        #               # generator = g_cuda.manual_seed(seed),
-        #               mask_image=mask_image,
-        #               num_inference_steps=num_inference_steps)[0]
-        # current_image = images[0]
-        processed = renderImg2Img(prompts[max(k for k in prompts.keys() if k <= i)], negative_prompt, "Euler a", num_inference_steps, guidance_scale, width, height, current_image, mask_image)
+        processed = renderImg2Img(
+            prompts[max(k for k in prompts.keys() if k <= i)],
+            negative_prompt,
+            "Euler a",
+            num_inference_steps,
+            guidance_scale,
+            width,
+            height,
+            current_image,
+            mask_image,
+            inpainting_denoising_strength,
+            inpainting_mask_blur,
+            inpainting_fill_mode,
+            inpainting_full_res,
+            inpainting_padding,
+        )
        current_image = processed.images[0]

        current_image.paste(prev_image, mask=prev_image)
@ -144,44 +182,58 @@ def create_zoom(
        for j in range(num_interpol_frames - 1):
            interpol_image = current_image
            interpol_width = round(
-                (1 - (1-2*mask_width/height)**(1-(j+1)/num_interpol_frames))*height/2
+                (
+                    1
+                    - (1 - 2 * mask_width / height)
+                    ** (1 - (j + 1) / num_interpol_frames)
+                )
+                * height
+                / 2
+            )
+            interpol_image = interpol_image.crop(
+                (
+                    interpol_width,
+                    interpol_width,
+                    width - interpol_width,
+                    height - interpol_width,
+                )
            )
-            interpol_image = interpol_image.crop((interpol_width,
-                                                  interpol_width,
-                                                  width - interpol_width,
-                                                  height - interpol_width))

            interpol_image = interpol_image.resize((height, width))
            # paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
            interpol_width2 = round(
-                (1 - (height-2*mask_width) / (height-2*interpol_width)) / 2*height
+                (1 - (height - 2 * mask_width) / (height - 2 * interpol_width))
+                / 2
+                * height
            )
            prev_image_fix_crop = shrink_and_paste_on_blank(
-                prev_image_fix, interpol_width2)
+                prev_image_fix, interpol_width2
+            )
            interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)

            all_frames.append(interpol_image)
        all_frames.append(current_image)
-    video_file_name = "infinite_zoom_" + str(time.time())
-    fps = 30
-    save_path = output_path + video_file_name + ".mp4"
-    start_frame_dupe_amount = 15
-    last_frame_dupe_amount = 15

-    write_video(save_path, all_frames, fps, False,
-                start_frame_dupe_amount, last_frame_dupe_amount)
+    video_file_name = "infinite_zoom_" + str(int(time.time())) + ".mp4"
+    save_path = os.path.join(output_path, "videos")
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+    write_video(
+        os.path.join(save_path, video_file_name),
+        all_frames,
+        video_frame_rate,
+        video_zoom_mode,
+        int(video_start_frame_dupe_amount),
+        int(video_last_frame_dupe_amount),
+    )

-    ## to debug
-    # img = custom_init_image.resize(
-    #     (width, height), resample=Image.LANCZOS)
-    # img = shrink_and_paste_on_blank(img, 128)
-    # mask_image = np.array(img)[:, :, 3]
-    # mask_image = Image.fromarray(255-mask_image).convert("RGB")
-    
-    # processed = renderImg2Img(prompts[min(k for k in prompts.keys(
-    # ) if k >= 0)], negative_prompt, "Euler a", num_inference_steps, guidance_scale, width, height, img, mask_image)
-    ## to debug
-    return save_path , processed.images, processed.js(), plaintext_to_html(processed.info), plaintext_to_html("")
+    return (
+        save_path,
+        processed.images,
+        processed.js(),
+        plaintext_to_html(processed.info),
+        plaintext_to_html(""),
+    )


 def on_ui_tabs():
@ -193,38 +245,39 @@ def on_ui_tabs():
        </p>
        """
        )
+        generate_btn = gr.Button(value="Generate video", variant="primary")
        with gr.Row():
-            with gr.Column(scale=1, variant='panel'):
-                outpaint_prompts = gr.Dataframe(
-                    type="array",
-                    headers=["outpaint steps", "prompt"],
-                    datatype=["number", "str"],
-                    row_count=1,
-                    col_count=(2, "fixed"),
-                    value=[[0, default_prompt]],
-                    wrap=True
-                )
+            with gr.Column(scale=1, variant="panel"):
+                with gr.Tab("Main"):
+                    outpaint_prompts = gr.Dataframe(
+                        type="array",
+                        headers=["outpaint steps", "prompt"],
+                        datatype=["number", "str"],
+                        row_count=1,
+                        col_count=(2, "fixed"),
+                        value=[[0, default_prompt]],
+                        wrap=True,
+                    )

-                outpaint_negative_prompt = gr.Textbox(
-                    lines=1,
-                    value=default_negative_prompt,
-                    label='Negative Prompt'
-                )
+                    outpaint_negative_prompt = gr.Textbox(
+                        value=default_negative_prompt, label="Negative Prompt"
+                    )
+
+                    outpaint_steps = gr.Slider(
+                        minimum=2,
+                        maximum=25,
+                        step=1,
+                        value=8,
+                        label="Total Outpaint Steps",
+                        info="The more it is, the longer your videos will be",
+                    )

-                outpaint_steps = gr.Slider(
-                    minimum=5,
-                    maximum=25,
-                    step=1,
-                    value=12,
-                    label='Total Outpaint Steps'
-                )
-                with gr.Accordion("Advanced Options", open=False):
                    guidance_scale = gr.Slider(
                        minimum=0.1,
                        maximum=15,
                        step=0.1,
                        value=7,
-                        label='Guidance Scale'
+                        label="Guidance Scale",
                    )

                    sampling_step = gr.Slider(
@ -232,35 +285,86 @@ def on_ui_tabs():
                        maximum=100,
                        step=1,
                        value=50,
-                        label='Sampling Steps for each outpaint'
+                        label="Sampling Steps for each outpaint",
+                    )
+                    init_image = gr.Image(type="pil", label="custom initial image")
+                with gr.Tab("Video"):
+                    video_frame_rate = gr.Slider(
+                        label="Frames per second",
+                        value=30,
+                        minimum=1,
+                        maximum=60,
+                    )
+                    video_zoom_mode = gr.Radio(
+                        label="Zoom mode",
+                        choices=["Zoom-out", "Zoom-in"],
+                        value="Zoom-out",
+                        type="index",
+                    )
+                    video_start_frame_dupe_amount = gr.Slider(
+                        label="number of start frame dupe",
+                        info="Frames to freeze at the start of the video",
+                        value=0,
+                        minimum=1,
+                        maximum=60,
+                    )
+                    video_last_frame_dupe_amount = gr.Slider(
+                        label="number of last frame dupe",
+                        info="Frames to freeze at the end of the video",
+                        value=0,
+                        minimum=1,
+                        maximum=60,
+                    )
+                with gr.Tab("Outpaint"):
+                    inpainting_denoising_strength = gr.Slider(
+                        label="Denoising Strength", minimum=0.75, maximum=1, value=1
+                    )
+                    inpainting_mask_blur = gr.Slider(
+                        label="Mask Blur", minimum=0, maximum=64, value=0
+                    )
+                    inpainting_fill_mode = gr.Radio(
+                        label="Masked content",
+                        choices=["fill", "original", "latent noise", "latent nothing"],
+                        value="latent noise",
+                        type="index",
+                    )
+                    inpainting_full_res = gr.Checkbox(label="Inpaint Full Resolution")
+                    inpainting_padding = gr.Slider(
+                        label="masked padding", minimum=0, maximum=256, value=0
                    )
-                    init_image = gr.Image(
-                        type="pil", label="custom initial image")
-                generate_btn = gr.Button(value='Generate video')

-            with gr.Column(scale=1, variant='compact'):
-                output_video = gr.Video(label='Output', format="mp4").style(
-                    width=512, height=512, interactive=False)
-                # output_video = gr.Image(label="output", interactive=False)
-                out_image, generation_info, html_info, html_log = create_output_panel(
-                    "infinit-zoom", output_path)
+            with gr.Column(scale=1, variant="compact"):
+                output_video = gr.Video(
+                    label="Output", format="mp4", interactive=True
+                ).style(width=512, height=512)
+                (
+                    out_image,
+                    generation_info,
+                    html_info,
+                    html_log,
+                ) = create_output_panel(
+                    "infinit-zoom", shared.opts.outdir_img2img_samples
+                )
        generate_btn.click(
-            fn=wrap_gradio_gpu_call(create_zoom, extra_outputs=[None, '', '']),
+            fn=wrap_gradio_gpu_call(create_zoom, extra_outputs=[None, "", ""]),
            inputs=[
                outpaint_prompts,
                outpaint_negative_prompt,
                outpaint_steps,
                guidance_scale,
                sampling_step,
-                init_image
-            ],
-            outputs=[
-                output_video,
-                out_image,
-                generation_info,
-                html_info,
-                html_log
+                init_image,
+                video_frame_rate,
+                video_zoom_mode,
+                video_start_frame_dupe_amount,
+                video_last_frame_dupe_amount,
+                inpainting_denoising_strength,
+                inpainting_mask_blur,
+                inpainting_fill_mode,
+                inpainting_full_res,
+                inpainting_padding,
            ],
+            outputs=[output_video, out_image, generation_info, html_info, html_log],
        )

    return [(infinite_zoom_interface, "Infinite Zoom", "iz_interface")]