minimum functionality - v0.1
video and outpaint options added (Using mask_blur>0 will cause conflict with frame interpolation) pull/5/head
parent
787777a932
commit
b4cea33d0d
|
|
@ -1,33 +1,36 @@
|
|||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
basedir = os.getcwd()
|
||||
sys.path.extend(basedir + '/extensions/infinite-zoom-sd-webui/')
|
||||
sys.path.extend(basedir + "/extensions/infinite-zoom-sd-webui/")
|
||||
import numpy as np
|
||||
import gradio as gr
|
||||
from PIL import Image
|
||||
|
||||
from iz_helpers.image import shrink_and_paste_on_blank
|
||||
from iz_helpers.video import write_video
|
||||
from iz_helpers import shrink_and_paste_on_blank, write_video
|
||||
from webui import wrap_gradio_gpu_call
|
||||
from modules import script_callbacks
|
||||
import modules.shared as shared
|
||||
import modules.scripts as scripts
|
||||
from modules.processing import process_images, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img
|
||||
from modules.ui import create_output_panel, plaintext_to_html, wrap_gradio_call
|
||||
from modules.processing import (
|
||||
process_images,
|
||||
StableDiffusionProcessingTxt2Img,
|
||||
StableDiffusionProcessingImg2Img,
|
||||
)
|
||||
|
||||
from modules.ui import create_output_panel, plaintext_to_html
|
||||
|
||||
output_path = basedir + '/extensions/infinite-zoom-sd-webui/out'
|
||||
output_path = basedir + "/extensions/infinite-zoom-sd-webui/out"
|
||||
default_prompt = "A psychedelic jungle with trees that have glowing, fractal-like patterns, Simon stalenhag poster 1920s style, street level view, hyper futuristic, 8k resolution, hyper realistic"
|
||||
default_negative_prompt = "frames, borderline, text, character, duplicate, error, out of frame, watermark, low quality, ugly, deformed, blur"
|
||||
|
||||
|
||||
def renderTxt2Img(prompt, negative_prompt, sampler, steps, cfg_scale, width, height):
|
||||
processetd = None
|
||||
processed = None
|
||||
p = StableDiffusionProcessingTxt2Img(
|
||||
sd_model=shared.sd_model,
|
||||
outpath_samples=output_path,
|
||||
outpath_grids=output_path,
|
||||
outpath_samples=shared.opts.outdir_txt2img_samples,
|
||||
outpath_grids=shared.opts.outdir_txt2img_grids,
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
# seed=-1,
|
||||
|
|
@ -38,20 +41,31 @@ def renderTxt2Img(prompt, negative_prompt, sampler, steps, cfg_scale, width, hei
|
|||
width=width,
|
||||
height=height,
|
||||
)
|
||||
# script_runner = scripts.scripts_img2img
|
||||
# p.scripts = script_runner
|
||||
# shared.state.begin()
|
||||
processed = process_images(p)
|
||||
# shared.state.end()
|
||||
return processed
|
||||
|
||||
|
||||
def renderImg2Img(prompt, negative_prompt, sampler, steps, cfg_scale, width, height, init_image, mask_image):
|
||||
processetd = None
|
||||
def renderImg2Img(
|
||||
prompt,
|
||||
negative_prompt,
|
||||
sampler,
|
||||
steps,
|
||||
cfg_scale,
|
||||
width,
|
||||
height,
|
||||
init_image,
|
||||
mask_image,
|
||||
inpainting_denoising_strength,
|
||||
inpainting_mask_blur,
|
||||
inpainting_fill_mode,
|
||||
inpainting_full_res,
|
||||
inpainting_padding,
|
||||
):
|
||||
processed = None
|
||||
p = StableDiffusionProcessingImg2Img(
|
||||
sd_model=shared.sd_model,
|
||||
outpath_samples=output_path,
|
||||
outpath_grids=output_path,
|
||||
outpath_samples=shared.opts.outdir_img2img_samples,
|
||||
outpath_grids=shared.opts.outdir_img2img_grids,
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
# seed=-1,
|
||||
|
|
@ -62,13 +76,16 @@ def renderImg2Img(prompt, negative_prompt, sampler, steps, cfg_scale, width, hei
|
|||
width=width,
|
||||
height=height,
|
||||
init_images=[init_image],
|
||||
mask=mask_image
|
||||
denoising_strength=inpainting_denoising_strength,
|
||||
mask_blur=inpainting_mask_blur,
|
||||
inpainting_fill=inpainting_fill_mode,
|
||||
inpaint_full_res=inpainting_full_res,
|
||||
inpaint_full_res_padding=inpainting_padding,
|
||||
mask=mask_image,
|
||||
)
|
||||
# script_runner = scripts.scripts_txt2img
|
||||
# p.scripts = script_runner
|
||||
# shared.state.begin()
|
||||
# p.latent_mask = Image.new("RGB", (p.width, p.height), "white")
|
||||
|
||||
processed = process_images(p)
|
||||
# shared.state.end()
|
||||
return processed
|
||||
|
||||
|
||||
|
|
@ -78,7 +95,16 @@ def create_zoom(
|
|||
num_outpainting_steps,
|
||||
guidance_scale,
|
||||
num_inference_steps,
|
||||
custom_init_image
|
||||
custom_init_image,
|
||||
video_frame_rate,
|
||||
video_zoom_mode,
|
||||
video_start_frame_dupe_amount,
|
||||
video_last_frame_dupe_amount,
|
||||
inpainting_denoising_strength,
|
||||
inpainting_mask_blur,
|
||||
inpainting_fill_mode,
|
||||
inpainting_full_res,
|
||||
inpainting_padding,
|
||||
):
|
||||
prompts = {}
|
||||
for x in prompts_array:
|
||||
|
|
@ -94,15 +120,23 @@ def create_zoom(
|
|||
height = 512
|
||||
current_image = Image.new(mode="RGBA", size=(height, width))
|
||||
mask_image = np.array(current_image)[:, :, 3]
|
||||
mask_image = Image.fromarray(255-mask_image).convert("RGB")
|
||||
mask_image = Image.fromarray(255 - mask_image).convert("RGB")
|
||||
current_image = current_image.convert("RGB")
|
||||
|
||||
if (custom_init_image):
|
||||
if custom_init_image:
|
||||
current_image = custom_init_image.resize(
|
||||
(width, height), resample=Image.LANCZOS)
|
||||
(width, height), resample=Image.LANCZOS
|
||||
)
|
||||
else:
|
||||
processed = renderTxt2Img(prompts[min(k for k in prompts.keys() if k >= 0)],
|
||||
negative_prompt, "Euler a", num_inference_steps, guidance_scale, width, height)
|
||||
processed = renderTxt2Img(
|
||||
prompts[min(k for k in prompts.keys() if k >= 0)],
|
||||
negative_prompt,
|
||||
"Euler a",
|
||||
num_inference_steps,
|
||||
guidance_scale,
|
||||
width,
|
||||
height,
|
||||
)
|
||||
current_image = processed.images[0]
|
||||
mask_width = 128
|
||||
num_interpol_frames = 30
|
||||
|
|
@ -110,8 +144,7 @@ def create_zoom(
|
|||
all_frames = []
|
||||
all_frames.append(current_image)
|
||||
for i in range(num_outpainting_steps):
|
||||
# print('Outpaint step: ' + str(i+1) +
|
||||
# ' / ' + str(num_outpainting_steps))
|
||||
print("Outpaint step: " + str(i + 1) + " / " + str(num_outpainting_steps))
|
||||
|
||||
prev_image_fix = current_image
|
||||
|
||||
|
|
@ -121,21 +154,26 @@ def create_zoom(
|
|||
|
||||
# create mask (black image with white mask_width width edges)
|
||||
mask_image = np.array(current_image)[:, :, 3]
|
||||
mask_image = Image.fromarray(255-mask_image).convert("RGB")
|
||||
mask_image = Image.fromarray(255 - mask_image).convert("RGB")
|
||||
|
||||
# inpainting step
|
||||
current_image = current_image.convert("RGB")
|
||||
# images = pipe(prompt=prompts[max(k for k in prompts.keys() if k <= i)],
|
||||
# negative_prompt=negative_prompt,
|
||||
# image=current_image,
|
||||
# guidance_scale=guidance_scale,
|
||||
# height=height,
|
||||
# width=width,
|
||||
# # generator = g_cuda.manual_seed(seed),
|
||||
# mask_image=mask_image,
|
||||
# num_inference_steps=num_inference_steps)[0]
|
||||
# current_image = images[0]
|
||||
processed = renderImg2Img(prompts[max(k for k in prompts.keys() if k <= i)], negative_prompt, "Euler a", num_inference_steps, guidance_scale, width, height, current_image, mask_image)
|
||||
processed = renderImg2Img(
|
||||
prompts[max(k for k in prompts.keys() if k <= i)],
|
||||
negative_prompt,
|
||||
"Euler a",
|
||||
num_inference_steps,
|
||||
guidance_scale,
|
||||
width,
|
||||
height,
|
||||
current_image,
|
||||
mask_image,
|
||||
inpainting_denoising_strength,
|
||||
inpainting_mask_blur,
|
||||
inpainting_fill_mode,
|
||||
inpainting_full_res,
|
||||
inpainting_padding,
|
||||
)
|
||||
current_image = processed.images[0]
|
||||
|
||||
current_image.paste(prev_image, mask=prev_image)
|
||||
|
|
@ -144,44 +182,58 @@ def create_zoom(
|
|||
for j in range(num_interpol_frames - 1):
|
||||
interpol_image = current_image
|
||||
interpol_width = round(
|
||||
(1 - (1-2*mask_width/height)**(1-(j+1)/num_interpol_frames))*height/2
|
||||
(
|
||||
1
|
||||
- (1 - 2 * mask_width / height)
|
||||
** (1 - (j + 1) / num_interpol_frames)
|
||||
)
|
||||
* height
|
||||
/ 2
|
||||
)
|
||||
interpol_image = interpol_image.crop(
|
||||
(
|
||||
interpol_width,
|
||||
interpol_width,
|
||||
width - interpol_width,
|
||||
height - interpol_width,
|
||||
)
|
||||
)
|
||||
interpol_image = interpol_image.crop((interpol_width,
|
||||
interpol_width,
|
||||
width - interpol_width,
|
||||
height - interpol_width))
|
||||
|
||||
interpol_image = interpol_image.resize((height, width))
|
||||
# paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
|
||||
interpol_width2 = round(
|
||||
(1 - (height-2*mask_width) / (height-2*interpol_width)) / 2*height
|
||||
(1 - (height - 2 * mask_width) / (height - 2 * interpol_width))
|
||||
/ 2
|
||||
* height
|
||||
)
|
||||
prev_image_fix_crop = shrink_and_paste_on_blank(
|
||||
prev_image_fix, interpol_width2)
|
||||
prev_image_fix, interpol_width2
|
||||
)
|
||||
interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)
|
||||
|
||||
all_frames.append(interpol_image)
|
||||
all_frames.append(current_image)
|
||||
video_file_name = "infinite_zoom_" + str(time.time())
|
||||
fps = 30
|
||||
save_path = output_path + video_file_name + ".mp4"
|
||||
start_frame_dupe_amount = 15
|
||||
last_frame_dupe_amount = 15
|
||||
|
||||
write_video(save_path, all_frames, fps, False,
|
||||
start_frame_dupe_amount, last_frame_dupe_amount)
|
||||
video_file_name = "infinite_zoom_" + str(int(time.time())) + ".mp4"
|
||||
save_path = os.path.join(output_path, "videos")
|
||||
if not os.path.exists(save_path):
|
||||
os.makedirs(save_path)
|
||||
write_video(
|
||||
os.path.join(save_path, video_file_name),
|
||||
all_frames,
|
||||
video_frame_rate,
|
||||
video_zoom_mode,
|
||||
int(video_start_frame_dupe_amount),
|
||||
int(video_last_frame_dupe_amount),
|
||||
)
|
||||
|
||||
## to debug
|
||||
# img = custom_init_image.resize(
|
||||
# (width, height), resample=Image.LANCZOS)
|
||||
# img = shrink_and_paste_on_blank(img, 128)
|
||||
# mask_image = np.array(img)[:, :, 3]
|
||||
# mask_image = Image.fromarray(255-mask_image).convert("RGB")
|
||||
|
||||
# processed = renderImg2Img(prompts[min(k for k in prompts.keys(
|
||||
# ) if k >= 0)], negative_prompt, "Euler a", num_inference_steps, guidance_scale, width, height, img, mask_image)
|
||||
## to debug
|
||||
return save_path , processed.images, processed.js(), plaintext_to_html(processed.info), plaintext_to_html("")
|
||||
return (
|
||||
save_path,
|
||||
processed.images,
|
||||
processed.js(),
|
||||
plaintext_to_html(processed.info),
|
||||
plaintext_to_html(""),
|
||||
)
|
||||
|
||||
|
||||
def on_ui_tabs():
|
||||
|
|
@ -193,38 +245,39 @@ def on_ui_tabs():
|
|||
</p>
|
||||
"""
|
||||
)
|
||||
generate_btn = gr.Button(value="Generate video", variant="primary")
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1, variant='panel'):
|
||||
outpaint_prompts = gr.Dataframe(
|
||||
type="array",
|
||||
headers=["outpaint steps", "prompt"],
|
||||
datatype=["number", "str"],
|
||||
row_count=1,
|
||||
col_count=(2, "fixed"),
|
||||
value=[[0, default_prompt]],
|
||||
wrap=True
|
||||
)
|
||||
with gr.Column(scale=1, variant="panel"):
|
||||
with gr.Tab("Main"):
|
||||
outpaint_prompts = gr.Dataframe(
|
||||
type="array",
|
||||
headers=["outpaint steps", "prompt"],
|
||||
datatype=["number", "str"],
|
||||
row_count=1,
|
||||
col_count=(2, "fixed"),
|
||||
value=[[0, default_prompt]],
|
||||
wrap=True,
|
||||
)
|
||||
|
||||
outpaint_negative_prompt = gr.Textbox(
|
||||
lines=1,
|
||||
value=default_negative_prompt,
|
||||
label='Negative Prompt'
|
||||
)
|
||||
outpaint_negative_prompt = gr.Textbox(
|
||||
value=default_negative_prompt, label="Negative Prompt"
|
||||
)
|
||||
|
||||
outpaint_steps = gr.Slider(
|
||||
minimum=2,
|
||||
maximum=25,
|
||||
step=1,
|
||||
value=8,
|
||||
label="Total Outpaint Steps",
|
||||
info="The more it is, the longer your videos will be",
|
||||
)
|
||||
|
||||
outpaint_steps = gr.Slider(
|
||||
minimum=5,
|
||||
maximum=25,
|
||||
step=1,
|
||||
value=12,
|
||||
label='Total Outpaint Steps'
|
||||
)
|
||||
with gr.Accordion("Advanced Options", open=False):
|
||||
guidance_scale = gr.Slider(
|
||||
minimum=0.1,
|
||||
maximum=15,
|
||||
step=0.1,
|
||||
value=7,
|
||||
label='Guidance Scale'
|
||||
label="Guidance Scale",
|
||||
)
|
||||
|
||||
sampling_step = gr.Slider(
|
||||
|
|
@ -232,35 +285,86 @@ def on_ui_tabs():
|
|||
maximum=100,
|
||||
step=1,
|
||||
value=50,
|
||||
label='Sampling Steps for each outpaint'
|
||||
label="Sampling Steps for each outpaint",
|
||||
)
|
||||
init_image = gr.Image(type="pil", label="custom initial image")
|
||||
with gr.Tab("Video"):
|
||||
video_frame_rate = gr.Slider(
|
||||
label="Frames per second",
|
||||
value=30,
|
||||
minimum=1,
|
||||
maximum=60,
|
||||
)
|
||||
video_zoom_mode = gr.Radio(
|
||||
label="Zoom mode",
|
||||
choices=["Zoom-out", "Zoom-in"],
|
||||
value="Zoom-out",
|
||||
type="index",
|
||||
)
|
||||
video_start_frame_dupe_amount = gr.Slider(
|
||||
label="number of start frame dupe",
|
||||
info="Frames to freeze at the start of the video",
|
||||
value=0,
|
||||
minimum=1,
|
||||
maximum=60,
|
||||
)
|
||||
video_last_frame_dupe_amount = gr.Slider(
|
||||
label="number of last frame dupe",
|
||||
info="Frames to freeze at the end of the video",
|
||||
value=0,
|
||||
minimum=1,
|
||||
maximum=60,
|
||||
)
|
||||
with gr.Tab("Outpaint"):
|
||||
inpainting_denoising_strength = gr.Slider(
|
||||
label="Denoising Strength", minimum=0.75, maximum=1, value=1
|
||||
)
|
||||
inpainting_mask_blur = gr.Slider(
|
||||
label="Mask Blur", minimum=0, maximum=64, value=0
|
||||
)
|
||||
inpainting_fill_mode = gr.Radio(
|
||||
label="Masked content",
|
||||
choices=["fill", "original", "latent noise", "latent nothing"],
|
||||
value="latent noise",
|
||||
type="index",
|
||||
)
|
||||
inpainting_full_res = gr.Checkbox(label="Inpaint Full Resolution")
|
||||
inpainting_padding = gr.Slider(
|
||||
label="masked padding", minimum=0, maximum=256, value=0
|
||||
)
|
||||
init_image = gr.Image(
|
||||
type="pil", label="custom initial image")
|
||||
generate_btn = gr.Button(value='Generate video')
|
||||
|
||||
with gr.Column(scale=1, variant='compact'):
|
||||
output_video = gr.Video(label='Output', format="mp4").style(
|
||||
width=512, height=512, interactive=False)
|
||||
# output_video = gr.Image(label="output", interactive=False)
|
||||
out_image, generation_info, html_info, html_log = create_output_panel(
|
||||
"infinit-zoom", output_path)
|
||||
with gr.Column(scale=1, variant="compact"):
|
||||
output_video = gr.Video(
|
||||
label="Output", format="mp4", interactive=True
|
||||
).style(width=512, height=512)
|
||||
(
|
||||
out_image,
|
||||
generation_info,
|
||||
html_info,
|
||||
html_log,
|
||||
) = create_output_panel(
|
||||
"infinit-zoom", shared.opts.outdir_img2img_samples
|
||||
)
|
||||
generate_btn.click(
|
||||
fn=wrap_gradio_gpu_call(create_zoom, extra_outputs=[None, '', '']),
|
||||
fn=wrap_gradio_gpu_call(create_zoom, extra_outputs=[None, "", ""]),
|
||||
inputs=[
|
||||
outpaint_prompts,
|
||||
outpaint_negative_prompt,
|
||||
outpaint_steps,
|
||||
guidance_scale,
|
||||
sampling_step,
|
||||
init_image
|
||||
],
|
||||
outputs=[
|
||||
output_video,
|
||||
out_image,
|
||||
generation_info,
|
||||
html_info,
|
||||
html_log
|
||||
init_image,
|
||||
video_frame_rate,
|
||||
video_zoom_mode,
|
||||
video_start_frame_dupe_amount,
|
||||
video_last_frame_dupe_amount,
|
||||
inpainting_denoising_strength,
|
||||
inpainting_mask_blur,
|
||||
inpainting_fill_mode,
|
||||
inpainting_full_res,
|
||||
inpainting_padding,
|
||||
],
|
||||
outputs=[output_video, out_image, generation_info, html_info, html_log],
|
||||
)
|
||||
|
||||
return [(infinite_zoom_interface, "Infinite Zoom", "iz_interface")]
|
||||
|
|
|
|||
Loading…
Reference in New Issue