diff --git a/readme.md b/readme.md index 7e158ed..b6b9950 100644 --- a/readme.md +++ b/readme.md @@ -56,10 +56,17 @@ All examples you can see here are originally generated at 512x512 resolution usi ## Installing the extension To install the extension go to 'Extensions' tab in [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui), then go to 'Install from URL' tab. In 'URL for extension's git repository' field inter the path to this repository, i.e. 'https://github.com/volotat/SD-CN-Animation.git'. Leave 'Local directory name' field empty. Then just press 'Install' button. Restart web-ui, new 'SD-CN-Animation' tab should appear. All generated video will be saved into 'stable-diffusion-webui/outputs/sd-cn-animation' folder. +## Known issues +* If you see an error like ```IndexError: list index out of range```, try restarting the web-ui; that should fix it. +* The extension might work incorrectly if the 'Apply color correction to img2img results to match original colors.' option is enabled. Make sure to disable it in the 'Settings' tab -> 'Stable Diffusion' section. + ## Last version changes: v0.9 -* Issue #76 fixed. +* Fixed issues #69, #76, #91, #92. * Fixed an issue in vid2vid mode when an occlusion mask computed from the optical flow may include unnecessary parts (where flow is non-zero). * Added 'Extra params' in vid2vid mode for more fine-grain controls of the processing pipeline. * Better default parameters set for vid2vid pipeline. * In txt2vid mode after the first frame is generated the seed is now automatically set to -1 to prevent blurring issues. * Added an option to save resulting frames into a folder alongside the video. +* Added the ability to export the current parameters in human-readable form as JSON. +* Interpolation mode in the flow-applying stage is set to 'nearest' to reduce image blurring over time. 
+* Added ControlNet to txt2vid mode and fixed issue #86, thanks to [@mariaWitch](https://github.com/mariaWitch) diff --git a/scripts/base_ui.py b/scripts/base_ui.py index f4cf4f0..b51b5ee 100644 --- a/scripts/base_ui.py +++ b/scripts/base_ui.py @@ -1,16 +1,4 @@ import sys, os -basedirs = [os.getcwd()] - -for basedir in basedirs: - paths_to_ensure = [ - basedir, - basedir + '/extensions/sd-cn-animation/scripts', - basedir + '/extensions/SD-CN-Animation/scripts' - ] - - for scripts_path_fix in paths_to_ensure: - if not scripts_path_fix in sys.path: - sys.path.extend([scripts_path_fix]) import gradio as gr import modules @@ -27,7 +15,7 @@ import modules.scripts as scripts from modules.sd_samplers import samplers_for_img2img from modules.ui import setup_progressbar, create_sampler_and_steps_selection, ordered_ui_categories, create_output_panel -from core import vid2vid, txt2vid, utils +from scripts.core import vid2vid, txt2vid, utils import traceback def V2VArgs(): @@ -79,7 +67,7 @@ def inputs_ui(): v2v_args = SimpleNamespace(**V2VArgs()) t2v_args = SimpleNamespace(**T2VArgs()) with gr.Tabs(): - sdcn_process_mode = gr.State(value='vid2vid') + glo_sdcn_process_mode = gr.State(value='vid2vid') with gr.Tab('vid2vid') as tab_vid2vid: with gr.Row(): @@ -126,32 +114,33 @@ def inputs_ui(): with gr.Row(): t2v_length = gr.Slider(label='Length (in frames)', minimum=10, maximum=2048, step=10, value=40, interactive=True) t2v_fps = gr.Slider(label='Video FPS', minimum=4, maximum=64, step=4, value=12, interactive=True) - with FormRow(elem_id="txt2vid_override_settings_row") as row: + + with FormRow(elem_id="txt2vid_override_settings_row") as row: t2v_override_settings = create_override_settings_dropdown("txt2vid", row) with FormGroup(elem_id=f"script_container"): t2v_custom_inputs = scripts.scripts_txt2img.setup_ui() - tab_vid2vid.select(fn=lambda: 'vid2vid', inputs=[], outputs=[sdcn_process_mode]) - tab_txt2vid.select(fn=lambda: 'txt2vid', inputs=[], 
outputs=[sdcn_process_mode]) + tab_vid2vid.select(fn=lambda: 'vid2vid', inputs=[], outputs=[glo_sdcn_process_mode]) + tab_txt2vid.select(fn=lambda: 'txt2vid', inputs=[], outputs=[glo_sdcn_process_mode]) return locals() def process(*args): msg = 'Done' try: - if args[0] == 'vid2vid': - yield from vid2vid.start_process(*args) - elif args[0] == 'txt2vid': - yield from txt2vid.start_process(*args) - else: - msg = f"Unsupported processing mode: '{args[0]}'" - raise Exception(msg) + if args[0] == 'vid2vid': + yield from vid2vid.start_process(*args) + elif args[0] == 'txt2vid': + yield from txt2vid.start_process(*args) + else: + msg = f"Unsupported processing mode: '{args[0]}'" + raise Exception(msg) except Exception as error: - # handle the exception - msg = f"An exception occurred while trying to process the frame: {error}" - print(msg) - traceback.print_exc() + # handle the exception + msg = f"An exception occurred while trying to process the frame: {error}" + print(msg) + traceback.print_exc() yield msg, gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Video.update(), gr.Button.update(interactive=True), gr.Button.update(interactive=False) @@ -159,81 +148,130 @@ def stop_process(*args): utils.shared.is_interrupted = True return gr.Button.update(interactive=False) +import json +def get_json(obj): + return json.loads( + json.dumps(obj, default=lambda o: getattr(o, '__dict__', str(o))) + ) + +def export_settings(*args): + args_dict = utils.args_to_dict(*args) + if args[0] == 'vid2vid': + args_dict = utils.get_mode_args('v2v', args_dict) + elif args[0] == 'txt2vid': + args_dict = utils.get_mode_args('t2v', args_dict) + else: + msg = f"Unsupported processing mode: '{args[0]}'" + raise Exception(msg) + + # convert CN params into a readable dict + cn_remove_list = ['low_vram', 'is_ui', 'input_mode', 'batch_images', 'output_dir', 'loopback'] + + args_dict['ControlNets'] = [] + for script_input in args_dict['script_inputs']: + if 
type(script_input).__name__ == 'UiControlNetUnit': + cn_values_dict = get_json(script_input) + if cn_values_dict['enabled']: + for key in cn_remove_list: + if key in cn_values_dict: del cn_values_dict[key] + args_dict['ControlNets'].append(cn_values_dict) + + # remove unimportant values + remove_list = ['save_frames_check', 'restore_faces', 'prompt_styles', 'mask_blur', 'inpainting_fill', 'tiling', 'n_iter', 'batch_size', 'subseed', 'subseed_strength', 'seed_resize_from_h', \ + 'seed_resize_from_w', 'seed_enable_extras', 'resize_mode', 'inpaint_full_res', 'inpaint_full_res_padding', 'inpainting_mask_invert', 'file', 'denoising_strength', \ + 'override_settings', 'script_inputs', 'init_img', 'mask_img', 'mode', 'init_video'] + + for key in remove_list: + if key in args_dict: del args_dict[key] + + return json.dumps(args_dict, indent=2, default=lambda o: getattr(o, '__dict__', str(o))) + def on_ui_tabs(): - modules.scripts.scripts_current = modules.scripts.scripts_img2img - modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True) + modules.scripts.scripts_current = modules.scripts.scripts_img2img + modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True) - with gr.Blocks(analytics_enabled=False) as sdcnanim_interface: - components = {} - - #dv = SimpleNamespace(**T2VOutputArgs()) - with gr.Row(elem_id='sdcn-core').style(equal_height=False, variant='compact'): - with gr.Column(scale=1, variant='panel'): - with gr.Tabs(): - components = inputs_ui() + with gr.Blocks(analytics_enabled=False) as sdcnanim_interface: + components = {} - with gr.Column(scale=1, variant='compact'): - with gr.Row(variant='compact'): - run_button = gr.Button('Generate', elem_id=f"sdcn_anim_generate", variant='primary') - stop_button = gr.Button('Interrupt', elem_id=f"sdcn_anim_interrupt", variant='primary', interactive=False) - - save_frames_check = gr.Checkbox(label="Save frames into a folder nearby a video (check it before running the generation if you also want to save 
frames separately)", value=False, interactive=True) - gr.HTML('
') + #dv = SimpleNamespace(**T2VOutputArgs()) + with gr.Row(elem_id='sdcn-core').style(equal_height=False, variant='compact'): + with gr.Column(scale=1, variant='panel'): + #with gr.Tabs(): + components = inputs_ui() + + with gr.Accordion("Export settings", open=False): + export_settings_button = gr.Button('Export', elem_id=f"sdcn_export_settings_button") + export_setting_json = gr.Code(value='') - with gr.Column(variant="panel"): - sp_progress = gr.HTML(elem_id="sp_progress", value="") - sp_progress.update() - #sp_outcome = gr.HTML(elem_id="sp_error", value="") - #sp_progressbar = gr.HTML(elem_id="sp_progressbar") - #setup_progressbar(sp_progressbar, sp_preview, 'sp', textinfo=sp_progress) - - with gr.Row(variant='compact'): - img_preview_curr_frame = gr.Image(label='Current frame', elem_id=f"img_preview_curr_frame", type='pil').style(height=240) - img_preview_curr_occl = gr.Image(label='Current occlusion', elem_id=f"img_preview_curr_occl", type='pil').style(height=240) - with gr.Row(variant='compact'): - img_preview_prev_warp = gr.Image(label='Previous frame warped', elem_id=f"img_preview_curr_frame", type='pil').style(height=240) - img_preview_processed = gr.Image(label='Processed', elem_id=f"img_preview_processed", type='pil').style(height=240) - - # html_log = gr.HTML(elem_id=f'html_log_vid2vid') - video_preview = gr.Video(interactive=False) - - with gr.Row(variant='compact'): - dummy_component = gr.Label(visible=False) - components['glo_save_frames_check'] = save_frames_check + with gr.Column(scale=1, variant='compact'): + with gr.Row(variant='compact'): + run_button = gr.Button('Generate', elem_id=f"sdcn_anim_generate", variant='primary') + stop_button = gr.Button('Interrupt', elem_id=f"sdcn_anim_interrupt", variant='primary', interactive=False) + + save_frames_check = gr.Checkbox(label="Save frames into a folder nearby a video (check it before running the generation if you also want to save frames separately)", value=False, interactive=True) + gr.HTML('
') - # Define parameters for the action methods. - method_inputs = [components[name] for name in utils.get_component_names()] + components['v2v_custom_inputs'] + with gr.Column(variant="panel"): + sp_progress = gr.HTML(elem_id="sp_progress", value="") + + with gr.Row(variant='compact'): + img_preview_curr_frame = gr.Image(label='Current frame', elem_id=f"img_preview_curr_frame", type='pil').style(height=240) + img_preview_curr_occl = gr.Image(label='Current occlusion', elem_id=f"img_preview_curr_occl", type='pil').style(height=240) + with gr.Row(variant='compact'): + img_preview_prev_warp = gr.Image(label='Previous frame warped', elem_id=f"img_preview_curr_frame", type='pil').style(height=240) + img_preview_processed = gr.Image(label='Processed', elem_id=f"img_preview_processed", type='pil').style(height=240) - method_outputs = [ - sp_progress, - img_preview_curr_frame, - img_preview_curr_occl, - img_preview_prev_warp, - img_preview_processed, - video_preview, - run_button, - stop_button, - ] + video_preview = gr.Video(interactive=False) + + with gr.Row(variant='compact'): + dummy_component = gr.Label(visible=False) - run_button.click( - fn=process, #wrap_gradio_gpu_call(start_process, extra_outputs=[None, '', '']), - inputs=method_inputs, - outputs=method_outputs, - show_progress=True, - ) + components['glo_save_frames_check'] = save_frames_check + + # Define parameters for the action methods. 
+ utils.shared.v2v_custom_inputs_size = len(components['v2v_custom_inputs']) + utils.shared.t2v_custom_inputs_size = len(components['t2v_custom_inputs']) + #print('v2v_custom_inputs', len(components['v2v_custom_inputs']), components['v2v_custom_inputs']) + #print('t2v_custom_inputs', len(components['t2v_custom_inputs']), components['t2v_custom_inputs']) + method_inputs = [components[name] for name in utils.get_component_names()] + components['v2v_custom_inputs'] + components['t2v_custom_inputs'] - stop_button.click( - fn=stop_process, - outputs=[stop_button], - show_progress=False - ) + method_outputs = [ + sp_progress, + img_preview_curr_frame, + img_preview_curr_occl, + img_preview_prev_warp, + img_preview_processed, + video_preview, + run_button, + stop_button, + ] - modules.scripts.scripts_current = None + run_button.click( + fn=process, #wrap_gradio_gpu_call(start_process, extra_outputs=[None, '', '']), + inputs=method_inputs, + outputs=method_outputs, + show_progress=True, + ) - # define queue - required for generators - sdcnanim_interface.queue(concurrency_count=1) - return [(sdcnanim_interface, "SD-CN-Animation", "sd_cn_animation_interface")] + stop_button.click( + fn=stop_process, + outputs=[stop_button], + show_progress=False + ) + + export_settings_button.click( + fn=export_settings, + inputs=method_inputs, + outputs=[export_setting_json], + show_progress=False + ) + + modules.scripts.scripts_current = None + + # define queue - required for generators + sdcnanim_interface.queue(concurrency_count=1) + return [(sdcnanim_interface, "SD-CN-Animation", "sd_cn_animation_interface")] script_callbacks.on_ui_tabs(on_ui_tabs) diff --git a/scripts/core/flow_utils.py b/scripts/core/flow_utils.py index f921af6..ab8dbaa 100644 --- a/scripts/core/flow_utils.py +++ b/scripts/core/flow_utils.py @@ -1,18 +1,4 @@ import sys, os -basedirs = [os.getcwd()] - -for basedir in basedirs: - paths_to_ensure = [ - basedir, - basedir + '/extensions/sd-cn-animation/scripts', - basedir 
+ '/extensions/SD-CN-Animation/scripts', - basedir + '/extensions/sd-cn-animation/RAFT', - basedir + '/extensions/SD-CN-Animation/RAFT' - ] - - for scripts_path_fix in paths_to_ensure: - if not scripts_path_fix in sys.path: - sys.path.extend([scripts_path_fix]) import numpy as np import cv2 @@ -130,8 +116,8 @@ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_sty prev_frame_torch = torch.from_numpy(prev_frame).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W prev_frame_styled_torch = torch.from_numpy(prev_frame_styled).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W - warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy() - warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy() + warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, mode="nearest", padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy() + warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, mode="nearest", padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy() #warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT) #warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT) diff --git a/scripts/core/txt2vid.py b/scripts/core/txt2vid.py index deb32c6..77de60e 100644 --- a/scripts/core/txt2vid.py +++ b/scripts/core/txt2vid.py @@ -1,16 +1,4 @@ import sys, os -basedirs = [os.getcwd()] - -for basedir in basedirs: - paths_to_ensure = [ - basedir, - basedir + '/extensions/sd-cn-animation/scripts', - basedir + '/extensions/SD-CN-Animation/scripts' - ] - - for scripts_path_fix in paths_to_ensure: - if not scripts_path_fix in sys.path: - sys.path.extend([scripts_path_fix]) import torch import gc @@ -20,7 +8,7 @@ from PIL import Image 
import modules.paths as ph from modules.shared import devices -from core import utils, flow_utils +from scripts.core import utils, flow_utils from FloweR.model import FloweR import skimage @@ -128,7 +116,7 @@ def start_process(*args): flow_map[:,:,0] += np.arange(args_dict['width']) flow_map[:,:,1] += np.arange(args_dict['height'])[:,np.newaxis] - warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_CUBIC, borderMode = cv2.BORDER_REFLECT_101) + warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT_101) curr_frame = warped_frame.copy() diff --git a/scripts/core/utils.py b/scripts/core/utils.py index 90c8dcb..b7a402f 100644 --- a/scripts/core/utils.py +++ b/scripts/core/utils.py @@ -1,9 +1,11 @@ class shared: is_interrupted = False + v2v_custom_inputs_size = 0 + t2v_custom_inputs_size = 0 def get_component_names(): components_list = [ - 'sdcn_process_mode', + 'glo_sdcn_process_mode', 'v2v_file', 'v2v_width', 'v2v_height', 'v2v_prompt', 'v2v_n_prompt', 'v2v_cfg_scale', 'v2v_seed', 'v2v_processing_strength', 'v2v_fix_frame_strength', 'v2v_sampler_index', 'v2v_steps', 'v2v_override_settings', 'v2v_occlusion_mask_blur', 'v2v_occlusion_mask_trailing', 'v2v_occlusion_mask_flow_multiplier', 'v2v_occlusion_mask_difo_multiplier', 'v2v_occlusion_mask_difs_multiplier', @@ -96,13 +98,16 @@ def args_to_dict(*args): # converts list of argumets into dictionary for better args = list(args) for i in range(len(args_list)): - if (args[i] is None) and (args_list[i] in args_dict): - args[i] = args_dict[args_list[i]] - else: - args_dict[args_list[i]] = args[i] + if (args[i] is None) and (args_list[i] in args_dict): + #args[i] = args_dict[args_list[i]] + pass + else: + args_dict[args_list[i]] = args[i] - args_dict['v2v_script_inputs'] = args[len(args_list):] - args_dict['t2v_script_inputs'] = args[len(args_list):] #do it for both + args_dict['v2v_script_inputs'] = args[len(args_list):len(args_list)+shared.v2v_custom_inputs_size] + 
#print('v2v_script_inputs', args_dict['v2v_script_inputs']) + args_dict['t2v_script_inputs'] = args[len(args_list)+shared.v2v_custom_inputs_size:] + #print('t2v_script_inputs', args_dict['t2v_script_inputs']) return args_dict def get_mode_args(mode, args_dict): diff --git a/scripts/core/vid2vid.py b/scripts/core/vid2vid.py index 038489f..6b18e03 100644 --- a/scripts/core/vid2vid.py +++ b/scripts/core/vid2vid.py @@ -1,16 +1,4 @@ import sys, os -basedirs = [os.getcwd()] - -for basedir in basedirs: - paths_to_ensure = [ - basedir, - basedir + '/extensions/sd-cn-animation/scripts', - basedir + '/extensions/SD-CN-Animation/scripts' - ] - - for scripts_path_fix in paths_to_ensure: - if not scripts_path_fix in sys.path: - sys.path.extend([scripts_path_fix]) import math import os @@ -34,8 +22,8 @@ import time import skimage import datetime -from core.flow_utils import RAFT_estimate_flow, RAFT_clear_memory, compute_diff_map -from core import utils +from scripts.core.flow_utils import RAFT_estimate_flow, RAFT_clear_memory, compute_diff_map +from scripts.core import utils class sdcn_anim_tmp: prepear_counter = 0 @@ -183,8 +171,8 @@ def start_process(*args): sdcn_anim_tmp.frames_prepared = False cn = sdcn_anim_tmp.process_counter % 10 - curr_frame = sdcn_anim_tmp.prepared_frames[cn+1] - prev_frame = sdcn_anim_tmp.prepared_frames[cn] + curr_frame = sdcn_anim_tmp.prepared_frames[cn+1][...,:3] + prev_frame = sdcn_anim_tmp.prepared_frames[cn][...,:3] next_flow = sdcn_anim_tmp.prepared_next_flows[cn] prev_flow = sdcn_anim_tmp.prepared_prev_flows[cn] @@ -205,7 +193,7 @@ def start_process(*args): occlusion_mask = np.clip(alpha_mask * 255, 0, 255).astype(np.uint8) # fix warped styled frame from duplicated that occures on the places where flow is zero, but only because there is no place to get the color from - warped_styled_frame = curr_frame[...,:3].astype(float) * alpha_mask + warped_styled_frame[...,:3].astype(float) * (1 - alpha_mask) + warped_styled_frame = 
curr_frame.astype(float) * alpha_mask + warped_styled_frame.astype(float) * (1 - alpha_mask) # process current frame # TODO: convert args_dict into separate dict that stores only params necessery for img2img processing