Merge branch 'main' into patch-1

pull/87/head
Alexey Borsky 2023-05-14 04:13:44 +03:00 committed by GitHub
commit 9ab15587d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 159 additions and 147 deletions

View File

@ -56,10 +56,17 @@ All examples you can see here are originally generated at 512x512 resolution usi
## Installing the extension
To install the extension go to 'Extensions' tab in [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui), then go to 'Install from URL' tab. In 'URL for extension's git repository' field enter the path to this repository, i.e. 'https://github.com/volotat/SD-CN-Animation.git'. Leave 'Local directory name' field empty. Then just press 'Install' button. Restart web-ui, new 'SD-CN-Animation' tab should appear. All generated video will be saved into 'stable-diffusion-webui/outputs/sd-cn-animation' folder.
## Known issues
* If you see error like this ```IndexError: list index out of range``` try to restart webui, it should fix it.
* The extension might work incorrectly if 'Apply color correction to img2img results to match original colors.' option is enabled. Make sure to disable it in 'Settings' tab -> 'Stable Diffusion' section.
## Last version changes: v0.9
* Issue #76 fixed.
* Fixed issues #69, #76, #91, #92.
* Fixed an issue in vid2vid mode when an occlusion mask computed from the optical flow may include unnecessary parts (where flow is non-zero).
* Added 'Extra params' in vid2vid mode for more fine-grain controls of the processing pipeline.
* Better default parameters set for vid2vid pipeline.
* In txt2vid mode after the first frame is generated the seed is now automatically set to -1 to prevent blurring issues.
* Added an option to save resulting frames into a folder alongside the video.
* Added ability to export current parameters in a human readable form as a json.
* Interpolation mode in the flow-applying stage is set to nearest to reduce image blurring over time.
* Added ControlNet to txt2vid mode as well as fixing #86 issue, thanks to [@mariaWitch](https://github.com/mariaWitch)

View File

@ -1,16 +1,4 @@
import sys, os
basedirs = [os.getcwd()]
for basedir in basedirs:
paths_to_ensure = [
basedir,
basedir + '/extensions/sd-cn-animation/scripts',
basedir + '/extensions/SD-CN-Animation/scripts'
]
for scripts_path_fix in paths_to_ensure:
if not scripts_path_fix in sys.path:
sys.path.extend([scripts_path_fix])
import gradio as gr
import modules
@ -27,7 +15,7 @@ import modules.scripts as scripts
from modules.sd_samplers import samplers_for_img2img
from modules.ui import setup_progressbar, create_sampler_and_steps_selection, ordered_ui_categories, create_output_panel
from core import vid2vid, txt2vid, utils
from scripts.core import vid2vid, txt2vid, utils
import traceback
def V2VArgs():
@ -79,7 +67,7 @@ def inputs_ui():
v2v_args = SimpleNamespace(**V2VArgs())
t2v_args = SimpleNamespace(**T2VArgs())
with gr.Tabs():
sdcn_process_mode = gr.State(value='vid2vid')
glo_sdcn_process_mode = gr.State(value='vid2vid')
with gr.Tab('vid2vid') as tab_vid2vid:
with gr.Row():
@ -126,32 +114,33 @@ def inputs_ui():
with gr.Row():
t2v_length = gr.Slider(label='Length (in frames)', minimum=10, maximum=2048, step=10, value=40, interactive=True)
t2v_fps = gr.Slider(label='Video FPS', minimum=4, maximum=64, step=4, value=12, interactive=True)
with FormRow(elem_id="txt2vid_override_settings_row") as row:
with FormRow(elem_id="txt2vid_override_settings_row") as row:
t2v_override_settings = create_override_settings_dropdown("txt2vid", row)
with FormGroup(elem_id=f"script_container"):
t2v_custom_inputs = scripts.scripts_txt2img.setup_ui()
tab_vid2vid.select(fn=lambda: 'vid2vid', inputs=[], outputs=[sdcn_process_mode])
tab_txt2vid.select(fn=lambda: 'txt2vid', inputs=[], outputs=[sdcn_process_mode])
tab_vid2vid.select(fn=lambda: 'vid2vid', inputs=[], outputs=[glo_sdcn_process_mode])
tab_txt2vid.select(fn=lambda: 'txt2vid', inputs=[], outputs=[glo_sdcn_process_mode])
return locals()
def process(*args):
msg = 'Done'
try:
if args[0] == 'vid2vid':
yield from vid2vid.start_process(*args)
elif args[0] == 'txt2vid':
yield from txt2vid.start_process(*args)
else:
msg = f"Unsupported processing mode: '{args[0]}'"
raise Exception(msg)
if args[0] == 'vid2vid':
yield from vid2vid.start_process(*args)
elif args[0] == 'txt2vid':
yield from txt2vid.start_process(*args)
else:
msg = f"Unsupported processing mode: '{args[0]}'"
raise Exception(msg)
except Exception as error:
# handle the exception
msg = f"An exception occurred while trying to process the frame: {error}"
print(msg)
traceback.print_exc()
# handle the exception
msg = f"An exception occurred while trying to process the frame: {error}"
print(msg)
traceback.print_exc()
yield msg, gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Video.update(), gr.Button.update(interactive=True), gr.Button.update(interactive=False)
@ -159,81 +148,130 @@ def stop_process(*args):
utils.shared.is_interrupted = True
return gr.Button.update(interactive=False)
import json
def get_json(obj):
    """Round-trip *obj* through JSON to obtain a plain serializable structure.

    Values that the ``json`` module cannot encode natively are replaced by
    their ``__dict__`` when they have one, falling back to ``str(obj)``
    otherwise — this flattens arbitrary objects into readable dicts/strings.
    """
    def _fallback(o):
        # Mirror json.dumps(default=...): prefer attribute dict, else repr-ish str.
        return getattr(o, '__dict__', str(o))

    serialized = json.dumps(obj, default=_fallback)
    return json.loads(serialized)
def export_settings(*args):
    """Serialize the current UI parameters to a human-readable JSON string.

    ``args[0]`` selects the processing mode ('vid2vid' or 'txt2vid'); the
    remaining positional args are the raw values of the UI components.
    Raises Exception for any other mode value.
    """
    args_dict = utils.args_to_dict(*args)
    # Keep only the argument subset relevant to the selected mode.
    if args[0] == 'vid2vid':
        args_dict = utils.get_mode_args('v2v', args_dict)
    elif args[0] == 'txt2vid':
        args_dict = utils.get_mode_args('t2v', args_dict)
    else:
        msg = f"Unsupported processing mode: '{args[0]}'"
        raise Exception(msg)
    # convert CN params into a readable dict
    # Only enabled ControlNet units are exported; internal-only fields
    # listed below are stripped from each unit first.
    cn_remove_list = ['low_vram', 'is_ui', 'input_mode', 'batch_images', 'output_dir', 'loopback']
    args_dict['ControlNets'] = []
    for script_input in args_dict['script_inputs']:
        # NOTE(review): units are detected by class name to avoid importing
        # the ControlNet extension directly — confirm this stays stable.
        if type(script_input).__name__ == 'UiControlNetUnit':
            cn_values_dict = get_json(script_input)
            if cn_values_dict['enabled']:
                for key in cn_remove_list:
                    if key in cn_values_dict: del cn_values_dict[key]
                args_dict['ControlNets'].append(cn_values_dict)
    # remove unimportant values
    remove_list = ['save_frames_check', 'restore_faces', 'prompt_styles', 'mask_blur', 'inpainting_fill', 'tiling', 'n_iter', 'batch_size', 'subseed', 'subseed_strength', 'seed_resize_from_h', \
                   'seed_resize_from_w', 'seed_enable_extras', 'resize_mode', 'inpaint_full_res', 'inpaint_full_res_padding', 'inpainting_mask_invert', 'file', 'denoising_strength', \
                   'override_settings', 'script_inputs', 'init_img', 'mask_img', 'mode', 'init_video']
    for key in remove_list:
        if key in args_dict: del args_dict[key]
    # Fall back to __dict__/str for anything json can't encode natively.
    return json.dumps(args_dict, indent=2, default=lambda o: getattr(o, '__dict__', str(o)))
def on_ui_tabs():
modules.scripts.scripts_current = modules.scripts.scripts_img2img
modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)
modules.scripts.scripts_current = modules.scripts.scripts_img2img
modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)
with gr.Blocks(analytics_enabled=False) as sdcnanim_interface:
components = {}
with gr.Blocks(analytics_enabled=False) as sdcnanim_interface:
components = {}
#dv = SimpleNamespace(**T2VOutputArgs())
with gr.Row(elem_id='sdcn-core').style(equal_height=False, variant='compact'):
with gr.Column(scale=1, variant='panel'):
with gr.Tabs():
components = inputs_ui()
#dv = SimpleNamespace(**T2VOutputArgs())
with gr.Row(elem_id='sdcn-core').style(equal_height=False, variant='compact'):
with gr.Column(scale=1, variant='panel'):
#with gr.Tabs():
components = inputs_ui()
with gr.Column(scale=1, variant='compact'):
with gr.Row(variant='compact'):
run_button = gr.Button('Generate', elem_id=f"sdcn_anim_generate", variant='primary')
stop_button = gr.Button('Interrupt', elem_id=f"sdcn_anim_interrupt", variant='primary', interactive=False)
with gr.Accordion("Export settings", open=False):
export_settings_button = gr.Button('Export', elem_id=f"sdcn_export_settings_button")
export_setting_json = gr.Code(value='')
save_frames_check = gr.Checkbox(label="Save frames into a folder nearby a video (check it before running the generation if you also want to save frames separately)", value=False, interactive=True)
gr.HTML('<br>')
with gr.Column(variant="panel"):
sp_progress = gr.HTML(elem_id="sp_progress", value="")
sp_progress.update()
#sp_outcome = gr.HTML(elem_id="sp_error", value="")
#sp_progressbar = gr.HTML(elem_id="sp_progressbar")
#setup_progressbar(sp_progressbar, sp_preview, 'sp', textinfo=sp_progress)
with gr.Column(scale=1, variant='compact'):
with gr.Row(variant='compact'):
run_button = gr.Button('Generate', elem_id=f"sdcn_anim_generate", variant='primary')
stop_button = gr.Button('Interrupt', elem_id=f"sdcn_anim_interrupt", variant='primary', interactive=False)
with gr.Row(variant='compact'):
img_preview_curr_frame = gr.Image(label='Current frame', elem_id=f"img_preview_curr_frame", type='pil').style(height=240)
img_preview_curr_occl = gr.Image(label='Current occlusion', elem_id=f"img_preview_curr_occl", type='pil').style(height=240)
with gr.Row(variant='compact'):
img_preview_prev_warp = gr.Image(label='Previous frame warped', elem_id=f"img_preview_curr_frame", type='pil').style(height=240)
img_preview_processed = gr.Image(label='Processed', elem_id=f"img_preview_processed", type='pil').style(height=240)
save_frames_check = gr.Checkbox(label="Save frames into a folder nearby a video (check it before running the generation if you also want to save frames separately)", value=False, interactive=True)
gr.HTML('<br>')
# html_log = gr.HTML(elem_id=f'html_log_vid2vid')
video_preview = gr.Video(interactive=False)
with gr.Column(variant="panel"):
sp_progress = gr.HTML(elem_id="sp_progress", value="")
with gr.Row(variant='compact'):
dummy_component = gr.Label(visible=False)
with gr.Row(variant='compact'):
img_preview_curr_frame = gr.Image(label='Current frame', elem_id=f"img_preview_curr_frame", type='pil').style(height=240)
img_preview_curr_occl = gr.Image(label='Current occlusion', elem_id=f"img_preview_curr_occl", type='pil').style(height=240)
with gr.Row(variant='compact'):
img_preview_prev_warp = gr.Image(label='Previous frame warped', elem_id=f"img_preview_curr_frame", type='pil').style(height=240)
img_preview_processed = gr.Image(label='Processed', elem_id=f"img_preview_processed", type='pil').style(height=240)
components['glo_save_frames_check'] = save_frames_check
video_preview = gr.Video(interactive=False)
# Define parameters for the action methods.
method_inputs = [components[name] for name in utils.get_component_names()] + components['v2v_custom_inputs']
with gr.Row(variant='compact'):
dummy_component = gr.Label(visible=False)
method_outputs = [
sp_progress,
img_preview_curr_frame,
img_preview_curr_occl,
img_preview_prev_warp,
img_preview_processed,
video_preview,
run_button,
stop_button,
]
components['glo_save_frames_check'] = save_frames_check
run_button.click(
fn=process, #wrap_gradio_gpu_call(start_process, extra_outputs=[None, '', '']),
inputs=method_inputs,
outputs=method_outputs,
show_progress=True,
)
# Define parameters for the action methods.
utils.shared.v2v_custom_inputs_size = len(components['v2v_custom_inputs'])
utils.shared.t2v_custom_inputs_size = len(components['t2v_custom_inputs'])
#print('v2v_custom_inputs', len(components['v2v_custom_inputs']), components['v2v_custom_inputs'])
#print('t2v_custom_inputs', len(components['t2v_custom_inputs']), components['t2v_custom_inputs'])
method_inputs = [components[name] for name in utils.get_component_names()] + components['v2v_custom_inputs'] + components['t2v_custom_inputs']
stop_button.click(
fn=stop_process,
outputs=[stop_button],
show_progress=False
)
method_outputs = [
sp_progress,
img_preview_curr_frame,
img_preview_curr_occl,
img_preview_prev_warp,
img_preview_processed,
video_preview,
run_button,
stop_button,
]
modules.scripts.scripts_current = None
run_button.click(
fn=process, #wrap_gradio_gpu_call(start_process, extra_outputs=[None, '', '']),
inputs=method_inputs,
outputs=method_outputs,
show_progress=True,
)
# define queue - required for generators
sdcnanim_interface.queue(concurrency_count=1)
return [(sdcnanim_interface, "SD-CN-Animation", "sd_cn_animation_interface")]
stop_button.click(
fn=stop_process,
outputs=[stop_button],
show_progress=False
)
export_settings_button.click(
fn=export_settings,
inputs=method_inputs,
outputs=[export_setting_json],
show_progress=False
)
modules.scripts.scripts_current = None
# define queue - required for generators
sdcnanim_interface.queue(concurrency_count=1)
return [(sdcnanim_interface, "SD-CN-Animation", "sd_cn_animation_interface")]
script_callbacks.on_ui_tabs(on_ui_tabs)

View File

@ -1,18 +1,4 @@
import sys, os
basedirs = [os.getcwd()]
for basedir in basedirs:
paths_to_ensure = [
basedir,
basedir + '/extensions/sd-cn-animation/scripts',
basedir + '/extensions/SD-CN-Animation/scripts',
basedir + '/extensions/sd-cn-animation/RAFT',
basedir + '/extensions/SD-CN-Animation/RAFT'
]
for scripts_path_fix in paths_to_ensure:
if not scripts_path_fix in sys.path:
sys.path.extend([scripts_path_fix])
import numpy as np
import cv2
@ -130,8 +116,8 @@ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_sty
prev_frame_torch = torch.from_numpy(prev_frame).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
prev_frame_styled_torch = torch.from_numpy(prev_frame_styled).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, mode="nearest", padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, mode="nearest", padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
#warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
#warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)

View File

@ -1,16 +1,4 @@
import sys, os
basedirs = [os.getcwd()]
for basedir in basedirs:
paths_to_ensure = [
basedir,
basedir + '/extensions/sd-cn-animation/scripts',
basedir + '/extensions/SD-CN-Animation/scripts'
]
for scripts_path_fix in paths_to_ensure:
if not scripts_path_fix in sys.path:
sys.path.extend([scripts_path_fix])
import torch
import gc
@ -20,7 +8,7 @@ from PIL import Image
import modules.paths as ph
from modules.shared import devices
from core import utils, flow_utils
from scripts.core import utils, flow_utils
from FloweR.model import FloweR
import skimage
@ -128,7 +116,7 @@ def start_process(*args):
flow_map[:,:,0] += np.arange(args_dict['width'])
flow_map[:,:,1] += np.arange(args_dict['height'])[:,np.newaxis]
warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_CUBIC, borderMode = cv2.BORDER_REFLECT_101)
warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT_101)
curr_frame = warped_frame.copy()

View File

@ -1,9 +1,11 @@
class shared:
    # Global mutable state shared across the extension's processing pipeline.
    # Set to True by stop_process() to abort an in-flight generation.
    is_interrupted = False
    # Number of custom script inputs contributed by the vid2vid tab's
    # script UI; used to split the flat args tuple back into per-mode lists.
    v2v_custom_inputs_size = 0
    # Same as above, for the txt2vid tab.
    t2v_custom_inputs_size = 0
def get_component_names():
components_list = [
'sdcn_process_mode',
'glo_sdcn_process_mode',
'v2v_file', 'v2v_width', 'v2v_height', 'v2v_prompt', 'v2v_n_prompt', 'v2v_cfg_scale', 'v2v_seed', 'v2v_processing_strength', 'v2v_fix_frame_strength',
'v2v_sampler_index', 'v2v_steps', 'v2v_override_settings',
'v2v_occlusion_mask_blur', 'v2v_occlusion_mask_trailing', 'v2v_occlusion_mask_flow_multiplier', 'v2v_occlusion_mask_difo_multiplier', 'v2v_occlusion_mask_difs_multiplier',
@ -96,13 +98,16 @@ def args_to_dict(*args): # converts list of arguments into dictionary for better
args = list(args)
for i in range(len(args_list)):
if (args[i] is None) and (args_list[i] in args_dict):
args[i] = args_dict[args_list[i]]
else:
args_dict[args_list[i]] = args[i]
if (args[i] is None) and (args_list[i] in args_dict):
#args[i] = args_dict[args_list[i]]
pass
else:
args_dict[args_list[i]] = args[i]
args_dict['v2v_script_inputs'] = args[len(args_list):]
args_dict['t2v_script_inputs'] = args[len(args_list):] #do it for both
args_dict['v2v_script_inputs'] = args[len(args_list):len(args_list)+shared.v2v_custom_inputs_size]
#print('v2v_script_inputs', args_dict['v2v_script_inputs'])
args_dict['t2v_script_inputs'] = args[len(args_list)+shared.v2v_custom_inputs_size:]
#print('t2v_script_inputs', args_dict['t2v_script_inputs'])
return args_dict
def get_mode_args(mode, args_dict):

View File

@ -1,16 +1,4 @@
import sys, os
basedirs = [os.getcwd()]
for basedir in basedirs:
paths_to_ensure = [
basedir,
basedir + '/extensions/sd-cn-animation/scripts',
basedir + '/extensions/SD-CN-Animation/scripts'
]
for scripts_path_fix in paths_to_ensure:
if not scripts_path_fix in sys.path:
sys.path.extend([scripts_path_fix])
import math
import os
@ -34,8 +22,8 @@ import time
import skimage
import datetime
from core.flow_utils import RAFT_estimate_flow, RAFT_clear_memory, compute_diff_map
from core import utils
from scripts.core.flow_utils import RAFT_estimate_flow, RAFT_clear_memory, compute_diff_map
from scripts.core import utils
class sdcn_anim_tmp:
prepear_counter = 0
@ -183,8 +171,8 @@ def start_process(*args):
sdcn_anim_tmp.frames_prepared = False
cn = sdcn_anim_tmp.process_counter % 10
curr_frame = sdcn_anim_tmp.prepared_frames[cn+1]
prev_frame = sdcn_anim_tmp.prepared_frames[cn]
curr_frame = sdcn_anim_tmp.prepared_frames[cn+1][...,:3]
prev_frame = sdcn_anim_tmp.prepared_frames[cn][...,:3]
next_flow = sdcn_anim_tmp.prepared_next_flows[cn]
prev_flow = sdcn_anim_tmp.prepared_prev_flows[cn]
@ -205,7 +193,7 @@ def start_process(*args):
occlusion_mask = np.clip(alpha_mask * 255, 0, 255).astype(np.uint8)
# fix duplication artifacts in the warped styled frame that occur in places where the flow is zero only because there is no source location to take the color from
warped_styled_frame = curr_frame[...,:3].astype(float) * alpha_mask + warped_styled_frame[...,:3].astype(float) * (1 - alpha_mask)
warped_styled_frame = curr_frame.astype(float) * alpha_mask + warped_styled_frame.astype(float) * (1 - alpha_mask)
# process current frame
# TODO: convert args_dict into separate dict that stores only params necessery for img2img processing