critical fixes

2023-05-14 05:58:37 +03:00 · 2023-05-14 05:58:37 +03:00 · 9849e6389e
parent cd400ea7b1
commit 9849e6389e
4 changed files with 45 additions and 9 deletions
--- a/readme.md
+++ b/readme.md
@ -71,3 +71,5 @@ To install the extension go to 'Extensions' tab in [Automatic1111 web-ui](https:
 * Added ability to export current parameters in a human readable form as a json.
 * Interpolation mode in the flow-applying stage is set to ‘nearest’ to reduce overtime image blurring.
 * Added ControlNet to txt2vid mode as well as fixing #86 issue, thanks to [@mariaWitch](https://github.com/mariaWitch)
 * Fixed a major issue where ControlNet used the wrong input images. Because of this, vid2vid results were much worse than they should have been.
 * Text-to-video mode now supports using a video as guidance for ControlNet. This allows creating much stronger video stylizations.
--- a/scripts/base_ui.py
+++ b/scripts/base_ui.py
@ -71,7 +71,7 @@ def inputs_ui():
        with gr.Tab('vid2vid') as tab_vid2vid:
            with gr.Row():
-                gr.HTML('Put your video here:')
+                gr.HTML('Input video (each frame will be used as initial image for SD and as input image to CN): *REQUIRED')
            with gr.Row():
                v2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="vid_to_vid_chosen_file")
@ -110,7 +110,13 @@ def inputs_ui():
                v2v_custom_inputs = scripts.scripts_img2img.setup_ui()
        with gr.Tab('txt2vid') as tab_txt2vid:
            with gr.Row():
                gr.HTML('Control video (each frame will be used as input image to CN): *NOT REQUIRED')
            with gr.Row():
                t2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="tex_to_vid_chosen_file")
            t2v_width, t2v_height, t2v_prompt, t2v_n_prompt, t2v_cfg_scale, t2v_seed, t2v_processing_strength, t2v_fix_frame_strength, t2v_sampler_index, t2v_steps = setup_common_values('txt2vid', t2v_args)
            with gr.Row():
                t2v_length = gr.Slider(label='Length (in frames)', minimum=10, maximum=2048, step=10, value=40, interactive=True)
                t2v_fps = gr.Slider(label='Video FPS', minimum=4, maximum=64, step=4, value=12, interactive=True)
--- a/scripts/core/txt2vid.py
+++ b/scripts/core/txt2vid.py
@ -44,18 +44,30 @@ def FloweR_load_model(w, h):
  # Move the model to the device
  FloweR_model = FloweR_model.to(DEVICE)
 def read_frame_from_video(input_video):
  if input_video is None: return None
  # Reading video file
  if input_video.isOpened():
    ret, cur_frame = input_video.read()
    if cur_frame is not None: 
      cur_frame = cv2.cvtColor(cur_frame, cv2.COLOR_BGR2RGB) 
  else:
    cur_frame = None
    input_video.release()
    input_video = None
  return cur_frame
 def start_process(*args):
    processing_start_time = time.time()
    args_dict = utils.args_to_dict(*args)
    args_dict = utils.get_mode_args('t2v', args_dict)
-    #utils.set_CNs_input_image(args_dict, Image.fromarray(curr_frame))
+    # Open the input video file
-    processed_frames, _, _, _ = utils.txt2img(args_dict)
+    input_video = None
-    processed_frame = np.array(processed_frames[0])
+    if args_dict['file'] is not None:
-    processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
+      input_video = cv2.VideoCapture(args_dict['file'].name)
    init_frame = processed_frame.copy()
    # Create an output video file with the same fps, width, and height as the input video
    output_video_name = f'outputs/sd-cn-animation/txt2vid/{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.mp4'
@ -69,6 +81,16 @@ def start_process(*args):
      if args_dict['save_frames_check']: 
        cv2.imwrite(os.path.join(output_video_folder, f'{ind:05d}.png'), cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
    if input_video is not None:
      curr_video_frame = read_frame_from_video(input_video)
      curr_video_frame = cv2.resize(curr_video_frame, (args_dict['width'], args_dict['height']))
      utils.set_CNs_input_image(args_dict, Image.fromarray(curr_video_frame))
    processed_frames, _, _, _ = utils.txt2img(args_dict)
    processed_frame = np.array(processed_frames[0])
    processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
    init_frame = processed_frame.copy()
    output_video = cv2.VideoWriter(output_video_name, cv2.VideoWriter_fourcc(*'mp4v'), args_dict['fps'], (args_dict['width'], args_dict['height']))
    output_video.write(cv2.cvtColor(processed_frame, cv2.COLOR_RGB2BGR))
@ -125,7 +147,11 @@ def start_process(*args):
      args_dict['mask_img'] = Image.fromarray(pred_occl)
      args_dict['seed'] = -1
-      #utils.set_CNs_input_image(args_dict, Image.fromarray(curr_frame))
+      if input_video is not None:
        curr_video_frame = read_frame_from_video(input_video)
        curr_video_frame = cv2.resize(curr_video_frame, (args_dict['width'], args_dict['height']))
        utils.set_CNs_input_image(args_dict, Image.fromarray(curr_video_frame))
      processed_frames, _, _, _ = utils.img2img(args_dict)
      processed_frame = np.array(processed_frames[0])
      processed_frame = skimage.exposure.match_histograms(processed_frame, init_frame, channel_axis=None)
@ -150,6 +176,7 @@ def start_process(*args):
      stat = f"Frame: {ind + 2} / {args_dict['length']}; " + utils.get_time_left(ind+2, args_dict['length'], processing_start_time)
      yield stat, curr_frame, pred_occl, warped_frame, processed_frame, None, gr.Button.update(interactive=False), gr.Button.update(interactive=True)
    if input_video is not None: input_video.release()
    output_video.release()
    FloweR_clear_memory()
--- a/scripts/core/utils.py
+++ b/scripts/core/utils.py
@ -10,7 +10,7 @@ def get_component_names():
    'v2v_sampler_index', 'v2v_steps', 'v2v_override_settings',
    'v2v_occlusion_mask_blur', 'v2v_occlusion_mask_trailing', 'v2v_occlusion_mask_flow_multiplier', 'v2v_occlusion_mask_difo_multiplier', 'v2v_occlusion_mask_difs_multiplier',
    'v2v_step_1_processing_mode', 'v2v_step_1_blend_alpha', 'v2v_step_1_seed', 'v2v_step_2_seed',
-    't2v_width', 't2v_height', 't2v_prompt', 't2v_n_prompt', 't2v_cfg_scale', 't2v_seed', 't2v_processing_strength', 't2v_fix_frame_strength',
+    't2v_file', 't2v_width', 't2v_height', 't2v_prompt', 't2v_n_prompt', 't2v_cfg_scale', 't2v_seed', 't2v_processing_strength', 't2v_fix_frame_strength',
    't2v_sampler_index', 't2v_steps', 't2v_length', 't2v_fps',
    'glo_save_frames_check'
  ]
@ -121,7 +121,8 @@ def get_mode_args(mode, args_dict):
 def set_CNs_input_image(args_dict, image):
  for script_input in args_dict['script_inputs']:
    if type(script_input).__name__ == 'UiControlNetUnit':
-      script_input.batch_images = [image]
+      script_input.batch_images = [np.array(image)]
      script_input.image = np.array(image)
 import time
 import datetime