diff --git a/javascript/deforum-hints.js b/javascript/deforum-hints.js index 083cb324..19092dbd 100644 --- a/javascript/deforum-hints.js +++ b/javascript/deforum-hints.js @@ -26,7 +26,7 @@ deforum_titles = { "Filename format": "specify the format of the filename for output images", "Seed behavior": "defines the seed behavior that is used for animations", "iter": "the seed value will increment by 1 for each subsequent frame of the animation", - "fixed": "the seed will remain fixed across all frames of animation", + "fixed": "the seed will remain fixed across all frames of animation. **NOT RECOMMENDED.** Unless you know what you are doing, it will *deep fry* the pictures over time", "random": "a random seed will be used on each frame of the animation", "schedule": "specify your own seed schedule", "Seed iter N":"controls for how many frames the same seed should stick before iterating to the next one", diff --git a/scripts/deforum_helpers/args.py b/scripts/deforum_helpers/args.py index e17cf3d6..7149e491 100644 --- a/scripts/deforum_helpers/args.py +++ b/scripts/deforum_helpers/args.py @@ -138,6 +138,8 @@ def DeforumAnimArgs(): hybrid_use_first_frame_as_init_image = True hybrid_motion = "None" #['None','Optical Flow','Perspective','Affine'] hybrid_motion_use_prev_img = False + hybrid_flow_consistency = False + hybrid_consistency_blur = 2 hybrid_flow_method = "RAFT" #['RAFT', 'DIS Medium', 'DIS Fine', 'Farneback'] hybrid_composite = 'None' #['None', 'Normal', 'Before Motion', 'After Generation'] hybrid_use_init_image = False @@ -617,7 +619,7 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True): with gr.Row(variant='compact'): animation_prompts_positive = gr.Textbox(label="Prompts positive", lines=1, interactive=True, placeholder="words in here will be added to the start of all positive prompts") with gr.Row(variant='compact'): - animation_prompts_negative = gr.Textbox(label="Prompts negative", lines=1, interactive=True, placeholder="words in 
here will be added to the end of all negative prompts") + animation_prompts_negative = gr.Textbox(label="Prompts negative", value="nsfw, nude", lines=1, interactive=True, placeholder="words in here will be added to the end of all negative prompts") # COMPOSABLE MASK SCHEDULING ACCORD with gr.Accordion('Composable Mask scheduling', open=False): gr.HTML(""" @@ -738,11 +740,10 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True): with gr.Row(variant='compact'): with gr.Column(min_width=340): with gr.Row(variant='compact'): - hybrid_generate_inputframes = gr.Checkbox(label="Generate inputframes", value=False, interactive=True) - hybrid_motion_use_prev_img = gr.Checkbox(label="Motion use prev img", value=False, interactive=True, visible=False) + hybrid_generate_inputframes = gr.Checkbox(label="Generate inputframes", value=da.hybrid_generate_inputframes, interactive=True) hybrid_use_first_frame_as_init_image = gr.Checkbox(label="First frame as init image", value=da.hybrid_use_first_frame_as_init_image, interactive=True, visible=False) hybrid_use_init_image = gr.Checkbox(label="Use init image as video", value=da.hybrid_use_init_image, interactive=True, visible=True) - with gr.Row(variant='compact') as hybrid_flow_row: + with gr.Row(variant='compact'): with gr.Column(variant='compact'): with gr.Row(variant='compact'): hybrid_motion = gr.Radio(['None', 'Optical Flow', 'Perspective', 'Affine'], label="Hybrid motion", value=da.hybrid_motion, elem_id="hybrid_motion") @@ -750,13 +751,19 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True): with gr.Row(variant='compact'): with gr.Column(scale=1): hybrid_flow_method = gr.Radio(['RAFT', 'DIS Medium', 'DIS Fine', 'Farneback'], label="Flow method", value=da.hybrid_flow_method, elem_id="hybrid_flow_method", visible=False) - with gr.Row(variant='compact') as hybrid_flow_row: + with gr.Row(variant='compact'): + with gr.Column(variant='compact'): + hybrid_flow_consistency = 
gr.Checkbox(label="Flow consistency mask", value=da.hybrid_flow_consistency, interactive=True, visible=False) + hybrid_consistency_blur = gr.Slider(label="Consistency mask blur", minimum=0, maximum=16, step=1, value=da.hybrid_consistency_blur, interactive=True, visible=False) + with gr.Column(variant='compact'): + hybrid_motion_use_prev_img = gr.Checkbox(label="Motion use prev img", value=da.hybrid_motion_use_prev_img, interactive=True, visible=False) + with gr.Row(variant='compact'): hybrid_comp_mask_type = gr.Radio(['None', 'Depth', 'Video Depth', 'Blend', 'Difference'], label="Comp mask type", value=da.hybrid_comp_mask_type, elem_id="hybrid_comp_mask_type", visible=False) with gr.Row(visible=False, variant='compact') as hybrid_comp_mask_row: hybrid_comp_mask_equalize = gr.Radio(['None', 'Before', 'After', 'Both'], label="Comp mask equalize", value=da.hybrid_comp_mask_equalize, elem_id="hybrid_comp_mask_equalize") with gr.Column(variant='compact'): hybrid_comp_mask_auto_contrast = gr.Checkbox(label="Comp mask auto contrast", value=False, interactive=True) - hybrid_comp_mask_inverse = gr.Checkbox(label="Comp mask inverse", value=False, interactive=True) + hybrid_comp_mask_inverse = gr.Checkbox(label="Comp mask inverse", value=da.hybrid_comp_mask_inverse, interactive=True) with gr.Row(variant='compact'): hybrid_comp_save_extra_frames = gr.Checkbox(label="Comp save extra frames", value=False, interactive=True) # HYBRID SCHEDULES ACCORD @@ -940,6 +947,8 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True): Important Notes: """) with gr.Row(variant='compact'): @@ -991,9 +1000,10 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True): optical_flow_redo_generation.change(fn=hide_if_none, inputs=optical_flow_redo_generation, outputs=redo_flow_factor_schedule_column) override_settings_with_file.change(fn=hide_if_false, inputs=override_settings_with_file,outputs=custom_settings_file) 
hybrid_comp_mask_type.change(fn=hide_if_none, inputs=hybrid_comp_mask_type, outputs=hybrid_comp_mask_row) - hybrid_motion.change(fn=disable_by_non_optical_flow, inputs=hybrid_motion, outputs=hybrid_flow_method) - hybrid_motion.change(fn=disable_by_non_optical_flow, inputs=hybrid_motion, outputs=hybrid_flow_factor_schedule) - hybrid_motion.change(fn=hide_if_none, inputs=hybrid_motion, outputs=hybrid_motion_use_prev_img) + hybrid_motion_outputs = [hybrid_flow_method, hybrid_flow_factor_schedule, hybrid_flow_consistency, hybrid_consistency_blur, hybrid_motion_use_prev_img] + for output in hybrid_motion_outputs: + hybrid_motion.change(fn=disable_by_non_optical_flow, inputs=hybrid_motion, outputs=output) + hybrid_flow_consistency.change(fn=hide_if_false, inputs=hybrid_flow_consistency, outputs=hybrid_consistency_blur) optical_flow_cadence.change(fn=hide_if_none, inputs=optical_flow_cadence, outputs=cadence_flow_factor_schedule_column) hybrid_composite.change(fn=disable_by_hybrid_composite_dynamic, inputs=[hybrid_composite, hybrid_comp_mask_type], outputs=hybrid_comp_mask_row) hybrid_composite_outputs = [humans_masking_accord, hybrid_sch_accord, hybrid_comp_mask_type, hybrid_use_first_frame_as_init_image, hybrid_use_init_image] @@ -1063,7 +1073,8 @@ anim_args_names = str(r'''animation_mode, max_frames, border, resume_from_timestring, resume_timestring''' ).replace("\n", "").replace("\r", "").replace(" ", "").split(',') hybrid_args_names = str(r'''hybrid_generate_inputframes, hybrid_generate_human_masks, hybrid_use_first_frame_as_init_image, - hybrid_motion, hybrid_motion_use_prev_img, hybrid_flow_method, hybrid_composite, hybrid_use_init_image, hybrid_comp_mask_type, hybrid_comp_mask_inverse, + hybrid_motion, hybrid_motion_use_prev_img, hybrid_flow_consistency, hybrid_consistency_blur, hybrid_flow_method, hybrid_composite, + hybrid_use_init_image, hybrid_comp_mask_type, hybrid_comp_mask_inverse, hybrid_comp_mask_equalize, hybrid_comp_mask_auto_contrast, 
hybrid_comp_save_extra_frames, hybrid_comp_alpha_schedule, hybrid_flow_factor_schedule, hybrid_comp_mask_blend_alpha_schedule, hybrid_comp_mask_contrast_schedule, diff --git a/scripts/deforum_helpers/consistency_check.py b/scripts/deforum_helpers/consistency_check.py new file mode 100644 index 00000000..669e8c65 --- /dev/null +++ b/scripts/deforum_helpers/consistency_check.py @@ -0,0 +1,132 @@ +''' +Taken from https://github.com/Sxela/flow_tools/blob/main +''' +# import argparse +# import PIL.Image +import numpy as np +# import scipy.ndimage +# import glob +# from tqdm import tqdm + +def make_consistency(flow1, flow2, edges_unreliable=False): + # Awesome pythonic consistency check from [maua](https://github.com/maua-maua-maua/maua/blob/44485c745c65cf9d83cb1b1c792a177588e9c9fc/maua/flow/consistency.py) by Hans Brouwer and Henry Rachootin + # algorithm based on https://github.com/manuelruder/artistic-videos/blob/master/consistencyChecker/consistencyChecker.cpp + # reimplemented in numpy by Hans Brouwer + # // consistencyChecker + # // Check consistency of forward flow via backward flow. 
+ # // (c) Manuel Ruder, Alexey Dosovitskiy, Thomas Brox 2016 + + flow1 = np.flip(flow1, axis=2) + flow2 = np.flip(flow2, axis=2) + h, w, _ = flow1.shape + + # get grid of coordinates for each pixel + orig_coord = np.flip(np.mgrid[:w, :h], 0).T + + # find where the flow1 maps each pixel + warp_coord = orig_coord + flow1 + + # clip the coordinates in bounds and round down + warp_coord_inbound = np.zeros_like(warp_coord) + warp_coord_inbound[..., 0] = np.clip(warp_coord[..., 0], 0, h - 2) + warp_coord_inbound[..., 1] = np.clip(warp_coord[..., 1], 0, w - 2) + warp_coord_floor = np.floor(warp_coord_inbound).astype(int) + + # for each pixel: bilinear interpolation of the corresponding flow2 values around the point mapped to by flow1 + alpha = warp_coord_inbound - warp_coord_floor + flow2_00 = flow2[warp_coord_floor[..., 0], warp_coord_floor[..., 1]] + flow2_01 = flow2[warp_coord_floor[..., 0], warp_coord_floor[..., 1] + 1] + flow2_10 = flow2[warp_coord_floor[..., 0] + 1, warp_coord_floor[..., 1]] + flow2_11 = flow2[warp_coord_floor[..., 0] + 1, warp_coord_floor[..., 1] + 1] + flow2_0_blend = (1 - alpha[..., 1, None]) * flow2_00 + alpha[..., 1, None] * flow2_01 + flow2_1_blend = (1 - alpha[..., 1, None]) * flow2_10 + alpha[..., 1, None] * flow2_11 + warp_coord_flow2 = (1 - alpha[..., 0, None]) * flow2_0_blend + alpha[..., 0, None] * flow2_1_blend + + # coordinates that flow2 remaps each flow1-mapped pixel to + rewarp_coord = warp_coord + warp_coord_flow2 + + # where the difference in position after flow1 and flow2 are applied is larger than a threshold there is likely an + # occlusion. 
set values to -1 so the final gaussian blur will spread the value a couple pixels around this area + squared_diff = np.sum((rewarp_coord - orig_coord) ** 2, axis=2) + threshold = 0.01 * np.sum(warp_coord_flow2 ** 2 + flow1 ** 2, axis=2) + 0.5 + + reliable_flow = np.ones((squared_diff.shape[0], squared_diff.shape[1], 3)) + reliable_flow[...,0] = np.where(squared_diff >= threshold, -0.75, 1) + + # areas mapping outside of the frame are also occluded (don't need extra region around these though, so set 0) + if edges_unreliable: + reliable_flow[...,1] = np.where( + np.logical_or.reduce( + ( + warp_coord[..., 0] < 0, + warp_coord[..., 1] < 0, + warp_coord[..., 0] >= h - 1, + warp_coord[..., 1] >= w - 1, + ) + ), + 0, + reliable_flow[...,1], + ) + + # get derivative of flow, large changes in derivative => edge of moving object + dx = np.diff(flow1, axis=1, append=0) + dy = np.diff(flow1, axis=0, append=0) + motion_edge = np.sum(dx ** 2 + dy ** 2, axis=2) + motion_threshold = 0.01 * np.sum(flow1 ** 2, axis=2) + 0.002 + # check the occlusion marker (-0.75) in channel 0, where it is written above; channel 2 is still all ones here + reliable_flow[...,2] = np.where(np.logical_and(motion_edge > motion_threshold, reliable_flow[...,0] != -0.75), 0, reliable_flow[...,2]) + + return reliable_flow + + +# parser = argparse.ArgumentParser() +# parser.add_argument("--flow_fwd", type=str, required=True, help="Forward flow path or glob pattern") +# parser.add_argument("--flow_bwd", type=str, required=True, help="Backward flow path or glob pattern") +# parser.add_argument("--output", type=str, required=True, help="Output consistency map path") +# parser.add_argument("--output_postfix", type=str, default='_cc', help="Output consistency map name postfix") +# parser.add_argument("--image_output", action='store_true', help="Output consistency map as b\w image path") +# parser.add_argument("--skip_numpy_output", action='store_true', help="Don`t save numpy array") +# parser.add_argument("--blur", type=float, default=2., help="Gaussian blur kernel size (0 for no blur)") +# 
parser.add_argument("--bottom_clamp", type=float, default=0., help="Clamp lower values") +# parser.add_argument("--edges_reliable", action='store_true', help="Consider edges reliable") +# parser.add_argument("--save_separate_channels", action='store_true', help="Save consistency mask layers as separate channels") +# args = parser.parse_args() + +# def run(args): +# flow_fwd_many = sorted(glob.glob(args.flow_fwd)) +# flow_bwd_many = sorted(glob.glob(args.flow_bwd)) +# if len(flow_fwd_many)!= len(flow_bwd_many): +# raise Exception('Forward and backward flow file numbers don`t match') +# return + +# for flow_fwd,flow_bwd in tqdm(zip(flow_fwd_many, flow_bwd_many)): +# flow_fwd = flow_fwd.replace('\\','/') +# flow_bwd = flow_bwd.replace('\\','/') +# flow1 = np.load(flow_fwd) +# flow2 = np.load(flow_bwd) +# consistency_map_multilayer = make_consistency(flow1, flow2, edges_unreliable=not args.edges_reliable) + +# if args.save_separate_channels: +# consistency_map = consistency_map_multilayer +# else: +# consistency_map = np.ones_like(consistency_map_multilayer[...,0]) +# consistency_map*=consistency_map_multilayer[...,0] +# consistency_map*=consistency_map_multilayer[...,1] +# consistency_map*=consistency_map_multilayer[...,2] + +# # blur +# if args.blur>0.: +# consistency_map = scipy.ndimage.gaussian_filter(consistency_map, [args.blur, args.blur]) + +# #clip values between bottom_clamp and 1 +# bottom_clamp = min(max(args.bottom_clamp,0.), 0.999) +# consistency_map = consistency_map.clip(bottom_clamp, 1) +# out_fname = args.output+'/'+flow_fwd.split('/')[-1][:-4]+args.output_postfix + +# if not args.skip_numpy_output: +# np.save(out_fname, consistency_map) + +# #save as jpeg +# if args.image_output: +# PIL.Image.fromarray((consistency_map*255.).astype('uint8')).save(out_fname+'.jpg', quality=90) + +# run(args) diff --git a/scripts/deforum_helpers/deforum_controlnet.py b/scripts/deforum_helpers/deforum_controlnet.py index db855458..a4251bde 100644 --- 
a/scripts/deforum_helpers/deforum_controlnet.py +++ b/scripts/deforum_helpers/deforum_controlnet.py @@ -72,8 +72,8 @@ def setup_controlnet_ui_raw(): refresh_models.click(refresh_all_models, model, model) with gr.Row(visible=False) as weight_row: weight = gr.Slider(label=f"Weight", value=1.0, minimum=0.0, maximum=2.0, step=.05, interactive=True) - guidance_start = gr.Slider(label="Guidance start", value=0.0, minimum=0.0, maximum=1.0, interactive=True) - guidance_end = gr.Slider(label="Guidance end", value=1.0, minimum=0.0, maximum=1.0, interactive=True) + guidance_start = gr.Slider(label="Starting Control Step", value=0.0, minimum=0.0, maximum=1.0, interactive=True) + guidance_end = gr.Slider(label="Ending Control Step", value=1.0, minimum=0.0, maximum=1.0, interactive=True) model_dropdowns.append(model) with gr.Column(visible=False) as advanced_column: processor_res = gr.Slider(label="Annotator resolution", value=64, minimum=64, maximum=2048, interactive=False) diff --git a/scripts/deforum_helpers/deforum_controlnet_gradio.py b/scripts/deforum_helpers/deforum_controlnet_gradio.py index cb223c15..3a4c68b5 100644 --- a/scripts/deforum_helpers/deforum_controlnet_gradio.py +++ b/scripts/deforum_helpers/deforum_controlnet_gradio.py @@ -33,7 +33,7 @@ class ToolButton(gr.Button, gr.components.FormComponent): def get_block_name(self): return "button" - + def build_sliders(module, pp): # module = self.get_module_basename(module) if module == "canny": @@ -71,7 +71,7 @@ def build_sliders(module, pp): gr.update(visible=False, interactive=False), gr.update(visible=True) ] - elif module in ["depth_leres", "depth_leres_boost"]: + elif module in ["depth_leres", "depth_leres++"]: return [ gr.update(label="Preprocessor Resolution", minimum=64, maximum=2048, value=512, step=1, visible=not pp, interactive=not pp), gr.update(label="Remove Near %", value=0, minimum=0, maximum=100, step=0.1, visible=True, interactive=True), @@ -99,7 +99,7 @@ def build_sliders(module, pp): 
gr.update(visible=False, interactive=False), gr.update(visible=True) ] - elif module == "tile_gaussian": + elif module == "tile_resample": return [ gr.update(visible=False, interactive=False), gr.update(label="Down Sampling Rate", value=1.0, minimum=1.0, maximum=8.0, step=0.01, visible=True, interactive=True), diff --git a/scripts/deforum_helpers/hybrid_video.py b/scripts/deforum_helpers/hybrid_video.py index fcdf6d51..80c22328 100644 --- a/scripts/deforum_helpers/hybrid_video.py +++ b/scripts/deforum_helpers/hybrid_video.py @@ -1,16 +1,21 @@ -import cv2 import os import pathlib -import numpy as np import random + +import cv2 +import numpy as np import PIL +import torch from PIL import Image, ImageChops, ImageOps, ImageEnhance -from .video_audio_utilities import vid2frames, get_quick_vid_info, get_frame_name, get_next_frame +from scipy.ndimage import gaussian_filter + +from .consistency_check import make_consistency from .human_masking import video2humanmasks from .load_images import load_image +from .video_audio_utilities import vid2frames, get_quick_vid_info, get_frame_name, get_next_frame from modules.shared import opts -DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False) +# DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False) def delete_all_imgs_in_folder(folder_path): files = list(pathlib.Path(folder_path).glob('*.jpg')) @@ -149,8 +154,8 @@ def get_matrix_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_motio print(f"Calculating {hybrid_motion} RANSAC matrix for frames {frame_idx} to {frame_idx+1}") img1 = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx]), dimensions), cv2.COLOR_BGR2GRAY) img2 = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions), cv2.COLOR_BGR2GRAY) - matrix = get_transformation_matrix_from_images(img1, img2, hybrid_motion) - return matrix + M = get_transformation_matrix_from_images(img1, img2, hybrid_motion) + return M def 
get_matrix_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, prev_img, hybrid_motion): print(f"Calculating {hybrid_motion} RANSAC matrix for frames {frame_idx} to {frame_idx+1}") @@ -161,20 +166,24 @@ def get_matrix_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, prev_im else: prev_img_gray = cv2.cvtColor(prev_img, cv2.COLOR_BGR2GRAY) img = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions), cv2.COLOR_BGR2GRAY) - matrix = get_transformation_matrix_from_images(prev_img_gray, img, hybrid_motion) - return matrix + M = get_transformation_matrix_from_images(prev_img_gray, img, hybrid_motion) + return M -def get_flow_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, method, raft_model, do_flow_visualization=False): - print(f"Calculating {method} optical flow for frames {frame_idx} to {frame_idx+1}") +def get_flow_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, method, raft_model, consistency_check=True, consistency_blur=0, do_flow_visualization=False): + print(f"Calculating {method} optical flow {'w/consistency mask' if consistency_check else ''} for frames {frame_idx} to {frame_idx+1}") i1 = get_resized_image_from_filename(str(inputfiles[frame_idx]), dimensions) i2 = get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions) - flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow) - if do_flow_visualization: - save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path) + if consistency_check: + flow, reliable_flow = get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur) # forward flow w/backward consistency check + if do_flow_visualization: save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path) + else: + flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow) # old single flow forward + if do_flow_visualization: 
save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path) return flow -def get_flow_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, prev_img, method, raft_model, do_flow_visualization=False): - print(f"Calculating {method} optical flow for frames {frame_idx} to {frame_idx+1}") +def get_flow_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, prev_img, method, raft_model, consistency_check=True, consistency_blur=0, do_flow_visualization=False): + print(f"Calculating {method} optical flow {'w/consistency mask' if consistency_check else ''} for frames {frame_idx} to {frame_idx+1}") + reliable_flow = None # first handle invalid images by returning default flow height, width = prev_img.shape[:2] if height == 0 or width == 0: @@ -182,16 +191,44 @@ def get_flow_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, hybrid_fr else: i1 = prev_img.astype(np.uint8) i2 = get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions) - flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow) - if do_flow_visualization: - save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path) + if consistency_check: + flow, reliable_flow = get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur) # forward flow w/backward consistency check + if do_flow_visualization: save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path) + else: + flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow) + if do_flow_visualization: save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path) return flow -def image_transform_ransac(image_cv2, xform, hybrid_motion): +def get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur, reliability=0): + flow_forward = get_flow_from_images(i1, i2, method, raft_model, prev_flow) + flow_backward = 
get_flow_from_images(i2, i1, method, raft_model, None) + reliable_flow = make_consistency(flow_forward, flow_backward, edges_unreliable=False) + if consistency_blur > 0: + reliable_flow = custom_gaussian_blur(reliable_flow.astype(np.float32), 1, consistency_blur) + return filter_flow(flow_forward, reliable_flow, reliability=reliability, consistency_blur=consistency_blur), reliable_flow + +def custom_gaussian_blur(input_array, blur_size, sigma): + return gaussian_filter(input_array, sigma=(sigma, sigma, 0), order=0, mode='constant', cval=0.0, truncate=blur_size) + +def filter_flow(flow, reliable_flow, reliability=0.5, consistency_blur=0): + # reliability from reliabile flow: -0.75 is bad, 0 is meh/outside, 1 is great + # Create a mask from the first channel of the reliable_flow array + mask = reliable_flow[..., 0] + + # to set everything to 1 or 0 based on reliability + # mask = np.where(mask >= reliability, 1, 0) + + # Expand the mask to match the shape of the forward_flow array + mask = np.repeat(mask[..., np.newaxis], flow.shape[2], axis=2) + + # Apply the mask to the flow + return flow * mask + +def image_transform_ransac(image_cv2, M, hybrid_motion, depth=None): if hybrid_motion == "Perspective": - return image_transform_perspective(image_cv2, xform) + return image_transform_perspective(image_cv2, M, depth) else: # Affine - return image_transform_affine(image_cv2, xform) + return image_transform_affine(image_cv2, M, depth) def image_transform_optical_flow(img, flow, flow_factor): # if flow factor not normal, calculate flow factor @@ -204,21 +241,35 @@ def image_transform_optical_flow(img, flow, flow_factor): flow[:, :, 1] += np.arange(h)[:,np.newaxis] return remap(img, flow) -def image_transform_affine(image_cv2, xform): - return cv2.warpAffine( - image_cv2, - xform, - (image_cv2.shape[1],image_cv2.shape[0]), - borderMode=cv2.BORDER_REFLECT_101 - ) +def image_transform_affine(image_cv2, M, depth=None): + if depth is None: + return cv2.warpAffine( + image_cv2, + M, + 
(image_cv2.shape[1],image_cv2.shape[0]), + borderMode=cv2.BORDER_REFLECT_101 + ) + else: + return depth_based_affine_warp( + image_cv2, + depth, + M + ) -def image_transform_perspective(image_cv2, xform): - return cv2.warpPerspective( - image_cv2, - xform, - (image_cv2.shape[1], image_cv2.shape[0]), - borderMode=cv2.BORDER_REFLECT_101 - ) +def image_transform_perspective(image_cv2, M, depth=None): + if depth is None: + return cv2.warpPerspective( + image_cv2, + M, + (image_cv2.shape[1], image_cv2.shape[0]), + borderMode=cv2.BORDER_REFLECT_101 + ) + else: + return render_3d_perspective( + image_cv2, + depth, + M + ) def get_hybrid_motion_default_matrix(hybrid_motion): if hybrid_motion == "Perspective": @@ -373,7 +424,37 @@ def save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_fram cv2.imwrite(flow_img_file, flow_img) print(f"Saved optical flow visualization: {flow_img_file}") -def draw_flow_lines_in_grid_in_color(img, flow, step=8, magnitude_multiplier=1, min_magnitude = 1, max_magnitude = 10000): +def save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path, color=True): + flow_mask_img_file = os.path.join(hybrid_frame_path, f"flow_mask{frame_idx:09}.jpg") + if color: + # Normalize the reliable_flow array to the range [0, 255] + normalized_reliable_flow = (reliable_flow - reliable_flow.min()) / (reliable_flow.max() - reliable_flow.min()) * 255 + # Change the data type to np.uint8 + mask_image = normalized_reliable_flow.astype(np.uint8) + else: + # Extract the first channel of the reliable_flow array + first_channel = reliable_flow[..., 0] + # Normalize the first channel to the range [0, 255] + normalized_first_channel = (first_channel - first_channel.min()) / (first_channel.max() - first_channel.min()) * 255 + # Change the data type to np.uint8 + grayscale_image = normalized_first_channel.astype(np.uint8) + # Replicate the grayscale channel three times to form a BGR image + mask_image = np.stack((grayscale_image, 
grayscale_image, grayscale_image), axis=2) + cv2.imwrite(flow_mask_img_file, mask_image) + print(f"Saved mask flow visualization: {flow_mask_img_file}") + +def reliable_flow_to_image(reliable_flow): + # Extract the first channel of the reliable_flow array + first_channel = reliable_flow[..., 0] + # Normalize the first channel to the range [0, 255] + normalized_first_channel = (first_channel - first_channel.min()) / (first_channel.max() - first_channel.min()) * 255 + # Change the data type to np.uint8 + grayscale_image = normalized_first_channel.astype(np.uint8) + # Replicate the grayscale channel three times to form a BGR image + bgr_image = np.stack((grayscale_image, grayscale_image, grayscale_image), axis=2) + return bgr_image + +def draw_flow_lines_in_grid_in_color(img, flow, step=8, magnitude_multiplier=1, min_magnitude = 0, max_magnitude = 10000): flow = flow * magnitude_multiplier h, w = img.shape[:2] y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int) diff --git a/scripts/deforum_helpers/render.py b/scripts/deforum_helpers/render.py index 892f33c1..b77b403a 100644 --- a/scripts/deforum_helpers/render.py +++ b/scripts/deforum_helpers/render.py @@ -1,20 +1,16 @@ import os -import json import pandas as pd import cv2 -import re import numpy as np -import itertools import numexpr import gc import random import PIL import time from PIL import Image, ImageOps -from .rich import console from .generate import generate, isJson from .noise import add_noise -from .animation import sample_from_cv2, sample_to_cv2, anim_frame_warp +from .animation import anim_frame_warp from .animation_key_frames import DeformAnimKeys, LooperAnimKeys from .video_audio_utilities import get_frame_name, get_next_frame from .depth import MidasModel, AdaBinsModel @@ -37,8 +33,6 @@ from .prompt import prepare_prompt from modules.shared import opts, cmd_opts, state, sd_model from modules import lowvram, devices, sd_hijack from .RAFT import RAFT -from .ZoeDepth import ZoeDepth 
-import torch def render_animation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, animation_prompts, root): DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False) @@ -280,6 +274,10 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro tween_frame_start_idx = max(start_frame, frame_idx-turbo_steps) cadence_flow = None for tween_frame_idx in range(tween_frame_start_idx, frame_idx): + # update progress during cadence + state.job = f"frame {tween_frame_idx + 1}/{anim_args.max_frames}" + state.job_no = tween_frame_idx + 1 + # cadence vars tween = float(tween_frame_idx - tween_frame_start_idx + 1) / float(frame_idx - tween_frame_start_idx) advance_prev = turbo_prev_image is not None and tween_frame_idx > turbo_prev_frame_idx advance_next = tween_frame_idx > turbo_next_frame_idx @@ -341,14 +339,14 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro turbo_next_image = image_transform_ransac(turbo_next_image, matrix, anim_args.hybrid_motion) if anim_args.hybrid_motion in ['Optical Flow']: if anim_args.hybrid_motion_use_prev_img: - flow = get_flow_for_hybrid_motion_prev(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_comp_save_extra_frames) + flow = get_flow_for_hybrid_motion_prev(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames) if advance_prev: turbo_prev_image = image_transform_optical_flow(turbo_prev_image, flow, hybrid_comp_schedules['flow_factor']) if advance_next: turbo_next_image = image_transform_optical_flow(turbo_next_image, flow, hybrid_comp_schedules['flow_factor']) prev_flow = flow else: - flow = get_flow_for_hybrid_motion(tween_frame_idx-1, (args.W, args.H), inputfiles, 
hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_comp_save_extra_frames) + flow = get_flow_for_hybrid_motion(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames) if advance_prev: turbo_prev_image = image_transform_optical_flow(turbo_prev_image, flow, hybrid_comp_schedules['flow_factor']) if advance_next: @@ -374,6 +372,9 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro # get prev_img during cadence prev_img = img + # current image update for cadence frames (left commented because it doesn't currently update the preview) + # state.current_image = Image.fromarray(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)) + # saving cadence frames filename = f"{args.timestring}_{tween_frame_idx:09}.png" cv2.imwrite(os.path.join(args.outdir, filename), img) @@ -408,9 +409,9 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro prev_img = image_transform_ransac(prev_img, matrix, anim_args.hybrid_motion) if anim_args.hybrid_motion in ['Optical Flow']: if anim_args.hybrid_motion_use_prev_img: - flow = get_flow_for_hybrid_motion_prev(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_comp_save_extra_frames) + flow = get_flow_for_hybrid_motion_prev(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames) else: - flow = get_flow_for_hybrid_motion(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model,anim_args.hybrid_comp_save_extra_frames) + flow = 
get_flow_for_hybrid_motion(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames) prev_img = image_transform_optical_flow(prev_img, flow, hybrid_comp_schedules['flow_factor']) prev_flow = flow diff --git a/scripts/deforum_helpers/video_audio_utilities.py b/scripts/deforum_helpers/video_audio_utilities.py index 111f1aca..f3cda5b9 100644 --- a/scripts/deforum_helpers/video_audio_utilities.py +++ b/scripts/deforum_helpers/video_audio_utilities.py @@ -163,7 +163,6 @@ def ffmpeg_stitch_video(ffmpeg_location=None, fps=None, outmp4_path=None, stitch cmd = [ ffmpeg_location, '-y', - '-vcodec', 'png', '-r', str(float(fps)), '-start_number', str(stitch_from_frame), '-i', imgs_path, @@ -174,9 +173,12 @@ def ffmpeg_stitch_video(ffmpeg_location=None, fps=None, outmp4_path=None, stitch '-pix_fmt', 'yuv420p', '-crf', str(crf), '-preset', preset, - '-pattern_type', 'sequence', - outmp4_path + '-pattern_type', 'sequence' ] + cmd.append('-vcodec') + cmd.append('libx264') # the former ternary was dead code: imgs_path[0] is a single char so .find('.png') was always -1; 'png' here would also clash with '-pix_fmt yuv420p' + cmd.append(outmp4_path) + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = process.communicate() except FileNotFoundError: