Merge remote-tracking branch 'upstream/automatic1111-webui' into remove-alwayson

remove-alwayson
hithereai 2023-05-02 18:29:47 +03:00
commit 85f1b15fcf
8 changed files with 293 additions and 66 deletions

View File

@ -26,7 +26,7 @@ deforum_titles = {
"Filename format": "specify the format of the filename for output images",
"Seed behavior": "defines the seed behavior that is used for animations",
"iter": "the seed value will increment by 1 for each subsequent frame of the animation",
"fixed": "the seed will remain fixed across all frames of animation",
"fixed": "the seed will remain fixed across all frames of animation. **NOT RECOMMENDED.** Unless you know what you are doing, it will *deep fry* the pictures over time",
"random": "a random seed will be used on each frame of the animation",
"schedule": "specify your own seed schedule",
"Seed iter N":"controls for how many frames the same seed should stick before iterating to the next one",

View File

@ -138,6 +138,8 @@ def DeforumAnimArgs():
hybrid_use_first_frame_as_init_image = True
hybrid_motion = "None" #['None','Optical Flow','Perspective','Affine']
hybrid_motion_use_prev_img = False
hybrid_flow_consistency = False
hybrid_consistency_blur = 2
hybrid_flow_method = "RAFT" #['RAFT', 'DIS Medium', 'DIS Fine', 'Farneback']
hybrid_composite = 'None' #['None', 'Normal', 'Before Motion', 'After Generation']
hybrid_use_init_image = False
@ -617,7 +619,7 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True):
with gr.Row(variant='compact'):
animation_prompts_positive = gr.Textbox(label="Prompts positive", lines=1, interactive=True, placeholder="words in here will be added to the start of all positive prompts")
with gr.Row(variant='compact'):
animation_prompts_negative = gr.Textbox(label="Prompts negative", lines=1, interactive=True, placeholder="words in here will be added to the end of all negative prompts")
animation_prompts_negative = gr.Textbox(label="Prompts negative", value="nsfw, nude", lines=1, interactive=True, placeholder="words in here will be added to the end of all negative prompts")
# COMPOSABLE MASK SCHEDULING ACCORD
with gr.Accordion('Composable Mask scheduling', open=False):
gr.HTML("""
@ -738,11 +740,10 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True):
with gr.Row(variant='compact'):
with gr.Column(min_width=340):
with gr.Row(variant='compact'):
hybrid_generate_inputframes = gr.Checkbox(label="Generate inputframes", value=False, interactive=True)
hybrid_motion_use_prev_img = gr.Checkbox(label="Motion use prev img", value=False, interactive=True, visible=False)
hybrid_generate_inputframes = gr.Checkbox(label="Generate inputframes", value=da.hybrid_generate_inputframes, interactive=True)
hybrid_use_first_frame_as_init_image = gr.Checkbox(label="First frame as init image", value=da.hybrid_use_first_frame_as_init_image, interactive=True, visible=False)
hybrid_use_init_image = gr.Checkbox(label="Use init image as video", value=da.hybrid_use_init_image, interactive=True, visible=True)
with gr.Row(variant='compact') as hybrid_flow_row:
with gr.Row(variant='compact'):
with gr.Column(variant='compact'):
with gr.Row(variant='compact'):
hybrid_motion = gr.Radio(['None', 'Optical Flow', 'Perspective', 'Affine'], label="Hybrid motion", value=da.hybrid_motion, elem_id="hybrid_motion")
@ -750,13 +751,19 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True):
with gr.Row(variant='compact'):
with gr.Column(scale=1):
hybrid_flow_method = gr.Radio(['RAFT', 'DIS Medium', 'DIS Fine', 'Farneback'], label="Flow method", value=da.hybrid_flow_method, elem_id="hybrid_flow_method", visible=False)
with gr.Row(variant='compact') as hybrid_flow_row:
with gr.Row(variant='compact'):
with gr.Column(variant='compact'):
hybrid_flow_consistency = gr.Checkbox(label="Flow consistency mask", value=da.hybrid_flow_consistency, interactive=True, visible=False)
hybrid_consistency_blur = gr.Slider(label="Consistency mask blur", minimum=0, maximum=16, step=1, value=da.hybrid_consistency_blur, interactive=True, visible=False)
with gr.Column(variant='compact'):
hybrid_motion_use_prev_img = gr.Checkbox(label="Motion use prev img", value=da.hybrid_motion_use_prev_img, interactive=True, visible=False)
with gr.Row(variant='compact'):
hybrid_comp_mask_type = gr.Radio(['None', 'Depth', 'Video Depth', 'Blend', 'Difference'], label="Comp mask type", value=da.hybrid_comp_mask_type, elem_id="hybrid_comp_mask_type", visible=False)
with gr.Row(visible=False, variant='compact') as hybrid_comp_mask_row:
hybrid_comp_mask_equalize = gr.Radio(['None', 'Before', 'After', 'Both'], label="Comp mask equalize", value=da.hybrid_comp_mask_equalize, elem_id="hybrid_comp_mask_equalize")
with gr.Column(variant='compact'):
hybrid_comp_mask_auto_contrast = gr.Checkbox(label="Comp mask auto contrast", value=False, interactive=True)
hybrid_comp_mask_inverse = gr.Checkbox(label="Comp mask inverse", value=False, interactive=True)
hybrid_comp_mask_inverse = gr.Checkbox(label="Comp mask inverse", value=da.hybrid_comp_mask_inverse, interactive=True)
with gr.Row(variant='compact'):
hybrid_comp_save_extra_frames = gr.Checkbox(label="Comp save extra frames", value=False, interactive=True)
# HYBRID SCHEDULES ACCORD
@ -940,6 +947,8 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True):
Important Notes:
<ul style="list-style-type:circle; margin-left:1em; margin-bottom:0.25em">
<li>Enter relative to webui folder or Full-Absolute path, and make sure it ends with something like this: '20230124234916_%09d.png', just replace 20230124234916 with your batch ID. The %09d is important, don't forget it!</li>
<li>In the filename, '%09d' represents the 9 counting numbers, For '20230124234916_000000001.png', use '20230124234916_%09d.png'</li>
<li>If non-deforum frames, use the correct number of counting digits. For files like 'bunnies-0000.jpg', you'd use 'bunnies-%04d.jpg'</li>
</ul>
""")
with gr.Row(variant='compact'):
@ -991,9 +1000,10 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True):
optical_flow_redo_generation.change(fn=hide_if_none, inputs=optical_flow_redo_generation, outputs=redo_flow_factor_schedule_column)
override_settings_with_file.change(fn=hide_if_false, inputs=override_settings_with_file,outputs=custom_settings_file)
hybrid_comp_mask_type.change(fn=hide_if_none, inputs=hybrid_comp_mask_type, outputs=hybrid_comp_mask_row)
hybrid_motion.change(fn=disable_by_non_optical_flow, inputs=hybrid_motion, outputs=hybrid_flow_method)
hybrid_motion.change(fn=disable_by_non_optical_flow, inputs=hybrid_motion, outputs=hybrid_flow_factor_schedule)
hybrid_motion.change(fn=hide_if_none, inputs=hybrid_motion, outputs=hybrid_motion_use_prev_img)
hybrid_motion_outputs = [hybrid_flow_method, hybrid_flow_factor_schedule, hybrid_flow_consistency, hybrid_consistency_blur, hybrid_motion_use_prev_img]
for output in hybrid_motion_outputs:
hybrid_motion.change(fn=disable_by_non_optical_flow, inputs=hybrid_motion, outputs=output)
hybrid_flow_consistency.change(fn=hide_if_false, inputs=hybrid_flow_consistency, outputs=hybrid_consistency_blur)
optical_flow_cadence.change(fn=hide_if_none, inputs=optical_flow_cadence, outputs=cadence_flow_factor_schedule_column)
hybrid_composite.change(fn=disable_by_hybrid_composite_dynamic, inputs=[hybrid_composite, hybrid_comp_mask_type], outputs=hybrid_comp_mask_row)
hybrid_composite_outputs = [humans_masking_accord, hybrid_sch_accord, hybrid_comp_mask_type, hybrid_use_first_frame_as_init_image, hybrid_use_init_image]
@ -1063,7 +1073,8 @@ anim_args_names = str(r'''animation_mode, max_frames, border,
resume_from_timestring, resume_timestring'''
).replace("\n", "").replace("\r", "").replace(" ", "").split(',')
hybrid_args_names = str(r'''hybrid_generate_inputframes, hybrid_generate_human_masks, hybrid_use_first_frame_as_init_image,
hybrid_motion, hybrid_motion_use_prev_img, hybrid_flow_method, hybrid_composite, hybrid_use_init_image, hybrid_comp_mask_type, hybrid_comp_mask_inverse,
hybrid_motion, hybrid_motion_use_prev_img, hybrid_flow_consistency, hybrid_consistency_blur, hybrid_flow_method, hybrid_composite,
hybrid_use_init_image, hybrid_comp_mask_type, hybrid_comp_mask_inverse,
hybrid_comp_mask_equalize, hybrid_comp_mask_auto_contrast, hybrid_comp_save_extra_frames,
hybrid_comp_alpha_schedule, hybrid_flow_factor_schedule,
hybrid_comp_mask_blend_alpha_schedule, hybrid_comp_mask_contrast_schedule,

View File

@ -0,0 +1,132 @@
'''
Taken from https://github.com/Sxela/flow_tools/blob/main
'''
# import argparse
# import PIL.Image
import numpy as np
# import scipy.ndimage
# import glob
# from tqdm import tqdm
def make_consistency(flow1, flow2, edges_unreliable=False):
    """Score the per-pixel reliability of forward flow `flow1` by checking it
    against backward flow `flow2`.

    Awesome pythonic consistency check from
    [maua](https://github.com/maua-maua-maua/maua/blob/44485c745c65cf9d83cb1b1c792a177588e9c9fc/maua/flow/consistency.py)
    by Hans Brouwer and Henry Rachootin; algorithm based on
    https://github.com/manuelruder/artistic-videos/blob/master/consistencyChecker/consistencyChecker.cpp
    // consistencyChecker
    // Check consistency of forward flow via backward flow.
    // (c) Manuel Ruder, Alexey Dosovitskiy, Thomas Brox 2016
    Reimplemented in numpy by Hans Brouwer.

    Args:
        flow1: forward optical flow, array of shape (h, w, 2).
        flow2: backward optical flow, same shape as `flow1`.
        edges_unreliable: if True, pixels whose forward flow maps outside the
            frame are marked unreliable in channel 1.

    Returns:
        (h, w, 3) float array. Channel 0: occlusion check (-0.75 bad, 1 good);
        channel 1: out-of-frame check (0 bad, 1 good); channel 2: motion-edge
        check (0 bad, 1 good).
    """
    # flip (x, y) flow components to (y, x) so they line up with array indexing
    flow1 = np.flip(flow1, axis=2)
    flow2 = np.flip(flow2, axis=2)
    h, w, _ = flow1.shape
    # get grid of coordinates for each pixel
    orig_coord = np.flip(np.mgrid[:w, :h], 0).T
    # find where the flow1 maps each pixel
    warp_coord = orig_coord + flow1
    # clip the coordinates in bounds and round down
    warp_coord_inbound = np.zeros_like(warp_coord)
    warp_coord_inbound[..., 0] = np.clip(warp_coord[..., 0], 0, h - 2)
    warp_coord_inbound[..., 1] = np.clip(warp_coord[..., 1], 0, w - 2)
    warp_coord_floor = np.floor(warp_coord_inbound).astype(int)
    # for each pixel: bilinear interpolation of the corresponding flow2 values around the point mapped to by flow1
    alpha = warp_coord_inbound - warp_coord_floor
    flow2_00 = flow2[warp_coord_floor[..., 0], warp_coord_floor[..., 1]]
    flow2_01 = flow2[warp_coord_floor[..., 0], warp_coord_floor[..., 1] + 1]
    flow2_10 = flow2[warp_coord_floor[..., 0] + 1, warp_coord_floor[..., 1]]
    flow2_11 = flow2[warp_coord_floor[..., 0] + 1, warp_coord_floor[..., 1] + 1]
    flow2_0_blend = (1 - alpha[..., 1, None]) * flow2_00 + alpha[..., 1, None] * flow2_01
    flow2_1_blend = (1 - alpha[..., 1, None]) * flow2_10 + alpha[..., 1, None] * flow2_11
    warp_coord_flow2 = (1 - alpha[..., 0, None]) * flow2_0_blend + alpha[..., 0, None] * flow2_1_blend
    # coordinates that flow2 remaps each flow1-mapped pixel to
    rewarp_coord = warp_coord + warp_coord_flow2
    # where the difference in position after flow1 and flow2 are applied is larger than a threshold there is likely an
    # occlusion. set values to -1 so the final gaussian blur will spread the value a couple pixels around this area
    squared_diff = np.sum((rewarp_coord - orig_coord) ** 2, axis=2)
    threshold = 0.01 * np.sum(warp_coord_flow2 ** 2 + flow1 ** 2, axis=2) + 0.5
    reliable_flow = np.ones((squared_diff.shape[0], squared_diff.shape[1], 3))
    reliable_flow[...,0] = np.where(squared_diff >= threshold, -0.75, 1)
    # areas mapping outside of the frame are also occluded (don't need extra region around these though, so set 0)
    if edges_unreliable:
        reliable_flow[...,1] = np.where(
            np.logical_or.reduce(
                (
                    warp_coord[..., 0] < 0,
                    warp_coord[..., 1] < 0,
                    warp_coord[..., 0] >= h - 1,
                    warp_coord[..., 1] >= w - 1,
                )
            ),
            0,
            reliable_flow[...,1],
        )
    # get derivative of flow, large changes in derivative => edge of moving object
    dx = np.diff(flow1, axis=1, append=0)
    dy = np.diff(flow1, axis=0, append=0)
    motion_edge = np.sum(dx ** 2 + dy ** 2, axis=2)
    motion_threshold = 0.01 * np.sum(flow1 ** 2, axis=2) + 0.002
    # NOTE(review): the -0.75 sentinel is only ever written to channel 0, so the
    # `reliable_flow[...,2] != -0.75` guard here never fires — looks like it was
    # meant to check channel 0; kept as-is from upstream, confirm before changing.
    reliable_flow[...,2] = np.where(np.logical_and(motion_edge > motion_threshold, reliable_flow[...,2] != -0.75), 0, reliable_flow[...,2])
    return reliable_flow
# parser = argparse.ArgumentParser()
# parser.add_argument("--flow_fwd", type=str, required=True, help="Forward flow path or glob pattern")
# parser.add_argument("--flow_bwd", type=str, required=True, help="Backward flow path or glob pattern")
# parser.add_argument("--output", type=str, required=True, help="Output consistency map path")
# parser.add_argument("--output_postfix", type=str, default='_cc', help="Output consistency map name postfix")
# parser.add_argument("--image_output", action='store_true', help="Output consistency map as b\w image path")
# parser.add_argument("--skip_numpy_output", action='store_true', help="Don`t save numpy array")
# parser.add_argument("--blur", type=float, default=2., help="Gaussian blur kernel size (0 for no blur)")
# parser.add_argument("--bottom_clamp", type=float, default=0., help="Clamp lower values")
# parser.add_argument("--edges_reliable", action='store_true', help="Consider edges reliable")
# parser.add_argument("--save_separate_channels", action='store_true', help="Save consistency mask layers as separate channels")
# args = parser.parse_args()
# def run(args):
# flow_fwd_many = sorted(glob.glob(args.flow_fwd))
# flow_bwd_many = sorted(glob.glob(args.flow_bwd))
# if len(flow_fwd_many)!= len(flow_bwd_many):
# raise Exception('Forward and backward flow file numbers don`t match')
# return
# for flow_fwd,flow_bwd in tqdm(zip(flow_fwd_many, flow_bwd_many)):
# flow_fwd = flow_fwd.replace('\\','/')
# flow_bwd = flow_bwd.replace('\\','/')
# flow1 = np.load(flow_fwd)
# flow2 = np.load(flow_bwd)
# consistency_map_multilayer = make_consistency(flow1, flow2, edges_unreliable=not args.edges_reliable)
# if args.save_separate_channels:
# consistency_map = consistency_map_multilayer
# else:
# consistency_map = np.ones_like(consistency_map_multilayer[...,0])
# consistency_map*=consistency_map_multilayer[...,0]
# consistency_map*=consistency_map_multilayer[...,1]
# consistency_map*=consistency_map_multilayer[...,2]
# # blur
# if args.blur>0.:
# consistency_map = scipy.ndimage.gaussian_filter(consistency_map, [args.blur, args.blur])
# #clip values between bottom_clamp and 1
# bottom_clamp = min(max(args.bottom_clamp,0.), 0.999)
# consistency_map = consistency_map.clip(bottom_clamp, 1)
# out_fname = args.output+'/'+flow_fwd.split('/')[-1][:-4]+args.output_postfix
# if not args.skip_numpy_output:
# np.save(out_fname, consistency_map)
# #save as jpeg
# if args.image_output:
# PIL.Image.fromarray((consistency_map*255.).astype('uint8')).save(out_fname+'.jpg', quality=90)
# run(args)

View File

@ -72,8 +72,8 @@ def setup_controlnet_ui_raw():
refresh_models.click(refresh_all_models, model, model)
with gr.Row(visible=False) as weight_row:
weight = gr.Slider(label=f"Weight", value=1.0, minimum=0.0, maximum=2.0, step=.05, interactive=True)
guidance_start = gr.Slider(label="Guidance start", value=0.0, minimum=0.0, maximum=1.0, interactive=True)
guidance_end = gr.Slider(label="Guidance end", value=1.0, minimum=0.0, maximum=1.0, interactive=True)
guidance_start = gr.Slider(label="Starting Control Step", value=0.0, minimum=0.0, maximum=1.0, interactive=True)
guidance_end = gr.Slider(label="Ending Control Step", value=1.0, minimum=0.0, maximum=1.0, interactive=True)
model_dropdowns.append(model)
with gr.Column(visible=False) as advanced_column:
processor_res = gr.Slider(label="Annotator resolution", value=64, minimum=64, maximum=2048, interactive=False)

View File

@ -33,7 +33,7 @@ class ToolButton(gr.Button, gr.components.FormComponent):
def get_block_name(self):
return "button"
def build_sliders(module, pp):
# module = self.get_module_basename(module)
if module == "canny":
@ -71,7 +71,7 @@ def build_sliders(module, pp):
gr.update(visible=False, interactive=False),
gr.update(visible=True)
]
elif module in ["depth_leres", "depth_leres_boost"]:
elif module in ["depth_leres", "depth_leres++"]:
return [
gr.update(label="Preprocessor Resolution", minimum=64, maximum=2048, value=512, step=1, visible=not pp, interactive=not pp),
gr.update(label="Remove Near %", value=0, minimum=0, maximum=100, step=0.1, visible=True, interactive=True),
@ -99,7 +99,7 @@ def build_sliders(module, pp):
gr.update(visible=False, interactive=False),
gr.update(visible=True)
]
elif module == "tile_gaussian":
elif module == "tile_resample":
return [
gr.update(visible=False, interactive=False),
gr.update(label="Down Sampling Rate", value=1.0, minimum=1.0, maximum=8.0, step=0.01, visible=True, interactive=True),

View File

@ -1,16 +1,21 @@
import cv2
import os
import pathlib
import numpy as np
import random
import cv2
import numpy as np
import PIL
import torch
from PIL import Image, ImageChops, ImageOps, ImageEnhance
from .video_audio_utilities import vid2frames, get_quick_vid_info, get_frame_name, get_next_frame
from scipy.ndimage.filters import gaussian_filter
from .consistency_check import make_consistency
from .human_masking import video2humanmasks
from .load_images import load_image
from .video_audio_utilities import vid2frames, get_quick_vid_info, get_frame_name, get_next_frame
from modules.shared import opts
DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
# DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
def delete_all_imgs_in_folder(folder_path):
files = list(pathlib.Path(folder_path).glob('*.jpg'))
@ -149,8 +154,8 @@ def get_matrix_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_motio
print(f"Calculating {hybrid_motion} RANSAC matrix for frames {frame_idx} to {frame_idx+1}")
img1 = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx]), dimensions), cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions), cv2.COLOR_BGR2GRAY)
matrix = get_transformation_matrix_from_images(img1, img2, hybrid_motion)
return matrix
M = get_transformation_matrix_from_images(img1, img2, hybrid_motion)
return M
def get_matrix_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, prev_img, hybrid_motion):
print(f"Calculating {hybrid_motion} RANSAC matrix for frames {frame_idx} to {frame_idx+1}")
@ -161,20 +166,24 @@ def get_matrix_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, prev_im
else:
prev_img_gray = cv2.cvtColor(prev_img, cv2.COLOR_BGR2GRAY)
img = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions), cv2.COLOR_BGR2GRAY)
matrix = get_transformation_matrix_from_images(prev_img_gray, img, hybrid_motion)
return matrix
M = get_transformation_matrix_from_images(prev_img_gray, img, hybrid_motion)
return M
def get_flow_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, method, raft_model, do_flow_visualization=False):
print(f"Calculating {method} optical flow for frames {frame_idx} to {frame_idx+1}")
def get_flow_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, method, raft_model, consistency_check=True, consistency_blur=0, do_flow_visualization=False):
    """Compute optical flow between input frames `frame_idx` and `frame_idx+1`.

    Args:
        frame_idx: index of the first frame of the pair.
        dimensions: (width, height) the input frames are resized to.
        inputfiles: sequence of input frame file paths.
        hybrid_frame_path: folder where visualization images are written.
        prev_flow: previous frame's flow, passed to the flow estimator.
        method: flow method name (e.g. 'RAFT', 'DIS Medium', 'Farneback').
        raft_model: loaded RAFT model (used only when method is 'RAFT').
        consistency_check: when True, filter the forward flow with a
            backward-consistency reliability mask.
        consistency_blur: gaussian blur sigma applied to the reliability mask.
        do_flow_visualization: when True, save a visualization image.

    Returns:
        The (possibly consistency-filtered) forward flow array.
    """
    print(f"Calculating {method} optical flow {'w/consistency mask' if consistency_check else ''} for frames {frame_idx} to {frame_idx+1}")
    i1 = get_resized_image_from_filename(str(inputfiles[frame_idx]), dimensions)
    i2 = get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions)
    if consistency_check:
        # forward flow w/backward consistency check
        flow, reliable_flow = get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur)
        if do_flow_visualization: save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path)
    else:
        # old single flow forward
        flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow)
        if do_flow_visualization: save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path)
    return flow
def get_flow_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, prev_img, method, raft_model, do_flow_visualization=False):
print(f"Calculating {method} optical flow for frames {frame_idx} to {frame_idx+1}")
def get_flow_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, prev_img, method, raft_model, consistency_check=True, consistency_blur=0, do_flow_visualization=False):
print(f"Calculating {method} optical flow {'w/consistency mask' if consistency_check else ''} for frames {frame_idx} to {frame_idx+1}")
reliable_flow = None
# first handle invalid images by returning default flow
height, width = prev_img.shape[:2]
if height == 0 or width == 0:
@ -182,16 +191,44 @@ def get_flow_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, hybrid_fr
else:
i1 = prev_img.astype(np.uint8)
i2 = get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions)
flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow)
if do_flow_visualization:
save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path)
if consistency_check:
flow, reliable_flow = get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur) # forward flow w/backward consistency check
if do_flow_visualization: save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path)
else:
flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow)
if do_flow_visualization: save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path)
return flow
def image_transform_ransac(image_cv2, xform, hybrid_motion):
def get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur, reliability=0):
    """Compute forward flow between images `i1` and `i2`, filtered by a
    backward-consistency reliability mask.

    Args:
        i1, i2: the two images (BGR arrays) to compute flow between.
        method: flow method name passed to get_flow_from_images.
        raft_model: loaded RAFT model (used when method is 'RAFT').
        prev_flow: previous flow passed to the forward-flow estimator.
        consistency_blur: gaussian sigma for blurring the reliability mask
            (0 disables the blur).
        reliability: reliability threshold forwarded to filter_flow.

    Returns:
        (filtered_forward_flow, reliable_flow) tuple.
    """
    flow_forward = get_flow_from_images(i1, i2, method, raft_model, prev_flow)
    flow_backward = get_flow_from_images(i2, i1, method, raft_model, None)
    reliable_flow = make_consistency(flow_forward, flow_backward, edges_unreliable=False)
    if consistency_blur > 0:
        reliable_flow = custom_gaussian_blur(reliable_flow.astype(np.float32), 1, consistency_blur)
    # BUGFIX: the original passed (consistency_blur, reliability) positionally
    # into filter_flow(flow, reliable_flow, reliability=0.5, consistency_blur=0),
    # silently swapping the two values. Harmless today only because filter_flow
    # ignores both parameters — use keywords so the latent bug cannot bite.
    return filter_flow(flow_forward, reliable_flow, reliability=reliability, consistency_blur=consistency_blur), reliable_flow
def custom_gaussian_blur(input_array, blur_size, sigma):
    """Gaussian-blur an (h, w, c) array spatially only.

    Blurs along the first two axes with standard deviation `sigma`, leaving
    the channel axis untouched; out-of-bounds samples are treated as 0 and
    the kernel is truncated at `blur_size` standard deviations.
    """
    spatial_only_sigma = (sigma, sigma, 0)  # no mixing across channels
    return gaussian_filter(
        input_array,
        sigma=spatial_only_sigma,
        order=0,
        mode='constant',
        cval=0.0,
        truncate=blur_size,
    )
def filter_flow(flow, reliable_flow, reliability=0.5, consistency_blur=0):
    """Scale `flow` per-pixel by channel 0 of `reliable_flow`.

    Channel 0 of the reliability map is -0.75 (bad), 0 (meh/out-of-frame) or
    1 (great); its raw values are multiplied into every flow component.
    Note: `reliability` and `consistency_blur` are currently unused — the
    thresholded variant is kept below for reference.
    """
    # keep a trailing singleton axis so the (h, w, 1) weight broadcasts
    # across the flow's (h, w, 2) components
    weight = reliable_flow[..., 0:1]
    # to set everything to 1 or 0 based on reliability:
    # weight = np.where(weight >= reliability, 1, 0)
    return flow * weight
def image_transform_ransac(image_cv2, M, hybrid_motion, depth=None):
    """Apply a RANSAC-estimated transform matrix `M` to `image_cv2`.

    Dispatches on `hybrid_motion`: 'Perspective' uses a perspective warp,
    anything else an affine warp. `depth` is forwarded for optional
    depth-aware warping (None means a plain 2D warp).
    """
    if hybrid_motion == "Perspective":
        return image_transform_perspective(image_cv2, M, depth)
    else: # Affine
        return image_transform_affine(image_cv2, M, depth)
def image_transform_optical_flow(img, flow, flow_factor):
# if flow factor not normal, calculate flow factor
@ -204,21 +241,35 @@ def image_transform_optical_flow(img, flow, flow_factor):
flow[:, :, 1] += np.arange(h)[:,np.newaxis]
return remap(img, flow)
def image_transform_affine(image_cv2, M, depth=None):
    """Warp `image_cv2` with the 2x3 affine matrix `M`.

    Without a depth map this is a plain cv2.warpAffine at the image's own
    size, reflecting at the borders; with `depth` it delegates to
    depth_based_affine_warp (defined elsewhere in the project).
    """
    if depth is None:
        return cv2.warpAffine(
            image_cv2,
            M,
            (image_cv2.shape[1], image_cv2.shape[0]),
            borderMode=cv2.BORDER_REFLECT_101
        )
    else:
        # depth-aware path; helper lives elsewhere in the project
        return depth_based_affine_warp(
            image_cv2,
            depth,
            M
        )
def image_transform_perspective(image_cv2, M, depth=None):
    """Warp `image_cv2` with the 3x3 perspective matrix `M`.

    Without a depth map this is a plain cv2.warpPerspective at the image's
    own size, reflecting at the borders; with `depth` it delegates to
    render_3d_perspective (defined elsewhere in the project).
    """
    if depth is None:
        return cv2.warpPerspective(
            image_cv2,
            M,
            (image_cv2.shape[1], image_cv2.shape[0]),
            borderMode=cv2.BORDER_REFLECT_101
        )
    else:
        # depth-aware path; helper lives elsewhere in the project
        return render_3d_perspective(
            image_cv2,
            depth,
            M
        )
def get_hybrid_motion_default_matrix(hybrid_motion):
if hybrid_motion == "Perspective":
@ -373,7 +424,37 @@ def save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_fram
cv2.imwrite(flow_img_file, flow_img)
print(f"Saved optical flow visualization: {flow_img_file}")
def draw_flow_lines_in_grid_in_color(img, flow, step=8, magnitude_multiplier=1, min_magnitude = 1, max_magnitude = 10000):
def save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path, color=True):
    """Save the consistency (reliability) map for `frame_idx` as a jpg.

    Args:
        frame_idx: frame index, used in the output filename.
        reliable_flow: (h, w, 3) reliability map from make_consistency.
        hybrid_frame_path: destination folder for the image.
        color: True writes all 3 reliability channels as a color image;
            False writes channel 0 replicated to grayscale BGR.
    """
    flow_mask_img_file = os.path.join(hybrid_frame_path, f"flow_mask{frame_idx:09}.jpg")

    def _normalize_to_uint8(arr):
        # guard against a constant array: the previous min/max normalization
        # divided by zero there, producing NaNs that cast to garbage uint8
        lo, hi = arr.min(), arr.max()
        if hi == lo:
            return np.zeros_like(arr, dtype=np.uint8)
        return ((arr - lo) / (hi - lo) * 255).astype(np.uint8)

    if color:
        # normalize all three reliability channels to [0, 255]
        mask_image = _normalize_to_uint8(reliable_flow)
    else:
        # normalize channel 0 and replicate it to form a BGR image
        grayscale_image = _normalize_to_uint8(reliable_flow[..., 0])
        mask_image = np.stack((grayscale_image, grayscale_image, grayscale_image), axis=2)
    cv2.imwrite(flow_mask_img_file, mask_image)
    print(f"Saved mask flow visualization: {flow_mask_img_file}")
def reliable_flow_to_image(reliable_flow):
    """Convert channel 0 of a reliability map into a 3-channel uint8 image.

    Channel 0 is min/max-normalized to [0, 255] and replicated across three
    channels to form a BGR grayscale image.

    Args:
        reliable_flow: (h, w, c) reliability map; only channel 0 is used.

    Returns:
        (h, w, 3) uint8 array.
    """
    first_channel = reliable_flow[..., 0]
    lo, hi = first_channel.min(), first_channel.max()
    if hi == lo:
        # guard: a constant channel previously caused 0/0 -> NaN and an
        # undefined NaN->uint8 cast; emit a black image instead
        grayscale_image = np.zeros_like(first_channel, dtype=np.uint8)
    else:
        # normalize to [0, 255]
        grayscale_image = ((first_channel - lo) / (hi - lo) * 255).astype(np.uint8)
    # replicate the grayscale channel three times to form a BGR image
    return np.stack((grayscale_image, grayscale_image, grayscale_image), axis=2)
def draw_flow_lines_in_grid_in_color(img, flow, step=8, magnitude_multiplier=1, min_magnitude = 0, max_magnitude = 10000):
flow = flow * magnitude_multiplier
h, w = img.shape[:2]
y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)

View File

@ -1,20 +1,16 @@
import os
import json
import pandas as pd
import cv2
import re
import numpy as np
import itertools
import numexpr
import gc
import random
import PIL
import time
from PIL import Image, ImageOps
from .rich import console
from .generate import generate, isJson
from .noise import add_noise
from .animation import sample_from_cv2, sample_to_cv2, anim_frame_warp
from .animation import anim_frame_warp
from .animation_key_frames import DeformAnimKeys, LooperAnimKeys
from .video_audio_utilities import get_frame_name, get_next_frame
from .depth import MidasModel, AdaBinsModel
@ -37,8 +33,6 @@ from .prompt import prepare_prompt
from modules.shared import opts, cmd_opts, state, sd_model
from modules import lowvram, devices, sd_hijack
from .RAFT import RAFT
from .ZoeDepth import ZoeDepth
import torch
def render_animation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, animation_prompts, root):
DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
@ -280,6 +274,10 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro
tween_frame_start_idx = max(start_frame, frame_idx-turbo_steps)
cadence_flow = None
for tween_frame_idx in range(tween_frame_start_idx, frame_idx):
# update progress during cadence
state.job = f"frame {tween_frame_idx + 1}/{anim_args.max_frames}"
state.job_no = tween_frame_idx + 1
# cadence vars
tween = float(tween_frame_idx - tween_frame_start_idx + 1) / float(frame_idx - tween_frame_start_idx)
advance_prev = turbo_prev_image is not None and tween_frame_idx > turbo_prev_frame_idx
advance_next = tween_frame_idx > turbo_next_frame_idx
@ -341,14 +339,14 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro
turbo_next_image = image_transform_ransac(turbo_next_image, matrix, anim_args.hybrid_motion)
if anim_args.hybrid_motion in ['Optical Flow']:
if anim_args.hybrid_motion_use_prev_img:
flow = get_flow_for_hybrid_motion_prev(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_comp_save_extra_frames)
flow = get_flow_for_hybrid_motion_prev(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
if advance_prev:
turbo_prev_image = image_transform_optical_flow(turbo_prev_image, flow, hybrid_comp_schedules['flow_factor'])
if advance_next:
turbo_next_image = image_transform_optical_flow(turbo_next_image, flow, hybrid_comp_schedules['flow_factor'])
prev_flow = flow
else:
flow = get_flow_for_hybrid_motion(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_comp_save_extra_frames)
flow = get_flow_for_hybrid_motion(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
if advance_prev:
turbo_prev_image = image_transform_optical_flow(turbo_prev_image, flow, hybrid_comp_schedules['flow_factor'])
if advance_next:
@ -374,6 +372,9 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro
# get prev_img during cadence
prev_img = img
# current image update for cadence frames (left commented because it doesn't currently update the preview)
# state.current_image = Image.fromarray(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB))
# saving cadence frames
filename = f"{args.timestring}_{tween_frame_idx:09}.png"
cv2.imwrite(os.path.join(args.outdir, filename), img)
@ -408,9 +409,9 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro
prev_img = image_transform_ransac(prev_img, matrix, anim_args.hybrid_motion)
if anim_args.hybrid_motion in ['Optical Flow']:
if anim_args.hybrid_motion_use_prev_img:
flow = get_flow_for_hybrid_motion_prev(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_comp_save_extra_frames)
flow = get_flow_for_hybrid_motion_prev(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
else:
flow = get_flow_for_hybrid_motion(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model,anim_args.hybrid_comp_save_extra_frames)
flow = get_flow_for_hybrid_motion(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
prev_img = image_transform_optical_flow(prev_img, flow, hybrid_comp_schedules['flow_factor'])
prev_flow = flow

View File

@ -163,7 +163,6 @@ def ffmpeg_stitch_video(ffmpeg_location=None, fps=None, outmp4_path=None, stitch
cmd = [
ffmpeg_location,
'-y',
'-vcodec', 'png',
'-r', str(float(fps)),
'-start_number', str(stitch_from_frame),
'-i', imgs_path,
@ -174,9 +173,12 @@ def ffmpeg_stitch_video(ffmpeg_location=None, fps=None, outmp4_path=None, stitch
'-pix_fmt', 'yuv420p',
'-crf', str(crf),
'-preset', preset,
'-pattern_type', 'sequence',
outmp4_path
'-pattern_type', 'sequence'
]
cmd.append('-vcodec')
cmd.append('png' if imgs_path[0].find('.png') != -1 else 'libx264')
cmd.append(outmp4_path)
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = process.communicate()
except FileNotFoundError: