Added Optical flow redo generation

Option works in 2D/3D mode
Performs a disposable generation before the actual generation to capture the optical flow between the previous image and that generation. It then discards the disposable result and warps the init_sample using the captured flow before the actual generation happens.
- Takes twice as long (obviously)
- It can introduce smoothing, especially over time, but also adds a type of artistic consistency between frames.
- Can be mixed and matched with any other modes: optical flow cadence, video input, etc.
pull/474/head
reallybigname 2023-03-16 01:39:41 -07:00
parent 97923e5768
commit e0a4337d8b
4 changed files with 17 additions and 3 deletions

1
.gitignore vendored
View File

@ -9,3 +9,4 @@ outputs
# Log files for colab-convert
cc-outputs.log
*.safetensors
scripts/deforum_helpers/navigation.py

View File

@ -70,6 +70,7 @@ deforum_titles = {
"RGB": "RGB is good for enforcing unbiased amounts of color in each red, green and blue channel - some images may yield colorized artifacts if sampling is too low.",
"Cadence": "A setting of 1 will cause every frame to receive diffusion in the sequence of image outputs. A setting of 2 will only diffuse on every other frame, yet motion will still be in effect. The output of images during the cadence sequence will be automatically blended, additively and saved to the specified drive. This may improve the illusion of coherence in some workflows as the content and context of an image will not change or diffuse during frames that were skipped. Higher values of 4-8 cadence will skip over a larger amount of frames and only diffuse the “Nth” frame as set by the diffusion_cadence value. This may produce more continuity in an animation, at the cost of little opportunity to add more diffused content. In extreme examples, motion within a frame will fail to produce diverse prompt context, and the space will be filled with lines or approximations of content - resulting in unexpected animation patterns and artifacts. Video Input & Interpolation modes are not affected by diffusion_cadence.",
"Optical flow cadence": "Whether to use optical flow to blend frames during cadence in 3D animation mode (if cadence more than 1)",
"Optical flow redo generation": "This option takes twice as long because it generates twice in order to capture the optical flow from the previous image to the first generation, then warps the previous image and redoes the generation. Works in 2D/3D animation modes.",
"Noise type": "Selects the type of noise being added to each frame",
"uniform": "Uniform noise covers the entire frame. It somewhat flattens and sharpens the video over time, but may be good for cartoonish look. This is the old default setting.",
"perlin": "Perlin noise is a more natural looking noise. It is heterogeneous and less sharp than uniform noise, this way it is more likely that new details will appear in a more coherent way. This is the new default setting.",

View File

@ -101,6 +101,7 @@ def DeforumAnimArgs():
color_force_grayscale = False
diffusion_cadence = '2' #['1','2','3','4','5','6','7','8']
optical_flow_cadence = False
optical_flow_redo_generation = False
#**Noise settings:**
noise_type = 'perlin' # ['uniform', 'perlin']
# Perlin params
@ -550,6 +551,7 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True):
with gr.Row(variant='compact'):
contrast_schedule = gr.Textbox(label="Contrast schedule", lines=1, value = da.contrast_schedule, interactive=True)
optical_flow_cadence = gr.Checkbox(label="Optical flow cadence", value=False, visible=False, interactive=True, elem_id='optical_flow_cadence')
optical_flow_redo_generation = gr.Checkbox(label="Optical flow redo generation", value=False, visible=True, interactive=True, elem_id='optical_flow_redo_generation')
with gr.Row(variant='compact'):
# what to do with blank frames (they may result from glitches or the NSFW filter being turned on): reroll with +1 seed, interrupt the animation generation, or do nothing
reroll_blank_frames = gr.Radio(['reroll', 'interrupt', 'ignore'], label="Reroll blank frames", value=d.reroll_blank_frames, elem_id="reroll_blank_frames")
@ -945,7 +947,7 @@ def setup_deforum_setting_dictionary(self, is_img2img, is_extension = True):
ncnn_upscale_factor.change(update_upscale_out_res, inputs=[ncnn_upscale_in_vid_res, ncnn_upscale_factor], outputs=ncnn_upscale_out_vid_res)
vid_to_upscale_chosen_file.change(vid_upscale_gradio_update_stats,inputs=[vid_to_upscale_chosen_file, ncnn_upscale_factor],outputs=[ncnn_upscale_in_vid_fps_ui_window, ncnn_upscale_in_vid_frame_count_window, ncnn_upscale_in_vid_res, ncnn_upscale_out_vid_res])
animation_mode.change(fn=change_max_frames_visibility, inputs=animation_mode, outputs=max_frames)
diffusion_cadence_outputs = [diffusion_cadence,guided_images_accord,optical_flow_cadence]
diffusion_cadence_outputs = [diffusion_cadence,guided_images_accord,optical_flow_cadence,optical_flow_redo_generation]
for output in diffusion_cadence_outputs:
animation_mode.change(fn=change_diffusion_cadence_visibility, inputs=animation_mode, outputs=output)
three_d_related_outputs = [depth_3d_warping_accord,fov_accord,optical_flow_cadence,only_3d_motion_column]
@ -1021,7 +1023,7 @@ anim_args_names = str(r'''animation_mode, max_frames, border,
enable_clipskip_scheduling, clipskip_schedule, enable_noise_multiplier_scheduling, noise_multiplier_schedule,
kernel_schedule, sigma_schedule, amount_schedule, threshold_schedule,
color_coherence, color_coherence_image_path, color_coherence_video_every_N_frames, color_force_grayscale,
diffusion_cadence, optical_flow_cadence,
diffusion_cadence, optical_flow_cadence,optical_flow_redo_generation,
noise_type, perlin_w, perlin_h, perlin_octaves, perlin_persistence,
use_depth_warping, midas_weight,
padding_mode, sampling_mode, save_depth_maps,

View File

@ -471,7 +471,17 @@ def render_animation(args, anim_args, video_args, parseq_args, loop_args, contro
lowvram.setup_for_low_vram(sd_model, cmd_opts.medvram)
sd_hijack.model_hijack.hijack(sd_model)
# sample the diffusion model
# optical flow redo before generation
if anim_args.optical_flow_redo_generation and prev_img is not None:
print("Optical Flow redo creating disposable diffusion before actual diffusion for flow estimate.")
disposable_image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
disposable_image = cv2.cvtColor(np.array(disposable_image), cv2.COLOR_RGB2BGR)
disposable_flow = get_flow_from_images(prev_img, disposable_image, "DIS Medium")
noised_image = image_transform_optical_flow(noised_image, disposable_flow)
args.init_sample = Image.fromarray(cv2.cvtColor(noised_image, cv2.COLOR_BGR2RGB))
del(disposable_image,disposable_flow)
# generation
image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
patience = 10