From 961ac85cde0d96b589aecfe2574506f9c67bb1c1 Mon Sep 17 00:00:00 2001
From: Charles Fettinger <charles@onacloud.org>
Date: Tue, 6 Jun 2023 00:09:40 -0700
Subject: [PATCH] Add sound volume, clean up extra code, fix bug, add video
 length estimate

---
 iz_helpers/InfZoomConfig.py |   1 +
 iz_helpers/run.py           | 285 +-----------------------------------
 iz_helpers/run_interface.py |   2 +
 iz_helpers/ui.py            |  56 +++++--
 iz_helpers/video.py         |  10 +-
 5 files changed, 61 insertions(+), 293 deletions(-)

diff --git a/iz_helpers/InfZoomConfig.py b/iz_helpers/InfZoomConfig.py
index 46167c1..ab0e0bd 100644
--- a/iz_helpers/InfZoomConfig.py
+++ b/iz_helpers/InfZoomConfig.py
@@ -35,6 +35,7 @@ class InfZoomConfig():
     blend_invert_do:bool
     blend_color:str
     audio_filename:str=None
+    audio_volume:float = 1
     inpainting_denoising_strength:float=1
     inpainting_full_res:int =0
     inpainting_padding:int=0
diff --git a/iz_helpers/run.py b/iz_helpers/run.py
index c4bf1ab..2cbff0d 100644
--- a/iz_helpers/run.py
+++ b/iz_helpers/run.py
@@ -118,12 +118,12 @@ class InfZoomer:
         processed = self.fnOutpaintMainFrames()
 
         #trim frames that are blended or luma wiped
+        self.start_frames = self.main_frames[:2]
+        self.end_frames = self.main_frames[(len(self.main_frames) - 2):]
         if (self.C.blend_mode != 0):
             #trim first and last frames only from main_frames, store 2 frames in each start_frames and end_frames for blending
-            self.start_frames = self.main_frames[:2]
-            self.end_frames = self.main_frames[(len(self.main_frames) - 2):]
             self.main_frames = self.main_frames[1:(len(self.main_frames) - 1)]
-            print(f"Trimmed Blending Mode frames: start_frames:{len(self.start_frames)} end_frames:{len(self.end_frames)} main_frames:{len(self.main_frames)}")
+        print(f"Trimmed Blending Mode frames: start_frames:{len(self.start_frames)} end_frames:{len(self.end_frames)} main_frames:{len(self.main_frames)}")
 
         if (self.C.upscale_do): 
             self.doUpscaling()
@@ -154,7 +154,7 @@ class InfZoomer:
         self.fnInterpolateFrames() # changes main_frame and writes to video
 
         if self.C.audio_filename is not None:
-            self.out_config["video_filename"] = add_audio_to_video(self.out_config["video_filename"], self.C.audio_filename, str.replace(self.out_config["video_filename"], ".mp4", "_audio.mp4"), find_ffmpeg_binary())
+            self.out_config["video_filename"] = add_audio_to_video(self.out_config["video_filename"], self.C.audio_filename, str.replace(self.out_config["video_filename"], ".mp4", "_audio.mp4"), self.C.audio_volume, find_ffmpeg_binary())
 
         print("Video saved in: " + os.path.join(script_path, self.out_config["video_filename"]))
 
@@ -790,7 +790,7 @@ class InfZoomer:
 ##########################################################################################################################
 
 ##########################################################################################################################
-# Infinite Zoom
+# Infinite Zoom Classic
 
 def prepare_output_path():
     isCollect = shared.opts.data.get("infzoom_collectAllResources", False)
@@ -830,281 +830,6 @@ def frames2Collect(all_frames, out_config):
     for i, f in enumerate(all_frames):
         save2Collect(f, out_config, f"frame_{i}")
 
-
-def create_zoom_single(
-    common_prompt_pre,
-    prompts_array,
-    common_prompt_suf,
-    negative_prompt,
-    num_outpainting_steps,
-    guidance_scale,
-    num_inference_steps,
-    custom_init_image,
-    custom_exit_image,
-    video_frame_rate,
-    video_zoom_mode,
-    video_start_frame_dupe_amount,
-    video_last_frame_dupe_amount,
-    inpainting_mask_blur,
-    inpainting_fill_mode,
-    zoom_speed,
-    seed,
-    outputsizeW,
-    outputsizeH,
-    sampler,
-    upscale_do,
-    upscaler_name,
-    upscale_by,
-    overmask,
-    outpaintStrategy,
-    outpaint_amount_px,
-    blend_image,
-    blend_mode,
-    blend_gradient_size,
-    blend_invert_do,
-    blend_color,
-    inpainting_denoising_strength,
-    inpainting_full_res,
-    inpainting_padding,
-    progress,
-    audio_filename = None
-):
-    # try:
-    #     if gr.Progress() is not None:
-    #         progress = gr.Progress()
-    #         progress(0, desc="Preparing Initial Image")
-    # except Exception:
-    #     pass
-    fix_env_Path_ffprobe()
-    out_config = prepare_output_path()
-
-    prompts = {}
-    prompt_images = {}
-    prompt_alpha_mask_images = {}
-    prompt_image_is_keyframe = {}
-
-    for x in prompts_array:
-        try:
-            key = int(x[0])
-            value = str(x[1])
-            file_loc = str(x[2])
-            alpha_mask_loc = str(x[3])
-            is_keyframe = bool(x[4])
-            prompts[key] = value
-            prompt_images[key] = file_loc
-            prompt_alpha_mask_images[key] = alpha_mask_loc
-            prompt_image_is_keyframe[key] = value_to_bool(is_keyframe)
-        except ValueError:
-            pass
-
-    assert len(prompts_array) > 0, "prompts is empty"
-    print(str(len(prompts)) + " prompts found")
-    print(str(len([value for value in prompt_images.values() if value != ""])) + " prompt Images found")
-    print(str(len([value for value in prompt_alpha_mask_images.values() if value != ""])) + " prompt Alpha Masks found")
-
-    width = closest_upper_divisible_by_eight(outputsizeW)
-    height = closest_upper_divisible_by_eight(outputsizeH)
-
-    current_image = Image.new(mode="RGBA", size=(width, height))
-    #mask_image = np.array(current_image)[:, :, 3]
-    #mask_image = Image.fromarray(255 - mask_image).convert("RGB")
-    #current_image = current_image.convert("RGB")
-    current_seed = seed
-    extra_frames = 0
-
-    if custom_init_image:
-        current_image = resize_and_crop_image(custom_init_image, width, height)
-        print("using Custom Initial Image")
-        save2Collect(current_image, out_config, f"init_custom.png")
-        #processed = Processed(StableDiffusionProcessing(),images_list=[current_image], seed=current_seed, info="init_custom image")
-    else:
-        if prompt_images[min(k for k in prompt_images.keys() if k >= 0)] == "":
-            load_model_from_setting(
-                "infzoom_txt2img_model", progress, "Loading Model for txt2img: "
-            )
-            pr = prompts[min(k for k in prompts.keys() if k >= 0)]
-            processed, current_seed = renderTxt2Img(
-            f"{common_prompt_pre}\n{pr}\n{common_prompt_suf}".strip(),
-            negative_prompt,
-            sampler,
-            num_inference_steps,
-            guidance_scale,
-            current_seed,
-            width,
-            height,
-            )
-            if len(processed.images) > 0:
-                current_image = processed.images[0]
-                save2Collect(current_image, out_config, f"init_txt2img.png")
-        else:
-            print("using image 0 as Initial keyframe")
-            current_image = open_image(prompt_images[min(k for k in prompt_images.keys() if k >= 0)])
-            current_image = resize_and_crop_image(current_image, width, height)
-            save2Collect(current_image, out_config, f"init_custom.png")
-            #processed = Processed(StableDiffusionProcessing(),images_list=[current_image], seed=current_seed, info="prompt_0 image")
-
-    mask_width = math.trunc(width / 4)  # was initially 512px => 128px
-    mask_height = math.trunc(height / 4)  # was initially 512px => 128px
-
-    num_interpol_frames = round(video_frame_rate * zoom_speed)
-
-    all_frames = []
-
-    if upscale_do and progress:
-        progress(0, desc="upscaling inital image")
-
-    load_model_from_setting(
-        "infzoom_inpainting_model", progress, "Loading Model for inpainting/img2img: "
-    )
-
-    if custom_exit_image:
-        extra_frames += 1
-
-    main_frames, processed = outpaint_steps(
-        width,
-        height,
-        common_prompt_pre,
-        common_prompt_suf,
-        prompts,
-        prompt_images,
-        prompt_alpha_mask_images,
-        prompt_image_is_keyframe,
-        negative_prompt,
-        current_seed,
-        sampler,
-        int(num_inference_steps),
-        guidance_scale,
-        inpainting_denoising_strength,
-        inpainting_mask_blur,
-        inpainting_fill_mode,
-        inpainting_full_res,
-        inpainting_padding,
-        current_image,
-        num_outpainting_steps + extra_frames,
-        out_config,
-        mask_width,
-        mask_height,
-        custom_exit_image,
-        False,
-        blend_gradient_size
-    )
-
-    #for k in range(len(main_frames)):
-        #print(str(f"Frame {k} : {main_frames[k]}"))
-        #resize_and_crop_image(main_frames[k], width, height)        
-
-    all_frames.append(
-        do_upscaleImg(main_frames[0], upscale_do, upscaler_name, upscale_by)
-        if upscale_do
-        else main_frames[0]
-    )
-    for i in range(len(main_frames) - 1):
-        print(f"processing frame {i}")
-
-        # interpolation steps between 2 inpainted images (=sequential zoom and crop)
-        for j in range(num_interpol_frames - 1):
-            current_image = main_frames[i + 1]
-            interpol_image = current_image
-            save2Collect(interpol_image, out_config, f"interpol_img_{i}_{j}].png")
-
-            interpol_width = math.ceil(
-                (
-                    1
-                    - (1 - 2 * mask_width / width)
-                    ** (1 - (j + 1) / num_interpol_frames)
-                )
-                * width
-                / 2
-            )
-
-            interpol_height = math.ceil(
-                (
-                    1
-                    - (1 - 2 * mask_height / height)
-                    ** (1 - (j + 1) / num_interpol_frames)
-                )
-                * height
-                / 2
-            )
-
-            interpol_image = interpol_image.crop(
-                (
-                    interpol_width,
-                    interpol_height,
-                    width - interpol_width,
-                    height - interpol_height,
-                )
-            )
-
-            interpol_image = interpol_image.resize((width, height))
-            save2Collect(interpol_image, out_config, f"interpol_resize_{i}_{j}.png")
-
-            # paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
-            interpol_width2 = math.ceil(
-                (1 - (width - 2 * mask_width) / (width - 2 * interpol_width))
-                / 2
-                * width
-            )
-
-            interpol_height2 = math.ceil(
-                (1 - (height - 2 * mask_height) / (height - 2 * interpol_height))
-                / 2
-                * height
-            )
-
-            prev_image_fix_crop = shrink_and_paste_on_blank(
-                main_frames[i], interpol_width2, interpol_height2
-            )
-
-            interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)
-            save2Collect(interpol_image, out_config, f"interpol_prevcrop_{i}_{j}.png")
-
-            if upscale_do and progress:
-                progress(((i + 1) / num_outpainting_steps), desc="upscaling interpol")
-
-            all_frames.append(
-                do_upscaleImg(interpol_image, upscale_do, upscaler_name, upscale_by)
-                if upscale_do
-                else interpol_image
-            )
-
-        if upscale_do and progress:
-            progress(((i + 1) / num_outpainting_steps), desc="upscaling current")
-
-        all_frames.append(
-            #do_upscaleImg(current_image, upscale_do, upscaler_name, upscale_by)
-            #if upscale_do
-            #else 
-            current_image
-        )
-
-    frames2Collect(all_frames, out_config)
-
-    write_video(
-        out_config["video_filename"],
-        all_frames,
-        video_frame_rate,
-        video_zoom_mode,
-        int(video_start_frame_dupe_amount),
-        int(video_last_frame_dupe_amount),
-        num_interpol_frames,
-        blend_invert_do,
-        blend_image,
-        blend_mode,
-        blend_gradient_size,
-        hex_to_rgba(blend_color),
-    )
-    if audio_filename is not None:
-        out_config["video_filename"] = add_audio_to_video(out_config["video_filename"], audio_filename, str.replace(out_config["video_filename"], ".mp4", "_audio.mp4"), find_ffmpeg_binary())
-
-    print("Video saved in: " + os.path.join(script_path, out_config["video_filename"]))
-    return (
-        out_config["video_filename"],
-        main_frames,
-        processed.js(),
-        plaintext_to_html(processed.info),
-        plaintext_to_html(""),
-    )
 #################################################################################################################
 def create_mask_with_circles(original_image_width, original_image_height, border_width, border_height, overmask: int, radius=4):
     # Create a new image with border and draw a mask
diff --git a/iz_helpers/run_interface.py b/iz_helpers/run_interface.py
index e87c210..91ecec0 100644
--- a/iz_helpers/run_interface.py
+++ b/iz_helpers/run_interface.py
@@ -38,6 +38,7 @@ def createZoom(
     blend_invert_do:bool,
     blend_color:str,
     audio_filename:str = None,
+    audio_volume:float = 1,
     inpainting_denoising_strength:float=1,
     inpainting_full_res:int =0,
     inpainting_padding:int=0,
@@ -77,6 +78,7 @@ def createZoom(
         blend_invert_do,
         blend_color,
         audio_filename,
+        audio_volume,
         inpainting_denoising_strength,
         inpainting_full_res,
         inpainting_padding,
diff --git a/iz_helpers/ui.py b/iz_helpers/ui.py
index e5a1fa9..c489067 100644
--- a/iz_helpers/ui.py
+++ b/iz_helpers/ui.py
@@ -27,7 +27,7 @@ from .static_variables import promptTableHeaders
 
 def on_ui_tabs():
     main_seed = gr.Number()
-    audio_filename = gr.Textbox(None)
+    audio_filename = gr.Textbox(None)    
 
     with gr.Blocks(analytics_enabled=False) as infinite_zoom_interface:
         gr.HTML(
@@ -56,7 +56,7 @@ def on_ui_tabs():
 
                         main_outpaint_steps = gr.Slider(
                             minimum=2,
-                            maximum=100,
+                            maximum=120,
                             step=1,
                             label="Total video length [s]",
                             value=default_total_outpaints,
@@ -169,14 +169,23 @@ def on_ui_tabs():
                         maximum=120,
                         step=1
                     )
-                    video_zoom_speed = gr.Slider(
-                        label="Zoom Speed",
-                        value=1.0,
-                        minimum=0.1,
-                        maximum=20.0,
-                        step=0.1,
-                        info="Zoom speed in seconds (higher values create slower zoom)",
-                    )
+                    with gr.Row():
+                        video_zoom_speed = gr.Slider(
+                            label="Zoom Speed",
+                            value=1.0,
+                            minimum=0.1,
+                            maximum=20.0,
+                            step=0.1,
+                            info="Zoom speed in seconds (higher values create slower zoom)",
+                        )
+                        video_est_length = gr.Number(
+                            label="Estimated video length [s]",
+                            info="a basic estimation of the video length",
+                            value=1.0,
+                            precision=1,
+                            readonly=True,
+                            id="infzoom_est_length",
+                        )
                     with gr.Accordion("FFMPEG Expert", open=False):
                         gr.Markdown(
                             """# I need FFMPEG control
@@ -216,6 +225,12 @@ You might give multiple options in one line.
                                 label='Blend Edge Color', 
                                 default='#ffff00'
                             )
+                            video_zoom_speed.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            main_outpaint_steps.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            video_frame_rate.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            video_start_frame_dupe_amount.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            video_last_frame_dupe_amount.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            blend_mode.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])  
                     with gr.Accordion("Blend Info", open=False):
                         gr.Markdown(
                             """# Important Blend Info:
@@ -240,6 +255,13 @@ Ideas for custom blend images: https://www.pexels.com/search/gradient/
                             type="file",
                             label="Audio File")
                         audio_file.change(get_filename, inputs=[audio_file], outputs=[audio_filename])
+                    with gr.Row():
+                        audio_volume = gr.Slider(
+                            label="Audio volume",
+                            minimum=0.0,
+                            maximum=2.0,
+                            step=.05,
+                            value=1.0)
 
                 with gr.Tab("Outpaint"):
                     outpaint_amount_px = gr.Slider(
@@ -426,6 +448,7 @@ Our best experience and trade-off is the R-ERSGAn4x upscaler.
                 blend_invert_do,
                 blend_color,
                 audio_filename,
+                audio_volume,
             ],
             outputs=[output_video, out_image, generation_info, html_info, html_log],
         )
@@ -453,4 +476,15 @@ def get_min_outpaint_amount(width, outpaint_amount, strategy):
     min_outpaint_px = outpaint_amount
     if strategy == "Center":
         min_outpaint_px = closest_upper_divisible_by_eight(max(outpaint_amount, width // 4))
-    return min_outpaint_px
\ No newline at end of file
+    return min_outpaint_px
+
+def calc_est_video_length(blend_mode, video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,fps, main_outpaint_steps):
+    # Calculates the estimated video length in seconds from the blend mode, zoom speed, start/end frame dupe amounts, fps, and outpaint steps.
+    # This is just an estimate; the actual length will vary.
+    steps = main_outpaint_steps
+    estimate = (steps * video_zoom_speed) + ((video_start_frame_dupe_amount + video_last_frame_dupe_amount) / fps)
+    if blend_mode != 0:
+        steps = (main_outpaint_steps - 3)
+        estimate = (steps * video_zoom_speed) + (((video_start_frame_dupe_amount + video_last_frame_dupe_amount) / fps) - 1.0)
+
+    return estimate
\ No newline at end of file
diff --git a/iz_helpers/video.py b/iz_helpers/video.py
index 14e7aa0..2155e53 100644
--- a/iz_helpers/video.py
+++ b/iz_helpers/video.py
@@ -139,9 +139,15 @@ class ContinuousVideoWriter:
         results = reverse_video(self._file_path, self._file_path)
 
 
-def add_audio_to_video(video_path, audio_path, output_path, ffmpeg_location = 'ffmpeg'):
+#def add_audio_to_video(video_path, audio_path, output_path, ffmpeg_location = 'ffmpeg'):
+#    # Construct the FFmpeg command
+#    command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', output_path]
+#    subprocess.run(command)
+#    return output_path
+
+def add_audio_to_video(video_path, audio_path, output_path, volume=1.0, ffmpeg_location='ffmpeg'):
     # Construct the FFmpeg command
-    command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', output_path]
+    command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-af', f'volume={volume}', output_path]
     subprocess.run(command)
     return output_path