Add sound volume

Clean up extra code, fix a bug, and add a video length estimate.
2023-06-06 00:09:40 -07:00 · 2023-06-06 00:09:40 -07:00 · 961ac85cde
parent dfaa47ca0f
commit 961ac85cde
5 changed files with 61 additions and 293 deletions
--- a/iz_helpers/InfZoomConfig.py
+++ b/iz_helpers/InfZoomConfig.py
@ -35,6 +35,7 @@ class InfZoomConfig():
    blend_invert_do:bool
    blend_color:str
    audio_filename:str=None
+    audio_volume:float = 1
    inpainting_denoising_strength:float=1
    inpainting_full_res:int =0
    inpainting_padding:int=0
--- a/iz_helpers/run.py
+++ b/iz_helpers/run.py
@ -118,12 +118,12 @@ class InfZoomer:
        processed = self.fnOutpaintMainFrames()

        #trim frames that are blended or luma wiped
+        self.start_frames = self.main_frames[:2]
+        self.end_frames = self.main_frames[(len(self.main_frames) - 2):]
        if (self.C.blend_mode != 0):
            #trim first and last frames only from main_frames, store 2 frames in each start_frames and end_frames for blending
-            self.start_frames = self.main_frames[:2]
-            self.end_frames = self.main_frames[(len(self.main_frames) - 2):]
            self.main_frames = self.main_frames[1:(len(self.main_frames) - 1)]
-            print(f"Trimmed Blending Mode frames: start_frames:{len(self.start_frames)} end_frames:{len(self.end_frames)} main_frames:{len(self.main_frames)}")
+        print(f"Trimmed Blending Mode frames: start_frames:{len(self.start_frames)} end_frames:{len(self.end_frames)} main_frames:{len(self.main_frames)}")

        if (self.C.upscale_do): 
            self.doUpscaling()
@ -154,7 +154,7 @@ class InfZoomer:
        self.fnInterpolateFrames() # changes main_frame and writes to video

        if self.C.audio_filename is not None:
-            self.out_config["video_filename"] = add_audio_to_video(self.out_config["video_filename"], self.C.audio_filename, str.replace(self.out_config["video_filename"], ".mp4", "_audio.mp4"), find_ffmpeg_binary())
+            self.out_config["video_filename"] = add_audio_to_video(self.out_config["video_filename"], self.C.audio_filename, str.replace(self.out_config["video_filename"], ".mp4", "_audio.mp4"), self.C.audio_volume, find_ffmpeg_binary())

        print("Video saved in: " + os.path.join(script_path, self.out_config["video_filename"]))

@ -790,7 +790,7 @@ class InfZoomer:
 ##########################################################################################################################

 ##########################################################################################################################
-# Infinite Zoom
+# Infinite Zoom Classic

 def prepare_output_path():
    isCollect = shared.opts.data.get("infzoom_collectAllResources", False)
@ -830,281 +830,6 @@ def frames2Collect(all_frames, out_config):
    for i, f in enumerate(all_frames):
        save2Collect(f, out_config, f"frame_{i}")

-
-def create_zoom_single(
-    common_prompt_pre,
-    prompts_array,
-    common_prompt_suf,
-    negative_prompt,
-    num_outpainting_steps,
-    guidance_scale,
-    num_inference_steps,
-    custom_init_image,
-    custom_exit_image,
-    video_frame_rate,
-    video_zoom_mode,
-    video_start_frame_dupe_amount,
-    video_last_frame_dupe_amount,
-    inpainting_mask_blur,
-    inpainting_fill_mode,
-    zoom_speed,
-    seed,
-    outputsizeW,
-    outputsizeH,
-    sampler,
-    upscale_do,
-    upscaler_name,
-    upscale_by,
-    overmask,
-    outpaintStrategy,
-    outpaint_amount_px,
-    blend_image,
-    blend_mode,
-    blend_gradient_size,
-    blend_invert_do,
-    blend_color,
-    inpainting_denoising_strength,
-    inpainting_full_res,
-    inpainting_padding,
-    progress,
-    audio_filename = None
-):
-    # try:
-    #     if gr.Progress() is not None:
-    #         progress = gr.Progress()
-    #         progress(0, desc="Preparing Initial Image")
-    # except Exception:
-    #     pass
-    fix_env_Path_ffprobe()
-    out_config = prepare_output_path()
-
-    prompts = {}
-    prompt_images = {}
-    prompt_alpha_mask_images = {}
-    prompt_image_is_keyframe = {}
-
-    for x in prompts_array:
-        try:
-            key = int(x[0])
-            value = str(x[1])
-            file_loc = str(x[2])
-            alpha_mask_loc = str(x[3])
-            is_keyframe = bool(x[4])
-            prompts[key] = value
-            prompt_images[key] = file_loc
-            prompt_alpha_mask_images[key] = alpha_mask_loc
-            prompt_image_is_keyframe[key] = value_to_bool(is_keyframe)
-        except ValueError:
-            pass
-
-    assert len(prompts_array) > 0, "prompts is empty"
-    print(str(len(prompts)) + " prompts found")
-    print(str(len([value for value in prompt_images.values() if value != ""])) + " prompt Images found")
-    print(str(len([value for value in prompt_alpha_mask_images.values() if value != ""])) + " prompt Alpha Masks found")
-
-    width = closest_upper_divisible_by_eight(outputsizeW)
-    height = closest_upper_divisible_by_eight(outputsizeH)
-
-    current_image = Image.new(mode="RGBA", size=(width, height))
-    #mask_image = np.array(current_image)[:, :, 3]
-    #mask_image = Image.fromarray(255 - mask_image).convert("RGB")
-    #current_image = current_image.convert("RGB")
-    current_seed = seed
-    extra_frames = 0
-
-    if custom_init_image:
-        current_image = resize_and_crop_image(custom_init_image, width, height)
-        print("using Custom Initial Image")
-        save2Collect(current_image, out_config, f"init_custom.png")
-        #processed = Processed(StableDiffusionProcessing(),images_list=[current_image], seed=current_seed, info="init_custom image")
-    else:
-        if prompt_images[min(k for k in prompt_images.keys() if k >= 0)] == "":
-            load_model_from_setting(
-                "infzoom_txt2img_model", progress, "Loading Model for txt2img: "
-            )
-            pr = prompts[min(k for k in prompts.keys() if k >= 0)]
-            processed, current_seed = renderTxt2Img(
-            f"{common_prompt_pre}\n{pr}\n{common_prompt_suf}".strip(),
-            negative_prompt,
-            sampler,
-            num_inference_steps,
-            guidance_scale,
-            current_seed,
-            width,
-            height,
-            )
-            if len(processed.images) > 0:
-                current_image = processed.images[0]
-                save2Collect(current_image, out_config, f"init_txt2img.png")
-        else:
-            print("using image 0 as Initial keyframe")
-            current_image = open_image(prompt_images[min(k for k in prompt_images.keys() if k >= 0)])
-            current_image = resize_and_crop_image(current_image, width, height)
-            save2Collect(current_image, out_config, f"init_custom.png")
-            #processed = Processed(StableDiffusionProcessing(),images_list=[current_image], seed=current_seed, info="prompt_0 image")
-
-    mask_width = math.trunc(width / 4)  # was initially 512px => 128px
-    mask_height = math.trunc(height / 4)  # was initially 512px => 128px
-
-    num_interpol_frames = round(video_frame_rate * zoom_speed)
-
-    all_frames = []
-
-    if upscale_do and progress:
-        progress(0, desc="upscaling inital image")
-
-    load_model_from_setting(
-        "infzoom_inpainting_model", progress, "Loading Model for inpainting/img2img: "
-    )
-
-    if custom_exit_image:
-        extra_frames += 1
-
-    main_frames, processed = outpaint_steps(
-        width,
-        height,
-        common_prompt_pre,
-        common_prompt_suf,
-        prompts,
-        prompt_images,
-        prompt_alpha_mask_images,
-        prompt_image_is_keyframe,
-        negative_prompt,
-        current_seed,
-        sampler,
-        int(num_inference_steps),
-        guidance_scale,
-        inpainting_denoising_strength,
-        inpainting_mask_blur,
-        inpainting_fill_mode,
-        inpainting_full_res,
-        inpainting_padding,
-        current_image,
-        num_outpainting_steps + extra_frames,
-        out_config,
-        mask_width,
-        mask_height,
-        custom_exit_image,
-        False,
-        blend_gradient_size
-    )
-
-    #for k in range(len(main_frames)):
-        #print(str(f"Frame {k} : {main_frames[k]}"))
-        #resize_and_crop_image(main_frames[k], width, height)        
-
-    all_frames.append(
-        do_upscaleImg(main_frames[0], upscale_do, upscaler_name, upscale_by)
-        if upscale_do
-        else main_frames[0]
-    )
-    for i in range(len(main_frames) - 1):
-        print(f"processing frame {i}")
-
-        # interpolation steps between 2 inpainted images (=sequential zoom and crop)
-        for j in range(num_interpol_frames - 1):
-            current_image = main_frames[i + 1]
-            interpol_image = current_image
-            save2Collect(interpol_image, out_config, f"interpol_img_{i}_{j}].png")
-
-            interpol_width = math.ceil(
-                (
-                    1
-                    - (1 - 2 * mask_width / width)
-                    ** (1 - (j + 1) / num_interpol_frames)
-                )
-                * width
-                / 2
-            )
-
-            interpol_height = math.ceil(
-                (
-                    1
-                    - (1 - 2 * mask_height / height)
-                    ** (1 - (j + 1) / num_interpol_frames)
-                )
-                * height
-                / 2
-            )
-
-            interpol_image = interpol_image.crop(
-                (
-                    interpol_width,
-                    interpol_height,
-                    width - interpol_width,
-                    height - interpol_height,
-                )
-            )
-
-            interpol_image = interpol_image.resize((width, height))
-            save2Collect(interpol_image, out_config, f"interpol_resize_{i}_{j}.png")
-
-            # paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
-            interpol_width2 = math.ceil(
-                (1 - (width - 2 * mask_width) / (width - 2 * interpol_width))
-                / 2
-                * width
-            )
-
-            interpol_height2 = math.ceil(
-                (1 - (height - 2 * mask_height) / (height - 2 * interpol_height))
-                / 2
-                * height
-            )
-
-            prev_image_fix_crop = shrink_and_paste_on_blank(
-                main_frames[i], interpol_width2, interpol_height2
-            )
-
-            interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)
-            save2Collect(interpol_image, out_config, f"interpol_prevcrop_{i}_{j}.png")
-
-            if upscale_do and progress:
-                progress(((i + 1) / num_outpainting_steps), desc="upscaling interpol")
-
-            all_frames.append(
-                do_upscaleImg(interpol_image, upscale_do, upscaler_name, upscale_by)
-                if upscale_do
-                else interpol_image
-            )
-
-        if upscale_do and progress:
-            progress(((i + 1) / num_outpainting_steps), desc="upscaling current")
-
-        all_frames.append(
-            #do_upscaleImg(current_image, upscale_do, upscaler_name, upscale_by)
-            #if upscale_do
-            #else 
-            current_image
-        )
-
-    frames2Collect(all_frames, out_config)
-
-    write_video(
-        out_config["video_filename"],
-        all_frames,
-        video_frame_rate,
-        video_zoom_mode,
-        int(video_start_frame_dupe_amount),
-        int(video_last_frame_dupe_amount),
-        num_interpol_frames,
-        blend_invert_do,
-        blend_image,
-        blend_mode,
-        blend_gradient_size,
-        hex_to_rgba(blend_color),
-    )
-    if audio_filename is not None:
-        out_config["video_filename"] = add_audio_to_video(out_config["video_filename"], audio_filename, str.replace(out_config["video_filename"], ".mp4", "_audio.mp4"), find_ffmpeg_binary())
-
-    print("Video saved in: " + os.path.join(script_path, out_config["video_filename"]))
-    return (
-        out_config["video_filename"],
-        main_frames,
-        processed.js(),
-        plaintext_to_html(processed.info),
-        plaintext_to_html(""),
-    )
 #################################################################################################################
 def create_mask_with_circles(original_image_width, original_image_height, border_width, border_height, overmask: int, radius=4):
    # Create a new image with border and draw a mask
--- a/iz_helpers/run_interface.py
+++ b/iz_helpers/run_interface.py
@ -38,6 +38,7 @@ def createZoom(
    blend_invert_do:bool,
    blend_color:str,
    audio_filename:str = None,
+    audio_volume:float = 1,
    inpainting_denoising_strength:float=1,
    inpainting_full_res:int =0,
    inpainting_padding:int=0,
@ -77,6 +78,7 @@ def createZoom(
        blend_invert_do,
        blend_color,
        audio_filename,
+        audio_volume,
        inpainting_denoising_strength,
        inpainting_full_res,
        inpainting_padding,
--- a/iz_helpers/ui.py
+++ b/iz_helpers/ui.py
@ -27,7 +27,7 @@ from .static_variables import promptTableHeaders

 def on_ui_tabs():
    main_seed = gr.Number()
-    audio_filename = gr.Textbox(None)
+    audio_filename = gr.Textbox(None)    

    with gr.Blocks(analytics_enabled=False) as infinite_zoom_interface:
        gr.HTML(
@ -56,7 +56,7 @@ def on_ui_tabs():

                        main_outpaint_steps = gr.Slider(
                            minimum=2,
-                            maximum=100,
+                            maximum=120,
                            step=1,
                            label="Total video length [s]",
                            value=default_total_outpaints,
@ -169,14 +169,23 @@ def on_ui_tabs():
                        maximum=120,
                        step=1
                    )
-                    video_zoom_speed = gr.Slider(
-                        label="Zoom Speed",
-                        value=1.0,
-                        minimum=0.1,
-                        maximum=20.0,
-                        step=0.1,
-                        info="Zoom speed in seconds (higher values create slower zoom)",
-                    )
+                    with gr.Row():
+                        video_zoom_speed = gr.Slider(
+                            label="Zoom Speed",
+                            value=1.0,
+                            minimum=0.1,
+                            maximum=20.0,
+                            step=0.1,
+                            info="Zoom speed in seconds (higher values create slower zoom)",
+                        )
+                        video_est_length = gr.Number(
+                            label="Estimated video length [s]",
+                            info="a basic estimation of the video length",
+                            value=1.0,
+                            precision=1,
+                            readonly=True,
+                            id="infzoom_est_length",
+                        )
                    with gr.Accordion("FFMPEG Expert", open=False):
                        gr.Markdown(
                            """# I need FFMPEG control
@ -216,6 +225,12 @@ You might give multiple options in one line.
                                label='Blend Edge Color', 
                                default='#ffff00'
                            )
+                            video_zoom_speed.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            main_outpaint_steps.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            video_frame_rate.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            video_start_frame_dupe_amount.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            video_last_frame_dupe_amount.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])
+                            blend_mode.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length])  
                    with gr.Accordion("Blend Info", open=False):
                        gr.Markdown(
                            """# Important Blend Info:
@ -240,6 +255,13 @@ Ideas for custom blend images: https://www.pexels.com/search/gradient/
                            type="file",
                            label="Audio File")
                        audio_file.change(get_filename, inputs=[audio_file], outputs=[audio_filename])
+                    with gr.Row():
+                        audio_volume = gr.Slider(
+                            label="Audio volume",
+                            minimum=0.0,
+                            maximum=2.0,
+                            step=.05,
+                            value=1.0)

                with gr.Tab("Outpaint"):
                    outpaint_amount_px = gr.Slider(
@ -426,6 +448,7 @@ Our best experience and trade-off is the R-ERSGAn4x upscaler.
                blend_invert_do,
                blend_color,
                audio_filename,
+                audio_volume,
            ],
            outputs=[output_video, out_image, generation_info, html_info, html_log],
        )
@ -453,4 +476,15 @@ def get_min_outpaint_amount(width, outpaint_amount, strategy):
    min_outpaint_px = outpaint_amount
    if strategy == "Center":
        min_outpaint_px = closest_upper_divisible_by_eight(max(outpaint_amount, width // 4))
-    return min_outpaint_px
+    return min_outpaint_px
+
+def calc_est_video_length(blend_mode, video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,fps, main_outpaint_steps):
+    # Return a rough video-length estimate: steps * zoom speed, plus the start/last frame dupe amounts divided by fps.
+    # With a non-zero blend_mode, 3 fewer steps are counted and 1.0 is subtracted. This is only an estimate; the actual length will vary.
+    steps = main_outpaint_steps
+    estimate = (steps * video_zoom_speed) + ((video_start_frame_dupe_amount + video_last_frame_dupe_amount) / fps)  # division raises ZeroDivisionError when fps is 0
+    if blend_mode != 0:
+        steps = (main_outpaint_steps - 3)  # presumably compensates for frames trimmed for blending -- TODO confirm against run.py
+        estimate = (steps * video_zoom_speed) + (((video_start_frame_dupe_amount + video_last_frame_dupe_amount) / fps) - 1.0)
+
+    return estimate
--- a/iz_helpers/video.py
+++ b/iz_helpers/video.py
@ -139,9 +139,15 @@ class ContinuousVideoWriter:
        results = reverse_video(self._file_path, self._file_path)


-def add_audio_to_video(video_path, audio_path, output_path, ffmpeg_location = 'ffmpeg'):
+#def add_audio_to_video(video_path, audio_path, output_path, ffmpeg_location = 'ffmpeg'):
+#    # Construct the FFmpeg command
+#    command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', output_path]
+#    subprocess.run(command)
+#    return output_path
+
+def add_audio_to_video(video_path, audio_path, output_path, volume=1.0, ffmpeg_location='ffmpeg'):
    # Construct the FFmpeg command
-    command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', output_path]
+    command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-af', f'volume={volume}', output_path]
    subprocess.run(command)
    return output_path