From 961ac85cde0d96b589aecfe2574506f9c67bb1c1 Mon Sep 17 00:00:00 2001 From: Charles Fettinger Date: Tue, 6 Jun 2023 00:09:40 -0700 Subject: [PATCH] Add sound volume cleanup extra code fix bug add video length estimate --- iz_helpers/InfZoomConfig.py | 1 + iz_helpers/run.py | 285 +----------------------------------- iz_helpers/run_interface.py | 2 + iz_helpers/ui.py | 56 +++++-- iz_helpers/video.py | 10 +- 5 files changed, 61 insertions(+), 293 deletions(-) diff --git a/iz_helpers/InfZoomConfig.py b/iz_helpers/InfZoomConfig.py index 46167c1..ab0e0bd 100644 --- a/iz_helpers/InfZoomConfig.py +++ b/iz_helpers/InfZoomConfig.py @@ -35,6 +35,7 @@ class InfZoomConfig(): blend_invert_do:bool blend_color:str audio_filename:str=None + audio_volume:float = 1 inpainting_denoising_strength:float=1 inpainting_full_res:int =0 inpainting_padding:int=0 diff --git a/iz_helpers/run.py b/iz_helpers/run.py index c4bf1ab..2cbff0d 100644 --- a/iz_helpers/run.py +++ b/iz_helpers/run.py @@ -118,12 +118,12 @@ class InfZoomer: processed = self.fnOutpaintMainFrames() #trim frames that are blended or luma wiped + self.start_frames = self.main_frames[:2] + self.end_frames = self.main_frames[(len(self.main_frames) - 2):] if (self.C.blend_mode != 0): #trim first and last frames only from main_frames, store 2 frames in each start_frames and end_frames for blending - self.start_frames = self.main_frames[:2] - self.end_frames = self.main_frames[(len(self.main_frames) - 2):] self.main_frames = self.main_frames[1:(len(self.main_frames) - 1)] - print(f"Trimmed Blending Mode frames: start_frames:{len(self.start_frames)} end_frames:{len(self.end_frames)} main_frames:{len(self.main_frames)}") + print(f"Trimmed Blending Mode frames: start_frames:{len(self.start_frames)} end_frames:{len(self.end_frames)} main_frames:{len(self.main_frames)}") if (self.C.upscale_do): self.doUpscaling() @@ -154,7 +154,7 @@ class InfZoomer: self.fnInterpolateFrames() # changes main_frame and writes to video if self.C.audio_filename is not None: - self.out_config["video_filename"] = add_audio_to_video(self.out_config["video_filename"], self.C.audio_filename, str.replace(self.out_config["video_filename"], ".mp4", "_audio.mp4"), find_ffmpeg_binary()) + self.out_config["video_filename"] = add_audio_to_video(self.out_config["video_filename"], self.C.audio_filename, str.replace(self.out_config["video_filename"], ".mp4", "_audio.mp4"), self.C.audio_volume, find_ffmpeg_binary()) print("Video saved in: " + os.path.join(script_path, self.out_config["video_filename"])) @@ -790,7 +790,7 @@ class InfZoomer: ########################################################################################################################## ########################################################################################################################## -# Infinite Zoom +# Infinite Zoom Classic def prepare_output_path(): isCollect = shared.opts.data.get("infzoom_collectAllResources", False) @@ -830,281 +830,6 @@ def frames2Collect(all_frames, out_config): for i, f in enumerate(all_frames): save2Collect(f, out_config, f"frame_{i}") - -def create_zoom_single( - common_prompt_pre, - prompts_array, - common_prompt_suf, - negative_prompt, - num_outpainting_steps, - guidance_scale, - num_inference_steps, - custom_init_image, - custom_exit_image, - video_frame_rate, - video_zoom_mode, - video_start_frame_dupe_amount, - video_last_frame_dupe_amount, - inpainting_mask_blur, - inpainting_fill_mode, - zoom_speed, - seed, - outputsizeW, - outputsizeH, - sampler, - upscale_do, - upscaler_name, - upscale_by, - overmask, - outpaintStrategy, - outpaint_amount_px, - blend_image, - blend_mode, - blend_gradient_size, - blend_invert_do, - blend_color, - inpainting_denoising_strength, - inpainting_full_res, - inpainting_padding, - progress, - audio_filename = None -): - # try: - # if gr.Progress() is not None: - # progress = gr.Progress() - # progress(0, desc="Preparing Initial Image") - # except Exception: - # pass - fix_env_Path_ffprobe() - out_config = prepare_output_path() - - prompts = {} - prompt_images = {} - prompt_alpha_mask_images = {} - prompt_image_is_keyframe = {} - - for x in prompts_array: - try: - key = int(x[0]) - value = str(x[1]) - file_loc = str(x[2]) - alpha_mask_loc = str(x[3]) - is_keyframe = bool(x[4]) - prompts[key] = value - prompt_images[key] = file_loc - prompt_alpha_mask_images[key] = alpha_mask_loc - prompt_image_is_keyframe[key] = value_to_bool(is_keyframe) - except ValueError: - pass - - assert len(prompts_array) > 0, "prompts is empty" - print(str(len(prompts)) + " prompts found") - print(str(len([value for value in prompt_images.values() if value != ""])) + " prompt Images found") - print(str(len([value for value in prompt_alpha_mask_images.values() if value != ""])) + " prompt Alpha Masks found") - - width = closest_upper_divisible_by_eight(outputsizeW) - height = closest_upper_divisible_by_eight(outputsizeH) - - current_image = Image.new(mode="RGBA", size=(width, height)) - #mask_image = np.array(current_image)[:, :, 3] - #mask_image = Image.fromarray(255 - mask_image).convert("RGB") - #current_image = current_image.convert("RGB") - current_seed = seed - extra_frames = 0 - - if custom_init_image: - current_image = resize_and_crop_image(custom_init_image, width, height) - print("using Custom Initial Image") - save2Collect(current_image, out_config, f"init_custom.png") - #processed = Processed(StableDiffusionProcessing(),images_list=[current_image], seed=current_seed, info="init_custom image") - else: - if prompt_images[min(k for k in prompt_images.keys() if k >= 0)] == "": - load_model_from_setting( - "infzoom_txt2img_model", progress, "Loading Model for txt2img: " - ) - pr = prompts[min(k for k in prompts.keys() if k >= 0)] - processed, current_seed = renderTxt2Img( - f"{common_prompt_pre}\n{pr}\n{common_prompt_suf}".strip(), - negative_prompt, - sampler, - num_inference_steps, - guidance_scale, - current_seed, - width, - height, - ) - if len(processed.images) > 0: - current_image = processed.images[0] - save2Collect(current_image, out_config, f"init_txt2img.png") - else: - print("using image 0 as Initial keyframe") - current_image = open_image(prompt_images[min(k for k in prompt_images.keys() if k >= 0)]) - current_image = resize_and_crop_image(current_image, width, height) - save2Collect(current_image, out_config, f"init_custom.png") - #processed = Processed(StableDiffusionProcessing(),images_list=[current_image], seed=current_seed, info="prompt_0 image") - - mask_width = math.trunc(width / 4) # was initially 512px => 128px - mask_height = math.trunc(height / 4) # was initially 512px => 128px - - num_interpol_frames = round(video_frame_rate * zoom_speed) - - all_frames = [] - - if upscale_do and progress: - progress(0, desc="upscaling inital image") - - load_model_from_setting( - "infzoom_inpainting_model", progress, "Loading Model for inpainting/img2img: " - ) - - if custom_exit_image: - extra_frames += 1 - - main_frames, processed = outpaint_steps( - width, - height, - common_prompt_pre, - common_prompt_suf, - prompts, - prompt_images, - prompt_alpha_mask_images, - prompt_image_is_keyframe, - negative_prompt, - current_seed, - sampler, - int(num_inference_steps), - guidance_scale, - inpainting_denoising_strength, - inpainting_mask_blur, - inpainting_fill_mode, - inpainting_full_res, - inpainting_padding, - current_image, - num_outpainting_steps + extra_frames, - out_config, - mask_width, - mask_height, - custom_exit_image, - False, - blend_gradient_size - ) - - #for k in range(len(main_frames)): - #print(str(f"Frame {k} : {main_frames[k]}")) - #resize_and_crop_image(main_frames[k], width, height) - - all_frames.append( - do_upscaleImg(main_frames[0], upscale_do, upscaler_name, upscale_by) - if upscale_do - else main_frames[0] - ) - for i in range(len(main_frames) - 1): - print(f"processing frame {i}") - - # interpolation steps between 2 inpainted images (=sequential zoom and crop) - for j in range(num_interpol_frames - 1): - current_image = main_frames[i + 1] - interpol_image = current_image - save2Collect(interpol_image, out_config, f"interpol_img_{i}_{j}].png") - - interpol_width = math.ceil( - ( - 1 - - (1 - 2 * mask_width / width) - ** (1 - (j + 1) / num_interpol_frames) - ) - * width - / 2 - ) - - interpol_height = math.ceil( - ( - 1 - - (1 - 2 * mask_height / height) - ** (1 - (j + 1) / num_interpol_frames) - ) - * height - / 2 - ) - - interpol_image = interpol_image.crop( - ( - interpol_width, - interpol_height, - width - interpol_width, - height - interpol_height, - ) - ) - - interpol_image = interpol_image.resize((width, height)) - save2Collect(interpol_image, out_config, f"interpol_resize_{i}_{j}.png") - - # paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming - interpol_width2 = math.ceil( - (1 - (width - 2 * mask_width) / (width - 2 * interpol_width)) - / 2 - * width - ) - - interpol_height2 = math.ceil( - (1 - (height - 2 * mask_height) / (height - 2 * interpol_height)) - / 2 - * height - ) - - prev_image_fix_crop = shrink_and_paste_on_blank( - main_frames[i], interpol_width2, interpol_height2 - ) - - interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop) - save2Collect(interpol_image, out_config, f"interpol_prevcrop_{i}_{j}.png") - - if upscale_do and progress: - progress(((i + 1) / num_outpainting_steps), desc="upscaling interpol") - - all_frames.append( - do_upscaleImg(interpol_image, upscale_do, upscaler_name, upscale_by) - if upscale_do - else interpol_image - ) - - if upscale_do and progress: - progress(((i + 1) / num_outpainting_steps), desc="upscaling current") - - all_frames.append( - #do_upscaleImg(current_image, upscale_do, upscaler_name, upscale_by) - #if upscale_do - #else - current_image - ) - - frames2Collect(all_frames, out_config) - - write_video( - out_config["video_filename"], - all_frames, - video_frame_rate, - video_zoom_mode, - int(video_start_frame_dupe_amount), - int(video_last_frame_dupe_amount), - num_interpol_frames, - blend_invert_do, - blend_image, - blend_mode, - blend_gradient_size, - hex_to_rgba(blend_color), - ) - if audio_filename is not None: - out_config["video_filename"] = add_audio_to_video(out_config["video_filename"], audio_filename, str.replace(out_config["video_filename"], ".mp4", "_audio.mp4"), find_ffmpeg_binary()) - - print("Video saved in: " + os.path.join(script_path, out_config["video_filename"])) - return ( - out_config["video_filename"], - main_frames, - processed.js(), - plaintext_to_html(processed.info), - plaintext_to_html(""), - ) ################################################################################################################# def create_mask_with_circles(original_image_width, original_image_height, border_width, border_height, overmask: int, radius=4): # Create a new image with border and draw a mask diff --git a/iz_helpers/run_interface.py b/iz_helpers/run_interface.py index e87c210..91ecec0 100644 --- a/iz_helpers/run_interface.py +++ b/iz_helpers/run_interface.py @@ -38,6 +38,7 @@ def createZoom( blend_invert_do:bool, blend_color:str, audio_filename:str = None, + audio_volume:float = 1, inpainting_denoising_strength:float=1, inpainting_full_res:int =0, inpainting_padding:int=0, @@ -77,6 +78,7 @@ def createZoom( blend_invert_do, blend_color, audio_filename, + audio_volume, inpainting_denoising_strength, inpainting_full_res, inpainting_padding, diff --git a/iz_helpers/ui.py b/iz_helpers/ui.py index e5a1fa9..c489067 100644 --- a/iz_helpers/ui.py +++ b/iz_helpers/ui.py @@ -27,7 +27,7 @@ from .static_variables import promptTableHeaders def on_ui_tabs(): main_seed = gr.Number() - audio_filename = gr.Textbox(None) + audio_filename = gr.Textbox(None) with gr.Blocks(analytics_enabled=False) as infinite_zoom_interface: gr.HTML( @@ -56,7 +56,7 @@ def on_ui_tabs(): main_outpaint_steps = gr.Slider( minimum=2, - maximum=100, + maximum=120, step=1, label="Total video length [s]", value=default_total_outpaints, @@ -169,14 +169,23 @@ def on_ui_tabs(): maximum=120, step=1 ) - video_zoom_speed = gr.Slider( - label="Zoom Speed", - value=1.0, - minimum=0.1, - maximum=20.0, - step=0.1, - info="Zoom speed in seconds (higher values create slower zoom)", - ) + with gr.Row(): + video_zoom_speed = gr.Slider( + label="Zoom Speed", + value=1.0, + minimum=0.1, + maximum=20.0, + step=0.1, + info="Zoom speed in seconds (higher values create slower zoom)", + ) + video_est_length = gr.Number( + label="Estimated video length [s]", + info="a basic estimation of the video length", + value=1.0, + precision=1, + readonly=True, + id="infzoom_est_length", + ) with gr.Accordion("FFMPEG Expert", open=False): gr.Markdown( """# I need FFMPEG control @@ -216,6 +225,12 @@ You might give multiple options in one line. label='Blend Edge Color', default='#ffff00' ) + video_zoom_speed.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length]) + main_outpaint_steps.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length]) + video_frame_rate.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length]) + video_start_frame_dupe_amount.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length]) + video_last_frame_dupe_amount.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length]) + blend_mode.change(calc_est_video_length,inputs=[blend_mode,video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,video_frame_rate,main_outpaint_steps],outputs=[video_est_length]) with gr.Accordion("Blend Info", open=False): gr.Markdown( """# Important Blend Info: @@ -240,6 +255,13 @@ Ideas for custom blend images: https://www.pexels.com/search/gradient/ type="file", label="Audio File") audio_file.change(get_filename, inputs=[audio_file], outputs=[audio_filename]) + with gr.Row(): + audio_volume = gr.Slider( + label="Audio volume", + minimum=0.0, + maximum=2.0, + step=.05, + value=1.0) with gr.Tab("Outpaint"): outpaint_amount_px = gr.Slider( @@ -426,6 +448,7 @@ Our best experience and trade-off is the R-ERSGAn4x upscaler. blend_invert_do, blend_color, audio_filename, + audio_volume, ], outputs=[output_video, out_image, generation_info, html_info, html_log], ) @@ -453,4 +476,15 @@ def get_min_outpaint_amount(width, outpaint_amount, strategy): min_outpaint_px = outpaint_amount if strategy == "Center": min_outpaint_px = closest_upper_divisible_by_eight(max(outpaint_amount, width // 4)) - return min_outpaint_px \ No newline at end of file + return min_outpaint_px + +def calc_est_video_length(blend_mode, video_zoom_speed, video_start_frame_dupe_amount,video_last_frame_dupe_amount,fps, main_outpaint_steps): + #calculates the estimated video length based on the blend mode, zoom speed, and outpaint steps + #this is just an estimate, the actual length will vary + steps = main_outpaint_steps + estimate = (steps * video_zoom_speed) + ((video_start_frame_dupe_amount + video_last_frame_dupe_amount) / fps) + if blend_mode != 0: + steps = (main_outpaint_steps - 3) + estimate = (steps * video_zoom_speed) + (((video_start_frame_dupe_amount + video_last_frame_dupe_amount) / fps) - 1.0) + + return estimate \ No newline at end of file diff --git a/iz_helpers/video.py b/iz_helpers/video.py index 14e7aa0..2155e53 100644 --- a/iz_helpers/video.py +++ b/iz_helpers/video.py @@ -139,9 +139,15 @@ class ContinuousVideoWriter: results = reverse_video(self._file_path, self._file_path) -def add_audio_to_video(video_path, audio_path, output_path, ffmpeg_location = 'ffmpeg'): +#def add_audio_to_video(video_path, audio_path, output_path, ffmpeg_location = 'ffmpeg'): +# # Construct the FFmpeg command +# command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', output_path] +# subprocess.run(command) +# return output_path + +def add_audio_to_video(video_path, audio_path, output_path, volume=1.0, ffmpeg_location='ffmpeg'): # Construct the FFmpeg command - command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', output_path] + command = [ffmpeg_location, '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-af', f'volume={volume}', output_path] subprocess.run(command) return output_path