From 07747ed844103e27421401d6343c59b5a048d5a5 Mon Sep 17 00:00:00 2001
From: alexbofa <58225118+alexbofa@users.noreply.github.com>
Date: Sun, 22 Oct 2023 00:55:16 +0300
Subject: [PATCH] Main Repository
---
README.md | 147 +++++-
calculator.py | 237 +++++++++
ebsynth_utility.py | 361 +++++++-------
install.py | 59 +--
scripts/custom_script.py | 1012 ++++++++++++++++++++++++++++++++++++++
scripts/ui.py | 367 +++++++-------
stage3_5.py | 178 +++++++
stage5.py | 2 +-
stage8.py | 292 +++++------
style.css | 82 +--
10 files changed, 2156 insertions(+), 581 deletions(-)
create mode 100644 calculator.py
create mode 100644 scripts/custom_script.py
create mode 100644 stage3_5.py
diff --git a/README.md b/README.md
index 8b856ab..841c529 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,58 @@
-# ebsynth_utility_lite
-Fork was created to facilitate the creation of videos via img2img based on the original [ebsynth_utility](https://github.com/s9roll7/ebsynth_utility)
-
-## TODO
-- [x] Delete script for img2img
-- [ ] Add configuration → stage 5
-- [ ] Stage 0 — changing the video size, for example from 1080x1920 to 512x904
-- [ ] Stage 2 — manually add **custom_gap**
-- [ ] Change Stage 3 for create a grid (min 1x1 max 3x3)
-- [ ] Change Stage 4 for disassemble the grid back
-- [ ] Stage 0 — add Presets (with changes via .json)
-- [ ] Stage 5 — automatisation with Ebsynth? (Is it possible?)
-- [ ] Edit **Readme.md**
+# ebsynth_utility
-#### If you want to help, feel free to create the [PR](https://github.com/alexbofa/ebsynth_utility_lite/pulls)
+## Overview
+#### AUTOMATIC1111 UI extension for creating videos using img2img and ebsynth.
+#### This extension allows you to output edited videos using ebsynth. (AE is not required.)
+
+
+##### With [ControlNet](https://github.com/Mikubill/sd-webui-controlnet) installed, I have confirmed that all features of this extension work properly!
+##### [ControlNet](https://github.com/Mikubill/sd-webui-controlnet) is a must for video editing, so I recommend installing it.
+##### Multi ControlNet ("canny" + "normal map") would be suitable for video editing.
+
+
+
+###### I modified animatediff-cli to create a txt2video tool that allows flexible prompt specification. You can use it if you like.
+###### [animatediff-cli-prompt-travel](https://github.com/s9roll7/animatediff-cli-prompt-travel)
+
\ + See \ + [here] for depth map.\ +
") + + with gr.Accordion("ControlNet option"): + controlnet_weight = gr.Slider(minimum=0.0, maximum=2.0, step=0.01, value=0.5, label="Control Net Weight") + controlnet_weight_for_face = gr.Slider(minimum=0.0, maximum=2.0, step=0.01, value=0.5, label="Control Net Weight For Face") + use_preprocess_img = gr.Checkbox(True, label="Use Preprocess image If exists in /controlnet_preprocess") + gr.HTML(value="\
+ Please enable the following settings to use controlnet from this script.
\
+ \
+ Settings->ControlNet->Allow other script to control this extension\
+ \
+
\
+ The results are stored in timestamp_prompts.txt.
\
+ If you want to use the same tagging results the next time you run img2img, rename the file to prompts.txt
\
+ Recommend enabling the following settings.
\
+ \
+ Settings->Interrogate Option->Interrogate: include ranks of model tags matches in results\
+ \
+
\ + If loading of the Yolov5_anime model fails, check\ + [this] solution.\ +
") + face_crop_resolution = gr.Slider(minimum=128, maximum=2048, step=1, value=512, label="Face Crop Resolution") + max_crop_size = gr.Slider(minimum=0, maximum=2048, step=1, value=1024, label="Max Crop Size") + face_denoising_strength = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.5, label="Face Denoising Strength") + face_area_magnification = gr.Slider(minimum=1.00, maximum=10.00, step=0.01, value=1.5, label="Face Area Magnification ") + disable_facecrop_lpbk_last_time = gr.Checkbox(False, label="Disable at the last loopback time") + + with gr.Column(): + enable_face_prompt = gr.Checkbox(False, label="Enable Face Prompt") + face_prompt = gr.Textbox(label="Face Prompt", show_label=False, lines=2, + placeholder="Prompt for Face", + value = "face close up," + ) + + return [project_dir, generation_test, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, add_tag_replace_underscore, is_facecrop, face_detection_method, face_crop_resolution, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt, controlnet_weight, controlnet_weight_for_face, disable_facecrop_lpbk_last_time,use_preprocess_img] + + + def detect_face_from_img(self, img_array): + if not self.face_detector: + dnn_model_path = autocrop.download_and_cache_models(os.path.join(models_path, "opencv")) + self.face_detector = cv2.FaceDetectorYN.create(dnn_model_path, "", (0, 0)) + + self.face_detector.setInputSize((img_array.shape[1], img_array.shape[0])) + _, result = self.face_detector.detect(img_array) + return result + + def detect_anime_face_from_img(self, img_array): + import sys + + if not self.anime_face_detector: + if 'models' in sys.modules: + del sys.modules['models'] + + anime_model_path = download_and_cache_models(os.path.join(models_path, "yolov5_anime")) + + if not os.path.isfile(anime_model_path): + print( "WARNING!! 
" + anime_model_path + " not found.") + print( "use YuNet instead.") + return self.detect_face_from_img(img_array) + + self.anime_face_detector = torch.hub.load('ultralytics/yolov5', 'custom', path=anime_model_path) + + # warmup + test = np.zeros([512,512,3],dtype=np.uint8) + _ = self.anime_face_detector(test) + + result = self.anime_face_detector(img_array) + #models.common.Detections + faces = [] + for x_c, y_c, w, h, _, _ in result.xywh[0].tolist(): + faces.append( [ x_c - w/2 , y_c - h/2, w, h ] ) + + return faces + + def detect_face(self, img, mask, face_detection_method, max_crop_size): + img_array = np.array(img) + + # image without alpha + if img_array.shape[2] == 4: + img_array = img_array[:,:,:3] + + if mask is not None: + if self.is_invert_mask: + mask = ImageOps.invert(mask) + mask_array = np.array(mask)/255 + if mask_array.ndim == 2: + mask_array = mask_array[:, :, np.newaxis] + + if mask_array.shape[2] == 4: + mask_array = mask_array[:,:,:3] + + img_array = mask_array * img_array + img_array = img_array.astype(np.uint8) + + if face_detection_method == "YuNet": + faces = self.detect_face_from_img(img_array) + elif face_detection_method == "Yolov5_anime": + faces = self.detect_anime_face_from_img(img_array) + else: + faces = self.detect_face_from_img(img_array) + + if faces is None or len(faces) == 0: + return [] + + face_coords = [] + for face in faces: + x = int(face[0]) + y = int(face[1]) + w = int(face[2]) + h = int(face[3]) + if max(w,h) > max_crop_size: + print("ignore big face") + continue + if w == 0 or h == 0: + print("ignore w,h = 0 face") + continue + + face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] ) + + return face_coords + + def get_mask(self): + def create_mask( output, x_rate, y_rate, k_size ): + img = np.zeros((512, 512, 3)) + img = cv2.ellipse(img, ((256, 256), (int(512 * x_rate), int(512 * y_rate)), 0), (255, 255, 255), thickness=-1) + img = cv2.GaussianBlur(img, (k_size, 
k_size), 0) + cv2.imwrite(output, img) + + if self.face_merge_mask_image is None: + mask_file_path = os.path.join( get_my_dir() , self.face_merge_mask_filename) + if not os.path.isfile(mask_file_path): + create_mask( mask_file_path, 0.9, 0.9, 91) + + m = cv2.imread( mask_file_path )[:,:,0] + m = m[:, :, np.newaxis] + self.face_merge_mask_image = m / 255 + + return self.face_merge_mask_image + + def face_img_crop(self, img, face_coords,face_area_magnification): + img_array = np.array(img) + face_imgs =[] + new_coords = [] + + for face in face_coords: + x = int(face[0] * img_array.shape[1]) + y = int(face[1] * img_array.shape[0]) + w = int(face[2] * img_array.shape[1]) + h = int(face[3] * img_array.shape[0]) + print([x,y,w,h]) + + cx = x + int(w/2) + cy = y + int(h/2) + + x = cx - int(w*face_area_magnification / 2) + x = x if x > 0 else 0 + w = cx + int(w*face_area_magnification / 2) - x + w = w if x+w < img.width else img.width - x + + y = cy - int(h*face_area_magnification / 2) + y = y if y > 0 else 0 + h = cy + int(h*face_area_magnification / 2) - y + h = h if y+h < img.height else img.height - y + + print([x,y,w,h]) + + face_imgs.append( img_array[y: y+h, x: x+w] ) + new_coords.append( [x,y,w,h] ) + + resized = [] + for face_img in face_imgs: + if face_img.shape[1] < face_img.shape[0]: + re_w = self.face_crop_resolution + re_h = int(x_ceiling( (self.face_crop_resolution / face_img.shape[1]) * face_img.shape[0] , 64)) + else: + re_w = int(x_ceiling( (self.face_crop_resolution / face_img.shape[0]) * face_img.shape[1] , 64)) + re_h = self.face_crop_resolution + + face_img = resize_img(face_img, re_w, re_h) + resized.append( Image.fromarray(face_img)) + + return resized, new_coords + + def face_crop_img2img(self, p, face_coords, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt, controlnet_input_img, controlnet_input_face_imgs, preprocess_img_exist): + + def merge_face(img, face_img, face_coord, base_img_size, mask): + x_rate = 
img.width / base_img_size[0] + y_rate = img.height / base_img_size[1] + + img_array = np.array(img) + x = int(face_coord[0] * x_rate) + y = int(face_coord[1] * y_rate) + w = int(face_coord[2] * x_rate) + h = int(face_coord[3] * y_rate) + + face_array = np.array(face_img) + face_array = resize_img(face_array, w, h) + mask = resize_img(mask, w, h) + if mask.ndim == 2: + mask = mask[:, :, np.newaxis] + + bg = img_array[y: y+h, x: x+w] + img_array[y: y+h, x: x+w] = mask * face_array + (1-mask)*bg + + return Image.fromarray(img_array) + + base_img = p.init_images[0] + + base_img_size = (base_img.width, base_img.height) + + if face_coords is None or len(face_coords) == 0: + print("no face detected") + return process_images(p) + + print(face_coords) + face_imgs, new_coords = self.face_img_crop(base_img, face_coords, face_area_magnification) + + if not face_imgs: + return process_images(p) + + face_p = copy.copy(p) + + ### img2img base img + proc = self.process_images(p, controlnet_input_img, self.controlnet_weight, preprocess_img_exist) + print(proc.seed) + + ### img2img for each face + face_img2img_results = [] + + for face, coord, controlnet_input_face in zip(face_imgs, new_coords, controlnet_input_face_imgs): + # cv2.imwrite("scripts/face.png", np.array(face)[:, :, ::-1]) + face_p.init_images = [face] + face_p.width = face.width + face_p.height = face.height + face_p.denoising_strength = face_denoising_strength + + if enable_face_prompt: + face_p.prompt = face_prompt + else: + face_p.prompt = "close-up face ," + face_p.prompt + + if p.image_mask is not None: + x,y,w,h = coord + cropped_face_mask = Image.fromarray(np.array(p.image_mask)[y: y+h, x: x+w]) + face_p.image_mask = modules.images.resize_image(0, cropped_face_mask, face.width, face.height) + + face_proc = self.process_images(face_p, controlnet_input_face, self.controlnet_weight_for_face, preprocess_img_exist) + print(face_proc.seed) + + face_img2img_results.append((face_proc.images[0], coord)) + + ### merge 
faces + bg = proc.images[0] + mask = self.get_mask() + + for face_img, coord in face_img2img_results: + bg = merge_face(bg, face_img, coord, base_img_size, mask) + + proc.images[0] = bg + + return proc + + def get_depth_map(self, mask, depth_path ,img_basename, is_invert_mask): + depth_img_path = os.path.join( depth_path , img_basename ) + + depth = None + + if os.path.isfile( depth_img_path ): + depth = Image.open(depth_img_path) + else: + # try 00001-0000.png + os.path.splitext(img_basename)[0] + depth_img_path = os.path.join( depth_path , os.path.splitext(img_basename)[0] + "-0000.png" ) + if os.path.isfile( depth_img_path ): + depth = Image.open(depth_img_path) + + if depth: + if mask: + mask_array = np.array(mask) + depth_array = np.array(depth) + + if is_invert_mask == False: + depth_array[mask_array[:,:,0] == 0] = 0 + else: + depth_array[mask_array[:,:,0] != 0] = 0 + + depth = Image.fromarray(depth_array) + + tmp_path = os.path.join( depth_path , "tmp" ) + os.makedirs(tmp_path, exist_ok=True) + tmp_path = os.path.join( tmp_path , img_basename ) + depth_array = depth_array.astype(np.uint16) + cv2.imwrite(tmp_path, depth_array) + + mask = depth + + return depth!=None, mask + +### auto tagging + debug_count = 0 + + def get_masked_image(self, image, mask_image): + + if mask_image == None: + return image.convert("RGB") + + mask = mask_image.convert('L') + if self.is_invert_mask: + mask = ImageOps.invert(mask) + crop_region = masking.get_crop_region(np.array(mask), 0) +# crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height) +# x1, y1, x2, y2 = crop_region + image = image.crop(crop_region).convert("RGB") + mask = mask.crop(crop_region) + + base_img = Image.new("RGB", image.size, (255, 190, 200)) + + image = Image.composite( image, base_img, mask ) + +# image.save("scripts/get_masked_image_test_"+ str(self.debug_count) + ".png") +# self.debug_count += 1 + + return image + + def interrogate_deepdanbooru(self, imgs, 
masks): + prompts_dict = {} + cause_err = False + + try: + deepbooru.model.start() + + for img,mask in zip(imgs,masks): + key = os.path.basename(img) + print(key + " interrogate deepdanbooru") + + image = Image.open(img) + mask_image = Image.open(mask) if mask else None + image = self.get_masked_image(image, mask_image) + + prompt = deepbooru.model.tag_multi(image) + + prompts_dict[key] = prompt + except Exception as e: + import traceback + traceback.print_exc() + print(e) + cause_err = True + finally: + deepbooru.model.stop() + if cause_err: + print("Exception occurred during auto-tagging(deepdanbooru)") + return Processed() + + return prompts_dict + + + def interrogate_clip(self, imgs, masks): + from modules import devices, shared, lowvram, paths + import importlib + import models + + caption_list = [] + prompts_dict = {} + cause_err = False + + try: + if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: + lowvram.send_everything_to_cpu() + devices.torch_gc() + + with paths.Prioritize("BLIP"): + importlib.reload(models) + shared.interrogator.load() + + for img,mask in zip(imgs,masks): + key = os.path.basename(img) + print(key + " generate caption") + + image = Image.open(img) + mask_image = Image.open(mask) if mask else None + image = self.get_masked_image(image, mask_image) + + caption = shared.interrogator.generate_caption(image) + caption_list.append(caption) + + shared.interrogator.send_blip_to_ram() + devices.torch_gc() + + for img,mask,caption in zip(imgs,masks,caption_list): + key = os.path.basename(img) + print(key + " interrogate clip") + + image = Image.open(img) + mask_image = Image.open(mask) if mask else None + image = self.get_masked_image(image, mask_image) + + clip_image = shared.interrogator.clip_preprocess(image).unsqueeze(0).type(shared.interrogator.dtype).to(devices.device_interrogate) + + res = "" + + with torch.no_grad(), devices.autocast(): + image_features = 
shared.interrogator.clip_model.encode_image(clip_image).type(shared.interrogator.dtype) + image_features /= image_features.norm(dim=-1, keepdim=True) + + for name, topn, items in shared.interrogator.categories(): + matches = shared.interrogator.rank(image_features, items, top_count=topn) + for match, score in matches: + if shared.opts.interrogate_return_ranks: + res += f", ({match}:{score/100:.3f})" + else: + res += ", " + match + + prompts_dict[key] = (caption + res) + + except Exception as e: + import traceback + traceback.print_exc() + print(e) + cause_err = True + finally: + shared.interrogator.unload() + if cause_err: + print("Exception occurred during auto-tagging(blip/clip)") + return Processed() + + return prompts_dict + + + def remove_reserved_token(self, token_list): + reserved_list = ["pink_background","simple_background","pink","pink_theme"] + + result_list = [] + + head_token = token_list[0] + + if head_token[2] == "normal": + head_token_str = head_token[0].replace('pink background', '') + token_list[0] = (head_token_str, head_token[1], head_token[2]) + + for token in token_list: + if token[0] in reserved_list: + continue + result_list.append(token) + + return result_list + + def remove_blacklisted_token(self, token_list): + black_list_path = os.path.join(self.prompts_dir, "blacklist.txt") + if not os.path.isfile(black_list_path): + print(black_list_path + " not found.") + return token_list + + with open(black_list_path) as f: + black_list = [s.strip() for s in f.readlines()] + + result_list = [] + + for token in token_list: + if token[0] in black_list: + continue + result_list.append(token) + + token_list = result_list + + return token_list + + def add_token(self, token_list): + add_list_path = os.path.join(self.prompts_dir, "add_token.txt") + if not os.path.isfile(add_list_path): + print(add_list_path + " not found.") + + if self.add_tag_replace_underscore: + token_list = [ (x[0].replace("_"," "), x[1], x[2]) for x in token_list ] + + return 
token_list + + if not self.calc_parser: + self.calc_parser = CalcParser() + + with open(add_list_path) as f: + add_list = json.load(f) + ''' + [ + { + "target":"test_token", + "min_score":0.8, + "token": ["lora_name_A", "0.5"], + "type":"lora" + }, + { + "target":"test_token", + "min_score":0.5, + "token": ["bbbb", "score - 0.1"], + "type":"normal" + }, + { + "target":"test_token2", + "min_score":0.8, + "token": ["hypernet_name_A", "score"], + "type":"hypernet" + }, + { + "target":"test_token3", + "min_score":0.0, + "token": ["dddd", "score"], + "type":"normal" + } + ] + ''' + result_list = [] + + for token in token_list: + for add_item in add_list: + if token[0] == add_item["target"]: + if token[1] > add_item["min_score"]: + # hit + formula = str(add_item["token"][1]) + formula = formula.replace("score",str(token[1])) + print('Input: %s' % str(add_item["token"][1])) + + try: + score = self.calc_parser.parse(formula) + score = round(score, 3) + except (ParseError, ZeroDivisionError) as e: + print('Input: %s' % str(add_item["token"][1])) + print('Error: %s' % e) + print("ignore this token") + continue + + print("score = " + str(score)) + result_list.append( ( add_item["token"][0], score, add_item["type"] ) ) + + if self.add_tag_replace_underscore: + token_list = [ (x[0].replace("_"," "), x[1], x[2]) for x in token_list ] + + token_list = token_list + result_list + + return token_list + + def create_prompts_dict(self, imgs, masks, auto_tag_mode): + prompts_dict = {} + + if auto_tag_mode == "DeepDanbooru": + raw_dict = self.interrogate_deepdanbooru(imgs, masks) + elif auto_tag_mode == "CLIP": + raw_dict = self.interrogate_clip(imgs, masks) + + repatter = re.compile(r'\((.+)\:([0-9\.]+)\)') + + for key, value_str in raw_dict.items(): + value_list = [x.strip() for x in value_str.split(',')] + + value = [] + for v in value_list: + m = repatter.fullmatch(v) + if m: + value.append((m.group(1), float(m.group(2)), "normal")) + else: + value.append((v, 1, "no_score")) + +# 
print(value) + value = self.remove_reserved_token(value) +# print(value) + value = self.remove_blacklisted_token(value) +# print(value) + value = self.add_token(value) +# print(value) + + def create_token_str(x): + print(x) + if x[2] == "no_score": + return x[0] + elif x[2] == "lora": + return "\ - If you have trouble entering the video path manually, you can also use drag and drop. \ -
") - - with gr.TabItem('configuration', elem_id='ebs_configuration'): - with gr.Tabs(elem_id="ebs_configuration_tab"): - with gr.TabItem(label="stage 1",elem_id='ebs_configuration_tab1'): - with gr.Row(): - frame_width = gr.Number(value=-1, label="Frame Width", precision=0, interactive=True) - frame_height = gr.Number(value=-1, label="Frame Height", precision=0, interactive=True) - gr.HTML(value="\ - -1 means that it is calculated automatically. If both are -1, the size will be the same as the source size. \ -
") - - st1_masking_method_index = gr.Radio(label='Masking Method', choices=["transparent-background","clipseg","transparent-background AND clipseg"], value="transparent-background", type="index") - - with gr.Accordion(label="transparent-background options"): - st1_mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.0) - - # https://pypi.org/project/transparent-background/ - gr.HTML(value="\ - configuration for \ - [transparent-background]\ -
") - tb_use_fast_mode = gr.Checkbox(label="Use Fast Mode(It will be faster, but the quality of the mask will be lower.)", value=False) - tb_use_jit = gr.Checkbox(label="Use Jit", value=False) - - with gr.Accordion(label="clipseg options"): - clipseg_mask_prompt = gr.Textbox(label='Mask Target (e.g., girl, cats)', lines=1) - clipseg_exclude_prompt = gr.Textbox(label='Exclude Target (e.g., finger, book)', lines=1) - clipseg_mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.4) - clipseg_mask_blur_size = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size(MedianBlur)', value=11) - clipseg_mask_blur_size2 = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size(GaussianBlur)', value=11) - - with gr.TabItem(label="stage 2", elem_id='ebs_configuration_tab2'): - key_min_gap = gr.Slider(minimum=0, maximum=500, step=1, label='Minimum keyframe gap', value=10) - key_max_gap = gr.Slider(minimum=0, maximum=1000, step=1, label='Maximum keyframe gap', value=300) - key_th = gr.Slider(minimum=0.0, maximum=100.0, step=0.1, label='Threshold of delta frame edge', value=8.5) - key_add_last_frame = gr.Checkbox(label="Add last frame to keyframes", value=True) - - with gr.TabItem(label="stage 7", elem_id='ebs_configuration_tab7'): - blend_rate = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Crossfade blend rate', value=1.0) - export_type = gr.Dropdown(choices=["mp4","webm","gif","rawvideo"], value="mp4" ,label="Export type") - - with gr.TabItem(label="stage 8", elem_id='ebs_configuration_tab8'): - bg_src = gr.Textbox(label='Background source(mp4 or directory containing images)', lines=1) - bg_type = gr.Dropdown(choices=["Fit video length","Loop"], value="Fit video length" ,label="Background type") - mask_blur_size = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size', value=5) - mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.0) - 
#is_transparent = gr.Checkbox(label="Is Transparent", value=True, visible = False) - fg_transparency = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Foreground Transparency', value=0.0) - - with gr.TabItem(label="etc", elem_id='ebs_configuration_tab_etc'): - mask_mode = gr.Dropdown(choices=["Normal","Invert","None"], value="Normal" ,label="Mask Mode") - with gr.TabItem('info', elem_id='ebs_info'): - gr.HTML(value="\
- The process of creating a video can be divided into the following stages.
\
- (Stage 3, 4, and 6 only show a guide and do nothing actual processing.)
\
- stage 1
\
- Extract frames from the original video.
\
- Generate a mask image.
\
- stage 2
\
- Select keyframes to be given to ebsynth.
\
- stage 3
\
- img2img keyframes.
\
- stage 4
\
- and upscale to the size of the original video.
\
- stage 5
\
- Rename keyframes.
\
- Generate .ebs file.(ebsynth project file)
\
- stage 6
\
- Running ebsynth.(on your self)
\
- Open the generated .ebs under project directory and press [Run All] button.
\
- If ""out-*"" directory already exists in the Project directory, delete it manually before executing.
\
- If multiple .ebs files are generated, run them all.
\
- stage 7
\
- Concatenate each frame while crossfading.
\
- Composite audio files extracted from the original video onto the concatenated video.
\
- stage 8
\
- This is an extra stage.
\
- You can put any image or images or video you like in the background.
\
- You can specify in this field -> [Ebsynth Utility]->[configuration]->[stage 8]->[Background source]
\
- If you have already created a background video in Invert Mask Mode([Ebsynth Utility]->[configuration]->[etc]->[Mask Mode]),
\
- You can specify \"path_to_project_dir/inv/crossfade_tmp\".
\
-
\ + If you have trouble entering the video path manually, you can also use drag and drop.For large videos, please enter the path manually. \ +
") + + with gr.TabItem('configuration', elem_id='ebs_configuration'): + with gr.Tabs(elem_id="ebs_configuration_tab"): + with gr.TabItem(label="stage 1",elem_id='ebs_configuration_tab1'): + with gr.Row(): + frame_width = gr.Number(value=-1, label="Frame Width", precision=0, interactive=True) + frame_height = gr.Number(value=-1, label="Frame Height", precision=0, interactive=True) + gr.HTML(value="\ + -1 means that it is calculated automatically. If both are -1, the size will be the same as the source size. \ +
") + + st1_masking_method_index = gr.Radio(label='Masking Method', choices=["transparent-background","clipseg","transparent-background AND clipseg"], value="transparent-background", type="index") + + with gr.Accordion(label="transparent-background options"): + st1_mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.0) + + # https://pypi.org/project/transparent-background/ + gr.HTML(value="\ + configuration for \ + [transparent-background]\ +
") + tb_use_fast_mode = gr.Checkbox(label="Use Fast Mode(It will be faster, but the quality of the mask will be lower.)", value=False) + tb_use_jit = gr.Checkbox(label="Use Jit", value=False) + + with gr.Accordion(label="clipseg options"): + clipseg_mask_prompt = gr.Textbox(label='Mask Target (e.g., girl, cats)', lines=1) + clipseg_exclude_prompt = gr.Textbox(label='Exclude Target (e.g., finger, book)', lines=1) + clipseg_mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.4) + clipseg_mask_blur_size = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size(MedianBlur)', value=11) + clipseg_mask_blur_size2 = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size(GaussianBlur)', value=11) + + with gr.TabItem(label="stage 2", elem_id='ebs_configuration_tab2'): + key_min_gap = gr.Slider(minimum=0, maximum=500, step=1, label='Minimum keyframe gap', value=10) + key_max_gap = gr.Slider(minimum=0, maximum=1000, step=1, label='Maximum keyframe gap', value=300) + key_th = gr.Slider(minimum=0.0, maximum=100.0, step=0.1, label='Threshold of delta frame edge', value=8.5) + key_add_last_frame = gr.Checkbox(label="Add last frame to keyframes", value=True) + + with gr.TabItem(label="stage 3.5", elem_id='ebs_configuration_tab3_5'): + gr.HTML(value="\ + [color-matcher]\ +
") + + color_matcher_method = gr.Radio(label='Color Transfer Method', choices=['default', 'hm', 'reinhard', 'mvgd', 'mkl', 'hm-mvgd-hm', 'hm-mkl-hm'], value="hm-mkl-hm", type="value") + color_matcher_ref_type = gr.Radio(label='Color Matcher Ref Image Type', choices=['original video frame', 'first frame of img2img result'], value="original video frame", type="index") + gr.HTML(value="\ + If an image is specified below, it will be used with highest priority.\ +
") + color_matcher_ref_image = gr.Image(label="Color Matcher Ref Image", source='upload', mirror_webcam=False, type='pil') + st3_5_use_mask = gr.Checkbox(label="Apply mask to the result", value=True) + st3_5_use_mask_ref = gr.Checkbox(label="Apply mask to the Ref Image", value=False) + st3_5_use_mask_org = gr.Checkbox(label="Apply mask to original image", value=False) + #st3_5_number_of_itr = gr.Slider(minimum=1, maximum=10, step=1, label='Number of iterations', value=1) + + with gr.TabItem(label="stage 7", elem_id='ebs_configuration_tab7'): + blend_rate = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Crossfade blend rate', value=1.0) + export_type = gr.Dropdown(choices=["mp4","webm","gif","rawvideo"], value="mp4" ,label="Export type") + + with gr.TabItem(label="stage 8", elem_id='ebs_configuration_tab8'): + bg_src = gr.Textbox(label='Background source(mp4 or directory containing images)', lines=1) + bg_type = gr.Dropdown(choices=["Fit video length","Loop"], value="Fit video length" ,label="Background type") + mask_blur_size = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size', value=5) + mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.0) + #is_transparent = gr.Checkbox(label="Is Transparent", value=True, visible = False) + fg_transparency = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Foreground Transparency', value=0.0) + + with gr.TabItem(label="etc", elem_id='ebs_configuration_tab_etc'): + mask_mode = gr.Dropdown(choices=["Normal","Invert","None"], value="Normal" ,label="Mask Mode") + with gr.TabItem('info', elem_id='ebs_info'): + gr.HTML(value="\
+ The process of creating a video can be divided into the following stages.
\
+ (Stage 3, 4, and 6 only show a guide and do nothing actual processing.)
\
+ stage 1
\
+ Extract frames from the original video.
\
+ Generate a mask image.
\
+ stage 2
\
+ Select keyframes to be given to ebsynth.
\
+ stage 3
\
+ img2img keyframes.
\
+ stage 3.5
\
+ (this is optional. Perform color correction on the img2img results and expect flickering to decrease. Or, you can simply change the color tone from the generated result.)
\
+ stage 4
\
+ and upscale to the size of the original video.
\
+ stage 5
\
+ Rename keyframes.
\
+ Generate .ebs file.(ebsynth project file)
\
+ stage 6
\
+ Running ebsynth.(on your self)
\
+ Open the generated .ebs under project directory and press [Run All] button.
\
+ If ""out-*"" directory already exists in the Project directory, delete it manually before executing.
\
+ If multiple .ebs files are generated, run them all.
\
+ stage 7
\
+ Concatenate each frame while crossfading.
\
+ Composite audio files extracted from the original video onto the concatenated video.
\
+ stage 8
\
+ This is an extra stage.
\
+ You can put any image or images or video you like in the background.
\
+ You can specify in this field -> [Ebsynth Utility]->[configuration]->[stage 8]->[Background source]
\
+ If you have already created a background video in Invert Mask Mode([Ebsynth Utility]->[configuration]->[etc]->[Mask Mode]),
\
+ You can specify \"path_to_project_dir/inv/crossfade_tmp\".
\
+