diff --git a/README.md b/README.md index 953b538..8192056 100644 --- a/README.md +++ b/README.md @@ -9,15 +9,30 @@ #### sample 1
-#### sample 2 - - -#### sample 3 blend background +#### sample 2 blend background - person : masterpiece, best quality, masterpiece, 1girl, masterpiece, best quality,anime screencap, anime style - background : cyberpunk, factory, room ,anime screencap, anime style - It is also possible to blend with your favorite videos. +#### sample 3 auto tagging +- left : original +- center : apply the same prompts in all keyframes +- right : apply auto tagging by deepdanbooru in all keyframes +- This function improves the depiction of detailed changes in facial expressions, hand gestures, etc. + In the sample video, the "closed_eyes" and "hands_on_own_face" tags have been added to better represent eye blinks and hands brought in front of the face. + + +#### sample 4 auto tagging (apply lora dynamically) +- left : apply auto tagging by deepdanbooru in all keyframes +- right : apply auto tagging by deepdanbooru in all keyframes + apply "anyahehface" lora dynamically +- Added the function to dynamically apply TI, hypernet, Lora, and additional prompts according to automatically attached tags. + In the sample video, if the "smile" tag is given, the lora and lora trigger keywords are set to be added according to the strength of the "smile" tag. + Also, since automatically added tags are sometimes incorrect, unnecessary tags are listed in the blacklist. + [Here](sample/) is the actual configuration file used. Place it in the "Project directory" to use it. 
+ + + ## Installation - Install [ffmpeg](https://ffmpeg.org/) for your operating system (https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/) diff --git a/calculator.py b/calculator.py new file mode 100644 index 0000000..35d66bf --- /dev/null +++ b/calculator.py @@ -0,0 +1,237 @@ +# https://www.mycompiler.io/view/3TFZagC + +class ParseError(Exception): + def __init__(self, pos, msg, *args): + self.pos = pos + self.msg = msg + self.args = args + + def __str__(self): + return '%s at position %s' % (self.msg % self.args, self.pos) + +class Parser: + def __init__(self): + self.cache = {} + + def parse(self, text): + self.text = text + self.pos = -1 + self.len = len(text) - 1 + rv = self.start() + self.assert_end() + return rv + + def assert_end(self): + if self.pos < self.len: + raise ParseError( + self.pos + 1, + 'Expected end of string but got %s', + self.text[self.pos + 1] + ) + + def eat_whitespace(self): + while self.pos < self.len and self.text[self.pos + 1] in " \f\v\r\t\n": + self.pos += 1 + + def split_char_ranges(self, chars): + try: + return self.cache[chars] + except KeyError: + pass + + rv = [] + index = 0 + length = len(chars) + + while index < length: + if index + 2 < length and chars[index + 1] == '-': + if chars[index] >= chars[index + 2]: + raise ValueError('Bad character range') + + rv.append(chars[index:index + 3]) + index += 3 + else: + rv.append(chars[index]) + index += 1 + + self.cache[chars] = rv + return rv + + def char(self, chars=None): + if self.pos >= self.len: + raise ParseError( + self.pos + 1, + 'Expected %s but got end of string', + 'character' if chars is None else '[%s]' % chars + ) + + next_char = self.text[self.pos + 1] + if chars == None: + self.pos += 1 + return next_char + + for char_range in self.split_char_ranges(chars): + if len(char_range) == 1: + if next_char == char_range: + self.pos += 1 + return next_char + elif char_range[0] <= next_char <= char_range[2]: + self.pos += 1 + return next_char + + raise 
ParseError( + self.pos + 1, + 'Expected %s but got %s', + 'character' if chars is None else '[%s]' % chars, + next_char + ) + + def keyword(self, *keywords): + self.eat_whitespace() + if self.pos >= self.len: + raise ParseError( + self.pos + 1, + 'Expected %s but got end of string', + ','.join(keywords) + ) + + for keyword in keywords: + low = self.pos + 1 + high = low + len(keyword) + + if self.text[low:high] == keyword: + self.pos += len(keyword) + self.eat_whitespace() + return keyword + + raise ParseError( + self.pos + 1, + 'Expected %s but got %s', + ','.join(keywords), + self.text[self.pos + 1], + ) + + def match(self, *rules): + self.eat_whitespace() + last_error_pos = -1 + last_exception = None + last_error_rules = [] + + for rule in rules: + initial_pos = self.pos + try: + rv = getattr(self, rule)() + self.eat_whitespace() + return rv + except ParseError as e: + self.pos = initial_pos + + if e.pos > last_error_pos: + last_exception = e + last_error_pos = e.pos + last_error_rules.clear() + last_error_rules.append(rule) + elif e.pos == last_error_pos: + last_error_rules.append(rule) + + if len(last_error_rules) == 1: + raise last_exception + else: + raise ParseError( + last_error_pos, + 'Expected %s but got %s', + ','.join(last_error_rules), + self.text[last_error_pos] + ) + + def maybe_char(self, chars=None): + try: + return self.char(chars) + except ParseError: + return None + + def maybe_match(self, *rules): + try: + return self.match(*rules) + except ParseError: + return None + + def maybe_keyword(self, *keywords): + try: + return self.keyword(*keywords) + except ParseError: + return None + +class CalcParser(Parser): + def start(self): + return self.expression() + + def expression(self): + rv = self.match('term') + while True: + op = self.maybe_keyword('+', '-') + if op is None: + break + + term = self.match('term') + if op == '+': + rv += term + else: + rv -= term + + return rv + + def term(self): + rv = self.match('factor') + while True: + op = 
self.maybe_keyword('*', '/') + if op is None: + break + + term = self.match('factor') + if op == '*': + rv *= term + else: + rv /= term + + return rv + + def factor(self): + if self.maybe_keyword('('): + rv = self.match('expression') + self.keyword(')') + + return rv + + return self.match('number') + + def number(self): + chars = [] + + sign = self.maybe_keyword('+', '-') + if sign is not None: + chars.append(sign) + + chars.append(self.char('0-9')) + + while True: + char = self.maybe_char('0-9') + if char is None: + break + + chars.append(char) + + if self.maybe_char('.'): + chars.append('.') + chars.append(self.char('0-9')) + + while True: + char = self.maybe_char('0-9') + if char is None: + break + + chars.append(char) + + rv = float(''.join(chars)) + return rv + diff --git a/imgs/sample_anyaheh.mp4 b/imgs/sample_anyaheh.mp4 new file mode 100644 index 0000000..58891b9 Binary files /dev/null and b/imgs/sample_anyaheh.mp4 differ diff --git a/imgs/sample_autotag.mp4 b/imgs/sample_autotag.mp4 new file mode 100644 index 0000000..5c14c5c Binary files /dev/null and b/imgs/sample_autotag.mp4 differ diff --git a/sample/add_token.txt b/sample/add_token.txt new file mode 100644 index 0000000..0f41358 --- /dev/null +++ b/sample/add_token.txt @@ -0,0 +1,54 @@ +[ + { + "target":"smile", + "min_score":0.5, + "token": ["lottalewds_v0", "1.2"], + "type":"lora" + }, + { + "target":"smile", + "min_score":0.5, + "token": ["anyahehface", "score*1.2"], + "type":"normal" + }, + { + "target":"smile", + "min_score":0.5, + "token": ["wicked smug", "score*1.2"], + "type":"normal" + }, + { + "target":"smile", + "min_score":0.5, + "token": ["half closed eyes", "0.2 + score*0.3"], + "type":"normal" + }, + + + + { + "target":"test_token", + "min_score":0.8, + "token": ["lora_name_A", "0.5"], + "type":"lora" + }, + { + "target":"test_token", + "min_score":0.5, + "token": ["bbbb", "score - 0.1"], + "type":"normal" + }, + { + "target":"test_token2", + "min_score":0.8, + "token": 
["hypernet_name_A", "score"], + "type":"hypernet" + }, + { + "target":"test_token3", + "min_score":0.0, + "token": ["dddd", "score"], + "type":"normal" + } +] + diff --git a/sample/blacklist.txt b/sample/blacklist.txt new file mode 100644 index 0000000..6938ecd --- /dev/null +++ b/sample/blacklist.txt @@ -0,0 +1,10 @@ +motion_blur +blurry +realistic +depth_of_field +mountain +tree +water +underwater +tongue +tongue_out diff --git a/scripts/custom_script.py b/scripts/custom_script.py index 25fa4f9..bbb58fe 100644 --- a/scripts/custom_script.py +++ b/scripts/custom_script.py @@ -3,17 +3,23 @@ import gradio as gr import os import torch import random +import time +import pprint from modules.processing import process_images,Processed from modules.paths import models_path from modules.textual_inversion import autocrop import modules.images +from modules import shared,deepbooru,masking import cv2 import copy import numpy as np -from PIL import Image +from PIL import Image,ImageOps import glob import requests +import json +import re +from extensions.ebsynth_utility.calculator import CalcParser,ParseError def get_my_dir(): if os.path.isdir("extensions/ebsynth_utility"): @@ -61,6 +67,9 @@ class Script(scripts.Script): face_detector = None face_merge_mask_filename = "face_crop_img2img_mask.png" face_merge_mask_image = None + prompts_dir = "" + calc_parser = None + is_invert_mask = False # The title of the script. This is what will be displayed in the dropdown menu. 
def title(self): @@ -83,27 +92,40 @@ class Script(scripts.Script): with gr.Column(variant='panel'): with gr.Column(): project_dir = gr.Textbox(label='Project directory', lines=1) - mask_mode = gr.Dropdown(choices=["Normal","Invert","None","Don't Override"], value="Normal" ,label="Mask Mode(Override img2img Mask mode)") - inpaint_area = gr.Dropdown(choices=["Whole picture","Only masked","Don't Override"], type = "index", value="Only masked" ,label="Inpaint Area(Override img2img Inpaint area)") - - with gr.Column(): - use_depth = gr.Checkbox(True, label="Use Depth Map If exists in /video_key_depth") - gr.HTML(value="\ - See \ - [here] for depth map.\ -
") - with gr.Column(): + with gr.Accordion("Mask option"): + mask_mode = gr.Dropdown(choices=["Normal","Invert","None","Don't Override"], value="Normal" ,label="Mask Mode(Override img2img Mask mode)") + inpaint_area = gr.Dropdown(choices=["Whole picture","Only masked","Don't Override"], type = "index", value="Only masked" ,label="Inpaint Area(Override img2img Inpaint area)") + use_depth = gr.Checkbox(True, label="Use Depth Map If exists in /video_key_depth") + gr.HTML(value="\ + See \ + [here] for depth map.\ +
") + + with gr.Accordion("Loopback option"): img2img_repeat_count = gr.Slider(minimum=1, maximum=30, step=1, value=1, label="Img2Img Repeat Count (Loop Back)") inc_seed = gr.Slider(minimum=0, maximum=9999999, step=1, value=1, label="Add N to seed when repeating ") - with gr.Column(): - is_facecrop = gr.Checkbox(False, label="use Face Crop img2img") - face_detection_method = gr.Dropdown(choices=["YuNet","Yolov5_anime"], value="YuNet" ,label="Face Detection Method") + with gr.Accordion("Auto Tagging option"): + auto_tag_mode = gr.Dropdown(choices=["None","DeepDanbooru","CLIP"], value="None" ,label="Auto Tagging") + add_tag_to_head = gr.Checkbox(False, label="Add additional prompts to the head") gr.HTML(value="\
- If loading of the Yolov5_anime model fails, check\
- [this] solution.\
+ The results are stored in timestamp_prompts.txt.
\
+ If you want to use the same tagging results the next time you run img2img, rename the file to prompts.txt
\
+ Recommend enabling the following settings.
\
+ \
+ Settings->Interrogate Option->Interrogate: include ranks of model tags matches in results\
+ \
\ + If loading of the Yolov5_anime model fails, check\ + [this] solution.\ +
") max_crop_size = gr.Slider(minimum=0, maximum=2048, step=1, value=1024, label="Max Crop Size") face_denoising_strength = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.5, label="Face Denoising Strength") face_area_magnification = gr.Slider(minimum=1.00, maximum=10.00, step=0.01, value=1.5, label="Face Area Magnification ") @@ -115,10 +137,10 @@ class Script(scripts.Script): value = "face close up," ) - return [project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt] + return [project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt] - def detect_face(self, img_array): + def detect_face_from_img(self, img_array): if not self.face_detector: dnn_model_path = autocrop.download_and_cache_models(os.path.join(models_path, "opencv")) self.face_detector = cv2.FaceDetectorYN.create(dnn_model_path, "", (0, 0)) @@ -127,14 +149,19 @@ class Script(scripts.Script): _, result = self.face_detector.detect(img_array) return result - def detect_anime_face(self, img_array): + def detect_anime_face_from_img(self, img_array): + import sys + if not self.anime_face_detector: + if 'models' in sys.modules: + del sys.modules['models'] + anime_model_path = download_and_cache_models(os.path.join(models_path, "yolov5_anime")) if not os.path.isfile(anime_model_path): print( "WARNING!! 
" + anime_model_path + " not found.") print( "use YuNet instead.") - return self.detect_face(img_array) + return self.detect_face_from_img(img_array) self.anime_face_detector = torch.hub.load('ultralytics/yolov5', 'custom', path=anime_model_path) @@ -150,6 +177,49 @@ class Script(scripts.Script): return faces + def detect_face(self, img, mask, face_detection_method, max_crop_size): + img_array = np.array(img) + + if mask is not None: + if self.is_invert_mask: + mask = ImageOps.invert(mask) + mask_array = np.array(mask)/255 + if mask_array.ndim == 2: + mask_array = mask_array[:, :, np.newaxis] + + img_array = mask_array * img_array + img_array = img_array.astype(np.uint8) + + # image without alpha + img_array = img_array[:,:,:3] + + if face_detection_method == "YuNet": + faces = self.detect_face_from_img(img_array) + elif face_detection_method == "Yolov5_anime": + faces = self.detect_anime_face_from_img(img_array) + else: + faces = self.detect_face_from_img(img_array) + + if faces is None or len(faces) == 0: + return [] + + face_coords = [] + for face in faces: + x = int(face[0]) + y = int(face[1]) + w = int(face[2]) + h = int(face[3]) + if max(w,h) > max_crop_size: + print("ignore big face") + continue + if w == 0 or h == 0: + print("ignore w,h = 0 face") + continue + + face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] ) + + return face_coords + def get_mask(self): def create_mask( output, x_rate, y_rate, k_size ): img = np.zeros((512, 512, 3)) @@ -327,6 +397,312 @@ class Script(scripts.Script): mask = depth return depth!=None, mask + +### auto tagging + debug_count = 0 + + def get_masked_image(self, image, mask_image): + + if mask_image == None: + return image.convert("RGB") + + mask = mask_image.convert('L') + if self.is_invert_mask: + mask = ImageOps.invert(mask) + crop_region = masking.get_crop_region(np.array(mask), 0) +# crop_region = masking.expand_crop_region(crop_region, self.width, self.height, 
mask.width, mask.height) +# x1, y1, x2, y2 = crop_region + image = image.crop(crop_region).convert("RGB") + mask = mask.crop(crop_region) + + base_img = Image.new("RGB", image.size, (255, 190, 200)) + + image = Image.composite( image, base_img, mask ) + +# image.save("scripts/get_masked_image_test_"+ str(self.debug_count) + ".png") +# self.debug_count += 1 + + return image + + def interrogate_deepdanbooru(self, imgs, masks): + prompts_dict = {} + cause_err = False + + try: + deepbooru.model.start() + + for img,mask in zip(imgs,masks): + key = os.path.basename(img) + print(key + " interrogate deepdanbooru") + + image = Image.open(img) + mask_image = Image.open(mask) if mask else None + image = self.get_masked_image(image, mask_image) + + prompt = deepbooru.model.tag_multi(image) + + prompts_dict[key] = prompt + except Exception as e: + import traceback + traceback.print_exc() + print(e) + cause_err = True + finally: + deepbooru.model.stop() + if cause_err: + print("Exception occurred during auto-tagging(deepdanbooru)") + return Processed() + + return prompts_dict + + + def interrogate_clip(self, imgs, masks): + from modules import devices, shared, lowvram, paths + import importlib + import models + + caption_list = [] + prompts_dict = {} + cause_err = False + + try: + if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: + lowvram.send_everything_to_cpu() + devices.torch_gc() + + with paths.Prioritize("BLIP"): + importlib.reload(models) + shared.interrogator.load() + + for img,mask in zip(imgs,masks): + key = os.path.basename(img) + print(key + " generate caption") + + image = Image.open(img) + mask_image = Image.open(mask) if mask else None + image = self.get_masked_image(image, mask_image) + + caption = shared.interrogator.generate_caption(image) + caption_list.append(caption) + + shared.interrogator.send_blip_to_ram() + devices.torch_gc() + + for img,mask,caption in zip(imgs,masks,caption_list): + key = os.path.basename(img) + print(key + " interrogate clip") + 
+ image = Image.open(img) + mask_image = Image.open(mask) if mask else None + image = self.get_masked_image(image, mask_image) + + clip_image = shared.interrogator.clip_preprocess(image).unsqueeze(0).type(shared.interrogator.dtype).to(devices.device_interrogate) + + res = "" + + with torch.no_grad(), devices.autocast(): + image_features = shared.interrogator.clip_model.encode_image(clip_image).type(shared.interrogator.dtype) + image_features /= image_features.norm(dim=-1, keepdim=True) + + for name, topn, items in shared.interrogator.categories(): + matches = shared.interrogator.rank(image_features, items, top_count=topn) + for match, score in matches: + if shared.opts.interrogate_return_ranks: + res += f", ({match}:{score/100:.3f})" + else: + res += ", " + match + + prompts_dict[key] = (caption + res) + + except Exception as e: + import traceback + traceback.print_exc() + print(e) + cause_err = True + finally: + shared.interrogator.unload() + if cause_err: + print("Exception occurred during auto-tagging(blip/clip)") + return Processed() + + return prompts_dict + + + def remove_reserved_token(self, token_list): + reserved_list = ["pink_background","simple_background","pink"] + + result_list = [] + + head_token = token_list[0] + + if head_token[2] == "normal": + head_token_str = head_token[0].replace('pink background', '') + token_list[0] = (head_token_str, head_token[1], head_token[2]) + + for token in token_list: + if token[0] in reserved_list: + continue + result_list.append(token) + + return result_list + + def remove_blacklisted_token(self, token_list): + black_list_path = os.path.join(self.prompts_dir, "blacklist.txt") + if not os.path.isfile(black_list_path): + print(black_list_path + " not found.") + return token_list + + with open(black_list_path) as f: + black_list = [s.strip() for s in f.readlines()] + + result_list = [] + + for token in token_list: + if token[0] in black_list: + continue + result_list.append(token) + + token_list = result_list + + return 
token_list + + def add_token(self, token_list): + add_list_path = os.path.join(self.prompts_dir, "add_token.txt") + if not os.path.isfile(add_list_path): + print(add_list_path + " not found.") + return token_list + + if not self.calc_parser: + self.calc_parser = CalcParser() + + with open(add_list_path) as f: + add_list = json.load(f) + ''' + [ + { + "target":"test_token", + "min_score":0.8, + "token": ["lora_name_A", "0.5"], + "type":"lora" + }, + { + "target":"test_token", + "min_score":0.5, + "token": ["bbbb", "score - 0.1"], + "type":"normal" + }, + { + "target":"test_token2", + "min_score":0.8, + "token": ["hypernet_name_A", "score"], + "type":"hypernet" + }, + { + "target":"test_token3", + "min_score":0.0, + "token": ["dddd", "score"], + "type":"normal" + } + ] + ''' + result_list = [] + + for token in token_list: + for add_item in add_list: + if token[0] == add_item["target"]: + if token[1] > add_item["min_score"]: + # hit + formula = str(add_item["token"][1]) + formula = formula.replace("score",str(token[1])) + print('Input: %s' % str(add_item["token"][1])) + + try: + score = self.calc_parser.parse(formula) + score = round(score, 3) + except (ParseError, ZeroDivisionError) as e: + print('Input: %s' % str(add_item["token"][1])) + print('Error: %s' % e) + print("ignore this token") + continue + + print("score = " + str(score)) + result_list.append( ( add_item["token"][0], score, add_item["type"] ) ) + + token_list = token_list + result_list + + return token_list + + def create_prompts_dict(self, imgs, masks, auto_tag_mode): + prompts_dict = {} + + if auto_tag_mode == "DeepDanbooru": + raw_dict = self.interrogate_deepdanbooru(imgs, masks) + elif auto_tag_mode == "CLIP": + raw_dict = self.interrogate_clip(imgs, masks) + + repatter = re.compile(r'\((.+)\:([0-9\.]+)\)') + + for key, value_str in raw_dict.items(): + value_list = [x.strip() for x in value_str.split(',')] + + value = [] + for v in value_list: + m = repatter.fullmatch(v) + if m: + 
value.append((m.group(1), float(m.group(2)), "normal")) + else: + value.append((v, 1, "no_score")) + +# print(value) + value = self.remove_reserved_token(value) +# print(value) + value = self.remove_blacklisted_token(value) +# print(value) + value = self.add_token(value) +# print(value) + + def create_token_str(x): + print(x) + if x[2] == "no_score": + return x[0] + elif x[2] == "lora": + return "