add function auto tagging

Temporary fix for module name conflict between blip and yolov
Improved UI
refactoring
Updated readme
pull/48/head
s9roll7 2023-02-11 17:02:33 +09:00
parent fb192bc100
commit 9553fb33ff
7 changed files with 788 additions and 99 deletions

View File

@ -9,15 +9,30 @@
#### sample 1
<div><video controls src="https://user-images.githubusercontent.com/118420657/213474231-38cac10e-7e75-43e1-b912-4e7727074d39.mp4" muted="false"></video></div>
#### sample 2
<div><video controls src="https://user-images.githubusercontent.com/118420657/213474343-e49e797d-386e-459f-9be9-2241b2d6266d.mp4" muted="false"></video></div>
#### sample 3 blend background
#### sample 2 blend background
- person : masterpiece, best quality, masterpiece, 1girl, masterpiece, best quality,anime screencap, anime style
- background : cyberpunk, factory, room ,anime screencap, anime style
- It is also possible to blend with your favorite videos.
<div><video controls src="https://user-images.githubusercontent.com/118420657/214592811-9677634f-93bb-40dd-95b6-1c97c8e7bb63.mp4" muted="false"></video></div>
#### sample 3 auto tagging
- left : original
- center : apply the same prompts in all keyframes
- right : apply auto tagging by deepdanbooru in all keyframes
- This function improves the detailed changes in facial expressions, hand expressions, etc.
In the sample video, the "closed_eyes" and "hands_on_own_face" tags have been added to better represent eye blinks and hands brought in front of the face.
#### sample 4 auto tagging (apply lora dynamically)
- left : apply auto tagging by deepdanbooru in all keyframes
- right : apply auto tagging by deepdanbooru in all keyframes + apply "anyahehface" lora dynamically
- Added the function to dynamically apply TI, hypernet, Lora, and additional prompts according to automatically attached tags.
In the sample video, if the "smile" tag is given, the lora and lora trigger keywords are set to be added according to the strength of the "smile" tag.
Also, since automatically added tags are sometimes incorrect, unnecessary tags are listed in the blacklist.
[Here](sample/) are the actual configuration files used. Place them in the "Project directory" to use them.
## Installation
- Install [ffmpeg](https://ffmpeg.org/) for your operating system
(https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/)

237
calculator.py Normal file
View File

@ -0,0 +1,237 @@
# https://www.mycompiler.io/view/3TFZagC
class ParseError(Exception):
    """Parse failure at a specific input position.

    `msg` is a %-style format string and `args` holds its substitution
    values; `pos` is the 0-based index in the input where parsing failed.
    """

    def __init__(self, pos, msg, *args):
        self.pos = pos
        self.msg = msg
        self.args = args

    def __str__(self):
        return f'{self.msg % self.args} at position {self.pos}'
class Parser:
    """Minimal recursive-descent parser base class.

    Subclasses implement rule methods (including `start`) that consume
    `self.text` via `char`/`keyword`/`match`. `self.pos` always points at
    the last consumed character (-1 before anything is consumed) and
    `self.len` is the index of the last character of the input.
    """

    def __init__(self):
        # Cache for split_char_ranges results, keyed by the raw spec string.
        self.cache = {}

    def parse(self, text):
        """Run the subclass's `start` rule over `text`.

        The entire input must be consumed; otherwise ParseError is raised.
        """
        self.text = text
        self.pos = -1
        self.len = len(text) - 1  # index of last char; -1 for empty input
        rv = self.start()
        self.assert_end()
        return rv

    def assert_end(self):
        """Raise ParseError if any unconsumed input remains."""
        if self.pos < self.len:
            raise ParseError(
                self.pos + 1,
                'Expected end of string but got %s',
                self.text[self.pos + 1]
            )

    def eat_whitespace(self):
        """Advance past any run of whitespace characters."""
        while self.pos < self.len and self.text[self.pos + 1] in " \f\v\r\t\n":
            self.pos += 1

    def split_char_ranges(self, chars):
        """Split a spec like '0-9a' into pieces: ['0-9', 'a'].

        Results are cached per spec string. Raises ValueError for a
        descending range such as '9-0'.
        """
        try:
            return self.cache[chars]
        except KeyError:
            pass
        rv = []
        index = 0
        length = len(chars)
        while index < length:
            if index + 2 < length and chars[index + 1] == '-':
                if chars[index] >= chars[index + 2]:
                    raise ValueError('Bad character range')
                rv.append(chars[index:index + 3])
                index += 3
            else:
                rv.append(chars[index])
                index += 1
        self.cache[chars] = rv
        return rv

    def char(self, chars=None):
        """Consume and return the next character.

        If `chars` is given it is a spec of allowed characters/ranges
        (e.g. '0-9'); otherwise any character is accepted. Raises
        ParseError on end of input or mismatch (without consuming).
        """
        if self.pos >= self.len:
            raise ParseError(
                self.pos + 1,
                'Expected %s but got end of string',
                'character' if chars is None else '[%s]' % chars
            )
        next_char = self.text[self.pos + 1]
        if chars is None:  # fixed: identity comparison with None (was `== None`)
            self.pos += 1
            return next_char
        for char_range in self.split_char_ranges(chars):
            if len(char_range) == 1:
                if next_char == char_range:
                    self.pos += 1
                    return next_char
            elif char_range[0] <= next_char <= char_range[2]:
                self.pos += 1
                return next_char
        raise ParseError(
            self.pos + 1,
            'Expected %s but got %s',
            'character' if chars is None else '[%s]' % chars,
            next_char
        )

    def keyword(self, *keywords):
        """Consume one of `keywords` (skipping surrounding whitespace) and return it."""
        self.eat_whitespace()
        if self.pos >= self.len:
            raise ParseError(
                self.pos + 1,
                'Expected %s but got end of string',
                ','.join(keywords)
            )
        for keyword in keywords:
            low = self.pos + 1
            high = low + len(keyword)
            if self.text[low:high] == keyword:
                self.pos += len(keyword)
                self.eat_whitespace()
                return keyword
        raise ParseError(
            self.pos + 1,
            'Expected %s but got %s',
            ','.join(keywords),
            self.text[self.pos + 1],
        )

    def match(self, *rules):
        """Try each rule method name in order; return the first success.

        Failed rules backtrack `self.pos`. On total failure, re-raise the
        ParseError from the rule that advanced furthest, or an aggregate
        error when several rules tied at the same position.
        """
        self.eat_whitespace()
        last_error_pos = -1
        last_exception = None
        last_error_rules = []
        for rule in rules:
            initial_pos = self.pos
            try:
                rv = getattr(self, rule)()
                self.eat_whitespace()
                return rv
            except ParseError as e:
                self.pos = initial_pos  # backtrack before trying the next rule
                if e.pos > last_error_pos:
                    last_exception = e
                    last_error_pos = e.pos
                    last_error_rules.clear()
                    last_error_rules.append(rule)
                elif e.pos == last_error_pos:
                    last_error_rules.append(rule)
        if len(last_error_rules) == 1:
            raise last_exception
        else:
            raise ParseError(
                last_error_pos,
                'Expected %s but got %s',
                ','.join(last_error_rules),
                self.text[last_error_pos]
            )

    def maybe_char(self, chars=None):
        """Like `char`, but return None instead of raising on mismatch."""
        try:
            return self.char(chars)
        except ParseError:
            return None

    def maybe_match(self, *rules):
        """Like `match`, but return None instead of raising on failure."""
        try:
            return self.match(*rules)
        except ParseError:
            return None

    def maybe_keyword(self, *keywords):
        """Like `keyword`, but return None instead of raising on mismatch."""
        try:
            return self.keyword(*keywords)
        except ParseError:
            return None
class CalcParser(Parser):
    """Arithmetic expression evaluator built on Parser.

    Grammar (lowest precedence first):
        expression := term (('+'|'-') term)*
        term       := factor (('*'|'/') factor)*
        factor     := '(' expression ')' | number
        number     := ['+'|'-'] digits ['.' digits]
    Every result is a float.
    """

    def start(self):
        # Entry rule invoked by Parser.parse().
        return self.expression()

    def expression(self):
        # Left-associative addition and subtraction.
        result = self.match('term')
        while True:
            operator = self.maybe_keyword('+', '-')
            if operator is None:
                return result
            operand = self.match('term')
            result = result + operand if operator == '+' else result - operand

    def term(self):
        # Left-associative multiplication and division.
        result = self.match('factor')
        while True:
            operator = self.maybe_keyword('*', '/')
            if operator is None:
                return result
            operand = self.match('factor')
            result = result * operand if operator == '*' else result / operand

    def factor(self):
        # Either a parenthesized sub-expression or a literal number.
        if self.maybe_keyword('('):
            inner = self.match('expression')
            self.keyword(')')
            return inner
        return self.match('number')

    def number(self):
        # Optional sign, integer digits, optional fractional part.
        digits = []
        sign = self.maybe_keyword('+', '-')
        if sign is not None:
            digits.append(sign)
        digits.append(self.char('0-9'))
        while True:
            extra = self.maybe_char('0-9')
            if extra is None:
                break
            digits.append(extra)
        if self.maybe_char('.'):
            digits.append('.')
            digits.append(self.char('0-9'))
            while True:
                extra = self.maybe_char('0-9')
                if extra is None:
                    break
                digits.append(extra)
        return float(''.join(digits))

BIN
imgs/sample_anyaheh.mp4 Normal file

Binary file not shown.

BIN
imgs/sample_autotag.mp4 Normal file

Binary file not shown.

54
sample/add_token.txt Normal file
View File

@ -0,0 +1,54 @@
[
{
"target":"smile",
"min_score":0.5,
"token": ["lottalewds_v0", "1.2"],
"type":"lora"
},
{
"target":"smile",
"min_score":0.5,
"token": ["anyahehface", "score*1.2"],
"type":"normal"
},
{
"target":"smile",
"min_score":0.5,
"token": ["wicked smug", "score*1.2"],
"type":"normal"
},
{
"target":"smile",
"min_score":0.5,
"token": ["half closed eyes", "0.2 + score*0.3"],
"type":"normal"
},
{
"target":"test_token",
"min_score":0.8,
"token": ["lora_name_A", "0.5"],
"type":"lora"
},
{
"target":"test_token",
"min_score":0.5,
"token": ["bbbb", "score - 0.1"],
"type":"normal"
},
{
"target":"test_token2",
"min_score":0.8,
"token": ["hypernet_name_A", "score"],
"type":"hypernet"
},
{
"target":"test_token3",
"min_score":0.0,
"token": ["dddd", "score"],
"type":"normal"
}
]

10
sample/blacklist.txt Normal file
View File

@ -0,0 +1,10 @@
motion_blur
blurry
realistic
depth_of_field
mountain
tree
water
underwater
tongue
tongue_out

View File

@ -3,17 +3,23 @@ import gradio as gr
import os
import torch
import random
import time
import pprint
from modules.processing import process_images,Processed
from modules.paths import models_path
from modules.textual_inversion import autocrop
import modules.images
from modules import shared,deepbooru,masking
import cv2
import copy
import numpy as np
from PIL import Image
from PIL import Image,ImageOps
import glob
import requests
import json
import re
from extensions.ebsynth_utility.calculator import CalcParser,ParseError
def get_my_dir():
if os.path.isdir("extensions/ebsynth_utility"):
@ -61,6 +67,9 @@ class Script(scripts.Script):
face_detector = None
face_merge_mask_filename = "face_crop_img2img_mask.png"
face_merge_mask_image = None
prompts_dir = ""
calc_parser = None
is_invert_mask = False
# The title of the script. This is what will be displayed in the dropdown menu.
def title(self):
@ -83,27 +92,40 @@ class Script(scripts.Script):
with gr.Column(variant='panel'):
with gr.Column():
project_dir = gr.Textbox(label='Project directory', lines=1)
mask_mode = gr.Dropdown(choices=["Normal","Invert","None","Don't Override"], value="Normal" ,label="Mask Mode(Override img2img Mask mode)")
inpaint_area = gr.Dropdown(choices=["Whole picture","Only masked","Don't Override"], type = "index", value="Only masked" ,label="Inpaint Area(Override img2img Inpaint area)")
with gr.Column():
use_depth = gr.Checkbox(True, label="Use Depth Map If exists in /video_key_depth")
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
See \
<font color=\"blue\"><a href=\"https://github.com/thygate/stable-diffusion-webui-depthmap-script\">[here]</a></font> for depth map.\
</p>")
with gr.Column():
with gr.Accordion("Mask option"):
mask_mode = gr.Dropdown(choices=["Normal","Invert","None","Don't Override"], value="Normal" ,label="Mask Mode(Override img2img Mask mode)")
inpaint_area = gr.Dropdown(choices=["Whole picture","Only masked","Don't Override"], type = "index", value="Only masked" ,label="Inpaint Area(Override img2img Inpaint area)")
use_depth = gr.Checkbox(True, label="Use Depth Map If exists in /video_key_depth")
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
See \
<font color=\"blue\"><a href=\"https://github.com/thygate/stable-diffusion-webui-depthmap-script\">[here]</a></font> for depth map.\
</p>")
with gr.Accordion("Loopback option"):
img2img_repeat_count = gr.Slider(minimum=1, maximum=30, step=1, value=1, label="Img2Img Repeat Count (Loop Back)")
inc_seed = gr.Slider(minimum=0, maximum=9999999, step=1, value=1, label="Add N to seed when repeating ")
with gr.Column():
is_facecrop = gr.Checkbox(False, label="use Face Crop img2img")
face_detection_method = gr.Dropdown(choices=["YuNet","Yolov5_anime"], value="YuNet" ,label="Face Detection Method")
with gr.Accordion("Auto Tagging option"):
auto_tag_mode = gr.Dropdown(choices=["None","DeepDanbooru","CLIP"], value="None" ,label="Auto Tagging")
add_tag_to_head = gr.Checkbox(False, label="Add additional prompts to the head")
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
If loading of the Yolov5_anime model fails, check\
<font color=\"blue\"><a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/2235\">[this]</a></font> solution.\
The results are stored in timestamp_prompts.txt.<br>\
If you want to use the same tagging results the next time you run img2img, rename the file to prompts.txt<br>\
Recommend enabling the following settings.<br>\
<font color=\"red\">\
Settings->Interrogate Option->Interrogate: include ranks of model tags matches in results\
</font>\
</p>")
with gr.Accordion("Face Crop option"):
is_facecrop = gr.Checkbox(False, label="use Face Crop img2img")
with gr.Row():
face_detection_method = gr.Dropdown(choices=["YuNet","Yolov5_anime"], value="YuNet" ,label="Face Detection Method")
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
If loading of the Yolov5_anime model fails, check\
<font color=\"blue\"><a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/2235\">[this]</a></font> solution.\
</p>")
max_crop_size = gr.Slider(minimum=0, maximum=2048, step=1, value=1024, label="Max Crop Size")
face_denoising_strength = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.5, label="Face Denoising Strength")
face_area_magnification = gr.Slider(minimum=1.00, maximum=10.00, step=0.01, value=1.5, label="Face Area Magnification ")
@ -115,10 +137,10 @@ class Script(scripts.Script):
value = "face close up,"
)
return [project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt]
return [project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt]
def detect_face(self, img_array):
def detect_face_from_img(self, img_array):
if not self.face_detector:
dnn_model_path = autocrop.download_and_cache_models(os.path.join(models_path, "opencv"))
self.face_detector = cv2.FaceDetectorYN.create(dnn_model_path, "", (0, 0))
@ -127,14 +149,19 @@ class Script(scripts.Script):
_, result = self.face_detector.detect(img_array)
return result
def detect_anime_face(self, img_array):
def detect_anime_face_from_img(self, img_array):
import sys
if not self.anime_face_detector:
if 'models' in sys.modules:
del sys.modules['models']
anime_model_path = download_and_cache_models(os.path.join(models_path, "yolov5_anime"))
if not os.path.isfile(anime_model_path):
print( "WARNING!! " + anime_model_path + " not found.")
print( "use YuNet instead.")
return self.detect_face(img_array)
return self.detect_face_from_img(img_array)
self.anime_face_detector = torch.hub.load('ultralytics/yolov5', 'custom', path=anime_model_path)
@ -150,6 +177,49 @@ class Script(scripts.Script):
return faces
def detect_face(self, img, mask, face_detection_method, max_crop_size):
# Detect faces in `img` (optionally restricted to the masked area) and
# return their coordinates normalized to the image size: a list of
# [x, y, w, h] entries, each value in 0..1. Returns [] when no usable
# face is found.
img_array = np.array(img)
if mask is not None:
if self.is_invert_mask:
mask = ImageOps.invert(mask)
# Zero out unmasked pixels so the detector only sees the masked region.
mask_array = np.array(mask)/255
if mask_array.ndim == 2:
mask_array = mask_array[:, :, np.newaxis]
img_array = mask_array * img_array
img_array = img_array.astype(np.uint8)
# image without alpha
img_array = img_array[:,:,:3]
if face_detection_method == "YuNet":
faces = self.detect_face_from_img(img_array)
elif face_detection_method == "Yolov5_anime":
faces = self.detect_anime_face_from_img(img_array)
else:
# Unknown method name: fall back to YuNet.
faces = self.detect_face_from_img(img_array)
if faces is None or len(faces) == 0:
return []
face_coords = []
for face in faces:
# First four entries of a detection are pixel x, y, width, height.
x = int(face[0])
y = int(face[1])
w = int(face[2])
h = int(face[3])
if max(w,h) > max_crop_size:
print("ignore big face")
continue
if w == 0 or h == 0:
print("ignore w,h = 0 face")
continue
# Normalize by image width/height.
face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] )
return face_coords
def get_mask(self):
def create_mask( output, x_rate, y_rate, k_size ):
img = np.zeros((512, 512, 3))
@ -327,6 +397,312 @@ class Script(scripts.Script):
mask = depth
return depth!=None, mask
### auto tagging
debug_count = 0
def get_masked_image(self, image, mask_image):
    """Crop `image` to the masked region and paint everything outside the
    mask a flat pink so the interrogators only see the masked subject.

    image      -- PIL image of the keyframe.
    mask_image -- PIL mask image, or None to just return `image` as RGB.
    Returns a cropped RGB PIL image.

    NOTE(review): the (255, 190, 200) pink fill presumably produces the
    "pink background"-style tags that remove_reserved_token strips later
    in this file — confirm before changing the color.
    """
    if mask_image is None:  # fixed: identity comparison with None (was `== None`)
        return image.convert("RGB")
    mask = mask_image.convert('L')
    if self.is_invert_mask:
        mask = ImageOps.invert(mask)
    # Tight bounding box of the non-zero mask area (0 px padding).
    crop_region = masking.get_crop_region(np.array(mask), 0)
    image = image.crop(crop_region).convert("RGB")
    mask = mask.crop(crop_region)
    # Composite: masked pixels keep the photo, the rest becomes flat pink.
    base_img = Image.new("RGB", image.size, (255, 190, 200))
    image = Image.composite( image, base_img, mask )
    return image
def interrogate_deepdanbooru(self, imgs, masks):
# Tag every keyframe with DeepDanbooru. `imgs` are image file paths and
# `masks` is the parallel list of mask paths ("" when a frame has no
# mask). Returns {basename: tag_string}.
prompts_dict = {}
cause_err = False
try:
deepbooru.model.start()
for img,mask in zip(imgs,masks):
key = os.path.basename(img)
print(key + " interrogate deepdanbooru")
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
# Restrict tagging to the masked subject.
image = self.get_masked_image(image, mask_image)
prompt = deepbooru.model.tag_multi(image)
prompts_dict[key] = prompt
except Exception as e:
import traceback
traceback.print_exc()
print(e)
cause_err = True
finally:
# Always release the deepbooru model, even on failure.
deepbooru.model.stop()
if cause_err:
print("Exception occurred during auto-tagging(deepdanbooru)")
# NOTE(review): Processed() is normally constructed with arguments in
# the webui API; calling it bare here may itself raise - confirm the
# intended error return.
return Processed()
return prompts_dict
def interrogate_clip(self, imgs, masks):
# Caption (BLIP) and rank-tag (CLIP) every keyframe. `imgs` are image
# paths, `masks` the parallel mask paths ("" for no mask). Returns
# {basename: "caption, tag1, tag2, ..."}.
from modules import devices, shared, lowvram, paths
import importlib
import models
caption_list = []
prompts_dict = {}
cause_err = False
try:
if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
lowvram.send_everything_to_cpu()
devices.torch_gc()
# Reload the top-level 'models' package with BLIP's repo path
# prioritized - presumably works around the module-name clash between
# BLIP and yolov5 mentioned in the commit message (both ship a
# 'models' package); confirm when that conflict is fixed upstream.
with paths.Prioritize("BLIP"):
importlib.reload(models)
shared.interrogator.load()
# Pass 1: BLIP caption for each masked frame.
for img,mask in zip(imgs,masks):
key = os.path.basename(img)
print(key + " generate caption")
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
image = self.get_masked_image(image, mask_image)
caption = shared.interrogator.generate_caption(image)
caption_list.append(caption)
# Free BLIP before running the CLIP ranking pass.
shared.interrogator.send_blip_to_ram()
devices.torch_gc()
# Pass 2: CLIP category ranking appended to each caption.
for img,mask,caption in zip(imgs,masks,caption_list):
key = os.path.basename(img)
print(key + " interrogate clip")
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
image = self.get_masked_image(image, mask_image)
clip_image = shared.interrogator.clip_preprocess(image).unsqueeze(0).type(shared.interrogator.dtype).to(devices.device_interrogate)
res = ""
with torch.no_grad(), devices.autocast():
image_features = shared.interrogator.clip_model.encode_image(clip_image).type(shared.interrogator.dtype)
image_features /= image_features.norm(dim=-1, keepdim=True)
for name, topn, items in shared.interrogator.categories():
matches = shared.interrogator.rank(image_features, items, top_count=topn)
for match, score in matches:
# "(tag:0.123)" ranked form only when the webui option is on.
if shared.opts.interrogate_return_ranks:
res += f", ({match}:{score/100:.3f})"
else:
res += ", " + match
prompts_dict[key] = (caption + res)
except Exception as e:
import traceback
traceback.print_exc()
print(e)
cause_err = True
finally:
# Always unload the interrogator, even on failure.
shared.interrogator.unload()
if cause_err:
print("Exception occurred during auto-tagging(blip/clip)")
# NOTE(review): bare Processed() may itself raise - confirm intended.
return Processed()
return prompts_dict
def remove_reserved_token(self, token_list):
    """Strip tags that only describe the artificial pink backdrop.

    token_list -- list of (tag, score, type) tuples; must be non-empty.
    The head token, when it carries a score (type "normal"), also has any
    literal 'pink background' phrase removed from its text, mutating
    token_list[0] in place. Returns a new filtered list.
    """
    reserved_list = ["pink_background","simple_background","pink"]
    leading = token_list[0]
    if leading[2] == "normal":
        cleaned = leading[0].replace('pink background', '')
        token_list[0] = (cleaned, leading[1], leading[2])
    return [entry for entry in token_list if entry[0] not in reserved_list]
def remove_blacklisted_token(self, token_list):
    """Drop tokens whose tag appears in <prompts_dir>/blacklist.txt.

    The blacklist file holds one tag per line; surrounding whitespace is
    stripped. If the file is missing the list is returned unchanged
    (best-effort, matching the original behavior).

    token_list -- list of (tag, score, type) tuples.
    Returns the filtered list.
    """
    black_list_path = os.path.join(self.prompts_dir, "blacklist.txt")
    if not os.path.isfile(black_list_path):
        print(black_list_path + " not found.")
        return token_list
    with open(black_list_path) as f:
        # Set for O(1) membership tests instead of a list scan per token.
        black_list = {s.strip() for s in f.readlines()}
    return [token for token in token_list if token[0] not in black_list]
def add_token(self, token_list):
# Append extra tokens (lora/hypernet/normal prompt entries) driven by
# tags that were detected with a sufficient score, as configured in
# <prompts_dir>/add_token.txt (JSON; sample format in the docstring
# below). Returns token_list plus the generated entries.
add_list_path = os.path.join(self.prompts_dir, "add_token.txt")
if not os.path.isfile(add_list_path):
print(add_list_path + " not found.")
return token_list
# Lazily build the arithmetic parser used to evaluate score formulas.
if not self.calc_parser:
self.calc_parser = CalcParser()
with open(add_list_path) as f:
add_list = json.load(f)
'''
[
{
"target":"test_token",
"min_score":0.8,
"token": ["lora_name_A", "0.5"],
"type":"lora"
},
{
"target":"test_token",
"min_score":0.5,
"token": ["bbbb", "score - 0.1"],
"type":"normal"
},
{
"target":"test_token2",
"min_score":0.8,
"token": ["hypernet_name_A", "score"],
"type":"hypernet"
},
{
"target":"test_token3",
"min_score":0.0,
"token": ["dddd", "score"],
"type":"normal"
}
]
'''
result_list = []
for token in token_list:
for add_item in add_list:
if token[0] == add_item["target"]:
if token[1] > add_item["min_score"]:
# hit
# The formula may reference "score"; substitute the tag's
# detected score textually before evaluating it.
formula = str(add_item["token"][1])
formula = formula.replace("score",str(token[1]))
print('Input: %s' % str(add_item["token"][1]))
try:
score = self.calc_parser.parse(formula)
score = round(score, 3)
except (ParseError, ZeroDivisionError) as e:
# Bad formula: report it and skip this addition only.
print('Input: %s' % str(add_item["token"][1]))
print('Error: %s' % e)
print("ignore this token")
continue
print("score = " + str(score))
result_list.append( ( add_item["token"][0], score, add_item["type"] ) )
token_list = token_list + result_list
return token_list
def create_prompts_dict(self, imgs, masks, auto_tag_mode):
# Build {basename: prompt_string} by interrogating each keyframe and
# post-processing the resulting tags (reserved-tag removal, blacklist
# filtering, configured token additions).
prompts_dict = {}
if auto_tag_mode == "DeepDanbooru":
raw_dict = self.interrogate_deepdanbooru(imgs, masks)
elif auto_tag_mode == "CLIP":
raw_dict = self.interrogate_clip(imgs, masks)
# NOTE(review): raw_dict is unbound if auto_tag_mode is any other value;
# the caller guards with `auto_tag_mode != "None"`, but an explicit
# `raw_dict = {}` default would be safer - confirm.
# Matches the "(tag:0.123)" ranked-token form produced by interrogation.
repatter = re.compile(r'\((.+)\:([0-9\.]+)\)')
for key, value_str in raw_dict.items():
value_list = [x.strip() for x in value_str.split(',')]
value = []
for v in value_list:
m = repatter.fullmatch(v)
if m:
value.append((m.group(1), float(m.group(2)), "normal"))
else:
# Unranked token (e.g. plain caption text): keep with score 1.
value.append((v, 1, "no_score"))
# print(value)
value = self.remove_reserved_token(value)
# print(value)
value = self.remove_blacklisted_token(value)
# print(value)
value = self.add_token(value)
# print(value)
# Render a (tag, score, type) tuple back into prompt syntax.
def create_token_str(x):
print(x)
if x[2] == "no_score":
return x[0]
elif x[2] == "lora":
return "<lora:" + x[0] + ":" + str(x[1]) + ">"
elif x[2] == "hypernet":
return "<hypernet:" + x[0] + ":" + str(x[1]) + ">"
else:
return "(" + x[0] + ":" + str(x[1]) + ")"
value_list = [create_token_str(x) for x in value]
value = ",".join(value_list)
prompts_dict[key] = value
return prompts_dict
def load_prompts_dict(self, imgs, default_token):
    """Load per-keyframe prompts from <prompts_dir>/prompts.txt if present.

    Returns {} when the file does not exist (auto tagging proceeds).
    Frames without an entry inherit the prompt of the nearest preceding
    frame, or `default_token` before the first entry.
    """
    prompts_path = os.path.join(self.prompts_dir, "prompts.txt")
    if not os.path.isfile(prompts_path):
        print(prompts_path + " not found.")
        return {}
    print(prompts_path + " found!!")
    print("skip auto tagging.")
    with open(prompts_path) as f:
        raw_dict = json.load(f)
    prompts_dict = {}
    carried = default_token
    for img in imgs:
        frame_key = os.path.basename(img)
        carried = raw_dict.get(frame_key, carried)
        prompts_dict[frame_key] = carried
    return prompts_dict
# This is where the additional processing is implemented. The parameters include
@ -335,52 +711,9 @@ class Script(scripts.Script):
# Custom functions can be defined here, and additional libraries can be imported
# to be used in processing. The return value should be a Processed object, which is
# what is returned by the process_images method.
def run(self, p, project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt):
def run(self, p, project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt):
args = locals()
def detect_face(img, mask, face_detection_method, max_crop_size):
img_array = np.array(img)
if mask is not None:
mask_array = np.array(mask)/255
if mask_array.ndim == 2:
mask_array = mask_array[:, :, np.newaxis]
img_array = mask_array * img_array
img_array = img_array.astype(np.uint8)
# image without alpha
img_array = img_array[:,:,:3]
if face_detection_method == "YuNet":
faces = self.detect_face(img_array)
elif face_detection_method == "Yolov5_anime":
faces = self.detect_anime_face(img_array)
else:
faces = self.detect_face(img_array)
if faces is None or len(faces) == 0:
return []
face_coords = []
for face in faces:
x = int(face[0])
y = int(face[1])
w = int(face[2])
h = int(face[3])
if max(w,h) > max_crop_size:
print("ignore big face")
continue
if w == 0 or h == 0:
print("ignore w,h = 0 face")
continue
face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] )
return face_coords
if not os.path.isdir(project_dir):
print("project_dir not found")
return Processed()
@ -408,10 +741,15 @@ class Script(scripts.Script):
org_key_path = os.path.join(inv_path, "video_key")
img2img_key_path = os.path.join(inv_path, "img2img_key")
depth_path = os.path.join(inv_path, "video_key_depth")
self.prompts_dir = inv_path
self.is_invert_mask = True
else:
org_key_path = os.path.join(project_dir, "video_key")
img2img_key_path = os.path.join(project_dir, "img2img_key")
depth_path = os.path.join(project_dir, "video_key_depth")
self.prompts_dir = project_dir
self.is_invert_mask = False
frame_mask_path = os.path.join(project_dir, "video_mask")
@ -427,53 +765,88 @@ class Script(scripts.Script):
remove_pngs_in_dir(img2img_key_path)
os.makedirs(img2img_key_path, exist_ok=True)
imgs = glob.glob( os.path.join(org_key_path ,"*.png") )
for img in imgs:
image = Image.open(img)
def get_mask_of_img(img):
img_basename = os.path.basename(img)
mask = None
if mask_mode != "None":
mask_path = os.path.join( frame_mask_path , img_basename )
if os.path.isfile( mask_path ):
mask = Image.open(mask_path)
return mask_path
return ""
imgs = glob.glob( os.path.join(org_key_path ,"*.png") )
masks = [ get_mask_of_img(i) for i in imgs ]
######################
# face crop
face_coords_dict={}
for img,mask in zip(imgs,masks):
face_detected = False
if is_facecrop:
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
face_coords = self.detect_face(image, mask_image, face_detection_method, max_crop_size)
if face_coords is None or len(face_coords) == 0:
print("no face detected")
else:
print("face detected")
face_detected = True
key = os.path.basename(img)
face_coords_dict[key] = face_coords if face_detected else []
with open( os.path.join( project_dir if is_invert_mask == False else inv_path,"faces.txt" ), "w") as f:
f.write(json.dumps(face_coords_dict,indent=4))
######################
# prompts
prompts_dict = self.load_prompts_dict(imgs, p.prompt)
if not prompts_dict:
if auto_tag_mode != "None":
prompts_dict = self.create_prompts_dict(imgs, masks, auto_tag_mode)
for key, value in prompts_dict.items():
prompts_dict[key] = (value + "," + p.prompt) if add_tag_to_head else (p.prompt + "," + value)
else:
for img in imgs:
key = os.path.basename(img)
prompts_dict[key] = p.prompt
with open( os.path.join( project_dir if is_invert_mask == False else inv_path, time.strftime("%Y%m%d-%H%M%S_") + "prompts.txt" ), "w") as f:
f.write(json.dumps(prompts_dict,indent=4))
######################
# img2img
for img, mask, face_coords, prompts in zip(imgs, masks, face_coords_dict.values(), prompts_dict.values()):
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
img_basename = os.path.basename(img)
_p = copy.copy(p)
_p.init_images=[image]
_p.image_mask = mask
_p.image_mask = mask_image
_p.prompt = prompts
resized_mask = None
repeat_count = img2img_repeat_count
_is_facecrop = is_facecrop
if _is_facecrop:
### face detect in base img
base_img = _p.init_images[0]
if base_img is None:
print("p.init_images[0] is None")
return process_images(p)
face_coords = detect_face(base_img, _p.image_mask, face_detection_method, max_crop_size)
if face_coords is None or len(face_coords) == 0:
print("no face detected")
_is_facecrop = False
if mask_mode != "None" or use_depth:
if use_depth:
depth_found, _p.image_mask = self.get_depth_map( mask, depth_path ,img_basename, is_invert_mask )
mask = _p.image_mask
depth_found, _p.image_mask = self.get_depth_map( mask_image, depth_path ,img_basename, is_invert_mask )
mask_image = _p.image_mask
if depth_found:
_p.inpainting_mask_invert = 0
while repeat_count > 0:
if _is_facecrop:
if face_coords:
proc = self.face_crop_img2img(_p, face_coords, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt)
else:
proc = process_images(_p)
@ -484,8 +857,8 @@ class Script(scripts.Script):
if repeat_count > 0:
_p.init_images=[proc.images[0]]
if mask is not None and resized_mask is None:
resized_mask = resize_img(np.array(mask) , proc.images[0].width, proc.images[0].height)
if mask_image is not None and resized_mask is None:
resized_mask = resize_img(np.array(mask_image) , proc.images[0].width, proc.images[0].height)
resized_mask = Image.fromarray(resized_mask)
_p.image_mask = resized_mask
_p.seed += inc_seed
@ -493,8 +866,8 @@ class Script(scripts.Script):
proc.images[0].save( os.path.join( img2img_key_path , img_basename ) )
with open( os.path.join( project_dir if is_invert_mask == False else inv_path,"param.txt" ), "w") as f:
f.write(proc.info)
f.write(pprint.pformat(proc.info))
with open( os.path.join( project_dir if is_invert_mask == False else inv_path ,"args.txt" ), "w") as f:
f.write(str(args))
f.write(pprint.pformat(args))
return proc