add function auto tagging
Temporary fix for module name conflict between blip and yolov Improved UI refactoring Updated readmepull/48/head
parent
fb192bc100
commit
9553fb33ff
23
README.md
23
README.md
|
|
@ -9,15 +9,30 @@
|
|||
#### sample 1
|
||||
<div><video controls src="https://user-images.githubusercontent.com/118420657/213474231-38cac10e-7e75-43e1-b912-4e7727074d39.mp4" muted="false"></video></div>
|
||||
|
||||
#### sample 2
|
||||
<div><video controls src="https://user-images.githubusercontent.com/118420657/213474343-e49e797d-386e-459f-9be9-2241b2d6266d.mp4" muted="false"></video></div>
|
||||
|
||||
#### sample 3 blend background
|
||||
#### sample 2 blend background
|
||||
- person : masterpiece, best quality, masterpiece, 1girl, masterpiece, best quality,anime screencap, anime style
|
||||
- background : cyberpunk, factory, room ,anime screencap, anime style
|
||||
- It is also possible to blend with your favorite videos.
|
||||
<div><video controls src="https://user-images.githubusercontent.com/118420657/214592811-9677634f-93bb-40dd-95b6-1c97c8e7bb63.mp4" muted="false"></video></div>
|
||||
|
||||
#### sample 3 auto tagging
|
||||
- left : original
|
||||
- center : apply the same prompts in all keyframes
|
||||
- right : apply auto tagging by deepdanbooru in all keyframes
|
||||
- This function improves the reproduction of detailed changes in facial expressions, hand gestures, etc.
|
||||
In the sample video, the "closed_eyes" and "hands_on_own_face" tags have been added to better represent eye blinks and hands brought in front of the face.
|
||||
|
||||
|
||||
#### sample 4 auto tagging (apply lora dynamically)
|
||||
- left : apply auto tagging by deepdanbooru in all keyframes
|
||||
- right : apply auto tagging by deepdanbooru in all keyframes + apply "anyahehface" lora dynamically
|
||||
- Added the function to dynamically apply TI, hypernet, Lora, and additional prompts according to automatically attached tags.
|
||||
In the sample video, if the "smile" tag is given, the lora and lora trigger keywords are set to be added according to the strength of the "smile" tag.
|
||||
Also, since automatically added tags are sometimes incorrect, unnecessary tags are listed in the blacklist.
|
||||
[Here](sample/) is the actual configuration file used. Place it in the "Project directory" to use it.
|
||||
|
||||
|
||||
|
||||
## Installation
|
||||
- Install [ffmpeg](https://ffmpeg.org/) for your operating system
|
||||
(https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,237 @@
|
|||
# https://www.mycompiler.io/view/3TFZagC
|
||||
|
||||
class ParseError(Exception):
    """Raised when parsing fails.

    Carries the input position where the failure occurred plus a
    printf-style message and its substitution arguments.
    """

    def __init__(self, pos, msg, *args):
        # Index into the input text where the parse failed.
        self.pos = pos
        # printf-style format string; ``args`` are its substitutions.
        self.msg = msg
        self.args = args

    def __str__(self):
        detail = self.msg % self.args
        return f'{detail} at position {self.pos}'
|
||||
|
||||
class Parser:
    """Generic recursive-descent parser base class.

    Scanning state: ``self.pos`` is the index of the last *consumed*
    character (so the next character is at ``pos + 1``) and ``self.len``
    is the index of the last character of the input.  Subclasses define a
    ``start`` rule plus one method per grammar rule, and combine them via
    ``match`` / ``keyword`` / ``char`` and their ``maybe_*`` variants.
    """

    def __init__(self):
        # Cache for split_char_ranges results, keyed by the raw spec string.
        self.cache = {}

    def parse(self, text):
        """Parse *text* with the subclass's ``start`` rule and return its value.

        Raises ParseError if a rule fails or trailing input remains.
        """
        self.text = text
        self.pos = -1
        self.len = len(text) - 1
        rv = self.start()
        self.assert_end()
        return rv

    def assert_end(self):
        """Raise ParseError unless the whole input has been consumed."""
        if self.pos < self.len:
            raise ParseError(
                self.pos + 1,
                'Expected end of string but got %s',
                self.text[self.pos + 1]
            )

    def eat_whitespace(self):
        """Advance past any run of whitespace characters."""
        while self.pos < self.len and self.text[self.pos + 1] in " \f\v\r\t\n":
            self.pos += 1

    def split_char_ranges(self, chars):
        """Split a spec like ``'0-9a-z.'`` into ``['0-9', 'a-z', '.']``.

        Results are cached per spec string.  Raises ValueError for a
        descending range such as ``'9-0'``.
        """
        try:
            return self.cache[chars]
        except KeyError:
            pass

        rv = []
        index = 0
        length = len(chars)

        while index < length:
            # A three-character window "a-z" forms a range; note the bounds
            # check means a trailing "a-" is treated as two literal chars.
            if index + 2 < length and chars[index + 1] == '-':
                if chars[index] >= chars[index + 2]:
                    raise ValueError('Bad character range')

                rv.append(chars[index:index + 3])
                index += 3
            else:
                rv.append(chars[index])
                index += 1

        self.cache[chars] = rv
        return rv

    def char(self, chars=None):
        """Consume and return one character.

        *chars* is a range spec accepted by split_char_ranges; ``None``
        accepts any character.  Raises ParseError at end of input or on a
        character outside the spec.
        """
        if self.pos >= self.len:
            raise ParseError(
                self.pos + 1,
                'Expected %s but got end of string',
                'character' if chars is None else '[%s]' % chars
            )

        next_char = self.text[self.pos + 1]
        # Fixed: compare to the None singleton with `is`, not `==` (PEP 8).
        if chars is None:
            self.pos += 1
            return next_char

        for char_range in self.split_char_ranges(chars):
            if len(char_range) == 1:
                if next_char == char_range:
                    self.pos += 1
                    return next_char
            elif char_range[0] <= next_char <= char_range[2]:
                self.pos += 1
                return next_char

        raise ParseError(
            self.pos + 1,
            'Expected %s but got %s',
            'character' if chars is None else '[%s]' % chars,
            next_char
        )

    def keyword(self, *keywords):
        """Consume and return the first matching keyword, skipping
        surrounding whitespace.  Raises ParseError if none match."""
        self.eat_whitespace()
        if self.pos >= self.len:
            raise ParseError(
                self.pos + 1,
                'Expected %s but got end of string',
                ','.join(keywords)
            )

        for keyword in keywords:
            low = self.pos + 1
            high = low + len(keyword)

            if self.text[low:high] == keyword:
                self.pos += len(keyword)
                self.eat_whitespace()
                return keyword

        raise ParseError(
            self.pos + 1,
            'Expected %s but got %s',
            ','.join(keywords),
            self.text[self.pos + 1],
        )

    def match(self, *rules):
        """Try each rule method name in *rules*; return the first success.

        Backtracks ``self.pos`` after each failed rule.  On total failure
        re-raises the ParseError that got furthest, or a combined error
        when several rules failed at the same position.
        """
        self.eat_whitespace()
        last_error_pos = -1
        last_exception = None
        last_error_rules = []

        for rule in rules:
            initial_pos = self.pos
            try:
                rv = getattr(self, rule)()
                self.eat_whitespace()
                return rv
            except ParseError as e:
                # Backtrack so the next alternative starts fresh.
                self.pos = initial_pos

                if e.pos > last_error_pos:
                    last_exception = e
                    last_error_pos = e.pos
                    last_error_rules.clear()
                    last_error_rules.append(rule)
                elif e.pos == last_error_pos:
                    last_error_rules.append(rule)

        if len(last_error_rules) == 1:
            raise last_exception
        else:
            raise ParseError(
                last_error_pos,
                'Expected %s but got %s',
                ','.join(last_error_rules),
                self.text[last_error_pos]
            )

    def maybe_char(self, chars=None):
        """Like char() but return None instead of raising on failure."""
        try:
            return self.char(chars)
        except ParseError:
            return None

    def maybe_match(self, *rules):
        """Like match() but return None instead of raising on failure."""
        try:
            return self.match(*rules)
        except ParseError:
            return None

    def maybe_keyword(self, *keywords):
        """Like keyword() but return None instead of raising on failure."""
        try:
            return self.keyword(*keywords)
        except ParseError:
            return None
|
||||
|
||||
class CalcParser(Parser):
    """Recursive-descent parser/evaluator for arithmetic expressions.

    Grammar::

        expression -> term (('+' | '-') term)*
        term       -> factor (('*' | '/') factor)*
        factor     -> '(' expression ')' | number
        number     -> ('+' | '-')? digits ('.' digits)?

    Every result is a float.  Operators are left-associative.
    """

    def start(self):
        """Entry rule: parse and evaluate one full expression."""
        return self.expression()

    def expression(self):
        """Evaluate a chain of additions/subtractions over terms."""
        result = self.match('term')
        while True:
            operator = self.maybe_keyword('+', '-')
            if operator is None:
                break

            operand = self.match('term')
            if operator == '+':
                result += operand
            else:
                result -= operand

        return result

    def term(self):
        """Evaluate a chain of multiplications/divisions over factors."""
        result = self.match('factor')
        while True:
            operator = self.maybe_keyword('*', '/')
            if operator is None:
                break

            operand = self.match('factor')
            if operator == '*':
                result *= operand
            else:
                result /= operand

        return result

    def factor(self):
        """Evaluate a parenthesized sub-expression or a bare number."""
        if self.maybe_keyword('('):
            inner = self.match('expression')
            self.keyword(')')

            return inner

        return self.match('number')

    def number(self):
        """Parse an optionally signed decimal literal and return it as float."""
        digits = []

        sign = self.maybe_keyword('+', '-')
        if sign is not None:
            digits.append(sign)

        # Integer part: at least one digit is mandatory.
        digits.append(self.char('0-9'))

        while True:
            more = self.maybe_char('0-9')
            if more is None:
                break

            digits.append(more)

        # Optional fractional part, also requiring at least one digit.
        if self.maybe_char('.'):
            digits.append('.')
            digits.append(self.char('0-9'))

            while True:
                more = self.maybe_char('0-9')
                if more is None:
                    break

                digits.append(more)

        return float(''.join(digits))
|
||||
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,54 @@
|
|||
[
|
||||
{
|
||||
"target":"smile",
|
||||
"min_score":0.5,
|
||||
"token": ["lottalewds_v0", "1.2"],
|
||||
"type":"lora"
|
||||
},
|
||||
{
|
||||
"target":"smile",
|
||||
"min_score":0.5,
|
||||
"token": ["anyahehface", "score*1.2"],
|
||||
"type":"normal"
|
||||
},
|
||||
{
|
||||
"target":"smile",
|
||||
"min_score":0.5,
|
||||
"token": ["wicked smug", "score*1.2"],
|
||||
"type":"normal"
|
||||
},
|
||||
{
|
||||
"target":"smile",
|
||||
"min_score":0.5,
|
||||
"token": ["half closed eyes", "0.2 + score*0.3"],
|
||||
"type":"normal"
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
"target":"test_token",
|
||||
"min_score":0.8,
|
||||
"token": ["lora_name_A", "0.5"],
|
||||
"type":"lora"
|
||||
},
|
||||
{
|
||||
"target":"test_token",
|
||||
"min_score":0.5,
|
||||
"token": ["bbbb", "score - 0.1"],
|
||||
"type":"normal"
|
||||
},
|
||||
{
|
||||
"target":"test_token2",
|
||||
"min_score":0.8,
|
||||
"token": ["hypernet_name_A", "score"],
|
||||
"type":"hypernet"
|
||||
},
|
||||
{
|
||||
"target":"test_token3",
|
||||
"min_score":0.0,
|
||||
"token": ["dddd", "score"],
|
||||
"type":"normal"
|
||||
}
|
||||
]
|
||||
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
motion_blur
|
||||
blurry
|
||||
realistic
|
||||
depth_of_field
|
||||
mountain
|
||||
tree
|
||||
water
|
||||
underwater
|
||||
tongue
|
||||
tongue_out
|
||||
|
|
@ -3,17 +3,23 @@ import gradio as gr
|
|||
import os
|
||||
import torch
|
||||
import random
|
||||
import time
|
||||
import pprint
|
||||
|
||||
from modules.processing import process_images,Processed
|
||||
from modules.paths import models_path
|
||||
from modules.textual_inversion import autocrop
|
||||
import modules.images
|
||||
from modules import shared,deepbooru,masking
|
||||
import cv2
|
||||
import copy
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from PIL import Image,ImageOps
|
||||
import glob
|
||||
import requests
|
||||
import json
|
||||
import re
|
||||
from extensions.ebsynth_utility.calculator import CalcParser,ParseError
|
||||
|
||||
def get_my_dir():
|
||||
if os.path.isdir("extensions/ebsynth_utility"):
|
||||
|
|
@ -61,6 +67,9 @@ class Script(scripts.Script):
|
|||
face_detector = None
|
||||
face_merge_mask_filename = "face_crop_img2img_mask.png"
|
||||
face_merge_mask_image = None
|
||||
prompts_dir = ""
|
||||
calc_parser = None
|
||||
is_invert_mask = False
|
||||
|
||||
# The title of the script. This is what will be displayed in the dropdown menu.
|
||||
def title(self):
|
||||
|
|
@ -83,27 +92,40 @@ class Script(scripts.Script):
|
|||
with gr.Column(variant='panel'):
|
||||
with gr.Column():
|
||||
project_dir = gr.Textbox(label='Project directory', lines=1)
|
||||
mask_mode = gr.Dropdown(choices=["Normal","Invert","None","Don't Override"], value="Normal" ,label="Mask Mode(Override img2img Mask mode)")
|
||||
inpaint_area = gr.Dropdown(choices=["Whole picture","Only masked","Don't Override"], type = "index", value="Only masked" ,label="Inpaint Area(Override img2img Inpaint area)")
|
||||
|
||||
with gr.Column():
|
||||
use_depth = gr.Checkbox(True, label="Use Depth Map If exists in /video_key_depth")
|
||||
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
|
||||
See \
|
||||
<font color=\"blue\"><a href=\"https://github.com/thygate/stable-diffusion-webui-depthmap-script\">[here]</a></font> for depth map.\
|
||||
</p>")
|
||||
|
||||
with gr.Column():
|
||||
with gr.Accordion("Mask option"):
|
||||
mask_mode = gr.Dropdown(choices=["Normal","Invert","None","Don't Override"], value="Normal" ,label="Mask Mode(Override img2img Mask mode)")
|
||||
inpaint_area = gr.Dropdown(choices=["Whole picture","Only masked","Don't Override"], type = "index", value="Only masked" ,label="Inpaint Area(Override img2img Inpaint area)")
|
||||
use_depth = gr.Checkbox(True, label="Use Depth Map If exists in /video_key_depth")
|
||||
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
|
||||
See \
|
||||
<font color=\"blue\"><a href=\"https://github.com/thygate/stable-diffusion-webui-depthmap-script\">[here]</a></font> for depth map.\
|
||||
</p>")
|
||||
|
||||
with gr.Accordion("Loopback option"):
|
||||
img2img_repeat_count = gr.Slider(minimum=1, maximum=30, step=1, value=1, label="Img2Img Repeat Count (Loop Back)")
|
||||
inc_seed = gr.Slider(minimum=0, maximum=9999999, step=1, value=1, label="Add N to seed when repeating ")
|
||||
|
||||
with gr.Column():
|
||||
is_facecrop = gr.Checkbox(False, label="use Face Crop img2img")
|
||||
face_detection_method = gr.Dropdown(choices=["YuNet","Yolov5_anime"], value="YuNet" ,label="Face Detection Method")
|
||||
with gr.Accordion("Auto Tagging option"):
|
||||
auto_tag_mode = gr.Dropdown(choices=["None","DeepDanbooru","CLIP"], value="None" ,label="Auto Tagging")
|
||||
add_tag_to_head = gr.Checkbox(False, label="Add additional prompts to the head")
|
||||
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
|
||||
If loading of the Yolov5_anime model fails, check\
|
||||
<font color=\"blue\"><a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/2235\">[this]</a></font> solution.\
|
||||
The results are stored in timestamp_prompts.txt.<br>\
|
||||
If you want to use the same tagging results the next time you run img2img, rename the file to prompts.txt<br>\
|
||||
Recommend enabling the following settings.<br>\
|
||||
<font color=\"red\">\
|
||||
Settings->Interrogate Option->Interrogate: include ranks of model tags matches in results\
|
||||
</font>\
|
||||
</p>")
|
||||
|
||||
with gr.Accordion("Face Crop option"):
|
||||
is_facecrop = gr.Checkbox(False, label="use Face Crop img2img")
|
||||
with gr.Row():
|
||||
face_detection_method = gr.Dropdown(choices=["YuNet","Yolov5_anime"], value="YuNet" ,label="Face Detection Method")
|
||||
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
|
||||
If loading of the Yolov5_anime model fails, check\
|
||||
<font color=\"blue\"><a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/2235\">[this]</a></font> solution.\
|
||||
</p>")
|
||||
max_crop_size = gr.Slider(minimum=0, maximum=2048, step=1, value=1024, label="Max Crop Size")
|
||||
face_denoising_strength = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.5, label="Face Denoising Strength")
|
||||
face_area_magnification = gr.Slider(minimum=1.00, maximum=10.00, step=0.01, value=1.5, label="Face Area Magnification ")
|
||||
|
|
@ -115,10 +137,10 @@ class Script(scripts.Script):
|
|||
value = "face close up,"
|
||||
)
|
||||
|
||||
return [project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt]
|
||||
return [project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt]
|
||||
|
||||
|
||||
def detect_face(self, img_array):
|
||||
def detect_face_from_img(self, img_array):
|
||||
if not self.face_detector:
|
||||
dnn_model_path = autocrop.download_and_cache_models(os.path.join(models_path, "opencv"))
|
||||
self.face_detector = cv2.FaceDetectorYN.create(dnn_model_path, "", (0, 0))
|
||||
|
|
@ -127,14 +149,19 @@ class Script(scripts.Script):
|
|||
_, result = self.face_detector.detect(img_array)
|
||||
return result
|
||||
|
||||
def detect_anime_face(self, img_array):
|
||||
def detect_anime_face_from_img(self, img_array):
|
||||
import sys
|
||||
|
||||
if not self.anime_face_detector:
|
||||
if 'models' in sys.modules:
|
||||
del sys.modules['models']
|
||||
|
||||
anime_model_path = download_and_cache_models(os.path.join(models_path, "yolov5_anime"))
|
||||
|
||||
if not os.path.isfile(anime_model_path):
|
||||
print( "WARNING!! " + anime_model_path + " not found.")
|
||||
print( "use YuNet instead.")
|
||||
return self.detect_face(img_array)
|
||||
return self.detect_face_from_img(img_array)
|
||||
|
||||
self.anime_face_detector = torch.hub.load('ultralytics/yolov5', 'custom', path=anime_model_path)
|
||||
|
||||
|
|
@ -150,6 +177,49 @@ class Script(scripts.Script):
|
|||
|
||||
return faces
|
||||
|
||||
def detect_face(self, img, mask, face_detection_method, max_crop_size):
    """Detect faces in *img* and return their normalized bounding boxes.

    img  -- PIL image of the keyframe.
    mask -- optional PIL mask; when given, the image is multiplied by the
            mask (inverted first when self.is_invert_mask is set) so
            detection only sees the unmasked subject.
    face_detection_method -- "YuNet" or "Yolov5_anime"; anything else
            falls back to YuNet.
    max_crop_size -- faces whose width or height exceeds this (in pixels)
            are skipped.

    Returns a list of [x, y, w, h] boxes, each component divided by the
    image width/height respectively (i.e. in 0..1 coordinates), or []
    when nothing usable was detected.
    """
    img_array = np.array(img)

    if mask is not None:
        if self.is_invert_mask:
            mask = ImageOps.invert(mask)
        # Scale mask to 0..1 so multiplication keeps/zeroes pixels.
        mask_array = np.array(mask)/255
        if mask_array.ndim == 2:
            # Add a channel axis so the mask broadcasts over RGB.
            mask_array = mask_array[:, :, np.newaxis]

        img_array = mask_array * img_array
        img_array = img_array.astype(np.uint8)

    # image without alpha
    img_array = img_array[:,:,:3]

    if face_detection_method == "YuNet":
        faces = self.detect_face_from_img(img_array)
    elif face_detection_method == "Yolov5_anime":
        faces = self.detect_anime_face_from_img(img_array)
    else:
        # Unknown method: default to the YuNet detector.
        faces = self.detect_face_from_img(img_array)

    if faces is None or len(faces) == 0:
        return []

    face_coords = []
    for face in faces:
        # Detectors return boxes as (x, y, w, h, ...); extra fields ignored.
        x = int(face[0])
        y = int(face[1])
        w = int(face[2])
        h = int(face[3])
        if max(w,h) > max_crop_size:
            print("ignore big face")
            continue
        if w == 0 or h == 0:
            print("ignore w,h = 0 face")
            continue

        # Normalize to fractions of the image size (shape is (H, W, C)).
        face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] )

    return face_coords
|
||||
|
||||
def get_mask(self):
|
||||
def create_mask( output, x_rate, y_rate, k_size ):
|
||||
img = np.zeros((512, 512, 3))
|
||||
|
|
@ -327,6 +397,312 @@ class Script(scripts.Script):
|
|||
mask = depth
|
||||
|
||||
return depth!=None, mask
|
||||
|
||||
### auto tagging
|
||||
debug_count = 0
|
||||
|
||||
def get_masked_image(self, image, mask_image):
    """Return *image* cropped to the masked region, composited over a
    solid (255, 190, 200) background.

    The flat fill color replaces everything outside the mask so the
    interrogators focus on the masked subject (the resulting background
    tags are later filtered by remove_reserved_token).  When no mask is
    supplied the image is simply converted to RGB.
    """
    # Fixed: compare to the None singleton with `is`, not `==` (PEP 8).
    if mask_image is None:
        return image.convert("RGB")

    mask = mask_image.convert('L')
    if self.is_invert_mask:
        mask = ImageOps.invert(mask)
    # Tight bounding box of the non-zero mask area.
    crop_region = masking.get_crop_region(np.array(mask), 0)
    # crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height)
    # x1, y1, x2, y2 = crop_region
    image = image.crop(crop_region).convert("RGB")
    mask = mask.crop(crop_region)

    base_img = Image.new("RGB", image.size, (255, 190, 200))

    # Pixels outside the mask come from the solid-color base image.
    image = Image.composite( image, base_img, mask )

    # image.save("scripts/get_masked_image_test_"+ str(self.debug_count) + ".png")
    # self.debug_count += 1

    return image
|
||||
|
||||
def interrogate_deepdanbooru(self, imgs, masks):
    """Tag each keyframe with DeepDanbooru.

    imgs / masks -- parallel lists of file paths; a falsy mask entry
    means "no mask" for that frame.  Each image is pre-masked via
    get_masked_image before tagging.

    Returns {image basename: tag string}.  On any failure the model is
    still stopped and an empty dict is returned, so callers that iterate
    the result (create_prompts_dict) keep working.
    """
    prompts_dict = {}
    cause_err = False

    try:
        deepbooru.model.start()

        for img,mask in zip(imgs,masks):
            key = os.path.basename(img)
            print(key + " interrogate deepdanbooru")

            image = Image.open(img)
            mask_image = Image.open(mask) if mask else None
            image = self.get_masked_image(image, mask_image)

            prompt = deepbooru.model.tag_multi(image)

            prompts_dict[key] = prompt
    except Exception as e:
        import traceback
        traceback.print_exc()
        print(e)
        cause_err = True
    finally:
        # Always release the model, success or failure.
        deepbooru.model.stop()
        if cause_err:
            print("Exception occurred during auto-tagging(deepdanbooru)")
            # Fixed: return an empty dict instead of Processed() -- callers
            # treat the result as a dict, and Processed cannot be built
            # without arguments.
            return {}

    return prompts_dict
|
||||
|
||||
|
||||
def interrogate_clip(self, imgs, masks):
    """Caption and tag each keyframe with BLIP + CLIP.

    imgs / masks -- parallel lists of file paths; a falsy mask entry
    means "no mask".  Each image is pre-masked via get_masked_image.

    Works in two passes to keep only one model resident at a time:
    first BLIP generates a caption per frame, then BLIP is moved to RAM
    and CLIP ranks category tags which are appended to the caption.

    Returns {image basename: "caption, tag, ..."}.
    """
    # Local imports: these webui modules are only needed for this mode.
    from modules import devices, shared, lowvram, paths
    import importlib
    import models

    caption_list = []
    prompts_dict = {}
    cause_err = False

    try:
        if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
            lowvram.send_everything_to_cpu()
            devices.torch_gc()

        # Reload 'models' with BLIP's path prioritized -- works around the
        # module-name clash between BLIP's and yolov5's "models" packages.
        with paths.Prioritize("BLIP"):
            importlib.reload(models)
            shared.interrogator.load()

        # Pass 1: BLIP captions.
        for img,mask in zip(imgs,masks):
            key = os.path.basename(img)
            print(key + " generate caption")

            image = Image.open(img)
            mask_image = Image.open(mask) if mask else None
            image = self.get_masked_image(image, mask_image)

            caption = shared.interrogator.generate_caption(image)
            caption_list.append(caption)

        # Free VRAM before the CLIP pass.
        shared.interrogator.send_blip_to_ram()
        devices.torch_gc()

        # Pass 2: CLIP tag ranking per frame.
        for img,mask,caption in zip(imgs,masks,caption_list):
            key = os.path.basename(img)
            print(key + " interrogate clip")

            image = Image.open(img)
            mask_image = Image.open(mask) if mask else None
            image = self.get_masked_image(image, mask_image)

            clip_image = shared.interrogator.clip_preprocess(image).unsqueeze(0).type(shared.interrogator.dtype).to(devices.device_interrogate)

            res = ""

            with torch.no_grad(), devices.autocast():
                image_features = shared.interrogator.clip_model.encode_image(clip_image).type(shared.interrogator.dtype)
                image_features /= image_features.norm(dim=-1, keepdim=True)

                for name, topn, items in shared.interrogator.categories():
                    matches = shared.interrogator.rank(image_features, items, top_count=topn)
                    for match, score in matches:
                        # Scored "(tag:0.123)" form only when the webui option
                        # "include ranks of model tags" is enabled.
                        if shared.opts.interrogate_return_ranks:
                            res += f", ({match}:{score/100:.3f})"
                        else:
                            res += ", " + match

            prompts_dict[key] = (caption + res)

    except Exception as e:
        import traceback
        traceback.print_exc()
        print(e)
        cause_err = True
    finally:
        # Always unload, success or failure.
        shared.interrogator.unload()
        if cause_err:
            print("Exception occurred during auto-tagging(blip/clip)")
            # NOTE(review): callers treat the result as a dict; Processed()
            # takes arguments elsewhere in the webui -- this error return
            # looks wrong and likely should be `return {}`. Confirm.
            return Processed()

    return prompts_dict
|
||||
|
||||
|
||||
def remove_reserved_token(self, token_list):
    """Drop tags introduced by the solid masking background.

    get_masked_image fills everything outside the mask with a flat pink
    color, so the interrogators report background tags that do not
    describe the subject; those are filtered out here.  The head token
    (a free-text caption when its kind is "normal") additionally has the
    literal phrase 'pink background' stripped, mutating token_list[0]
    in place.  Returns a new list without the reserved tags.
    """
    reserved = ["pink_background","simple_background","pink"]

    first = token_list[0]
    if first[2] == "normal":
        cleaned = first[0].replace('pink background', '')
        token_list[0] = (cleaned, first[1], first[2])

    return [tok for tok in token_list if tok[0] not in reserved]
|
||||
|
||||
def remove_blacklisted_token(self, token_list):
    """Filter out tokens named in <prompts_dir>/blacklist.txt.

    Each line of the file is one tag name (stripped of surrounding
    whitespace).  When the file does not exist the list is returned
    unchanged.
    """
    black_list_path = os.path.join(self.prompts_dir, "blacklist.txt")
    if not os.path.isfile(black_list_path):
        print(black_list_path + " not found.")
        return token_list

    with open(black_list_path) as fp:
        blacklisted = [line.strip() for line in fp]

    return [token for token in token_list if token[0] not in blacklisted]
|
||||
|
||||
def add_token(self, token_list):
    """Append extra tokens configured in <prompts_dir>/add_token.txt.

    For every detected token whose name matches an entry's "target" and
    whose score exceeds "min_score", a new (name, score, type) token is
    appended.  The entry's score formula may reference "score" (the
    trigger token's score) and is evaluated with CalcParser.  When the
    config file does not exist, token_list is returned unchanged.
    """
    add_list_path = os.path.join(self.prompts_dir, "add_token.txt")
    if not os.path.isfile(add_list_path):
        print(add_list_path + " not found.")
        return token_list

    # Lazily create the arithmetic parser used for score formulas.
    if not self.calc_parser:
        self.calc_parser = CalcParser()

    with open(add_list_path) as f:
        add_list = json.load(f)
    '''
    Expected file format (JSON); "token" is [name, score formula],
    "type" selects the prompt syntax (normal / lora / hypernet):
    [
        {
            "target":"test_token",
            "min_score":0.8,
            "token": ["lora_name_A", "0.5"],
            "type":"lora"
        },
        {
            "target":"test_token",
            "min_score":0.5,
            "token": ["bbbb", "score - 0.1"],
            "type":"normal"
        },
        {
            "target":"test_token2",
            "min_score":0.8,
            "token": ["hypernet_name_A", "score"],
            "type":"hypernet"
        },
        {
            "target":"test_token3",
            "min_score":0.0,
            "token": ["dddd", "score"],
            "type":"normal"
        }
    ]
    '''
    result_list = []

    for token in token_list:
        for add_item in add_list:
            if token[0] == add_item["target"]:
                if token[1] > add_item["min_score"]:
                    # hit: substitute the trigger score into the formula
                    # and evaluate it numerically.
                    formula = str(add_item["token"][1])
                    formula = formula.replace("score",str(token[1]))
                    print('Input: %s' % str(add_item["token"][1]))

                    try:
                        score = self.calc_parser.parse(formula)
                        score = round(score, 3)
                    except (ParseError, ZeroDivisionError) as e:
                        # Bad formula or division by zero: skip this entry.
                        print('Input: %s' % str(add_item["token"][1]))
                        print('Error: %s' % e)
                        print("ignore this token")
                        continue

                    print("score = " + str(score))
                    result_list.append( ( add_item["token"][0], score, add_item["type"] ) )

    token_list = token_list + result_list

    return token_list
|
||||
|
||||
def create_prompts_dict(self, imgs, masks, auto_tag_mode):
    """Build {image basename: prompt string} by auto-tagging keyframes.

    imgs / masks -- parallel lists of file paths.
    auto_tag_mode -- "DeepDanbooru" or "CLIP"; any other value yields an
    empty dict (fixed: previously `raw_dict` was left unbound and the
    loop below raised NameError).

    Raw interrogator output per frame is split on commas, parsed into
    (name, score, kind) tokens, run through the reserved/blacklist/
    add-token filters, then re-serialized with the prompt syntax that
    matches each token's kind.
    """
    prompts_dict = {}
    # Fixed: default so an unexpected auto_tag_mode returns {} instead of
    # raising NameError on raw_dict below.
    raw_dict = {}

    if auto_tag_mode == "DeepDanbooru":
        raw_dict = self.interrogate_deepdanbooru(imgs, masks)
    elif auto_tag_mode == "CLIP":
        raw_dict = self.interrogate_clip(imgs, masks)

    # Matches scored tokens like "(smile:0.9)" produced when the webui's
    # "include ranks of model tags" option is enabled.
    repatter = re.compile(r'\((.+)\:([0-9\.]+)\)')

    for key, value_str in raw_dict.items():
        value_list = [x.strip() for x in value_str.split(',')]

        # Parse each piece into a (name, score, kind) tuple.
        value = []
        for v in value_list:
            m = repatter.fullmatch(v)
            if m:
                value.append((m.group(1), float(m.group(2)), "normal"))
            else:
                value.append((v, 1, "no_score"))

        value = self.remove_reserved_token(value)
        value = self.remove_blacklisted_token(value)
        value = self.add_token(value)

        def create_token_str(x):
            # Serialize one token with the syntax for its kind.
            print(x)
            if x[2] == "no_score":
                return x[0]
            elif x[2] == "lora":
                return "<lora:" + x[0] + ":" + str(x[1]) + ">"
            elif x[2] == "hypernet":
                return "<hypernet:" + x[0] + ":" + str(x[1]) + ">"
            else:
                return "(" + x[0] + ":" + str(x[1]) + ")"

        value_list = [create_token_str(x) for x in value]
        value = ",".join(value_list)

        prompts_dict[key] = value

    return prompts_dict
|
||||
|
||||
def load_prompts_dict(self, imgs, default_token):
    """Load per-keyframe prompts from <prompts_dir>/prompts.txt.

    Returns {} when the file does not exist (callers then fall back to
    auto tagging).  The file is a JSON object keyed by image basename;
    frames missing from it inherit the prompt of the nearest preceding
    frame, starting from *default_token*.
    """
    prompts_path = os.path.join(self.prompts_dir, "prompts.txt")
    if not os.path.isfile(prompts_path):
        print(prompts_path + " not found.")
        return {}

    print(prompts_path + " found!!")
    print("skip auto tagging.")

    with open(prompts_path) as fp:
        stored = json.load(fp)

    prompts_dict = {}
    current = default_token
    for img_path in imgs:
        name = os.path.basename(img_path)
        if name in stored:
            current = stored[name]
        prompts_dict[name] = current

    return prompts_dict
|
||||
|
||||
|
||||
# This is where the additional processing is implemented. The parameters include
|
||||
|
|
@ -335,52 +711,9 @@ class Script(scripts.Script):
|
|||
# Custom functions can be defined here, and additional libraries can be imported
|
||||
# to be used in processing. The return value should be a Processed object, which is
|
||||
# what is returned by the process_images method.
|
||||
|
||||
def run(self, p, project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt):
|
||||
def run(self, p, project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt):
|
||||
args = locals()
|
||||
|
||||
def detect_face(img, mask, face_detection_method, max_crop_size):
|
||||
img_array = np.array(img)
|
||||
|
||||
if mask is not None:
|
||||
mask_array = np.array(mask)/255
|
||||
if mask_array.ndim == 2:
|
||||
mask_array = mask_array[:, :, np.newaxis]
|
||||
|
||||
img_array = mask_array * img_array
|
||||
img_array = img_array.astype(np.uint8)
|
||||
|
||||
# image without alpha
|
||||
img_array = img_array[:,:,:3]
|
||||
|
||||
if face_detection_method == "YuNet":
|
||||
faces = self.detect_face(img_array)
|
||||
elif face_detection_method == "Yolov5_anime":
|
||||
faces = self.detect_anime_face(img_array)
|
||||
else:
|
||||
faces = self.detect_face(img_array)
|
||||
|
||||
if faces is None or len(faces) == 0:
|
||||
return []
|
||||
|
||||
face_coords = []
|
||||
for face in faces:
|
||||
x = int(face[0])
|
||||
y = int(face[1])
|
||||
w = int(face[2])
|
||||
h = int(face[3])
|
||||
if max(w,h) > max_crop_size:
|
||||
print("ignore big face")
|
||||
continue
|
||||
if w == 0 or h == 0:
|
||||
print("ignore w,h = 0 face")
|
||||
continue
|
||||
|
||||
face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] )
|
||||
|
||||
return face_coords
|
||||
|
||||
|
||||
if not os.path.isdir(project_dir):
|
||||
print("project_dir not found")
|
||||
return Processed()
|
||||
|
|
@ -408,10 +741,15 @@ class Script(scripts.Script):
|
|||
org_key_path = os.path.join(inv_path, "video_key")
|
||||
img2img_key_path = os.path.join(inv_path, "img2img_key")
|
||||
depth_path = os.path.join(inv_path, "video_key_depth")
|
||||
|
||||
self.prompts_dir = inv_path
|
||||
self.is_invert_mask = True
|
||||
else:
|
||||
org_key_path = os.path.join(project_dir, "video_key")
|
||||
img2img_key_path = os.path.join(project_dir, "img2img_key")
|
||||
depth_path = os.path.join(project_dir, "video_key_depth")
|
||||
self.prompts_dir = project_dir
|
||||
self.is_invert_mask = False
|
||||
|
||||
frame_mask_path = os.path.join(project_dir, "video_mask")
|
||||
|
||||
|
|
@ -427,53 +765,88 @@ class Script(scripts.Script):
|
|||
remove_pngs_in_dir(img2img_key_path)
|
||||
os.makedirs(img2img_key_path, exist_ok=True)
|
||||
|
||||
imgs = glob.glob( os.path.join(org_key_path ,"*.png") )
|
||||
for img in imgs:
|
||||
|
||||
image = Image.open(img)
|
||||
|
||||
def get_mask_of_img(img):
|
||||
img_basename = os.path.basename(img)
|
||||
|
||||
mask = None
|
||||
|
||||
if mask_mode != "None":
|
||||
mask_path = os.path.join( frame_mask_path , img_basename )
|
||||
if os.path.isfile( mask_path ):
|
||||
mask = Image.open(mask_path)
|
||||
return mask_path
|
||||
return ""
|
||||
|
||||
imgs = glob.glob( os.path.join(org_key_path ,"*.png") )
|
||||
masks = [ get_mask_of_img(i) for i in imgs ]
|
||||
|
||||
######################
|
||||
# face crop
|
||||
face_coords_dict={}
|
||||
for img,mask in zip(imgs,masks):
|
||||
face_detected = False
|
||||
if is_facecrop:
|
||||
image = Image.open(img)
|
||||
mask_image = Image.open(mask) if mask else None
|
||||
face_coords = self.detect_face(image, mask_image, face_detection_method, max_crop_size)
|
||||
if face_coords is None or len(face_coords) == 0:
|
||||
print("no face detected")
|
||||
else:
|
||||
print("face detected")
|
||||
face_detected = True
|
||||
|
||||
key = os.path.basename(img)
|
||||
face_coords_dict[key] = face_coords if face_detected else []
|
||||
|
||||
with open( os.path.join( project_dir if is_invert_mask == False else inv_path,"faces.txt" ), "w") as f:
|
||||
f.write(json.dumps(face_coords_dict,indent=4))
|
||||
|
||||
######################
|
||||
# prompts
|
||||
prompts_dict = self.load_prompts_dict(imgs, p.prompt)
|
||||
|
||||
if not prompts_dict:
|
||||
if auto_tag_mode != "None":
|
||||
prompts_dict = self.create_prompts_dict(imgs, masks, auto_tag_mode)
|
||||
|
||||
for key, value in prompts_dict.items():
|
||||
prompts_dict[key] = (value + "," + p.prompt) if add_tag_to_head else (p.prompt + "," + value)
|
||||
|
||||
else:
|
||||
for img in imgs:
|
||||
key = os.path.basename(img)
|
||||
prompts_dict[key] = p.prompt
|
||||
|
||||
with open( os.path.join( project_dir if is_invert_mask == False else inv_path, time.strftime("%Y%m%d-%H%M%S_") + "prompts.txt" ), "w") as f:
|
||||
f.write(json.dumps(prompts_dict,indent=4))
|
||||
|
||||
|
||||
######################
|
||||
# img2img
|
||||
for img, mask, face_coords, prompts in zip(imgs, masks, face_coords_dict.values(), prompts_dict.values()):
|
||||
|
||||
image = Image.open(img)
|
||||
mask_image = Image.open(mask) if mask else None
|
||||
|
||||
img_basename = os.path.basename(img)
|
||||
|
||||
_p = copy.copy(p)
|
||||
|
||||
_p.init_images=[image]
|
||||
_p.image_mask = mask
|
||||
_p.image_mask = mask_image
|
||||
_p.prompt = prompts
|
||||
resized_mask = None
|
||||
|
||||
repeat_count = img2img_repeat_count
|
||||
|
||||
_is_facecrop = is_facecrop
|
||||
|
||||
if _is_facecrop:
|
||||
### face detect in base img
|
||||
base_img = _p.init_images[0]
|
||||
|
||||
if base_img is None:
|
||||
print("p.init_images[0] is None")
|
||||
return process_images(p)
|
||||
|
||||
face_coords = detect_face(base_img, _p.image_mask, face_detection_method, max_crop_size)
|
||||
|
||||
if face_coords is None or len(face_coords) == 0:
|
||||
print("no face detected")
|
||||
_is_facecrop = False
|
||||
|
||||
if mask_mode != "None" or use_depth:
|
||||
if use_depth:
|
||||
depth_found, _p.image_mask = self.get_depth_map( mask, depth_path ,img_basename, is_invert_mask )
|
||||
mask = _p.image_mask
|
||||
depth_found, _p.image_mask = self.get_depth_map( mask_image, depth_path ,img_basename, is_invert_mask )
|
||||
mask_image = _p.image_mask
|
||||
if depth_found:
|
||||
_p.inpainting_mask_invert = 0
|
||||
|
||||
while repeat_count > 0:
|
||||
if _is_facecrop:
|
||||
|
||||
if face_coords:
|
||||
proc = self.face_crop_img2img(_p, face_coords, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt)
|
||||
else:
|
||||
proc = process_images(_p)
|
||||
|
|
@ -484,8 +857,8 @@ class Script(scripts.Script):
|
|||
if repeat_count > 0:
|
||||
_p.init_images=[proc.images[0]]
|
||||
|
||||
if mask is not None and resized_mask is None:
|
||||
resized_mask = resize_img(np.array(mask) , proc.images[0].width, proc.images[0].height)
|
||||
if mask_image is not None and resized_mask is None:
|
||||
resized_mask = resize_img(np.array(mask_image) , proc.images[0].width, proc.images[0].height)
|
||||
resized_mask = Image.fromarray(resized_mask)
|
||||
_p.image_mask = resized_mask
|
||||
_p.seed += inc_seed
|
||||
|
|
@ -493,8 +866,8 @@ class Script(scripts.Script):
|
|||
proc.images[0].save( os.path.join( img2img_key_path , img_basename ) )
|
||||
|
||||
with open( os.path.join( project_dir if is_invert_mask == False else inv_path,"param.txt" ), "w") as f:
|
||||
f.write(proc.info)
|
||||
f.write(pprint.pformat(proc.info))
|
||||
with open( os.path.join( project_dir if is_invert_mask == False else inv_path ,"args.txt" ), "w") as f:
|
||||
f.write(str(args))
|
||||
f.write(pprint.pformat(args))
|
||||
|
||||
return proc
|
||||
|
|
|
|||
Loading…
Reference in New Issue