add function auto tagging

Temporary fix for module name conflict between blip and yolov
Improved UI
refactoring
Updated readme
pull/48/head
s9roll7 2023-02-11 17:02:33 +09:00
parent fb192bc100
commit 9553fb33ff
7 changed files with 788 additions and 99 deletions

View File

@ -9,15 +9,30 @@
#### sample 1
<div><video controls src="https://user-images.githubusercontent.com/118420657/213474231-38cac10e-7e75-43e1-b912-4e7727074d39.mp4" muted="false"></video></div>
#### sample 2
<div><video controls src="https://user-images.githubusercontent.com/118420657/213474343-e49e797d-386e-459f-9be9-2241b2d6266d.mp4" muted="false"></video></div>
#### sample 3 blend background
#### sample 2 blend background
- person : masterpiece, best quality, masterpiece, 1girl, masterpiece, best quality,anime screencap, anime style
- background : cyberpunk, factory, room ,anime screencap, anime style
- It is also possible to blend with your favorite videos.
<div><video controls src="https://user-images.githubusercontent.com/118420657/214592811-9677634f-93bb-40dd-95b6-1c97c8e7bb63.mp4" muted="false"></video></div>
#### sample 3 auto tagging
- left : original
- center : apply the same prompts in all keyframes
- right : apply auto tagging by deepdanbooru in all keyframes
- This function improves the detailed changes in facial expressions, hand expressions, etc.
In the sample video, the "closed_eyes" and "hands_on_own_face" tags have been added to better represent eye blinks and hands brought in front of the face.
#### sample 4 auto tagging (apply lora dynamically)
- left : apply auto tagging by deepdanbooru in all keyframes
- right : apply auto tagging by deepdanbooru in all keyframes + apply "anyahehface" lora dynamically
- Added the function to dynamically apply TI, hypernet, Lora, and additional prompts according to automatically attached tags.
In the sample video, if the "smile" tag is given, the lora and lora trigger keywords are set to be added according to the strength of the "smile" tag.
Also, since automatically added tags are sometimes incorrect, unnecessary tags are listed in the blacklist.
[Here](sample/) are the actual configuration files used. Place them in the "Project directory" to use them.
## Installation
- Install [ffmpeg](https://ffmpeg.org/) for your operating system
(https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/)

237
calculator.py Normal file
View File

@ -0,0 +1,237 @@
# https://www.mycompiler.io/view/3TFZagC
class ParseError(Exception):
    """Parse failure at a specific input position.

    `msg` is a %-style format string and `args` holds its substitution
    values; `pos` is the 0-based index in the input where parsing failed.
    """

    def __init__(self, pos, msg, *args):
        self.pos = pos
        self.msg = msg
        self.args = args

    def __str__(self):
        return f'{self.msg % self.args} at position {self.pos}'
class Parser:
    """Minimal recursive-descent parser base class.

    Subclasses implement rule methods (including `start`) that consume
    `self.text` via `char`/`keyword`/`match`. `self.pos` always points at
    the last consumed character (-1 before anything is consumed) and
    `self.len` is the index of the last character of the input.
    """

    def __init__(self):
        # Cache for split_char_ranges results, keyed by the raw spec string.
        self.cache = {}

    def parse(self, text):
        """Run the subclass's `start` rule over `text`.

        The entire input must be consumed; otherwise ParseError is raised.
        """
        self.text = text
        self.pos = -1
        self.len = len(text) - 1  # index of last char; -1 for empty input
        rv = self.start()
        self.assert_end()
        return rv

    def assert_end(self):
        """Raise ParseError if any unconsumed input remains."""
        if self.pos < self.len:
            raise ParseError(
                self.pos + 1,
                'Expected end of string but got %s',
                self.text[self.pos + 1]
            )

    def eat_whitespace(self):
        """Advance past any run of whitespace characters."""
        while self.pos < self.len and self.text[self.pos + 1] in " \f\v\r\t\n":
            self.pos += 1

    def split_char_ranges(self, chars):
        """Split a spec like '0-9a' into pieces: ['0-9', 'a'].

        Results are cached per spec string. Raises ValueError for a
        descending range such as '9-0'.
        """
        try:
            return self.cache[chars]
        except KeyError:
            pass
        rv = []
        index = 0
        length = len(chars)
        while index < length:
            if index + 2 < length and chars[index + 1] == '-':
                if chars[index] >= chars[index + 2]:
                    raise ValueError('Bad character range')
                rv.append(chars[index:index + 3])
                index += 3
            else:
                rv.append(chars[index])
                index += 1
        self.cache[chars] = rv
        return rv

    def char(self, chars=None):
        """Consume and return the next character.

        If `chars` is given it is a spec of allowed characters/ranges
        (e.g. '0-9'); otherwise any character is accepted. Raises
        ParseError on end of input or mismatch (without consuming).
        """
        if self.pos >= self.len:
            raise ParseError(
                self.pos + 1,
                'Expected %s but got end of string',
                'character' if chars is None else '[%s]' % chars
            )
        next_char = self.text[self.pos + 1]
        if chars is None:  # fixed: identity comparison with None (was `== None`)
            self.pos += 1
            return next_char
        for char_range in self.split_char_ranges(chars):
            if len(char_range) == 1:
                if next_char == char_range:
                    self.pos += 1
                    return next_char
            elif char_range[0] <= next_char <= char_range[2]:
                self.pos += 1
                return next_char
        raise ParseError(
            self.pos + 1,
            'Expected %s but got %s',
            'character' if chars is None else '[%s]' % chars,
            next_char
        )

    def keyword(self, *keywords):
        """Consume one of `keywords` (skipping surrounding whitespace) and return it."""
        self.eat_whitespace()
        if self.pos >= self.len:
            raise ParseError(
                self.pos + 1,
                'Expected %s but got end of string',
                ','.join(keywords)
            )
        for keyword in keywords:
            low = self.pos + 1
            high = low + len(keyword)
            if self.text[low:high] == keyword:
                self.pos += len(keyword)
                self.eat_whitespace()
                return keyword
        raise ParseError(
            self.pos + 1,
            'Expected %s but got %s',
            ','.join(keywords),
            self.text[self.pos + 1],
        )

    def match(self, *rules):
        """Try each rule method name in order; return the first success.

        Failed rules backtrack `self.pos`. On total failure, re-raise the
        ParseError from the rule that advanced furthest, or an aggregate
        error when several rules tied at the same position.
        """
        self.eat_whitespace()
        last_error_pos = -1
        last_exception = None
        last_error_rules = []
        for rule in rules:
            initial_pos = self.pos
            try:
                rv = getattr(self, rule)()
                self.eat_whitespace()
                return rv
            except ParseError as e:
                self.pos = initial_pos  # backtrack before trying the next rule
                if e.pos > last_error_pos:
                    last_exception = e
                    last_error_pos = e.pos
                    last_error_rules.clear()
                    last_error_rules.append(rule)
                elif e.pos == last_error_pos:
                    last_error_rules.append(rule)
        if len(last_error_rules) == 1:
            raise last_exception
        else:
            raise ParseError(
                last_error_pos,
                'Expected %s but got %s',
                ','.join(last_error_rules),
                self.text[last_error_pos]
            )

    def maybe_char(self, chars=None):
        """Like `char`, but return None instead of raising on mismatch."""
        try:
            return self.char(chars)
        except ParseError:
            return None

    def maybe_match(self, *rules):
        """Like `match`, but return None instead of raising on failure."""
        try:
            return self.match(*rules)
        except ParseError:
            return None

    def maybe_keyword(self, *keywords):
        """Like `keyword`, but return None instead of raising on mismatch."""
        try:
            return self.keyword(*keywords)
        except ParseError:
            return None
class CalcParser(Parser):
    """Arithmetic expression evaluator built on Parser.

    Grammar (lowest precedence first):
        expression := term (('+'|'-') term)*
        term       := factor (('*'|'/') factor)*
        factor     := '(' expression ')' | number
        number     := ['+'|'-'] digits ['.' digits]
    Every result is a float.
    """

    def start(self):
        # Entry rule invoked by Parser.parse().
        return self.expression()

    def expression(self):
        # Left-associative addition and subtraction.
        result = self.match('term')
        while True:
            operator = self.maybe_keyword('+', '-')
            if operator is None:
                return result
            operand = self.match('term')
            result = result + operand if operator == '+' else result - operand

    def term(self):
        # Left-associative multiplication and division.
        result = self.match('factor')
        while True:
            operator = self.maybe_keyword('*', '/')
            if operator is None:
                return result
            operand = self.match('factor')
            result = result * operand if operator == '*' else result / operand

    def factor(self):
        # Either a parenthesized sub-expression or a literal number.
        if self.maybe_keyword('('):
            inner = self.match('expression')
            self.keyword(')')
            return inner
        return self.match('number')

    def number(self):
        # Optional sign, integer digits, optional fractional part.
        digits = []
        sign = self.maybe_keyword('+', '-')
        if sign is not None:
            digits.append(sign)
        digits.append(self.char('0-9'))
        while True:
            extra = self.maybe_char('0-9')
            if extra is None:
                break
            digits.append(extra)
        if self.maybe_char('.'):
            digits.append('.')
            digits.append(self.char('0-9'))
            while True:
                extra = self.maybe_char('0-9')
                if extra is None:
                    break
                digits.append(extra)
        return float(''.join(digits))

BIN
imgs/sample_anyaheh.mp4 Normal file

Binary file not shown.

BIN
imgs/sample_autotag.mp4 Normal file

Binary file not shown.

54
sample/add_token.txt Normal file
View File

@ -0,0 +1,54 @@
[
{
"target":"smile",
"min_score":0.5,
"token": ["lottalewds_v0", "1.2"],
"type":"lora"
},
{
"target":"smile",
"min_score":0.5,
"token": ["anyahehface", "score*1.2"],
"type":"normal"
},
{
"target":"smile",
"min_score":0.5,
"token": ["wicked smug", "score*1.2"],
"type":"normal"
},
{
"target":"smile",
"min_score":0.5,
"token": ["half closed eyes", "0.2 + score*0.3"],
"type":"normal"
},
{
"target":"test_token",
"min_score":0.8,
"token": ["lora_name_A", "0.5"],
"type":"lora"
},
{
"target":"test_token",
"min_score":0.5,
"token": ["bbbb", "score - 0.1"],
"type":"normal"
},
{
"target":"test_token2",
"min_score":0.8,
"token": ["hypernet_name_A", "score"],
"type":"hypernet"
},
{
"target":"test_token3",
"min_score":0.0,
"token": ["dddd", "score"],
"type":"normal"
}
]

10
sample/blacklist.txt Normal file
View File

@ -0,0 +1,10 @@
motion_blur
blurry
realistic
depth_of_field
mountain
tree
water
underwater
tongue
tongue_out

View File

@ -3,17 +3,23 @@ import gradio as gr
import os
import torch
import random
import time
import pprint
from modules.processing import process_images,Processed
from modules.paths import models_path
from modules.textual_inversion import autocrop
import modules.images
from modules import shared,deepbooru,masking
import cv2
import copy
import numpy as np
from PIL import Image
from PIL import Image,ImageOps
import glob
import requests
import json
import re
from extensions.ebsynth_utility.calculator import CalcParser,ParseError
def get_my_dir():
if os.path.isdir("extensions/ebsynth_utility"):
@ -61,6 +67,9 @@ class Script(scripts.Script):
face_detector = None
face_merge_mask_filename = "face_crop_img2img_mask.png"
face_merge_mask_image = None
prompts_dir = ""
calc_parser = None
is_invert_mask = False
# The title of the script. This is what will be displayed in the dropdown menu.
def title(self):
@ -83,27 +92,40 @@ class Script(scripts.Script):
with gr.Column(variant='panel'):
with gr.Column():
project_dir = gr.Textbox(label='Project directory', lines=1)
mask_mode = gr.Dropdown(choices=["Normal","Invert","None","Don't Override"], value="Normal" ,label="Mask Mode(Override img2img Mask mode)")
inpaint_area = gr.Dropdown(choices=["Whole picture","Only masked","Don't Override"], type = "index", value="Only masked" ,label="Inpaint Area(Override img2img Inpaint area)")
with gr.Column():
use_depth = gr.Checkbox(True, label="Use Depth Map If exists in /video_key_depth")
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
See \
<font color=\"blue\"><a href=\"https://github.com/thygate/stable-diffusion-webui-depthmap-script\">[here]</a></font> for depth map.\
</p>")
with gr.Column():
with gr.Accordion("Mask option"):
mask_mode = gr.Dropdown(choices=["Normal","Invert","None","Don't Override"], value="Normal" ,label="Mask Mode(Override img2img Mask mode)")
inpaint_area = gr.Dropdown(choices=["Whole picture","Only masked","Don't Override"], type = "index", value="Only masked" ,label="Inpaint Area(Override img2img Inpaint area)")
use_depth = gr.Checkbox(True, label="Use Depth Map If exists in /video_key_depth")
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
See \
<font color=\"blue\"><a href=\"https://github.com/thygate/stable-diffusion-webui-depthmap-script\">[here]</a></font> for depth map.\
</p>")
with gr.Accordion("Loopback option"):
img2img_repeat_count = gr.Slider(minimum=1, maximum=30, step=1, value=1, label="Img2Img Repeat Count (Loop Back)")
inc_seed = gr.Slider(minimum=0, maximum=9999999, step=1, value=1, label="Add N to seed when repeating ")
with gr.Column():
is_facecrop = gr.Checkbox(False, label="use Face Crop img2img")
face_detection_method = gr.Dropdown(choices=["YuNet","Yolov5_anime"], value="YuNet" ,label="Face Detection Method")
with gr.Accordion("Auto Tagging option"):
auto_tag_mode = gr.Dropdown(choices=["None","DeepDanbooru","CLIP"], value="None" ,label="Auto Tagging")
add_tag_to_head = gr.Checkbox(False, label="Add additional prompts to the head")
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
If loading of the Yolov5_anime model fails, check\
<font color=\"blue\"><a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/2235\">[this]</a></font> solution.\
The results are stored in timestamp_prompts.txt.<br>\
If you want to use the same tagging results the next time you run img2img, rename the file to prompts.txt<br>\
Recommend enabling the following settings.<br>\
<font color=\"red\">\
Settings->Interrogate Option->Interrogate: include ranks of model tags matches in results\
</font>\
</p>")
with gr.Accordion("Face Crop option"):
is_facecrop = gr.Checkbox(False, label="use Face Crop img2img")
with gr.Row():
face_detection_method = gr.Dropdown(choices=["YuNet","Yolov5_anime"], value="YuNet" ,label="Face Detection Method")
gr.HTML(value="<p style='margin-bottom: 0.7em'>\
If loading of the Yolov5_anime model fails, check\
<font color=\"blue\"><a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/2235\">[this]</a></font> solution.\
</p>")
max_crop_size = gr.Slider(minimum=0, maximum=2048, step=1, value=1024, label="Max Crop Size")
face_denoising_strength = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.5, label="Face Denoising Strength")
face_area_magnification = gr.Slider(minimum=1.00, maximum=10.00, step=0.01, value=1.5, label="Face Area Magnification ")
@ -115,10 +137,10 @@ class Script(scripts.Script):
value = "face close up,"
)
return [project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt]
return [project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt]
def detect_face(self, img_array):
def detect_face_from_img(self, img_array):
if not self.face_detector:
dnn_model_path = autocrop.download_and_cache_models(os.path.join(models_path, "opencv"))
self.face_detector = cv2.FaceDetectorYN.create(dnn_model_path, "", (0, 0))
@ -127,14 +149,19 @@ class Script(scripts.Script):
_, result = self.face_detector.detect(img_array)
return result
def detect_anime_face(self, img_array):
def detect_anime_face_from_img(self, img_array):
import sys
if not self.anime_face_detector:
if 'models' in sys.modules:
del sys.modules['models']
anime_model_path = download_and_cache_models(os.path.join(models_path, "yolov5_anime"))
if not os.path.isfile(anime_model_path):
print( "WARNING!! " + anime_model_path + " not found.")
print( "use YuNet instead.")
return self.detect_face(img_array)
return self.detect_face_from_img(img_array)
self.anime_face_detector = torch.hub.load('ultralytics/yolov5', 'custom', path=anime_model_path)
@ -150,6 +177,49 @@ class Script(scripts.Script):
return faces
def detect_face(self, img, mask, face_detection_method, max_crop_size):
# Detect faces in `img` (optionally restricted to the masked area) and
# return their coordinates normalized to the image size: a list of
# [x, y, w, h] entries, each value in 0..1. Returns [] when no usable
# face is found.
img_array = np.array(img)
if mask is not None:
if self.is_invert_mask:
mask = ImageOps.invert(mask)
# Zero out unmasked pixels so the detector only sees the masked region.
mask_array = np.array(mask)/255
if mask_array.ndim == 2:
mask_array = mask_array[:, :, np.newaxis]
img_array = mask_array * img_array
img_array = img_array.astype(np.uint8)
# image without alpha
img_array = img_array[:,:,:3]
if face_detection_method == "YuNet":
faces = self.detect_face_from_img(img_array)
elif face_detection_method == "Yolov5_anime":
faces = self.detect_anime_face_from_img(img_array)
else:
# Unknown method name: fall back to YuNet.
faces = self.detect_face_from_img(img_array)
if faces is None or len(faces) == 0:
return []
face_coords = []
for face in faces:
# First four entries of a detection are pixel x, y, width, height.
x = int(face[0])
y = int(face[1])
w = int(face[2])
h = int(face[3])
if max(w,h) > max_crop_size:
print("ignore big face")
continue
if w == 0 or h == 0:
print("ignore w,h = 0 face")
continue
# Normalize by image width/height.
face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] )
return face_coords
def get_mask(self):
def create_mask( output, x_rate, y_rate, k_size ):
img = np.zeros((512, 512, 3))
@ -327,6 +397,312 @@ class Script(scripts.Script):
mask = depth
return depth!=None, mask
### auto tagging
debug_count = 0
def get_masked_image(self, image, mask_image):
    """Crop `image` to the masked region and paint everything outside the
    mask a flat pink so the interrogators only see the masked subject.

    image      -- PIL image of the keyframe.
    mask_image -- PIL mask image, or None to just return `image` as RGB.
    Returns a cropped RGB PIL image.

    NOTE(review): the (255, 190, 200) pink fill presumably produces the
    "pink background"-style tags that remove_reserved_token strips later
    in this file — confirm before changing the color.
    """
    if mask_image is None:  # fixed: identity comparison with None (was `== None`)
        return image.convert("RGB")
    mask = mask_image.convert('L')
    if self.is_invert_mask:
        mask = ImageOps.invert(mask)
    # Tight bounding box of the non-zero mask area (0 px padding).
    crop_region = masking.get_crop_region(np.array(mask), 0)
    image = image.crop(crop_region).convert("RGB")
    mask = mask.crop(crop_region)
    # Composite: masked pixels keep the photo, the rest becomes flat pink.
    base_img = Image.new("RGB", image.size, (255, 190, 200))
    image = Image.composite( image, base_img, mask )
    return image
def interrogate_deepdanbooru(self, imgs, masks):
# Tag every keyframe with DeepDanbooru. `imgs` are image file paths and
# `masks` is the parallel list of mask paths ("" when a frame has no
# mask). Returns {basename: tag_string}.
prompts_dict = {}
cause_err = False
try:
deepbooru.model.start()
for img,mask in zip(imgs,masks):
key = os.path.basename(img)
print(key + " interrogate deepdanbooru")
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
# Restrict tagging to the masked subject.
image = self.get_masked_image(image, mask_image)
prompt = deepbooru.model.tag_multi(image)
prompts_dict[key] = prompt
except Exception as e:
import traceback
traceback.print_exc()
print(e)
cause_err = True
finally:
# Always release the deepbooru model, even on failure.
deepbooru.model.stop()
if cause_err:
print("Exception occurred during auto-tagging(deepdanbooru)")
# NOTE(review): Processed() is normally constructed with arguments in
# the webui API; calling it bare here may itself raise - confirm the
# intended error return.
return Processed()
return prompts_dict
def interrogate_clip(self, imgs, masks):
# Caption (BLIP) and rank-tag (CLIP) every keyframe. `imgs` are image
# paths, `masks` the parallel mask paths ("" for no mask). Returns
# {basename: "caption, tag1, tag2, ..."}.
from modules import devices, shared, lowvram, paths
import importlib
import models
caption_list = []
prompts_dict = {}
cause_err = False
try:
if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
lowvram.send_everything_to_cpu()
devices.torch_gc()
# Reload the top-level 'models' package with BLIP's repo path
# prioritized - presumably works around the module-name clash between
# BLIP and yolov5 mentioned in the commit message (both ship a
# 'models' package); confirm when that conflict is fixed upstream.
with paths.Prioritize("BLIP"):
importlib.reload(models)
shared.interrogator.load()
# Pass 1: BLIP caption for each masked frame.
for img,mask in zip(imgs,masks):
key = os.path.basename(img)
print(key + " generate caption")
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
image = self.get_masked_image(image, mask_image)
caption = shared.interrogator.generate_caption(image)
caption_list.append(caption)
# Free BLIP before running the CLIP ranking pass.
shared.interrogator.send_blip_to_ram()
devices.torch_gc()
# Pass 2: CLIP category ranking appended to each caption.
for img,mask,caption in zip(imgs,masks,caption_list):
key = os.path.basename(img)
print(key + " interrogate clip")
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
image = self.get_masked_image(image, mask_image)
clip_image = shared.interrogator.clip_preprocess(image).unsqueeze(0).type(shared.interrogator.dtype).to(devices.device_interrogate)
res = ""
with torch.no_grad(), devices.autocast():
image_features = shared.interrogator.clip_model.encode_image(clip_image).type(shared.interrogator.dtype)
image_features /= image_features.norm(dim=-1, keepdim=True)
for name, topn, items in shared.interrogator.categories():
matches = shared.interrogator.rank(image_features, items, top_count=topn)
for match, score in matches:
# "(tag:0.123)" ranked form only when the webui option is on.
if shared.opts.interrogate_return_ranks:
res += f", ({match}:{score/100:.3f})"
else:
res += ", " + match
prompts_dict[key] = (caption + res)
except Exception as e:
import traceback
traceback.print_exc()
print(e)
cause_err = True
finally:
# Always unload the interrogator, even on failure.
shared.interrogator.unload()
if cause_err:
print("Exception occurred during auto-tagging(blip/clip)")
# NOTE(review): bare Processed() may itself raise - confirm intended.
return Processed()
return prompts_dict
def remove_reserved_token(self, token_list):
    """Strip tags that only describe the artificial pink backdrop.

    token_list -- list of (tag, score, type) tuples; must be non-empty.
    The head token, when it carries a score (type "normal"), also has any
    literal 'pink background' phrase removed from its text, mutating
    token_list[0] in place. Returns a new filtered list.
    """
    reserved_list = ["pink_background","simple_background","pink"]
    leading = token_list[0]
    if leading[2] == "normal":
        cleaned = leading[0].replace('pink background', '')
        token_list[0] = (cleaned, leading[1], leading[2])
    return [entry for entry in token_list if entry[0] not in reserved_list]
def remove_blacklisted_token(self, token_list):
    """Drop tokens whose tag appears in <prompts_dir>/blacklist.txt.

    The blacklist file holds one tag per line; surrounding whitespace is
    stripped. If the file is missing the list is returned unchanged
    (best-effort, matching the original behavior).

    token_list -- list of (tag, score, type) tuples.
    Returns the filtered list.
    """
    black_list_path = os.path.join(self.prompts_dir, "blacklist.txt")
    if not os.path.isfile(black_list_path):
        print(black_list_path + " not found.")
        return token_list
    with open(black_list_path) as f:
        # Set for O(1) membership tests instead of a list scan per token.
        black_list = {s.strip() for s in f.readlines()}
    return [token for token in token_list if token[0] not in black_list]
def add_token(self, token_list):
# Append extra tokens (lora/hypernet/normal prompt entries) driven by
# tags that were detected with a sufficient score, as configured in
# <prompts_dir>/add_token.txt (JSON; sample format in the docstring
# below). Returns token_list plus the generated entries.
add_list_path = os.path.join(self.prompts_dir, "add_token.txt")
if not os.path.isfile(add_list_path):
print(add_list_path + " not found.")
return token_list
# Lazily build the arithmetic parser used to evaluate score formulas.
if not self.calc_parser:
self.calc_parser = CalcParser()
with open(add_list_path) as f:
add_list = json.load(f)
'''
[
{
"target":"test_token",
"min_score":0.8,
"token": ["lora_name_A", "0.5"],
"type":"lora"
},
{
"target":"test_token",
"min_score":0.5,
"token": ["bbbb", "score - 0.1"],
"type":"normal"
},
{
"target":"test_token2",
"min_score":0.8,
"token": ["hypernet_name_A", "score"],
"type":"hypernet"
},
{
"target":"test_token3",
"min_score":0.0,
"token": ["dddd", "score"],
"type":"normal"
}
]
'''
result_list = []
for token in token_list:
for add_item in add_list:
if token[0] == add_item["target"]:
if token[1] > add_item["min_score"]:
# hit
# The formula may reference "score"; substitute the tag's
# detected score textually before evaluating it.
formula = str(add_item["token"][1])
formula = formula.replace("score",str(token[1]))
print('Input: %s' % str(add_item["token"][1]))
try:
score = self.calc_parser.parse(formula)
score = round(score, 3)
except (ParseError, ZeroDivisionError) as e:
# Bad formula: report it and skip this addition only.
print('Input: %s' % str(add_item["token"][1]))
print('Error: %s' % e)
print("ignore this token")
continue
print("score = " + str(score))
result_list.append( ( add_item["token"][0], score, add_item["type"] ) )
token_list = token_list + result_list
return token_list
def create_prompts_dict(self, imgs, masks, auto_tag_mode):
# Build {basename: prompt_string} by interrogating each keyframe and
# post-processing the resulting tags (reserved-tag removal, blacklist
# filtering, configured token additions).
prompts_dict = {}
if auto_tag_mode == "DeepDanbooru":
raw_dict = self.interrogate_deepdanbooru(imgs, masks)
elif auto_tag_mode == "CLIP":
raw_dict = self.interrogate_clip(imgs, masks)
# NOTE(review): raw_dict is unbound if auto_tag_mode is any other value;
# the caller guards with `auto_tag_mode != "None"`, but an explicit
# `raw_dict = {}` default would be safer - confirm.
# Matches the "(tag:0.123)" ranked-token form produced by interrogation.
repatter = re.compile(r'\((.+)\:([0-9\.]+)\)')
for key, value_str in raw_dict.items():
value_list = [x.strip() for x in value_str.split(',')]
value = []
for v in value_list:
m = repatter.fullmatch(v)
if m:
value.append((m.group(1), float(m.group(2)), "normal"))
else:
# Unranked token (e.g. plain caption text): keep with score 1.
value.append((v, 1, "no_score"))
# print(value)
value = self.remove_reserved_token(value)
# print(value)
value = self.remove_blacklisted_token(value)
# print(value)
value = self.add_token(value)
# print(value)
# Render a (tag, score, type) tuple back into prompt syntax.
def create_token_str(x):
print(x)
if x[2] == "no_score":
return x[0]
elif x[2] == "lora":
return "<lora:" + x[0] + ":" + str(x[1]) + ">"
elif x[2] == "hypernet":
return "<hypernet:" + x[0] + ":" + str(x[1]) + ">"
else:
return "(" + x[0] + ":" + str(x[1]) + ")"
value_list = [create_token_str(x) for x in value]
value = ",".join(value_list)
prompts_dict[key] = value
return prompts_dict
def load_prompts_dict(self, imgs, default_token):
    """Load per-keyframe prompts from <prompts_dir>/prompts.txt if present.

    Returns {} when the file does not exist (auto tagging proceeds).
    Frames without an entry inherit the prompt of the nearest preceding
    frame, or `default_token` before the first entry.
    """
    prompts_path = os.path.join(self.prompts_dir, "prompts.txt")
    if not os.path.isfile(prompts_path):
        print(prompts_path + " not found.")
        return {}
    print(prompts_path + " found!!")
    print("skip auto tagging.")
    with open(prompts_path) as f:
        raw_dict = json.load(f)
    prompts_dict = {}
    carried = default_token
    for img in imgs:
        frame_key = os.path.basename(img)
        carried = raw_dict.get(frame_key, carried)
        prompts_dict[frame_key] = carried
    return prompts_dict
# This is where the additional processing is implemented. The parameters include
@ -335,52 +711,9 @@ class Script(scripts.Script):
# Custom functions can be defined here, and additional libraries can be imported
# to be used in processing. The return value should be a Processed object, which is
# what is returned by the process_images method.
def run(self, p, project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt):
def run(self, p, project_dir, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, is_facecrop, face_detection_method, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt):
args = locals()
def detect_face(img, mask, face_detection_method, max_crop_size):
img_array = np.array(img)
if mask is not None:
mask_array = np.array(mask)/255
if mask_array.ndim == 2:
mask_array = mask_array[:, :, np.newaxis]
img_array = mask_array * img_array
img_array = img_array.astype(np.uint8)
# image without alpha
img_array = img_array[:,:,:3]
if face_detection_method == "YuNet":
faces = self.detect_face(img_array)
elif face_detection_method == "Yolov5_anime":
faces = self.detect_anime_face(img_array)
else:
faces = self.detect_face(img_array)
if faces is None or len(faces) == 0:
return []
face_coords = []
for face in faces:
x = int(face[0])
y = int(face[1])
w = int(face[2])
h = int(face[3])
if max(w,h) > max_crop_size:
print("ignore big face")
continue
if w == 0 or h == 0:
print("ignore w,h = 0 face")
continue
face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] )
return face_coords
if not os.path.isdir(project_dir):
print("project_dir not found")
return Processed()
@ -408,10 +741,15 @@ class Script(scripts.Script):
org_key_path = os.path.join(inv_path, "video_key")
img2img_key_path = os.path.join(inv_path, "img2img_key")
depth_path = os.path.join(inv_path, "video_key_depth")
self.prompts_dir = inv_path
self.is_invert_mask = True
else:
org_key_path = os.path.join(project_dir, "video_key")
img2img_key_path = os.path.join(project_dir, "img2img_key")
depth_path = os.path.join(project_dir, "video_key_depth")
self.prompts_dir = project_dir
self.is_invert_mask = False
frame_mask_path = os.path.join(project_dir, "video_mask")
@ -427,53 +765,88 @@ class Script(scripts.Script):
remove_pngs_in_dir(img2img_key_path)
os.makedirs(img2img_key_path, exist_ok=True)
imgs = glob.glob( os.path.join(org_key_path ,"*.png") )
for img in imgs:
image = Image.open(img)
def get_mask_of_img(img):
img_basename = os.path.basename(img)
mask = None
if mask_mode != "None":
mask_path = os.path.join( frame_mask_path , img_basename )
if os.path.isfile( mask_path ):
mask = Image.open(mask_path)
return mask_path
return ""
imgs = glob.glob( os.path.join(org_key_path ,"*.png") )
masks = [ get_mask_of_img(i) for i in imgs ]
######################
# face crop
face_coords_dict={}
for img,mask in zip(imgs,masks):
face_detected = False
if is_facecrop:
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
face_coords = self.detect_face(image, mask_image, face_detection_method, max_crop_size)
if face_coords is None or len(face_coords) == 0:
print("no face detected")
else:
print("face detected")
face_detected = True
key = os.path.basename(img)
face_coords_dict[key] = face_coords if face_detected else []
with open( os.path.join( project_dir if is_invert_mask == False else inv_path,"faces.txt" ), "w") as f:
f.write(json.dumps(face_coords_dict,indent=4))
######################
# prompts
prompts_dict = self.load_prompts_dict(imgs, p.prompt)
if not prompts_dict:
if auto_tag_mode != "None":
prompts_dict = self.create_prompts_dict(imgs, masks, auto_tag_mode)
for key, value in prompts_dict.items():
prompts_dict[key] = (value + "," + p.prompt) if add_tag_to_head else (p.prompt + "," + value)
else:
for img in imgs:
key = os.path.basename(img)
prompts_dict[key] = p.prompt
with open( os.path.join( project_dir if is_invert_mask == False else inv_path, time.strftime("%Y%m%d-%H%M%S_") + "prompts.txt" ), "w") as f:
f.write(json.dumps(prompts_dict,indent=4))
######################
# img2img
for img, mask, face_coords, prompts in zip(imgs, masks, face_coords_dict.values(), prompts_dict.values()):
image = Image.open(img)
mask_image = Image.open(mask) if mask else None
img_basename = os.path.basename(img)
_p = copy.copy(p)
_p.init_images=[image]
_p.image_mask = mask
_p.image_mask = mask_image
_p.prompt = prompts
resized_mask = None
repeat_count = img2img_repeat_count
_is_facecrop = is_facecrop
if _is_facecrop:
### face detect in base img
base_img = _p.init_images[0]
if base_img is None:
print("p.init_images[0] is None")
return process_images(p)
face_coords = detect_face(base_img, _p.image_mask, face_detection_method, max_crop_size)
if face_coords is None or len(face_coords) == 0:
print("no face detected")
_is_facecrop = False
if mask_mode != "None" or use_depth:
if use_depth:
depth_found, _p.image_mask = self.get_depth_map( mask, depth_path ,img_basename, is_invert_mask )
mask = _p.image_mask
depth_found, _p.image_mask = self.get_depth_map( mask_image, depth_path ,img_basename, is_invert_mask )
mask_image = _p.image_mask
if depth_found:
_p.inpainting_mask_invert = 0
while repeat_count > 0:
if _is_facecrop:
if face_coords:
proc = self.face_crop_img2img(_p, face_coords, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt)
else:
proc = process_images(_p)
@ -484,8 +857,8 @@ class Script(scripts.Script):
if repeat_count > 0:
_p.init_images=[proc.images[0]]
if mask is not None and resized_mask is None:
resized_mask = resize_img(np.array(mask) , proc.images[0].width, proc.images[0].height)
if mask_image is not None and resized_mask is None:
resized_mask = resize_img(np.array(mask_image) , proc.images[0].width, proc.images[0].height)
resized_mask = Image.fromarray(resized_mask)
_p.image_mask = resized_mask
_p.seed += inc_seed
@ -493,8 +866,8 @@ class Script(scripts.Script):
proc.images[0].save( os.path.join( img2img_key_path , img_basename ) )
with open( os.path.join( project_dir if is_invert_mask == False else inv_path,"param.txt" ), "w") as f:
f.write(proc.info)
f.write(pprint.pformat(proc.info))
with open( os.path.join( project_dir if is_invert_mask == False else inv_path ,"args.txt" ), "w") as f:
f.write(str(args))
f.write(pprint.pformat(args))
return proc