Refactoring; implement #33 (not enabled for now)

pull/41/head
toshiaki1729 2023-01-14 00:57:52 +09:00
parent 768c3f36aa
commit 7ae140152b
7 changed files with 191 additions and 100 deletions

View File

@ -1,4 +0,0 @@
# Extension install hook: ensure the GPU build of ONNX Runtime (needed by the
# WDv1.4 tagger) is present before the extension loads.
import launch
if not launch.is_installed("onnxruntime-gpu"):
    launch.run_pip("install onnxruntime-gpu", "requirements for dataset-tag-editor [onnxruntime-gpu]")

View File

@ -0,0 +1,50 @@
import modules.shared as shared
from scripts.dynamic_import import dynamic_import
git_large_captioning = dynamic_import('scripts/dataset_tag_editor/interrogators/git_large_captioning.py')
class Captioning:
    """Base class for image-captioning backends.

    Works as a context manager: the model is loaded on enter via
    start() and released on exit via stop(). Subclasses override
    start()/stop()/predict()/name().
    """

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self.stop()

    def start(self):
        """Load the underlying model. No-op by default."""

    def stop(self):
        """Release the underlying model. No-op by default."""

    def predict(self, image):
        """Return a list of caption tags for *image*. Must be overridden."""
        raise NotImplementedError

    def name(self):
        """Human-readable backend name. Must be overridden."""
        raise NotImplementedError
class BLIP(Captioning):
    """Caption images with the WebUI's built-in BLIP interrogator."""

    def start(self):
        shared.interrogator.load()

    def stop(self):
        shared.interrogator.unload()

    def predict(self, image):
        # The interrogator returns one comma-separated caption string;
        # split it and drop empty fragments.
        caption = shared.interrogator.generate_caption(image)
        return [tag for tag in caption.split(',') if tag]

    def name(self):
        return 'BLIP'
class GITLarge(Captioning):
    """Caption images with the GIT-large-COCO model (see git_large_captioning)."""

    def start(self):
        git_large_captioning.instance.load()

    def stop(self):
        git_large_captioning.instance.unload()

    def predict(self, image):
        # apply() returns one comma-separated string; split and drop
        # empty fragments.
        caption = git_large_captioning.instance.apply(image)
        return [tag for tag in caption.split(',') if tag]

    def name(self):
        return 'GIT-large-COCO'

View File

@ -8,7 +8,8 @@ from enum import Enum
from scripts.dynamic_import import dynamic_import
ds = dynamic_import('scripts/dataset_tag_editor/dataset.py')
tag_scorer = dynamic_import('scripts/dataset_tag_editor/tag_scorer.py')
tagger = dynamic_import('scripts/dataset_tag_editor/tagger.py')
captioning = dynamic_import('scripts/dataset_tag_editor/captioning.py')
filters = dynamic_import('scripts/dataset_tag_editor/filters.py')
@ -23,13 +24,26 @@ class InterrogateMethod(Enum):
APPEND = 4
def interrogate_image_blip(path):
    """Caption the image at *path* with BLIP.

    Returns a comma-joined caption string, or '' when the file cannot
    be opened/decoded as an image.
    """
    try:
        img = Image.open(path).convert('RGB')
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        return ''
    with captioning.BLIP() as cap:
        return ', '.join(cap.predict(img))
def interrogate_image_git(path):
    """Caption the image at *path* with GIT-large-COCO.

    Returns a comma-joined caption string, or '' when the file cannot
    be opened/decoded as an image.
    """
    try:
        img = Image.open(path).convert('RGB')
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        return ''
    with captioning.GITLarge() as cap:
        return ', '.join(cap.predict(img))
def interrogate_image_booru(path, threshold):
@ -38,9 +52,9 @@ def interrogate_image_booru(path, threshold):
except:
return ''
else:
with tag_scorer.DeepDanbooru() as scorer:
res = scorer.predict(img, threshold=threshold)
return ', '.join(tag_scorer.get_arranged_tags(res))
with tagger.DeepDanbooru() as tg:
res = tg.predict(img, threshold=threshold)
return ', '.join(tagger.get_arranged_tags(res))
def interrogate_image_waifu(path, threshold):
@ -49,9 +63,9 @@ def interrogate_image_waifu(path, threshold):
except:
return ''
else:
with tag_scorer.WaifuDiffusion() as scorer:
res = scorer.predict(img, threshold=threshold)
return ', '.join(tag_scorer.get_arranged_tags(res))
with tagger.WaifuDiffusion() as tg:
res = tg.predict(img, threshold=threshold)
return ', '.join(tagger.get_arranged_tags(res))
def get_filepath_set(dir: str, recursive: bool):
@ -82,8 +96,6 @@ class DatasetTagEditor:
self.img_idx = dict()
self.tag_counts = {}
self.dataset_dir = ''
self.booru_tag_scores = None
self.waifu_tag_scores = None
def get_tag_list(self):
if len(self.tag_counts) == 0:
@ -390,25 +402,7 @@ class DatasetTagEditor:
print(e)
def score_dataset_booru(self):
    """Compute DeepDanbooru tag probabilities for every dataset image,
    cached in self.booru_tag_scores keyed by image path."""
    with tag_scorer.DeepDanbooru() as scorer:
        self.booru_tag_scores = {}
        for path in self.dataset.datas.keys():
            self.booru_tag_scores[path] = scorer.predict(Image.open(path))
def score_dataset_waifu(self):
    """Compute WDv1.4 (waifu) tag probabilities for every dataset image,
    cached in self.waifu_tag_scores keyed by image path.

    Bug fix: this previously instantiated tag_scorer.DeepDanbooru (a
    copy-paste from score_dataset_booru), so the "waifu" scores were
    actually DeepDanbooru probabilities.
    """
    with tag_scorer.WaifuDiffusion() as scorer:
        self.waifu_tag_scores = dict()
        for img_path in self.dataset.datas.keys():
            img = Image.open(img_path)
            probs = scorer.predict(img)
            self.waifu_tag_scores[img_path] = probs
def load_dataset(self, img_dir: str, recursive: bool, load_caption_from_filename: bool, interrogate_method: InterrogateMethod, use_booru: bool, use_clip: bool, use_waifu: bool, threshold_booru: float, threshold_waifu: float):
def load_dataset(self, img_dir: str, recursive: bool, load_caption_from_filename: bool, interrogate_method: InterrogateMethod, use_booru: bool, use_blip: bool, use_git:bool, use_waifu: bool, threshold_booru: float, threshold_waifu: float):
self.clear()
print(f'Loading dataset from {img_dir}')
if recursive:
@ -425,7 +419,7 @@ class DatasetTagEditor:
print(f'Total {len(filepath_set)} files under the directory including not image files.')
def load_images(filepath_set: Set[str], scorers: List[tag_scorer.TagScorer]):
def load_images(filepath_set: Set[str], captionings: List[captioning.Captioning], taggers: List[tagger.Tagger]):
for img_path in filepath_set:
img_dir = os.path.dirname(img_path)
img_filename, img_ext = os.path.splitext(os.path.basename(img_path))
@ -463,22 +457,12 @@ class DatasetTagEditor:
print(e)
print(f'Cannot interrogate file: {img_path}')
else:
if use_clip:
tmp = [t.strip() for t in shared.interrogator.generate_caption(img).split(',')]
interrogate_tags += [t for t in tmp if t]
for cap in captionings:
interrogate_tags += cap.predict(img)
for scorer in scorers:
probs = scorer.predict(img)
if isinstance(scorer, tag_scorer.DeepDanbooru):
interrogate_tags += [t for t, p in probs.items() if p > threshold_booru]
if not self.booru_tag_scores:
self.booru_tag_scores = dict()
self.booru_tag_scores[img_path] = probs
elif isinstance(scorer, tag_scorer.WaifuDiffusion):
interrogate_tags += [t for t, p in probs.items() if p > threshold_waifu]
if not self.waifu_tag_scores:
self.waifu_tag_scores = dict()
self.waifu_tag_scores[img_path] = probs
for tg, threshold in taggers:
probs = tg.predict(img)
interrogate_tags += [t for t, p in probs.items() if p > threshold]
img.close()
@ -491,27 +475,34 @@ class DatasetTagEditor:
self.set_tags_by_image_path(img_path, tags)
try:
scorers = []
captionings = []
taggers = []
if interrogate_method != InterrogateMethod.NONE:
if use_clip:
shared.interrogator.load()
if use_blip:
cap = captioning.BLIP()
cap.start()
captionings.append(cap)
if use_git:
cap = captioning.GITLarge()
cap.start()
captionings.append(cap)
if use_booru:
scorer = tag_scorer.DeepDanbooru()
scorer.start()
scorers.append(scorer)
tg = tagger.DeepDanbooru()
tg.start()
taggers.append((tg, threshold_booru))
if use_waifu:
scorer = tag_scorer.WaifuDiffusion()
scorer.start()
scorers.append(scorer)
tg = tagger.WaifuDiffusion()
tg.start()
taggers.append((tg, threshold_waifu))
load_images(filepath_set = filepath_set, scorers=scorers)
load_images(filepath_set=filepath_set, captionings=captionings, taggers=taggers)
finally:
if interrogate_method != InterrogateMethod.NONE:
if use_clip:
shared.interrogator.send_blip_to_ram()
for scorer in scorers:
scorer.stop()
for cap in captionings:
cap.stop()
for tg, _ in taggers:
tg.stop()
for i, p in enumerate(sorted(self.dataset.datas.keys())):
self.img_idx[p] = i
@ -570,8 +561,6 @@ class DatasetTagEditor:
self.tag_counts.clear()
self.img_idx.clear()
self.dataset_dir = ''
self.booru_tag_scores = None
self.waifu_tag_scores = None
def construct_tag_counts(self):

View File

@ -0,0 +1,29 @@
from transformers import AutoProcessor, AutoModelForCausalLM
from modules import shared
# Adapted from https://huggingface.co/docs/transformers/main/en/model_doc/git
class GITLargeCaptioning():
    """Image captioning via Microsoft's GIT-large model fine-tuned on COCO."""

    MODEL_REPO = "microsoft/git-large-coco"

    def __init__(self):
        # Processor and model stay None until load() is called.
        self.processor = None
        self.model = None

    def load(self):
        """Download (first use) and instantiate the processor and model."""
        if self.model is None or self.processor is None:
            self.processor = AutoProcessor.from_pretrained(self.MODEL_REPO)
            self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_REPO).to(shared.device)

    def unload(self):
        # Honour the WebUI option to keep interrogator models resident.
        if not shared.opts.interrogate_keep_models_in_memory:
            self.model = None
            self.processor = None

    def apply(self, image):
        """Return a generated caption for *image*, or '' if not loaded."""
        if self.model is None or self.processor is None:
            return ''
        inputs = self.processor(images=image, return_tensors='pt').to(shared.device)
        ids = self.model.generate(pixel_values=inputs.pixel_values, max_length=shared.opts.interrogate_clip_max_length)
        return self.processor.batch_decode(ids, skip_special_tokens=True)[0]


instance = GITLargeCaptioning()

View File

@ -1,8 +1,8 @@
from PIL import Image
import numpy as np
from typing import List, Tuple
import onnxruntime as ort
from modules import shared
import launch
class WaifuDiffusionTagger():
@ -11,7 +11,7 @@ class WaifuDiffusionTagger():
MODEL_FILENAME = "model.onnx"
LABEL_FILENAME = "selected_tags.csv"
def __init__(self):
    # ONNX inference session; created lazily by load() (the explicit
    # `ort.InferenceSession` annotation was dropped along with the
    # top-level onnxruntime import).
    self.model = None
    # Tag labels; populated from the downloaded label CSV by load().
    self.labels = []
def load(self):
@ -24,6 +24,10 @@ class WaifuDiffusionTagger():
providers = ['CPUExecutionProvider']
else:
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
if not launch.is_installed("onnxruntime"):
launch.run_pip("install onnxruntime-gpu", "requirements for dataset-tag-editor [onnxruntime-gpu]")
import onnxruntime as ort
self.model = ort.InferenceSession(path_model, providers=providers)
path_label = huggingface_hub.hf_hub_download(

View File

@ -7,10 +7,10 @@ from modules import devices, shared
from modules import deepbooru as db
from scripts.dynamic_import import dynamic_import
waifu_diffusion_tagger = dynamic_import('scripts/dataset_tag_editor/waifu_diffusion_tagger.py')
waifu_diffusion_tagger = dynamic_import('scripts/dataset_tag_editor/interrogators/waifu_diffusion_tagger.py')
class TagScorer:
class Tagger:
def __enter__(self):
self.start()
return self
@ -45,7 +45,7 @@ def get_arranged_tags(probs: Dict[str, float]):
return [tag for tag, _ in sorted(probs.items(), key=lambda x: -x[1])]
class DeepDanbooru(TagScorer):
class DeepDanbooru(Tagger):
def start(self):
db.model.start()
@ -78,7 +78,7 @@ class DeepDanbooru(TagScorer):
return 'DeepDanbooru'
class WaifuDiffusion(TagScorer):
class WaifuDiffusion(Tagger):
def start(self):
waifu_diffusion_tagger.instance.load()
return self
@ -101,4 +101,4 @@ class WaifuDiffusion(TagScorer):
return probability_dict
def name(self):
return 'wd-v1-4-tags'
return 'wd-v1-4-tagger'

View File

@ -43,7 +43,8 @@ GeneralConfig = namedtuple('GeneralConfig', [
'load_recursive',
'load_caption_from_filename',
'use_interrogator',
'use_clip_to_prefill',
'use_blip_to_prefill',
'use_git_to_prefill',
'use_booru_to_prefill',
'use_waifu_to_prefill',
'use_custom_threshold_booru',
@ -56,7 +57,7 @@ BatchEditConfig = namedtuple('BatchEditConfig', ['show_only_selected', 'prepend'
EditSelectedConfig = namedtuple('EditSelectedConfig', ['auto_copy', 'warn_change_not_saved'])
MoveDeleteConfig = namedtuple('MoveDeleteConfig', ['range', 'target', 'destination'])
CFG_GENERAL_DEFAULT = GeneralConfig(True, '', False, True, 'No', False, False, False, False, 0.7, False, 0.5)
CFG_GENERAL_DEFAULT = GeneralConfig(True, '', False, True, 'No', False, False, False, False, False, 0.7, False, 0.5)
CFG_FILTER_P_DEFAULT = FilterConfig('Alphabetical Order', 'Ascending', 'AND')
CFG_FILTER_N_DEFAULT = FilterConfig('Alphabetical Order', 'Ascending', 'OR')
CFG_BATCH_EDIT_DEFAULT = BatchEditConfig(True, False, False, 'Only Selected Tags', 'Alphabetical Order', 'Ascending')
@ -90,8 +91,8 @@ class Config:
config = Config()
def write_general_config(backup: bool, dataset_dir: str, load_recursive: bool, load_caption_from_filename: bool, use_interrogator: str, use_blip_to_prefill: bool, use_git_to_prefill: bool, use_booru_to_prefill: bool, use_waifu_to_prefill: bool, use_custom_threshold_booru: bool, custom_threshold_booru: float, use_custom_threshold_waifu: bool, custom_threshold_waifu: float):
    """Persist the General-tab settings into the 'general' config section."""
    general = GeneralConfig(
        backup, dataset_dir, load_recursive, load_caption_from_filename,
        use_interrogator,
        use_blip_to_prefill, use_git_to_prefill,
        use_booru_to_prefill, use_waifu_to_prefill,
        use_custom_threshold_booru, custom_threshold_booru,
        use_custom_threshold_waifu, custom_threshold_waifu,
    )
    config.write(general._asdict(), 'general')
def write_filter_config(sort_by_p: str, sort_order_p: str, logic_p: str, sort_by_n: str, sort_order_n: str, logic_n: str):
def read_config(name: str, config_type: Type, default: NamedTuple):
    """Read section *name* from the config file into a *config_type*.

    Missing keys fall back to *default*'s values and unknown keys are
    dropped, so config files written by older/newer versions of the
    extension still load cleanly.
    """
    d = config.read(name)
    cfg = default
    if d:
        # Hoisted: the previous comprehension rebuilt cfg._asdict() for
        # every key in its `if` condition; `.keys()` was also redundant.
        defaults = cfg._asdict()
        d = defaults | d
        d = {k: v for k, v in d.items() if k in defaults}
        cfg = config_type(**d)
    return cfg
@ -129,10 +131,12 @@ def read_filter_config():
cfg_p = CFG_FILTER_P_DEFAULT
cfg_n = CFG_FILTER_N_DEFAULT
if d_p:
d_p = CFG_FILTER_P_DEFAULT._asdict() | d_p
d_p = cfg_p._asdict() | d_p
d_p = {k:v for k,v in d_p.items() if k in cfg_p._asdict().keys()}
cfg_p = FilterConfig(**d_p)
if d_n:
d_n = CFG_FILTER_N_DEFAULT._asdict() | d_n
d_n = cfg_n._asdict() | d_n
d_n = {k:v for k,v in d_n.items() if k in cfg_n._asdict().keys()}
cfg_n = FilterConfig(**d_n)
return cfg_p, cfg_n
@ -180,7 +184,8 @@ def load_files_from_dir(
recursive: bool,
load_caption_from_filename: bool,
use_interrogator: str,
use_clip: bool,
use_blip: bool,
use_git: bool,
use_booru: bool,
use_waifu: bool,
use_custom_threshold_booru: bool,
@ -203,7 +208,7 @@ def load_files_from_dir(
threshold_booru = custom_threshold_booru if use_custom_threshold_booru else shared.opts.interrogate_deepbooru_score_threshold
threshold_waifu = custom_threshold_waifu if use_custom_threshold_waifu else shared.opts.interrogate_deepbooru_score_threshold
dataset_tag_editor.load_dataset(dir, recursive, load_caption_from_filename, interrogate_method, use_booru, use_clip, use_waifu, threshold_booru, threshold_waifu)
dataset_tag_editor.load_dataset(dir, recursive, load_caption_from_filename, interrogate_method, use_booru, use_blip, use_git, use_waifu, threshold_booru, threshold_waifu)
img_paths = dataset_tag_editor.get_filtered_imgpaths(filters=[])
img_indices = dataset_tag_editor.get_filtered_imgindices(filters=[])
path_filter = filters.PathFilter()
@ -394,9 +399,14 @@ def change_selected_image_caption(tags_text: str, idx: int):
return update_filter_and_gallery()
def interrogate_selected_image_blip():
    """Run BLIP on the image currently selected in the gallery."""
    # `global` is only required for assignment; a read-only access of
    # the module-level gallery_selected_image_path resolves without it.
    return dte.interrogate_image_blip(gallery_selected_image_path)
def interrogate_selected_image_git():
    """Run GIT-large-COCO on the image currently selected in the gallery."""
    # `global` is only required for assignment; a read-only access of
    # the module-level gallery_selected_image_path resolves without it.
    return dte.interrogate_image_git(gallery_selected_image_path)
def interrogate_selected_image_booru(use_threshold: bool, threshold: float):
@ -566,7 +576,8 @@ def on_ui_tabs():
with gr.Column():
rb_use_interrogator = gr.Radio(choices=['No', 'If Empty', 'Overwrite', 'Prepend', 'Append'], value=cfg_general.use_interrogator, label='Use Interrogator Caption')
with gr.Row():
cb_use_clip_to_prefill = gr.Checkbox(value=cfg_general.use_clip_to_prefill, label='Use BLIP')
cb_use_blip_to_prefill = gr.Checkbox(value=cfg_general.use_blip_to_prefill, label='Use BLIP')
cb_use_git_to_prefill = gr.Checkbox(value=cfg_general.use_git_to_prefill, label='Use GIT', visible=False)
cb_use_booru_to_prefill = gr.Checkbox(value=cfg_general.use_booru_to_prefill, label='Use DeepDanbooru')
cb_use_waifu_to_prefill = gr.Checkbox(value=cfg_general.use_waifu_to_prefill, label='Use WDv1.4 Tagger')
with gr.Accordion(label='Interrogator Settings', open=False):
@ -672,7 +683,8 @@ def on_ui_tabs():
with gr.Tab(label='Interrogate Selected Image'):
with gr.Row():
btn_interrogate_clip = gr.Button(value='Interrogate with BLIP')
btn_interrogate_blip = gr.Button(value='Interrogate with BLIP')
btn_interrogate_git = gr.Button(value='Interrogate with GIT Large', visible=False)
btn_interrogate_booru = gr.Button(value='Interrogate with DeepDanbooru')
btn_interrogate_waifu = gr.Button(value='Interrogate with WDv1.4 tagger')
tb_interrogate_selected_image = gr.Textbox(label='Interrogate Result', interactive=True, lines=6)
@ -706,13 +718,14 @@ def on_ui_tabs():
#----------------------------------------------------------------
# General
configurable_components = \
[cb_backup, tb_img_directory, cb_load_recursive, cb_load_caption_from_filename, rb_use_interrogator, cb_use_clip_to_prefill, cb_use_booru_to_prefill, cb_use_waifu_to_prefill, cb_use_custom_threshold_booru, sl_custom_threshold_booru, cb_use_custom_threshold_waifu, sl_custom_threshold_waifu] +\
[tag_filter_ui.rb_sort_by, tag_filter_ui.rb_sort_order, tag_filter_ui.rb_logic, tag_filter_ui_neg.rb_sort_by, tag_filter_ui_neg.rb_sort_order, tag_filter_ui_neg.rb_logic] +\
[cb_show_only_tags_selected, cb_prepend_tags, cb_use_regex, rb_sr_replace_target, tag_select_ui_remove.rb_sort_by, tag_select_ui_remove.rb_sort_order] +\
[cb_copy_caption_automatically, cb_ask_save_when_caption_changed] +\
[rb_move_or_delete_target_data, cbg_move_or_delete_target_file, tb_move_or_delete_destination_dir]
components_general = [cb_backup, tb_img_directory, cb_load_recursive, cb_load_caption_from_filename, rb_use_interrogator, cb_use_blip_to_prefill, cb_use_git_to_prefill, cb_use_booru_to_prefill, cb_use_waifu_to_prefill, cb_use_custom_threshold_booru, sl_custom_threshold_booru, cb_use_custom_threshold_waifu, sl_custom_threshold_waifu]
components_filter = [tag_filter_ui.rb_sort_by, tag_filter_ui.rb_sort_order, tag_filter_ui.rb_logic, tag_filter_ui_neg.rb_sort_by, tag_filter_ui_neg.rb_sort_order, tag_filter_ui_neg.rb_logic]
components_batch_edit = [cb_show_only_tags_selected, cb_prepend_tags, cb_use_regex, rb_sr_replace_target, tag_select_ui_remove.rb_sort_by, tag_select_ui_remove.rb_sort_order]
components_edit_selected = [cb_copy_caption_automatically, cb_ask_save_when_caption_changed]
components_move_delete = [rb_move_or_delete_target_data, cbg_move_or_delete_target_file, tb_move_or_delete_destination_dir]
configurable_components = components_general + components_filter + components_batch_edit + components_edit_selected + components_move_delete
def reload_config_file():
config.load()
p, n = read_filter_config()
@ -724,11 +737,16 @@ def on_ui_tabs():
)
def save_settings_callback(*a):
    """Split the flat gradio argument tuple back into per-section configs.

    Consumes *a* in the same order the components were concatenated:
    general, filter, batch-edit, edit-selected, move/delete.
    """
    p = 0

    def inc(v):
        # Advance the cursor by v and return the new end index. Note
        # Python evaluates a slice's lower bound first, so in
        # a[p:inc(n)] the old p is read before inc() mutates it.
        nonlocal p
        p += v
        return p

    write_general_config(*a[p:inc(len(components_general))])
    write_filter_config(*a[p:inc(len(components_filter))])
    write_batch_edit_config(*a[p:inc(len(components_batch_edit))])
    # BUG FIX: was `inc(components_edit_selected)` (passing the list
    # itself), which made `p += v` raise TypeError on every save.
    write_edit_selected_config(*a[p:inc(len(components_edit_selected))])
    write_move_delete_config(*a[p:])
    config.save()
btn_save_setting_as_default.click(
@ -792,7 +810,7 @@ def on_ui_tabs():
btn_load_datasets.click(
fn=load_files_from_dir,
inputs=[tb_img_directory, cb_load_recursive, cb_load_caption_from_filename, rb_use_interrogator, cb_use_clip_to_prefill, cb_use_booru_to_prefill, cb_use_waifu_to_prefill, cb_use_custom_threshold_booru, sl_custom_threshold_booru, cb_use_custom_threshold_waifu, sl_custom_threshold_waifu],
inputs=[tb_img_directory, cb_load_recursive, cb_load_caption_from_filename, rb_use_interrogator, cb_use_blip_to_prefill, cb_use_git_to_prefill, cb_use_booru_to_prefill, cb_use_waifu_to_prefill, cb_use_custom_threshold_booru, sl_custom_threshold_booru, cb_use_custom_threshold_waifu, sl_custom_threshold_waifu],
outputs=
[gl_dataset_images, gl_filter_images, txt_gallery, txt_selection] +
[cbg_hidden_dataset_filter, nb_hidden_dataset_filter_apply] +
@ -970,8 +988,13 @@ def on_ui_tabs():
outputs=[tb_edit_caption_selected_image]
)
btn_interrogate_clip.click(
fn=interrogate_selected_image_clip,
btn_interrogate_blip.click(
fn=interrogate_selected_image_blip,
outputs=[tb_interrogate_selected_image]
)
btn_interrogate_git.click(
fn=interrogate_selected_image_git,
outputs=[tb_interrogate_selected_image]
)