360 lines
12 KiB
Python
360 lines
12 KiB
Python
import math
|
|
import os
|
|
import sys
|
|
|
|
import numpy as np
|
|
import tqdm
|
|
from PIL import Image, ImageOps
|
|
|
|
from clipcrop import CropClip
|
|
import reallysafe
|
|
from modules import shared, images, safe
|
|
import modules.gfpgan_model
|
|
import modules.codeformer_model
|
|
from modules.shared import opts, cmd_opts
|
|
|
|
if cmd_opts.deepdanbooru:
|
|
import modules.deepbooru as deepbooru
|
|
|
|
|
|
def interrogate_image(image: Image.Image, full=False):
    """Generate a CLIP caption for *image*.

    When ``full`` is False the interrogator options are temporarily tightened
    (caption length clamped to 10-20, built-in artist list disabled) to get a
    short prompt-style caption; the user's settings are restored afterwards.

    :param image: PIL image to caption.
    :param full: if True, interrogate with the user's current settings unchanged.
    :return: the caption string produced by ``shared.interrogator``.
    """
    if full:
        return shared.interrogator.interrogate(image)

    # Save the user's interrogator settings before overriding them.
    prev_artists = shared.opts.interrogate_use_builtin_artists
    prev_max = shared.opts.interrogate_clip_max_length
    prev_min = shared.opts.interrogate_clip_min_length
    shared.opts.interrogate_clip_min_length = 10
    shared.opts.interrogate_clip_max_length = 20
    shared.opts.interrogate_use_builtin_artists = False
    try:
        caption = shared.interrogator.interrogate(image)
    finally:
        # Restore settings even if interrogation raises, so a failure here
        # doesn't permanently clobber the user's options.
        shared.opts.interrogate_clip_min_length = prev_min
        shared.opts.interrogate_clip_max_length = prev_max
        shared.opts.interrogate_use_builtin_artists = prev_artists
    return caption
|
|
|
|
|
|
def preprocess(src,
               dst,
               crop,
               width,
               height,
               append_filename,
               save_txt,
               pretxt_action,
               flip,
               split,
               caption,
               caption_length,
               caption_deepbooru,
               split_threshold,
               overlap_ratio,
               subject_class,
               subject,
               replace_class,
               restore_faces,
               face_model,
               upscale,
               upscale_ratio,
               scaler):
    """Load the captioning models, run the full smart-processing pass via
    :func:`prework`, then unload the models again.

    Returns a ``(status_message, "")`` tuple for the UI.
    """
    try:
        shared.state.textinfo = "Loading models for smart processing..."
        # Swap in the extension's unpickler so its model checkpoints load.
        safe.RestrictedUnpickler = reallysafe.RestrictedUnpickler

        if caption:
            shared.interrogator.load()
        if caption_deepbooru:
            deepbooru.model.start()

        prework(src=src,
                dst=dst,
                crop_image=crop,
                width=width,
                height=height,
                append_filename=append_filename,
                save_txt=save_txt,
                pretxt_action=pretxt_action,
                flip=flip,
                split=split,
                caption_image=caption,
                caption_length=caption_length,
                caption_deepbooru=caption_deepbooru,
                split_threshold=split_threshold,
                overlap_ratio=overlap_ratio,
                subject_class=subject_class,
                subject=subject,
                replace_class=replace_class,
                restore_faces=restore_faces,
                face_model=face_model,
                upscale=upscale,
                upscale_ratio=upscale_ratio,
                scaler=scaler)
    finally:
        # Release model memory whether or not processing succeeded.
        if caption:
            shared.interrogator.send_blip_to_ram()
        if caption_deepbooru:
            deepbooru.model.stop()

    return "Processing complete.", ""
|
|
|
|
|
|
def prework(src,
            dst,
            crop_image,
            width,
            height,
            append_filename,
            save_txt,
            pretxt_action,
            flip,
            split,
            caption_image,
            caption_length,
            caption_deepbooru,
            split_threshold,
            overlap_ratio,
            subject_class,
            subject,
            replace_class,
            restore_faces,
            face_model,
            upscale,
            upscale_ratio,
            scaler):
    """Process every image in `src` and write results to `dst`.

    Depending on the flags, each image is smart-cropped around the CLIP-detected
    subject, optionally split into overlapping tiles first, captioned (CLIP
    and/or deepbooru), face-restored, upscaled, resized to `width` x `height`,
    and saved (optionally with a ``.txt`` caption sidecar and a mirrored copy).

    Caption text files are only written when `save_txt` is set and the caption
    is non-empty.  Progress is reported through ``shared.state``.
    """
    # Drop any stale 'models' module so project imports resolve freshly.
    try:
        del sys.modules['models']
    except KeyError:
        pass

    src = os.path.abspath(src)
    dst = os.path.abspath(dst)

    if not crop_image and not caption_image and not restore_faces and not upscale:
        print("Nothing to do.")
        shared.state.textinfo = "Nothing to do!"
        return

    assert src != dst, 'same directory specified as source and destination'

    os.makedirs(dst, exist_ok=True)

    files = os.listdir(src)

    shared.state.textinfo = "Preprocessing..."
    shared.state.job_count = len(files)

    def build_caption(image):
        # Build the final caption for `image`, merging any pre-existing caption
        # (sidecar .txt or, when append_filename is set, the cleaned filename)
        # with fresh CLIP/deepbooru output per `pretxt_action`.
        existing_caption = None
        if not append_filename:
            existing_caption_filename = os.path.splitext(filename)[0] + '.txt'
            if os.path.exists(existing_caption_filename):
                with open(existing_caption_filename, 'r', encoding="utf8") as file:
                    existing_caption = file.read()
        else:
            existing_caption = ''.join(c for c in filename if c.isalpha() or c in [" ", ","])

        caption = ""
        if caption_image:
            # Fix: caption the image passed in (e.g. a split tile), not the
            # outer loop's full image `img`.
            caption = interrogate_image(image, True)

        if caption_deepbooru:
            if len(caption) > 0:
                caption += ", "
            caption += deepbooru.model.tag_multi(image)

        if pretxt_action == 'prepend' and existing_caption:
            caption = existing_caption + ' ' + caption
        elif pretxt_action == 'append' and existing_caption:
            caption = caption + ' ' + existing_caption
        elif pretxt_action == 'copy' and existing_caption:
            caption = existing_caption

        caption = caption.strip()
        if replace_class and subject is not None and subject_class is not None:
            # Find and replace "a SUBJECT CLASS" in caption with subject name.
            if f"a {subject_class}" in caption:
                caption = caption.replace(f"a {subject_class}", subject)
            if subject_class in caption:
                caption = caption.replace(subject_class, subject)

        if 0 < caption_length < len(caption):
            # Trim to whole words while staying under the length limit.
            # Fixes the original `len(cap_test < caption_length)` typo
            # (TypeError) and the missing increment that made the loop spin
            # forever once the limit was reached.
            trimmed = ""
            for word in caption.split(" "):
                candidate = f"{trimmed} {word}"
                if len(candidate) < caption_length:
                    trimmed = candidate
                else:
                    break
            caption = trimmed

        return caption.strip()

    def save_pic_with_caption(image, img_index, existing_caption):
        # Save one image (and optional caption sidecar) under an indexed name.
        if not append_filename:
            filename_part = os.path.basename(os.path.splitext(filename)[0])
        else:
            filename_part = existing_caption

        basename = f"{img_index:05}-{subindex[0]}-{filename_part}"
        # Fix: preview the image actually being saved, not the loop's `img`.
        shared.state.current_image = image
        image.save(os.path.join(dst, f"{basename}.png"))

        # Fix: guard against a None caption (original called len(None)).
        if save_txt and existing_caption:
            with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file:
                file.write(existing_caption)

        subindex[0] += 1

    def save_pic(image, img_index, existing_caption=None):
        # Save the image, plus a mirrored copy when flipping is enabled.
        save_pic_with_caption(image, img_index, existing_caption=existing_caption)
        if flip:
            save_pic_with_caption(ImageOps.mirror(image), img_index, existing_caption=existing_caption)

    def split_pic(image, img_inverse_xy):
        # Yield overlapping width x height tiles along the image's long axis.
        if img_inverse_xy:
            from_w, from_h = image.height, image.width
            to_w, to_h = height, width
        else:
            from_w, from_h = image.width, image.height
            to_w, to_h = width, height
        h = from_h * to_w // from_w
        if img_inverse_xy:
            image = image.resize((h, to_w))
        else:
            image = image.resize((to_w, h))

        # Clamp to at least 2 tiles so y_step's (split_count - 1) divisor
        # can never be zero.
        split_count = max(2, math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio))))
        y_step = (h - to_h) / (split_count - 1)
        for i in range(split_count):
            y = int(y_step * i)
            if img_inverse_xy:
                yield image.crop((y, 0, y + to_h, to_w))
            else:
                yield image.crop((0, y, to_w, y + to_h))

    crop_clip = None
    if crop_image:
        split_threshold = max(0.0, min(1.0, split_threshold))
        overlap_ratio = max(0.0, min(0.9, overlap_ratio))
        crop_clip = CropClip()

    for index, imagefile in enumerate(tqdm.tqdm(files)):
        if shared.state.interrupted:
            break

        subindex = [0]
        filename = os.path.join(src, imagefile)
        try:
            img = Image.open(filename).convert("RGB")
        except Exception:
            # Skip non-image files in the source directory.
            continue

        shared.state.current_image = img
        shared.state.textinfo = f"Processing: {filename}"
        # Fix: always defined, so the resize check below can't raise NameError
        # when cropping and upscaling are both disabled.
        default_resize = False

        if crop_image:
            shared.state.textinfo = "Cropping..."

            # The short caption is only used as the CLIP-crop prompt, so only
            # interrogate here (and skip it entirely when a subject class is
            # supplied).
            if subject_class is not None and subject_class != "":
                short_caption = subject_class
            else:
                short_caption = interrogate_image(img)

            if img.height > img.width:
                ratio = (img.width * height) / (img.height * width)
                inverse_xy = False
            else:
                ratio = (img.height * width) / (img.width * height)
                inverse_xy = True

            if split and ratio < 1.0 and ratio <= split_threshold:
                for splitted in split_pic(img, inverse_xy):
                    # Fix: build_caption takes a single image argument (the
                    # original passed a second arg -> TypeError) and already
                    # performs the CLIP interrogation itself.
                    full_caption = build_caption(splitted)
                    save_pic(splitted, index, existing_caption=full_caption)

            # Centre a width:height-ratio crop on the CLIP-detected subject.
            im_data = crop_clip.get_center(img, prompt=short_caption)
            crop_width = im_data[1] - im_data[0]
            center_x = im_data[0] + (crop_width / 2)
            crop_height = im_data[3] - im_data[2]
            center_y = im_data[2] + (crop_height / 2)
            crop_ratio = crop_width / crop_height
            dest_ratio = width / height
            tgt_width = crop_width
            tgt_height = crop_height

            if crop_ratio != dest_ratio:
                if crop_width > crop_height:
                    tgt_height = crop_width / dest_ratio
                    tgt_width = crop_width
                else:
                    tgt_width = crop_height / dest_ratio
                    tgt_height = crop_height

            # Reverse the above if the target box grew past the source image.
            if tgt_width > img.width or tgt_height > img.height:
                if tgt_width > img.width:
                    tgt_width = img.width
                    tgt_height = tgt_width / dest_ratio
                else:
                    tgt_height = img.height
                    tgt_width = tgt_height / dest_ratio

            tgt_height = int(tgt_height)
            tgt_width = int(tgt_width)
            left = max(center_x - (tgt_width / 2), 0)
            right = min(center_x + (tgt_width / 2), img.width)
            top = max(center_y - (tgt_height / 2), 0)
            bottom = min(center_y + (tgt_height / 2), img.height)
            img = img.crop((left, top, right, bottom))
            default_resize = True
            shared.state.current_image = img

        if restore_faces:
            shared.state.textinfo = f"Restoring faces using {face_model}..."
            if face_model == "gfpgan":
                restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(img, dtype=np.uint8))
            else:
                restored_img = modules.codeformer_model.codeformer.restore(np.array(img, dtype=np.uint8),
                                                                           w=1.0)
            img = Image.fromarray(restored_img)
            shared.state.current_image = img

        if upscale:
            shared.state.textinfo = "Upscaling..."
            upscaler = shared.sd_upscalers[scaler]
            img = upscaler.scaler.upscale(img, upscale_ratio, upscaler.data_path)
            default_resize = True
            shared.state.current_image = img

        if default_resize:
            # Crop/upscale changed the geometry; snap back to the target size.
            img = images.resize_image(1, img, width, height)
            shared.state.current_image = img

        full_caption = build_caption(img)
        save_pic(img, index, existing_caption=full_caption)

        shared.state.nextjob()
|