Initial Commit

pull/1/head
d8ahazard 2022-11-11 14:35:46 -06:00
parent 6f1a6b23de
commit 2b79465d99
8 changed files with 661 additions and 0 deletions

112
clipcrop.py Normal file
View File

@ -0,0 +1,112 @@
# Original project: https://github.com/Vishnunkumar/clipcrop/blob/main/clipcrop/clipcrop.py
import os.path
import sys
import cv2
import numpy
import numpy as np
import torch
from PIL import Image
from clip import clip
from transformers import CLIPProcessor, CLIPModel, pipeline
import modules.paths
from modules import shared, modelloader
from repositories.CodeFormer.facelib.detection.yolov5face.utils.general import xyxy2xywh, xywh2xyxy
def clip_boxes(boxes, shape):
    """Clamp xyxy boxes in-place to an image of the given (height, width)."""
    img_h, img_w = shape[0], shape[1]
    if isinstance(boxes, torch.Tensor):  # faster individually
        # Clamp each coordinate column against its axis limit.
        for col, limit in ((0, img_w), (1, img_h), (2, img_w), (3, img_h)):
            boxes[:, col].clamp_(0, limit)
    else:  # np.array (faster grouped)
        # x columns (0, 2) share the width limit, y columns (1, 3) the height.
        boxes[:, 0::2] = boxes[:, 0::2].clip(0, img_w)
        boxes[:, 1::2] = boxes[:, 1::2].clip(0, img_h)
def find_position(parent: Image, child: Image):
    """Return the (x, y) center of the best match of `child` inside `parent`."""
    match_map = cv2.matchTemplate(np.array(parent), np.array(child), cv2.TM_CCOEFF_NORMED)
    # For TM_CCOEFF_NORMED a larger value means a better match, so only the
    # max location is relevant (min would be used for TM_SQDIFF variants).
    _, _, _, best_loc = cv2.minMaxLoc(match_map)
    left, top = best_loc
    center_x = left + (child.width / 2)
    center_y = top + (child.height / 2)
    return center_x, center_y
class CropClip:
    """Finds the region of an image that best matches a text prompt.

    YOLOv5 proposes object crops, CLIP scores each crop against the prompt,
    and template matching locates the winning crop in the original image.
    """

    def __init__(self):
        # Model
        model_name = 'yolov5m6.pt'
        model_url = 'https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5m6.pt'
        model_dir = os.path.join(modules.paths.models_path, "yolo")
        # load_models returns a list of local paths, downloading if missing.
        model_path = modelloader.load_models(model_dir, model_url, None, '.pt', model_name)
        self.model = torch.hub.load('ultralytics/yolov5', 'custom', model_path[0])
        # Prevent BLIP crossfire breakage
        # (torch.hub registers a top-level 'models' module that can shadow the
        # webui's own; drop it so later imports resolve correctly).
        try:
            del sys.modules['models']
        except:
            pass

    def get_center(self, image: Image, prompt: str):
        """Return [x1, x2, y1, y2] of the prompt's best-matching region in `image`."""
        # Load image into YOLO parser
        results = self.model(image)  # includes NMS
        # Crop each image result to an array
        cropped = results.crop(False)
        l = []
        for crop in cropped:
            l.append(Image.fromarray(crop["im"]))
        if len(l) == 0:
            # No detections: fall back to scoring the whole image.
            l = [image]
        device = shared.device
        # Take out cropped YOLO images, and get the features?
        model, preprocess = clip.load("ViT-B/32", device=device)
        images = torch.stack([preprocess(im) for im in l]).to(device)
        with torch.no_grad():
            image_features = model.encode_image(images)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        # NOTE(review): the result of this call is discarded and
        # image_features is recomputed below from a manually normalized batch;
        # this first encode pass looks redundant — confirm before removing.
        image_features.cpu().numpy()
        # CLIP's published RGB mean/std. NOTE(review): the hard-coded .cuda()
        # calls below ignore `device` and will fail on CPU-only setups.
        image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()
        image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()
        images = [preprocess(im) for im in l]
        image_input = torch.tensor(np.stack(images)).cuda()
        image_input -= image_mean[:, None, None]
        image_input /= image_std[:, None, None]
        with torch.no_grad():
            image_features = model.encode_image(image_input).float()
        image_features /= image_features.norm(dim=-1, keepdim=True)

        def similarity_top(similarity_list, N):
            # Rank candidate crops by similarity and return the top-N
            # (scores, images); reads `l` from the enclosing scope.
            results = zip(range(len(similarity_list)), similarity_list)
            results = sorted(results, key=lambda x: x[1], reverse=True)
            top_images = []
            scores = []
            for index, score in results[:N]:
                scores.append(score)
                top_images.append(l[index])
            return scores, top_images

        # @title Crop
        with torch.no_grad():
            # Encode and normalize the description using CLIP
            text_encoded = model.encode_text(clip.tokenize(prompt).to(device))
            text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
            # Retrieve the description vector and the photo vectors
            similarity = text_encoded.cpu().numpy() @ image_features.cpu().numpy().T
            similarity = similarity[0]
            scores, imgs = similarity_top(similarity, N=1)
        out = imgs[0]
        # Locate the winning crop back inside the source image.
        res = cv2.matchTemplate(numpy.array(image), numpy.array(out), cv2.TM_SQDIFF)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
        top_left = min_loc
        bottom_right = (top_left[0] + out.width, top_left[1] + out.height)
        return [top_left[0], bottom_right[0], top_left[1], bottom_right[1]]

9
install.py Normal file
View File

@ -0,0 +1,9 @@
import os
import sys

from launch import run

# Human-readable extension name used in status/error messages.
name = "Smart Crop"
# requirements.txt lives next to this install script.
req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
# Fix: the original message said "Dreambooth" (copied from another extension);
# this script installs the Smart Crop requirements.
print(f"loading {name} reqs from {req_file}")
run(f'"{sys.executable}" -m pip install -r "{req_file}"', f"Checking {name} requirements.", f"Couldn't install {name} requirements.")

View File

@ -0,0 +1,9 @@
// Kick off the smart-process job: begin polling progress for the 'sp' task,
// clear any previous error text, and forward the Gradio inputs unchanged.
function start_smart_process(){
    requestProgress('sp');
    gradioApp().querySelector('#sp_error').innerHTML='';
    return args_to_array(arguments);
}
// Re-wire the 'sp' progress bar/preview widgets whenever the UI re-renders.
onUiUpdate(function(){
    check_progressbar('sp', 'sp_progressbar', 'sp_progress_span', '', 'sp_interrupt', 'sp_preview', 'sp_gallery')
})

60
reallysafe.py Normal file
View File

@ -0,0 +1,60 @@
import _codecs
import collections
import pickle
import numpy
import torch
from modules import safe
from modules.safe import TypedStorage
def encode(*args):
    """Thin pass-through to _codecs.encode, exposed under a module-local name
    so the restricted unpickler can allow it explicitly."""
    return _codecs.encode(*args)
class RestrictedUnpickler(pickle.Unpickler):
    """Allowlist-based unpickler extending the webui's modules.safe variant
    with the extra globals YOLOv5 checkpoints reference (model layers,
    numpy scalars, pytorch-lightning callbacks, etc.).

    Anything not explicitly allowed raises pickle.UnpicklingError.
    """

    def persistent_load(self, saved_id):
        # Torch checkpoints reference tensor storages via persistent ids;
        # return a stub storage instead of deserializing real data.
        assert saved_id[0] == 'storage'
        return TypedStorage()

    def find_class(self, module, name):
        # Resolve only the globals known to appear in safe checkpoints.
        if module == 'collections' and name == 'OrderedDict':
            return getattr(collections, name)
        if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']:
            return getattr(torch._utils, name)
        if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage',
                                          'ByteStorage']:
            return getattr(torch, name)
        if module == 'torch.nn.modules.container' and name in ['ParameterDict', 'Sequential']:
            return getattr(torch.nn.modules.container, name)
        if module == 'numpy.core.multiarray' and name == 'scalar':
            return numpy.core.multiarray.scalar
        if module == 'numpy' and name == 'dtype':
            return numpy.dtype
        if module == '_codecs' and name == 'encode':
            return encode
        if module == "pytorch_lightning.callbacks" and name == 'model_checkpoint':
            import pytorch_lightning.callbacks
            return pytorch_lightning.callbacks.model_checkpoint
        if module == "pytorch_lightning.callbacks.model_checkpoint" and name == 'ModelCheckpoint':
            import pytorch_lightning.callbacks.model_checkpoint
            return pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
        # NOTE(review): substring match — any module whose name merely
        # contains "yolo" is delegated to the default (unrestricted) resolver.
        if "yolo" in module:
            return super().find_class(module, name)
        if module == "models.common" and name == "Conv":
            return super().find_class(module, name)
        # YOLOv5 layer types used by the checkpoint's pickled nn.Modules.
        if 'torch.nn.modules' in module and name in ['Conv', 'Conv2d', 'BatchNorm2d', "SiLU", "MaxPool2d", "Upsample",
                                                     "ModuleList"]:
            return super().find_class(module, name)
        if "models.common" in module and name in ["C3", "Bottleneck", "SPPF", "Concat"]:
            return super().find_class(module, name)
        if module == "__builtin__" and name == 'set':
            return set
        # Forbid everything else.
        raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden")
safe.RestrictedUnpickler = RestrictedUnpickler

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
ipython==8.6.0
seaborn==0.12.1

109
scripts/main.py Normal file
View File

@ -0,0 +1,109 @@
import gradio as gr
from modules import script_callbacks, shared
from modules.shared import cmd_opts
from modules.ui import setup_progressbar, gr_show
from webui import wrap_gradio_gpu_call
import smartprocess
def on_ui_tabs():
    """Build the "Smart Preprocess" tab and wire its run/cancel handlers.

    Returns the (interface, tab title, element id) tuple expected by the
    webui's on_ui_tabs callback protocol.
    """
    with gr.Blocks() as sp_interface:
        with gr.Row(equal_height=True):
            # Left column: all processing options, grouped into tabs.
            with gr.Column(variant="panel"):
                with gr.Tab("Directories"):
                    sp_src = gr.Textbox(label='Source directory')
                    sp_dst = gr.Textbox(label='Destination directory')
                    sp_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
                    sp_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
                with gr.Tab("Cropping"):
                    sp_crop = gr.Checkbox(label='Crop Images')
                    sp_flip = gr.Checkbox(label='Create flipped copies')
                    sp_split = gr.Checkbox(label='Split over-sized images')
                    sp_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0,
                                                   maximum=1.0,
                                                   step=0.05)
                    sp_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0,
                                                 maximum=0.9, step=0.05)
                with gr.Tab("Captions"):
                    sp_caption = gr.Checkbox(label='Generate Captions')
                    sp_caption_length = gr.Number(label='Max Caption length (0=unlimited)', value=0, precision=0)
                    sp_txt_action = gr.Dropdown(label='Existing Caption Action', value="ignore",
                                                choices=["ignore", "copy", "prepend", "append"])
                    sp_caption_append_file = gr.Checkbox(label="Append Caption to File Name", value=True)
                    sp_caption_save_txt = gr.Checkbox(label="Save Caption to .txt File", value=False)
                    # Only visible when the webui was launched with --deepdanbooru.
                    sp_caption_deepbooru = gr.Checkbox(label='Append DeepDanbooru to Caption',
                                                       visible=True if cmd_opts.deepdanbooru else False)
                    sp_replace_class = gr.Checkbox(label='Replace Class with Subject in Caption', value=True)
                    sp_class = gr.Textbox(label='Subject Class', placeholder='Subject class to crop (leave '
                                                                            'blank to auto-detect)')
                    sp_subject = gr.Textbox(label='Subject Name', placeholder='Subject Name to replace class '
                                                                             'with in captions')
                with gr.Tab("Post-Processing"):
                    sp_restore_faces = gr.Checkbox(label='Restore Faces', value=False)
                    sp_face_model = gr.Dropdown(label="Face Restore Model",choices=["GFPGAN", "Codeformer"], value="GFPGAN")
                    sp_upscale = gr.Checkbox(label='Upscale and Resize', value=False)
                    sp_upscale_ratio = gr.Slider(label="Upscale Ratio", value=2, step=1, minimum=2, maximum=4)
                    # type="index" so the handler receives the upscaler's index.
                    sp_scaler = gr.Radio(label='Upscaler', elem_id="sp_scaler",
                                         choices=[x.name for x in shared.sd_upscalers],
                                         value=shared.sd_upscalers[0].name, type="index")
            # Preview/progress
            with gr.Column(variant="panel"):
                sp_progress = gr.HTML(elem_id="sp_progress", value="")
                sp_outcome = gr.HTML(elem_id="sp_error", value="")
                sp_progressbar = gr.HTML(elem_id="sp_progressbar")
                sp_gallery = gr.Gallery(label='Output', show_label=False, elem_id='sp_gallery').style(grid=4)
                sp_preview = gr.Image(elem_id='sp_preview', visible=False)
                setup_progressbar(sp_progressbar, sp_preview, 'sp', textinfo=sp_progress)
                with gr.Row():
                    sp_cancel = gr.Button(value="Cancel")
                    sp_run = gr.Button(value="Preprocess", variant='primary')
        # Cancel only flags the shared state; the worker loop checks it per image.
        sp_cancel.click(
            fn=lambda: shared.state.interrupt()
        )
        # Input order must match smartprocess.preprocess's parameter order.
        sp_run.click(
            fn=wrap_gradio_gpu_call(smartprocess.preprocess, extra_outputs=[gr.update()]),
            _js="start_smart_process",
            inputs=[
                sp_src,
                sp_dst,
                sp_crop,
                sp_width,
                sp_height,
                sp_caption_append_file,
                sp_caption_save_txt,
                sp_txt_action,
                sp_flip,
                sp_split,
                sp_caption,
                sp_caption_length,
                sp_caption_deepbooru,
                sp_split_threshold,
                sp_overlap_ratio,
                sp_class,
                sp_subject,
                sp_replace_class,
                sp_restore_faces,
                sp_face_model,
                sp_upscale,
                sp_upscale_ratio,
                sp_scaler
            ],
            outputs=[
                sp_progress,
                sp_outcome
            ],
        )
    return (sp_interface, "Smart Preprocess", "smartsp_interface"),
script_callbacks.on_ui_tabs(on_ui_tabs)

351
smartprocess.py Normal file
View File

@ -0,0 +1,351 @@
import math
import os
import sys
import numpy as np
import tqdm
from PIL import Image, ImageOps
from clipcrop import CropClip
import reallysafe
from modules import shared, images, safe
import modules.gfpgan_model
import modules.codeformer_model
from modules.shared import opts, cmd_opts
if cmd_opts.deepdanbooru:
import modules.deepbooru as deepbooru
def interrogate_image(image: Image):
    """Interrogate the image twice with the shared CLIP interrogator.

    First pass: options temporarily forced to a short (10-20 token),
    artist-free prompt. Second pass: the user's own settings, restored
    beforehand. Returns (short_prompt, full_caption).
    """
    # Save the user's interrogator settings so they can be restored.
    prev_artists = shared.opts.interrogate_use_builtin_artists
    prev_max = shared.opts.interrogate_clip_max_length
    prev_min = shared.opts.interrogate_clip_min_length
    # Constrain to a short, artist-free prompt for the crop-targeting pass.
    shared.opts.interrogate_clip_min_length = 10
    shared.opts.interrogate_clip_max_length = 20
    shared.opts.interrogate_use_builtin_artists = False
    prompt = shared.interrogator.interrogate(image)
    # Restore user settings before the full-length caption pass.
    shared.opts.interrogate_clip_min_length = prev_min
    shared.opts.interrogate_clip_max_length = prev_max
    shared.opts.interrogate_use_builtin_artists = prev_artists
    full_caption = shared.interrogator.interrogate(image)
    return prompt, full_caption
def preprocess(src,
               dst,
               crop,
               width,
               height,
               append_filename,
               save_txt,
               pretxt_action,
               flip,
               split,
               caption,
               caption_length,
               caption_deepbooru,
               split_threshold,
               overlap_ratio,
               subject_class,
               subject,
               replace_class,
               restore_faces,
               face_model,
               upscale,
               upscale_ratio,
               scaler):
    """Entry point called from the UI: set up models, run prework(), and
    always release model resources afterwards.

    Returns (status_html, error_html) for the two gradio output components.
    """
    try:
        shared.state.textinfo = "Loading models for smart processing..."
        # Allow YOLO checkpoint globals through the webui's safe unpickler.
        safe.RestrictedUnpickler = reallysafe.RestrictedUnpickler
        if caption:
            shared.interrogator.load()
        if caption_deepbooru:
            db_opts = deepbooru.create_deepbooru_opts()
            db_opts[deepbooru.OPT_INCLUDE_RANKS] = False
            deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts)
        prework(src,
                dst,
                crop,
                width,
                height,
                append_filename,
                save_txt,
                pretxt_action,
                flip,
                split,
                caption,
                caption_length,
                caption_deepbooru,
                split_threshold,
                overlap_ratio,
                subject_class,
                subject,
                replace_class,
                restore_faces,
                face_model,
                upscale,
                upscale_ratio,
                scaler)
    finally:
        # Always free VRAM / kill the deepbooru worker, even on error/interrupt.
        if caption:
            shared.interrogator.send_blip_to_ram()
        if caption_deepbooru:
            deepbooru.release_process()
    return "Processing complete.", ""
def prework(src,
            dst,
            crop_image,
            width,
            height,
            append_filename,
            save_txt,
            pretxt_action,
            flip,
            split,
            caption_image,
            caption_length,
            caption_deepbooru,
            split_threshold,
            overlap_ratio,
            subject_class,
            subject,
            replace_class,
            restore_faces,
            face_model,
            upscale,
            upscale_ratio,
            scaler):
    """Core loop: for every image in `src`, optionally split/crop (YOLO+CLIP),
    caption, restore faces, upscale, and save results into `dst`.
    """
    # Drop torch.hub's top-level 'models' module so project imports resolve.
    try:
        del sys.modules['models']
    except:
        pass
    width = width
    height = height
    src = os.path.abspath(src)
    dst = os.path.abspath(dst)
    if not crop_image and not caption_image and not restore_faces and not upscale:
        print("Nothing to do.")
        shared.state.textinfo = "Nothing to do!"
        return
    assert src != dst, 'same directory specified as source and destination'
    os.makedirs(dst, exist_ok=True)
    files = os.listdir(src)
    shared.state.textinfo = "Preprocessing..."
    shared.state.job_count = len(files)

    def build_caption(image, caption):
        # Merge the generated caption with any pre-existing caption source,
        # add deepbooru tags, apply subject/class replacement and length
        # trimming. Reads `filename` from the enclosing loop.
        existing_caption = None
        if not append_filename:
            # Look for a sidecar .txt file next to the image.
            existing_caption_filename = os.path.splitext(filename)[0] + '.txt'
            if os.path.exists(existing_caption_filename):
                with open(existing_caption_filename, 'r', encoding="utf8") as file:
                    existing_caption = file.read()
        else:
            # Derive an "existing caption" from the file name itself.
            existing_caption = ''.join(c for c in filename if c.isalpha() or c in [" ", ","])
        if caption_deepbooru:
            if len(caption) > 0:
                caption += ", "
            caption += deepbooru.get_tags_from_process(image)
        if pretxt_action == 'prepend' and existing_caption:
            caption = existing_caption + ' ' + caption
        elif pretxt_action == 'append' and existing_caption:
            caption = caption + ' ' + existing_caption
        elif pretxt_action == 'copy' and existing_caption:
            caption = existing_caption
        caption = caption.strip()
        if replace_class and subject is not None and subject_class is not None:
            # Find and replace "a SUBJECT CLASS" in caption with subject name
            if f"a {subject_class}" in caption:
                caption = caption.replace(f"a {subject_class}", subject)
            if subject_class in caption:
                caption = caption.replace(subject_class, subject)
        if 0 < caption_length < len(caption):
            # Trim the caption to caption_length characters on word boundaries.
            split_cap = caption.split(" ")
            caption = ""
            cap_test = ""
            split_idx = 0
            while True and split_idx < len(split_cap):
                cap_test += f" {split_cap[split_idx]}"
                # NOTE(review): bug — this compares str < int and then calls
                # len() on the result; it should be
                # `if len(cap_test) < caption_length:`. As written it raises
                # TypeError whenever trimming is triggered.
                if len(cap_test < caption_length):
                    caption = cap_test
                split_idx += 1
        caption = caption.strip()
        return caption

    def save_pic_with_caption(image, img_index, existing_caption):
        # Save one image to dst under a numbered name, plus optional sidecar
        # caption. Reads `filename`, `subindex` and `img` from the loop.
        if not append_filename:
            filename_part = filename
            filename_part = os.path.splitext(filename_part)[0]
            filename_part = os.path.basename(filename_part)
        else:
            # Embed the caption itself in the output file name.
            filename_part = existing_caption
        basename = f"{img_index:05}-{subindex[0]}-{filename_part}"
        shared.state.current_image = img
        image.save(os.path.join(dst, f"{basename}.png"))
        if save_txt:
            if len(existing_caption) > 0:
                with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file:
                    file.write(existing_caption)
        # subindex is a one-element list so the mutation is visible outside.
        subindex[0] += 1

    def save_pic(image, img_index, existing_caption=None):
        # Save the image and, when requested, a horizontally mirrored copy.
        save_pic_with_caption(image, img_index, existing_caption=existing_caption)
        if flip:
            save_pic_with_caption(ImageOps.mirror(image), img_index, existing_caption=existing_caption)

    def split_pic(image, img_inverse_xy):
        # Yield overlapping target-sized tiles cut from an over-sized image.
        # img_inverse_xy swaps axes so the same math handles both orientations.
        if img_inverse_xy:
            from_w, from_h = image.height, image.width
            to_w, to_h = height, width
        else:
            from_w, from_h = image.width, image.height
            to_w, to_h = width, height
        h = from_h * to_w // from_w
        if img_inverse_xy:
            image = image.resize((h, to_w))
        else:
            image = image.resize((to_w, h))
        split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
        y_step = (h - to_h) / (split_count - 1)
        for i in range(split_count):
            y = int(y_step * i)
            if img_inverse_xy:
                split_img = image.crop((y, 0, y + to_h, to_w))
            else:
                split_img = image.crop((0, y, to_w, y + to_h))
            yield split_img

    crop_clip = None
    if crop_image:
        # Clamp UI-supplied ratios and load YOLO+CLIP only when cropping.
        split_threshold = max(0.0, min(1.0, split_threshold))
        overlap_ratio = max(0.0, min(0.9, overlap_ratio))
        crop_clip = CropClip()
    for index, imagefile in enumerate(tqdm.tqdm(files)):
        if shared.state.interrupted:
            break
        subindex = [0]
        filename = os.path.join(src, imagefile)
        try:
            img = Image.open(filename).convert("RGB")
        except Exception:
            # Skip files PIL cannot open.
            continue
        # Interrogate once
        short_caption, full_caption = interrogate_image(img)
        if subject_class is not None and subject_class != "":
            # A user-supplied class overrides the interrogated short caption.
            short_caption = subject_class
        # Build our caption
        if caption_image:
            full_caption = build_caption(img, full_caption)
        shared.state.current_image = img
        shared.state.textinfo = f"Processing: '{full_caption}' ((unknown))"
        if crop_image:
            shared.state.textinfo = "Cropping..."
            # Ratio of the image's aspect to the target aspect, axis-swapped
            # for landscape images.
            if img.height > img.width:
                ratio = (img.width * height) / (img.height * width)
                inverse_xy = False
            else:
                ratio = (img.height * width) / (img.width * height)
                inverse_xy = True
            if split and ratio < 1.0 and ratio <= split_threshold:
                for splitted in split_pic(img, inverse_xy):
                    save_pic(splitted, index, existing_caption=full_caption)
            # Ask YOLO+CLIP for the region best matching the short caption;
            # returns [x1, x2, y1, y2].
            im_data = crop_clip.get_center(img, prompt=short_caption)
            crop_width = im_data[1] - im_data[0]
            center_x = im_data[0] + (crop_width / 2)
            crop_height = im_data[3] - im_data[2]
            center_y = im_data[2] + (crop_height / 2)
            crop_ratio = crop_width / crop_height
            dest_ratio = width / height
            tgt_width = crop_width
            tgt_height = crop_height
            # Grow the crop rectangle to match the destination aspect ratio.
            if crop_ratio != dest_ratio:
                if crop_width > crop_height:
                    tgt_height = crop_width / dest_ratio
                    tgt_width = crop_width
                else:
                    tgt_width = crop_height / dest_ratio
                    tgt_height = crop_height
                # Reverse the above if dest is too big
                if tgt_width > img.width or tgt_height > img.height:
                    if tgt_width > img.width:
                        tgt_width = img.width
                        tgt_height = tgt_width / dest_ratio
                    else:
                        tgt_height = img.height
                        tgt_width = tgt_height / dest_ratio
            tgt_height = int(tgt_height)
            tgt_width = int(tgt_width)
            # Center the rectangle on the detection, clamped to image bounds.
            left = max(center_x - (tgt_width / 2), 0)
            right = min(center_x + (tgt_width / 2), img.width)
            top = max(center_y - (tgt_height / 2), 0)
            bottom = min(center_y + (tgt_height / 2), img.height)
            img = img.crop((left, top, right, bottom))
            default_resize = True
            shared.state.current_image = img
        else:
            default_resize = False
        if restore_faces:
            shared.state.textinfo = f"Restoring faces using {face_model}..."
            # NOTE(review): the UI passes "GFPGAN"/"Codeformer" but this
            # compares against lowercase "gfpgan", so the GFPGAN branch never
            # runs and Codeformer is always used — confirm intended casing.
            if face_model == "gfpgan":
                restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(img, dtype=np.uint8))
                img = Image.fromarray(restored_img)
            else:
                restored_img = modules.codeformer_model.codeformer.restore(np.array(img, dtype=np.uint8),
                                                                           w=1.0)
                img = Image.fromarray(restored_img)
            shared.state.current_image = img
        if upscale:
            shared.state.textinfo = "Upscaling..."
            upscaler = shared.sd_upscalers[scaler]
            res = upscaler.scaler.upscale(img, upscale_ratio, upscaler.data_path)
            img = res
            default_resize = True
            shared.state.current_image = img
        if default_resize:
            # Final snap to the exact requested width/height.
            img = images.resize_image(1, img, width, height)
        shared.state.current_image = img
        save_pic(img, index, existing_caption=full_caption)
        shared.state.nextjob()

9
style.css Normal file
View File

@ -0,0 +1,9 @@
/* Hide the output gallery; results are shown via the preview image instead. */
#sp_gallery {
    display: none !important;
}
/* Make the live preview fill its column while processing runs. */
#sp_preview {
    width: 100% !important;
    height: 100% !important;
    display: block !important;
}