# Original project: https://github.com/Vishnunkumar/clipcrop/blob/main/clipcrop/clipcrop.py


def clip_boxes(boxes, shape):
    """Clip xyxy boxes in place to the image bounds.

    Args:
        boxes: ``torch.Tensor`` or ``numpy.ndarray`` indexed as
            ``boxes[:, 0..3]`` = x1, y1, x2, y2.
        shape: Image shape as ``(height, width)``.

    Returns:
        None. ``boxes`` is modified in place.
    """
    height, width = shape[0], shape[1]
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, width)  # x1
        boxes[:, 1].clamp_(0, height)  # y1
        boxes[:, 2].clamp_(0, width)  # x2
        boxes[:, 3].clamp_(0, height)  # y2
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, width)  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, height)  # y1, y2


def find_position(parent: "Image.Image", child: "Image.Image"):
    """Locate ``child`` inside ``parent`` by template matching.

    Args:
        parent: Image that is searched.
        child: Template image; its ``width``/``height`` are used for the centre.

    Returns:
        ``(center_x, center_y)`` of the best match, in ``parent`` pixel
        coordinates (floats).
    """
    res = cv2.matchTemplate(np.array(parent), np.array(child), cv2.TM_CCOEFF_NORMED)
    # For TM_CCOEFF_NORMED the best match is the maximum of the response map.
    _, _, _, max_loc = cv2.minMaxLoc(res)
    left, top = max_loc
    return left + (child.width / 2), top + (child.height / 2)


def _similarity_top(similarity_list, candidates, n):
    """Return the ``n`` highest scores and their candidates, best first."""
    ranked = sorted(enumerate(similarity_list), key=lambda pair: pair[1], reverse=True)
    scores = []
    top_items = []
    for index, score in ranked[:n]:
        scores.append(score)
        top_items.append(candidates[index])
    return scores, top_items


class CropClip:
    """Find the region of an image that best matches a text prompt.

    YOLOv5 proposes candidate crops and CLIP scores each crop against the
    prompt; the best-scoring crop is then located in the source image.
    """

    def __init__(self):
        # Model
        model_name = 'yolov5m6.pt'
        model_url = 'https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5m6.pt'
        model_dir = os.path.join(modules.paths.models_path, "yolo")
        model_path = modelloader.load_models(model_dir, model_url, None, '.pt', model_name)
        self.model = torch.hub.load('ultralytics/yolov5', 'custom', model_path[0])
        # (device key, (model, preprocess)) once CLIP has been loaded.
        self._clip_cache = None
        # Prevent BLIP crossfire breakage
        sys.modules.pop('models', None)

    def _load_clip(self, device):
        """Load CLIP ViT-B/32 for ``device`` once and reuse it on later calls."""
        key = str(device)
        cached = getattr(self, "_clip_cache", None)
        if cached is None or cached[0] != key:
            cached = (key, clip.load("ViT-B/32", device=device))
            self._clip_cache = cached
        return cached[1]

    def get_center(self, image: "Image.Image", prompt: str):
        """Return the box of the YOLO crop that best matches ``prompt``.

        Args:
            image: Source image.
            prompt: Text describing the subject to look for.

        Returns:
            ``[left, right, top, bottom]`` of the best-matching crop in
            ``image`` pixel coordinates. When YOLO finds nothing, the whole
            image is the only candidate.
        """
        # Load image into YOLO parser
        results = self.model(image)  # includes NMS
        # Crop each image result to an array
        candidates = [Image.fromarray(crop["im"]) for crop in results.crop(False)]
        if not candidates:
            candidates = [image]

        device = shared.device
        model, preprocess = self._load_clip(device)

        with torch.no_grad():
            # The preprocess transform returned by clip.load already applies
            # CLIP's mean/std normalisation, so the tensors are used as-is and
            # moved to the configured device rather than a hard-coded CUDA one.
            batch = torch.stack([preprocess(im) for im in candidates]).to(device)
            image_features = model.encode_image(batch).float()
            image_features /= image_features.norm(dim=-1, keepdim=True)

            # Encode and normalize the description using CLIP
            text_encoded = model.encode_text(clip.tokenize(prompt).to(device)).float()
            text_encoded /= text_encoded.norm(dim=-1, keepdim=True)

        # Cosine similarity between the description vector and each crop
        similarity = (text_encoded.cpu().numpy() @ image_features.cpu().numpy().T)[0]
        _, best = _similarity_top(similarity, candidates, 1)
        out = best[0]

        res = cv2.matchTemplate(numpy.array(image), numpy.array(out), cv2.TM_SQDIFF)
        # For TM_SQDIFF the best match is the minimum of the response map.
        _, _, min_loc, _ = cv2.minMaxLoc(res)
        left, top = min_loc
        return [left, left + out.width, top, top + out.height]
def encode(*args):
    """Forward to ``_codecs.encode``; exposed so pickles may reference it."""
    return _codecs.encode(*args)


class RestrictedUnpickler(pickle.Unpickler):
    """Unpickler that only resolves an allow-list of globals.

    Extends the usual tensor/container allow-list with the classes needed to
    load YOLOv5 checkpoints.
    """

    def persistent_load(self, saved_id):
        """Return a placeholder ``TypedStorage`` for a persistent storage id."""
        assert saved_id[0] == 'storage'
        return TypedStorage()

    def _find_passthrough(self, module, name):
        """Resolve ``module.name`` through the base class, refusing dotted names.

        On pickle protocol 4+ the base ``find_class`` resolves a dotted
        ``name`` by attribute traversal, which would let a pickle reach
        arbitrary attributes of an allowed module.
        """
        if "." in name:
            raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden")
        return super().find_class(module, name)

    def find_class(self, module, name):
        """Return the global ``module.name`` if it is allow-listed.

        Raises:
            pickle.UnpicklingError: If the global is not on the allow-list.
        """
        if module == 'collections' and name == 'OrderedDict':
            return getattr(collections, name)
        if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']:
            return getattr(torch._utils, name)
        if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage',
                                          'DoubleStorage', 'ByteStorage']:
            return getattr(torch, name)
        if module == 'torch.nn.modules.container' and name in ['ParameterDict', 'Sequential']:
            return getattr(torch.nn.modules.container, name)
        if module == 'numpy.core.multiarray' and name == 'scalar':
            return numpy.core.multiarray.scalar
        if module == 'numpy' and name == 'dtype':
            return numpy.dtype
        if module == '_codecs' and name == 'encode':
            return encode
        if module == "pytorch_lightning.callbacks" and name == 'model_checkpoint':
            import pytorch_lightning.callbacks
            return pytorch_lightning.callbacks.model_checkpoint
        if module == "pytorch_lightning.callbacks.model_checkpoint" and name == 'ModelCheckpoint':
            import pytorch_lightning.callbacks.model_checkpoint
            return pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
        # SECURITY NOTE(review): this branch allows ANY attribute of ANY module
        # whose dotted path contains "yolo". It is far broader than the rest of
        # the allow-list; consider enumerating the class names actually needed.
        if "yolo" in module:
            return self._find_passthrough(module, name)
        if module == "models.common" and name == "Conv":
            return self._find_passthrough(module, name)
        if 'torch.nn.modules' in module and name in ['Conv', 'Conv2d', 'BatchNorm2d', "SiLU", "MaxPool2d",
                                                     "Upsample", "ModuleList"]:
            return self._find_passthrough(module, name)
        if "models.common" in module and name in ["C3", "Bottleneck", "SPPF", "Concat"]:
            return self._find_passthrough(module, name)
        if module == "__builtin__" and name == 'set':
            return set

        # Forbid everything else.
        raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden")
.txt File", value=False) + sp_caption_deepbooru = gr.Checkbox(label='Append DeepDanbooru to Caption', + visible=True if cmd_opts.deepdanbooru else False) + sp_replace_class = gr.Checkbox(label='Replace Class with Subject in Caption', value=True) + sp_class = gr.Textbox(label='Subject Class', placeholder='Subject class to crop (leave ' + 'blank to auto-detect)') + sp_subject = gr.Textbox(label='Subject Name', placeholder='Subject Name to replace class ' + 'with in captions') + + with gr.Tab("Post-Processing"): + sp_restore_faces = gr.Checkbox(label='Restore Faces', value=False) + sp_face_model = gr.Dropdown(label="Face Restore Model",choices=["GFPGAN", "Codeformer"], value="GFPGAN") + sp_upscale = gr.Checkbox(label='Upscale and Resize', value=False) + sp_upscale_ratio = gr.Slider(label="Upscale Ratio", value=2, step=1, minimum=2, maximum=4) + sp_scaler = gr.Radio(label='Upscaler', elem_id="sp_scaler", + choices=[x.name for x in shared.sd_upscalers], + value=shared.sd_upscalers[0].name, type="index") + + + # Preview/progress + with gr.Column(variant="panel"): + sp_progress = gr.HTML(elem_id="sp_progress", value="") + sp_outcome = gr.HTML(elem_id="sp_error", value="") + sp_progressbar = gr.HTML(elem_id="sp_progressbar") + sp_gallery = gr.Gallery(label='Output', show_label=False, elem_id='sp_gallery').style(grid=4) + sp_preview = gr.Image(elem_id='sp_preview', visible=False) + setup_progressbar(sp_progressbar, sp_preview, 'sp', textinfo=sp_progress) + + with gr.Row(): + sp_cancel = gr.Button(value="Cancel") + sp_run = gr.Button(value="Preprocess", variant='primary') + + sp_cancel.click( + fn=lambda: shared.state.interrupt() + ) + + sp_run.click( + fn=wrap_gradio_gpu_call(smartprocess.preprocess, extra_outputs=[gr.update()]), + _js="start_smart_process", + inputs=[ + sp_src, + sp_dst, + sp_crop, + sp_width, + sp_height, + sp_caption_append_file, + sp_caption_save_txt, + sp_txt_action, + sp_flip, + sp_split, + sp_caption, + sp_caption_length, + sp_caption_deepbooru, + 
def interrogate_image(image: "Image.Image"):
    """Interrogate ``image`` twice: once with short settings, once with the user's.

    The interrogator options are temporarily overridden for the short prompt
    and restored afterwards, even if interrogation raises.

    Returns:
        ``(prompt, full_caption)``: the short prompt and the caption produced
        with the original option values.
    """
    options = shared.opts
    prev_artists = options.interrogate_use_builtin_artists
    prev_max = options.interrogate_clip_max_length
    prev_min = options.interrogate_clip_min_length
    options.interrogate_clip_min_length = 10
    options.interrogate_clip_max_length = 20
    options.interrogate_use_builtin_artists = False
    try:
        prompt = shared.interrogator.interrogate(image)
    finally:
        options.interrogate_clip_min_length = prev_min
        options.interrogate_clip_max_length = prev_max
        options.interrogate_use_builtin_artists = prev_artists
    full_caption = shared.interrogator.interrogate(image)
    return prompt, full_caption


def preprocess(src,
               dst,
               crop,
               width,
               height,
               append_filename,
               save_txt,
               pretxt_action,
               flip,
               split,
               caption,
               caption_length,
               caption_deepbooru,
               split_threshold,
               overlap_ratio,
               subject_class,
               subject,
               replace_class,
               restore_faces,
               face_model,
               upscale,
               upscale_ratio,
               scaler
               ):
    """UI entry point: load the captioning models, run ``prework``, release them.

    All arguments are forwarded unchanged to ``prework``.

    Returns:
        ``("Processing complete.", "")`` for the progress and outcome fields.
    """
    try:
        shared.state.textinfo = "Loading models for smart processing..."
        safe.RestrictedUnpickler = reallysafe.RestrictedUnpickler
        if caption:
            shared.interrogator.load()

        if caption_deepbooru:
            db_opts = deepbooru.create_deepbooru_opts()
            db_opts[deepbooru.OPT_INCLUDE_RANKS] = False
            deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts)

        prework(src,
                dst,
                crop,
                width,
                height,
                append_filename,
                save_txt,
                pretxt_action,
                flip,
                split,
                caption,
                caption_length,
                caption_deepbooru,
                split_threshold,
                overlap_ratio,
                subject_class,
                subject,
                replace_class,
                restore_faces,
                face_model,
                upscale,
                upscale_ratio,
                scaler)

    finally:

        if caption:
            shared.interrogator.send_blip_to_ram()

        if caption_deepbooru:
            deepbooru.release_process()

    return "Processing complete.", ""


def _truncate_caption(caption, max_length):
    """Cut ``caption`` at a word boundary to at most ``max_length`` characters.

    A ``max_length`` of 0 (or less, or None) means unlimited.
    """
    if not max_length or not 0 < max_length < len(caption):
        return caption
    kept = []
    used = 0
    for word in caption.split(" "):
        extra = len(word) + (1 if kept else 0)
        if used + extra > max_length:
            break
        kept.append(word)
        used += extra
    return " ".join(kept).strip()


def _replace_class(caption, subject_class, subject):
    """Replace "a CLASS" and then "CLASS" in ``caption`` with ``subject``.

    Blank values are ignored: replacing the empty string would insert the
    subject between every character of the caption.
    """
    if not subject_class or not subject:
        return caption
    caption = caption.replace(f"a {subject_class}", subject)
    return caption.replace(subject_class, subject)


def _fit_crop_box(box, dest_ratio, img_width, img_height):
    """Expand a detection to the destination aspect ratio inside the image.

    Args:
        box: ``(left, right, top, bottom)`` of the detected subject.
        dest_ratio: Target ``width / height``.
        img_width: Width of the source image.
        img_height: Height of the source image.

    Returns:
        ``(left, top, right, bottom)`` integers for ``Image.crop``. The window
        has the destination aspect ratio (up to integer rounding), is centred
        on the detection where possible and is shifted, not squeezed, to stay
        inside the image.
    """
    x1, x2, y1, y2 = box
    crop_width = x2 - x1
    crop_height = y2 - y1
    if crop_width <= 0 or crop_height <= 0:
        # Degenerate detection: fall back to the whole image.
        x1, y1 = 0, 0
        crop_width, crop_height = img_width, img_height
    center_x = x1 + crop_width / 2
    center_y = y1 + crop_height / 2

    # Grow the shorter side so the window contains the whole detection.
    if crop_width / crop_height > dest_ratio:
        tgt_width = crop_width
        tgt_height = crop_width / dest_ratio
    else:
        tgt_width = crop_height * dest_ratio
        tgt_height = crop_height

    # Shrink proportionally when the window is larger than the image.
    if tgt_width > img_width:
        tgt_height = tgt_height * img_width / tgt_width
        tgt_width = img_width
    if tgt_height > img_height:
        tgt_width = tgt_width * img_height / tgt_height
        tgt_height = img_height

    tgt_width = max(1, int(tgt_width))
    tgt_height = max(1, int(tgt_height))
    left = int(min(max(center_x - tgt_width / 2, 0), img_width - tgt_width))
    top = int(min(max(center_y - tgt_height / 2, 0), img_height - tgt_height))
    return left, top, left + tgt_width, top + tgt_height


def prework(src,
            dst,
            crop_image,
            width,
            height,
            append_filename,
            save_txt,
            pretxt_action,
            flip,
            split,
            caption_image,
            caption_length,
            caption_deepbooru,
            split_threshold,
            overlap_ratio,
            subject_class,
            subject,
            replace_class,
            restore_faces,
            face_model,
            upscale,
            upscale_ratio,
            scaler):
    """Process every image in ``src`` and write the results to ``dst``.

    Depending on the flags, each image is captioned, cropped around the
    subject, split, face-restored, upscaled and resized to ``width`` x
    ``height``, then saved as PNG (optionally mirrored and with a ``.txt``
    caption). Files that cannot be opened as images are skipped. Returns None.

    Raises:
        AssertionError: If ``src`` and ``dst`` are the same directory.
    """
    # Prevent BLIP crossfire breakage
    sys.modules.pop('models', None)
    src = os.path.abspath(src)
    dst = os.path.abspath(dst)

    if not (crop_image or caption_image or restore_faces or upscale):
        print("Nothing to do.")
        shared.state.textinfo = "Nothing to do!"
        return

    assert src != dst, 'same directory specified as source and destination'

    os.makedirs(dst, exist_ok=True)

    files = os.listdir(src)

    shared.state.textinfo = "Preprocessing..."
    shared.state.job_count = len(files)

    def build_caption(image, caption):
        """Combine the generated caption with any existing one and post-process it."""
        existing_caption = None
        if not append_filename:
            existing_caption_filename = os.path.splitext(filename)[0] + '.txt'
            if os.path.exists(existing_caption_filename):
                with open(existing_caption_filename, 'r', encoding="utf8") as file:
                    existing_caption = file.read()
        else:
            # Use only the file's own name, not its directory or extension.
            stem = os.path.splitext(os.path.basename(filename))[0]
            existing_caption = ''.join(c for c in stem if c.isalpha() or c in [" ", ","])

        if caption_deepbooru:
            if len(caption) > 0:
                caption += ", "
            caption += deepbooru.get_tags_from_process(image)

        if pretxt_action == 'prepend' and existing_caption:
            caption = existing_caption + ' ' + caption
        elif pretxt_action == 'append' and existing_caption:
            caption = caption + ' ' + existing_caption
        elif pretxt_action == 'copy' and existing_caption:
            caption = existing_caption

        caption = caption.strip()
        if replace_class:
            caption = _replace_class(caption, subject_class, subject)

        return _truncate_caption(caption, caption_length).strip()

    def save_pic_with_caption(image, img_index, existing_caption):
        """Save ``image`` (and optionally its caption) under a numbered base name."""
        caption_text = existing_caption or ""

        if not append_filename:
            filename_part = os.path.basename(os.path.splitext(filename)[0])
        else:
            filename_part = caption_text
            # A path separator in the caption would point outside ``dst``.
            for separator in (os.sep, os.altsep):
                if separator:
                    filename_part = filename_part.replace(separator, "_")

        basename = f"{img_index:05}-{subindex[0]}-{filename_part}"
        shared.state.current_image = image
        image.save(os.path.join(dst, f"{basename}.png"))

        if save_txt and caption_text:
            with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file:
                file.write(caption_text)

        subindex[0] += 1

    def save_pic(image, img_index, existing_caption=None):
        """Save ``image`` and, when flipping is enabled, its mirrored copy."""
        save_pic_with_caption(image, img_index, existing_caption=existing_caption)

        if flip:
            save_pic_with_caption(ImageOps.mirror(image), img_index, existing_caption=existing_caption)

    def split_pic(image, img_inverse_xy):
        """Yield overlapping ``width`` x ``height`` tiles along the image's long side."""
        if img_inverse_xy:
            from_w, from_h = image.height, image.width
            to_w, to_h = height, width
        else:
            from_w, from_h = image.width, image.height
            to_w, to_h = width, height
        h = from_h * to_w // from_w
        if img_inverse_xy:
            image = image.resize((h, to_w))
        else:
            image = image.resize((to_w, h))

        split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
        # A single tile has no step; avoid dividing by zero.
        y_step = (h - to_h) / (split_count - 1) if split_count > 1 else 0
        for i in range(split_count):
            y = int(y_step * i)
            if img_inverse_xy:
                split_img = image.crop((y, 0, y + to_h, to_w))
            else:
                split_img = image.crop((0, y, to_w, y + to_h))
            yield split_img

    crop_clip = None

    if crop_image:
        split_threshold = max(0.0, min(1.0, split_threshold))
        overlap_ratio = max(0.0, min(0.9, overlap_ratio))
        crop_clip = CropClip()

    for index, imagefile in enumerate(tqdm.tqdm(files)):

        if shared.state.interrupted:
            break

        subindex = [0]
        filename = os.path.join(src, imagefile)
        try:
            img = Image.open(filename).convert("RGB")
        except Exception:
            # Not a readable image (e.g. a caption .txt file): skip it.
            continue

        # Interrogate once
        short_caption, full_caption = interrogate_image(img)

        if subject_class:
            short_caption = subject_class

        # Build our caption
        if caption_image:
            full_caption = build_caption(img, full_caption)
        shared.state.current_image = img
        shared.state.textinfo = f"Processing: '{full_caption}' ({filename})"
        if crop_image:
            shared.state.textinfo = "Cropping..."
            if img.height > img.width:
                ratio = (img.width * height) / (img.height * width)
                inverse_xy = False
            else:
                ratio = (img.height * width) / (img.width * height)
                inverse_xy = True

            if split and ratio < 1.0 and ratio <= split_threshold:
                for splitted in split_pic(img, inverse_xy):
                    save_pic(splitted, index, existing_caption=full_caption)

            im_data = crop_clip.get_center(img, prompt=short_caption)
            img = img.crop(_fit_crop_box(im_data, width / height, img.width, img.height))
            default_resize = True
            shared.state.current_image = img
        else:
            default_resize = False

        if restore_faces:
            shared.state.textinfo = f"Restoring faces using {face_model}..."
            # Compare case-insensitively: the UI offers "GFPGAN" / "Codeformer".
            if str(face_model).lower() == "gfpgan":
                restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(img, dtype=np.uint8))
            else:
                restored_img = modules.codeformer_model.codeformer.restore(np.array(img, dtype=np.uint8),
                                                                           w=1.0)
            img = Image.fromarray(restored_img)
            shared.state.current_image = img

        if upscale:
            shared.state.textinfo = "Upscaling..."
            upscaler = shared.sd_upscalers[scaler]
            img = upscaler.scaler.upscale(img, upscale_ratio, upscaler.data_path)
            default_resize = True
            shared.state.current_image = img

        if default_resize:
            img = images.resize_image(1, img, width, height)
        shared.state.current_image = img
        save_pic(img, index, existing_caption=full_caption)

        shared.state.nextjob()