diff --git a/clipcrop.py b/clipcrop.py index 4a1a3e5..68ab49e 100644 --- a/clipcrop.py +++ b/clipcrop.py @@ -42,8 +42,8 @@ def find_position(parent: Image, child: Image): class CropClip: def __init__(self): # Model - model_name = 'yolov5m6.pt' - model_url = 'https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5m6.pt' + model_name = 'yolov5m6v7.pt' + model_url = 'https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m6.pt' model_dir = os.path.join(modules.paths.models_path, "yolo") model_path = modelloader.load_models(model_dir, model_url, None, '.pt', model_name) self.model = torch.hub.load('ultralytics/yolov5', 'custom', model_path[0]) @@ -102,8 +102,14 @@ class CropClip: # Retrieve the description vector and the photo vectors similarity = text_encoded.cpu().numpy() @ image_features.cpu().numpy().T similarity = similarity[0] - scores, imgs = similarity_top(similarity, N=1) - out = imgs[0] + scores, imgs = similarity_top(similarity, N=3) + max_area = 0 + for img in imgs: + img_area = img.width * img.height + if img_area > max_area: + max_area = img_area + out = img + res = cv2.matchTemplate(numpy.array(image), numpy.array(out), cv2.TM_SQDIFF) min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum diff --git a/scripts/main.py b/scripts/main.py index 19c4a43..ceea026 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -16,8 +16,7 @@ def on_ui_tabs(): sp_dst = gr.Textbox(label='Destination directory') with gr.Tab("Cropping"): - sp_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Width", value=512) - sp_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Height", value=512) + sp_size = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Size", value=512) sp_pad = gr.Checkbox(label="Pad Images") sp_crop = gr.Checkbox(label='Crop Images') sp_flip = gr.Checkbox(label='Create flipped copies') @@ -78,8 +77,7 @@ def on_ui_tabs(): sp_dst, sp_pad, sp_crop, - sp_width, - sp_height, + sp_size, sp_caption_append_file, sp_caption_save_txt, sp_txt_action, diff --git a/smartprocess.py b/smartprocess.py index 1d7066e..4ffedb5 100644 --- a/smartprocess.py +++ b/smartprocess.py @@ -41,7 +41,6 @@ def preprocess(src, pad, crop, width, - height, append_filename, save_txt, pretxt_action, @@ -77,7 +76,6 @@ def preprocess(src, pad, crop, width, - height, append_filename, save_txt, pretxt_action, @@ -113,7 +111,6 @@ def prework(src, pad_image, crop_image, width, - height, append_filename, save_txt, pretxt_action, @@ -137,7 +134,7 @@ def prework(src, except: pass width = width - height = height + height = width src = os.path.abspath(src) dst = os.path.abspath(dst) @@ -155,31 +152,6 @@ def prework(src, shared.state.textinfo = "Preprocessing..." shared.state.job_count = len(files) - def pad_image(pil_img: Image, dest_width, dest_height): - src_width, src_height = pil_img.size - pad_width = dest_width - pad_height = dest_height - # If everything is square, just resize - if src_width == src_height and dest_width == dest_height: - pil_img.resize((dest_width, dest_height), resample=PIL.Image.LANCZOS) - else: - # If image is wider than tall - if src_width > src_height: - # And destination is square - if dest_width == dest_height: - pad_height = dest_width * src_height / src_width - - if src_width == src_height: - return pil_img - elif src_width > src_height: - result = Image.new(pil_img.mode, (src_width, src_width)) - result.paste(pil_img, (0, (src_width - src_height) // 2)) - return result - else: - result = Image.new(pil_img.mode, (src_height, src_height)) - result.paste(pil_img, ((src_height - src_width) // 2, 0)) - return result - def build_caption(image): existing_caption = None if not append_filename: @@ -231,14 +203,12 @@ def prework(src, def save_pic_with_caption(image, img_index, existing_caption): - if not append_filename: - filename_part = filename - filename_part = os.path.splitext(filename_part)[0] - filename_part = os.path.basename(filename_part) - else: + if append_filename: filename_part = existing_caption + basename = f"{img_index:05}-{subindex[0]}-{filename_part}" + else: + basename = f"{img_index:05}-{subindex[0]}" - basename = f"{img_index:05}-{subindex[0]}-{filename_part}" shared.state.current_image = img image.save(os.path.join(dst, f"{basename}.png")) @@ -305,7 +275,7 @@ def prework(src, if subject_class is not None and subject_class != "": short_caption = subject_class - shared.state.textinfo = "Cropping..." + shared.state.textinfo = f"Cropping: {short_caption}" if img.height > img.width: ratio = (img.width * height) / (img.height * width) inverse_xy = False @@ -321,6 +291,19 @@ def prework(src, full_caption = build_caption(splitted) save_pic(splitted, index, existing_caption=full_caption) + src_ratio = img.width / img.height + # Pad image before cropping? + if src_ratio != 1: + if img.width > img.height: + pad_width = img.width + pad_height = img.width + else: + pad_width = img.height + pad_height = img.height + res = Image.new("RGB", (pad_width, pad_height)) + res.paste(img, box=(pad_width // 2 - img.width // 2, pad_height // 2 - img.height // 2)) + img = res + im_data = crop_clip.get_center(img, prompt=short_caption) crop_width = im_data[1] - im_data[0] center_x = im_data[0] + (crop_width / 2) @@ -380,7 +363,16 @@ def prework(src, shared.state.current_image = img if pad_image: - default_resize = True + ratio = width / height + src_ratio = img.width / img.height + + src_w = width if ratio < src_ratio else img.width * height // img.height + src_h = height if ratio >= src_ratio else img.height * width // img.width + + resized = images.resize_image(0, img, src_w, src_h) + res = Image.new("RGB", (width, height)) + res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2)) + img = res if default_resize: img = images.resize_image(1, img, width, height)