From 93380435c0fb145a0e250a8ea40981f8a4d39f65 Mon Sep 17 00:00:00 2001
From: d8ahazard <d8ahazard@gmail.com>
Date: Wed, 7 Dec 2022 15:57:45 -0600
Subject: [PATCH] Updates, Improvements

Update the YOLOv5 model weights from the v6.2 release to the v7.0 release.
Select the largest target when smart cropping finds multiple targets.
Pad smart-cropped images to a square before cropping.
Add pad-only mode.
Replace the separate output width and height settings with a single square size value.
Don't use hashes for new filenames; it makes diffing batches impossible.
---
 clipcrop.py     | 14 ++++++++---
 scripts/main.py |  6 ++---
 smartprocess.py | 66 ++++++++++++++++++++++---------------------------
 3 files changed, 41 insertions(+), 45 deletions(-)

diff --git a/clipcrop.py b/clipcrop.py
index 4a1a3e5..68ab49e 100644
--- a/clipcrop.py
+++ b/clipcrop.py
@@ -42,8 +42,8 @@ def find_position(parent: Image, child: Image):
 class CropClip:
     def __init__(self):
         # Model
-        model_name = 'yolov5m6.pt'
-        model_url = 'https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5m6.pt'
+        model_name = 'yolov5m6v7.pt'
+        model_url = 'https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m6.pt'
         model_dir = os.path.join(modules.paths.models_path, "yolo")
         model_path = modelloader.load_models(model_dir, model_url, None, '.pt', model_name)
         self.model = torch.hub.load('ultralytics/yolov5', 'custom', model_path[0])
@@ -102,8 +102,14 @@ class CropClip:
         # Retrieve the description vector and the photo vectors
         similarity = text_encoded.cpu().numpy() @ image_features.cpu().numpy().T
         similarity = similarity[0]
-        scores, imgs = similarity_top(similarity, N=1)
-        out = imgs[0]
+        scores, imgs = similarity_top(similarity, N=3)
+        max_area = 0
+        for img in imgs:
+            img_area = img.width * img.height
+            if img_area > max_area:
+                max_area = img_area
+                out = img
+
         res = cv2.matchTemplate(numpy.array(image), numpy.array(out), cv2.TM_SQDIFF)
         min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
         # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
diff --git a/scripts/main.py b/scripts/main.py
index 19c4a43..ceea026 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -16,8 +16,7 @@ def on_ui_tabs():
                     sp_dst = gr.Textbox(label='Destination directory')
 
                 with gr.Tab("Cropping"):
-                    sp_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Width", value=512)
-                    sp_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Height", value=512)
+                    sp_size = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Size", value=512)
                     sp_pad = gr.Checkbox(label="Pad Images")
                     sp_crop = gr.Checkbox(label='Crop Images')
                     sp_flip = gr.Checkbox(label='Create flipped copies')
@@ -78,8 +77,7 @@ def on_ui_tabs():
                 sp_dst,
                 sp_pad,
                 sp_crop,
-                sp_width,
-                sp_height,
+                sp_size,
                 sp_caption_append_file,
                 sp_caption_save_txt,
                 sp_txt_action,
diff --git a/smartprocess.py b/smartprocess.py
index 1d7066e..4ffedb5 100644
--- a/smartprocess.py
+++ b/smartprocess.py
@@ -41,7 +41,6 @@ def preprocess(src,
                pad,
                crop,
                width,
-               height,
                append_filename,
                save_txt,
                pretxt_action,
@@ -77,7 +76,6 @@ def preprocess(src,
                 pad,
                 crop,
                 width,
-                height,
                 append_filename,
                 save_txt,
                 pretxt_action,
@@ -113,7 +111,6 @@ def prework(src,
             pad_image,
             crop_image,
             width,
-            height,
             append_filename,
             save_txt,
             pretxt_action,
@@ -137,7 +134,7 @@ def prework(src,
     except:
         pass
     width = width
-    height = height
+    height = width
     src = os.path.abspath(src)
     dst = os.path.abspath(dst)
 
@@ -155,31 +152,6 @@ def prework(src,
     shared.state.textinfo = "Preprocessing..."
     shared.state.job_count = len(files)
 
-    def pad_image(pil_img: Image, dest_width, dest_height):
-        src_width, src_height = pil_img.size
-        pad_width = dest_width
-        pad_height = dest_height
-        # If everything is square, just resize
-        if src_width == src_height and dest_width == dest_height:
-            pil_img.resize((dest_width, dest_height), resample=PIL.Image.LANCZOS)
-        else:
-            # If image is wider than tall
-            if src_width > src_height:
-                # And destination is square
-                if dest_width == dest_height:
-                    pad_height = dest_width * src_height / src_width
-
-        if src_width == src_height:
-            return pil_img
-        elif src_width > src_height:
-            result = Image.new(pil_img.mode, (src_width, src_width))
-            result.paste(pil_img, (0, (src_width - src_height) // 2))
-            return result
-        else:
-            result = Image.new(pil_img.mode, (src_height, src_height))
-            result.paste(pil_img, ((src_height - src_width) // 2, 0))
-            return result
-
     def build_caption(image):
         existing_caption = None
         if not append_filename:
@@ -231,14 +203,12 @@ def prework(src,
 
     def save_pic_with_caption(image, img_index, existing_caption):
 
-        if not append_filename:
-            filename_part = filename
-            filename_part = os.path.splitext(filename_part)[0]
-            filename_part = os.path.basename(filename_part)
-        else:
+        if append_filename:
             filename_part = existing_caption
+            basename = f"{img_index:05}-{subindex[0]}-{filename_part}"
+        else:
+            basename = f"{img_index:05}-{subindex[0]}"
 
-        basename = f"{img_index:05}-{subindex[0]}-{filename_part}"
         shared.state.current_image = img
         image.save(os.path.join(dst, f"{basename}.png"))
 
@@ -305,7 +275,7 @@ def prework(src,
             if subject_class is not None and subject_class != "":
                 short_caption = subject_class
 
-            shared.state.textinfo = "Cropping..."
+            shared.state.textinfo = f"Cropping: {short_caption}"
             if img.height > img.width:
                 ratio = (img.width * height) / (img.height * width)
                 inverse_xy = False
@@ -321,6 +291,19 @@ def prework(src,
                         full_caption = build_caption(splitted)
                     save_pic(splitted, index, existing_caption=full_caption)
 
+            src_ratio = img.width / img.height
+            # Pad image before cropping?
+            if src_ratio != 1:
+                if img.width > img.height:
+                    pad_width = img.width
+                    pad_height = img.width
+                else:
+                    pad_width = img.height
+                    pad_height = img.height
+                res = Image.new("RGB", (pad_width, pad_height))
+                res.paste(img, box=(pad_width // 2 - img.width // 2, pad_height // 2 - img.height // 2))
+                img = res
+
             im_data = crop_clip.get_center(img, prompt=short_caption)
             crop_width = im_data[1] - im_data[0]
             center_x = im_data[0] + (crop_width / 2)
@@ -380,7 +363,16 @@ def prework(src,
             shared.state.current_image = img
 
         if pad_image:
-            default_resize = True
+            ratio = width / height
+            src_ratio = img.width / img.height
+
+            src_w = width if ratio < src_ratio else img.width * height // img.height
+            src_h = height if ratio >= src_ratio else img.height * width // img.width
+
+            resized = images.resize_image(0, img, src_w, src_h)
+            res = Image.new("RGB", (width, height))
+            res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2))
+            img = res
 
         if default_resize:
             img = images.resize_image(1, img, width, height)