Updates, Improvements

Update yolov5 model version.
Select the largest target when smart cropping finds multiple targets.
Pad images to a square before smart cropping.
Add a pad-only mode.
Replace the separate width and height inputs with a single square size value.
Don't use hashes for new filenames; they make diffing batches impossible.
pull/12/head
d8ahazard 2022-12-07 15:57:45 -06:00
parent c840807b84
commit 93380435c0
3 changed files with 41 additions and 45 deletions

View File

@ -42,8 +42,8 @@ def find_position(parent: Image, child: Image):
class CropClip:
def __init__(self):
# Model
model_name = 'yolov5m6.pt'
model_url = 'https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5m6.pt'
model_name = 'yolov5m6v7.pt'
model_url = 'https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m6.pt'
model_dir = os.path.join(modules.paths.models_path, "yolo")
model_path = modelloader.load_models(model_dir, model_url, None, '.pt', model_name)
self.model = torch.hub.load('ultralytics/yolov5', 'custom', model_path[0])
@ -102,8 +102,14 @@ class CropClip:
# Retrieve the description vector and the photo vectors
similarity = text_encoded.cpu().numpy() @ image_features.cpu().numpy().T
similarity = similarity[0]
scores, imgs = similarity_top(similarity, N=1)
out = imgs[0]
scores, imgs = similarity_top(similarity, N=3)
max_area = 0
for img in imgs:
img_area = img.width * img.height
if img_area > max_area:
max_area = img_area
out = img
res = cv2.matchTemplate(numpy.array(image), numpy.array(out), cv2.TM_SQDIFF)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
# If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum

View File

@ -16,8 +16,7 @@ def on_ui_tabs():
sp_dst = gr.Textbox(label='Destination directory')
with gr.Tab("Cropping"):
sp_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Width", value=512)
sp_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Height", value=512)
sp_size = gr.Slider(minimum=64, maximum=2048, step=64, label="Output Size", value=512)
sp_pad = gr.Checkbox(label="Pad Images")
sp_crop = gr.Checkbox(label='Crop Images')
sp_flip = gr.Checkbox(label='Create flipped copies')
@ -78,8 +77,7 @@ def on_ui_tabs():
sp_dst,
sp_pad,
sp_crop,
sp_width,
sp_height,
sp_size,
sp_caption_append_file,
sp_caption_save_txt,
sp_txt_action,

View File

@ -41,7 +41,6 @@ def preprocess(src,
pad,
crop,
width,
height,
append_filename,
save_txt,
pretxt_action,
@ -77,7 +76,6 @@ def preprocess(src,
pad,
crop,
width,
height,
append_filename,
save_txt,
pretxt_action,
@ -113,7 +111,6 @@ def prework(src,
pad_image,
crop_image,
width,
height,
append_filename,
save_txt,
pretxt_action,
@ -137,7 +134,7 @@ def prework(src,
except:
pass
width = width
height = height
height = width
src = os.path.abspath(src)
dst = os.path.abspath(dst)
@ -155,31 +152,6 @@ def prework(src,
shared.state.textinfo = "Preprocessing..."
shared.state.job_count = len(files)
def pad_image(pil_img: Image, dest_width, dest_height):
src_width, src_height = pil_img.size
pad_width = dest_width
pad_height = dest_height
# If everything is square, just resize
if src_width == src_height and dest_width == dest_height:
pil_img.resize((dest_width, dest_height), resample=PIL.Image.LANCZOS)
else:
# If image is wider than tall
if src_width > src_height:
# And destination is square
if dest_width == dest_height:
pad_height = dest_width * src_height / src_width
if src_width == src_height:
return pil_img
elif src_width > src_height:
result = Image.new(pil_img.mode, (src_width, src_width))
result.paste(pil_img, (0, (src_width - src_height) // 2))
return result
else:
result = Image.new(pil_img.mode, (src_height, src_height))
result.paste(pil_img, ((src_height - src_width) // 2, 0))
return result
def build_caption(image):
existing_caption = None
if not append_filename:
@ -231,14 +203,12 @@ def prework(src,
def save_pic_with_caption(image, img_index, existing_caption):
if not append_filename:
filename_part = filename
filename_part = os.path.splitext(filename_part)[0]
filename_part = os.path.basename(filename_part)
else:
if append_filename:
filename_part = existing_caption
basename = f"{img_index:05}-{subindex[0]}-{filename_part}"
else:
basename = f"{img_index:05}-{subindex[0]}"
basename = f"{img_index:05}-{subindex[0]}-{filename_part}"
shared.state.current_image = img
image.save(os.path.join(dst, f"{basename}.png"))
@ -305,7 +275,7 @@ def prework(src,
if subject_class is not None and subject_class != "":
short_caption = subject_class
shared.state.textinfo = "Cropping..."
shared.state.textinfo = f"Cropping: {short_caption}"
if img.height > img.width:
ratio = (img.width * height) / (img.height * width)
inverse_xy = False
@ -321,6 +291,19 @@ def prework(src,
full_caption = build_caption(splitted)
save_pic(splitted, index, existing_caption=full_caption)
src_ratio = img.width / img.height
# Pad image before cropping?
if src_ratio != 1:
if img.width > img.height:
pad_width = img.width
pad_height = img.width
else:
pad_width = img.height
pad_height = img.height
res = Image.new("RGB", (pad_width, pad_height))
res.paste(img, box=(pad_width // 2 - img.width // 2, pad_height // 2 - img.height // 2))
img = res
im_data = crop_clip.get_center(img, prompt=short_caption)
crop_width = im_data[1] - im_data[0]
center_x = im_data[0] + (crop_width / 2)
@ -380,7 +363,16 @@ def prework(src,
shared.state.current_image = img
if pad_image:
default_resize = True
ratio = width / height
src_ratio = img.width / img.height
src_w = width if ratio < src_ratio else img.width * height // img.height
src_h = height if ratio >= src_ratio else img.height * width // img.width
resized = images.resize_image(0, img, src_w, src_h)
res = Image.new("RGB", (width, height))
res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2))
img = res
if default_resize:
img = images.resize_image(1, img, width, height)