360 lines
12 KiB
Python
360 lines
12 KiB
Python
import math
|
|
import os
|
|
import sys
|
|
|
|
import numpy as np
|
|
import tqdm
|
|
from PIL import Image, ImageOps
|
|
|
|
from clipcrop import CropClip
|
|
import reallysafe
|
|
from modules import shared, images, safe
|
|
import modules.gfpgan_model
|
|
import modules.codeformer_model
|
|
from modules.shared import opts, cmd_opts
|
|
|
|
if cmd_opts.deepdanbooru:
|
|
import modules.deepbooru as deepbooru
|
|
|
|
|
|
def interrogate_image(image: Image.Image, full=False):
    """Generate a CLIP caption for *image*.

    When ``full`` is False the interrogator options are temporarily tightened
    (caption length clamped to 10-20, built-in artist list disabled) to get a
    short prompt-style caption; the user's settings are restored afterwards.

    :param image: PIL image to caption.
    :param full: if True, interrogate with the user's current settings unchanged.
    :return: the caption string produced by ``shared.interrogator``.
    """
    if full:
        return shared.interrogator.interrogate(image)

    # Save the user's interrogator settings before overriding them.
    prev_artists = shared.opts.interrogate_use_builtin_artists
    prev_max = shared.opts.interrogate_clip_max_length
    prev_min = shared.opts.interrogate_clip_min_length
    shared.opts.interrogate_clip_min_length = 10
    shared.opts.interrogate_clip_max_length = 20
    shared.opts.interrogate_use_builtin_artists = False
    try:
        caption = shared.interrogator.interrogate(image)
    finally:
        # Restore settings even if interrogation raises, so a failure here
        # doesn't permanently clobber the user's options.
        shared.opts.interrogate_clip_min_length = prev_min
        shared.opts.interrogate_clip_max_length = prev_max
        shared.opts.interrogate_use_builtin_artists = prev_artists
    return caption
|
|
|
|
|
|
def preprocess(src,
               dst,
               crop,
               width,
               height,
               append_filename,
               save_txt,
               pretxt_action,
               flip,
               split,
               caption,
               caption_length,
               caption_deepbooru,
               split_threshold,
               overlap_ratio,
               subject_class,
               subject,
               replace_class,
               restore_faces,
               face_model,
               upscale,
               upscale_ratio,
               scaler):
    """Load the captioning models, run the full smart-processing pass via
    :func:`prework`, then unload the models again.

    Returns a ``(status_message, "")`` tuple for the UI.
    """
    try:
        shared.state.textinfo = "Loading models for smart processing..."
        # Swap in the extension's unpickler so its model checkpoints load.
        safe.RestrictedUnpickler = reallysafe.RestrictedUnpickler

        if caption:
            shared.interrogator.load()
        if caption_deepbooru:
            deepbooru.model.start()

        prework(src=src,
                dst=dst,
                crop_image=crop,
                width=width,
                height=height,
                append_filename=append_filename,
                save_txt=save_txt,
                pretxt_action=pretxt_action,
                flip=flip,
                split=split,
                caption_image=caption,
                caption_length=caption_length,
                caption_deepbooru=caption_deepbooru,
                split_threshold=split_threshold,
                overlap_ratio=overlap_ratio,
                subject_class=subject_class,
                subject=subject,
                replace_class=replace_class,
                restore_faces=restore_faces,
                face_model=face_model,
                upscale=upscale,
                upscale_ratio=upscale_ratio,
                scaler=scaler)
    finally:
        # Release model memory whether or not processing succeeded.
        if caption:
            shared.interrogator.send_blip_to_ram()
        if caption_deepbooru:
            deepbooru.model.stop()

    return "Processing complete.", ""
|
|
|
|
|
|
def prework(src,
            dst,
            crop_image,
            width,
            height,
            append_filename,
            save_txt,
            pretxt_action,
            flip,
            split,
            caption_image,
            caption_length,
            caption_deepbooru,
            split_threshold,
            overlap_ratio,
            subject_class,
            subject,
            replace_class,
            restore_faces,
            face_model,
            upscale,
            upscale_ratio,
            scaler):
    """Process every image in `src` and write results to `dst`.

    Depending on the flags, each image is smart-cropped around the CLIP-detected
    subject, optionally split into overlapping tiles first, captioned (CLIP
    and/or deepbooru), face-restored, upscaled, resized to `width` x `height`,
    and saved (optionally with a ``.txt`` caption sidecar and a mirrored copy).

    Caption text files are only written when `save_txt` is set and the caption
    is non-empty.  Progress is reported through ``shared.state``.
    """
    # Drop any stale 'models' module so project imports resolve freshly.
    try:
        del sys.modules['models']
    except KeyError:
        pass

    src = os.path.abspath(src)
    dst = os.path.abspath(dst)

    if not crop_image and not caption_image and not restore_faces and not upscale:
        print("Nothing to do.")
        shared.state.textinfo = "Nothing to do!"
        return

    assert src != dst, 'same directory specified as source and destination'

    os.makedirs(dst, exist_ok=True)

    files = os.listdir(src)

    shared.state.textinfo = "Preprocessing..."
    shared.state.job_count = len(files)

    def build_caption(image):
        # Build the final caption for `image`, merging any pre-existing caption
        # (sidecar .txt or, when append_filename is set, the cleaned filename)
        # with fresh CLIP/deepbooru output per `pretxt_action`.
        existing_caption = None
        if not append_filename:
            existing_caption_filename = os.path.splitext(filename)[0] + '.txt'
            if os.path.exists(existing_caption_filename):
                with open(existing_caption_filename, 'r', encoding="utf8") as file:
                    existing_caption = file.read()
        else:
            existing_caption = ''.join(c for c in filename if c.isalpha() or c in [" ", ","])

        caption = ""
        if caption_image:
            # Fix: caption the image passed in (e.g. a split tile), not the
            # outer loop's full image `img`.
            caption = interrogate_image(image, True)

        if caption_deepbooru:
            if len(caption) > 0:
                caption += ", "
            caption += deepbooru.model.tag_multi(image)

        if pretxt_action == 'prepend' and existing_caption:
            caption = existing_caption + ' ' + caption
        elif pretxt_action == 'append' and existing_caption:
            caption = caption + ' ' + existing_caption
        elif pretxt_action == 'copy' and existing_caption:
            caption = existing_caption

        caption = caption.strip()
        if replace_class and subject is not None and subject_class is not None:
            # Find and replace "a SUBJECT CLASS" in caption with subject name.
            if f"a {subject_class}" in caption:
                caption = caption.replace(f"a {subject_class}", subject)
            if subject_class in caption:
                caption = caption.replace(subject_class, subject)

        if 0 < caption_length < len(caption):
            # Trim to whole words while staying under the length limit.
            # Fixes the original `len(cap_test < caption_length)` typo
            # (TypeError) and the missing increment that made the loop spin
            # forever once the limit was reached.
            trimmed = ""
            for word in caption.split(" "):
                candidate = f"{trimmed} {word}"
                if len(candidate) < caption_length:
                    trimmed = candidate
                else:
                    break
            caption = trimmed

        return caption.strip()

    def save_pic_with_caption(image, img_index, existing_caption):
        # Save one image (and optional caption sidecar) under an indexed name.
        if not append_filename:
            filename_part = os.path.basename(os.path.splitext(filename)[0])
        else:
            filename_part = existing_caption

        basename = f"{img_index:05}-{subindex[0]}-{filename_part}"
        # Fix: preview the image actually being saved, not the loop's `img`.
        shared.state.current_image = image
        image.save(os.path.join(dst, f"{basename}.png"))

        # Fix: guard against a None caption (original called len(None)).
        if save_txt and existing_caption:
            with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file:
                file.write(existing_caption)

        subindex[0] += 1

    def save_pic(image, img_index, existing_caption=None):
        # Save the image, plus a mirrored copy when flipping is enabled.
        save_pic_with_caption(image, img_index, existing_caption=existing_caption)
        if flip:
            save_pic_with_caption(ImageOps.mirror(image), img_index, existing_caption=existing_caption)

    def split_pic(image, img_inverse_xy):
        # Yield overlapping width x height tiles along the image's long axis.
        if img_inverse_xy:
            from_w, from_h = image.height, image.width
            to_w, to_h = height, width
        else:
            from_w, from_h = image.width, image.height
            to_w, to_h = width, height
        h = from_h * to_w // from_w
        if img_inverse_xy:
            image = image.resize((h, to_w))
        else:
            image = image.resize((to_w, h))

        # Clamp to at least 2 tiles so y_step's (split_count - 1) divisor
        # can never be zero.
        split_count = max(2, math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio))))
        y_step = (h - to_h) / (split_count - 1)
        for i in range(split_count):
            y = int(y_step * i)
            if img_inverse_xy:
                yield image.crop((y, 0, y + to_h, to_w))
            else:
                yield image.crop((0, y, to_w, y + to_h))

    crop_clip = None
    if crop_image:
        split_threshold = max(0.0, min(1.0, split_threshold))
        overlap_ratio = max(0.0, min(0.9, overlap_ratio))
        crop_clip = CropClip()

    for index, imagefile in enumerate(tqdm.tqdm(files)):
        if shared.state.interrupted:
            break

        subindex = [0]
        filename = os.path.join(src, imagefile)
        try:
            img = Image.open(filename).convert("RGB")
        except Exception:
            # Skip non-image files in the source directory.
            continue

        shared.state.current_image = img
        shared.state.textinfo = f"Processing: {filename}"
        # Fix: always defined, so the resize check below can't raise NameError
        # when cropping and upscaling are both disabled.
        default_resize = False

        if crop_image:
            shared.state.textinfo = "Cropping..."

            # The short caption is only used as the CLIP-crop prompt, so only
            # interrogate here (and skip it entirely when a subject class is
            # supplied).
            if subject_class is not None and subject_class != "":
                short_caption = subject_class
            else:
                short_caption = interrogate_image(img)

            if img.height > img.width:
                ratio = (img.width * height) / (img.height * width)
                inverse_xy = False
            else:
                ratio = (img.height * width) / (img.width * height)
                inverse_xy = True

            if split and ratio < 1.0 and ratio <= split_threshold:
                for splitted in split_pic(img, inverse_xy):
                    # Fix: build_caption takes a single image argument (the
                    # original passed a second arg -> TypeError) and already
                    # performs the CLIP interrogation itself.
                    full_caption = build_caption(splitted)
                    save_pic(splitted, index, existing_caption=full_caption)

            # Centre a width:height-ratio crop on the CLIP-detected subject.
            im_data = crop_clip.get_center(img, prompt=short_caption)
            crop_width = im_data[1] - im_data[0]
            center_x = im_data[0] + (crop_width / 2)
            crop_height = im_data[3] - im_data[2]
            center_y = im_data[2] + (crop_height / 2)
            crop_ratio = crop_width / crop_height
            dest_ratio = width / height
            tgt_width = crop_width
            tgt_height = crop_height

            if crop_ratio != dest_ratio:
                if crop_width > crop_height:
                    tgt_height = crop_width / dest_ratio
                    tgt_width = crop_width
                else:
                    tgt_width = crop_height / dest_ratio
                    tgt_height = crop_height

            # Reverse the above if the target box grew past the source image.
            if tgt_width > img.width or tgt_height > img.height:
                if tgt_width > img.width:
                    tgt_width = img.width
                    tgt_height = tgt_width / dest_ratio
                else:
                    tgt_height = img.height
                    tgt_width = tgt_height / dest_ratio

            tgt_height = int(tgt_height)
            tgt_width = int(tgt_width)
            left = max(center_x - (tgt_width / 2), 0)
            right = min(center_x + (tgt_width / 2), img.width)
            top = max(center_y - (tgt_height / 2), 0)
            bottom = min(center_y + (tgt_height / 2), img.height)
            img = img.crop((left, top, right, bottom))
            default_resize = True
            shared.state.current_image = img

        if restore_faces:
            shared.state.textinfo = f"Restoring faces using {face_model}..."
            if face_model == "gfpgan":
                restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(img, dtype=np.uint8))
            else:
                restored_img = modules.codeformer_model.codeformer.restore(np.array(img, dtype=np.uint8),
                                                                           w=1.0)
            img = Image.fromarray(restored_img)
            shared.state.current_image = img

        if upscale:
            shared.state.textinfo = "Upscaling..."
            upscaler = shared.sd_upscalers[scaler]
            img = upscaler.scaler.upscale(img, upscale_ratio, upscaler.data_path)
            default_resize = True
            shared.state.current_image = img

        if default_resize:
            # Crop/upscale changed the geometry; snap back to the target size.
            img = images.resize_image(1, img, width, height)
            shared.state.current_image = img

        full_caption = build_caption(img)
        save_pic(img, index, existing_caption=full_caption)

        shared.state.nextjob()
|