diff --git a/README.md b/README.md
index a7a9427..e3567a6 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,2 @@
 # PBRemTools
-Precise background remover
-
-test
\ No newline at end of file
+Precise background remover tools.
diff --git a/scripts/convertor.py b/scripts/convertor.py
new file mode 100644
index 0000000..406d1e0
--- /dev/null
+++ b/scripts/convertor.py
@@ -0,0 +1,70 @@
+import numpy as np
+import pandas as pd
+from PIL import Image
+
+
+def rgb2df(img):
+    """
+    Convert an RGB image to a per-pixel DataFrame.
+
+    Args:
+        img (np.ndarray): Image array of shape (height, width, channels);
+            only the first three channels are read.
+
+    Returns:
+        df (pd.DataFrame): One row per pixel with columns ``x_l`` (row
+            index), ``y_l`` (column index), ``r``, ``g`` and ``b``.
+    """
+    h, w, _ = img.shape
+    # indexing='ij' makes x_l run over rows (0..h-1) and y_l over columns.
+    x_l, y_l = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
+    df = pd.DataFrame({
+        "x_l": x_l.ravel(),
+        "y_l": y_l.ravel(),
+        "r": img[:, :, 0].ravel(),
+        "g": img[:, :, 1].ravel(),
+        "b": img[:, :, 2].ravel(),
+    })
+    return df
+
+
+def df2rgba(img_df):
+    """
+    Convert a per-pixel DataFrame back to an RGBA image.
+
+    Args:
+        img_df (pd.DataFrame): One row per pixel with integer ``x_l`` (row)
+            and ``y_l`` (column) coordinates plus ``r``, ``g``, ``b`` and
+            ``a`` channel columns.
+
+    Returns:
+        img (np.ndarray): RGBA image of shape (height, width, 4), uint8.
+    """
+    rows = img_df["x_l"].to_numpy()
+    cols = img_df["y_l"].to_numpy()
+    img = np.zeros((rows.max() + 1, cols.max() + 1, 4), dtype=np.uint8)
+    # One indexed assignment replaces four pivot_table calls, which
+    # mean-aggregate and are far slower on per-pixel data.
+    img[rows, cols] = img_df[["r", "g", "b", "a"]].to_numpy().astype(np.uint8)
+    return img
+
+
+def _swap_rb(array):
+    """Swap the first and third channels; 2-D arrays pass through."""
+    if array.ndim == 2:
+        return array
+    if array.shape[2] == 3:
+        return array[:, :, ::-1]
+    if array.shape[2] == 4:
+        return array[:, :, [2, 1, 0, 3]]
+    return array
+
+
+def pil2cv(image):
+    """Convert a PIL image to a uint8 array with R and B swapped (BGR)."""
+    return _swap_rb(np.array(image, dtype=np.uint8))
+
+
+def cv2pil(image):
+    """Convert a BGR/BGRA/gray array to a PIL image (R and B swapped)."""
+    return Image.fromarray(_swap_rb(image.copy()))
diff --git a/scripts/launch.py b/scripts/launch.py
new file mode 100644
index 0000000..e26a930
--- /dev/null
+++ b/scripts/launch.py
@@ -0,0 +1,67 @@
+import gradio as gr
+import sys
+import cv2
+
+from td_abg import get_foreground
+from convertor import pil2cv
+
+
+class webui:
+    """Standalone Gradio front end for PBRemTools."""
+
+    def __init__(self):
+        self.demo = gr.Blocks()
+
+    def processing(self, input_image, td_abg_enabled, h_split, v_split, n_cluster, alpha, th_rate, cascadePSP_enabled, fast, psp_L):
+        """Remove the background of input_image; return (image, mask)."""
+        if input_image is None:
+            # Submit was pressed without an image; leave both outputs empty.
+            return None, None
+        # pil2cv swaps R and B, so swap them back before segmentation.
+        image = pil2cv(input_image)
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        mask, image = get_foreground(image, td_abg_enabled, h_split, v_split, n_cluster, alpha, th_rate, cascadePSP_enabled, fast, psp_L)
+        return image, mask
+
+    def launch(self, share):
+        """Build the UI, wire the Submit button and start the server."""
+        with self.demo:
+            with gr.Row():
+                with gr.Column():
+                    input_image = gr.Image(type="pil")
+                    with gr.Accordion("tile division ABG", open=True):
+                        with gr.Box():
+                            td_abg_enabled = gr.Checkbox(label="enabled", show_label=True)
+                            h_split = gr.Slider(1, 2048, value=256, step=4, label="horizontal split num", show_label=True)
+                            v_split = gr.Slider(1, 2048, value=256, step=4, label="vertical split num", show_label=True)
+
+                            n_cluster = gr.Slider(1, 1000, value=500, step=10, label="cluster num", show_label=True)
+                            alpha = gr.Slider(1, 255, value=100, step=1, label="alpha threshold", show_label=True)
+                            th_rate = gr.Slider(0, 1, value=0.1, step=0.01, label="mask content ratio", show_label=True)
+
+                    with gr.Accordion("cascadePSP", open=True):
+                        with gr.Box():
+                            cascadePSP_enabled = gr.Checkbox(label="enabled", show_label=True)
+                            fast = gr.Checkbox(label="fast", show_label=True)
+                            psp_L = gr.Slider(1, 2048, value=900, step=1, label="Memory usage", show_label=True)
+
+                    submit = gr.Button(value="Submit")
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Tab("output"):
+                            output_img = gr.Image()
+                        with gr.Tab("mask"):
+                            output_mask = gr.Image()
+            submit.click(
+                self.processing,
+                inputs=[input_image, td_abg_enabled, h_split, v_split, n_cluster, alpha, th_rate, cascadePSP_enabled, fast, psp_L],
+                outputs=[output_img, output_mask]
+            )
+
+        self.demo.queue()
+        self.demo.launch(share=share)
+
+
+if __name__ == "__main__":
+    # Passing "share" as the first CLI argument sets share=True.
+    webui().launch(share=len(sys.argv) > 1 and sys.argv[1] == "share")
diff --git a/scripts/main.py b/scripts/main.py
new file mode 100644
index 0000000..db3ae85
--- /dev/null
+++ b/scripts/main.py
@@ -0,0 +1,85 @@
+import os
+import io
+import json
+import numpy as np
+import cv2
+
+import gradio as gr
+
+import modules.scripts as scripts
+from modules import script_callbacks
+
+from td_abg import get_foreground
+from convertor import pil2cv
+
+
+"""
+body_estimation = None
+presets_file = os.path.join(scripts.basedir(), "presets.json")
+presets = {}
+
+try:
+    with open(presets_file) as file:
+        presets = json.load(file)
+except FileNotFoundError:
+    pass
+"""
+
+def processing(input_image, td_abg_enabled, h_split, v_split, n_cluster, alpha, th_rate, cascadePSP_enabled, fast, psp_L):
+    """Gradio click handler (plain function, so no `self`); returns (image, mask)."""
+    if input_image is None:
+        return None, None
+    image = pil2cv(input_image)
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    mask, image = get_foreground(image, td_abg_enabled, h_split, v_split, n_cluster, alpha, th_rate, cascadePSP_enabled, fast, psp_L)
+    return image, mask
+
+class Script(scripts.Script):
+    """Always-visible script entry whose ui() adds no controls."""
+    def title(self):
+        return "PBRemTools"
+
+    def show(self, is_img2img):
+        return scripts.AlwaysVisible
+
+    def ui(self, is_img2img):
+        return ()
+
+def on_ui_tabs():
+    """Build the PBRemTools tab and return it as a one-tuple list."""
+    with gr.Blocks(analytics_enabled=False) as PBRemTools:
+        with gr.Row():
+            with gr.Column():
+                input_image = gr.Image(type="pil")
+                with gr.Accordion("tile division ABG", open=True):
+                    with gr.Box():
+                        td_abg_enabled = gr.Checkbox(label="enabled", show_label=True)
+                        h_split = gr.Slider(1, 2048, value=256, step=4, label="horizontal split num", show_label=True)
+                        v_split = gr.Slider(1, 2048, value=256, step=4, label="vertical split num", show_label=True)
+
+                        n_cluster = gr.Slider(1, 1000, value=500, step=10, label="cluster num", show_label=True)
+                        alpha = gr.Slider(1, 255, value=100, step=1, label="alpha threshold", show_label=True)
+                        th_rate = gr.Slider(0, 1, value=0.1, step=0.01, label="mask content ratio", show_label=True)
+
+                with gr.Accordion("cascadePSP", open=True):
+                    with gr.Box():
+                        cascadePSP_enabled = gr.Checkbox(label="enabled", show_label=True)
+                        fast = gr.Checkbox(label="fast", show_label=True)
+                        psp_L = gr.Slider(1, 2048, value=900, step=1, label="Memory usage", show_label=True)
+
+                submit = gr.Button(value="Submit")
+            with gr.Row():
+                with gr.Column():
+                    with gr.Tab("output"):
+                        output_img = gr.Image()
+                    with gr.Tab("mask"):
+                        output_mask = gr.Image()
+        submit.click(
+            processing,
+            inputs=[input_image, td_abg_enabled, h_split, v_split, n_cluster, alpha, th_rate, cascadePSP_enabled, fast, psp_L],
+            outputs=[output_img, output_mask]
+        )
+
+    return [(PBRemTools, "PBRemTools", "pbremtools")]
+
+script_callbacks.on_ui_tabs(on_ui_tabs)
\ No newline at end of file
diff --git a/scripts/td_abg.py b/scripts/td_abg.py
new file mode 100644
index 0000000..7c1eba6
--- /dev/null
+++ b/scripts/td_abg.py
@@ -0,0 +1,168 @@
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from sklearn.cluster import KMeans, MiniBatchKMeans
+
+from convertor import rgb2df, df2rgba, cv2pil
+
+import gradio as gr
+import huggingface_hub
+import onnxruntime as rt
+import copy
+from PIL import Image
+
+import segmentation_refinement as refine
+
+
+# Declare Execution Providers
+providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+
+# Download and host the model
+model_path = huggingface_hub.hf_hub_download(
+    "skytnt/anime-seg", "isnetis.onnx")
+rmbg_model = rt.InferenceSession(model_path, providers=providers)
+
+
+def get_mask(img, s=1024):
+    """
+    Predict a foreground mask for img with the ONNX model.
+
+    Args:
+        img (np.ndarray): Image of shape (height, width, 3 or 4); it is
+            divided by 255 and a fourth channel is dropped.
+        s (int): Side length of the square model input.
+
+    Returns:
+        mask (np.ndarray): Mask of shape (height, width, 1).
+    """
+    img = (img / 255).astype(np.float32)
+    if img.shape[2] == 4:
+        img = img[..., :3]
+    dim = img.shape[2]
+    h0, w0 = img.shape[:-1]
+    # Scale the longer side to s; max(1, ...) keeps extreme aspect ratios
+    # from producing a zero-sized resize target.
+    if h0 > w0:
+        h, w = s, max(1, int(s * w0 / h0))
+    else:
+        h, w = max(1, int(s * h0 / w0)), s
+    ph, pw = s - h, s - w
+    # Centre the resized image on a zero-padded s x s canvas.
+    img_input = np.zeros([s, s, dim], dtype=np.float32)
+    img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = cv2.resize(img, (w, h))
+    img_input = np.transpose(img_input, (2, 0, 1))
+    img_input = img_input[np.newaxis, :]
+    mask = rmbg_model.run(None, {'img': img_input})[0][0]
+    mask = np.transpose(mask, (1, 2, 0))
+    # Crop the padding away, then resize back to the input resolution.
+    mask = mask[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w]
+    mask = cv2.resize(mask, (w0, h0))[:, :, np.newaxis]
+    return mask
+
+
+def assign_tile(row, tile_width, tile_height):
+    """
+    Return the tile name "tile_<row>_<col>" for one rgb2df pixel row.
+
+    rgb2df stores the image row in x_l and the image column in y_l, so the
+    tile column comes from y_l // tile_width and the tile row from
+    x_l // tile_height.
+    """
+    tile_x = row['y_l'] // tile_width
+    tile_y = row['x_l'] // tile_height
+    return f"tile_{tile_y}_{tile_x}"
+
+
+def rmbg_fn(img):
+    """
+    Remove the background with the segmentation model alone.
+
+    Returns:
+        (mask, img): The uint8 mask repeated to 3 channels, and the image
+        blended towards white by the mask with the mask appended as alpha.
+    """
+    mask = get_mask(img)
+    img = (mask * img + 255 * (1 - mask)).astype(np.uint8)
+    mask = (mask * 255).astype(np.uint8)
+    img = np.concatenate([img, mask], axis=2, dtype=np.uint8)
+    mask = mask.repeat(3, axis=2)
+    return mask, img
+
+
+def refinement(img, mask, fast, psp_L, device='cuda:0'):
+    """
+    Refine a 3-channel mask with CascadePSP.
+
+    Args:
+        img (np.ndarray): Image the mask belongs to.
+        mask (np.ndarray): 3-channel mask; converted to grayscale first.
+        fast (bool): Fast mode runs the global step only.
+        psp_L (int): Smaller L -> less memory usage; faster in fast mode.
+        device (str): Refiner device; can also be 'cpu'.
+
+    Returns:
+        mask: The refined mask returned by Refiner.refine.
+    """
+    mask = cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY)
+    refiner = refine.Refiner(device=device)
+    return refiner.refine(img, mask, fast=fast, L=psp_L)
+
+
+def get_foreground(img, td_abg_enabled, h_split, v_split, n_cluster, alpha, th_rate, cascadePSP_enabled, fast, psp_L):
+    """
+    Cut the foreground out of img and return (mask, img).
+
+    With td_abg_enabled, pixels are grouped by colour cluster and tile, and
+    a group is kept (alpha 255) when more than th_rate of its pixels exceed
+    alpha in the mask; all other pixels get alpha 0. With only
+    cascadePSP_enabled, the refined mask becomes the alpha channel. With
+    neither, rmbg_fn is used.
+    """
+    if td_abg_enabled:
+        mask = get_mask(img)
+        mask = (mask * 255).astype(np.uint8).repeat(3, axis=2)
+        if cascadePSP_enabled:
+            mask = refinement(img, mask, fast, psp_L)
+            mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
+        df = rgb2df(img)
+
+        image_height, image_width = img.shape[:2]
+        # int() guards against float slider values; max(1, ...) avoids
+        # zero-sized tiles when the image is smaller than the split count.
+        tile_width = max(1, image_width // int(h_split))
+        tile_height = max(1, image_height // int(v_split))
+
+        # Vectorised assign_tile: x_l is the image row, y_l the column.
+        tile_x = (df["y_l"] // tile_width).astype(str)
+        tile_y = (df["x_l"] // tile_height).astype(str)
+        df["tile"] = "tile_" + tile_y + "_" + tile_x
+
+        # KMeans cannot fit more clusters than there are pixels.
+        n_clusters = max(1, min(int(n_cluster), len(df)))
+        cls = MiniBatchKMeans(n_clusters=n_clusters, batch_size=100)
+        cls.fit(df[["r", "g", "b"]])
+        df["label"] = cls.labels_
+
+        mask_df = rgb2df(mask)
+        mask_df['bg_label'] = (mask_df['r'] > alpha) & (mask_df['g'] > alpha) & (mask_df['b'] > alpha)
+
+        img_df = df.copy()
+        img_df["bg_label"] = mask_df["bg_label"]
+        img_df["label"] = img_df["label"].astype(str) + "-" + img_df["tile"]
+        # Share of above-threshold pixels per (cluster, tile) group.
+        bg_rate = img_df.groupby("label")["bg_label"].mean()
+        keep = img_df["label"].isin(bg_rate[bg_rate > th_rate].index)
+        img_df["bg_cls"] = keep.astype(int)
+        img_df["a"] = np.where(keep, 255, 0)
+        img = df2rgba(img_df)
+    elif cascadePSP_enabled:
+        mask = get_mask(img)
+        mask = (mask * 255).astype(np.uint8)
+        refiner = refine.Refiner(device='cuda:0')
+        mask = refiner.refine(img, mask, fast=fast, L=psp_L)
+        img = np.dstack((img, mask))
+    else:
+        mask, img = rmbg_fn(img)
+
+    return mask, img