# unprompted/shortcodes/stable_diffusion/txt2mask.py
# NOTE: torchvision imports are deferred to after() so the module can be
# imported without pulling in torchvision at load time.
class Shortcode():
    """[txt2mask] shortcode: builds an inpainting mask from text prompt(s) using clipseg."""

    def __init__(self, Unprompted):
        self.Unprompted = Unprompted
        self.image_mask = None  # final PIL mask produced by run_block(), consumed/reset in after()
        self.show = False  # when True, after() appends the mask (and an overlay) to the output images
        self.description = "Creates an image mask from the content for use with inpainting."

    def run_block(self, pargs, kwargs, context, content):
        """Generate an image mask from the prompt(s) in `content` and wire it into the
        processing user vars so the next img2img pass runs as an inpaint.

        Returns "" (shortcodes emit no text), or None early if there is no init image.
        """
        # Heavy dependencies are imported lazily so they are only loaded when the shortcode runs.
        from lib_unprompted.stable_diffusion.clipseg.models.clipseg import CLIPDensePredT
        from PIL import ImageChops, Image, ImageOps
        import os.path
        import torch
        from torchvision import transforms
        from matplotlib import pyplot as plt
        import cv2
        import numpy
        from modules.images import flatten
        from modules.shared import opts

        # Nothing to mask without an init image (e.g. plain txt2img).
        if "init_images" not in self.Unprompted.shortcode_user_vars:
            return

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # How this mask combines with an existing brush mask: "add", "subtract" or "discard".
        brush_mask_mode = self.Unprompted.parse_advanced(kwargs["mode"], context) if "mode" in kwargs else "add"
        self.show = True if "show" in pargs else False
        self.legacy_weights = True if "legacy_weights" in pargs else False

        # Box-blur kernels used to soften the mask edges (0 disables smoothing).
        smoothing = int(self.Unprompted.parse_advanced(kwargs["smoothing"], context)) if "smoothing" in kwargs else 20
        smoothing_kernel = None
        if smoothing > 0:
            smoothing_kernel = numpy.ones((smoothing, smoothing), numpy.float32) / (smoothing * smoothing)

        neg_smoothing = int(self.Unprompted.parse_advanced(kwargs["neg_smoothing"], context)) if "neg_smoothing" in kwargs else 20
        neg_smoothing_kernel = None
        if neg_smoothing > 0:
            neg_smoothing_kernel = numpy.ones((neg_smoothing, neg_smoothing), numpy.float32) / (neg_smoothing * neg_smoothing)

        # Pad the mask by applying a dilation (positive padding) or erosion (negative padding).
        mask_padding = int(self.Unprompted.parse_advanced(kwargs["padding"], context) if "padding" in kwargs else 0)
        neg_mask_padding = int(self.Unprompted.parse_advanced(kwargs["neg_padding"], context) if "neg_padding" in kwargs else 0)
        padding_dilation_kernel = None
        if (mask_padding != 0):
            padding_dilation_kernel = numpy.ones((abs(mask_padding), abs(mask_padding)), numpy.uint8)
        neg_padding_dilation_kernel = None
        if (neg_mask_padding != 0):
            neg_padding_dilation_kernel = numpy.ones((abs(neg_mask_padding), abs(neg_mask_padding)), numpy.uint8)

        # Each delimiter-separated part of the content is an independent clipseg prompt.
        prompts = content.split(self.Unprompted.Config.syntax.delimiter)
        prompt_parts = len(prompts)

        if "negative_mask" in kwargs:
            negative_prompts = (self.Unprompted.parse_advanced(kwargs["negative_mask"], context)).split(self.Unprompted.Config.syntax.delimiter)
            negative_prompt_parts = len(negative_prompts)
        else:
            negative_prompts = None

        # Grayscale threshold (capped at 255) above which a pixel counts as masked.
        mask_precision = min(255, int(self.Unprompted.parse_advanced(kwargs["precision"], context) if "precision" in kwargs else 100))
        neg_mask_precision = min(255, int(self.Unprompted.parse_advanced(kwargs["neg_precision"], context) if "neg_precision" in kwargs else 100))

        def overlay_mask_part(img_a, img_b, mode):
            # "discard" intersects via the darker pixels; any other mode unions via the lighter ones.
            if (mode == "discard"):
                img_a = ImageChops.darker(img_a, img_b)
            else:
                img_a = ImageChops.lighter(img_a, img_b)
            return (img_a)

        def gray_to_pil(img):
            # Single-channel OpenCV array -> RGBA PIL image.
            return (Image.fromarray(cv2.cvtColor(img, cv2.COLOR_GRAY2RGBA)))

        def process_mask_parts(masks, mode, final_img=None, mask_precision=100, mask_padding=0, padding_dilation_kernel=None, smoothing_kernel=None):
            """Threshold each clipseg prediction into a b/w mask and overlay them onto final_img."""
            for i, mask in enumerate(masks):
                filename = f"mask_{mode}_{i}.png"
                plt.imsave(filename, torch.sigmoid(mask[0]))
                # TODO: Figure out how to convert the plot above to numpy instead of re-loading image
                img = cv2.imread(filename)
                if padding_dilation_kernel is not None:
                    # Positive padding grows the mask, negative padding shrinks it.
                    if (mask_padding > 0):
                        img = cv2.dilate(img, padding_dilation_kernel, iterations=1)
                    else:
                        img = cv2.erode(img, padding_dilation_kernel, iterations=1)
                if smoothing_kernel is not None:
                    img = cv2.filter2D(img, -1, smoothing_kernel)
                gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                (thresh, bw_image) = cv2.threshold(gray_image, mask_precision, 255, cv2.THRESH_BINARY)
                if (mode == "discard"):
                    bw_image = numpy.invert(bw_image)
                # Overlay this part onto the accumulated mask.
                bw_image = gray_to_pil(bw_image)
                if (i > 0 or final_img is not None):
                    bw_image = overlay_mask_part(bw_image, final_img, mode)
                final_img = bw_image
            return (final_img)

        def get_mask():
            """Run clipseg over the init image and return the combined 512x512 mask."""
            # load model
            model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64, complex_trans_conv=not self.legacy_weights)
            # Weights live alongside the clipseg package imported from lib_unprompted above.
            model_dir = f"{self.Unprompted.base_dir}/lib_unprompted/stable_diffusion/clipseg/weights"
            os.makedirs(model_dir, exist_ok=True)

            d64_filename = "rd64-uni.pth" if self.legacy_weights else "rd64-uni-refined.pth"
            d64_file = f"{model_dir}/{d64_filename}"
            d16_file = f"{model_dir}/rd16-uni.pth"

            # Download model weights if we don't have them yet
            if not os.path.exists(d64_file):
                print("Downloading clipseg model weights...")
                self.Unprompted.download_file(d64_file, f"https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download?path=%2F&files={d64_filename}")
                self.Unprompted.download_file(d16_file, "https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download?path=%2F&files=rd16-uni.pth")

            # non-strict, because we only stored decoder weights (not CLIP weights)
            model.load_state_dict(torch.load(d64_file), strict=False)
            model = model.eval().to(device=device)

            transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                transforms.Resize((512, 512)),
            ])
            flattened_input = flatten(self.Unprompted.shortcode_user_vars["init_images"][0], opts.img2img_background_color)
            img = transform(flattened_input).unsqueeze(0)

            # predict - one batched forward pass per prompt list
            with torch.no_grad():
                preds = model(img.repeat(prompt_parts, 1, 1, 1).to(device=device), prompts)[0].cpu()
                if (negative_prompts):
                    negative_preds = model(img.repeat(negative_prompt_parts, 1, 1, 1).to(device=device), negative_prompts)[0].cpu()

            if "image_mask" not in self.Unprompted.shortcode_user_vars:
                self.Unprompted.shortcode_user_vars["image_mask"] = None

            # In "add" mode, start from any existing brush mask so the prompt mask extends it.
            if (brush_mask_mode == "add" and self.Unprompted.shortcode_user_vars["image_mask"] is not None):
                final_img = self.Unprompted.shortcode_user_vars["image_mask"].convert("RGBA").resize((512, 512))
            else:
                final_img = None

            # process masking
            final_img = process_mask_parts(preds, "add", final_img, mask_precision, mask_padding, padding_dilation_kernel, smoothing_kernel)

            # process negative masking
            if (brush_mask_mode == "subtract" and self.Unprompted.shortcode_user_vars["image_mask"] is not None):
                self.Unprompted.shortcode_user_vars["image_mask"] = ImageOps.invert(self.Unprompted.shortcode_user_vars["image_mask"])
                self.Unprompted.shortcode_user_vars["image_mask"] = self.Unprompted.shortcode_user_vars["image_mask"].convert("RGBA").resize((512, 512))
                final_img = overlay_mask_part(final_img, self.Unprompted.shortcode_user_vars["image_mask"], "discard")
            if (negative_prompts):
                final_img = process_mask_parts(negative_preds, "discard", final_img, neg_mask_precision, neg_mask_padding, neg_padding_dilation_kernel, neg_smoothing_kernel)

            # Optionally expose the masked fraction of the image as a user variable.
            if "size_var" in kwargs:
                img_data = final_img.load()
                # Count number of transparent pixels
                black_pixels = 0
                total_pixels = 512 * 512
                for y in range(512):
                    for x in range(512):
                        pixel_data = img_data[x, y]
                        if (pixel_data[0] == 0 and pixel_data[1] == 0 and pixel_data[2] == 0):
                            black_pixels += 1
                subject_size = 1 - black_pixels / total_pixels
                self.Unprompted.shortcode_user_vars[kwargs["size_var"]] = subject_size

            return final_img

        # Set up processor parameters correctly
        self.image_mask = get_mask().resize((self.Unprompted.shortcode_user_vars["init_images"][0].width, self.Unprompted.shortcode_user_vars["init_images"][0].height))
        self.Unprompted.shortcode_user_vars["mode"] = 0
        self.Unprompted.shortcode_user_vars["mask_mode"] = 1
        self.Unprompted.shortcode_user_vars["image_mask"] = self.image_mask
        self.Unprompted.shortcode_user_vars["mask_for_overlay"] = self.image_mask
        self.Unprompted.shortcode_user_vars["latent_mask"] = None  # fixes inpainting full resolution

        if "save" in kwargs:
            self.image_mask.save(f"{self.Unprompted.parse_advanced(kwargs['save'],context)}.png")

        return ""

    def after(self, p=None, processed=None):
        """Optionally append the mask (and a segmentation overlay) to the results, then reset state."""
        from torchvision.utils import draw_segmentation_masks
        from torchvision.transforms.functional import pil_to_tensor, to_pil_image

        if self.image_mask and self.show:
            processed.images.append(self.image_mask)
            overlayed_init_img = draw_segmentation_masks(pil_to_tensor(p.init_images[0]), pil_to_tensor(self.image_mask.convert("L")) > 0)
            processed.images.append(to_pil_image(overlayed_init_img))
        # Always reset so a stale mask never leaks into the next run.
        self.image_mask = None
        self.show = False
        return processed

    def ui(self, gr):
        """Declare the Gradio controls for this shortcode's arguments."""
        gr.Radio(label="Mask blend mode 🡢 mode", choices=["add", "subtract", "discard"], value="add", interactive=True)
        gr.Checkbox(label="Show mask in output 🡢 show")
        gr.Checkbox(label="Use legacy weights 🡢 legacy_weights")
        gr.Number(label="Precision of selected area 🡢 precision", value=100, interactive=True)
        gr.Number(label="Precision of negative selected area 🡢 neg_precision", value=100, interactive=True)
        gr.Number(label="Padding radius in pixels 🡢 padding", value=0, interactive=True)
        gr.Number(label="Padding radius in pixels for negative mask 🡢 neg_padding", value=0, interactive=True)
        gr.Number(label="Smoothing radius in pixels 🡢 smoothing", value=20, interactive=True)
        gr.Number(label="Smoothing radius in pixels 🡢 neg_smoothing", value=20, interactive=True)
        gr.Textbox(label="Negative mask prompt 🡢 negative_mask", max_lines=1)
        # NOTE(review): the neg_precision/neg_padding/neg_smoothing controls below duplicate
        # the ones above — looks like a merge artifact; confirm which set should remain.
        gr.Number(label="Negative mask precision of selected area 🡢 neg_precision", value=100, interactive=True)
        gr.Number(label="Negative mask padding radius in pixels 🡢 neg_padding", value=0, interactive=True)
        gr.Number(label="Negative mask smoothing radius in pixels 🡢 neg_smoothing", value=20, interactive=True)
        gr.Textbox(label="Save the mask size to the following variable 🡢 size_var", max_lines=1)