batch-face-swap/scripts/face_detect.py

463 lines
17 KiB
Python

FaceDetectDevelopment = False # set to True enable the development panel UI
import sys
import modules.scripts as scripts
base_dir = scripts.basedir()
sys.path.append(base_dir)
from scripts.txt2mask import *
from modules import images
from modules.shared import opts
from PIL import Image, ImageOps
import mediapipe as mp
import numpy as np
import math
import cv2
import os
from enum import IntEnum
class FaceMode(IntEnum):
# these can be in any order, but they must range from 0..N with no gaps
ORIGINAL=0,
OPENCV_NORMAL=1
OPENCV_SLOW=2
OPENCV_SLOWEST=3
CLIPSEG=4
DEVELOPMENT=5 # must be highest numbered to avoid breaking UI
DEFAULT=1 # the one the UI defaults to
# for the UI dropdown, we want an array that's indexed by the above numbers, and we don't want
# bugs if you tweak them and don't keep the array in sync, so initialize the array explicitly
# using them as indices:
face_mode_init = [
(FaceMode.ORIGINAL , "Fastest (mediapipe FaceMesh, max 5 faces)"),
(FaceMode.OPENCV_NORMAL , "Normal (OpenCV + FaceMesh)"),
(FaceMode.OPENCV_SLOW , "Slow (OpenCV + FaceMesh)"),
(FaceMode.OPENCV_SLOWEST, "Extremely slow (OpenCV + FaceMesh)"),
(FaceMode.CLIPSEG, "God knows (OpenCV + FaceMesh + ClipSeg)")
]
if FaceDetectDevelopment:
face_mode_init.append((FaceMode.DEVELOPMENT, "Development testing"))
face_mode_names = [None] * len(face_mode_init)
for index,name in face_mode_init:
face_mode_names[index] = name
class FaceDetectConfig:
def __init__(self, faceMode, face_x_scale=None, face_y_scale=0, minFace=0, multiScale=0, multiScale2=0, multiScale3=0, minNeighbors=0, mpconfidence=0, mpcount=0, debugSave=0, optimizeDetect=0):
self.faceMode = faceMode
self.face_x_scale = face_x_scale
self.face_y_scale = face_y_scale
self.minFaceSize = int(minFace)
self.multiScale = multiScale
self.multiScale2 = multiScale2
self.multiScale3 = multiScale3
self.minNeighbors = int(minNeighbors)
self.mpconfidence = mpconfidence
self.mpcount = mpcount
self.debugSave = debugSave
self.optimizeDetect = optimizeDetect
# allow this function to be called with just faceMode (used by updateVisualizer)
# or with an explicit list of values (from the main UI) but throw away those values
# if we're not in development
#
# for the "just faceMode" version we could put most of these as default arguments,
# but then we'd have to maintain the defaults both above and below, so easier on
# development to only put the correct defaults below:
if not FaceDetectDevelopment or (face_x_scale is None):
# If not in development mode, override all passed-in parameters to defaults
# also, if called from UpdateVisualizer, all the arguments will be default values
# we use None/0 in the parameter list above so we don't have to update the default values in two places in the code
self.face_x_scale=4.0
self.face_y_scale=2.5
self.minFace=30
self.multiScale=1.03
self.multiScale2=1
self.multiScale3=1
self.minNeighbors=5
self.mpconfidence=0.5
self.mpcount=5
self.debugSave=False
self.optimizeDetect=False
if True: # disable this to alter these modes specifically
if faceMode == FaceMode.OPENCV_NORMAL:
self.multiScale = 1.1
elif faceMode == FaceMode.OPENCV_SLOW:
self.multiScale = 1.01
elif faceMode == FaceMode.OPENCV_SLOWEST:
self.multiScale = 1.003
def getFacialLandmarks(image, facecfg):
height, width, _ = image.shape
mp_face_mesh = mp.solutions.face_mesh
with mp_face_mesh.FaceMesh(static_image_mode=True,max_num_faces=facecfg.mpcount,min_detection_confidence=facecfg.mpconfidence) as face_mesh:
height, width, _ = image.shape
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
result = face_mesh.process(image_rgb)
facelandmarks = []
if result.multi_face_landmarks is not None:
for facial_landmarks in result.multi_face_landmarks:
landmarks = []
for i in range(0, 468):
pt1 = facial_landmarks.landmark[i]
x = int(pt1.x * width)
y = int(pt1.y * height)
landmarks.append([x, y])
#cv2.circle(image, (x, y), 2, (100,100,0), -1)
#cv2.imshow("Cropped", image)
facelandmarks.append(np.array(landmarks, np.int32))
return facelandmarks
#
# code below here is public domain, cc0, unlicense -- stb
#
# try to get landmarks for a face located at rect
def getFacialLandmarkConvexHull(image, rect, facecfg):
height, width, _ = image.shape
# make a subimage to hand to FaceMesh
(x,y,w,h) = rect
face_center_x = (x + w//2)
face_center_y = (y + h//2)
# the new image is just 2x the size of the face
subrect_width = int(float(w) * facecfg.face_x_scale)
subrect_height = int(float(h) * facecfg.face_y_scale)
subrect_halfwidth = subrect_width // 2
subrect_halfheight = subrect_height // 2
subrect_width = subrect_halfwidth * 2;
subrect_height = subrect_halfheight * 2;
subrect_x_center = face_center_x
subrect_y_center = face_center_y
subimage = np.zeros((subrect_height, subrect_width, 3), np.uint8)
# this is the coordinates of the top left of the subimage relative to the original image
subrect_x0 = subrect_x_center - subrect_halfwidth
subrect_y0 = subrect_y_center - subrect_halfheight
# we allow room for up to 1/2 of a face adjacent
crop_face_x0 = face_center_x - w
crop_face_x1 = face_center_x + w
crop_face_y0 = face_center_y - h
crop_face_y1 = face_center_y + h
crop_face_x0 = max(crop_face_x0, 0 )
crop_face_y0 = max(crop_face_y0, 0)
crop_face_x1 = min(crop_face_x1, width )
crop_face_y1 = min(crop_face_y1, height)
# now crop the face coordinates down to the subrect as well
crop_face_x0 = max(crop_face_x0, subrect_x0)
crop_face_y0 = max(crop_face_y0, subrect_y0)
crop_face_x1 = min(crop_face_x1, subrect_x0 + subrect_width );
crop_face_y1 = min(crop_face_y1, subrect_y0 + subrect_height);
face_image = image[crop_face_y0:crop_face_y1, crop_face_x0:crop_face_x1]
# by construction the face image can't be larger than the subrect, but it can be smaller
subimage[crop_face_y0-subrect_y0:crop_face_y1-subrect_y0, crop_face_x0-subrect_x0:crop_face_x1-subrect_x0] = face_image
# store the face box in these coordinates for later use
face_rect = (x - subrect_x0,
y - subrect_y0,
w,
h)
# final face bounding rect must overlap at least 1/4 of pixels that CV2 expected, or it's not a match
min_match_area = w * h // 4;
landmarks = getFacialLandmarks(subimage, facecfg)
mp_face_mesh = mp.solutions.face_mesh
best_hull = None
best_area = min_match_area
for landmark in landmarks:
convexhull = cv2.convexHull(landmark)
bounds = cv2.boundingRect(convexhull)
# compute intersection with face_rect
x0 = max(face_rect[0], bounds[0])
y0 = max(face_rect[1], bounds[1])
x1 = min(face_rect[0] + face_rect[2], bounds[0] + bounds[2])
y1 = min(face_rect[1] + face_rect[3], bounds[1] + bounds[3])
area = (x1-x0) * (y1-y0)
if area > best_area:
best_area = area
best_hull = convexhull
if best_hull is not None:
# translate the convex hull back into the coordinate space of the passed-in image
for i in range(len(best_hull)):
best_hull[i][0][0] += subrect_x0
best_hull[i][0][1] += subrect_y0
return best_hull
def getFacesClipseg(image, blackImage, rect, detectionPrompt):
# make a subimage to hand to FaceMesh
(x,y,w,h) = rect
#TODO Find the way to extend face image even if the face is at the edge of the image
face_image = image[y-50:y+h+50, x-50:x+w+50]
# Convert the cropped portion to PIL image
face = Image.fromarray(face_image)
b, g, r = face.split()
face = Image.merge("RGB", (r, g, b))
# Do something to the face (e.g. apply a filter)
face = txt2mask(face, detectionPrompt)
# face.show()
# Convert the face back to numpy array
face_mask = np.array(face)
face_mask = face_mask[..., :3]
# Paste the face back to the original image
blackImage[y-50:y+h+50, x-50:x+w+50] = face_mask
face = blackImage
return face, blackImage
def contractRect(r):
(x0,y0,w,h) = r
hw,hh = w/2, h/2
xc,yc = x0+hw, y0+hh
x0 = xc - hw*0.85
y0 = yc - hh*0.85
x1 = xc + hw*0.85
y1 = yc + hh*0.85
return (x0,y0,x1,y1)
def rectangleListOverlap(rlist, rect):
# contract rect slightly
rx0,ry0,rx1,ry1 = contractRect(rect)
for r in rlist:
x0,y0,x1,y1 = contractRect(r)
if x0 < rx1 and x1 > rx0 and y0 < ry1 and y1 > ry0:
return r
return None
# use cv2 detectMultiScale directly
def getFaceRectanglesSimple(image, known_face_rects, facecfg):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
face_cascade = cv2.CascadeClassifier("extensions/batch-face-swap/scripts/haarcascade_frontalface_default.xml")
faces = face_cascade.detectMultiScale(gray, scaleFactor=facecfg.multiScale, minNeighbors=facecfg.minNeighbors, minSize=(facecfg.minFaceSize,facecfg.minFaceSize))
all_faces = []
for r in faces:
if rectangleListOverlap(known_face_rects, r) is None:
all_faces.append(r)
return all_faces
# use cv2 detectMultiScale at multiple scales
def getFaceRectangles(image, known_face_rects, facecfg):
if not facecfg.optimizeDetect:
return getFaceRectanglesSimple(image, known_face_rects, facecfg)
height, width, _ = image.shape
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
face_cascade = cv2.CascadeClassifier("extensions/batch-face-swap/scripts/haarcascade_frontalface_default.xml")
all_faces = []
minsize = facecfg.minFaceSize
# naiveScale is the scale between successive detections
naiveScale = facecfg.multiScale
partition_count = int(facecfg.multiScale2+0.5)
partition_count = max(partition_count, 1)
effectiveScale = math.pow(naiveScale, partition_count)
current = gray
total_scale = 1
resize_scale = naiveScale
for i in range(0,partition_count):
faces = face_cascade.detectMultiScale(current, scaleFactor=effectiveScale, minNeighbors=facecfg.minNeighbors, minSize=(minsize,minsize))
new_faces = []
for rorig in faces:
r = [ int(rorig[0] * total_scale), int(rorig[1] * total_scale), int(rorig[2] * total_scale), int(rorig[3] * total_scale) ]
if rectangleListOverlap(known_face_rects, r) is None:
if rectangleListOverlap(all_faces, r) is None:
new_faces.append(r)
all_faces.extend(new_faces)
total_scale *= resize_scale
width = int(width / total_scale)
height = int(height / total_scale)
current = cv2.resize(gray, (width, height), interpolation=cv2.INTER_LANCZOS4)
return all_faces
#####################################################################################################################
#
# various attempts at optimizing. some of them were better, but a lot more
# work is needed to get something good
#
# note that these are not fully compatible with the rest of the code,
# as they've changed since it was written
#
#
def getFaceRectangles4(image, facecfg):
if not facecfg.optimizeDetect:
return getFaceRectanglesSimple(image, facecfg)
height, width, _ = image.shape
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
face_cascade = cv2.CascadeClassifier("extensions/batch-face-swap/scripts/haarcascade_frontalface_default.xml")
all_faces = []
resize_scale = facecfg.multiScale
trueScale = facecfg.multiScale2
overlapScale = facecfg.multiScale3
size = min(width,height)
minsize = facecfg.minFaceSize
# run the smallest detectors from 30..60
current = gray
total_scale = 1.0
#trueScale = trueScale * trueScale
while current.shape[0] > minsize and current.shape[1] > minsize:
height, width = current.shape[0], current.shape[1]
maxsize = int(minsize * resize_scale * overlapScale)
faces = face_cascade.detectMultiScale(current, scaleFactor=trueScale, minNeighbors=facecfg.minNeighbors, minSize=(minsize,minsize), maxSize=(maxsize,maxsize))
new_faces = []
for rorig in faces:
r = [ int(rorig[0] * total_scale), int(rorig[1] * total_scale), int(rorig[2] * total_scale), int(rorig[3] * total_scale) ]
# clamp detected shape back to range, this shou'dn't be necessary
orig = r.copy()
if r[0]+r[2] > gray.shape[1]:
excess = r[0] + r[2] - gray.shape[1]
r[2] -= excess//2
r[0] = gray.shape[1] - r[2]
if r[1]+r[3] > gray.shape[0]:
excess = r[1] + r[3] - gray.shape[0]
r[3] -= excess//2
r[1] = gray.shape[0] - r[3]
if orig[0] != r[0] or orig[1] != r[1] or orig[2] != r[2] or orig[3] != r[3]:
print( "Clamped bad rect from " + str(orig) + " to " + str(r) + " for image size " + str((gray.shape[1],gray.shape[0])))
overlap = rectangleListOverlap(all_faces, r)
if overlap is None:
new_faces.append((r, (minsize*total_scale,maxsize*total_scale)))
all_faces.extend(new_faces)
width = int(width / resize_scale)
height = int(height / resize_scale)
current = cv2.resize(gray, (width, height), interpolation=cv2.INTER_AREA)
total_scale *= resize_scale
if width < 600 and height < 600:
resize_scale = 10
trueScale = math.pow(facecfg.multiScale2,0.5)
else:
trueScale = facecfg.multiScale2
for i in range(0,len(all_faces)):
r,_ = all_faces[i]
all_faces[i] = r
return all_faces
# use cv2 detectMultiScale at multiple scales
def getFaceRectangles3(image, facecfg):
if facecfg.multiScale <= 1.1:
return getFaceRectanglesSimple(image, facecfg)
height, width, _ = image.shape
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
face_cascade = cv2.CascadeClassifier("extensions/batch-face-swap/scripts/haarcascade_frontalface_default.xml")
all_faces = []
size = min(width,height)
while size >= facecfg.minFaceSize:
maxsize = size
minsize = int(size / facecfg.multiScale)
# make sure there's some overlap
maxsize = int(maxsize*1.1 + 1)
minsiez = int(minsize/1.1 - 1)
ratio = float(maxsize) / float(minsize)
stepCount = (maxsize - minsize) * facecfg.multiScale2
scale = math.exp(math.log(ratio) / (stepCount+1))
print( f"Compute scale factor {scale} to go from {minsize} to {maxsize} in {stepCount} steps" )
faces = face_cascade.detectMultiScale(gray, scaleFactor=scale, minNeighbors=facecfg.minNeighbors, minSize=(minsize,minsize), maxSize=(maxsize,maxsize))
new_faces = []
for r in faces:
overlap = rectangleListOverlap(all_faces, r)
if overlap is None:
new_faces.append((r, (minsize,maxsize)))
all_faces.extend(new_faces)
size = minsize
for i in range(0,len(all_faces)):
r,_ = all_faces[i]
all_faces[i] = r
return all_faces
# use cv2 detectMultiScale at multiple scales
def getFaceRectangles2(image, facecfg):
if facecfg.multiScale < 1.5:
return getFaceRectanglesSimple(image, facecfg)
height, width, _ = image.shape
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
face_cascade = cv2.CascadeClassifier("extensions/batch-face-swap/scripts/haarcascade_frontalface_default.xml")
all_faces = []
stepSize = int(facecfg.multiScale)
for size in range(facecfg.minFaceSize, min(width,height), stepSize):
# detect faces in the range size..stepSize
minsize = size
maxsize = size + stepSize+1
ratio = float(maxsize) / float(size)
# pick a scale factor so we take about stepSize*1.2 steps to go from size to maxsize
# scale^(stepsize*1.2) = ratio
# (stepsize*1.2) * log(scale) = log(ratio)
# log(scale) = log(ratio) / (stepsize * 1.2)
scale = math.exp(math.log(ratio) / (float(stepSize)*1.1))
faces = face_cascade.detectMultiScale(gray, scaleFactor=scale, minNeighbors=facecfg.minNeighbors, minSize=(minsize,minsize), maxSize=(maxsize,maxsize))
new_faces = []
for r in faces:
overlap = rectangleListOverlap(all_faces, r)
if overlap is None:
new_faces.append((r, (minsize,maxsize)))
#else:
#(rect,size) = overlap
#print( "Duplicate face: " + str(r) + " in size range " + str((minsize,maxsize)) + " overlapped with " + str(rect) + " from size range " + str(size))
all_faces.extend(new_faces)
for i in range(0,len(all_faces)):
r,_ = all_faces[i]
all_faces[i] = r
return all_faces