mirror of https://github.com/vladmandic/automatic
187 lines
6.8 KiB
Python
187 lines
6.8 KiB
Python
from typing import List
|
|
import os
|
|
import cv2
|
|
import torch
|
|
import numpy as np
|
|
import diffusers
|
|
import huggingface_hub as hf
|
|
from PIL import Image
|
|
from modules import processing, shared, devices
|
|
|
|
|
|
# Maps the model name passed to face_id() to "<hub repo>/<weights file>";
# face_id() splits the value on the last path separator to obtain the two parts.
FACEID_MODELS = {
    title: f"h94/IP-Adapter-FaceID/{weights}"
    for title, weights in (
        ("FaceID Base", "ip-adapter-faceid_sd15.bin"),
        ("FaceID Plus v1", "ip-adapter-faceid-plus_sd15.bin"),
        ("FaceID Plus v2", "ip-adapter-faceid-plusv2_sd15.bin"),
        ("FaceID XL", "ip-adapter-faceid_sdxl.bin"),
        # Disabled entries, kept for reference:
        # ("FaceID Portrait v10", "ip-adapter-faceid-portrait_sd15.bin"),
        # ("FaceID Portrait v11", "ip-adapter-faceid-portrait-v11_sd15.bin"),
        # ("FaceID XL Plus v2", "ip-adapter-faceid_sdxl.bin"),
        # NOTE(review): the disabled "XL Plus v2" entry names the same file as "FaceID XL" - confirm before enabling
    )
}
|
|
# Adapter instance kept between face_id() calls, and the FACEID_MODELS key it was built for.
faceid_model = None
faceid_model_name = None

# Trace logging is active only when SD_FACE_DEBUG is present in the environment;
# otherwise debug() accepts any arguments and does nothing.
if "SD_FACE_DEBUG" in os.environ:
    debug = shared.log.trace
else:
    def debug(*args, **kwargs):  # pylint: disable=unused-argument
        """Discard the message: face debugging is disabled."""
        return None
|
|
|
|
|
|
def face_id(
    p: processing.StableDiffusionProcessing,
    app,
    source_images: List[Image.Image],
    model: str,
    override: bool,
    cache: bool,
    scale: float,
    structure: float,
):
    """Run IP-Adapter FaceID generation conditioned on faces found in the source images.

    Args:
        p: Processing object; supplies batch size, resolution, steps, cfg scale,
            prompts, negative prompts and seeds, and receives generation metadata
            in ``p.extra_generation_params``.
        app: Face analysis object; ``app.get(bgr_image)`` must return the detected
            faces (presumably an insightface ``FaceAnalysis`` - confirm with caller).
        source_images: Images to search for faces; the first face of each is used.
        model: Key of ``FACEID_MODELS`` selecting the adapter weights.
        override: When true, replace the scheduler of ``shared.sd_model`` with a
            DDIM scheduler before generating.
        cache: When true, keep the loaded adapter in module globals for reuse by
            the next call with the same ``model``; when false, always reload and
            release the adapter afterwards.
        scale: Adapter scale applied for the duration of the generation.
        structure: Passed as ``s_scale`` to "Plus" models only.

    Returns:
        List with the results of every ``generate()`` call that returned a list,
        or ``None`` when there are no input images, the adapter library or
        weights are unavailable, the model name is unknown, or no face was found.
    """
    global faceid_model, faceid_model_name  # pylint: disable=global-statement
    if source_images is None or len(source_images) == 0:
        shared.log.warning('FaceID: no input images')
        return None

    # heavy optional dependencies are imported lazily, only when FaceID is actually used
    from insightface.utils import face_align
    try:
        from ip_adapter.ip_adapter_faceid import (
            IPAdapterFaceID,
            IPAdapterFaceIDPlus,
            IPAdapterFaceIDXL,
            IPAdapterFaceIDPlusXL,
        )
        from ip_adapter.ip_adapter_faceid_separate import (
            IPAdapterFaceID as IPAdapterFaceIDPortrait,
        )
    except Exception as e:
        shared.log.error(f"FaceID incorrect version of ip_adapter: {e}")
        return None

    # unknown names are reported like every other failure instead of raising KeyError
    ip_ckpt = FACEID_MODELS.get(model)
    if ip_ckpt is None:
        shared.log.error(f"FaceID unknown model: {model}")
        return None
    folder, filename = os.path.split(ip_ckpt)  # hub repo id, weights file name
    basename, _ext = os.path.splitext(filename)
    try:
        # the hub client raises on failure rather than returning None
        model_path = hf.hf_hub_download(repo_id=folder, filename=filename, cache_dir=shared.opts.diffusers_dir)
    except Exception as e:
        shared.log.error(f"FaceID download failed: model={model} file={ip_ckpt} error={e}")
        return None
    if model_path is None:
        shared.log.error(f"FaceID download failed: model={model} file={ip_ckpt}")
        return None

    if override:
        shared.sd_model.scheduler = diffusers.DDIMScheduler(
            num_train_timesteps=1000,
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            clip_sample=False,
            set_alpha_to_one=False,
            steps_offset=1,
        )

    # derived from the model name on every call so that a cached adapter gets the
    # same generate() arguments as a freshly loaded one
    shortcut = "v2" in model
    if faceid_model is None or faceid_model_name != model or not cache:
        shared.log.debug(f"FaceID load: model={model} file={ip_ckpt}")
        # order matters: "XL Plus" must be tested before the broader "XL" and "Plus"
        if "XL Plus" in model:
            image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
            faceid_model = IPAdapterFaceIDPlusXL(
                sd_pipe=shared.sd_model,
                image_encoder_path=image_encoder_path,
                ip_ckpt=model_path,
                lora_rank=128,
                num_tokens=4,
                device=devices.device,
                torch_dtype=devices.dtype,
            )
        elif "XL" in model:
            faceid_model = IPAdapterFaceIDXL(
                sd_pipe=shared.sd_model,
                ip_ckpt=model_path,
                lora_rank=128,
                num_tokens=4,
                device=devices.device,
                torch_dtype=devices.dtype,
            )
        elif "Plus" in model:
            image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
            faceid_model = IPAdapterFaceIDPlus(
                sd_pipe=shared.sd_model,
                image_encoder_path=image_encoder_path,
                ip_ckpt=model_path,
                lora_rank=128,
                num_tokens=4,
                device=devices.device,
                torch_dtype=devices.dtype,
            )
        elif "Portrait" in model:
            faceid_model = IPAdapterFaceIDPortrait(
                sd_pipe=shared.sd_model,
                ip_ckpt=model_path,
                num_tokens=16,
                n_cond=5,
                device=devices.device,
                torch_dtype=devices.dtype,
            )
        else:
            faceid_model = IPAdapterFaceID(
                sd_pipe=shared.sd_model,
                ip_ckpt=model_path,
                lora_rank=128,
                num_tokens=4,
                device=devices.device,
                torch_dtype=devices.dtype,
            )
        faceid_model_name = model
    else:
        shared.log.debug(f"FaceID cached: model={model} file={ip_ckpt}")

    processed_images = []
    face_embeds = []
    face_images = []
    for i, source_image in enumerate(source_images):
        np_image = cv2.cvtColor(np.array(source_image), cv2.COLOR_RGB2BGR)
        faces = app.get(np_image)
        if len(faces) == 0:
            # detection stops at the first image without a face; faces collected so far are still used
            shared.log.error("FaceID: no faces found")
            break
        face = faces[0]  # only the first detected face of each image is used
        face_embeds.append(torch.from_numpy(face.normed_embedding).unsqueeze(0))
        face_images.append(face_align.norm_crop(np_image, landmark=face.kps, image_size=224))
        gender = "female" if face.gender == 0 else "male"
        shared.log.debug(f'FaceID face: i={i+1} score={face.det_score:.2f} gender={gender} age={face.age} bbox={face.bbox}')
        p.extra_generation_params[f"FaceID {i+1}"] = f'{face.det_score:.2f} {gender} {face.age}y'
    if len(face_embeds) == 0:
        shared.log.error("FaceID: no faces found")
        return None
    face_embeds = torch.cat(face_embeds, dim=0)

    ip_model_dict = {  # main generate dict
        "num_samples": p.batch_size,
        "width": p.width,
        "height": p.height,
        "num_inference_steps": p.steps,
        "scale": scale,
        "guidance_scale": p.cfg_scale,
        "faceid_embeds": face_embeds.shape,  # placeholder so the log below shows the shape, not the tensor
        "shortcut": shortcut,
    }
    # optional generate dict
    if "Plus" in model:
        ip_model_dict["s_scale"] = structure
    shared.log.debug(f"FaceID args: {ip_model_dict}")
    # bulky values are added only after the arguments have been logged
    if "Plus" in model:
        ip_model_dict["face_image"] = face_images
    ip_model_dict["faceid_embeds"] = face_embeds  # overwrite placeholder

    # run generate
    faceid_model.set_scale(scale)
    try:
        for i in range(p.n_iter):
            ip_model_dict.update({
                "prompt": p.all_prompts[i],
                "negative_prompt": p.all_negative_prompts[i],
                "seed": int(p.all_seeds[i]),
            })
            debug(f"FaceID: {ip_model_dict}")
            res = faceid_model.generate(**ip_model_dict)
            if isinstance(res, list):
                processed_images += res
    finally:
        # always reset the adapter scale, even when generation fails
        faceid_model.set_scale(0)

    if not cache:
        faceid_model = None
        faceid_model_name = None
    devices.torch_gc()

    p.extra_generation_params["IP Adapter"] = f"{basename}:{scale}"
    return processed_images
|