Fix the way the grid image is discarded (should fix hires fix). Add comments. Fix grid generation.

pull/118/head
Tran Xen 2023-07-02 23:08:10 +02:00
parent b26a0015b3
commit 0e8a518a3d
4 changed files with 339 additions and 102 deletions

View File

@ -33,7 +33,7 @@ args=[
# The args for roop can be found by
# requests.get(url=f'{address}/sdapi/v1/script-info')
prompt = "(8k, best quality, masterpiece, ultra highres:1.2),Realistic image style,Vertical orientation, Man wearing suit"
prompt = "(8k, best quality, masterpiece, ultra highres:1.2),Realistic image style,Vertical orientation, Man wearing suit, Einstein"
neg = "ng_deepnegative_v1_75t, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, normal quality, ((monochrome)), ((grayscale)), (verybadimagenegative_v1.3:0.8), negative_hand-neg, (lamp), badhandv4"
payload = {
"prompt": prompt,

View File

@ -1,6 +1,9 @@
import glob
import importlib
from scripts import swapper, roop_logging, roop_version, cimage, imgutils, upscaling
from scripts import (cimage, imgutils, roop_logging, roop_version, swapper,
upscaling)
#Reload all the modules when using "apply and restart"
importlib.reload(swapper)
importlib.reload(roop_logging)
@ -9,13 +12,15 @@ importlib.reload(cimage)
importlib.reload(imgutils)
importlib.reload(upscaling)
import base64, io
import base64
import io
import json
import os
import tempfile
from dataclasses import dataclass, fields
from pprint import pformat, pprint
from typing import Dict, List, Set, Tuple, Union
from scripts.cimage import convert_to_sd
import cv2
import dill as pickle
import gradio as gr
@ -25,7 +30,7 @@ import onnx
import pandas as pd
import torch
from insightface.app.common import Face
from modules import script_callbacks, scripts, shared, processing
from modules import processing, script_callbacks, scripts, shared
from modules.face_restoration import FaceRestoration
from modules.images import save_image
from modules.processing import (Processed, StableDiffusionProcessing,
@ -36,26 +41,48 @@ from modules.upscaler import Upscaler, UpscalerData
from onnx import numpy_helper
from PIL import Image
from scripts.roop_logging import logger
from scripts.roop_version import version_flag
from scripts.cimage import convert_to_sd
from scripts.imgutils import (create_square_image, cv2_to_pil, pil_to_cv2,
pil_to_torch, torch_to_pil)
from scripts.roop_logging import logger
from scripts.roop_version import version_flag
from scripts.upscaling import UpscaleOptions, upscale_image
EXTENSION_PATH=os.path.join("extensions","sd-webui-roop")
def get_models():
    """
    Retrieve a list of face swap model files.

    Searches the extension's own "models" directory and the webui-level
    "models/roop" directory, returning the paths of every file with a
    supported extension (currently only ".onnx").

    Returns:
        list: File paths of the model files found.
    """
    # Models bundled inside the extension directory
    models_path = os.path.join(scripts.basedir(), EXTENSION_PATH, "models", "*")
    models = glob.glob(models_path)
    # Additional models directory at the webui level
    models_path = os.path.join(scripts.basedir(), "models", "roop", "*")
    models += glob.glob(models_path)
    # Keep only files with a supported extension
    models = [x for x in models if x.endswith(".onnx")]
    return models
def get_face_checkpoints():
    """
    Retrieve a list of face checkpoint paths.

    Searches for face files with the ".pkl" extension in the
    "models/roop/faces" directory and returns their paths.

    Returns:
        list: Face checkpoint paths, with the string "None" prepended as
        the first element (used as the "no checkpoint" choice in the UI).
    """
    faces_path = os.path.join(scripts.basedir(), "models", "roop", "faces", "*.pkl")
    faces = glob.glob(faces_path)
    return ["None"] + faces
@ -174,7 +201,8 @@ def compare(img1, img2):
return "You need 2 images to compare"
import tempfile
def extract_faces(files, extract_path, face_restorer_name, face_restorer_visibility,upscaler_name,upscaler_scale, upscaler_visibility):
if not extract_path :
tempfile.mkdtemp()
@ -201,7 +229,17 @@ def extract_faces(files, extract_path, face_restorer_name, face_restorer_visibi
def save(batch_files, name):
def build_face_checkpoint_and_save(batch_files, name):
"""
Builds a face checkpoint, swaps faces, and saves the result to a file.
Args:
batch_files (list): List of image file paths.
name (str): Name of the face checkpoint
Returns:
PIL.Image.Image or None: Resulting swapped face image if successful, otherwise None.
"""
batch_files = batch_files or []
print("Build", name, [x.name for x in batch_files])
faces = swapper.get_faces_from_img_files(batch_files)
@ -247,7 +285,7 @@ def save(batch_files, name):
def explore(model_path):
def explore_onnx_faceswap_model(model_path):
data = {
'Node Name': [],
'Op Type': [],
@ -350,9 +388,9 @@ def tools_ui():
)
upscale_options = upscaler_ui()
explore_btn.click(explore, inputs=[model], outputs=[explore_result_text])
explore_btn.click(explore_onnx_faceswap_model, inputs=[model], outputs=[explore_result_text])
compare_btn.click(compare, inputs=[img1, img2], outputs=[compare_result_text])
generate_checkpoint_btn.click(save, inputs=[batch_files, name], outputs=[preview])
generate_checkpoint_btn.click(build_face_checkpoint_and_save, inputs=[batch_files, name], outputs=[preview])
extract_btn.click(extract_faces, inputs=[extracted_source_files, extract_save_path]+upscale_options, outputs=[extracted_faces])
def on_ui_tabs() :
@ -384,12 +422,12 @@ class FaceSwapScript(scripts.Script):
)
with gr.Row() :
face = gr.inputs.Dropdown(
choices=get_faces(),
label="Face Checkpoint",
choices=get_face_checkpoints(),
label="Face Checkpoint (precedence over reference face)",
)
refresh = gr.Button(value='', variant='tool')
def refresh_fn(selected):
return gr.Dropdown.update(value=selected, choices=get_faces())
return gr.Dropdown.update(value=selected, choices=get_face_checkpoints())
refresh.click(fn=refresh_fn,inputs=face, outputs=face)
with gr.Row():
@ -495,7 +533,7 @@ class FaceSwapScript(scripts.Script):
p.init_images = init_images
def process_images_unit(self, unit, images, infos = None, processed = None) :
def process_images_unit(self, unit, images, infos = None) :
if unit.enable :
result_images = []
result_infos = []
@ -512,47 +550,57 @@ class FaceSwapScript(scripts.Script):
else :
logger.info("blend all faces together")
src_faces = [unit.blended_faces]
if not processed or img.width == processed.width and img.height == processed.height :
for i,src_face in enumerate(src_faces):
logger.info(f"Process face {i}")
result: swapper.ImageResult = swapper.swap_face(
unit.reference_face if unit.reference_face is not None else src_face,
src_face,
img,
faces_index=unit.faces_index,
model=self.model,
same_gender=unit.same_gender,
for i,src_face in enumerate(src_faces):
logger.info(f"Process face {i}")
result: swapper.ImageResult = swapper.swap_face(
unit.reference_face if unit.reference_face is not None else src_face,
src_face,
img,
faces_index=unit.faces_index,
model=self.model,
same_gender=unit.same_gender,
)
if result.similarity and all([result.similarity.values()!=0]+[x >= unit.min_sim for x in result.similarity.values()]) and all([result.ref_similarity.values()!=0]+[x >= unit.min_ref_sim for x in result.ref_similarity.values()]):
result_infos.append(f"{info}, similarity = {result.similarity}, ref_similarity = {result.ref_similarity}")
result_images.append(result.image)
else:
logger.info(
f"skip, similarity to low, sim = {result.similarity} (target {unit.min_sim}) ref sim = {result.ref_similarity} (target = {unit.min_ref_sim})"
)
if result.similarity and all([result.similarity.values()!=0]+[x >= unit.min_sim for x in result.similarity.values()]) and all([result.ref_similarity.values()!=0]+[x >= unit.min_ref_sim for x in result.ref_similarity.values()]):
result_infos.append(f"{info}, similarity = {result.similarity}, ref_similarity = {result.ref_similarity}")
result_images.append(result.image)
else:
logger.info(
f"skip, similarity to low, sim = {result.similarity} (target {unit.min_sim}) ref sim = {result.ref_similarity} (target = {unit.min_ref_sim})"
)
logger.info(f"{len(result_images)} images processed")
return (result_images, result_infos)
return (images, infos)
def postprocess(self, p: StableDiffusionProcessing, processed: Processed, *args):
    """
    Swap faces in the generated images once processing is done.

    When at least one unit is enabled: the grid image (present when
    batch_size > 1) is discarded before swapping, every enabled unit is
    applied in turn, results are optionally upscaled and saved, and a new
    grid built from the swapped images is prepended to the results.
    """
    # NOTE(review): kept from the original but unused in this method —
    # confirm nothing else relies on these before removing.
    orig_images = processed.images
    orig_infos = processed.infotexts
    if any(u.enable for u in self.units):
        # Work on copies so `processed` is only mutated at the end
        result_images = processed.images[:]
        result_infos = processed.infotexts[:]
        if p.batch_size > 1:
            # With batch_size > 1 the first image is the grid: remove it so
            # it is not face-swapped.
            result_images = result_images[1:]
            result_infos = result_infos[1:]
            logger.info("Discard grid image from swapping process. This could induce bugs with some extensions.")
        for i, unit in enumerate(self.units):
            if unit.enable and unit.swap_in_generated:
                (result_images, result_infos) = self.process_images_unit(unit, result_images, result_infos)
                logger.info(f"unit {i+1}> processed : {len(result_images)}, {len(result_infos)}")
        for i, img in enumerate(result_images):
            if self.upscale_options is not None:
                result_images[i] = upscale_image(img, self.upscale_options)
            if p.outpath_samples:
                save_image(result_images[i], p.outpath_samples, "swapped")
        if len(result_images) > 1:
            try:
                # prepend a grid built from the swapped images to the results
                result_images = [create_square_image(result_images)] + result_images
            except Exception as e:
                logger.error("Error building result grid %s", e)
        processed.images = result_images
        processed.infotexts = result_infos

View File

@ -38,27 +38,54 @@ def pil_to_torch(pil_images):
return torch_image
from collections import Counter
def create_square_image(image_list):
    """
    Combine several same-sized images into a square-ish grid image.

    Images whose size differs from the most common size in the list are
    discarded before building the grid.

    Args:
        image_list (list): PIL Image objects to be combined.

    Returns:
        PIL.Image.Image: The resulting grid image, or None when fewer than
        two images (after size filtering) are available.
    """
    # Guard against an empty list: most_common(1)[0] would raise IndexError
    if not image_list:
        return None
    # Count the occurrences of each image size and keep only images of the
    # most common size, so the grid cells are uniform
    size_counter = Counter(image.size for image in image_list)
    common_size = size_counter.most_common(1)[0][0]
    image_list = [image for image in image_list if image.size == common_size]
    size = common_size
    if len(image_list) > 1:
        num_images = len(image_list)
        # Grid dimensions chosen to be as close to square as possible
        rows = isqrt(num_images)
        cols = ceil(num_images / rows)
        # Overall pixel size of the grid image
        square_size = (cols * size[0], rows * size[1])
        square_image = Image.new("RGB", square_size)
        # Paste each image at its grid position (row-major order)
        for i, image in enumerate(image_list):
            row = i // cols
            col = i % cols
            square_image.paste(image, (col * size[0], row * size[1]))
        return square_image
    # Not enough same-sized images to build a grid
    return None

View File

@ -1,113 +1,260 @@
import copy
import os
from dataclasses import dataclass
from typing import List, Union, Dict, Set, Tuple
from pprint import pprint
from typing import Dict, List, Set, Tuple, Union
import cv2
import numpy as np
from PIL import Image
import insightface
import numpy as np
import onnxruntime
from scripts.roop_logging import logger
from pprint import pprint
from insightface.app.common import Face
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
from scripts.imgutils import pil_to_cv2, cv2_to_pil
from scripts.imgutils import cv2_to_pil, pil_to_cv2
from scripts.roop_logging import logger
providers = ["CPUExecutionProvider"]
def cosine_similarity_face(face1, face2) -> float:
    """
    Calculate the cosine similarity between two face embeddings.

    Args:
        face1 (Face): The first face object containing an embedding.
        face2 (Face): The second face object containing an embedding.

    Returns:
        float: The similarity, clamped to [0, 1]; 1 indicates identical
        embeddings, 0 completely dissimilar (negative scores are clamped
        to 0 to keep the result non-negative).
    """
    # cosine_similarity expects 2D inputs of shape (n_samples, n_features)
    vec1 = face1.embedding.reshape(1, -1)
    vec2 = face2.embedding.reshape(1, -1)
    similarity = cosine_similarity(vec1, vec2)
    # Clamp to a non-negative similarity score
    return max(0, similarity[0, 0])
def compare_faces(img1: Image.Image, img2: Image.Image) -> float:
    """
    Compare the similarity of the first face found in each of two images.

    Args:
        img1: The first image containing a face.
        img2: The second image containing a face.

    Returns:
        float: Cosine similarity between the two faces (0 to 1), or -1
        when at least one image contains no detectable face.
    """
    # Extract the first detected face from each image (None when absent)
    detected = [
        get_or_default(get_faces(pil_to_cv2(img)), 0, None)
        for img in (img1, img2)
    ]
    if any(face is None for face in detected):
        # One or both images contain no detectable face
        return -1
    return cosine_similarity_face(detected[0], detected[1])
# Global cache for the lazily-initialized analysis model
ANALYSIS_MODEL = None


def getAnalysisModel():
    """
    Return the face analysis model, creating it on first use.

    Returns:
        insightface.app.FaceAnalysis: The cached analysis model.
    """
    global ANALYSIS_MODEL
    if ANALYSIS_MODEL is not None:
        # Already initialized: reuse the cached instance
        return ANALYSIS_MODEL
    logger.info("Load analysis model, will take some time.")
    # First call: build the model with the configured providers
    ANALYSIS_MODEL = insightface.app.FaceAnalysis(
        name="buffalo_l", providers=providers
    )
    return ANALYSIS_MODEL
FS_MODEL = None  # Cached face swap model instance.
CURRENT_FS_MODEL_PATH = None  # Path the cached face swap model was loaded from.


def getFaceSwapModel(model_path: str):
    """
    Return the face swap model, (re)loading it when the path changes.

    Args:
        model_path (str): Path to the face swap model.

    Returns:
        The insightface model loaded from `model_path`.
    """
    global FS_MODEL
    global CURRENT_FS_MODEL_PATH
    # Reload only when the requested path differs from the cached one
    needs_reload = CURRENT_FS_MODEL_PATH is None or CURRENT_FS_MODEL_PATH != model_path
    if needs_reload:
        CURRENT_FS_MODEL_PATH = model_path
        FS_MODEL = insightface.model_zoo.get_model(model_path, providers=providers)
    return FS_MODEL
def get_faces(img_data: np.ndarray, det_size=(640, 640)) -> List[Face]:
    """
    Detect and retrieve faces from an image using the analysis model.

    Args:
        img_data (np.ndarray): The image data as a NumPy array.
        det_size (tuple): Desired detection size (width, height).
            Defaults to (640, 640).

    Returns:
        list: Detected faces sorted by the x-coordinate of their bounding
        box, or an empty list when detection/sorting fails.
    """
    # Deep copy the analysis model: det_size is otherwise attached to the
    # shared model instance and could not be changed per call.
    face_analyser = copy.deepcopy(getAnalysisModel())
    face_analyser.prepare(ctx_id=0, det_size=det_size)
    # Run detection on the image
    face = face_analyser.get(img_data)
    # No faces found: retry recursively with a smaller detection size
    # (down to 320x320), which can help on images with small faces.
    if len(face) == 0 and det_size[0] > 320 and det_size[1] > 320:
        det_size_half = (det_size[0] // 2, det_size[1] // 2)
        return get_faces(img_data, det_size=det_size_half)
    try:
        # Sort faces left-to-right by the x-coordinate of the bounding box
        return sorted(face, key=lambda x: x.bbox[0])
    except Exception:
        return []
@dataclass
class ImageResult:
    """
    Result of an image face-swap operation.
    """

    # The image object with the swapped face(s)
    image: Image.Image
    # Face index -> similarity score, as a float between 0 and 1
    similarity: Dict[int, float]
    # Face index -> similarity score against the reference image (0..1)
    ref_similarity: Dict[int, float]
def get_or_default(l, index, default):
    """
    Retrieve the value at the specified index from the given list.

    If the index is out of bounds (index >= len(l)), return the default
    value instead.

    Args:
        l (list): The input list.
        index (int): The index to retrieve the value from.
        default: The default value to return if the index is out of bounds.

    Returns:
        The value at the specified index if it exists, otherwise the
        default value.
    """
    if index < len(l):
        return l[index]
    return default
def get_faces_from_img_files(files):
    """
    Extract the first detected face from each image file.

    Args:
        files (list): File objects (each with a `name` attribute) pointing
            to image files. May be None or empty.

    Returns:
        list: The first face detected in each file; files without a
        detectable face are skipped.
    """
    faces = []
    # `files` may be None (callers pass `batch_files or []` elsewhere):
    # `or []` makes the loop a safe no-op in that case.
    for file in files or []:
        img = Image.open(file.name)  # Open the image file
        # Keep only the first (left-most) face, if any was detected
        face = get_or_default(get_faces(pil_to_cv2(img)), 0, None)
        if face is not None:
            faces.append(face)
    return faces
def blend_faces(faces: "List[Face]") -> "Face":
    """
    Blend the embeddings of multiple faces into a single face.

    Args:
        faces (List[Face]): List of Face objects whose embeddings are averaged.

    Returns:
        Face: The first face of the list with its embedding replaced by the
        mean embedding. NOTE: the first input face object is mutated in
        place, not copied. Returns None if the input list is empty.

    Raises:
        ValueError: If the embeddings have different shapes.
    """
    embeddings = [face.embedding for face in faces]
    if not embeddings:
        # Nothing to blend
        return None
    embedding_shape = embeddings[0].shape
    # All embeddings must share the same shape to be averaged
    for embedding in embeddings:
        if embedding.shape != embedding_shape:
            raise ValueError("embedding shape mismatch")
    # Average all embeddings and store the result on the first face
    blended = faces[0]
    blended.embedding = np.mean(embeddings, axis=0)
    return blended
@ -119,20 +266,35 @@ def swap_face(
faces_index: Set[int] = {0},
same_gender=True,
) -> ImageResult:
"""
Swaps faces in the target image with the source face.
Args:
reference_face (np.ndarray): The reference face used for similarity comparison.
source_face (np.ndarray): The source face to be swapped.
target_img (Image.Image): The target image to swap faces in.
model (str): Path to the face swap model.
faces_index (Set[int], optional): Set of indices specifying which faces to swap. Defaults to {0}.
same_gender (bool, optional): If True, only swap faces with the same gender as the source face. Defaults to True.
Returns:
ImageResult: An object containing the swapped image and similarity scores.
"""
return_result = ImageResult(target_img, {}, {})
target_img = cv2.cvtColor(np.array(target_img), cv2.COLOR_RGB2BGR)
gender = source_face["gender"]
print("Source Gender ", gender)
logger.info("Source Gender %s", gender)
if source_face is not None:
result = target_img
model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), model)
face_swapper = getFaceSwapModel(model_path)
target_faces = get_faces(target_img)
print("Target faces count", len(target_faces))
logger.info("Target faces count : %s", len(target_faces))
if same_gender:
target_faces = [x for x in target_faces if x["gender"] == gender]
print("Target Gender Matches count", len(target_faces))
logger.info("Target Gender Matches count %s", len(target_faces))
for i, swapped_face in enumerate(target_faces):
logger.info(f"swap face {i}")
@ -159,8 +321,8 @@ def swap_face(
reference_face, swapped_face
)
print("similarity", return_result.similarity)
print("ref similarity", return_result.ref_similarity)
logger.info(f"similarity {return_result.similarity}")
logger.info(f"ref similarity {return_result.ref_similarity}")
except Exception as e:
logger.error(str(e))