add batch, model selection in inpainting, src face selection, segmented mask

pull/152/head
Tran Xen 2023-07-24 22:34:09 +02:00
parent 96ee015dd7
commit 39571b63a8
16 changed files with 564 additions and 79 deletions

View File

@ -1,3 +1,12 @@
## 1.1.0 :
All listed in features
+ add inpainting model selection => allows selecting a different model for face inpainting
+ add source face selection => allows selecting the reference face when multiple faces are present in the reference image
+ add select by size => sorts faces by size, from larger to smaller
+ add batch option => allows processing images without using the txt2img or img2img tabs
+ add segmentation mask for the upscaled inpainter (based on the CodeFormer implementation): avoids square masks and prevents degradation of non-face parts of the image.
## 0.1.0 :
### Major :

View File

@ -32,11 +32,13 @@ def on_ui_settings():
shared.opts.add_option("roop_upscaled_swapper", shared.OptionInfo(
False, "Upscaled swapper. Applied only to the swapped faces. Apply transformations before merging with the original image.", gr.Checkbox, {"interactive": True}, section=section))
shared.opts.add_option("roop_upscaled_swapper_upscaler", shared.OptionInfo(
None, "Upscaled swapper upscaler (Recommanded : LDSR)", gr.Dropdown, {"interactive": True, "choices" : [upscaler.name for upscaler in shared.sd_upscalers]}, section=section))
None, "Upscaled swapper upscaler (Recommanded : LDSR but slow)", gr.Dropdown, {"interactive": True, "choices" : [upscaler.name for upscaler in shared.sd_upscalers]}, section=section))
shared.opts.add_option("roop_upscaled_swapper_sharpen", shared.OptionInfo(
True, "Upscaled swapper sharpen", gr.Checkbox, {"interactive": True}, section=section))
False, "Upscaled swapper sharpen", gr.Checkbox, {"interactive": True}, section=section))
shared.opts.add_option("roop_upscaled_swapper_fixcolor", shared.OptionInfo(
True, "Upscaled swapper color correction", gr.Checkbox, {"interactive": True}, section=section))
False, "Upscaled swapper color correction", gr.Checkbox, {"interactive": True}, section=section))
shared.opts.add_option("roop_upscaled_improved_mask", shared.OptionInfo(
True, "Use improved segmented mask (use pastenet to mask only the face)", gr.Checkbox, {"interactive": True}, section=section))
shared.opts.add_option("roop_upscaled_swapper_face_restorer", shared.OptionInfo(
None, "Upscaled swapper face restorer", gr.Dropdown, {"interactive": True, "choices" : ["None"] + [x.name() for x in shared.face_restorers]}, section=section))
shared.opts.add_option("roop_upscaled_swapper_face_restorer_visibility", shared.OptionInfo(

View File

@ -8,10 +8,12 @@ import modules.scripts as scripts
import numpy as np
import onnx
import pandas as pd
from scripts.faceswap_unit_ui import faceswap_unit_ui
from scripts.faceswap_upscaler_ui import upscaler_ui
from insightface.app.common import Face
from modules import script_callbacks, scripts
from PIL import Image
from modules.shared import opts
from scripts.roop_utils import imgutils
from scripts.roop_utils.imgutils import pil_to_cv2
@ -20,7 +22,10 @@ from scripts.roop_logging import logger
import scripts.roop_swapping.swapper as swapper
from scripts.roop_postprocessing.postprocessing_options import PostProcessingOptions
from scripts.roop_postprocessing.postprocessing import enhance_image
from dataclasses import dataclass, fields
from typing import Dict, List, Set, Tuple, Union, Optional
from scripts.faceswap_unit_settings import FaceSwapUnitSettings
from scripts.roop_utils.models_utils import get_current_model
def compare(img1, img2):
if img1 is not None and img2 is not None:
@ -159,6 +164,55 @@ def explore_onnx_faceswap_model(model_path):
df = pd.DataFrame(data)
return df
def batch_process(files, save_path, *components):
    """Swap faces on a batch of image files and post-process the results.

    Args:
        files: list of uploaded file objects (each exposing a ``name`` path),
            or None when nothing was uploaded.
        save_path: directory where the processed images are written
            (created if missing). May be None to skip directory creation.
        *components: flat tuple of UI component values —
            ``roop_units_count`` consecutive groups of FaceSwapUnitSettings
            fields, followed by the PostProcessingOptions fields.

    Returns:
        The list of processed PIL images, or None on error / no input files.
    """
    try:
        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)

        units_count = opts.data.get("roop_units_count", 3)

        # Parse and convert the flat component values into unit settings.
        units: List[FaceSwapUnitSettings] = [
            FaceSwapUnitSettings.get_unit_configuration(i, components)
            for i in range(units_count)
        ]
        for i, u in enumerate(units):
            logger.debug("%s, %s", pformat(i), pformat(u))

        # Parse the post-processing options. They sit right after the
        # face-swapping unit fields in the flat component tuple.
        len_conf: int = len(fields(FaceSwapUnitSettings))
        shift: int = units_count * len_conf
        postprocess_options = PostProcessingOptions(
            *components[shift : shift + len(fields(PostProcessingOptions))]
        )
        logger.debug("%s", pformat(postprocess_options))

        units = [u for u in units if u.enable]

        if files is None:
            # Nothing to process — mirror the original fall-through behavior.
            return None

        images = []
        for file in files:
            current_images = []
            src_image = Image.open(file.name).convert("RGB")
            swapped_images = swapper.process_images_units(
                get_current_model(),
                images=[(src_image, None)],
                units=units,
                upscaled_swapper=opts.data.get("roop_upscaled_swapper", False),
            )
            if len(swapped_images) > 0:
                current_images += [img for img, info in swapped_images]
            logger.info("%s images generated", len(current_images))

            # Apply enhancement (upscaling / restoration / inpainting) in place.
            for i, img in enumerate(current_images):
                current_images[i] = enhance_image(img, postprocess_options)

            # Persist each result under a unique name in the destination dir.
            for img in current_images:
                path = tempfile.NamedTemporaryFile(
                    delete=False, suffix=".png", dir=save_path
                ).name
                img.save(path)
            images += current_images
        return images
    except Exception as e:
        logger.error("Batch Process error : %s", e)
        import traceback
        traceback.print_exc()
    return None
def tools_ui():
@ -234,13 +288,36 @@ def tools_ui():
analyse_btn = gr.Button("Analyse", elem_id="roop_analyse_btn")
analyse_results = gr.Textbox(label="Results", interactive=False, value="", elem_id="roop_analyse_results")
upscale_options = upscaler_ui()
with gr.Tab("Batch Process"):
with gr.Tab("Source Images"):
gr.Markdown(
"""Batch process images. Will apply enhancement in the tools enhancement tab.""")
with gr.Row():
batch_source_files = gr.components.File(
type="file",
file_count="multiple",
label="Batch Sources Images",
optional=True,
elem_id="roop_batch_images"
)
batch_results = gr.Gallery(
label="Batch result", show_label=False,
elem_id="roop_batch_results"
).style(columns=[2], rows=[2])
batch_save_path = gr.Textbox(label="Destination Directory", value="outputs/faceswap/", elem_id="roop_batch_destination")
batch_save_btn= gr.Button("Process & Save", elem_id="roop_extract_btn")
unit_components = []
for i in range(1,opts.data.get("roop_units_count", 3)+1):
unit_components += faceswap_unit_ui(False, i, id_prefix="roop_tab")
upscale_options = upscaler_ui()
explore_btn.click(explore_onnx_faceswap_model, inputs=[model], outputs=[explore_result_text])
compare_btn.click(compare, inputs=[img1, img2], outputs=[compare_result_text])
generate_checkpoint_btn.click(build_face_checkpoint_and_save, inputs=[batch_files, name], outputs=[preview])
extract_btn.click(extract_faces, inputs=[extracted_source_files, extract_save_path]+upscale_options, outputs=[extracted_faces])
analyse_btn.click(analyse_faces, inputs=[img_to_analyse,analyse_det_threshold], outputs=[analyse_results])
batch_save_btn.click(batch_process, inputs=[batch_source_files, batch_save_path]+unit_components+upscale_options, outputs=[batch_results])
def on_ui_tabs() :
with gr.Blocks(analytics_enabled=False) as ui_faceswap:

View File

@ -13,6 +13,9 @@ from scripts.roop_logging import logger
@dataclass
class FaceSwapUnitSettings:
# ORDER of parameters is IMPORTANT. It should match the result of faceswap_unit_ui
# The image given in reference
source_img: Union[Image.Image, str]
# The checkpoint file
@ -25,7 +28,8 @@ class FaceSwapUnitSettings:
enable: bool
# Use same gender filtering
same_gender: bool
# Sort faces by their size (from larger to smaller)
sort_by_size : bool
# If True, discard images with low similarity
check_similarity : bool
# if True will compute similarity and add it to the image info
@ -37,6 +41,9 @@ class FaceSwapUnitSettings:
min_ref_sim: float
# The face index to use for swapping
_faces_index: str
# The face index to get image from source
reference_face_index : int
# Swap in the source image in img2img (before processing)
swap_in_source: bool
# Swap in the generated image in img2img (always on for txt2img)
@ -60,6 +67,8 @@ class FaceSwapUnitSettings:
if len(faces_index) == 0:
return {0}
logger.debug("FACES INDEX : %s", faces_index)
return faces_index
@property
@ -98,7 +107,7 @@ class FaceSwapUnitSettings:
img_bytes = base64.b64decode(self.source_img)
self.source_img = Image.open(io.BytesIO(img_bytes))
source_img = pil_to_cv2(self.source_img)
self._reference_face = swapper.get_or_default(swapper.get_faces(source_img), 0, None)
self._reference_face = swapper.get_or_default(swapper.get_faces(source_img), self.reference_face_index, None)
if self._reference_face is None :
logger.error("Face not found in reference image")
else :

View File

@ -1,19 +1,20 @@
from scripts.roop_utils.models_utils import get_face_checkpoints
import gradio as gr
def faceswap_unit_ui(is_img2img, unit_num=1):
def faceswap_unit_ui(is_img2img, unit_num=1, id_prefix="roop"):
with gr.Tab(f"Face {unit_num}"):
with gr.Column():
gr.Markdown(
"""Reference is an image. First face will be extracted.
First face of batches sources will be extracted and used as input (or blended if blend is activated).""")
with gr.Row():
img = gr.components.Image(type="pil", label="Reference")
img = gr.components.Image(type="pil", label="Reference", elem_id=f"{id_prefix}_face{unit_num}_reference_image")
batch_files = gr.components.File(
type="file",
file_count="multiple",
label="Batch Sources Images",
optional=True,
elem_id=f"{id_prefix}_face{unit_num}_batch_source_face_files"
)
gr.Markdown(
"""Face checkpoint built with the checkpoint builder in tools. Will overwrite reference image.""")
@ -22,32 +23,57 @@ def faceswap_unit_ui(is_img2img, unit_num=1):
face = gr.Dropdown(
choices=get_face_checkpoints(),
label="Face Checkpoint (precedence over reference face)",
elem_id=f"{id_prefix}_face{unit_num}_face_checkpoint"
)
refresh = gr.Button(value='', variant='tool')
refresh = gr.Button(value='', variant='tool', elem_id=f"{id_prefix}_face{unit_num}_refresh_checkpoints")
def refresh_fn(selected):
return gr.Dropdown.update(value=selected, choices=get_face_checkpoints())
refresh.click(fn=refresh_fn,inputs=face, outputs=face)
with gr.Row():
enable = gr.Checkbox(False, placeholder="enable", label="Enable")
same_gender = gr.Checkbox(
False, placeholder="Same Gender", label="Same Gender"
)
enable = gr.Checkbox(False, placeholder="enable", label="Enable", elem_id=f"{id_prefix}_face{unit_num}_enable")
blend_faces = gr.Checkbox(
True, placeholder="Blend Faces", label="Blend Faces ((Source|Checkpoint)+References = 1)"
True, placeholder="Blend Faces", label="Blend Faces ((Source|Checkpoint)+References = 1)",
elem_id=f"{id_prefix}_face{unit_num}_blend_faces",
interactive=True
)
gr.Markdown("""Discard images with low similarity or no faces :""")
with gr.Row():
check_similarity = gr.Checkbox(False, placeholder="discard", label="Check similarity")
compute_similarity = gr.Checkbox(False, label="Compute similarity")
min_sim = gr.Slider(0, 1, 0, step=0.01, label="Min similarity")
check_similarity = gr.Checkbox(False, placeholder="discard", label="Check similarity",
elem_id=f"{id_prefix}_face{unit_num}_check_similarity")
compute_similarity = gr.Checkbox(False, label="Compute similarity",
elem_id=f"{id_prefix}_face{unit_num}_compute_similarity")
min_sim = gr.Slider(0, 1, 0, step=0.01, label="Min similarity",
elem_id=f"{id_prefix}_face{unit_num}_min_similarity")
min_ref_sim = gr.Slider(
0, 1, 0, step=0.01, label="Min reference similarity"
0, 1, 0, step=0.01, label="Min reference similarity",
elem_id=f"{id_prefix}_face{unit_num}_min_ref_similarity"
)
faces_index = gr.Textbox(
gr.Markdown("""Select the face to be swapped, you can sort by size or use the same gender as the desired face:""")
with gr.Row():
same_gender = gr.Checkbox(
False, placeholder="Same Gender", label="Same Gender",
elem_id=f"{id_prefix}_face{unit_num}_same_gender"
)
sort_by_size = gr.Checkbox(
False, placeholder="Sort by size", label="Sort by size (larger>smaller)",
elem_id=f"{id_prefix}_face{unit_num}_sort_by_size"
)
target_faces_index = gr.Textbox(
value="0",
placeholder="Which face to swap (comma separated), start from 0 (by gender if same_gender is enabled)",
label="Comma separated face number(s)",
label="Target face : Comma separated face number(s)",
elem_id=f"{id_prefix}_face{unit_num}_target_faces_index"
)
gr.Markdown("""The following will only affect reference face image (and is not affected by sort by size) :""")
reference_faces_index = gr.Number(
value=0,
precision=0,
minimum=0,
placeholder="Which face to get from reference image start from 0",
label="Reference source face : start from 0",
elem_id=f"{id_prefix}_face{unit_num}_reference_face_index"
)
gr.Markdown("""Configure swapping. Swapping can occure before img2img, after or both :""", visible=is_img2img)
swap_in_source = gr.Checkbox(
@ -55,14 +81,17 @@ def faceswap_unit_ui(is_img2img, unit_num=1):
placeholder="Swap face in source image",
label="Swap in source image (blended face)",
visible=is_img2img,
elem_id=f"{id_prefix}_face{unit_num}_swap_in_source"
)
swap_in_generated = gr.Checkbox(
True,
placeholder="Swap face in generated image",
label="Swap in generated image",
visible=is_img2img,
elem_id=f"{id_prefix}_face{unit_num}_swap_in_generated"
)
# If changed, you need to change FaceSwapUnitSettings accordingly
# ORDER of parameters is IMPORTANT. It should match the result of FaceSwapUnitSettings
return [
img,
face,
@ -70,11 +99,13 @@ def faceswap_unit_ui(is_img2img, unit_num=1):
blend_faces,
enable,
same_gender,
sort_by_size,
check_similarity,
compute_similarity,
min_sim,
min_ref_sim,
faces_index,
target_faces_index,
reference_faces_index,
swap_in_source,
swap_in_generated,
]

View File

@ -1,12 +1,11 @@
import gradio as gr
import modules
from modules import shared
from modules import shared, sd_models
from modules.shared import cmd_opts, opts, state
import scripts.roop_postprocessing.upscaling as upscaling
from scripts.roop_logging import logger
def upscaler_ui():
with gr.Tab(f"Post-Processing"):
gr.Markdown(
@ -63,6 +62,7 @@ def upscaler_ui():
elem_id="roop_pp_inpainting_steps"
)
inpaiting_model = gr.Dropdown(choices=["Current"]+sd_models.checkpoint_tiles(), default="Current", label="sd model (experimental)", elem_id="roop_pp_inpainting_sd_model")
return [
face_restorer_name,
face_restorer_visibility,
@ -75,5 +75,6 @@ def upscaler_ui():
inpainting_denoising_negative_prompt,
inpainting_denoising_steps,
inpainting_sampler,
inpainting_when
inpainting_when,
inpaiting_model
]

View File

@ -3,7 +3,9 @@ import os
MODELS_DIR = os.path.abspath(os.path.join("models","roop"))
ANALYZER_DIR = os.path.abspath(os.path.join(MODELS_DIR, "analysers"))
VERSION_FLAG = "v1.0.1"
FACE_PARSER_DIR = os.path.abspath(os.path.join(MODELS_DIR, "parser"))
VERSION_FLAG = "v1.1.0"
EXTENSION_PATH=os.path.join("extensions","sd-webui-roop")
SD_CONVERT_SCORE = 0.7

View File

@ -8,26 +8,28 @@ import numpy as np
from modules import shared
from scripts.roop_utils import imgutils
from modules import shared, processing, codeformer_model
from pprint import pformat
from modules.processing import (StableDiffusionProcessingImg2Img)
from enum import Enum
from scripts.roop_postprocessing.postprocessing_options import PostProcessingOptions, InpaintingWhen
from modules import sd_models
from scripts.roop_swapping import swapper
def img2img_diffusion(img : Image.Image, inpainting_prompt : str, inpainting_denoising_strength : float = 0.1, inpainting_negative_prompt : str="", inpainting_steps : int = 20, inpainting_sampler : str ="Euler") -> Image.Image :
if inpainting_denoising_strength == 0 :
def img2img_diffusion(img : Image.Image, pp: PostProcessingOptions) -> Image.Image :
if pp.inpainting_denoising_strengh == 0 :
return img
try :
logger.info(
f"""Inpainting face
Sampler : {inpainting_sampler}
inpainting_denoising_strength : {inpainting_denoising_strength}
inpainting_steps : {inpainting_steps}
Sampler : {pp.inpainting_sampler}
inpainting_denoising_strength : {pp.inpainting_denoising_strengh}
inpainting_steps : {pp.inpainting_steps}
"""
)
if not isinstance(inpainting_sampler, str) :
if not isinstance(pp.inpainting_sampler, str) :
inpainting_sampler = "Euler"
logger.info("send faces to image to image")
@ -37,12 +39,36 @@ inpainting_steps : {inpainting_steps}
for face in faces:
bbox =face.bbox.astype(int)
mask = imgutils.create_mask(img, bbox)
prompt = inpainting_prompt.replace("[gender]", "man" if face["gender"] == 1 else "woman")
negative_prompt = inpainting_negative_prompt.replace("[gender]", "man" if face["gender"] == 1 else "woman")
prompt = pp.inpainting_prompt.replace("[gender]", "man" if face["gender"] == 1 else "woman")
negative_prompt = pp.inpainting_negative_prompt.replace("[gender]", "man" if face["gender"] == 1 else "woman")
logger.info("Denoising prompt : %s", prompt)
logger.info("Denoising strenght : %s", inpainting_denoising_strength)
i2i_p = StableDiffusionProcessingImg2Img([img],sampler_name=inpainting_sampler, do_not_save_samples=True, steps =inpainting_steps, width = img.width, inpainting_fill=1, inpaint_full_res= True, height = img.height, mask=mask, prompt = prompt,negative_prompt=negative_prompt, denoising_strength=inpainting_denoising_strength)
logger.info("Denoising strenght : %s", pp.inpainting_denoising_strengh)
i2i_kwargs = {"sampler_name" :pp.inpainting_sampler,
"do_not_save_samples":True,
"steps" :pp.inpainting_steps,
"width" : img.width,
"inpainting_fill":1,
"inpaint_full_res":True,
"height" : img.height,
"mask": mask,
"prompt" : prompt,
"negative_prompt" :negative_prompt,
"denoising_strength" :pp.inpainting_denoising_strengh}
current_model_checkpoint = shared.opts.sd_model_checkpoint
if pp.inpainting_model and pp.inpainting_model != "Current" :
# Change checkpoint
shared.opts.sd_model_checkpoint = pp.inpainting_model
sd_models.select_checkpoint
sd_models.load_model()
i2i_p = StableDiffusionProcessingImg2Img([img], **i2i_kwargs)
i2i_processed = processing.process_images(i2i_p)
if pp.inpainting_model and pp.inpainting_model != "Current" :
# Restore checkpoint
shared.opts.sd_model_checkpoint = current_model_checkpoint
sd_models.select_checkpoint
sd_models.load_model()
images = i2i_processed.images
if len(images) > 0 :
img = images[0]

View File

@ -10,31 +10,16 @@ def enhance_image(image: Image.Image, pp_options: PostProcessingOptions) -> Imag
result_image = image
try :
if pp_options.inpainting_when == InpaintingWhen.BEFORE_UPSCALING.value :
result_image = img2img_diffusion(image,
inpainting_sampler= pp_options.inpainting_sampler,
inpainting_prompt=pp_options.inpainting_prompt,
inpainting_negative_prompt=pp_options.inpainting_negative_prompt,
inpainting_denoising_strength=pp_options.inpainting_denoising_strengh,
inpainting_steps=pp_options.inpainting_steps)
result_image = img2img_diffusion(image, pp_options)
result_image = upscale_img(result_image, pp_options)
if pp_options.inpainting_when == InpaintingWhen.BEFORE_RESTORE_FACE.value :
result_image = img2img_diffusion(image,
inpainting_sampler= pp_options.inpainting_sampler,
inpainting_prompt=pp_options.inpainting_prompt,
inpainting_negative_prompt=pp_options.inpainting_negative_prompt,
inpainting_denoising_strength=pp_options.inpainting_denoising_strengh,
inpainting_steps=pp_options.inpainting_steps)
result_image = img2img_diffusion(image,pp_options)
result_image = restore_face(result_image, pp_options)
if pp_options.inpainting_when == InpaintingWhen.AFTER_ALL.value :
result_image = img2img_diffusion(image,
inpainting_sampler= pp_options.inpainting_sampler,
inpainting_prompt=pp_options.inpainting_prompt,
inpainting_negative_prompt=pp_options.inpainting_negative_prompt,
inpainting_denoising_strength=pp_options.inpainting_denoising_strengh,
inpainting_steps=pp_options.inpainting_steps)
result_image = img2img_diffusion(image,pp_options)
except Exception as e:
logger.error("Failed to upscale %s", e)

View File

@ -26,6 +26,7 @@ class PostProcessingOptions:
inpainting_steps : int = 20
inpainting_sampler : str = "Euler"
inpainting_when : InpaintingWhen = InpaintingWhen.BEFORE_UPSCALING
inpainting_model : str = "Current"
@property
def upscaler(self) -> UpscalerData:

View File

@ -0,0 +1,55 @@
"""
Code from codeformer https://github.com/sczhou/CodeFormer
"""
import cv2
import numpy as np
import torch
from torchvision.transforms.functional import normalize
from scripts.roop_swapping.parsing import init_parsing_model
from functools import lru_cache
@lru_cache
def get_parsing_model(device):
    """Build the face-parsing model once per device and memoize it."""
    return init_parsing_model(device=device)
def img2tensor(imgs, bgr2rgb=True, float32=True):
    """Convert HWC numpy image(s) to CHW torch tensor(s).

    Args:
        imgs: a single ndarray of shape (H, W, C), or a list of such arrays.
        bgr2rgb: when True and the image has 3 channels, swap BGR -> RGB.
        float32: when True, cast the resulting tensor to float32.

    Returns:
        A tensor, or a list of tensors if a list was given.
    """
    def _convert(image, to_rgb, as_float):
        if to_rgb and image.shape[2] == 3:
            # cv2.cvtColor rejects float64 input, so downcast first.
            if image.dtype == 'float64':
                image = image.astype('float32')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        tensor = torch.from_numpy(image.transpose(2, 0, 1))
        return tensor.float() if as_float else tensor

    if isinstance(imgs, list):
        return [_convert(image, bgr2rgb, float32) for image in imgs]
    return _convert(imgs, bgr2rgb, float32)
def generate_face_mask(face_img, device):
    """Compute a face mask (values 0/255) for *face_img* using the parsing model.

    Args:
        face_img: BGR face crop as a numpy array of shape (H, W, 3).
        device: torch device the parsing model runs on.

    Returns:
        A float mask resized back to the spatial size of *face_img*.
    """
    # The parsing model expects a fixed 512x512 input.
    resized_face = cv2.resize(face_img, (512, 512), interpolation=cv2.INTER_LINEAR)

    # Preprocess: scale to [0, 1], BGR -> RGB, normalize to [-1, 1], add batch dim.
    face_input = img2tensor(resized_face.astype('float32') / 255., bgr2rgb=True, float32=True)
    normalize(face_input, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
    face_input = torch.unsqueeze(face_input, 0).to(device)

    # Run the parser and take the per-pixel argmax over the parsing classes.
    with torch.no_grad():
        out = get_parsing_model(device)(face_input)[0]
    out = out.argmax(dim=1).squeeze().cpu().numpy()

    # Map each parsing class to 0 or 255: classes flagged 255 count as face.
    MASK_COLORMAP = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 0, 0, 0]
    parse_mask = np.zeros(out.shape)
    for idx, color in enumerate(MASK_COLORMAP):
        parse_mask[out == idx] = color

    # Resize the mask back so it matches the original face image.
    face_mask = cv2.resize(parse_mask, (face_img.shape[1], face_img.shape[0]))
    return face_mask

View File

@ -0,0 +1,46 @@
"""
Code from codeformer https://github.com/sczhou/CodeFormer
"""
import torch
import cv2
import os
import os.path as osp
import torch
from torch.hub import download_url_to_file, get_dir
from .parsenet import ParseNet
from urllib.parse import urlparse
from scripts.roop_globals import FACE_PARSER_DIR
ROOT_DIR = FACE_PARSER_DIR
def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
    """Download *url* below ROOT_DIR/model_dir if absent and return the local path.

    Ref:https://github.com/1adrianb/face-alignment/blob/master/face_alignment/utils.py
    """
    if model_dir is None:
        # Fall back to torch hub's checkpoint directory.
        model_dir = os.path.join(get_dir(), 'checkpoints')
    os.makedirs(os.path.join(ROOT_DIR, model_dir), exist_ok=True)

    # Derive the local file name from the URL unless one is given explicitly.
    target_name = file_name if file_name is not None else os.path.basename(urlparse(url).path)
    cached_file = os.path.abspath(os.path.join(ROOT_DIR, model_dir, target_name))

    if not os.path.exists(cached_file):
        print(f'Downloading: "{url}" to {cached_file}\n')
        download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
    return cached_file
def init_parsing_model(device='cuda'):
    """Instantiate ParseNet, fetch its pretrained weights, and move it to *device*."""
    parse_net = ParseNet(in_size=512, out_size=512, parsing_ch=19)
    weights_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/parsing_parsenet.pth'
    weights_path = load_file_from_url(url=weights_url, model_dir='weights/facelib', progress=True, file_name=None)
    # Load onto CPU first; the final device move happens below.
    state_dict = torch.load(weights_path, map_location=lambda storage, loc: storage)
    parse_net.load_state_dict(state_dict, strict=True)
    parse_net.eval()
    parse_net = parse_net.to(device)
    return parse_net

View File

@ -0,0 +1,195 @@
"""Modified from https://github.com/chaofengc/PSFRGAN
Code from codeformer https://github.com/sczhou/CodeFormer
"""
import numpy as np
import torch.nn as nn
from torch.nn import functional as F
class NormLayer(nn.Module):
    """Normalization Layers.

    Args:
        channels: input channels, for batch norm and instance norm.
        normalize_shape: input shape without batch size, for layer norm.
        norm_type: one of 'bn', 'in', 'gn', 'pixel', 'layer' or 'none'
            (case-insensitive).
    """
    def __init__(self, channels, normalize_shape=None, norm_type='bn'):
        super(NormLayer, self).__init__()
        norm_type = norm_type.lower()
        self.norm_type = norm_type
        if norm_type == 'bn':
            self.norm = nn.BatchNorm2d(channels, affine=True)
        elif norm_type == 'in':
            self.norm = nn.InstanceNorm2d(channels, affine=False)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(32, channels, affine=True)
        elif norm_type == 'pixel':
            # L2-normalize across the channel dimension.
            self.norm = lambda x: F.normalize(x, p=2, dim=1)
        elif norm_type == 'layer':
            self.norm = nn.LayerNorm(normalize_shape)
        elif norm_type == 'none':
            self.norm = lambda x: x * 1.0
        else:
            assert 1 == 0, f'Norm type {norm_type} not support.'

    def forward(self, x, ref=None):
        # NOTE: the upstream CodeFormer code dispatched on a 'spade' norm type
        # here, but __init__ rejects 'spade', so that branch was unreachable
        # dead code and has been removed. *ref* is kept for signature
        # compatibility with callers.
        return self.norm(x)
class ReluLayer(nn.Module):
    """Relu Layer.

    Args:
        channels: channel count, only used by PReLU's learnable slope.
        relu_type: type of relu layer (case-insensitive), candidates are
            - ReLU
            - LeakyReLU: default relu slope 0.2
            - PRelu
            - SELU
            - none: direct pass
    """
    def __init__(self, channels, relu_type='relu'):
        super(ReluLayer, self).__init__()
        kind = relu_type.lower()
        if kind == 'none':
            self.func = lambda x: x * 1.0
        elif kind == 'relu':
            self.func = nn.ReLU(True)
        elif kind == 'leakyrelu':
            self.func = nn.LeakyReLU(0.2, inplace=True)
        elif kind == 'prelu':
            self.func = nn.PReLU(channels)
        elif kind == 'selu':
            self.func = nn.SELU(True)
        else:
            assert 1 == 0, f'Relu type {kind} not support.'

    def forward(self, x):
        return self.func(x)
class ConvLayer(nn.Module):
    """Conv2d wrapped with optional resampling, reflection padding, norm and relu.

    Args:
        in_channels / out_channels: convolution channel counts.
        kernel_size: conv kernel size; reflection padding preserves spatial size.
        scale: 'down' (stride-2 conv), 'up' (nearest 2x upsample first),
            anything else leaves the resolution unchanged.
        norm_type / relu_type: forwarded to NormLayer / ReluLayer.
        use_pad: apply reflection padding before the convolution.
        bias: conv bias; forced off when batch norm follows (redundant shift).
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 scale='none',
                 norm_type='none',
                 relu_type='none',
                 use_pad=True,
                 bias=True):
        super(ConvLayer, self).__init__()
        self.use_pad = use_pad
        self.norm_type = norm_type

        # Batch norm provides its own learnable shift.
        if norm_type in ['bn']:
            bias = False

        stride = 2 if scale == 'down' else 1
        if scale == 'up':
            self.scale_func = lambda x: nn.functional.interpolate(x, scale_factor=2, mode='nearest')
        else:
            self.scale_func = lambda x: x

        self.reflection_pad = nn.ReflectionPad2d(int(np.ceil((kernel_size - 1.) / 2)))
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride, bias=bias)
        self.relu = ReluLayer(out_channels, relu_type)
        self.norm = NormLayer(out_channels, norm_type=norm_type)

    def forward(self, x):
        out = self.scale_func(x)
        if self.use_pad:
            out = self.reflection_pad(out)
        out = self.conv2d(out)
        return self.relu(self.norm(out))
class ResidualBlock(nn.Module):
    """
    Residual block recommended in: http://torch.ch/blog/2016/02/04/resnets.html
    """
    def __init__(self, c_in, c_out, relu_type='prelu', norm_type='bn', scale='none'):
        super(ResidualBlock, self).__init__()

        # Identity shortcut only when neither channels nor resolution change.
        if scale == 'none' and c_in == c_out:
            self.shortcut_func = lambda x: x
        else:
            self.shortcut_func = ConvLayer(c_in, c_out, 3, scale)

        # Resampling happens in exactly one of the two convolutions.
        scale_config_dict = {'down': ['none', 'down'], 'up': ['up', 'none'], 'none': ['none', 'none']}
        first_scale, second_scale = scale_config_dict[scale]
        self.conv1 = ConvLayer(c_in, c_out, 3, first_scale, norm_type=norm_type, relu_type=relu_type)
        self.conv2 = ConvLayer(c_out, c_out, 3, second_scale, norm_type=norm_type, relu_type='none')

    def forward(self, x):
        identity = self.shortcut_func(x)
        return identity + self.conv2(self.conv1(x))
class ParseNet(nn.Module):
    """Encoder-body-decoder network predicting a parsing mask and an RGB image.

    Args:
        in_size / out_size: input and output spatial resolutions.
        min_feat_size: bottleneck resolution; determines down/up step counts.
        base_ch: channel count after the stem convolution.
        parsing_ch: number of parsing classes produced by the mask head.
        res_depth: number of residual blocks in the body.
        relu_type / norm_type: activation / normalization used throughout.
        ch_range: [min, max] clamp applied to channel counts.
    """
    def __init__(self,
                 in_size=128,
                 out_size=128,
                 min_feat_size=32,
                 base_ch=64,
                 parsing_ch=19,
                 res_depth=10,
                 relu_type='LeakyReLU',
                 norm_type='bn',
                 ch_range=[32, 256]):
        super().__init__()
        self.res_depth = res_depth
        act_args = {'norm_type': norm_type, 'relu_type': relu_type}
        min_ch, max_ch = ch_range
        ch_clip = lambda x: max(min_ch, min(x, max_ch))  # noqa: E731

        min_feat_size = min(in_size, min_feat_size)
        down_steps = int(np.log2(in_size // min_feat_size))
        up_steps = int(np.log2(out_size // min_feat_size))

        # =============== define encoder-body-decoder ====================
        encoder_layers = [ConvLayer(3, base_ch, 3, 1)]
        head_ch = base_ch
        for _ in range(down_steps):
            c_in, c_out = ch_clip(head_ch), ch_clip(head_ch * 2)
            encoder_layers.append(ResidualBlock(c_in, c_out, scale='down', **act_args))
            head_ch = head_ch * 2

        body_layers = [
            ResidualBlock(ch_clip(head_ch), ch_clip(head_ch), **act_args)
            for _ in range(res_depth)
        ]

        decoder_layers = []
        for _ in range(up_steps):
            c_in, c_out = ch_clip(head_ch), ch_clip(head_ch // 2)
            decoder_layers.append(ResidualBlock(c_in, c_out, scale='up', **act_args))
            head_ch = head_ch // 2

        self.encoder = nn.Sequential(*encoder_layers)
        self.body = nn.Sequential(*body_layers)
        self.decoder = nn.Sequential(*decoder_layers)
        self.out_img_conv = ConvLayer(ch_clip(head_ch), 3)
        self.out_mask_conv = ConvLayer(ch_clip(head_ch), parsing_ch)

    def forward(self, x):
        feat = self.encoder(x)
        # Long-range residual connection around the body.
        x = feat + self.body(feat)
        x = self.decoder(x)
        out_img = self.out_img_conv(x)
        out_mask = self.out_mask_conv(x)
        return out_mask, out_img

View File

@ -119,13 +119,14 @@ def getFaceSwapModel(model_path: str):
logger.error("Loading of swapping model failed, please check the requirements (On Windows, download and install Visual Studio. During the install, make sure to include the Python and C++ packages.)")
def get_faces(img_data: np.ndarray, det_size=(640, 640), det_thresh : Optional[int]=None) -> List[Face]:
def get_faces(img_data: np.ndarray, det_size=(640, 640), det_thresh : Optional[int]=None, sort_by_face_size = False) -> List[Face]:
"""
Detects and retrieves faces from an image using an analysis model.
Args:
img_data (np.ndarray): The image data as a NumPy array.
det_size (tuple): The desired detection size (width, height). Defaults to (640, 640).
sort_by_face_size (bool) : Will sort the faces by their size from larger to smaller face
Returns:
list: A list of detected faces, sorted by their x-coordinate of the bounding box.
@ -150,6 +151,9 @@ def get_faces(img_data: np.ndarray, det_size=(640, 640), det_thresh : Optional[i
return get_faces(img_data, det_size=det_size_half, det_thresh=det_thresh)
try:
if sort_by_face_size :
return sorted(face, reverse=True, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))
# Sort the detected faces based on their x-coordinate of the bounding box
return sorted(face, key=lambda x: x.bbox[0])
except Exception as e:
@ -268,7 +272,8 @@ def swap_face(
faces_index: Set[int] = {0},
same_gender=True,
upscaled_swapper = False,
compute_similarity = True
compute_similarity = True,
sort_by_face_size = False
) -> ImageResult:
"""
Swaps faces in the target image with the source face.
@ -294,7 +299,7 @@ def swap_face(
result = target_img
model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), model)
face_swapper = getFaceSwapModel(model_path)
target_faces = get_faces(target_img)
target_faces = get_faces(target_img, sort_by_face_size=sort_by_face_size)
logger.info("Target faces count : %s", len(target_faces))
if same_gender:
@ -313,7 +318,7 @@ def swap_face(
if compute_similarity :
try:
result_faces = get_faces(
cv2.cvtColor(np.array(result_image), cv2.COLOR_RGB2BGR)
cv2.cvtColor(np.array(result_image), cv2.COLOR_RGB2BGR), sort_by_face_size=sort_by_face_size
)
if same_gender:
result_faces = [x for x in result_faces if x["gender"] == gender]
@ -382,7 +387,8 @@ def process_image_unit(model, unit : FaceSwapUnitSettings, image: Image.Image, i
model=model,
same_gender=unit.same_gender,
upscaled_swapper=upscaled_swapper,
compute_similarity=unit.compute_similarity
compute_similarity=unit.compute_similarity,
sort_by_face_size=unit.sort_by_size
)
save_img_debug(result.image, "After swap")

View File

@ -1,29 +1,61 @@
import time
import numpy as np
import onnxruntime
import cv2
import onnx
from onnx import numpy_helper
from insightface.utils import face_align
from insightface.model_zoo.inswapper import INSwapper
from modules import scripts, shared, processing
from modules.face_restoration import FaceRestoration
from modules.upscaler import UpscalerData
from PIL import Image
from scripts.roop_logging import logger
from scripts.roop_utils.imgutils import cv2_to_pil, pil_to_cv2
from modules import processing
from modules.shared import cmd_opts, opts, state
from scripts.roop_postprocessing.postprocessing_options import PostProcessingOptions
from scripts.roop_postprocessing import upscaling
import cv2
import numpy as np
import onnx
import onnxruntime
from insightface.model_zoo.inswapper import INSwapper
from insightface.utils import face_align
from modules import codeformer_model, processing, scripts, shared
from modules.face_restoration import FaceRestoration
from modules.shared import cmd_opts, opts, state
from modules.upscaler import UpscalerData
from onnx import numpy_helper
from PIL import Image
from scripts.roop_logging import logger
from scripts.roop_postprocessing import upscaling
from scripts.roop_postprocessing.postprocessing_options import \
PostProcessingOptions
from scripts.roop_utils.imgutils import cv2_to_pil, pil_to_cv2
from scripts.roop_swapping.facemask import generate_face_mask
def get_upscaler() -> UpscalerData:
    """Return the configured roop swapper upscaler, or None if not found."""
    wanted_name = opts.data.get("roop_upscaled_swapper_upscaler", "LDSR")
    for candidate in shared.sd_upscalers:
        if candidate.name == wanted_name:
            return candidate
    return None
def merge_images_with_mask(image1, image2, mask):
    """Composite *image2* over *image1* wherever *mask* is non-zero.

    All three arrays must share spatial size; image1 and image2 must also
    share their full shape. Raises ValueError otherwise.
    """
    if image1.shape != image2.shape or image1.shape[:2] != mask.shape:
        raise ValueError("Img should have the same shape")

    mask = mask.astype(np.uint8)
    # Keep masked pixels from image2 and the inverse region from image1.
    masked_region = cv2.bitwise_and(image2, image2, mask=mask)
    empty_region = cv2.bitwise_and(image1, image1, mask=cv2.bitwise_not(mask))
    return cv2.add(empty_region, masked_region)
def erode_mask(mask, kernel_size=3, iterations=1):
    """Shrink the white area of *mask* using a square structuring element."""
    structuring_element = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.erode(mask, structuring_element, iterations=iterations)
def apply_gaussian_blur(mask, kernel_size=(5, 5), sigma_x=0):
    """Soften mask edges with a Gaussian blur (sigma derived from kernel when 0)."""
    return cv2.GaussianBlur(mask, kernel_size, sigma_x)
def dilate_mask(mask, kernel_size=5, iterations=1):
    """Grow the white area of *mask* using a square structuring element."""
    structuring_element = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.dilate(mask, structuring_element, iterations=iterations)
def get_face_mask(aimg, bgr_fake):
    """Union of the parsed face regions of the original and swapped crops, dilated."""
    mask1 = generate_face_mask(aimg, device=shared.device)
    mask2 = generate_face_mask(bgr_fake, device=shared.device)
    # Dilate so the merge boundary lands outside the strict face region.
    return dilate_mask(cv2.bitwise_or(mask1, mask2))
class UpscaledINSwapper():
def __init__(self, inswapper : INSwapper):
@ -75,15 +107,21 @@ class UpscaledINSwapper():
return fake_diff
if upscale :
print("*"*80)
print(f"Upscaled inswapper using {opts.data.get('roop_upscaled_swapper_upscaler', 'LDSR')}")
print("*"*80)
k = 4
aimg, M = face_align.norm_crop2(img, target_face.kps, self.input_size[0]*k)
# upscale and restore face :
bgr_fake = self.super_resolution(bgr_fake, k)
if opts.data.get("roop_upscaled_improved_mask", True) :
mask = get_face_mask(aimg,bgr_fake)
bgr_fake = merge_images_with_mask(aimg, bgr_fake,mask)
# compute fake_diff before sharpen and color correction (better result)
fake_diff = compute_diff(bgr_fake, aimg)
@ -99,6 +137,8 @@ class UpscaledINSwapper():
correction = processing.setup_color_correction(cv2_to_pil(aimg))
bgr_fake_pil = processing.apply_color_correction(correction, cv2_to_pil(bgr_fake))
bgr_fake = pil_to_cv2(bgr_fake_pil)
else :
fake_diff = compute_diff(bgr_fake, aimg)

View File

@ -52,4 +52,4 @@ def get_face_checkpoints():
"""
faces_path = os.path.join(scripts.basedir(), "models", "roop", "faces", "*.pkl")
faces = glob.glob(faces_path)
return ["None"] + faces
return ["None"] + faces