Add an updated version of IP-Adapter-Face
parent
78de85f9e6
commit
0c68d479c3
|
|
@ -16,6 +16,7 @@ we present IP-Adapter, an effective and lightweight adapter to achieve image pro
|
|||

|
||||
|
||||
## Release
|
||||
- [2023/11/10] 🔥 Add an updated version of IP-Adapter-Face. The demo is [here](ip_adapter-full-face_demo.ipynb).
|
||||
- [2023/11/05] 🔥 Add text-to-image [demo](ip_adapter_t2i_demo.ipynb) with IP-Adapter and [Kandinsky 2.2 Prior](https://huggingface.co/kandinsky-community/kandinsky-2-2-prior).
|
||||
- [2023/11/02] Support [safetensors](https://github.com/huggingface/safetensors)
|
||||
- [2023/09/08] 🔥 Release a new version of IP-Adapter with SDXL_1.0. More information can be found [here](#sdxl_10).
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1,8 +1,9 @@
|
|||
from .ip_adapter import IPAdapter, IPAdapterPlus, IPAdapterPlusXL, IPAdapterXL
|
||||
from .ip_adapter import IPAdapter, IPAdapterPlus, IPAdapterPlusXL, IPAdapterXL, IPAdapterFull
|
||||
|
||||
__all__ = [
|
||||
"IPAdapter",
|
||||
"IPAdapterPlus",
|
||||
"IPAdapterPlusXL",
|
||||
"IPAdapterXL",
|
||||
"IPAdapterFull",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -45,6 +45,23 @@ class ImageProjModel(torch.nn.Module):
|
|||
return clip_extra_context_tokens
|
||||
|
||||
|
||||
class MLPProjModel(torch.nn.Module):
|
||||
"""SD model with image prompt"""
|
||||
def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024):
|
||||
super().__init__()
|
||||
|
||||
self.proj = torch.nn.Sequential(
|
||||
torch.nn.Linear(clip_embeddings_dim, clip_embeddings_dim),
|
||||
torch.nn.GELU(),
|
||||
torch.nn.Linear(clip_embeddings_dim, cross_attention_dim),
|
||||
torch.nn.LayerNorm(cross_attention_dim)
|
||||
)
|
||||
|
||||
def forward(self, image_embeds):
|
||||
clip_extra_context_tokens = self.proj(image_embeds)
|
||||
return clip_extra_context_tokens
|
||||
|
||||
|
||||
class IPAdapter:
|
||||
def __init__(self, sd_pipe, image_encoder_path, ip_ckpt, device, num_tokens=4):
|
||||
self.device = device
|
||||
|
|
@ -176,14 +193,13 @@ class IPAdapter:
|
|||
uncond_image_prompt_embeds = uncond_image_prompt_embeds.view(bs_embed * num_samples, seq_len, -1)
|
||||
|
||||
with torch.inference_mode():
|
||||
prompt_embeds = self.pipe._encode_prompt(
|
||||
prompt_embeds_, negative_prompt_embeds_ = self.pipe.encode_prompt(
|
||||
prompt,
|
||||
device=self.device,
|
||||
num_images_per_prompt=num_samples,
|
||||
do_classifier_free_guidance=True,
|
||||
negative_prompt=negative_prompt,
|
||||
)
|
||||
negative_prompt_embeds_, prompt_embeds_ = prompt_embeds.chunk(2)
|
||||
prompt_embeds = torch.cat([prompt_embeds_, image_prompt_embeds], dim=1)
|
||||
negative_prompt_embeds = torch.cat([negative_prompt_embeds_, uncond_image_prompt_embeds], dim=1)
|
||||
|
||||
|
|
@ -295,6 +311,17 @@ class IPAdapterPlus(IPAdapter):
|
|||
return image_prompt_embeds, uncond_image_prompt_embeds
|
||||
|
||||
|
||||
class IPAdapterFull(IPAdapterPlus):
|
||||
"""IP-Adapter with full features"""
|
||||
|
||||
def init_proj(self):
|
||||
image_proj_model = MLPProjModel(
|
||||
cross_attention_dim=self.pipe.unet.config.cross_attention_dim,
|
||||
clip_embeddings_dim=self.image_encoder.config.hidden_size,
|
||||
).to(self.device, dtype=torch.float16)
|
||||
return image_proj_model
|
||||
|
||||
|
||||
class IPAdapterPlusXL(IPAdapter):
|
||||
"""SDXL"""
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue