def image_grid(imgs, rows, cols):
    """Tile a list of PIL images into one ``rows`` x ``cols`` grid image.

    Images are laid out in row-major order: the image at index ``i`` is
    pasted into row ``i // cols``, column ``i % cols``. The size of the first
    image defines the size of every grid cell; no resizing is done here, so
    the remaining images are expected to share that size.

    Args:
        imgs: Non-empty sequence of PIL images.
        rows: Number of rows in the grid.
        cols: Number of columns in the grid.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``, where ``(w, h)`` is
        the size of ``imgs[0]``.

    Raises:
        AssertionError: If ``len(imgs) != rows * cols``.
    """
    assert len(imgs) == rows * cols, (
        f"expected rows*cols = {rows * cols} images, got {len(imgs)}"
    )

    # The first image fixes the cell size for the whole grid.
    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))

    for i, img in enumerate(imgs):
        # Row-major placement: divmod yields (row, column) for index i.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid
# --- Cell: read image prompt ---
# read image prompt
image = Image.open("assets/images/statue.png")
# Bare last expression: the notebook renders its value as the cell output.
# The resized result is not assigned back, so this line only produces the preview.
image.resize((512, 512))

# --- Cell: generate image variations ---
# generate image variations
# NOTE(review): `ip_model` is not defined anywhere in the visible part of this
# notebook (the cells before this one are truncated) — confirm it is created
# before this cell runs on a fresh kernel.
num_samples = 2
images = ip_model.generate(pil_image=image, num_samples=num_samples, num_inference_steps=30, seed=4)
# Lay the results out as a single row with one column per sample.
grid = image_grid(images, 1, num_samples)
# Bare last expression: displayed as the cell output.
grid

# --- Cell (under the "ControlNet Depth" heading): read image prompt and depth map ---
# read image prompt
image = Image.open("assets/images/statue.png")
# The depth map is resized to 1024x1024 once here; the later generate call
# receives this resized version.
depth_map = Image.open("assets/structure_controls/depth.png").resize((1024, 1024))
# Preview only: both inputs shrunk to 256x256 and shown side by side
# (1 row, 2 columns). `image` and `depth_map` themselves are not reassigned.
image_grid([image.resize((256, 256)), depth_map.resize((256, 256))], 1, 2)
# --- Cell: generate image with structural control ---
# generate image with structural control
# Relies on notebook state from earlier cells: `image` (opened from
# assets/images/statue.png) and `depth_map` (opened from
# assets/structure_controls/depth.png and resized to 1024x1024).
# NOTE(review): `ip_model` is not defined in the visible part of the notebook;
# presumably it was rebuilt around a ControlNet pipeline in the truncated cells
# under the "ControlNet Depth" heading — confirm before relying on Run All.
num_samples = 2
# `pil_image` receives the image prompt and `image` receives the depth map;
# how `generate` interprets them is not visible here — verify against IPAdapterXL.
images = ip_model.generate(pil_image=image, image=depth_map, controlnet_conditioning_scale=0.7, num_samples=num_samples, num_inference_steps=30, seed=42)
# Lay the results out as a single row with one column per sample.
grid = image_grid(images, 1, num_samples)
# Bare last expression: the notebook renders it as the cell output.
grid