mirror of https://github.com/vladmandic/automatic
xadapter prototype and placeholder
parent
50beb2157d
commit
be81d48601
|
|
@ -9,12 +9,13 @@ extension-pkg-whitelist=
|
|||
fail-on=
|
||||
fail-under=10
|
||||
ignore=CVS
|
||||
ignore-paths=^repositories/.*$,
|
||||
ignore-paths=/usr/lib/.*$,
|
||||
^repositories/.*$,
|
||||
^extensions/.*$,
|
||||
^extensions-builtin/.*$,
|
||||
/usr/lib/.*$,
|
||||
^modules/dml/.*$,
|
||||
^modules/models/diffusion/.*$,
|
||||
^modules/xadapters/.*$,
|
||||
ignore-patterns=
|
||||
ignored-modules=
|
||||
jobs=0
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@
|
|||
- enhanced theme loader
|
||||
- add additional debug env variables
|
||||
- **Fixes**:
|
||||
- add variation seed to diffusers txt2img, thanks @AI-Casanova
|
||||
- handle extensions that install conflicting versions of packages
|
||||
`onnxruntime`, `opencv2-python`
|
||||
- installer refresh package cache on any install
|
||||
|
|
|
|||
16
README.md
16
README.md
|
|
@ -31,8 +31,6 @@ All individual features are not listed here, instead check [ChangeLog](CHANGELOG
|
|||
- Enhanced *Lora*/*LoCon*/*Lyco* code supporting latest trends in training
|
||||
- Built-in queue management
|
||||
- Enterprise level logging and hardened API
|
||||
- Modern localization and hints engine
|
||||
- Broad compatibility with existing extensions ecosystem and new extensions manager
|
||||
- Built in installer with automatic updates and dependency management
|
||||
- Modernized UI with theme support and number of built-in themes *(dark and light)*
|
||||
|
||||
|
|
@ -50,7 +48,7 @@ For screenshots and informations on other available themes, see [Themes Wiki](ht
|
|||
**SD.Next** supports two main backends: *Diffusers* and *Original*:
|
||||
|
||||
- **Diffusers**: Based on new [Huggingface Diffusers](https://huggingface.co/docs/diffusers/index) implementation
|
||||
Supports *original* SD models as well as *all* models listed below
|
||||
Supports *all* models listed below
|
||||
This backend is set as default for new installations
|
||||
See [wiki article](https://github.com/vladmandic/automatic/wiki/Diffusers) for more information
|
||||
- **Original**: Based on [LDM](https://github.com/Stability-AI/stablediffusion) reference implementation and significantly expanded on by [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
|
||||
|
|
@ -116,7 +114,7 @@ Also supported are modifiers such as:
|
|||
> [!IMPORTANT]
|
||||
> - Loading any model other than standard SD 1.x / SD 2.x requires use of backend **Diffusers**
|
||||
> - Loading any other models using **Original** backend is not supported
|
||||
> - Loading manually download model `.safetensors` files is supported for SD 1.x / SD 2.x / SD-XL models only
|
||||
> - Loading manually downloaded model `.safetensors` files is supported for specified models only (typically SD 1.x / SD 2.x / SD-XL models only)
|
||||
> - For all other model types, use backend **Diffusers** and use built in Model downloader or
|
||||
select model from Networks -> Models -> Reference list in which case it will be auto-downloaded and loaded
|
||||
|
||||
|
|
@ -141,7 +139,7 @@ Also supported are modifiers such as:
|
|||
- If you can't run us locally, try our friends at [RunDiffusion!](https://rundiffusion.com?utm_source=github&utm_medium=referral&utm_campaign=SDNext)
|
||||
|
||||
> [!TIP]
|
||||
> - Server can run without virtual environment,
|
||||
> - Server can run with or without virtual environment,
|
||||
Recommended to use `VENV` to avoid library version conflicts with other applications
|
||||
> - **nVidia/CUDA** / **AMD/ROCm** / **Intel/OneAPI** are auto-detected if present and available,
|
||||
For any other use case such as **DirectML**, **ONNX/Olive**, **OpenVINO** specify required parameter explicitly
|
||||
|
|
@ -169,7 +167,7 @@ Below is partial list of all available parameters, run `webui --help` for the fu
|
|||
--listen Launch web server using public IP address, default: False
|
||||
--auth AUTH Set access authentication like "user:pwd,user:pwd""
|
||||
--autolaunch Open the UI URL in the system's default browser upon launch
|
||||
--docs Mount Gradio docs at /docs, default: False
|
||||
--docs Mount API docs, default: False
|
||||
--no-hashing Disable hashing of checkpoints, default: False
|
||||
--no-metadata Disable reading of metadata from models, default: False
|
||||
--backend {original,diffusers} force model pipeline type
|
||||
|
|
@ -208,9 +206,9 @@ SD.Next comes with several extensions pre-installed:
|
|||
|
||||
### **Collab**
|
||||
|
||||
- We'd love to have additional maintainers with full admin rights. If you're interested, ping us!
|
||||
- In addition to general cross-platform code, desire is to have a lead for each of the main platforms.
|
||||
This should be fully cross-platform, but we'd really love to have additional contributors and/or maintainers to join and help lead the efforts on different platforms.
|
||||
- We'd love to have additional maintainers (which comes with full repo rights). If you're interested, ping us!
|
||||
- In addition to general cross-platform code, desire is to have a lead for each of the main platforms
|
||||
This should be fully cross-platform, but we'd really love to have additional contributors and/or maintainers to join and help lead the efforts on different platforms
|
||||
|
||||
### **Credits**
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ group.add_argument("--freeze", default=os.environ.get("SD_FREEZE", False), actio
|
|||
group.add_argument("--auth", type=str, default=os.environ.get("SD_AUTH", None), help='Set access authentication like "user:pwd,user:pwd""')
|
||||
group.add_argument("--auth-file", type=str, default=os.environ.get("SD_AUTHFILE", None), help='Set access authentication using file, default: %(default)s')
|
||||
group.add_argument("--autolaunch", default=os.environ.get("SD_AUTOLAUNCH", False), action='store_true', help="Open the UI URL in the system's default browser upon launch")
|
||||
group.add_argument('--docs', default=os.environ.get("SD_DOCS", False), action='store_true', help = "Mount Gradio docs at /docs, default: %(default)s")
|
||||
group.add_argument('--docs', default=os.environ.get("SD_DOCS", False), action='store_true', help = "Mount API docs, default: %(default)s")
|
||||
group.add_argument('--api-only', default=os.environ.get("SD_APIONLY", False), action='store_true', help = "Run in API only mode without starting UI")
|
||||
group.add_argument("--api-log", default=os.environ.get("SD_APILOG", False), action='store_true', help="Enable logging of all API requests, default: %(default)s")
|
||||
group.add_argument("--device-id", type=str, default=os.environ.get("SD_DEVICEID", None), help="Select the default CUDA device to use, default: %(default)s")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,327 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
from collections import OrderedDict
|
||||
from diffusers.models.embeddings import (
|
||||
TimestepEmbedding,
|
||||
Timesteps,
|
||||
)
|
||||
|
||||
|
||||
def conv_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D convolution module.

    All positional and keyword arguments are forwarded to the selected
    ``nn.ConvNd`` constructor. Raises ValueError for any other `dims`.
    """
    conv_classes = {1: nn.Conv1d, 2: nn.Conv2d, 3: nn.Conv3d}
    conv_cls = conv_classes.get(dims)
    if conv_cls is None:
        raise ValueError(f"unsupported dimensions: {dims}")
    return conv_cls(*args, **kwargs)
|
||||
|
||||
def avg_pool_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D average pooling module.

    Arguments are forwarded to the selected ``nn.AvgPoolNd`` constructor.
    Raises ValueError for any other `dims`.
    """
    pool_classes = {1: nn.AvgPool1d, 2: nn.AvgPool2d, 3: nn.AvgPool3d}
    pool_cls = pool_classes.get(dims)
    if pool_cls is None:
        raise ValueError(f"unsupported dimensions: {dims}")
    return pool_cls(*args, **kwargs)
||||
|
||||
|
||||
def get_parameter_dtype(parameter: torch.nn.Module):
    """Return the dtype of the first parameter (or first buffer) of `parameter`.

    Mirrors the diffusers/transformers helper: if the module has no
    parameters, falls back to buffers. The `StopIteration` branch exists for
    `torch.nn.DataParallel` compatibility in PyTorch 1.5, where iterating
    parameters on a replica can raise.
    """
    try:
        params = tuple(parameter.parameters())
        if len(params) > 0:
            return params[0].dtype

        buffers = tuple(parameter.buffers())
        if len(buffers) > 0:
            return buffers[0].dtype

    except StopIteration:
        # For torch.nn.DataParallel compatibility in PyTorch 1.5
        def find_tensor_attributes(module):
            # fix: the original annotated this as List[Tuple[str, Tensor]] without
            # importing List/Tuple/Tensor, raising NameError whenever this
            # fallback path actually executed
            return [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]

        gen = parameter._named_members(get_members_fn=find_tensor_attributes)  # pylint: disable=protected-access
        first_tuple = next(gen)
        return first_tuple[1].dtype
||||
|
||||
|
||||
class Downsample(nn.Module):
    """
    A downsampling layer with an optional convolution.

    :param channels: channels in the inputs and outputs.
    :param use_conv: a bool determining if a convolution is applied.
    :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then
                 downsampling occurs in the inner-two dimensions.
    """

    def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1):
        super().__init__()
        self.channels = channels
        self.out_channels = out_channels or channels
        self.use_conv = use_conv
        self.dims = dims
        # 3D signals downsample only the inner two (spatial) dimensions
        stride = 2 if dims != 3 else (1, 2, 2)
        if use_conv:
            self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=padding)
        else:
            assert self.channels == self.out_channels
            # fix: was `nn.MaxUnpool2d(dims, kernel_size=stride, stride=stride)`,
            # which raises TypeError (kernel_size passed both positionally and by
            # keyword) and, even if constructed, MaxUnpool2d.forward requires the
            # indices produced by MaxPool2d; the reference implementation uses
            # average pooling here (matching Upsample's non-conv branch)
            self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride)

    def forward(self, x):
        assert x.shape[1] == self.channels
        return self.op(x)
||||
|
||||
|
||||
class Upsample(nn.Module):
    """2x upsampling layer.

    With ``use_conv=True`` a ``ConvTranspose2d`` is used and ``forward`` passes
    ``output_size`` through to it; otherwise an average-pooling op is created
    (NOTE(review): AvgPool does not accept an output_size argument in forward,
    so only the conv path appears usable — confirm against callers).
    """

    def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1):
        super().__init__()
        self.channels = channels
        self.out_channels = out_channels or channels
        self.use_conv = use_conv
        self.dims = dims
        # 3D signals resample only the inner two (spatial) dimensions
        step = 2 if dims != 3 else (1, 2, 2)
        if use_conv:
            self.op = nn.ConvTranspose2d(self.channels, self.out_channels, 3, stride=step, padding=1)
        else:
            assert self.channels == self.out_channels
            self.op = avg_pool_nd(dims, kernel_size=step, stride=step)

    def forward(self, x, output_size):
        assert x.shape[1] == self.channels
        return self.op(x, output_size)
||||
|
||||
|
||||
class Linear(nn.Module):
    """Thin wrapper around ``nn.Linear`` used to project timestep embeddings."""

    def __init__(self, temb_channels, out_channels):
        super().__init__()
        self.linear = nn.Linear(temb_channels, out_channels)

    def forward(self, x):
        return self.linear(x)
||||
|
||||
|
||||
|
||||
class ResnetBlock(nn.Module):
|
||||
|
||||
def __init__(self, in_c, out_c, down, up, ksize=3, sk=False, use_conv=True, enable_timestep=False, temb_channels=None, use_norm=False):
|
||||
super().__init__()
|
||||
self.use_norm = use_norm
|
||||
self.enable_timestep = enable_timestep
|
||||
ps = ksize // 2
|
||||
if in_c != out_c or sk == False:
|
||||
self.in_conv = nn.Conv2d(in_c, out_c, ksize, 1, ps)
|
||||
else:
|
||||
self.in_conv = None
|
||||
self.block1 = nn.Conv2d(out_c, out_c, 3, 1, 1)
|
||||
self.act = nn.ReLU()
|
||||
if use_norm:
|
||||
self.norm1 = nn.GroupNorm(num_groups=32, num_channels=out_c, eps=1e-6, affine=True)
|
||||
self.block2 = nn.Conv2d(out_c, out_c, ksize, 1, ps)
|
||||
if sk == False:
|
||||
self.skep = nn.Conv2d(in_c, out_c, ksize, 1, ps)
|
||||
else:
|
||||
self.skep = None
|
||||
|
||||
self.down = down
|
||||
self.up = up
|
||||
if self.down:
|
||||
self.down_opt = Downsample(in_c, use_conv=use_conv)
|
||||
if self.up:
|
||||
self.up_opt = Upsample(in_c, use_conv=use_conv)
|
||||
if enable_timestep:
|
||||
self.timestep_proj = Linear(temb_channels, out_c)
|
||||
|
||||
|
||||
def forward(self, x, output_size=None, temb=None):
|
||||
if self.down == True:
|
||||
x = self.down_opt(x)
|
||||
if self.up == True:
|
||||
x = self.up_opt(x, output_size)
|
||||
if self.in_conv is not None: # edit
|
||||
x = self.in_conv(x)
|
||||
|
||||
h = self.block1(x)
|
||||
if temb is not None:
|
||||
temb = self.timestep_proj(temb)[:, :, None, None]
|
||||
h = h + temb
|
||||
if self.use_norm:
|
||||
h = self.norm1(h)
|
||||
h = self.act(h)
|
||||
h = self.block2(h)
|
||||
if self.skep is not None:
|
||||
return h + self.skep(x)
|
||||
else:
|
||||
return h + x
|
||||
|
||||
|
||||
class Adapter_XL(nn.Module):
    """X-Adapter multi-scale feature adapter.

    Maps a list of three feature tensors (coarsest first) to per-level residual
    features for a companion UNet. Each level stacks `nums_rb` ResnetBlocks;
    the third level upsamples on its first block. Output heads are either
    zero-initialized 1x1 convs (`'ADD'` fusion, default) or per-level
    gamma/beta/BatchNorm heads (`'SPADE'` fusion).

    NOTE: list defaults are shared across instances but are never mutated here.
    """

    def __init__(self, in_channels=[1280, 640, 320], out_channels=[1280, 1280, 640], nums_rb=3, ksize=3, sk=True, use_conv=False, use_zero_conv=True,
                 enable_timestep=False, use_norm=False, temb_channels=None, fusion_type='ADD'):
        super(Adapter_XL, self).__init__()
        self.channels = in_channels
        self.nums_rb = nums_rb
        self.body = []
        self.out = []
        self.use_zero_conv = use_zero_conv
        self.fusion_type = fusion_type
        self.gamma = []
        self.beta = []
        self.norm = []
        if fusion_type == "SPADE":
            # SPADE fusion replaces the 1x1 output convs entirely
            self.use_zero_conv = False
        for i in range(len(self.channels)):
            if self.fusion_type == 'SPADE':
                # Corresponding to SPADE <Semantic Image Synthesis with Spatially-Adaptive Normalization>
                self.gamma.append(nn.Conv2d(out_channels[i], out_channels[i], 1, padding=0))
                self.beta.append(nn.Conv2d(out_channels[i], out_channels[i], 1, padding=0))
                self.norm.append(nn.BatchNorm2d(out_channels[i]))
            elif use_zero_conv:
                self.out.append(self.make_zero_conv(out_channels[i]))
            else:
                self.out.append(nn.Conv2d(out_channels[i], out_channels[i], 1, padding=0))
            for j in range(nums_rb):
                if i == 0:
                    # 1280, 32, 32 -> 1280, 32, 32
                    self.body.append(
                        ResnetBlock(in_channels[i], out_channels[i], down=False, up=False, ksize=ksize, sk=sk, use_conv=use_conv,
                                    enable_timestep=enable_timestep, temb_channels=temb_channels, use_norm=use_norm))
                elif i == 1:
                    # 640, 64, 64 -> 1280, 64, 64 (first block changes channel count)
                    if j == 0:
                        self.body.append(
                            ResnetBlock(in_channels[i], out_channels[i], down=False, up=False, ksize=ksize, sk=sk,
                                        use_conv=use_conv, enable_timestep=enable_timestep, temb_channels=temb_channels, use_norm=use_norm))
                    else:
                        self.body.append(
                            ResnetBlock(out_channels[i], out_channels[i], down=False, up=False, ksize=ksize, sk=sk,
                                        use_conv=use_conv, enable_timestep=enable_timestep, temb_channels=temb_channels, use_norm=use_norm))
                else:
                    # 320, 64, 64 -> 640, 128, 128 (first block upsamples via ConvTranspose2d)
                    if j == 0:
                        self.body.append(
                            ResnetBlock(in_channels[i], out_channels[i], down=False, up=True, ksize=ksize, sk=sk,
                                        use_conv=True, enable_timestep=enable_timestep, temb_channels=temb_channels, use_norm=use_norm))
                    else:
                        self.body.append(
                            ResnetBlock(out_channels[i], out_channels[i], down=False, up=False, ksize=ksize, sk=sk,
                                        use_conv=use_conv, enable_timestep=enable_timestep, temb_channels=temb_channels, use_norm=use_norm))

        self.body = nn.ModuleList(self.body)
        if self.fusion_type == 'SPADE':
            # fix: register SPADE heads as submodules; plain Python lists are
            # invisible to .to()/.parameters()/state_dict()
            self.gamma = nn.ModuleList(self.gamma)
            self.beta = nn.ModuleList(self.beta)
            self.norm = nn.ModuleList(self.norm)
        else:
            # fix: register the output convs for every non-SPADE fusion;
            # forward() reads self.zero_out, which was previously created only
            # when use_zero_conv was True (AttributeError otherwise)
            self.zero_out = nn.ModuleList(self.out)

        # placeholder: the timestep embedding modules (time_proj/time_embedding)
        # used by forward(t=...) are not created here yet — they must be added
        # before enable_timestep can be exercised end-to-end

    def make_zero_conv(self, channels):
        """Return a 1x1 conv with zeroed weights/bias (residual starts as identity)."""
        return zero_module(nn.Conv2d(channels, channels, 1, padding=0))

    @property
    def dtype(self) -> torch.dtype:
        """
        `torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype).
        """
        return get_parameter_dtype(self)

    def forward(self, x, t=None):
        """Extract per-level adapter features.

        :param x: list of three feature tensors, coarsest first; the last entry
                  determines the batch size.
        :param t: optional timestep (scalar, float/int, or tensor); requires the
                  time_proj/time_embedding modules (see placeholder note above).
        :return: list of per-level outputs — tensors for ADD/zero-conv fusion,
                 [gamma, beta] tensor pairs for SPADE fusion.
        """
        features = []
        b, c, _, _ = x[-1].shape
        if t is not None:
            if not torch.is_tensor(t):
                # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can
                is_mps = x[0].device.type == "mps"
                if isinstance(t, float):  # fix: was `isinstance(timestep, float)` — undefined name
                    dtype = torch.float32 if is_mps else torch.float64
                else:
                    dtype = torch.int32 if is_mps else torch.int64
                t = torch.tensor([t], dtype=dtype, device=x[0].device)
            elif len(t.shape) == 0:
                t = t[None].to(x[0].device)

            t = t.expand(b)
            t = self.time_proj(t)  # b, 320
            t = t.to(dtype=x[0].dtype)
            t = self.time_embedding(t)  # b, 1280
        # assumes fixed SDXL feature resolutions (128x128 at the finest level) — TODO confirm
        output_size = (b, 640, 128, 128)  # last CA layer output
        for i in range(len(self.channels)):
            for j in range(self.nums_rb):
                idx = i * self.nums_rb + j
                if j == 0:
                    if i < 2:
                        out = self.body[idx](x[i], temb=t)
                    else:
                        # final level upsamples, so it needs an explicit output size
                        out = self.body[idx](x[i], output_size=output_size, temb=t)
                else:
                    out = self.body[idx](out, temb=t)
            if self.fusion_type == 'SPADE':
                out_gamma = self.gamma[i](out)
                out_beta = self.beta[i](out)
                out = [out_gamma, out_beta]
            else:
                out = self.zero_out[i](out)
            features.append(out)

        return features
||||
def zero_module(module):
    """
    Zero out the parameters of a module and return it.
    """
    for param in module.parameters():
        with torch.no_grad():
            param.zero_()
    return module
|
||||
if __name__=='__main__':
    # Standalone smoke test; requires a CUDA device.
    # NOTE(review): enable_timestep=True makes forward() use time_proj /
    # time_embedding, which are currently commented out in Adapter_XL.__init__ —
    # confirm this block still runs before relying on it.
    adapter = Adapter_XL(use_zero_conv=True,
                         enable_timestep=True, use_norm=True, temb_channels=1280, fusion_type='SPADE').cuda()
    # three feature levels at batch size 4 (coarsest first)
    x = [torch.randn(4, 1280, 32, 32).cuda(), torch.randn(4, 640, 64, 64).cuda(), torch.randn(4, 320, 64, 64).cuda()]
    t = torch.tensor([1,2,3,4]).cuda()
    result = adapter(x, t=t)
    for xx in result:
        # SPADE fusion returns [gamma, beta] pairs per level
        print(xx[0].shape)
        print(xx[1].shape)
||||
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,14 @@
|
|||
import os
|
||||
import imageio
|
||||
import numpy as np
|
||||
from typing import Union
|
||||
|
||||
import torch
|
||||
import torchvision
|
||||
import torch.distributed as dist
|
||||
|
||||
from safetensors import safe_open
|
||||
from tqdm import tqdm
|
||||
from einops import rearrange
|
||||
from model.convert_from_ckpt import convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint
|
||||
# from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora, convert_motion_lora_ckpt_to_diffusers
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
import torch
|
||||
from torch import nn
|
||||
|
||||
|
||||
class FourierEmbedder(nn.Module):
    """Sin/cos Fourier feature embedding with geometrically spaced frequencies.

    Maps an input of shape (B, N, C) to (B, N, C * num_freqs * 2) by scaling
    each coordinate with `num_freqs` frequencies and taking sin and cos.
    """

    def __init__(self, num_freqs=64, temperature=100):
        super().__init__()

        self.num_freqs = num_freqs
        self.temperature = temperature

        bands = temperature ** (torch.arange(num_freqs) / num_freqs)
        # shape (1, 1, 1, num_freqs) so it broadcasts over (B, N, C, 1)
        self.register_buffer("freq_bands", bands[None, None, None], persistent=False)

    def __call__(self, x):
        scaled = self.freq_bands * x.unsqueeze(-1)          # (B, N, C, F)
        paired = torch.stack((scaled.sin(), scaled.cos()), dim=-1)  # (B, N, C, F, 2)
        reordered = paired.permute(0, 1, 3, 4, 2)           # (B, N, F, 2, C)
        return reordered.reshape(*scaled.shape[:2], -1)     # (B, N, F*2*C)
||||
|
||||
|
||||
class PositionNet(nn.Module):
    """GLIGEN-style grounding token encoder.

    Combines per-box Fourier position embeddings (xyxy) with text features,
    substitutes learnable null embeddings where `masks` is zero, and projects
    the concatenation through an MLP to `out_dim` features per box.
    """

    def __init__(self, positive_len, out_dim, fourier_freqs=8):
        super().__init__()
        self.positive_len = positive_len
        self.out_dim = out_dim

        self.fourier_embedder = FourierEmbedder(num_freqs=fourier_freqs)
        self.position_dim = fourier_freqs * 2 * 4  # 2: sin/cos, 4: xyxy

        if isinstance(out_dim, tuple):
            out_dim = out_dim[0]
        self.linears = nn.Sequential(
            nn.Linear(self.positive_len + self.position_dim, 512),
            nn.SiLU(),
            nn.Linear(512, 512),
            nn.SiLU(),
            nn.Linear(512, out_dim),
        )

        self.null_positive_feature = torch.nn.Parameter(torch.zeros([self.positive_len]))
        self.null_position_feature = torch.nn.Parameter(torch.zeros([self.position_dim]))

    def forward(self, boxes, masks, positive_embeddings):
        keep = masks.unsqueeze(-1)

        # embedding position (it may includes padding as placeholder)
        box_embedding = self.fourier_embedder(boxes)  # B*N*4 -> B*N*C

        # learnable null embeddings, broadcastable over batch and box dims
        null_text = self.null_positive_feature.view(1, 1, -1)
        null_box = self.null_position_feature.view(1, 1, -1)

        # replace padding entries with the learnable null embeddings
        text_features = positive_embeddings * keep + (1 - keep) * null_text
        box_features = box_embedding * keep + (1 - keep) * null_box

        return self.linears(torch.cat([text_features, box_features], dim=-1))
||||
|
|
@ -15,6 +15,7 @@ exclude = [
|
|||
"modules/control/units/*_model.py",
|
||||
"modules/control/units/*_pipe.py",
|
||||
"modules/pipelines/*.py",
|
||||
"modules/xadapter/*.py",
|
||||
]
|
||||
[tool.ruff.lint]
|
||||
select = [
|
||||
|
|
|
|||
|
|
@ -1,9 +1,7 @@
|
|||
# from PIL import Image
|
||||
# import gradio as gr
|
||||
from modules import scripts, processing, shared, devices
|
||||
from modules.processing_helpers import slerp
|
||||
import torch
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
from modules import scripts, processing, shared, devices
|
||||
from modules.processing_helpers import slerp
|
||||
|
||||
|
||||
class Script(scripts.Script):
|
||||
|
|
@ -21,11 +19,9 @@ class Script(scripts.Script):
|
|||
generator = [torch.Generator(generator_device).manual_seed(s) for s in p.seeds]
|
||||
shape = (len(generator), shared.sd_model.unet.config.in_channels, p.height // shared.sd_model.vae_scale_factor,
|
||||
p.width // shared.sd_model.vae_scale_factor)
|
||||
latents = randn_tensor(shape, generator=generator, device=shared.sd_model._execution_device,
|
||||
dtype=shared.sd_model.unet.dtype)
|
||||
latents = randn_tensor(shape, generator=generator, device=shared.sd_model._execution_device, dtype=shared.sd_model.unet.dtype) # pylint: disable=protected-access
|
||||
var_generator = [torch.Generator(generator_device).manual_seed(ss) for ss in p.subseeds]
|
||||
var_latents = randn_tensor(shape, generator=var_generator, device=shared.sd_model._execution_device,
|
||||
dtype=shared.sd_model.unet.dtype)
|
||||
var_latents = randn_tensor(shape, generator=var_generator, device=shared.sd_model._execution_device, dtype=shared.sd_model.unet.dtype) # pylint: disable=protected-access
|
||||
return latents, var_latents, generator, var_generator
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -44,9 +40,6 @@ class Script(scripts.Script):
|
|||
if shared.backend != shared.Backend.DIFFUSERS:
|
||||
return
|
||||
args = list(args)
|
||||
if p.subseed_strength != 0:
|
||||
if p.subseed_strength != 0 and getattr(shared.sd_model, '_execution_device', None) is not None:
|
||||
latents, var_latents, generator, var_generator = self.get_latents(p)
|
||||
self.set_slerp(p, latents, var_latents, generator, var_generator)
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,147 @@
|
|||
# https://github.com/showlab/X-Adapter
|
||||
|
||||
import torch
|
||||
import diffusers
|
||||
import gradio as gr
|
||||
import huggingface_hub as hf
|
||||
from modules import errors, shared, devices, scripts, processing, sd_models, sd_samplers
|
||||
|
||||
|
||||
adapter = None
|
||||
|
||||
|
||||
class Script(scripts.Script):
    """SD.Next script integrating X-Adapter (https://github.com/showlab/X-Adapter):
    runs an SDXL base model together with an SD 1.x refiner through a combined
    adapter pipeline, then restores the original pipeline afterwards."""

    def title(self):
        return 'X-Adapter'

    def show(self, is_img2img):
        # prototype: hidden from the UI for now
        return False
        # return True if shared.backend == shared.Backend.DIFFUSERS else False

    def ui(self, _is_img2img):
        """Build the script UI; returns the values consumed by run()."""
        with gr.Row():
            gr.HTML('<a href="https://github.com/showlab/X-Adapter">  X-Adapter</a><br>')
        with gr.Row():
            model = gr.Dropdown(label='Adapter model', choices=['None'] + sd_models.checkpoint_tiles(), value='None')
            sampler = gr.Dropdown(label='Adapter sampler', choices=[s.name for s in sd_samplers.samplers], value='Default')
        with gr.Row():
            width = gr.Slider(label='Adapter width', minimum=64, maximum=2048, step=8, value=512)
            height = gr.Slider(label='Adapter height', minimum=64, maximum=2048, step=8, value=512)
        with gr.Row():
            start = gr.Slider(label='Adapter start', minimum=0.0, maximum=1.0, step=0.01, value=0.5)
            scale = gr.Slider(label='Adapter scale', minimum=0.0, maximum=1.0, step=0.01, value=1.0)
        with gr.Row():
            # NOTE(review): gr.Textbox takes `value=` — the `default=` kwarg looks
            # invalid for gradio; harmless only while show() returns False — verify
            lora = gr.Textbox('', label='Adapter LoRA', default='')
        return model, sampler, width, height, start, scale, lora

    def run(self, p: processing.StableDiffusionProcessing, model, sampler, width, height, start, scale, lora): # pylint: disable=arguments-differ
        """Load the adapter weights, build the combined SDXL+SD1.5 adapter
        pipeline, run processing with it, and restore the original pipeline.
        NOTE(review): the `lora` argument is accepted but never used here."""
        # deferred imports: only patch diffusers when the script actually runs
        from modules.xadapter.xadapter_hijacks import PositionNet
        diffusers.models.embeddings.PositionNet = PositionNet # patch diffusers==0.26 from diffusers==0.20
        from modules.xadapter.adapter import Adapter_XL
        from modules.xadapter.pipeline_sd_xl_adapter import StableDiffusionXLAdapterPipeline
        from modules.xadapter.unet_adapter import UNet2DConditionModel as UNet2DConditionModelAdapter

        global adapter # pylint: disable=global-statement
        if model == 'None':
            return
        else:
            # the selected adapter model is loaded via the refiner slot
            shared.opts.sd_model_refiner = model
        if shared.sd_model_type != 'sdxl':
            shared.log.error(f'X-Adapter: incorrect base model: {shared.sd_model.__class__.__name__}')
            return

        # lazy-load and cache the adapter weights at module level
        if adapter is None:
            shared.log.debug('X-Adapter: adapter loading')
            adapter = Adapter_XL()
            adapter_path = hf.hf_hub_download(repo_id='Lingmin-Ran/X-Adapter', filename='X_Adapter_v1.bin')
            adapter_dict = torch.load(adapter_path)
            adapter.load_state_dict(adapter_dict)
        try:
            if adapter is not None:
                adapter.to(devices.device)
        except Exception:
            # best-effort device move; adapter stays usable on its current device
            pass
        if adapter is None:
            shared.log.error('X-Adapter: adapter loading failed')
            return

        # reload both models with diffusers temporarily patched so the UNets are
        # constructed from the adapter-aware UNet class, then unpatch
        sd_models.unload_model_weights(op='model')
        sd_models.unload_model_weights(op='refiner')
        orig_unetcondmodel = diffusers.models.unets.unet_2d_condition.UNet2DConditionModel
        diffusers.models.UNet2DConditionModel = UNet2DConditionModelAdapter # patch diffusers with x-adapter
        diffusers.models.unets.unet_2d_condition.UNet2DConditionModel = UNet2DConditionModelAdapter # patch diffusers with x-adapter
        sd_models.reload_model_weights(op='model')
        sd_models.reload_model_weights(op='refiner')
        diffusers.models.unets.unet_2d_condition.UNet2DConditionModel = orig_unetcondmodel # unpatch diffusers
        diffusers.models.UNet2DConditionModel = orig_unetcondmodel # unpatch diffusers

        if shared.sd_refiner_type != 'sd':
            shared.log.error(f'X-Adapter: incorrect adapter model: {shared.sd_model.__class__.__name__}')
            return

        # backup pipeline and params
        orig_pipeline = shared.sd_model
        orig_prompt_attention = shared.opts.prompt_attention
        pipe = None

        try:
            shared.log.debug('X-Adapter: creating pipeline')
            # combined pipeline borrows components from both the loaded SDXL
            # base model and the SD 1.5 refiner
            pipe = StableDiffusionXLAdapterPipeline(
                vae=shared.sd_model.vae,
                text_encoder=shared.sd_model.text_encoder,
                text_encoder_2=shared.sd_model.text_encoder_2,
                tokenizer=shared.sd_model.tokenizer,
                tokenizer_2=shared.sd_model.tokenizer_2,
                unet=shared.sd_model.unet,
                scheduler=shared.sd_model.scheduler,
                vae_sd1_5=shared.sd_refiner.vae,
                text_encoder_sd1_5=shared.sd_refiner.text_encoder,
                tokenizer_sd1_5=shared.sd_refiner.tokenizer,
                unet_sd1_5=shared.sd_refiner.unet,
                scheduler_sd1_5=shared.sd_refiner.scheduler,
                adapter=adapter,
            )
            sd_models.copy_diffuser_options(pipe, shared.sd_model)
            sd_models.set_diffuser_options(pipe)
            try:
                pipe.to(device=devices.device, dtype=devices.dtype)
            except Exception:
                # best-effort device/dtype move
                pass
            # the adapter pipeline handles prompts itself, so disable prompt parsing
            shared.opts.data['prompt_attention'] = 'Fixed attention'
            prompt = shared.prompt_styles.apply_styles_to_prompt(p.prompt, p.styles)
            negative = shared.prompt_styles.apply_negative_styles_to_prompt(p.negative_prompt, p.styles)
            p.task_args['prompt'] = prompt
            p.task_args['negative_prompt'] = negative
            p.task_args['prompt_sd1_5'] = prompt
            p.task_args['width_sd1_5'] = width
            p.task_args['height_sd1_5'] = height
            p.task_args['adapter_guidance_start'] = start
            p.task_args['adapter_condition_scale'] = scale
            p.task_args['fusion_guidance_scale'] = 1.0 # ???
            if sampler != 'Default':
                pipe.scheduler_sd1_5 = sd_samplers.create_sampler(sampler, shared.sd_refiner)
            else:
                # default both schedulers to DPM-Solver++
                pipe.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
                pipe.scheduler_sd1_5 = diffusers.DPMSolverMultistepScheduler.from_config(pipe.scheduler_sd1_5.config)
            pipe.scheduler_sd1_5.config.timestep_spacing = "leading"
            shared.log.debug(f'X-Adapter: pipeline={pipe.__class__.__name__} args={p.task_args}')
            shared.sd_model = pipe
        except Exception as e:
            shared.log.error(f'X-Adapter: pipeline creation failed: {e}')
            errors.display(e, 'X-Adapter: pipeline creation failed')
            # fall back to the original pipeline; processing below still runs
            shared.sd_model = orig_pipeline

        # run pipeline
        processed: processing.Processed = processing.process_images(p) # runs processing using main loop

        # restore pipeline and params
        try:
            if adapter is not None:
                adapter.to(devices.cpu)
        except Exception:
            pass
        pipe = None
        shared.opts.data['prompt_attention'] = orig_prompt_attention
        shared.sd_model = orig_pipeline
        devices.torch_gc()
        return processed
||||
Loading…
Reference in New Issue