mirror of https://github.com/vladmandic/automatic
parent 1c74d36e29 | commit 3cd21d6b74

@@ -15,7 +15,7 @@ This release can be considered an LTS release before we kick off the next round
 - major [Wiki](https://github.com/vladmandic/automatic/wiki) and [Home](https://github.com/vladmandic/automatic) updates
 - Integrations:
   - [PuLID](https://github.com/ToTheBeginning/PuLID): Pure and Lightning ID Customization via Contrastive Alignment
-    - advanced method of face transfer with better quality as well as control over identity and appearance
+    - advanced method of face id transfer with better quality as well as control over identity and appearance
       try it out, likely the best quality available for sdxl models
     - select in *scripts -> pulid*
     - compatible with *sdxl* for text-to-image, image-to-image, inpaint and detailer workflows

@@ -98,7 +98,8 @@ This release can be considered an LTS release before we kick off the next round
 - fix network height in standard vs modern ui
 - fix k-diff enum on startup
 - fix text2video scripts
-- dont uninstall flash-attn
+- dont uninstall flash-attn
+- ui css fixes
 - move downloads of some auxiliary models to hfcache instead of models folder
 
 ## Update for 2024-10-29

@@ -13,11 +13,12 @@ footer { display: none; margin-top: 0 !important;}
 table { overflow-x: auto !important; overflow-y: auto !important; }
 td { border-bottom: none !important; padding: 0 0.5em !important; }
+tr { border-bottom: none !important; padding: 0 0.5em !important; }
-td > div > span { overflow-y: auto; max-height: 3em; overflow-x: hidden; }
 textarea { overflow-y: auto !important; }
 span { font-size: var(--text-md) !important; }
 button { font-size: var(--text-lg) !important; }
 input[type='color'] { width: 64px; height: 32px; }
+td > div > span { overflow-y: auto; max-height: 3em; overflow-x: hidden; }
 input::-webkit-outer-spin-button, input::-webkit-inner-spin-button { margin-left: 4px; }
 
 /* gradio elements */
 .block .padded:not(.gradio-accordion) { padding: 4px 0 0 0 !important; margin-right: 0; min-width: 90px !important; }

@@ -32,10 +32,8 @@ class PerceiverAttentionCA(nn.Module):
         self.dim_head = dim_head
         self.heads = heads
         inner_dim = dim_head * heads
-
         self.norm1 = nn.LayerNorm(dim if kv_dim is None else kv_dim)
         self.norm2 = nn.LayerNorm(dim)
-
         self.to_q = nn.Linear(dim, inner_dim, bias=False)
         self.to_kv = nn.Linear(dim if kv_dim is None else kv_dim, inner_dim * 2, bias=False)
         self.to_out = nn.Linear(inner_dim, dim, bias=False)
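
The projections above fix the attention geometry: queries are produced from the latent tokens at width `dim`, keys and values from the (possibly wider) `kv_dim` input, and all are mapped to `inner_dim = dim_head * heads`. A minimal shape sketch, with sizes chosen for illustration rather than taken from this diff:

```python
import torch
import torch.nn as nn

# illustrative sizes only (not from the diff)
dim, dim_head, heads, kv_dim = 1024, 64, 16, 2048
inner_dim = dim_head * heads  # 1024

to_q = nn.Linear(dim, inner_dim, bias=False)
to_kv = nn.Linear(kv_dim, inner_dim * 2, bias=False)

latents = torch.randn(2, 32, dim)   # latent/query tokens
x = torch.randn(2, 577, kv_dim)     # image feature tokens
q = to_q(latents)                   # (2, 32, 1024)
k, v = to_kv(x).chunk(2, dim=-1)    # each (2, 577, 1024)
print(q.shape, k.shape, v.shape)
```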

@@ -50,12 +48,9 @@ class PerceiverAttentionCA(nn.Module):
         """
         x = self.norm1(x)
         latents = self.norm2(latents)
-
         b, seq_len, _ = latents.shape
-
         q = self.to_q(latents)
         k, v = self.to_kv(x).chunk(2, dim=-1)
-
         q = reshape_tensor(q, self.heads)
         k = reshape_tensor(k, self.heads)
         v = reshape_tensor(v, self.heads)

@@ -65,7 +60,6 @@ class PerceiverAttentionCA(nn.Module):
         weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards
         weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
         out = weight @ v
-
         out = out.permute(0, 2, 1, 3).reshape(b, seq_len, -1)
 
         return self.to_out(out)
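
The inline comment above is the key numerical detail: applying the fourth-root scale to q and k before the matmul keeps the intermediate product within float16 range, whereas computing q @ k.T first can overflow before any rescaling happens. A standalone sketch of that trick (values chosen only to provoke overflow; assumes a PyTorch build that supports float16 matmul):

```python
import math
import torch

dim_head = 64
# large magnitudes so the raw product exceeds the float16 max (~65504)
q = torch.randn(1, 8, 77, dim_head, dtype=torch.float16) * 64
k = torch.randn(1, 8, 77, dim_head, dtype=torch.float16) * 64

scale = 1 / math.sqrt(math.sqrt(dim_head))  # dim_head ** -0.25, applied to each operand

stable = (q * scale) @ (k * scale).transpose(-2, -1)  # scaled before the matmul
naive = (q @ k.transpose(-2, -1)) * scale * scale     # same math, but may hit inf first

print(stable.isinf().any().item(), naive.isinf().any().item())  # typically: False True
```

Both expressions are algebraically identical (each equals q @ k.T / sqrt(dim_head)); only the order of operations differs.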

@@ -78,10 +72,8 @@ class PerceiverAttention(nn.Module):
         self.dim_head = dim_head
         self.heads = heads
         inner_dim = dim_head * heads
-
         self.norm1 = nn.LayerNorm(dim if kv_dim is None else kv_dim)
         self.norm2 = nn.LayerNorm(dim)
-
         self.to_q = nn.Linear(dim, inner_dim, bias=False)
         self.to_kv = nn.Linear(dim if kv_dim is None else kv_dim, inner_dim * 2, bias=False)
         self.to_out = nn.Linear(inner_dim, dim, bias=False)

@@ -96,13 +88,10 @@ class PerceiverAttention(nn.Module):
         """
         x = self.norm1(x)
         latents = self.norm2(latents)
-
         b, seq_len, _ = latents.shape
-
         q = self.to_q(latents)
         kv_input = torch.cat((x, latents), dim=-2)
         k, v = self.to_kv(kv_input).chunk(2, dim=-1)
-
         q = reshape_tensor(q, self.heads)
         k = reshape_tensor(k, self.heads)
         v = reshape_tensor(v, self.heads)

@@ -112,7 +101,6 @@ class PerceiverAttention(nn.Module):
         weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards
         weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
         out = weight @ v
-
         out = out.permute(0, 2, 1, 3).reshape(b, seq_len, -1)
 
         return self.to_out(out)
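
`reshape_tensor` itself is not touched by this diff and is not shown; judging from its call sites and the inverse permute in the forward pass, it splits the channel dimension into heads, going from (b, n, heads * dim_head) to (b, heads, n, dim_head). A plausible implementation, stated as an assumption:

```python
import torch

def reshape_tensor(x: torch.Tensor, heads: int) -> torch.Tensor:
    # assumed behavior: (b, n, heads * dim_head) -> (b, heads, n, dim_head)
    b, n, width = x.shape
    x = x.view(b, n, heads, width // heads)
    return x.transpose(1, 2)

q = torch.randn(2, 32, 1024)
print(reshape_tensor(q, 16).shape)  # torch.Size([2, 16, 32, 64])
```

The `out.permute(0, 2, 1, 3).reshape(b, seq_len, -1)` in both forward passes is the inverse of this layout.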

@@ -145,7 +133,6 @@ class IDFormer(nn.Module):
         assert depth % 5 == 0
         self.depth = depth // 5
         scale = dim ** -0.5
-
         self.latents = nn.Parameter(torch.randn(1, num_queries, dim) * scale)
         self.proj_out = nn.Parameter(scale * torch.randn(dim, output_dim))
 
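
`self.latents` is a Perceiver-style bank of learned query tokens and `self.proj_out` a learned output projection; the `dim ** -0.5` factor keeps their initial magnitudes small. A hypothetical sketch of how such parameters are typically consumed (names and sizes are illustrative, not taken from the diff):

```python
import torch
import torch.nn as nn

dim, num_queries, output_dim = 1024, 32, 2048  # illustrative sizes
scale = dim ** -0.5
latents = nn.Parameter(torch.randn(1, num_queries, dim) * scale)
proj_out = nn.Parameter(scale * torch.randn(dim, output_dim))

x = torch.randn(4, 577, dim)            # a batch of image feature tokens
lat = latents.repeat(x.shape[0], 1, 1)  # one copy of the query bank per sample
# ...attention blocks like PerceiverAttention would refine `lat` against `x` here...
out = lat @ proj_out                    # project refined queries to output_dim
print(out.shape)                        # torch.Size([4, 32, 2048])
```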

@@ -233,7 +220,6 @@ class IDEncoder(nn.Module):
                     nn.Linear(1024, context_dim),
                 ),
             )
-
             setattr(
                 self,
                 f'mapping_patch_{i}',
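
The `setattr` call above registers one mapping network per feature patch under a generated name. A self-contained sketch of the pattern (a hypothetical module, not IDEncoder itself): assigning an `nn.Module` through `setattr` registers it as a submodule, so its parameters are trained and saved like any attribute-assigned layer, and `getattr` retrieves it by the same generated name.

```python
import torch
import torch.nn as nn

class PatchMapper(nn.Module):  # hypothetical, for illustration only
    def __init__(self, num_patches=5, context_dim=768):
        super().__init__()
        for i in range(num_patches):
            # registered as a submodule, same as self.mapping_patch_0 = ...
            setattr(self, f'mapping_patch_{i}', nn.Linear(1024, context_dim))

    def forward(self, feats):
        # feats: list of (b, n, 1024) per-patch features
        return [getattr(self, f'mapping_patch_{i}')(f) for i, f in enumerate(feats)]

m = PatchMapper()
outs = m([torch.randn(1, 4, 1024) for _ in range(5)])
print(len(outs), outs[0].shape)  # 5 torch.Size([1, 4, 768])
```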

@@ -153,6 +153,7 @@ class Script(scripts.Script):
 
         self.mask_apply_overlay = shared.opts.mask_apply_overlay
         shared.opts.data['mask_apply_overlay'] = False
+        sdp = shared.opts.cross_attention_optimization == "Scaled-Dot-Product"
         strength = getattr(p, 'pulid_strength', strength)
         zero = getattr(p, 'pulid_zero', zero)
         ortho = getattr(p, 'pulid_ortho', ortho)
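
The new `sdp` local captures the "Scaled-Dot-Product" check once, so the same value can be passed to the loader below and reported in the log line. Presumably (an assumption, this diff does not say) the option maps to PyTorch's fused attention kernel, which replaces the manual softmax(q @ k.T) @ v with a single call:

```python
import torch
import torch.nn.functional as F

q = torch.randn(1, 8, 77, 64)
k = torch.randn(1, 8, 77, 64)
v = torch.randn(1, 8, 77, 64)

# fused scaled-dot-product attention, available in PyTorch >= 2.0
out = F.scaled_dot_product_attention(q, k, v)
print(out.shape)  # torch.Size([1, 8, 77, 64])
```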

@@ -173,7 +174,7 @@ class Script(scripts.Script):
             providers=devices.onnx,
             offload=offload,
             version=version,
-            sdp=shared.opts.cross_attention_optimization == "Scaled-Dot-Product",
+            sdp=sdp,
             cache_dir=shared.opts.hfcache_dir,
         )
         shared.sd_model.no_recurse = True

@@ -187,7 +188,7 @@ class Script(scripts.Script):
             return None
 
         shared.sd_model.sampler = sampler_fn
-        shared.log.info(f'PuLID: class={shared.sd_model.__class__.__name__} version="{version}" strength={strength} zero={zero} ortho={ortho} sampler={sampler_fn} images={[i.shape for i in images]} offload={offload}')
+        shared.log.info(f'PuLID: class={shared.sd_model.__class__.__name__} version="{version}" sdp={sdp} strength={strength} zero={zero} ortho={ortho} sampler={sampler_fn} images={[i.shape for i in images]} offload={offload}')
         self.pulid.attention.NUM_ZERO = zero
         self.pulid.attention.ORTHO = ortho == 'v1'
         self.pulid.attention.ORTHO_v2 = ortho == 'v2'