v10.1.0
parent
31f94bc49d
commit
71f8be3583
28
config.json
28
config.json
|
|
@ -62,7 +62,33 @@
|
|||
"batch_size_method":"standard",
|
||||
"show_extra_generation_params":true,
|
||||
"template_editor":true,
|
||||
"controlnet_name":"sd-webui-controlnet"
|
||||
"controlnet":
|
||||
{
|
||||
"extension":"sd-webui-controlnet",
|
||||
"sd1_models":
|
||||
{
|
||||
"canny":"controlnet11Models_canny",
|
||||
"depth":"controlnet11Models_depth",
|
||||
"normalmap":"control_v11p_sd15_normalbae",
|
||||
"openpose":"controlnet11Models_openpose",
|
||||
"mlsd":"",
|
||||
"lineart":"controlnet11Models_animeline",
|
||||
"softedge":"controlnet11Models_softedge",
|
||||
"scribble":"controlnet11Models_scribble",
|
||||
"segmentation":"controlnet11Models_seg",
|
||||
"shuffle":"",
|
||||
"tile":"controlnet11Models_tileE",
|
||||
"inpaint":"controlnet11Models_inpaint",
|
||||
"instructp2p":"control_v11e_sd15_ip2p",
|
||||
"recolor":"",
|
||||
"ip_adapter":"ip-adapter-plus-face_sd15",
|
||||
"t2i_adapter":""
|
||||
},
|
||||
"sdxl_models":
|
||||
{
|
||||
"canny":""
|
||||
}
|
||||
}
|
||||
},
|
||||
"ui":
|
||||
{
|
||||
|
|
|
|||
|
|
@ -3,7 +3,28 @@ All notable changes to this project will be documented in this file.
|
|||
|
||||
For more details on new features, please check the [Manual](./MANUAL.md).
|
||||
|
||||
<details open><summary>10.0.2 - 13 October 2023</summary>
|
||||
<details open><summary>10.1.0 - 13 October 2023</summary>
|
||||
|
||||
### Added
|
||||
- New shortcode `[upscale]`: Enhances a given image using one of the WebUI's upscaler methods
|
||||
- New shortcode `[interrogate]`: Generates a caption for the given image using various techniques
|
||||
- `[civitai]`: Now supports `_words` parg to include the activation text in your prompt, also writing it to the companion JSON file
|
||||
- Facelift v0.1.0: Upgraded preset `best_quality_v2` which now applies `[upscale]` as a final step
|
||||
- New helper method `ensure()`: Converts a variable to a datatype if it isn't already that datatype
|
||||
- Bodysnatcher v1.4.0: Optionally interrogate the starting image
|
||||
- ControlNet model variables may now refer to the name presets in `Config.stable_diffusion.controlnet.sd1_models`; you can adjust these to match your own filenames
|
||||
- The CN config has a place for SDXL models too, although I haven't added any entries there yet
|
||||
- Updated img2img preset `full_denoise_v3`: Reduced the CFG scale and disabled mask blur
|
||||
|
||||
### Changed
|
||||
- The setting `Config.stable_diffusion.controlnet_name` has been renamed to `Config.stable_diffusion.controlnet.extension`
|
||||
|
||||
### Fixed
|
||||
- The template editor will correctly parse files with emojis now
|
||||
|
||||
</details>
|
||||
|
||||
<details><summary>10.0.2 - 13 October 2023</summary>
|
||||
|
||||
### Added
|
||||
- `[else]`: Now supports `debug` parg to print diagnostic information
|
||||
|
|
@ -17,7 +38,7 @@ For more details on new features, please check the [Manual](./MANUAL.md).
|
|||
- Renamed ControlNet preset `photo_face` to `face_doctor_v1`
|
||||
- Facelift v0.0.2: Now defaults to the `best_quality` preset
|
||||
- Bodysnatcher v1.3.5: Updated the default `prefix` from "photo of" to "high detail RAW photo of"
|
||||
- Bodysnatcher v1.3.5: No longer runs `[img2img_autosize]` when you are on `Mask Only` mode
|
||||
- Bodysnatcher v1.3.5: No longer runs `[img2img_autosize]` when you are on `Only masked` mode
|
||||
- Bodysnatcher v1.3.5: Now applies 5px of negative mask padding when using the `Keep original hands` option, which can significantly improve blending of new image
|
||||
- Bodysnatcher v1.3.5: The Zoom Enhance features are now disabled by default, as Facelift is a better fit with Bodysnatcher
|
||||
- Bodysnatcher v1.3.5: Updated the default `inference_preset` to `subtle_v1`
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
from data.dataset import *
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
'''
|
||||
Module contains Dataset class, collate function for DataLoader and loader getter function.
|
||||
|
||||
* MiniFlickrDataset loads data from pickle file and returns image embedding and caption.
|
||||
* cl_fn is used to process batch of data and return tensors.
|
||||
* get_loader returns DataLoader object.
|
||||
'''
|
||||
|
||||
import os
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
from transformers import GPT2Tokenizer
|
||||
|
||||
class MiniFlickrDataset(Dataset):
    '''
    Map-style dataset backed by a single pickle file.

    The whole pickle is loaded into memory once; items are returned exactly
    as they were stored (no transformation is applied here).
    '''

    def __init__(self, path):
        '''
        Args:
            path: path to the pickled dataset file.

        Raises:
            FileNotFoundError: (a subclass of OSError) if path is not an existing file.
        '''
        # This class never downloads anything, so the error only reports the missing path.
        if not os.path.isfile(path):
            raise FileNotFoundError(f'Dataset file not found: {path}')

        # NOTE(security): pickle.load can execute arbitrary code; only open trusted dataset files.
        with open(path, 'rb') as f:
            self.data = pickle.load(f)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # presumably a (name, image embedding, caption) triple — verify against the file's producer
        return self.data[idx]
|
||||
|
||||
# collate_fn for DataLoader
|
||||
def cl_fn(batch, tokenizer):
    '''
    Collate a list of three-field samples into tensors for the DataLoader.

    Args:
        batch: list of samples; each sample must have exactly three fields,
            of which the second (image embedding) and third (caption) are used.
        tokenizer: callable invoked as tokenizer(captions, padding=True, return_tensors='pt')
            whose result is indexable by 'input_ids' and 'attention_mask'.

    Returns:
        (img_emb, input_ids, attention_mask)
    '''
    # transpose "list of samples" into per-field tuples; the first field is not needed here
    _, embeddings, captions = zip(*batch)

    # go through a single numpy array first: much cheaper than torch.tensor on a sequence of arrays
    img_emb = torch.tensor(np.array(embeddings))

    encoded = tokenizer(captions, padding=True, return_tensors='pt')

    return img_emb, encoded['input_ids'], encoded['attention_mask']
|
||||
|
||||
def get_loader(dataset, bs_exp=5, shuffle=True, num_workers=0, pin_memory=False, tokenizer_name='gpt2-medium'):
    '''
    Build a DataLoader whose batches are collated by cl_fn with a GPT-2 tokenizer.

    Args:
        dataset: dataset to iterate over.
        bs_exp: batch size exponent; the batch size is 2**bs_exp.
        shuffle: forwarded to DataLoader.
        num_workers: forwarded to DataLoader.
        pin_memory: forwarded to DataLoader.
        tokenizer_name: name passed to GPT2Tokenizer.from_pretrained
            (defaults to the previously hard-coded 'gpt2-medium').

    Returns:
        torch.utils.data.DataLoader yielding (img_emb, input_ids, attention_mask).
    '''
    from functools import partial

    tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_name)
    # GPT-2 has no dedicated padding token, so EOS doubles as padding
    tokenizer.pad_token = tokenizer.eos_token

    # partial instead of a lambda: a lambda collate_fn cannot be pickled, which breaks
    # num_workers > 0 on platforms that start workers with "spawn" (e.g. Windows, macOS)
    return DataLoader(
        dataset,
        batch_size=2**bs_exp,
        collate_fn=partial(cl_fn, tokenizer=tokenizer),
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory
    )
|
||||
|
|
@ -0,0 +1,268 @@
|
|||
'''
|
||||
Module contains final Model and all pieces of it.
|
||||
'''
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from transformers import CLIPModel, CLIPProcessor, GPT2LMHeadModel, GPT2Tokenizer
|
||||
|
||||
class ImageEncoder(nn.Module):
    '''
    Encodes an image and returns its embedding.
    '''

    def __init__(self, model, device='cpu'):
        '''
        Args:
            model: name/path passed to CLIPProcessor.from_pretrained and CLIPModel.from_pretrained.
            device: device the vision model and the preprocessed inputs are moved to.
        '''
        super().__init__()

        self.device = device

        self.preprocessor = CLIPProcessor.from_pretrained(model)

        # only the vision tower of CLIP is kept
        clip = CLIPModel.from_pretrained(model)
        self.model = clip.vision_model.to(self.device)

    def forward(self, image):
        # only one image at a time
        inputs = self.preprocessor(images=image, return_tensors='pt').to(self.device)

        return self.model(**inputs).pooler_output
|
||||
|
||||
class Mapping(nn.Module):
    '''
    Maps image embedding to GPT-2 embedding.

    A TransformerEncoder is followed by a linear layer that expands each
    embed_size vector into ep_len vectors of size embed_size.
    '''

    def __init__(
        self,
        ep_len,
        num_layers,
        embed_size,
        n_heads,
        forward_expansion,
        dropout,
        device='cpu'
    ):
        '''
        Args:
            ep_len: number of output embeddings produced per input embedding.
            num_layers: number of TransformerEncoder layers.
            embed_size: width of the input and of each output embedding.
            n_heads: number of attention heads per encoder layer.
            forward_expansion: feed-forward width is embed_size * forward_expansion.
            dropout: dropout probability of the encoder layers.
            device: device the sub-modules are created on / moved to.
        '''
        super().__init__()

        self.ep_len = ep_len
        self.embed_size = embed_size

        self.device = device

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_size,
            nhead=n_heads,
            dim_feedforward=embed_size*forward_expansion,
            dropout=dropout,
            batch_first=True,
            device=device
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers
        ).to(self.device)

        self.mapper = nn.Linear(embed_size, ep_len * embed_size).to(self.device)

        self.init_weights()

    def forward(self, img_embedded, train_mode=False):
        '''
        Args:
            img_embedded: image embedding(s) whose last dimension is embed_size.
            train_mode: True for batched input -> output viewed as (-1, ep_len, embed_size);
                False for a single image -> output viewed as (ep_len, embed_size).
        '''
        # NOTE(review): a 2-D (N, embed_size) input is presumably treated by nn.TransformerEncoder
        # as one unbatched sequence of length N, letting rows attend to each other — confirm the
        # intended input rank with the caller.
        hidden = self.mapper(self.transformer_encoder(img_embedded))

        if train_mode:
            # for batched input
            return hidden.view(-1, self.ep_len, self.embed_size)

        return hidden.view(self.ep_len, self.embed_size)

    def init_weights(self):
        '''
        Re-initialises every Linear (Kaiming normal weights, zero bias) and every
        LayerNorm (unit weight, zero bias) contained in this module.
        '''
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
                nn.init.zeros_(module.bias)

            elif isinstance(module, nn.LayerNorm):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
|
||||
|
||||
class TextDecoder(nn.Module):
    '''
    Processes embedding into caption.

    Wraps a GPT-2 LM-head model together with its tokenizer.
    '''

    def __init__(self, model, device='cpu'):
        '''
        Args:
            model: name/path passed to GPT2Tokenizer.from_pretrained and GPT2LMHeadModel.from_pretrained.
            device: device the language model is moved to.
        '''
        super().__init__()

        self.device = device

        tokenizer = GPT2Tokenizer.from_pretrained(model)
        # EOS doubles as the padding token
        tokenizer.pad_token = tokenizer.eos_token
        self.tokenizer = tokenizer

        gpt2 = GPT2LMHeadModel.from_pretrained(model)
        self.model = gpt2.to(self.device)
        self.vocab_size = self.model.config.vocab_size

    def forward(self, embedding, attention_mask=None):
        '''
        Feeds already-embedded inputs to the language model and returns its logits.
        '''
        outputs = self.model(inputs_embeds=embedding, attention_mask=attention_mask)

        return outputs.logits
|
||||
|
||||
class Net(nn.Module):
    '''
    Final Model class. Puts all pieces together and generates caption based on image.

    Pipeline: ImageEncoder (CLIP vision model) -> Mapping (ep_len prefix embeddings)
    -> TextDecoder (GPT-2 LM head).
    '''

    def __init__(self, clip_model, text_model, ep_len, num_layers, n_heads, forward_expansion, dropout, max_len, device='cpu'):
        '''
        Model constructor.
        Args:
            clip_model: name/path of the CLIP checkpoint used by ImageEncoder
            text_model: name/path of the GPT-2 checkpoint used by TextDecoder
            ep_len: number of prefix embeddings the Mapping produces per image
            num_layers: number of layers in the TransformerEncoder
            n_heads: number of heads in the MultiHeadAttention
            forward_expansion: expansion factor for the feedforward layer
            dropout: dropout probability
            max_len: maximum length of the generated text
            device: device all sub-modules are placed on
        '''
        super().__init__()

        self.device = device
        self.ep_len = ep_len

        self.ie = ImageEncoder(model=clip_model, device=device)
        self.mp = Mapping(ep_len=self.ep_len, num_layers=num_layers, embed_size=self.ie.model.config.hidden_size, n_heads=n_heads, forward_expansion=forward_expansion, dropout=dropout, device=device)
        self.td = TextDecoder(model=text_model, device=device)

        assert self.ie.model.config.hidden_size == self.td.model.config.n_embd, "Embedding size of models mismatch"

        self.max_len = max_len

        # Padding positions are NOT ignored by the loss: an earlier variant used
        # ignore_index=self.td.tokenizer.pad_token_id and was changed on epoch 91 of the original run.
        self.criterion = nn.CrossEntropyLoss()

        self.freeze_layers()

    def freeze_layers(self):
        '''
        Disables gradients for the whole image encoder and for the decoder
        parameters selected by the [14:-14] slice.
        '''
        # freeze everything, except 1st and last transformer layer in Decoder
        # NOTE(review): the 14-parameter offsets depend on GPT-2's parameter ordering — confirm when swapping decoders.
        for p in [*list(self.ie.parameters()), *list(self.td.parameters())[14:-14]]:
            p.requires_grad = False

    def forward(self, img, temperature=1.0):
        '''
        Caption generation for a single image.
        Args:
            img: image to generate caption for [PIL.Image]
            temperature: softmax temperature; non-positive values are reset to 1.0
        Returns:
            caption: generated caption [str]
            tokens: generated tokens [list of int], including the trailing EOS when one was produced
        '''

        if temperature <= 0.0:
            temperature = 1.0
            print('Temperature must be positive. Setting it to 1.0')

        wte = self.td.model.transformer.wte
        wpe = self.td.model.transformer.wpe
        eos_id = self.td.tokenizer.eos_token_id

        with torch.no_grad():
            img_embedded = self.ie(img)

            # (ep_len, embed_size)
            img_mapped = self.mp(img_embedded)

            # sos_emb shape embed_size -> (1, embed_size)
            sos_emb = wte(torch.tensor(self.td.tokenizer.bos_token_id).to(self.device)).unsqueeze(0)

            # (ep_len + 1, embed_size)
            start_emb = torch.cat([sos_emb, img_mapped], dim=0)

            tokens = []
            for _ in range(self.max_len):
                if tokens:
                    tok_emb = wte(torch.tensor(tokens).to(self.device))
                    emb = torch.cat([start_emb, tok_emb], dim=0)
                else:
                    emb = start_emb

                # add positional enc
                pos_emb = wpe(torch.arange(emb.shape[0]).to(self.device))

                # Out-of-place add: on the first step emb IS start_emb, so an in-place "+=" would
                # permanently bake positions into start_emb and add them a second time on every later step.
                emb = emb + pos_emb
                pred = self.td(emb)

                # NOTE: decoding is greedy (argmax), so a positive temperature does not change the chosen token.
                pred = torch.softmax(pred / temperature, dim=-1)
                _, pred = torch.max(pred, dim=1)

                last_token = pred[-1].item()
                tokens.append(last_token)

                if last_token == eos_id:
                    break

            # Strip the final token only when it really is EOS; when generation stopped at max_len
            # the last token is part of the caption.
            caption_tokens = tokens[:-1] if tokens and tokens[-1] == eos_id else tokens
            decoded = self.td.tokenizer.decode(caption_tokens).strip()

            # guard: an empty caption (e.g. EOS predicted first) has no first character to capitalise
            if decoded:
                decoded = decoded[0].upper() + decoded[1:]

        return decoded, tokens

    def train_forward(self, img_emb, trg_cap, att_mask):
        '''
        Teacher-forced forward pass returning the training loss.
        Args:
            img_emb: image embeddings already produced by CLIP (fed to Mapping with train_mode=True)
            trg_cap: target caption token ids, indexed as [batch, position]
            att_mask: attention mask matching trg_cap
        Returns:
            loss: cross-entropy between the predictions at the text positions and trg_cap shifted by one
        '''
        # method should get embedded by CLIP images and trg_text without last token.
        # dataset should contain image, embedded image, text

        x, x_mask = trg_cap[:, :-1], att_mask[:, :-1]
        y = trg_cap[:, 1:]

        img_mapped = self.mp(img_emb, train_mode=True)

        # embed all texts and concatenate them after the mapped image prefix
        text_emb = self.td.model.transformer.wte(x)

        # N, len, embed_size
        x = torch.concat([img_mapped, text_emb], dim=1)
        # the image prefix is always attended to
        x_mask = torch.concat([torch.ones(x_mask.shape[0], self.ep_len).to(self.device), x_mask], dim=1)

        pos_emb = self.td.model.transformer.wpe(torch.arange(x.shape[1]).to(self.td.device))
        pos_emb = pos_emb.expand_as(x)

        x = x + pos_emb

        res = self.td(x, attention_mask=x_mask)

        # nn.CrossEntropyLoss applies log-softmax itself and expects raw logits; feeding it
        # softmax probabilities (as before) flattens the loss and weakens the gradients.
        loss = self.criterion(res[:, self.ep_len:, :].reshape(-1, res.shape[-1]), y.reshape(-1))

        return loss
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: build both model sizes, run one inference and one training forward pass each.
    model_pairs = (
        ('openai/clip-vit-base-patch32', 'gpt2'),
        ('openai/clip-vit-large-patch14', 'gpt2-medium'),
    )

    for clip, text in model_pairs:
        net = Net(
            clip_model=clip,
            text_model=text,
            ep_len=3,
            num_layers=6,
            n_heads=16,
            forward_expansion=4,
            dropout=0.1,
            max_len=20
        )

        # inference on a random image-shaped tensor
        net.eval()
        result = net(torch.randn(3, 224, 224))
        print(result)

        # training forward on random embeddings / token ids; the last 3 positions are masked out
        net.train()
        N = 10
        emb = net.td.model.config.n_embd
        length = 20

        mask = torch.concat([torch.ones(N, length - 3), torch.zeros(N, 3)], dim=1)
        loss = net.train_forward(
            torch.rand(N, emb),
            torch.randint(1, 50000, (N, length)),
            att_mask=mask
        )
        print(loss)

        # number of parameters
        total_params = sum(p.numel() for p in net.parameters())
        print(f'Total number of parameters: {total_params}')
        trainable_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
        print(f'Number of trainable parameters: {trainable_params}')
|
||||
|
|
@ -0,0 +1,203 @@
|
|||
'''
|
||||
Module contains Trainer used in training and testing processes.
|
||||
'''
|
||||
|
||||
import io
|
||||
import os
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
class Trainer:
    '''
    Drives training, validation and qualitative testing of the captioning model,
    and saves / restores checkpoints.
    '''

    def __init__(
        self,
        model,
        optimizer,
        scaler,
        scheduler,
        train_loader,
        valid_loader,
        test_dataset='./data',
        test_path='',
        ckp_path='',
        device='cpu'
    ):
        '''
        Args:
            model: model exposing train_forward(img_emb=, trg_cap=, att_mask=) and __call__(img).
            optimizer: optimizer stepped through the scaler.
            scaler: gradient scaler (scale / unscale_ / step / update / state_dict).
            scheduler: learning-rate scheduler, stepped once per training epoch.
            train_loader: iterable of (img_emb, cap, att_mask) training batches.
            valid_loader: iterable of (img_emb, cap, att_mask) validation batches.
            test_dataset: indexable dataset whose items start with an image file name
                (used by test_step).
            test_path: directory the test image file names are relative to.
            ckp_path: checkpoint file; when it exists, the training state is restored from it.
            device: device batches are moved to.
        '''
        self.model = model
        self.optimizer = optimizer
        self.scaler = scaler
        self.scheduler = scheduler
        self.train_loader = train_loader
        self.valid_loader = valid_loader
        self.test_dataset = test_dataset
        self.test_path = test_path
        self.ckp_path = ckp_path
        self.device = device

        # Defaults are set unconditionally so every attribute read by get_training_data()
        # exists on both the fresh-start and the resume path.
        self.epoch = 0
        self.train_loss = []
        self.valid_loss = []
        self.test_result = None

        # load checkpoint
        if os.path.isfile(ckp_path):
            self._load_ckp(
                ckp_path,
                optimizer=optimizer,
                scheduler=scheduler,
                scaler=scaler,
                epoch=True,
                train_loss=True,
                valid_loss=True,
                device=device
            )

        # read after a possible restore so it reflects the loaded optimizer state
        self.cur_lr = self.optimizer.param_groups[0]['lr']

    def train_epoch(self):
        '''
        Runs one training epoch and appends the mean batch loss to self.train_loss.
        '''
        self.model.train()
        self.epoch += 1

        total_loss = 0

        loop = tqdm(self.train_loader, total=len(self.train_loader))
        loop.set_description(f'Epoch: {self.epoch} | Loss: ---')
        for batch_idx, (img_emb, cap, att_mask) in enumerate(loop):

            img_emb, cap, att_mask = img_emb.to(self.device), cap.to(self.device), att_mask.to(self.device)

            with torch.cuda.amp.autocast():
                loss = self.model.train_forward(img_emb=img_emb, trg_cap=cap, att_mask=att_mask)

            self.scaler.scale(loss).backward()
            # unscale first so the clipping threshold applies to the true gradient magnitudes
            self.scaler.unscale_(self.optimizer)

            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.3)

            self.scaler.step(self.optimizer)
            self.scaler.update()

            self.optimizer.zero_grad()

            total_loss += loss.item()

            loop.set_description(f'Epoch: {self.epoch} | Loss: {total_loss / (batch_idx + 1):.3f}')
            loop.refresh()

        # learning rate that was in effect during this epoch (recorded before the scheduler steps)
        self.cur_lr = self.optimizer.param_groups[0]['lr']
        self.train_loss.append(total_loss / (batch_idx + 1))

        self.scheduler.step()

    def valid_epoch(self):
        '''
        Runs one validation pass (no gradients) and appends the mean batch loss to self.valid_loss.
        '''
        self.model.eval()

        total_loss = 0

        loop = tqdm(self.valid_loader, total=len(self.valid_loader))
        loop.set_description(f'Validation Loss: ---')
        for batch_idx, (img_emb, cap, att_mask) in enumerate(loop):

            img_emb, cap, att_mask = img_emb.to(self.device), cap.to(self.device), att_mask.to(self.device)

            with torch.no_grad():
                with torch.cuda.amp.autocast():

                    loss = self.model.train_forward(img_emb=img_emb, trg_cap=cap, att_mask=att_mask)

            total_loss += loss.item()

            loop.set_description(f'Validation Loss: {total_loss / (batch_idx + 1):.3f}')
            loop.refresh()

        self.valid_loss.append(total_loss / (batch_idx + 1))

    def test_step(self, num_examples=4):
        '''
        Captions num_examples randomly chosen test images and stores the rendered
        figure (a PIL image) in self.test_result.

        Args:
            num_examples: number of images to caption; must be even (two images per row).
        '''
        assert num_examples % 2 == 0, 'num_examples must be even'

        self.model.eval()

        # squeeze=False keeps axs 2-D even for a single row (num_examples == 2),
        # so the [row, col] indexing below always works.
        fig, axs = plt.subplots(num_examples // 2, 2, figsize=(20, 12), squeeze=False)

        # the dataset is stored as self.test_dataset by the constructor
        random_idx = np.random.randint(0, len(self.test_dataset), size=(num_examples,))
        for idx, r in enumerate(random_idx):
            img_name, _, _ = self.test_dataset[r]

            img = Image.open(os.path.join(self.test_path, img_name))

            with torch.no_grad():
                caption, _ = self.model(img)

            ax = axs[idx // 2, idx % 2]
            ax.imshow(img)
            ax.set_title(caption)
            ax.axis('off')

        # render into memory instead of a file
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)

        fig.clear()
        plt.close(fig)

        self.test_result = Image.open(buf)

    def get_training_data(self):
        '''
        Returns the loss histories, the last recorded learning rate and the last test figure.
        '''
        return {
            'train_loss': self.train_loss,
            'valid_loss': self.valid_loss,
            'lr': self.cur_lr,
            'examples': self.test_result
        }

    def save_ckp(self, ckp_path):
        '''
        Saves model / optimizer / scheduler / scaler state and the loss histories to ckp_path.
        '''
        torch.save(
            {
                'epoch': self.epoch,
                'model_state_dict': self.model.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'scheduler_state_dict': self.scheduler.state_dict(),
                'scaler_state_dict': self.scaler.state_dict(),
                'tloss': self.train_loss,
                'vloss': self.valid_loss
            },
            ckp_path
        )

    def _load_ckp(
        self,
        checkpoint_fpath,
        optimizer=False,
        scheduler=False,
        scaler=False,
        epoch=False,
        train_loss=False,
        valid_loss=False,
        device='cpu'
    ):
        '''
        Loads entire checkpoint from file.

        The model weights are always restored; every other piece of state is restored
        only when its (truthy) flag is passed.
        '''

        # NOTE(security): torch.load unpickles the file; only load checkpoints from trusted sources.
        checkpoint = torch.load(checkpoint_fpath, map_location=device)

        self.model.load_state_dict(checkpoint['model_state_dict'])
        if optimizer:
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        if scheduler:
            self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

        if scaler:
            self.scaler.load_state_dict(checkpoint['scaler_state_dict'])

        if epoch:
            # the epoch counter is derived from the length of the stored training-loss history
            self.epoch = len(checkpoint['tloss'])

        if train_loss:
            self.train_loss = checkpoint['tloss']

        if valid_loss:
            self.valid_loss = checkpoint['vloss']
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
from utils.config import *
|
||||
from utils.downloads import *
|
||||
from utils.lr_warmup import *
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
'''
|
||||
Project's main config.
|
||||
'''
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
class ConfigS:
    '''
    Configuration of the small model variant (CLIP ViT-B/32 + GPT-2).
    '''

    # pretrained checkpoints
    clip_model: str = 'openai/clip-vit-base-patch32'
    text_model: str = 'gpt2'
    seed: int = 100
    num_workers: int = 2
    # fractions, not counts — hence float
    train_size: float = 0.84
    val_size: float = 0.13
    epochs: int = 150
    lr: float = 3e-3
    # presumably the warmup fraction handed to LRWarmup — verify against the training script
    k: float = 0.33
    # presumably the exponent of the batch size (2**batch_size_exp) — verify against the training script
    batch_size_exp: int = 6
    ep_len: int = 4
    num_layers: int = 6
    n_heads: int = 16
    forward_expansion: int = 4
    max_len: int = 40
    dropout: float = 0.1
    weights_dir: str = os.path.join('weights', 'small')
|
||||
|
||||
@dataclass
class ConfigL:
    '''
    Configuration of the large model variant (CLIP ViT-L/14 + GPT-2 medium).
    '''

    # pretrained checkpoints
    clip_model: str = 'openai/clip-vit-large-patch14'
    text_model: str = 'gpt2-medium'
    seed: int = 100
    num_workers: int = 2
    # fractions, not counts — hence float
    train_size: float = 0.84
    val_size: float = 0.13
    epochs: int = 120
    lr: float = 5e-3
    # presumably the warmup fraction handed to LRWarmup — verify against the training script
    k: float = 0.3
    # presumably the exponent of the batch size (2**batch_size_exp) — verify against the training script
    batch_size_exp: int = 5
    ep_len: int = 4
    num_layers: int = 5
    n_heads: int = 16
    forward_expansion: int = 4
    max_len: int = 40
    dropout: float = 0.08
    weights_dir: str = os.path.join('weights', 'large')
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
"""
|
||||
Utility functions for loading weights.
|
||||
"""
|
||||
|
||||
import gdown
|
||||
|
||||
|
||||
def download_weights(checkpoint_fpath, model_size="L"):
    """
    Downloads weights from Google Drive.

    Args:
        checkpoint_fpath: destination path handed to gdown.
        model_size: "L" (case-insensitive, surrounding whitespace ignored) selects the
            large-model file; any other value selects the other file.
    """

    wants_large = model_size.strip().upper() == "L"
    if wants_large:
        download_id = "1pSQruQyg8KJq6VmzhMLFbT_VaHJMdlWF"
    else:
        download_id = "1Gh32arzhW06C1ZJyzcJSSfdJDi3RgWoG"

    url = f"https://drive.google.com/uc?id={download_id}"
    gdown.download(url, checkpoint_fpath, quiet=False)
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
'''
|
||||
Learning rate scheduler with warmup utility.
|
||||
'''
|
||||
|
||||
class LRWarmup():
    '''
    Self-made learning rate scheduler with warmup.

    The value rises linearly from 0 at epoch 0 to max_lr at epoch max_point,
    then falls linearly back to 0 at epoch `epochs`.
    '''
    def __init__(self, epochs, max_lr, k):
        '''
        Args:
            epochs: total number of epochs of the schedule.
            max_lr: peak value reached at the end of the warmup.
            k: fraction of `epochs` spent warming up; must lie strictly between 0.05 and 0.95.
        '''
        assert k < 0.95 and k > 0.05, 'k must be between 0.05 and 0.95'
        self.epochs = epochs
        self.max_lr = max_lr
        # At least one warmup epoch: int(k * epochs) is 0 for short schedules, which
        # previously made lr_warmup divide by zero.
        self.max_point = max(int(k * self.epochs), 1)

    def __call__(self, epoch):
        return self.lr_warmup(epoch)

    def lr_warmup(self, epoch):
        '''
        Returns the scheduled value for the given epoch.
        '''
        # slope of the rising segment
        a_1 = self.max_lr / self.max_point

        # Degenerate schedule (epochs <= 1): there is no room for a decay segment,
        # and its slope would be a division by zero.
        if self.max_point >= self.epochs:
            return a_1 * epoch

        # slope (negative) and intercept of the falling segment, which hits 0 at `epochs`
        a_2 = self.max_lr / (self.max_point - self.epochs)
        b = - a_2 * self.epochs

        # the two lines cross at max_point, so the lower one is the schedule
        return min(a_1 * epoch, a_2 * epoch + b)
|
||||
|
|
@ -57,6 +57,10 @@ def is_int(value):
|
|||
except:
|
||||
return False
|
||||
|
||||
def ensure(var,datatype):
	"""Returns var untouched when it already is an instance of datatype, otherwise the result of datatype(var)"""
	return var if isinstance(var, datatype) else datatype(var)
|
||||
|
||||
def autocast(var):
|
||||
"""Converts a variable between string, int, and float depending on how it's formatted"""
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ class Unprompted:
|
|||
self.log.info(f"Finished loading in {time.time()-start_time} seconds.")
|
||||
|
||||
def __init__(self, base_dir="."):
|
||||
self.VERSION = "10.0.2"
|
||||
self.VERSION = "10.1.0"
|
||||
|
||||
self.shortcode_modules = {}
|
||||
self.shortcode_objects = {}
|
||||
|
|
@ -291,7 +291,8 @@ class Unprompted:
|
|||
if type(default) == list:
|
||||
for idx,val in enumerate(default):
|
||||
default[idx] = datatype(val)
|
||||
else: default = datatype(default)
|
||||
else:
|
||||
default = datatype(default)
|
||||
except ValueError:
|
||||
self.log.warning(f"Could not cast {default} to {datatype}.")
|
||||
pass
|
||||
|
|
@ -390,8 +391,8 @@ class Unprompted:
|
|||
att_split = att.split("_") # e.g. controlnet_0_enabled
|
||||
if len(att_split) >= 3 and any(chr.isdigit() for chr in att): # Make sure we have at least 2 underscores and at least one number
|
||||
self.log.debug(f"Setting ControlNet value: {att}")
|
||||
cn_path = self.extension_path(self.Config.stable_diffusion.controlnet_name)
|
||||
cnet = helpers.import_file(f"{self.Config.stable_diffusion.controlnet_name}.scripts.external_code", f"{cn_path}/scripts/external_code.py")
|
||||
cn_path = self.extension_path(self.Config.stable_diffusion.controlnet.extension)
|
||||
cnet = helpers.import_file(f"{self.Config.stable_diffusion.controlnet.extension}.scripts.external_code", f"{cn_path}/scripts/external_code.py")
|
||||
|
||||
all_units = cnet.get_all_units_in_processing(this_p)
|
||||
|
||||
|
|
@ -400,6 +401,13 @@ class Unprompted:
|
|||
this_val = imageio.imread(self.str_replace_macros(self.shortcode_user_vars[att]))
|
||||
else:
|
||||
this_val = self.shortcode_user_vars[att]
|
||||
# Apply preset model names
|
||||
if att_split[2] == "model":
|
||||
if self.shortcode_user_vars["sd_base"]== "sd1": cn_dict = self.Config.stable_diffusion.controlnet.sd1_models
|
||||
elif self.shortcode_user_vars["sd_base"] == "sdxl": cn_dict = self.Config.stable_diffusion.controlnet.sdxl_models
|
||||
|
||||
if hasattr(cn_dict,this_val):
|
||||
this_val = getattr(cn_dict, this_val)
|
||||
setattr(all_units[int(att_split[1])], "_".join(att_split[2:]), this_val)
|
||||
cnet.update_cn_script_in_processing(this_p, all_units)
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ if Config.stable_diffusion.template_editor:
|
|||
def load_file(file_name):
|
||||
# print(f"loading: {file_name}")
|
||||
|
||||
with open(f"{folder}/{file_name}", "r") as file:
|
||||
with open(f"{folder}/{file_name}", "r", encoding=Config.formats.default_encoding) as file:
|
||||
content = file.read()
|
||||
# print(f"content: {content}")
|
||||
# update main_edit_space with content
|
||||
|
|
@ -60,7 +60,7 @@ if Config.stable_diffusion.template_editor:
|
|||
|
||||
def save_file(file_name, content):
|
||||
# print(f"loading: {file_name}")
|
||||
with open(f"{folder}/{file_name}", "w") as file:
|
||||
with open(f"{folder}/{file_name}", "w", encoding=Config.formats.default_encoding) as file:
|
||||
file.write(content)
|
||||
|
||||
with gr.Blocks() as unprompted_editor_ui:
|
||||
|
|
|
|||
|
|
@ -61,9 +61,9 @@ class Shortcode():
|
|||
self.log.debug(f"{success_string} Regional Prompter")
|
||||
elif script_title == "controlnet":
|
||||
# Update the controlnet script args with a list of 0 units
|
||||
cn_path = self.Unprompted.extension_path(self.Unprompted.Config.stable_diffusion.controlnet_name)
|
||||
cn_path = self.Unprompted.extension_path(self.Unprompted.Config.stable_diffusion.controlnet.extension)
|
||||
if cn_path:
|
||||
cn_module = helpers.import_file(f"{self.Unprompted.Config.stable_diffusion.controlnet_name}.internal_controlnet.external_code", f"{cn_path}/internal_controlnet/external_code.py")
|
||||
cn_module = helpers.import_file(f"{self.Unprompted.Config.stable_diffusion.controlnet.extension}.internal_controlnet.external_code", f"{cn_path}/internal_controlnet/external_code.py")
|
||||
cn_module.update_cn_script_in_processing(self.Unprompted.main_p, [])
|
||||
self.log.debug(f"{success_string} ControlNet")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ class Shortcode():
|
|||
self.description = "Downloads a file using the Civitai API (unless you already have the file in question) and automatically adds it to your prompt."
|
||||
self.extra_nets = None
|
||||
|
||||
def is_network_installed(self, net, paths, exts=[".pt", ".ckpt", ".safetensors"]):
|
||||
def network_path(self, net, paths, exts=[".pt", ".ckpt", ".safetensors"]):
|
||||
"""Based on list_available_networks() from extensions-builtin/Lora/networks.py.
|
||||
It's not clear if there's a better/more standardized means of checking the
|
||||
user's installed extra networks. Please let me know if there is such a thing..."""
|
||||
|
|
@ -26,13 +26,13 @@ class Shortcode():
|
|||
name = os.path.splitext(os.path.basename(filename))[0]
|
||||
if name == net:
|
||||
self.log.debug(f"Extra network {net} is already installed: {filename}")
|
||||
return True
|
||||
return filename
|
||||
|
||||
return False
|
||||
return None
|
||||
|
||||
def run_atomic(self, pargs, kwargs, context):
|
||||
import lib_unprompted.helpers as helpers
|
||||
import requests, os
|
||||
import requests, os, json
|
||||
from modules import shared
|
||||
|
||||
net_directories = []
|
||||
|
|
@ -73,7 +73,7 @@ class Shortcode():
|
|||
# Defaults
|
||||
if "limit" not in kwargs: kwargs["limit"] = 1
|
||||
timeout = kwargs["_timeout"] if "_timeout" in kwargs else 60
|
||||
filename = kwargs["_file"] if "_file" in kwargs else kwargs["query"]
|
||||
filename = kwargs["_file"] if "_file" in kwargs else self.Unpromtped.parse_arg("query","")
|
||||
weight = self.Unprompted.parse_arg("_weight",1.0)
|
||||
activate = self.Unprompted.parse_arg("_activate",True)
|
||||
words = self.Unprompted.parse_arg("_words",False)
|
||||
|
|
@ -109,12 +109,13 @@ class Shortcode():
|
|||
elif net_type in ["controlnet","cn"]:
|
||||
from modules.paths_internal import extensions_dir
|
||||
kwargs["types"] = "Controlnet"
|
||||
net_directories = [extensions_dir+self.Unprompted.Config.stable_diffusion.controlnet_name+"/models"]
|
||||
net_directories = [extensions_dir+self.Unprompted.Config.stable_diffusion.controlnet.extension+"/models"]
|
||||
elif net_type in ["poses","pose","openpose"]:
|
||||
kwargs["types"] = "Poses"
|
||||
net_directories = [self.Unprompted.base_dir+"/user/poses"]
|
||||
|
||||
if not self.is_network_installed(filename,net_directories):
|
||||
net_path = self.network_path(filename,net_directories)
|
||||
if not net_path:
|
||||
# Remove system arguments from kwargs dict because we don't need to waste anyone's bandwidth
|
||||
for k in list(kwargs.keys()):
|
||||
if k.startswith("_"):
|
||||
|
|
@ -159,6 +160,13 @@ class Shortcode():
|
|||
except:
|
||||
self.log.exception("An error occurred while downloading the Civitai file.")
|
||||
|
||||
if words:
|
||||
# Replace the extension in file_path with .json:
|
||||
json_path = os.path.splitext(file_path)[0]+".json"
|
||||
# Create and open json_path for writing:
|
||||
with open(json_path, "w") as json_file:
|
||||
json.dump({"activation text":words}, json_file)
|
||||
|
||||
except Exception as e:
|
||||
self.log.exception("Exception caught while decoding JSON")
|
||||
return ""
|
||||
|
|
@ -166,7 +174,17 @@ class Shortcode():
|
|||
self.log.error(f"Request to Civitai API yielded bad response: {r.status_code}")
|
||||
return ""
|
||||
# We already have the file, check for activation text in json
|
||||
# else:
|
||||
elif words:
|
||||
json_path = os.path.splitext(net_path)[0]+".json"
|
||||
if os.path.exists(json_path):
|
||||
with open(json_path, "r") as json_file:
|
||||
json_obj = json.load(json_file)
|
||||
if "activation text" in json_obj:
|
||||
words = json_obj["activation text"]
|
||||
else:
|
||||
self.log.debug(f"Activation text not found in {json_path}.")
|
||||
else:
|
||||
self.log.debug(f"No JSON found at {json_path}.")
|
||||
|
||||
|
||||
# Return assembled prompt string
|
||||
|
|
@ -178,4 +196,7 @@ class Shortcode():
|
|||
elif kwargs["types"] == "TextualInversion":
|
||||
return_string += f"({filename}:{weight})"
|
||||
|
||||
return return_string
|
||||
return return_string
|
||||
|
||||
def ui(self, gr):
|
||||
return
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
try:
|
||||
from modules import shared
|
||||
except:
|
||||
pass # for unprompted_dry
|
||||
|
||||
|
||||
class Shortcode():
    """The `[interrogate]` shortcode: produce a text caption for an image.

    Three captioning back-ends are supported, selected with the `method`
    argument: "CLIP" (the WebUI's built-in interrogator), "BLIP-2" and
    "CLIPxGPT". The last two keep their loaded model on the instance so that
    consecutive calls with the same method and model name can reuse it.
    """

    # Methods accepted by the `method` argument.
    SUPPORTED_METHODS = ("BLIP-2", "CLIP", "CLIPxGPT")

    def __init__(self, Unprompted):
        self.Unprompted = Unprompted
        self.description = "Generates a caption for the given image using various techniques."
        # Cached model / processor and the settings they were loaded with.
        self.model = None
        self.processor = None
        self.last_method = ""
        self.last_model_name = ""

    def _get_cached(self, method, model_name):
        """Return the cached model if it matches `method`/`model_name`, else None."""
        if method != self.last_method or model_name != self.last_model_name or self.model is None:
            self.log.info(f"Loading {method} model...")
            return None
        self.log.info(f"Using cached {method} model.")
        return self.model

    def _caption_blip2(self, image, model_name, prompt, max_tokens):
        """Caption `image` with a BLIP-2 model; returns `(model, caption)`."""
        import torch
        from transformers import AutoProcessor, Blip2ForConditionalGeneration

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = self._get_cached("BLIP-2", model_name)
        if model is None:
            cache_dir = f"{self.Unprompted.base_dir}/{self.Unprompted.Config.subdirectories.models}/BLIP-2"
            self.processor = AutoProcessor.from_pretrained(model_name, cache_dir=cache_dir, low_cpu_mem_usage=True)
            model = Blip2ForConditionalGeneration.from_pretrained(model_name, torch_dtype=torch.float16, cache_dir=cache_dir, low_cpu_mem_usage=True)
            model.to(device)

        inputs = self.processor(image, text=prompt, return_tensors="pt").to(device, torch.float16)
        generated_ids = model.generate(**inputs, max_new_tokens=max_tokens)
        caption = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        return model, caption

    def _caption_clip(self, image):
        """Caption `image` with the WebUI's standard interrogator."""
        from modules import shared, lowvram, devices

        self.log.info("Calling the WebUI's standard CLIP interrogator...")
        lowvram.send_everything_to_cpu()
        devices.torch_gc()
        shared.interrogator.load()
        try:
            return shared.interrogator.generate_caption(image.convert("RGB"))
        finally:
            # Always release the interrogator, even if captioning raised.
            shared.interrogator.unload()

    def _caption_clipxgpt(self, image, model_name):
        """Caption `image` with the bundled CLIPxGPT network; returns `(model, caption)`."""
        import os
        import torch
        import lib_unprompted.helpers as helpers
        from lib_unprompted.clipxgpt.model.model import Net

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = self._get_cached("CLIPxGPT", model_name)
        if model is None:
            model = Net(
                clip_model="openai/clip-vit-large-patch14",
                text_model="gpt2-medium",
                ep_len=4,
                num_layers=5,
                n_heads=16,
                forward_expansion=4,
                dropout=0.08,
                max_len=40,
                device=device,
            )
            ckp_file = f"{self.Unprompted.base_dir}/{self.Unprompted.Config.subdirectories.models}/clipxgpt/{model_name}.pt"
            if not os.path.exists(ckp_file):
                self.log.info("Downloading CLIPxGPT model...")
                # NOTE(review): the download URL is fixed and does not depend on
                # `model_name` - confirm this is intended for non-default names.
                helpers.download_file(ckp_file, "https://drive.google.com/uc?export=download&id=1Gh32arzhW06C1ZJyzcJSSfdJDi3RgWoG")
            checkpoint = torch.load(ckp_file, map_location=device)
            model.load_state_dict(checkpoint, strict=False)

        model.eval()
        with torch.no_grad():
            caption, _ = model(image, 1.0)  # second argument is the temperature
        return model, caption

    def run_atomic(self, pargs, kwargs, context):
        """Caption the target image and return the caption string.

        Arguments are read through `self.Unprompted.parse_arg`:
        `image` (falls back to `current_image()`; a string is opened as a file
        path), `method` (default "CLIP"), `model`, `text` or `context` (prompt
        prefix for BLIP-2), `question`, `max_tokens` (default 50) and `unload`.
        `pargs`, `kwargs` and `context` are not read directly here.

        Returns an empty string, after logging an error, when `method` is not
        one of `SUPPORTED_METHODS`.
        """
        parse_arg = self.Unprompted.parse_arg

        image = parse_arg("image", False)
        if not image:
            image = self.Unprompted.current_image()
        if isinstance(image, str):
            from PIL import Image
            image = Image.open(image)

        method = parse_arg("method", "CLIP")
        model_name = parse_arg("model", "")
        # The UI advertises this argument as `context`; `text` is kept for
        # backward compatibility and wins when both are supplied.
        prompt = parse_arg("text", "") or parse_arg("context", "")
        question = parse_arg("question", "")
        max_tokens = parse_arg("max_tokens", 50)
        if question:
            prompt = f"Question: {question} Answer:"
        unload = parse_arg("unload", False)

        # Default models per method
        if not model_name:
            if method == "BLIP-2":
                model_name = "Salesforce/blip2-opt-2.7b"
            elif method == "CLIPxGPT":
                model_name = "large_model"

        if method not in self.SUPPORTED_METHODS:
            self.log.error(f"Unknown interrogation method: {method}")
            return ""

        model = None
        if method == "BLIP-2":
            model, caption = self._caption_blip2(image, model_name, prompt, max_tokens)
        elif method == "CLIP":
            caption = self._caption_clip(image)
        else:
            model, caption = self._caption_clipxgpt(image, model_name)

        self.log.debug(f"Caption method {method} returned: {caption}")

        # Cache handling
        self.last_method = method
        self.last_model_name = model_name
        if unload:
            self.model = None
            self.processor = None
        elif method != "CLIP":
            # The CLIP path loads no model of its own, so it leaves the cache untouched.
            self.model = model

        return caption

    def ui(self, gr):
        """Declare the shortcode's controls; the text after the arrow in each label names its argument."""
        gr.Image(label="Image to perform interrogation on (defaults to SD output) 🡢 image", type="filepath", interactive=True)
        gr.Radio(label="Interrogation method 🡢 method", value="CLIP", choices=["BLIP-2", "CLIP", "CLIPxGPT"], info="Note: The other methods require large model downloads!")
        gr.Text(label="Model name 🡢 model", value="", info="Accepts Hugging Face model strings")
        gr.Text(label="Context 🡢 context", value="", info="For BLIP-2, provide contextual information for the interrogation.")
        gr.Text(label="Question 🡢 question", value="", info="For BLIP-2, ask a question about the image.")
        gr.Slider(label="Max Tokens 🡢 max_tokens", value=50, minimum=1, maximum=100, step=1, info="For BLIP-2, the maximum number of tokens to generate.")
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
try:
|
||||
from modules import shared
|
||||
except:
|
||||
pass # for unprompted_dry
|
||||
|
||||
|
||||
class Shortcode():
    """The `[upscale]` shortcode: run an image through one or more WebUI upscalers."""

    def __init__(self, Unprompted):
        self.Unprompted = Unprompted
        self.description = "Enhances a given image using the WebUI's built-in upscaler methods."

    def run_atomic(self, pargs, kwargs, context):
        """Upscale the target image and push the blended result to the output.

        Arguments are read through `self.Unprompted.parse_arg`:
        `image` (falls back to `current_image()`; a string is opened as a file
        path), `scale` (default 1), `visibility` (blend factor, default 1.0),
        `limit` (maximum number of matched upscalers, default 100),
        `keep_res` (resize the result back to the input size) and `models`
        (upscaler names matched against `shared.sd_upscalers`).
        `pargs`, `kwargs` and `context` are not read directly here.

        Always returns an empty string; the image is delivered via
        `self.Unprompted.current_image(...)`.
        """
        from PIL import Image
        import lib_unprompted.helpers as helpers

        parse_arg = self.Unprompted.parse_arg

        image = parse_arg("image", False)
        if not image:
            image = self.Unprompted.current_image()
        if isinstance(image, str):
            image = Image.open(image)
        orig_image = image.copy()

        scale = parse_arg("scale", 1)
        visibility = parse_arg("visibility", 1.0)
        limit = parse_arg("limit", 100)
        keep_res = parse_arg("keep_res", False)

        # Resolve requested names to installed upscalers, in the order requested.
        requested = helpers.ensure(parse_arg("models", "None"), list)
        models = []
        for wanted in requested:
            match = next((u for u in shared.sd_upscalers if u.name == wanted), None)
            if match is not None:
                models.append(match)
            if len(models) >= limit:
                self.log.info(f"Upscale model limit satisfied ({limit}). Proceeding...")
                break

        for model in models:
            self.log.info(f"Upscaling {scale}x with {model.name}...")
            image = model.scaler.upscale(image, scale, model.data_path)
            # NOTE(review): kept inside the loop so each pass starts from the
            # original resolution - confirm against the upstream indentation.
            if keep_res:
                # Image.LANCZOS is the same filter as the ANTIALIAS alias that Pillow 10 removed.
                image = image.resize(orig_image.size, Image.LANCZOS)

        # Append to output window
        try:
            if not keep_res:
                orig_image = orig_image.resize(image.size, Image.LANCZOS)
            self.Unprompted.current_image(Image.blend(orig_image, image, visibility))
        except Exception:
            # Still best-effort, but no longer silent: record why the output was not updated.
            self.log.exception("Could not blend the upscaled image into the output.")

        return ""

    def ui(self, gr):
        """Declare the shortcode's controls; the text after the arrow in each label names its argument."""
        gr.Image(label="Image to perform upscaling on (defaults to SD output) 🡢 image", type="filepath", interactive=True)
        gr.Dropdown(label="Upscaler Model(s) 🡢 models", choices=[upscaler.name for upscaler in shared.sd_upscalers], multiselect=True)
        gr.Slider(label="Upscale Factor 🡢 scale", value=1, maximum=16, minimum=1, interactive=True, step=1)
        gr.Slider(label="Upscale Visibility 🡢 visibility", value=1.0, maximum=1.0, minimum=0.0, interactive=True, step=0.01)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
[template name="Bodysnatcher v1.3.5"]
|
||||
[template name="Bodysnatcher v1.4.0"]
|
||||

|
||||
<details open><summary>⚠️ Important info, please read carefully</summary>
|
||||
|
||||
|
|
@ -43,6 +43,7 @@ Always bodysnatch responsibly.<br>
|
|||
[set subject _new _label="New subject"]mona lisa[/set]
|
||||
[set simple_description _new _label="Simple Description" _info="These terms will apply to both the full image and the cropped face, 1-3 words are usually plenty"][/set]
|
||||
[set class _new _label="Class" _info="The search term that determines the inpainting mask"]woman[/set]
|
||||
[set interrogate _new _ui="checkbox" _label="Interrogate starting image" _info="Adds a descriptive caption to the prompt"]1[/set]
|
||||
[/wizard]
|
||||
|
||||
[wizard accordion _label="🎭 Mask Settings"]
|
||||
|
|
@ -76,6 +77,7 @@ Always bodysnatch responsibly.<br>
|
|||
|
||||
[set debug _new _label="Debug Mode" _ui="checkbox"]0[/set]
|
||||
|
||||
[if interrogate][interrogate][/if]
|
||||
[sets neg_mask=""]
|
||||
[if "(keep_hands==1 and background_mode==0) or (keep_hands==0 and background_mode==1)"]
|
||||
[sets neg_mask=fingers]
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
[template name="Facelift v0.0.2"]
|
||||
[template name="Facelift v0.1.0"]
|
||||
An all-in-one solution for performing faceswaps by combining different models and postprocessing techniques.
|
||||
[/template]
|
||||
[wizard row]
|
||||
[set faces _new _ui="file" _label="New face image(s)" _file_count="multiple" _file_types="image"][/set]
|
||||
[set body _new _ui="image" _label="Body image to perform swap on (defaults to SD output)" _remember][/set]
|
||||
[/wizard]
|
||||
[set preset _new _ui="dropdown" _choices="{filelist '%BASE_DIR%/templates/common/presets/facelift/*.*' _basename _hide_ext}"]best_quality[/set]
|
||||
[set preset _new _ui="dropdown" _choices="{filelist '%BASE_DIR%/templates/common/presets/facelift/*.*' _basename _hide_ext}"]best_quality_v2[/set]
|
||||
[set unload _new _ui="checkbox" _label="Unload resources after inference" _info="Frees up VRAM but slows down inference time."]0[/set]
|
||||
[if unload][set unload_all]all[/set][/if][else][set unload_all][/set][/else]
|
||||
[# Reduce inference steps to 2 if we're using an external image. (setting it to 1 can cause errors)]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
[sets
|
||||
cn_0_enabled=1 cn_0_pixel_perfect=1 cn_0_module=openpose_faceonly cn_0_model=controlnet11Models_openpose cn_0_weight=1.0
|
||||
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=softedge_hed cn_1_model=controlnet11Models_softedge cn_1_weight=1.0
|
||||
cn_0_enabled=1 cn_0_pixel_perfect=1 cn_0_module=openpose_faceonly cn_0_model=openpose cn_0_weight=1.0
|
||||
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=softedge_hed cn_1_model=softedge cn_1_weight=1.0
|
||||
]
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
[sets
|
||||
cn_0_enabled=1 cn_0_pixel_perfect=1 cn_0_module=depth_midas cn_0_model=controlnet11Models_depth cn_0_weight=1.0 cn_0_control_mode=1
|
||||
cn_1_enabled=0 cn_1_pixel_perfect=1 cn_1_module=normal_midas cn_1_model=controlnet11Models_normal cn_1_weight=1.0 cn_1_control_mode=1
|
||||
cn_0_enabled=1 cn_0_pixel_perfect=1 cn_0_module=depth_midas cn_0_model=depth cn_0_weight=1.0 cn_0_control_mode=1
|
||||
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=normal_midas cn_1_model=normalmap cn_1_weight=1.0 cn_1_control_mode=1
|
||||
cn_2_enabled=1 cn_2_pixel_perfect=1 cn_2_module=reference_adain cn_2_weight=1.0 cn_2_control_mode=1
|
||||
cn_3_enabled=1 cn_3_pixel_perfect=1 cn_3_module=lineart_anime_denoise cn_3_model=controlnet11Models_animeline cn_3_weight=1.0 cn_3_control_mode=1
|
||||
cn_3_enabled=1 cn_3_pixel_perfect=1 cn_3_module=lineart_anime_denoise cn_3_model=lineart cn_3_weight=1.0 cn_3_control_mode=1
|
||||
]
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
[sets
|
||||
cn_0_enabled=1 cn_0_pixel_perfect=0 cn_0_module=none cn_0_model=control_v11e_sd15_ip2p_fp16 cn_0_weight=0.15 cn_0_guidance_end=0.25 cn_0_control_mode=0
|
||||
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=openpose_full cn_1_model=control11Models_openpose cn_1_weight=1.0
|
||||
cn_2_enabled=1 cn_2_pixel_perfect=1 cn_2_module=softedge_hed cn_2_model=controlnet11Models_softedge cn_2_weight=0.25
|
||||
cn_0_enabled=1 cn_0_pixel_perfect=0 cn_0_model=instructp2p cn_0_weight=0.15 cn_0_guidance_end=0.25 cn_0_control_mode=0
|
||||
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=openpose_full cn_1_model=openpose cn_1_weight=1.0
|
||||
cn_2_enabled=1 cn_2_pixel_perfect=1 cn_2_module=softedge_hed cn_2_model=softedge cn_2_weight=0.25
|
||||
]
|
||||
|
|
@ -1,3 +1,3 @@
|
|||
[sets
|
||||
cn_0_enabled=1 cn_0_pixel_perfect=0 cn_0_module=none cn_0_model=control_v11e_sd15_ip2p_fp16 cn_0_weight=0.15 cn_0_guidance_end=0.25 cn_0_control_mode=0
|
||||
cn_0_enabled=1 cn_0_pixel_perfect=0 cn_0_module=none cn_0_model=instructp2p cn_0_weight=0.15 cn_0_guidance_end=0.25 cn_0_control_mode=0
|
||||
]
|
||||
|
|
@ -1 +1 @@
|
|||
[restore_faces unload="{get unload}" method=gfpgan image="{get body}"][faceswap "{get faces}" unload="{get unload_all}" minimum_similarity=-999][restore_faces unload="{get unload}" method=gpen]
|
||||
[restore_faces unload="{get unload}" method=gfpgan image="{get body}"][faceswap "{get faces}" unload="{get unload_all}" minimum_similarity=-999][restore_faces unload="{get unload}" method=gpen][upscale models="4x-UltraSharp|R-ESRGAN 4x+" scale=1 limit=1 visibility=0.8 keep_res]
|
||||
|
|
@ -1 +0,0 @@
|
|||
[sets cfg_scale=7.5 sampler_name="DPM++ 3M SDE" steps=20 denoising_strength=1.0 mask_blur=10]
|
||||
|
|
@ -0,0 +1 @@
|
|||
[sets cfg_scale=4.5 sampler_name="DPM++ 3M SDE" steps=20 denoising_strength=1.0 mask_blur=0]
|
||||
Loading…
Reference in New Issue