pull/223/head
ThereforeGames 2023-10-13 15:36:58 -04:00
parent 31f94bc49d
commit 71f8be3583
26 changed files with 929 additions and 34 deletions

View File

@ -62,7 +62,33 @@
"batch_size_method":"standard",
"show_extra_generation_params":true,
"template_editor":true,
"controlnet_name":"sd-webui-controlnet"
"controlnet":
{
"extension":"sd-webui-controlnet",
"sd1_models":
{
"canny":"controlnet11Models_canny",
"depth":"controlnet11Models_depth",
"normalmap":"control_v11p_sd15_normalbae",
"openpose":"controlnet11Models_openpose",
"mlsd":"",
"lineart":"controlnet11Models_animeline",
"softedge":"controlnet11Models_softedge",
"scribble":"controlnet11Models_scribble",
"segmentation":"controlnet11Models_seg",
"shuffle":"",
"tile":"controlnet11Models_tileE",
"inpaint":"controlnet11Models_inpaint",
"instructp2p":"control_v11e_sd15_ip2p",
"recolor":"",
"ip_adapter":"ip-adapter-plus-face_sd15",
"t2i_adapter":""
},
"sdxl_models":
{
"canny":""
}
}
},
"ui":
{

View File

@ -3,7 +3,28 @@ All notable changes to this project will be documented in this file.
For more details on new features, please check the [Manual](./MANUAL.md).
<details open><summary>10.0.2 - 13 October 2023</summary>
<details open><summary>10.1.0 - 13 October 2023</summary>
### Added
- New shortcode `[upscale]`: Enhances a given image using one of the WebUI's upscaler methods
- New shortcode `[interrogate]`: Generates a caption for the given image using various techniques
- `[civitai]`: Now supports `_words` parg to include the activation text in your prompt, also writing it to the companion JSON file
- Facelift v0.1.0: Upgraded preset `best_quality_v2` which now applies `[upscale]` as a final step
- New helper method `ensure()`: Converts a variable to a datatype if it isn't already that datatype
- Bodysnatcher v1.4.0: Optionally interrogate the starting image
- ControlNet model variables may now refer to the name presets in `Config.stable_diffusion.controlnet.sd1_models`; you can adjust these to match your own filenames
- The CN config has a place for SDXL models too, although I haven't added any entries there yet
- Updated img2img preset `full_denoise_v3`: Reduced the CFG scale and disabled mask blur
### Changed
- The setting `Config.stable_diffusion.controlnet_name` has been renamed to `Config.stable_diffusion.controlnet.extension`
### Fixed
- The template editor will correctly parse files with emojis now
</details>
<details><summary>10.0.2 - 13 October 2023</summary>
### Added
- `[else]`: Now supports `debug` parg to print diagnostic information
@ -17,7 +38,7 @@ For more details on new features, please check the [Manual](./MANUAL.md).
- Renamed ControlNet preset `photo_face` to `face_doctor_v1`
- Facelift v0.0.2: Now defaults to the `best_quality` preset
- Bodysnatcher v1.3.5: Updated the default `prefix` from "photo of" to "high detail RAW photo of"
- Bodysnatcher v1.3.5: No longer runs `[img2img_autosize]` when you are on `Mask Only` mode
- Bodysnatcher v1.3.5: No longer runs `[img2img_autosize]` when you are on `Only masked` mode
- Bodysnatcher v1.3.5: Now applies 5px of negative mask padding when using the `Keep original hands` option, which can significantly improve blending of new image
- Bodysnatcher v1.3.5: The Zoom Enhance features are now disabled by default, as Facelift is a better fit with Bodysnatcher
- Bodysnatcher v1.3.5: Updated the default `inference_preset` to `subtle_v1`

View File

@ -0,0 +1 @@
from data.dataset import *

View File

@ -0,0 +1,58 @@
'''
Module contains Dataset class, collate function for DataLoader and loader getter function.
* MiniFlickrDataset loads data from pickle file and returns image embedding and caption.
* cl_fn is used to process batch of data and return tensors.
* get_loader returns DataLoader object.
'''
import os
import pickle
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer
class MiniFlickrDataset(Dataset):
    '''
    Dataset that serves samples from a single pickle file.

    The whole file is unpickled once in the constructor and kept in memory;
    indexing simply returns the stored sample unchanged.
    '''

    def __init__(self, path):
        '''
        Args:
            path: location of the pickle file holding the samples.

        Raises:
            OSError: if `path` is not an existing regular file.
        '''
        dataset_present = os.path.isfile(path)
        if not dataset_present:
            raise OSError('Dataset file not found. Downloading...')

        # NOTE(review): pickle executes code embedded in the file while loading,
        # so this must only ever be pointed at trusted dataset files.
        with open(path, 'rb') as handle:
            self.data = pickle.load(handle)

    def __len__(self):
        '''Number of samples stored in the pickle.'''
        return len(self.data)

    def __getitem__(self, idx):
        '''Return the sample stored at position `idx`.'''
        return self.data[idx]
# collate_fn for DataLoader
def cl_fn(batch, tokenizer):
    '''
    Collate a list of dataset samples into batch tensors.

    Args:
        batch: list of 3-element samples; the first element is ignored, the second
            is the image embedding and the third is the caption.
        tokenizer: callable invoked as `tokenizer(captions, padding=True, return_tensors='pt')`
            that returns a mapping with 'input_ids' and 'attention_mask'.

    Returns:
        Tuple `(img_emb, input_ids, attention_mask)`.
    '''
    # Transpose [(a, emb, cap), ...] into three parallel tuples.
    _, embeddings, captions = zip(*batch)

    # Go through a single numpy array first, then convert that to a tensor.
    img_emb = torch.tensor(np.array(embeddings))

    encoded = tokenizer(captions, padding=True, return_tensors='pt')
    return img_emb, encoded['input_ids'], encoded['attention_mask']
def get_loader(dataset, bs_exp=5, shuffle=True, num_workers=0, pin_memory=False):
    '''
    Build a DataLoader that batches samples with `cl_fn` and a GPT-2 tokenizer.

    Args:
        dataset: dataset whose samples `cl_fn` can collate.
        bs_exp: batch size exponent; the batch size is `2**bs_exp`.
        shuffle: forwarded to DataLoader.
        num_workers: forwarded to DataLoader.
        pin_memory: forwarded to DataLoader.

    Returns:
        A configured `torch.utils.data.DataLoader`.
    '''
    from functools import partial

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
    # Padding is requested in cl_fn, so a pad token must exist; reuse EOS for it.
    tokenizer.pad_token = tokenizer.eos_token

    # A lambda cannot be pickled, which breaks worker start-up on platforms that
    # spawn worker processes when num_workers > 0. A partial over the module-level
    # cl_fn is picklable and calls it with exactly the same arguments.
    collate = partial(cl_fn, tokenizer=tokenizer)

    return DataLoader(
        dataset,
        batch_size=2**bs_exp,
        collate_fn=collate,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory
    )

View File

@ -0,0 +1,268 @@
'''
Module contains final Model and all pieces of it.
'''
import torch
import torch.nn as nn
from transformers import CLIPModel, CLIPProcessor, GPT2LMHeadModel, GPT2Tokenizer
class ImageEncoder(nn.Module):
    '''
    Encodes an image and returns its embedding.

    Wraps the CLIP preprocessor together with the vision tower of the CLIP model
    identified by `model`.
    '''

    def __init__(self, model, device='cpu'):
        '''
        Args:
            model: name or path passed to `from_pretrained` for both the processor and the model.
            device: device the vision model is moved to.
        '''
        super().__init__()

        self.device = device
        self.preprocessor = CLIPProcessor.from_pretrained(model)

        # Only the vision half of CLIP is kept.
        clip = CLIPModel.from_pretrained(model)
        self.model = clip.vision_model.to(self.device)

    def forward(self, image):
        '''Preprocess `image`, run the vision model and return its pooled output.'''
        # only one image at a time
        inputs = self.preprocessor(images=image, return_tensors='pt').to(self.device)
        return self.model(**inputs).pooler_output
class Mapping(nn.Module):
    '''
    Maps an image embedding to a sequence of `ep_len` GPT-2 sized embeddings.

    A TransformerEncoder processes the input, then a linear layer expands every
    `embed_size` vector to `ep_len * embed_size` values that are reshaped into
    `ep_len` separate embeddings.
    '''

    def __init__(self, ep_len, num_layers, embed_size, n_heads, forward_expansion, dropout, device='cpu'):
        '''
        Args:
            ep_len: number of embeddings produced per input vector.
            num_layers: number of layers in the TransformerEncoder.
            embed_size: size of the input and of every produced embedding.
            n_heads: number of attention heads.
            forward_expansion: feed-forward width as a multiple of `embed_size`.
            dropout: dropout probability of the encoder layers.
            device: device the sub-modules are created on / moved to.
        '''
        super().__init__()

        self.ep_len = ep_len
        self.embed_size = embed_size
        self.device = device

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_size,
            nhead=n_heads,
            dim_feedforward=embed_size * forward_expansion,
            dropout=dropout,
            batch_first=True,
            device=device
        )
        encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.transformer_encoder = encoder.to(self.device)

        self.mapper = nn.Linear(embed_size, ep_len * embed_size).to(self.device)

        self.init_weights()

    def forward(self, img_embedded, train_mode=False):
        '''
        Args:
            img_embedded: image embedding(s) whose last dimension is `embed_size`.
            train_mode: when True keep a leading batch dimension in the result.

        Returns:
            Tensor viewed as (-1, ep_len, embed_size) in train mode, otherwise (ep_len, embed_size).
        '''
        # NOTE(review): a 2-D (N, embed_size) input reaches the encoder without an explicit
        # batch dimension - confirm the encoder treats it the way training expects.
        projected = self.mapper(self.transformer_encoder(img_embedded))

        if train_mode:
            # for batched input
            return projected.view(-1, self.ep_len, self.embed_size)
        return projected.view(self.ep_len, self.embed_size)

    def init_weights(self):
        '''Kaiming-initialise every Linear layer and reset every LayerNorm to weight 1 / bias 0.'''
        for module in self.modules():
            if isinstance(module, nn.LayerNorm):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
                nn.init.zeros_(module.bias)
class TextDecoder(nn.Module):
    '''
    Turns a sequence of embeddings into next-token logits with a GPT-2 language model.

    Also owns the matching tokenizer, whose pad token is set to the EOS token.
    '''

    def __init__(self, model, device='cpu'):
        '''
        Args:
            model: name or path passed to `from_pretrained` for tokenizer and language model.
            device: device the language model is moved to.
        '''
        super().__init__()

        self.device = device

        tokenizer = GPT2Tokenizer.from_pretrained(model)
        tokenizer.pad_token = tokenizer.eos_token
        self.tokenizer = tokenizer

        language_model = GPT2LMHeadModel.from_pretrained(model)
        self.model = language_model.to(self.device)
        self.vocab_size = self.model.config.vocab_size

    def forward(self, embedding, attention_mask=None):
        '''Feed `embedding` as `inputs_embeds` to the language model and return its logits.'''
        output = self.model(inputs_embeds=embedding, attention_mask=attention_mask)
        return output.logits
class Net(nn.Module):
    '''
    Final Model class. Puts all pieces together and generates caption based on image.

    Pipeline: ImageEncoder (CLIP vision tower) -> Mapping (image embedding to `ep_len`
    prefix embeddings) -> TextDecoder (GPT-2 language model).
    '''

    def __init__(self, clip_model, text_model, ep_len, num_layers, n_heads, forward_expansion, dropout, max_len, device='cpu'):
        '''
        Model constructor.

        Args:
            clip_model: name or path of the CLIP model used by the image encoder
            text_model: name or path of the GPT-2 model used by the text decoder
            ep_len: number of prefix embeddings the mapping produces per image
            num_layers: number of layers in the TransformerEncoder
            n_heads: number of heads in the MultiHeadAttention
            forward_expansion: expansion factor for the feedforward layer
            dropout: dropout probability
            max_len: maximum length of the generated text
            device: device all sub-modules are placed on

        Raises:
            AssertionError: if the CLIP hidden size differs from the GPT-2 embedding size.
        '''
        super(Net, self).__init__()

        self.device = device
        self.ep_len = ep_len

        self.ie = ImageEncoder(model=clip_model, device=device)
        self.mp = Mapping(
            ep_len=self.ep_len,
            num_layers=num_layers,
            embed_size=self.ie.model.config.hidden_size,
            n_heads=n_heads,
            forward_expansion=forward_expansion,
            dropout=dropout,
            device=device
        )
        self.td = TextDecoder(model=text_model, device=device)

        assert self.ie.model.config.hidden_size == self.td.model.config.n_embd, "Embedding size of models mismatch"

        self.max_len = max_len

        # self.criterion = nn.CrossEntropyLoss(ignore_index=self.td.tokenizer.pad_token_id) # changed on epoch 91
        self.criterion = nn.CrossEntropyLoss()

        self.freeze_layers()

    def freeze_layers(self):
        '''Disable gradients for the image encoder and for the middle slice of the decoder parameters.'''
        # freeze everything, except 1st and last transformer layer in Decoder
        # (the slice leaves the first 14 and the last 14 decoder parameters trainable)
        for p in [*list(self.ie.parameters()), *list(self.td.parameters())[14:-14]]:
            p.requires_grad = False

    def forward(self, img, temperature=1.0):
        '''
        Caption generation for a single image.

        Args:
            img: image to generate caption for [PIL.Image]
            temperature: divisor applied to the logits before softmax; non-positive
                values are replaced by 1.0

        Returns:
            caption: generated caption [str]
            tokens: generated token ids, including a trailing EOS when one was produced [list]
        '''
        if temperature <= 0.0:
            temperature = 1.0
            print('Temperature must be positive. Setting it to 1.0')

        eos_token_id = self.td.tokenizer.eos_token_id
        wte = self.td.model.transformer.wte
        wpe = self.td.model.transformer.wpe

        with torch.no_grad():
            img_embedded = self.ie(img)

            # (ep_len, embed_size)
            img_mapped = self.mp(img_embedded)

            sos_emb = wte(torch.tensor(self.td.tokenizer.bos_token_id).to(self.device))

            # sos_emb shape embed_size -> (1, embed_size)
            sos_emb = sos_emb.unsqueeze(0)

            # (ep_len + 1, embed_size)
            start_emb = torch.cat([sos_emb, img_mapped], dim=0)

            tokens = []
            for _ in range(self.max_len):
                if len(tokens):
                    tok_emb = wte(torch.tensor(tokens).to(self.device))
                    emb = torch.cat([start_emb, tok_emb], dim=0)
                else:
                    emb = start_emb

                # add positional enc
                pos_emb = wpe(torch.arange(emb.shape[0]).to(self.device))

                # Out-of-place on purpose: on the first step `emb` IS `start_emb`, and an
                # in-place `+=` would bake the positional encoding into the prefix so that
                # every later step added it a second time.
                emb = emb + pos_emb

                pred = self.td(emb)
                pred = torch.softmax(pred / temperature, dim=-1)

                # Greedy decoding: take the most likely token at the last position.
                _, pred = torch.max(pred, dim=1)
                last_token = pred[-1].item()
                tokens.append(last_token)

                if last_token == eos_token_id:
                    break

        # Drop the trailing EOS only when generation really ended with one; when max_len
        # was reached instead, the last token is part of the caption.
        caption_tokens = tokens[:-1] if tokens and tokens[-1] == eos_token_id else tokens

        decoded = self.td.tokenizer.decode(caption_tokens)
        decoded = decoded.strip()

        # An empty caption (e.g. EOS as the very first token) has no first character to capitalise.
        if decoded:
            decoded = decoded[0].upper() + decoded[1:]

        return decoded, tokens

    def train_forward(self, img_emb, trg_cap, att_mask):
        '''
        Compute the training loss for a batch.

        Args:
            img_emb: image embeddings already produced by CLIP
            trg_cap: target caption token ids, indexed as [batch, position]
            att_mask: attention mask matching `trg_cap`

        Returns:
            loss: value of `self.criterion` over the caption positions
        '''
        # method should get embedded by CLIP images and trg_text without last token.
        # dataset should contain image, embedded image, text

        # Inputs are all tokens but the last; targets are all tokens but the first.
        x, x_mask = trg_cap[:, :-1], att_mask[:, :-1]
        y = trg_cap[:, 1:]

        img_mapped = self.mp(img_emb, train_mode=True)

        # embed all texts and concat with mapped image prefix
        text_emb = self.td.model.transformer.wte(x)

        # N, len, embed_size
        x = torch.concat([img_mapped, text_emb], dim=1)

        # The image prefix is always attended to, so its mask is all ones.
        x_mask = torch.concat([torch.ones(x_mask.shape[0], self.ep_len).to(self.device), x_mask], dim=1)

        pos_emb = self.td.model.transformer.wpe(torch.arange(x.shape[1]).to(self.td.device))
        pos_emb = pos_emb.expand_as(x)

        x += pos_emb

        res = self.td(x, attention_mask=x_mask)

        # NOTE(review): nn.CrossEntropyLoss expects unnormalised logits; applying softmax
        # first is kept only because it is how this model has been trained so far - confirm
        # before changing.
        res = torch.softmax(res, dim=2)

        # Only positions after the image prefix are scored against the targets.
        loss = self.criterion(res[:, self.ep_len:, :].reshape(-1, res.shape[-1]), y.reshape(-1))

        return loss
if __name__ == '__main__':
    # Smoke test: build both model sizes, caption a random tensor, compute one
    # training loss on random data and report parameter counts.
    model_pairs = [
        ['openai/clip-vit-base-patch32', 'gpt2'],
        ['openai/clip-vit-large-patch14', 'gpt2-medium'],
    ]

    for clip, text in model_pairs:
        m = Net(
            clip_model=clip,
            text_model=text,
            ep_len=3,
            num_layers=6,
            n_heads=16,
            forward_expansion=4,
            dropout=0.1,
            max_len=20
        )

        # Inference path.
        m.eval()
        r = m(torch.randn(3, 224, 224))
        print(r)

        # Training path.
        m.train()
        N = 10
        emb = m.td.model.config.n_embd
        length = 20

        # Mask marks the last 3 positions of every caption as padding.
        mask = torch.concat([torch.ones(N, length - 3), torch.zeros(N, 3)], dim=1)
        l = m.train_forward(
            torch.rand(N, emb),
            torch.randint(1, 50000, (N, length)),
            att_mask=mask
        )
        print(l)

        # number of parameters
        total_params = sum(p.numel() for p in m.parameters())
        trainable_params = sum(p.numel() for p in m.parameters() if p.requires_grad)
        print(f'Total number of parameters: {total_params}')
        print(f'Number of trainable parameters: {trainable_params}')

View File

@ -0,0 +1,203 @@
'''
Module contains Trainer used in training and testing processes.
'''
import io
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
from tqdm import tqdm
class Trainer:
    '''
    Runs the training, validation and qualitative test loops for the captioning model
    and handles checkpoint save / restore.
    '''

    def __init__(
        self,
        model,
        optimizer,
        scaler,
        scheduler,
        train_loader,
        valid_loader,
        test_dataset='./data',
        test_path='',
        ckp_path='',
        device='cpu'
    ):
        '''
        Args:
            model: model exposing `train_forward(img_emb, trg_cap, att_mask)` and `__call__(img)`.
            optimizer: optimizer whose first param group provides the reported learning rate.
            scaler: gradient scaler used for mixed precision training.
            scheduler: learning rate scheduler, stepped once per epoch.
            train_loader: iterable yielding `(img_emb, cap, att_mask)` training batches.
            valid_loader: iterable yielding `(img_emb, cap, att_mask)` validation batches.
            test_dataset: indexable collection whose items start with an image file name;
                used by `test_step`.
            test_path: directory the image file names from `test_dataset` are relative to.
            ckp_path: checkpoint to resume from; ignored when it is not an existing file.
            device: device batches are moved to.
        '''
        self.model = model
        self.optimizer = optimizer
        self.scaler = scaler
        self.scheduler = scheduler
        self.train_loader = train_loader
        self.valid_loader = valid_loader
        self.test_dataset = test_dataset
        self.test_path = test_path
        self.ckp_path = ckp_path
        self.device = device

        # Initialise all bookkeeping up front so that every attribute read by
        # get_training_data() exists, whether or not a checkpoint is restored below.
        self.cur_lr = self.optimizer.param_groups[0]['lr']
        self.epoch = 0
        self.train_loss = []
        self.valid_loss = []
        self.test_result = None

        # load checkpoint
        if os.path.isfile(ckp_path):
            self._load_ckp(
                ckp_path,
                optimizer=optimizer,
                scheduler=scheduler,
                scaler=scaler,
                epoch=True,
                train_loss=True,
                valid_loss=True,
                device=device
            )
            # The restored optimizer state may carry a different learning rate.
            self.cur_lr = self.optimizer.param_groups[0]['lr']

    def train_epoch(self):
        '''Train for one pass over `train_loader` and append the mean loss to `train_loss`.'''
        self.model.train()
        self.epoch += 1

        total_loss = 0
        num_batches = 0

        loop = tqdm(self.train_loader, total=len(self.train_loader))
        loop.set_description(f'Epoch: {self.epoch} | Loss: ---')
        for batch_idx, (img_emb, cap, att_mask) in enumerate(loop):
            img_emb, cap, att_mask = img_emb.to(self.device), cap.to(self.device), att_mask.to(self.device)

            with torch.cuda.amp.autocast():
                loss = self.model.train_forward(img_emb=img_emb, trg_cap=cap, att_mask=att_mask)

            self.scaler.scale(loss).backward()

            # Gradients must be unscaled before clipping so the norm threshold applies to real values.
            self.scaler.unscale_(self.optimizer)
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.3)

            self.scaler.step(self.optimizer)
            self.scaler.update()

            self.optimizer.zero_grad()

            total_loss += loss.item()
            num_batches = batch_idx + 1

            loop.set_description(f'Epoch: {self.epoch} | Loss: {total_loss / num_batches:.3f}')
            loop.refresh()

        self.cur_lr = self.optimizer.param_groups[0]['lr']

        # An empty loader has no mean loss; record NaN instead of failing on an unbound counter.
        self.train_loss.append(total_loss / num_batches if num_batches else float('nan'))

        self.scheduler.step()

    def valid_epoch(self):
        '''Evaluate on `valid_loader` without gradients and append the mean loss to `valid_loss`.'''
        self.model.eval()

        total_loss = 0
        num_batches = 0

        loop = tqdm(self.valid_loader, total=len(self.valid_loader))
        loop.set_description('Validation Loss: ---')
        for batch_idx, (img_emb, cap, att_mask) in enumerate(loop):
            img_emb, cap, att_mask = img_emb.to(self.device), cap.to(self.device), att_mask.to(self.device)

            with torch.no_grad():
                with torch.cuda.amp.autocast():
                    loss = self.model.train_forward(img_emb=img_emb, trg_cap=cap, att_mask=att_mask)

            total_loss += loss.item()
            num_batches = batch_idx + 1

            loop.set_description(f'Validation Loss: {total_loss / num_batches:.3f}')
            loop.refresh()

        # Same empty-loader handling as in train_epoch.
        self.valid_loss.append(total_loss / num_batches if num_batches else float('nan'))

    def test_step(self, num_examples=4):
        '''
        Caption `num_examples` random images from `test_dataset` and store the rendered
        figure as a PIL image in `self.test_result`.

        Args:
            num_examples: number of images to caption; must be even (two per row).
        '''
        assert num_examples % 2 == 0, 'num_examples must be even'

        self.model.eval()

        # squeeze=False keeps `axs` two-dimensional even for a single row, so the
        # [row, col] indexing below also works for num_examples == 2.
        fig, axs = plt.subplots(num_examples // 2, 2, figsize=(20, 12), squeeze=False)

        # The constructor stores the dataset as `test_dataset`; there is no `self.dataset`.
        random_idx = np.random.randint(0, len(self.test_dataset), size=(num_examples,))
        for idx, r in enumerate(random_idx):
            img_name, _, _ = self.test_dataset[r]

            img = Image.open(os.path.join(self.test_path, img_name))

            with torch.no_grad():
                caption, _ = self.model(img)

            axs[idx // 2, idx % 2].imshow(img)
            axs[idx // 2, idx % 2].set_title(caption)
            axs[idx // 2, idx % 2].axis('off')

        # Render into memory and release the figure before re-opening the bytes as an image.
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)

        fig.clear()
        plt.close(fig)

        self.test_result = Image.open(buf)

    def get_training_data(self):
        '''Return the loss histories, the current learning rate and the latest test figure.'''
        return {
            'train_loss': self.train_loss,
            'valid_loss': self.valid_loss,
            'lr': self.cur_lr,
            'examples': self.test_result
        }

    def save_ckp(self, ckp_path):
        '''Write epoch, model / optimizer / scheduler / scaler state and loss histories to `ckp_path`.'''
        torch.save(
            {
                'epoch': self.epoch,
                'model_state_dict': self.model.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'scheduler_state_dict': self.scheduler.state_dict(),
                'scaler_state_dict': self.scaler.state_dict(),
                'tloss': self.train_loss,
                'vloss': self.valid_loss
            },
            ckp_path
        )

    def _load_ckp(
        self,
        checkpoint_fpath,
        optimizer=False,
        scheduler=False,
        scaler=False,
        epoch=False,
        train_loss=False,
        valid_loss=False,
        device='cpu'
    ):
        '''
        Loads entire checkpoint from file.

        The model weights are always restored; each remaining argument acts as a flag
        that enables restoring the corresponding piece of state.
        '''
        checkpoint = torch.load(checkpoint_fpath, map_location=device)

        self.model.load_state_dict(checkpoint['model_state_dict'])

        if optimizer:
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        if scheduler:
            self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

        if scaler:
            self.scaler.load_state_dict(checkpoint['scaler_state_dict'])

        if epoch:
            # The epoch counter is derived from the length of the stored training loss history.
            self.epoch = len(checkpoint['tloss'])

        if train_loss:
            self.train_loss = checkpoint['tloss']

        if valid_loss:
            self.valid_loss = checkpoint['vloss']

View File

@ -0,0 +1,3 @@
from utils.config import *
from utils.downloads import *
from utils.lr_warmup import *

View File

@ -0,0 +1,54 @@
'''
Project's main config.
'''
import os
from dataclasses import dataclass
@dataclass
class ConfigS:
    '''
    Project's main config for the small model variant.
    '''

    # Pretrained model identifiers.
    clip_model: str = 'openai/clip-vit-base-patch32'
    text_model: str = 'gpt2'

    seed: int = 100
    num_workers: int = 2

    # Dataset split fractions (annotated as float: the values are not integers).
    train_size: float = 0.84
    val_size: float = 0.13

    epochs: int = 150
    lr: float = 3e-3
    k: float = 0.33
    batch_size_exp: int = 6

    # Model hyper-parameters.
    ep_len: int = 4
    num_layers: int = 6
    n_heads: int = 16
    forward_expansion: int = 4
    max_len: int = 40
    dropout: float = 0.1

    weights_dir: str = os.path.join('weights', 'small')
@dataclass
class ConfigL:
    '''
    Project's main config for the large model variant.
    '''

    # Pretrained model identifiers.
    clip_model: str = 'openai/clip-vit-large-patch14'
    text_model: str = 'gpt2-medium'

    seed: int = 100
    num_workers: int = 2

    # Dataset split fractions (annotated as float: the values are not integers).
    train_size: float = 0.84
    val_size: float = 0.13

    epochs: int = 120
    lr: float = 5e-3
    k: float = 0.3
    batch_size_exp: int = 5

    # Model hyper-parameters.
    ep_len: int = 4
    num_layers: int = 5
    n_heads: int = 16
    forward_expansion: int = 4
    max_len: int = 40
    dropout: float = 0.08

    weights_dir: str = os.path.join('weights', 'large')

View File

@ -0,0 +1,21 @@
"""
Utility functions for loading weights.
"""
import gdown
def download_weights(checkpoint_fpath, model_size="L"):
    """
    Downloads weights from Google Drive.

    Args:
        checkpoint_fpath: destination path handed to gdown.
        model_size: "L" (case-insensitive, surrounding whitespace ignored) selects the
            first file id; any other value selects the second one.
    """
    wants_large = model_size.strip().upper() == "L"

    if wants_large:
        download_id = "1pSQruQyg8KJq6VmzhMLFbT_VaHJMdlWF"
    else:
        download_id = "1Gh32arzhW06C1ZJyzcJSSfdJDi3RgWoG"

    url = f"https://drive.google.com/uc?id={download_id}"
    gdown.download(url, checkpoint_fpath, quiet=False)

View File

@ -0,0 +1,24 @@
'''
Learning rate scheduler with warmup utility.
'''
class LRWarmup():
    '''
    Self-made learning rate scheduler with warmup.

    The schedule is a triangle: it rises linearly from 0 at epoch 0 to `max_lr` at
    `max_point`, then falls linearly back to 0 at `epochs`.
    '''

    def __init__(self, epochs, max_lr, k):
        '''
        Args:
            epochs: total number of epochs of the schedule.
            max_lr: learning rate reached at the peak.
            k: fraction of `epochs` spent warming up; must lie strictly between 0.05 and 0.95.
        '''
        assert k < 0.95 and k > 0.05, 'k must be between 0.05 and 0.95'

        self.epochs = epochs
        self.max_lr = max_lr

        # int() truncates, so short schedules (e.g. epochs=10, k=0.06) used to yield a
        # peak at epoch 0 and a division by zero in lr_warmup; keep the peak at >= 1.
        self.max_point = max(1, int(k * self.epochs))

    def __call__(self, epoch):
        '''Return the learning rate for `epoch`; see `lr_warmup`.'''
        return self.lr_warmup(epoch)

    def lr_warmup(self, epoch):
        '''Return the smaller of the rising (warmup) and falling (decay) line at `epoch`.'''
        # Slope of the warmup line through (0, 0) and (max_point, max_lr).
        a_1 = self.max_lr / self.max_point

        # With no epochs left after the peak there is no decay line to intersect.
        if self.max_point >= self.epochs:
            return a_1 * epoch

        # Decay line through (max_point, max_lr) and (epochs, 0).
        a_2 = self.max_lr / (self.max_point - self.epochs)
        b = - a_2 * self.epochs

        return min(a_1 * epoch, a_2 * epoch + b)

View File

@ -57,6 +57,10 @@ def is_int(value):
except:
return False
def ensure(var,datatype):
    """Ensures that a variable is a given datatype.

    Returns `var` unchanged when it already is an instance of `datatype`, otherwise
    the result of converting it. When a list is requested, a string (or any
    non-iterable value) is wrapped as a single-element list: `list("name")` would
    split the string into characters, which is never what a caller asking for
    "a list of names" wants.
    """
    if isinstance(var, datatype): return var
    if datatype is list and (isinstance(var, (str, bytes)) or not hasattr(var, "__iter__")):
        return [var]
    return datatype(var)
def autocast(var):
"""Converts a variable between string, int, and float depending on how it's formatted"""

View File

@ -101,7 +101,7 @@ class Unprompted:
self.log.info(f"Finished loading in {time.time()-start_time} seconds.")
def __init__(self, base_dir="."):
self.VERSION = "10.0.2"
self.VERSION = "10.1.0"
self.shortcode_modules = {}
self.shortcode_objects = {}
@ -291,7 +291,8 @@ class Unprompted:
if type(default) == list:
for idx,val in enumerate(default):
default[idx] = datatype(val)
else: default = datatype(default)
else:
default = datatype(default)
except ValueError:
self.log.warning(f"Could not cast {default} to {datatype}.")
pass
@ -390,8 +391,8 @@ class Unprompted:
att_split = att.split("_") # e.g. controlnet_0_enabled
if len(att_split) >= 3 and any(chr.isdigit() for chr in att): # Make sure we have at least 2 underscores and at least one number
self.log.debug(f"Setting ControlNet value: {att}")
cn_path = self.extension_path(self.Config.stable_diffusion.controlnet_name)
cnet = helpers.import_file(f"{self.Config.stable_diffusion.controlnet_name}.scripts.external_code", f"{cn_path}/scripts/external_code.py")
cn_path = self.extension_path(self.Config.stable_diffusion.controlnet.extension)
cnet = helpers.import_file(f"{self.Config.stable_diffusion.controlnet.extension}.scripts.external_code", f"{cn_path}/scripts/external_code.py")
all_units = cnet.get_all_units_in_processing(this_p)
@ -400,6 +401,13 @@ class Unprompted:
this_val = imageio.imread(self.str_replace_macros(self.shortcode_user_vars[att]))
else:
this_val = self.shortcode_user_vars[att]
# Apply preset model names
if att_split[2] == "model":
if self.shortcode_user_vars["sd_base"]== "sd1": cn_dict = self.Config.stable_diffusion.controlnet.sd1_models
elif self.shortcode_user_vars["sd_base"] == "sdxl": cn_dict = self.Config.stable_diffusion.controlnet.sdxl_models
if hasattr(cn_dict,this_val):
this_val = getattr(cn_dict, this_val)
setattr(all_units[int(att_split[1])], "_".join(att_split[2:]), this_val)
cnet.update_cn_script_in_processing(this_p, all_units)
except Exception as e:

View File

@ -52,7 +52,7 @@ if Config.stable_diffusion.template_editor:
def load_file(file_name):
# print(f"loading: {file_name}")
with open(f"{folder}/{file_name}", "r") as file:
with open(f"{folder}/{file_name}", "r", encoding=Config.formats.default_encoding) as file:
content = file.read()
# print(f"content: {content}")
# update main_edit_space with content
@ -60,7 +60,7 @@ if Config.stable_diffusion.template_editor:
def save_file(file_name, content):
# print(f"loading: {file_name}")
with open(f"{folder}/{file_name}", "w") as file:
with open(f"{folder}/{file_name}", "w", encoding=Config.formats.default_encoding) as file:
file.write(content)
with gr.Blocks() as unprompted_editor_ui:

View File

@ -61,9 +61,9 @@ class Shortcode():
self.log.debug(f"{success_string} Regional Prompter")
elif script_title == "controlnet":
# Update the controlnet script args with a list of 0 units
cn_path = self.Unprompted.extension_path(self.Unprompted.Config.stable_diffusion.controlnet_name)
cn_path = self.Unprompted.extension_path(self.Unprompted.Config.stable_diffusion.controlnet.extension)
if cn_path:
cn_module = helpers.import_file(f"{self.Unprompted.Config.stable_diffusion.controlnet_name}.internal_controlnet.external_code", f"{cn_path}/internal_controlnet/external_code.py")
cn_module = helpers.import_file(f"{self.Unprompted.Config.stable_diffusion.controlnet.extension}.internal_controlnet.external_code", f"{cn_path}/internal_controlnet/external_code.py")
cn_module.update_cn_script_in_processing(self.Unprompted.main_p, [])
self.log.debug(f"{success_string} ControlNet")
else:

View File

@ -4,7 +4,7 @@ class Shortcode():
self.description = "Downloads a file using the Civitai API (unless you already have the file in question) and automatically adds it to your prompt."
self.extra_nets = None
def is_network_installed(self, net, paths, exts=[".pt", ".ckpt", ".safetensors"]):
def network_path(self, net, paths, exts=[".pt", ".ckpt", ".safetensors"]):
"""Based on list_available_networks() from extensions-builtin/Lora/networks.py.
It's not clear if there's a better/more standardized means of checking the
user's installed extra networks. Please let me know if there is such a thing..."""
@ -26,13 +26,13 @@ class Shortcode():
name = os.path.splitext(os.path.basename(filename))[0]
if name == net:
self.log.debug(f"Extra network {net} is already installed: {filename}")
return True
return filename
return False
return None
def run_atomic(self, pargs, kwargs, context):
import lib_unprompted.helpers as helpers
import requests, os
import requests, os, json
from modules import shared
net_directories = []
@ -73,7 +73,7 @@ class Shortcode():
# Defaults
if "limit" not in kwargs: kwargs["limit"] = 1
timeout = kwargs["_timeout"] if "_timeout" in kwargs else 60
filename = kwargs["_file"] if "_file" in kwargs else kwargs["query"]
# Fall back to the parsed `query` argument when no explicit `_file` is given.
filename = kwargs["_file"] if "_file" in kwargs else self.Unprompted.parse_arg("query","")
weight = self.Unprompted.parse_arg("_weight",1.0)
activate = self.Unprompted.parse_arg("_activate",True)
words = self.Unprompted.parse_arg("_words",False)
@ -109,12 +109,13 @@ class Shortcode():
elif net_type in ["controlnet","cn"]:
from modules.paths_internal import extensions_dir
kwargs["types"] = "Controlnet"
net_directories = [extensions_dir+self.Unprompted.Config.stable_diffusion.controlnet_name+"/models"]
net_directories = [extensions_dir+self.Unprompted.Config.stable_diffusion.controlnet.extension+"/models"]
elif net_type in ["poses","pose","openpose"]:
kwargs["types"] = "Poses"
net_directories = [self.Unprompted.base_dir+"/user/poses"]
if not self.is_network_installed(filename,net_directories):
net_path = self.network_path(filename,net_directories)
if not net_path:
# Remove system arguments from kwargs dict because we don't need to waste anyone's bandwidth
for k in list(kwargs.keys()):
if k.startswith("_"):
@ -159,6 +160,13 @@ class Shortcode():
except:
self.log.exception("An error occurred while downloading the Civitai file.")
if words:
# Replace the extension in file_path with .json:
json_path = os.path.splitext(file_path)[0]+".json"
# Create and open json_path for writing:
with open(json_path, "w") as json_file:
json.dump({"activation text":words}, json_file)
except Exception as e:
self.log.exception("Exception caught while decoding JSON")
return ""
@ -166,7 +174,17 @@ class Shortcode():
self.log.error(f"Request to Civitai API yielded bad response: {r.status_code}")
return ""
# We already have the file, check for activation text in json
# else:
elif words:
json_path = os.path.splitext(net_path)[0]+".json"
if os.path.exists(json_path):
with open(json_path, "r") as json_file:
json_obj = json.load(json_file)
if "activation text" in json_obj:
words = json_obj["activation text"]
else:
self.log.debug(f"Activation text not found in {json_path}.")
else:
self.log.debug(f"No JSON found at {json_path}.")
# Return assembled prompt string
@ -178,4 +196,7 @@ class Shortcode():
elif kwargs["types"] == "TextualInversion":
return_string += f"({filename}:{weight})"
return return_string
return return_string
def ui(self, gr):
return

View File

@ -0,0 +1,123 @@
try:
from modules import shared
except:
pass # for unprompted_dry
class Shortcode():
    """Shortcode that captions an image with BLIP-2, the WebUI's CLIP interrogator, or CLIPxGPT.

    The most recently loaded BLIP-2 / CLIPxGPT model is cached on the instance and
    reused as long as neither the method nor the model name changes.
    """

    def __init__(self, Unprompted):
        self.Unprompted = Unprompted
        self.description = "Generates a caption for the given image using various technqiues."
        # Cache of the last loaded model / processor and the settings they were loaded for.
        self.model = None
        self.processor = None
        self.last_method = ""
        self.last_model_name = ""

    def run_atomic(self, pargs, kwargs, context):
        """Caption the `image` argument (or the current image) and return the caption.

        Returns an empty string when an unknown `method` is requested.
        """
        from PIL import Image
        import lib_unprompted.helpers as helpers
        import torch

        # Resolve the image once: explicit argument first, current image otherwise.
        image = self.Unprompted.parse_arg("image",False)
        if not image: image = self.Unprompted.current_image()
        if isinstance(image,str): image = Image.open(image)

        method = self.Unprompted.parse_arg("method","CLIP")
        model_name = self.Unprompted.parse_arg("model","")
        prompt = self.Unprompted.parse_arg("text","")
        # The UI below advertises this field as `context`, so accept that name as well.
        if not prompt: prompt = self.Unprompted.parse_arg("context","")
        question = self.Unprompted.parse_arg("question","")
        max_tokens = self.Unprompted.parse_arg("max_tokens",50)

        # A question takes precedence over free-form context.
        if question: prompt = f"Question: {question} Answer:"

        # Default models per method
        if not model_name:
            if method == "BLIP-2": model_name = "Salesforce/blip2-opt-2.7b"
            elif method == "CLIPxGPT": model_name = "large_model"

        device ="cuda" if torch.cuda.is_available() else "cpu"

        unload = self.Unprompted.parse_arg("unload",False)

        def get_cached():
            """Return the cached model, or False when it has to be (re)loaded."""
            if method != self.last_method or model_name != self.last_model_name or not self.model:
                self.log.info(f"Loading {method} model...")
                return False
            self.log.info(f"Using cached {method} model.")
            return self.model

        if method == "BLIP-2":
            from transformers import AutoProcessor, Blip2ForConditionalGeneration

            model = get_cached()
            if not model:
                #with torch.device(device):
                self.processor = AutoProcessor.from_pretrained(model_name, cache_dir=f"{self.Unprompted.base_dir}/{self.Unprompted.Config.subdirectories.models}/BLIP-2", low_cpu_mem_usage=True)
                model = Blip2ForConditionalGeneration.from_pretrained(model_name, torch_dtype=torch.float16, cache_dir=f"{self.Unprompted.base_dir}/{self.Unprompted.Config.subdirectories.models}/BLIP-2", low_cpu_mem_usage=True)

                model.to(device)

            inputs = self.processor(image, text=prompt, return_tensors="pt").to(device, torch.float16)

            generated_ids = model.generate(**inputs, max_new_tokens=max_tokens)
            caption = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        elif method == "CLIP":
            from modules import shared, lowvram, devices
            self.log.info("Calling the WebUI's standard CLIP interrogator...")
            # caption = shared.interrogator.interrogate(image.convert("RGB"))

            # Free VRAM before the interrogator loads its own models.
            lowvram.send_everything_to_cpu()
            devices.torch_gc()
            shared.interrogator.load()
            caption = shared.interrogator.generate_caption(image.convert("RGB"))
            shared.interrogator.unload()
        elif method =="CLIPxGPT":
            import os
            # Imported here so the other methods do not depend on the CLIPxGPT package.
            from lib_unprompted.clipxgpt.model.model import Net
            model = get_cached()
            if not model:
                model = Net(
                    clip_model="openai/clip-vit-large-patch14",
                    text_model="gpt2-medium",
                    ep_len=4,
                    num_layers=5,
                    n_heads=16,
                    forward_expansion=4,
                    dropout=0.08,
                    max_len=40,
                    device=device
                )

                ckp_file = f"{self.Unprompted.base_dir}/{self.Unprompted.Config.subdirectories.models}/clipxgpt/{model_name}.pt"
                if not os.path.exists(ckp_file):
                    self.log.info("Downloading CLIPxGPT model...")
                    # NOTE(review): the bundled CLIPxGPT download helper maps this file id to the
                    # non-"L" weights while the Net above uses the large configuration - confirm
                    # that this is the intended checkpoint.
                    helpers.download_file(ckp_file, f"https://drive.google.com/uc?export=download&id=1Gh32arzhW06C1ZJyzcJSSfdJDi3RgWoG")

                checkpoint = torch.load(ckp_file, map_location=device)
                # Checkpoints written by the CLIPxGPT Trainer nest the weights under
                # "model_state_dict"; with strict=False such a file would otherwise load
                # nothing without any error.
                if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
                    checkpoint = checkpoint["model_state_dict"]
                model.load_state_dict(checkpoint, strict=False)

            model.eval()

            with torch.no_grad():
                caption, _ = model(image, 1.0) # temperature
        else:
            # Without this branch an unknown method failed later on an unbound `caption`.
            self.log.error(f"Unknown interrogation method: {method}")
            return ""

        self.log.debug(f"Caption method {method} returned: {caption}")

        # Cache handling
        self.last_method = method
        self.last_model_name = model_name
        if unload:
            self.model = None
            self.processor = None
        elif method != "CLIP":
            self.model = model

        return caption

    def ui(self, gr):
        """Declare the wizard UI fields of this shortcode."""
        gr.Image(label="Image to perform interrogation on (defaults to SD output) 🡢 image",type="filepath",interactive=True)
        gr.Radio(label="Interrogation method 🡢 method", value="CLIP", choices=["BLIP-2","CLIP","CLIPxGPT"], info="Note: The other methods require large model downloads!")
        gr.Text(label="Model name 🡢 model",value="",info="Accepts Hugging Face model strings")
        gr.Text(label="Context 🡢 context",value="",info="For BLIP-2, provide contextual information for the interrogation.")
        gr.Text(label="Question 🡢 question",value="",info="For BLIP-2, ask a question about the image.")
        # Gradio sliders take `minimum` / `maximum`; `min` / `max` are not slider bounds.
        gr.Slider(label="Max Tokens 🡢 max_tokens",value=50,minimum=1,maximum=100,step=1,info="For BLIP-2, the maximum number of tokens to generate.")

View File

@ -0,0 +1,58 @@
try:
from modules import shared
except:
pass # for unprompted_dry
class Shortcode():
    """Shortcode that runs an image through one or more of the WebUI's upscalers."""

    def __init__(self, Unprompted):
        self.Unprompted = Unprompted
        self.description = "Enhances a given image using the WebUI's built-in upscaler methods."

    def run_atomic(self, pargs, kwargs, context):
        """Upscale the `image` argument (or the current image) and hand the result,
        blended with the original by `visibility`, back to Unprompted.

        Always returns an empty string.
        """
        from PIL import Image
        import lib_unprompted.helpers as helpers

        image = self.Unprompted.parse_arg("image",False)
        if not image: image = self.Unprompted.current_image()
        if isinstance(image,str):
            image = Image.open(image)
        orig_image = image.copy()

        scale = self.Unprompted.parse_arg("scale",1)
        visibility = self.Unprompted.parse_arg("visibility",1.0)
        limit = self.Unprompted.parse_arg("limit",100)
        keep_res = self.Unprompted.parse_arg("keep_res",False)

        # A single model name arrives as a plain string; converting it with list()
        # would split it into characters, so wrap it instead.
        requested = self.Unprompted.parse_arg("models","None")
        if isinstance(requested, str): _models = [requested]
        else: _models = helpers.ensure(requested,list)

        # Resolve the requested names to upscaler objects, in request order, up to `limit`.
        models = []
        for model in _models:
            for upscaler in shared.sd_upscalers:
                if upscaler.name == model:
                    models.append(upscaler)
                    break
            if len(models) >= limit:
                self.log.info(f"Upscale model limit satisfied ({limit}). Proceeding...")
                break

        for model in models:
            self.log.info(f"Upscaling {scale}x with {model.name}...")
            image = model.scaler.upscale(image, scale, model.data_path)
            if keep_res:
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                image = image.resize(orig_image.size, Image.LANCZOS)

        # Append to output window
        try:
            if not keep_res:
                # blend() needs both images at the same size.
                orig_image = orig_image.resize(image.size, Image.LANCZOS)
            self.Unprompted.current_image(Image.blend(orig_image, image, visibility))
        except Exception:
            # Still best-effort, but no longer silent about why the output was not updated.
            self.log.exception("Could not blend the upscaled image into the output.")

        return ""

    def ui(self, gr):
        """Declare the wizard UI fields of this shortcode."""
        gr.Image(label="Image to perform upscaling on (defaults to SD output) 🡢 image",type="filepath",interactive=True)
        gr.Dropdown(label="Upscaler Model(s) 🡢 models",choices=[upscaler.name for upscaler in shared.sd_upscalers],multiselect=True)
        gr.Slider(label="Upscale Factor 🡢 scale", value=1, maximum=16, minimum=1, interactive=True, step=1)
        gr.Slider(label="Upscale Visibility 🡢 visibility", value=1.0, maximum=1.0, minimum=0.0, interactive=True, step=0.01)

View File

@ -1,4 +1,4 @@
[template name="Bodysnatcher v1.3.5"]
[template name="Bodysnatcher v1.4.0"]
![Preview]([base_dir]/bodysnatcher.png)
<details open><summary>⚠️ Important info, please read carefully</summary>
@ -43,6 +43,7 @@ Always bodysnatch responsibly.<br>
[set subject _new _label="New subject"]mona lisa[/set]
[set simple_description _new _label="Simple Description" _info="These terms will apply to both the full image and the cropped face, 1-3 words are usually plenty"][/set]
[set class _new _label="Class" _info="The search term that determines the inpainting mask"]woman[/set]
[set interrogate _new _ui="checkbox" _label="Interrogate starting image" _info="Adds a descriptive caption to the prompt"]1[/set]
[/wizard]
[wizard accordion _label="🎭 Mask Settings"]
@ -76,6 +77,7 @@ Always bodysnatch responsibly.<br>
[set debug _new _label="Debug Mode" _ui="checkbox"]0[/set]
[if interrogate][interrogate][/if]
[sets neg_mask=""]
[if "(keep_hands==1 and background_mode==0) or (keep_hands==0 and background_mode==1)"]
[sets neg_mask=fingers]

View File

@ -1,11 +1,11 @@
[template name="Facelift v0.0.2"]
[template name="Facelift v0.1.0"]
An all-in-one solution for performing faceswaps by combining different models and postprocessing techniques.
[/template]
[wizard row]
[set faces _new _ui="file" _label="New face image(s)" _file_count="multiple" _file_types="image"][/set]
[set body _new _ui="image" _label="Body image to perform swap on (defaults to SD output)" _remember][/set]
[/wizard]
[set preset _new _ui="dropdown" _choices="{filelist '%BASE_DIR%/templates/common/presets/facelift/*.*' _basename _hide_ext}"]best_quality[/set]
[set preset _new _ui="dropdown" _choices="{filelist '%BASE_DIR%/templates/common/presets/facelift/*.*' _basename _hide_ext}"]best_quality_v2[/set]
[set unload _new _ui="checkbox" _label="Unload resources after inference" _info="Frees up VRAM but slows down inference time."]0[/set]
[if unload][set unload_all]all[/set][/if][else][set unload_all][/set][/else]
[# Reduce inference steps to 2 if we're using an external image. (setting it to 1 can cause errors)]

View File

@ -1,4 +1,4 @@
[sets
cn_0_enabled=1 cn_0_pixel_perfect=1 cn_0_module=openpose_faceonly cn_0_model=controlnet11Models_openpose cn_0_weight=1.0
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=softedge_hed cn_1_model=controlnet11Models_softedge cn_1_weight=1.0
cn_0_enabled=1 cn_0_pixel_perfect=1 cn_0_module=openpose_faceonly cn_0_model=openpose cn_0_weight=1.0
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=softedge_hed cn_1_model=softedge cn_1_weight=1.0
]

View File

@ -1,6 +1,6 @@
[sets
cn_0_enabled=1 cn_0_pixel_perfect=1 cn_0_module=depth_midas cn_0_model=controlnet11Models_depth cn_0_weight=1.0 cn_0_control_mode=1
cn_1_enabled=0 cn_1_pixel_perfect=1 cn_1_module=normal_midas cn_1_model=controlnet11Models_normal cn_1_weight=1.0 cn_1_control_mode=1
cn_0_enabled=1 cn_0_pixel_perfect=1 cn_0_module=depth_midas cn_0_model=depth cn_0_weight=1.0 cn_0_control_mode=1
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=normal_midas cn_1_model=normalmap cn_1_weight=1.0 cn_1_control_mode=1
cn_2_enabled=1 cn_2_pixel_perfect=1 cn_2_module=reference_adain cn_2_weight=1.0 cn_2_control_mode=1
cn_3_enabled=1 cn_3_pixel_perfect=1 cn_3_module=lineart_anime_denoise cn_3_model=controlnet11Models_animeline cn_3_weight=1.0 cn_3_control_mode=1
cn_3_enabled=1 cn_3_pixel_perfect=1 cn_3_module=lineart_anime_denoise cn_3_model=lineart cn_3_weight=1.0 cn_3_control_mode=1
]

View File

@ -1,5 +1,5 @@
[sets
cn_0_enabled=1 cn_0_pixel_perfect=0 cn_0_module=none cn_0_model=control_v11e_sd15_ip2p_fp16 cn_0_weight=0.15 cn_0_guidance_end=0.25 cn_0_control_mode=0
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=openpose_full cn_1_model=control11Models_openpose cn_1_weight=1.0
cn_2_enabled=1 cn_2_pixel_perfect=1 cn_2_module=softedge_hed cn_2_model=controlnet11Models_softedge cn_2_weight=0.25
cn_0_enabled=1 cn_0_pixel_perfect=0 cn_0_model=instructp2p cn_0_weight=0.15 cn_0_guidance_end=0.25 cn_0_control_mode=0
cn_1_enabled=1 cn_1_pixel_perfect=1 cn_1_module=openpose_full cn_1_model=openpose cn_1_weight=1.0
cn_2_enabled=1 cn_2_pixel_perfect=1 cn_2_module=softedge_hed cn_2_model=softedge cn_2_weight=0.25
]

View File

@ -1,3 +1,3 @@
[sets
cn_0_enabled=1 cn_0_pixel_perfect=0 cn_0_module=none cn_0_model=control_v11e_sd15_ip2p_fp16 cn_0_weight=0.15 cn_0_guidance_end=0.25 cn_0_control_mode=0
cn_0_enabled=1 cn_0_pixel_perfect=0 cn_0_module=none cn_0_model=instructp2p cn_0_weight=0.15 cn_0_guidance_end=0.25 cn_0_control_mode=0
]

View File

@ -1 +1 @@
[restore_faces unload="{get unload}" method=gfpgan image="{get body}"][faceswap "{get faces}" unload="{get unload_all}" minimum_similarity=-999][restore_faces unload="{get unload}" method=gpen]
[restore_faces unload="{get unload}" method=gfpgan image="{get body}"][faceswap "{get faces}" unload="{get unload_all}" minimum_similarity=-999][restore_faces unload="{get unload}" method=gpen][upscale models="4x-UltraSharp|R-ESRGAN 4x+" scale=1 limit=1 visibility=0.8 keep_res]

View File

@ -1 +0,0 @@
[sets cfg_scale=7.5 sampler_name="DPM++ 3M SDE" steps=20 denoising_strength=1.0 mask_blur=10]

View File

@ -0,0 +1 @@
[sets cfg_scale=4.5 sampler_name="DPM++ 3M SDE" steps=20 denoising_strength=1.0 mask_blur=0]