parent e42d2124c1
commit 83fa587f39
@@ -16,6 +16,7 @@ import tqdm
 from modules import shared, sd_models, devices, processing, sd_samplers
 from modules.hypernetworks.hypernetwork import optimizer_dict, stack_conds, save_hypernetwork, report_statistics
 from modules.textual_inversion.learn_schedule import LearnRateScheduler
+from modules.textual_inversion.textual_inversion import tensorboard_setup, tensorboard_add, tensorboard_add_image
 from .textual_inversion import validate_train_inputs, write_loss
 from ..hypernetwork import Hypernetwork, load_hypernetwork
 from . import sd_hijack_checkpoint
@@ -24,7 +25,7 @@ from ..scheduler import CosineAnnealingWarmUpRestarts
 from .dataset import PersonalizedBase,PersonalizedDataLoader


-def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory,
+def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory,
                        training_width, training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method,
                        create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt,
                        preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed,
@@ -112,6 +113,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
     if not isinstance(shared.loaded_hypernetwork, Hypernetwork):
         raise RuntimeError("Cannot perform training for Hypernetwork structure pipeline!")

+    shared.state.job = "train-hypernetwork"
     shared.state.textinfo = "Initializing hypernetwork training..."
     shared.state.job_count = steps
@@ -142,7 +144,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
         return hypernetwork, filename

     scheduler = LearnRateScheduler(learn_rate, steps, initial_step)
+    if shared.opts.training_enable_tensorboard:
+        print("Tensorboard logging enabled")
+        tensorboard_writer = tensorboard_setup(log_directory)
+    else:
+        tensorboard_writer = None
     # dataset loading may take a while, so input validations and early returns should be done before this
     shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
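Review note: the tensorboard helpers imported at the top of this file live in modules/textual_inversion/textual_inversion.py. For context, a minimal sketch of what `tensorboard_setup` and `tensorboard_add` are assumed to look like, based on the SummaryWriter import added in a later hunk; the bodies are an approximation, not code from this commit:

```python
import os
from torch.utils.tensorboard import SummaryWriter


def tensorboard_setup(log_directory):
    # one writer per training run, stored under <log_directory>/tensorboard
    os.makedirs(os.path.join(log_directory, "tensorboard"), exist_ok=True)
    return SummaryWriter(log_dir=os.path.join(log_directory, "tensorboard"))


def tensorboard_add_scaler(tensorboard_writer, tag, value, step):
    tensorboard_writer.add_scalar(tag=tag, scalar_value=value, global_step=step)


def tensorboard_add(tensorboard_writer, loss, global_step, step, learn_rate, epoch_num):
    # scalars are logged both globally and per-epoch, matching the call site below
    tensorboard_add_scaler(tensorboard_writer, "Loss/train", loss, global_step)
    tensorboard_add_scaler(tensorboard_writer, f"Loss/train/epoch-{epoch_num}", loss, step)
    tensorboard_add_scaler(tensorboard_writer, "Learn rate/train", learn_rate, global_step)
    tensorboard_add_scaler(tensorboard_writer, f"Learn rate/train/epoch-{epoch_num}", learn_rate, step)
```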
@@ -163,11 +169,19 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,

     dl = PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method,
                                 batch_size=ds.batch_size, pin_memory=pin_memory)
+    old_parallel_processing_allowed = shared.parallel_processing_allowed

     if unload:
+        shared.parallel_processing_allowed = False
         shared.sd_model.cond_stage_model.to(devices.cpu)
         shared.sd_model.first_stage_model.to(devices.cpu)

+    detach_grad = False  # test code that removes EMA
+    if detach_grad:
+        shared.sd_model.cond_stage_model.requires_grad_(False)
+        shared.sd_model.first_stage_model.requires_grad_(False)
+        torch.cuda.empty_cache()

     weights = hypernetwork.weights(True)

     # Here we use optimizer from saved HN, or we can specify as UI option.
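Review note: the `old_parallel_processing_allowed` snapshot taken here is restored near the end of the function (see the `@@ -377,6` hunk below). Since that restore sits in straight-line code, an exception mid-training could leave the flag disabled. A try/finally shape would make the restore unconditional; a runnable sketch with a stand-in for `modules.shared`, a suggestion rather than what this commit does:

```python
class _SharedStub:
    parallel_processing_allowed = True

shared = _SharedStub()  # stand-in for modules.shared, for illustration only


def train(unload=True):
    old_parallel_processing_allowed = shared.parallel_processing_allowed
    try:
        if unload:
            # with the models moved to CPU, parallel preview jobs would thrash devices
            shared.parallel_processing_allowed = False
        raise RuntimeError("simulated mid-training failure")
    finally:
        # runs even on the failure path, unlike a straight-line restore
        shared.parallel_processing_allowed = old_parallel_processing_allowed


try:
    train()
except RuntimeError:
    pass
assert shared.parallel_processing_allowed  # flag was restored despite the error
```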
@@ -285,7 +299,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
                 epoch_num = hypernetwork.step // steps_per_epoch
                 epoch_step = hypernetwork.step % steps_per_epoch

-                pbar.set_description(f"[Epoch {epoch_num}: {epoch_step + 1}/{steps_per_epoch}]loss: {loss_step:.7f}")
+                description = f"Training hypernetwork [Epoch {epoch_num}: {epoch_step + 1}/{steps_per_epoch}]loss: {loss_step:.7f}"
+                pbar.set_description(description)
                 if hypernetwork_dir is not None and ((use_beta_scheduler and scheduler_beta.is_EOC(hypernetwork.step) and save_when_converge) or (save_hypernetwork_every > 0 and steps_done % save_hypernetwork_every == 0)):
                     # Before saving, change name to match current checkpoint.
                     hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}'
@@ -301,7 +316,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
                     "loss": f"{loss_step:.7f}",
                     "learn_rate": optimizer.param_groups[0]['lr']
                 })
+                if shared.opts.training_enable_tensorboard:
+                    epoch_num = hypernetwork.step // len(ds)
+                    epoch_step = hypernetwork.step - (epoch_num * len(ds)) + 1
+                    mean_loss = sum(sum(x) for x in loss_dict.values()) / sum(len(x) for x in loss_dict.values())
+                    tensorboard_add(tensorboard_writer, loss=mean_loss, global_step=hypernetwork.step, step=epoch_step, learn_rate=scheduler.learn_rate, epoch_num=epoch_num)
                 if images_dir is not None and (use_beta_scheduler and scheduler_beta.is_EOC(hypernetwork.step) and create_when_converge) or (create_image_every > 0 and steps_done % create_image_every == 0):
                     forced_filename = f'{hypernetwork_name}-{steps_done}'
                     last_saved_image = os.path.join(images_dir, forced_filename)
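The `mean_loss` reduction added above averages over every recorded per-image loss, not over images. A tiny worked example with hypothetical `loss_dict` contents, for illustration only:

```python
# loss_dict maps each training image to the list of losses recorded for it
loss_dict = {"img_a.png": [0.10, 0.30], "img_b.png": [0.20]}

mean_loss = sum(sum(x) for x in loss_dict.values()) / sum(len(x) for x in loss_dict.values())
assert abs(mean_loss - 0.20) < 1e-9  # (0.10 + 0.30 + 0.20) / 3 entries, not / 2 images
```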
@@ -341,6 +360,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,

                     processed = processing.process_images(p)
                     image = processed.images[0] if len(processed.images) > 0 else None
+                    if shared.opts.training_enable_tensorboard and shared.opts.training_tensorboard_save_images:
+                        tensorboard_add_image(tensorboard_writer, f"Validation at epoch {epoch_num}", image, hypernetwork.step)

                     if unload:
                         shared.sd_model.cond_stage_model.to(devices.cpu)
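`tensorboard_add_image` receives a PIL image straight from `processing`, so the helper is assumed to convert it to the CHW tensor layout `SummaryWriter.add_image` expects. An approximation, not code from this commit:

```python
import numpy as np
import torch


def tensorboard_add_image(tensorboard_writer, tag, pil_image, step):
    # PIL gives HWC uint8; SummaryWriter.add_image defaults to CHW layout
    img_tensor = torch.as_tensor(np.asarray(pil_image.convert("RGB")).copy())
    img_tensor = img_tensor.permute((2, 0, 1))
    tensorboard_writer.add_image(tag, img_tensor, global_step=step)
```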
@@ -352,7 +373,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
                     if move_optimizer:
                         optim_to(optimizer, devices.device)
                     if image is not None:
-                        shared.state.current_image = image
+                        shared.state.assign_current_image(image)
                         last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt,
                                                                              shared.opts.samples_format,
                                                                              processed.infotexts[0], p=p,
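`assign_current_image` replaces the bare attribute write here and in the embedding trainer below. In upstream webui the method also advances a live-preview counter so the frontend can tell a fresh preview is available; assumed shape, based on upstream rather than this repo:

```python
# Sketch of modules.shared.State.assign_current_image as assumed from upstream webui:
class State:
    current_image = None
    id_live_preview = 0

    def assign_current_image(self, image):
        self.current_image = image
        self.id_live_preview += 1  # lets the UI detect that the preview changed
```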
@@ -377,6 +398,7 @@ Last saved image: {html.escape(last_saved_image)}<br/>

         pbar.leave = False
         pbar.close()
         hypernetwork.eval()
+        shared.parallel_processing_allowed = old_parallel_processing_allowed
     report_statistics(loss_dict)
     filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
     hypernetwork.optimizer_name = optimizer_name
@@ -19,6 +19,8 @@ from modules.textual_inversion.image_embedding import caption_image_overlay, ins
 from modules.textual_inversion.learn_schedule import LearnRateScheduler
 from modules.textual_inversion.textual_inversion import save_embedding

+from torch.utils.tensorboard import SummaryWriter
+from modules.textual_inversion.textual_inversion import tensorboard_add, tensorboard_setup, tensorboard_add_scaler, tensorboard_add_image
 #apply OsError avoid here
 delayed_values = {}
@@ -84,7 +86,7 @@ def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, dat
     assert log_directory, "Log directory is empty"


-def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width,
+def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width,
                     training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every,
                     save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img,
                     preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale,
@@ -200,6 +202,12 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
     scheduler = LearnRateScheduler(learn_rate, steps, initial_step)
     # dataset loading may take a while, so input validations and early returns should be done before this
     shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
+    old_parallel_processing_allowed = shared.parallel_processing_allowed
+
+    tensorboard_writer = None
+    if shared.opts.training_enable_tensorboard:
+        print("Tensorboard logging enabled")
+        tensorboard_writer = tensorboard_setup(log_directory)

     pin_memory = shared.opts.pin_memory
     shared.sd_model.cond_stage_model.to(devices.device)
@@ -220,6 +228,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
                                 batch_size=ds.batch_size, pin_memory=pin_memory)

     if unload:
+        shared.parallel_processing_allowed = False
         shared.sd_model.first_stage_model.to(devices.cpu)

     embedding.vec.requires_grad = True
@@ -380,21 +389,19 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
                     processed = processing.process_images(p)
                     image = processed.images[0] if len(processed.images) > 0 else None

-                    if unload:
-                        shared.sd_model.first_stage_model.to(devices.cpu)
-                    torch.set_rng_state(rng_state)
-                    if torch.cuda.is_available():
-                        torch.cuda.set_rng_state_all(cuda_rng_state)
                     if move_optimizer:
                         optim_to(optimizer, devices.device)
                     if image is not None:
-                        shared.state.current_image = image
+                        shared.state.assign_current_image(image)
                         last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt,
                                                                              shared.opts.samples_format,
                                                                              processed.infotexts[0], p=p,
                                                                              forced_filename=forced_filename,
                                                                              save_to_dirs=False)
                         last_saved_image += f", prompt: {preview_text}"
+                        if shared.opts.training_enable_tensorboard and shared.opts.training_tensorboard_save_images:
+                            tensorboard_add_image(tensorboard_writer, f"Validation at epoch {epoch_num}", image,
+                                                  embedding.step)

                     if save_image_with_stored_embedding and os.path.exists(
                             last_saved_file) and embedding_yet_to_be_embedded:
@@ -422,7 +429,11 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_

                         captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info)
                         embedding_yet_to_be_embedded = False
+                    if unload:
+                        shared.sd_model.first_stage_model.to(devices.cpu)
+                    torch.set_rng_state(rng_state)
+                    if torch.cuda.is_available():
+                        torch.cuda.set_rng_state_all(cuda_rng_state)
                     last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt,
                                                                          shared.opts.samples_format,
                                                                          processed.infotexts[0], p=p,
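This hunk moves the RNG restore (and the optional first-stage unload) below the captioned-image branch, so preview generation, tensorboard image logging, and caption embedding all complete before global RNG state is rolled back. For review context, the save side of this bracket is assumed to sit before preview generation, in the usual pattern; variable names match the diff, surrounding code is assumed:

```python
import torch

# save side (earlier in train_embedding)
rng_state = torch.get_rng_state()
cuda_rng_state = None
if torch.cuda.is_available():
    cuda_rng_state = torch.cuda.get_rng_state_all()

# ... preview sampling reseeds the generators here ...

# restore side (the lines added by this hunk)
torch.set_rng_state(rng_state)
if torch.cuda.is_available():
    torch.cuda.set_rng_state_all(cuda_rng_state)
```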
@@ -430,6 +441,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
                                                                          save_to_dirs=False)
                         last_saved_image += f", prompt: {preview_text}"

                 shared.state.job_no = embedding.step

                 shared.state.textinfo = f"""
|
||||||
pbar.leave = False
|
pbar.leave = False
|
||||||
pbar.close()
|
pbar.close()
|
||||||
shared.sd_model.first_stage_model.to(devices.device)
|
shared.sd_model.first_stage_model.to(devices.device)
|
||||||
|
shared.parallel_processing_allowed = old_parallel_processing_allowed
|
||||||
return embedding, filename
|
return embedding, filename
|
||||||
|
|
@@ -36,6 +36,7 @@ Hypernetwork saved to {html.escape(filename)}


 def on_train_gamma_tab(params=None):
+    dummy_component = gr.Label(visible=False)
     with gr.Tab(label="Train_Gamma") as train_gamma:
         gr.HTML(
             value="<p style='margin-bottom: 0.7em'>Train an embedding or Hypernetwork; you must specify a directory <a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Textual-Inversion\" style=\"font-weight:bold;\">[wiki]</a></p>")
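The `dummy_component` added here fills the first slot of each event's `inputs` list; the `_js` hook `start_training_textual_inversion` overwrites that slot with a task id, which then arrives as the new `id_task` parameter of `train_embedding` / `train_hypernetwork`. A sketch of the plumbing, assumed to match upstream webui's call queue rather than code in this commit:

```python
def wrap_gradio_gpu_call(func, extra_outputs=None):
    # Sketch: the JS hook has already replaced the dummy component's value
    # with a string like "task(abc123)" before this wrapper runs.
    def wrapped(*args, **kwargs):
        if args and isinstance(args[0], str) and args[0].startswith("task(") and args[0].endswith(")"):
            id_task = args[0]  # assumed to be used for progress/queue tracking
        # GPU lock and shared.state bookkeeping omitted from this sketch
        return func(*args, **kwargs)  # func's first parameter receives id_task
    return wrapped
```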
@@ -145,6 +146,7 @@ def on_train_gamma_tab(params=None):
             fn=wrap_gradio_gpu_call(train_embedding_external, extra_outputs=[gr.update()]),
             _js="start_training_textual_inversion",
             inputs=[
+                dummy_component,
                 train_embedding_name,
                 embedding_learn_rate,
                 batch_size,
@@ -192,6 +194,7 @@ def on_train_gamma_tab(params=None):
             fn=wrap_gradio_gpu_call(train_hypernetwork_ui, extra_outputs=[gr.update()]),
             _js="start_training_textual_inversion",
             inputs=[
+                dummy_component,
                 train_hypernetwork_name,
                 hypernetwork_learn_rate,
                 batch_size,
@@ -7,7 +7,7 @@ import filelock
 # This is full copy of modules/hashes. This will be only loaded if compatibility issue happens due to version mismatch.
 cache_filename = "cache.json"
 cache_data = None
-
+blksize = 1 << 20

 def dump_cache():
     with filelock.FileLock(cache_filename+".lock"):
@@ -34,9 +34,9 @@ def cache(subsection):

 def calculate_sha256(filename):
     hash_sha256 = hashlib.sha256()
-
+    global blksize
     with open(filename, "rb") as f:
-        for chunk in iter(lambda: f.read(4096), b""):
+        for chunk in iter(lambda: f.read(blksize), b""):
             hash_sha256.update(chunk)

     return hash_sha256.hexdigest()
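Reading in 1 MiB blocks (1 << 20) instead of 4096-byte chunks cuts the Python-level loop by a factor of 256, which matters when hashing multi-gigabyte model files. The added `global blksize` is only required if the function reassigned the name; for a read it is a no-op, though harmless. Self-contained version of the updated routine:

```python
import hashlib

blksize = 1 << 20  # 1 MiB per read; 256x fewer iterations than 4096-byte chunks


def calculate_sha256(filename):
    hash_sha256 = hashlib.sha256()
    with open(filename, "rb") as f:
        # iter(callable, sentinel) keeps calling f.read until it returns b""
        for chunk in iter(lambda: f.read(blksize), b""):
            hash_sha256.update(chunk)
    return hash_sha256.hexdigest()
```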