"
+
+    pbar = tqdm.tqdm(total=steps - initial_step)
+    try:
+        for i in range((steps - initial_step) * gradient_step):
+            if scheduler.finished or hypernetwork.step > steps:
+                break
+            if shared.state.interrupted:
+                break
+            for j, batch in enumerate(dl):
+                # works as a drop_last=True for gradient accumulation
+                if j == max_steps_per_epoch:
+                    break
+                if use_beta_scheduler:
+                    scheduler_beta.step(hypernetwork.step)
+                else:
+                    scheduler.apply(optimizer, hypernetwork.step)
+                if scheduler.finished:
+                    break
+                if shared.state.interrupted:
+                    break
+
+                with torch.autocast("cuda"):
+                    x = batch.latent_sample.to(devices.device, non_blocking=pin_memory)
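+                    # with tag shuffle/drop-out the caption text changes between epochs, so the
+                    # conditioning is re-encoded per batch instead of reusing cached conds; the text
+                    # encoder is moved to the GPU only for this encode and back to CPU afterwards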
+                    if tag_drop_out != 0 or shuffle_tags:
+                        shared.sd_model.cond_stage_model.to(devices.device)
+                        c = shared.sd_model.cond_stage_model(batch.cond_text).to(devices.device, non_blocking=pin_memory)
+                        shared.sd_model.cond_stage_model.to(devices.cpu)
+                    else:
+                        c = stack_conds(batch.cond).to(devices.device, non_blocking=pin_memory)
+                    loss = shared.sd_model(x, c)[0]
+                    for filenames in batch.filename:
+                        loss_dict[filenames].append(loss.item())
+                    loss /= gradient_step
+                    del x
+                    del c
+
+                    _loss_step += loss.item()
+                scaler.scale(loss).backward()
+                # .to() on a tensor is not in-place; assign the result so the latent actually moves off the GPU
+                batch.latent_sample = batch.latent_sample.to(devices.cpu)
+                # go back until we reach gradient accumulation steps
+                if (j + 1) % gradient_step != 0:
+                    continue
+                gradient_clipping(weights)
+                # print(f"grad:{weights[0].grad.detach().cpu().abs().mean().item():.7f}")
+                # scaler.unscale_(optimizer)
+                # print(f"grad:{weights[0].grad.detach().cpu().abs().mean().item():.15f}")
+                # torch.nn.utils.clip_grad_norm_(weights, max_norm=1.0)
+                # print(f"grad:{weights[0].grad.detach().cpu().abs().mean().item():.15f}")
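+                # note: if the optimizer state was saved on a different device or torch version,
+                # Adam's 'capturable' assertion can fire inside scaler.step; the retry below flips
+                # the flag and steps again (a known workaround for torch>=1.12 Adam variants)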
+                try:
+                    scaler.step(optimizer)
+                except AssertionError:
+                    optimizer.param_groups[0]['capturable'] = True
+                    scaler.step(optimizer)
+                scaler.update()
+                hypernetwork.step += 1
+                pbar.update()
+                optimizer.zero_grad(set_to_none=True)
+                loss_step = _loss_step
+                _loss_step = 0
+
+                steps_done = hypernetwork.step + 1
+
+                epoch_num = hypernetwork.step // steps_per_epoch
+                epoch_step = hypernetwork.step % steps_per_epoch
+
+                description = f"Training hypernetwork [Epoch {epoch_num}: {epoch_step + 1}/{steps_per_epoch}] loss: {loss_step:.7f}"
+                pbar.set_description(description)
+                if hypernetwork_dir is not None and ((use_beta_scheduler and scheduler_beta.is_EOC(hypernetwork.step) and save_when_converge) or (save_hypernetwork_every > 0 and steps_done % save_hypernetwork_every == 0)):
+                    # Before saving, change name to match current checkpoint.
+                    hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}'
+                    last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt')
+                    hypernetwork.optimizer_name = optimizer_name
+                    if shared.opts.save_optimizer_state:
+                        hypernetwork.optimizer_state_dict = optimizer.state_dict()
+                    save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file)
+                    hypernetwork.optimizer_state_dict = None  # dereference it after saving, to save memory.
+
+                write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, steps_per_epoch, {
+                    "loss": f"{loss_step:.7f}",
+                    "learn_rate": optimizer.param_groups[0]['lr']
+                })
+                if shared.opts.training_enable_tensorboard:
+                    epoch_num = hypernetwork.step // len(ds)
+                    epoch_step = hypernetwork.step - (epoch_num * len(ds)) + 1
+                    mean_loss = sum(sum(x) for x in loss_dict.values()) / sum(len(x) for x in loss_dict.values())
+                    tensorboard_add(tensorboard_writer, loss=mean_loss, global_step=hypernetwork.step, step=epoch_step, learn_rate=scheduler.learn_rate, epoch_num=epoch_num)
+                # parenthesized like the save branch above, so the interval branch cannot fire with images_dir unset
+                if images_dir is not None and ((use_beta_scheduler and scheduler_beta.is_EOC(hypernetwork.step) and create_when_converge) or (create_image_every > 0 and steps_done % create_image_every == 0)):
+                    forced_filename = f'{hypernetwork_name}-{steps_done}'
+                    last_saved_image = os.path.join(images_dir, forced_filename)
+                    rng_state = torch.get_rng_state()
+                    cuda_rng_state = None
+                    if torch.cuda.is_available():
+                        cuda_rng_state = torch.cuda.get_rng_state_all()
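+                    # the RNG state is restored right after the preview render below, so generating
+                    # a sample image does not perturb the training run's random sequence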
+                    hypernetwork.eval()
+                    if move_optimizer:
+                        optim_to(optimizer, devices.cpu)
+                        gc.collect()
+                    shared.sd_model.cond_stage_model.to(devices.device)
+                    shared.sd_model.first_stage_model.to(devices.device)
+
+                    p = processing.StableDiffusionProcessingTxt2Img(
+                        sd_model=shared.sd_model,
+                        do_not_save_grid=True,
+                        do_not_save_samples=True,
+                    )
+
+                    if preview_from_txt2img:
+                        p.prompt = preview_prompt
+                        p.negative_prompt = preview_negative_prompt
+                        p.steps = preview_steps
+                        p.sampler_name = sd_samplers.samplers[preview_sampler_index].name
+                        p.cfg_scale = preview_cfg_scale
+                        p.seed = preview_seed
+                        p.width = preview_width
+                        p.height = preview_height
+                    else:
+                        p.prompt = batch.cond_text[0]
+                        p.steps = 20
+                        p.width = training_width
+                        p.height = training_height
+
+                    preview_text = p.prompt
+
+                    processed = processing.process_images(p)
+                    image = processed.images[0] if len(processed.images) > 0 else None
+                    if shared.opts.training_enable_tensorboard and shared.opts.training_tensorboard_save_images:
+                        tensorboard_add_image(tensorboard_writer, f"Validation at epoch {epoch_num}", image, hypernetwork.step)
+
+                    if unload:
+                        shared.sd_model.cond_stage_model.to(devices.cpu)
+                        shared.sd_model.first_stage_model.to(devices.cpu)
+                    torch.set_rng_state(rng_state)
+                    if torch.cuda.is_available():
+                        torch.cuda.set_rng_state_all(cuda_rng_state)
+                    hypernetwork.train()
+                    if move_optimizer:
+                        optim_to(optimizer, devices.device)
+                    if image is not None:
+                        shared.state.assign_current_image(image)
+                        last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt,
+                                                                             shared.opts.samples_format,
+                                                                             processed.infotexts[0], p=p,
+                                                                             forced_filename=forced_filename,
+                                                                             save_to_dirs=False)
+                        last_saved_image += f", prompt: {preview_text}"
+
+                shared.state.job_no = hypernetwork.step
+
+                shared.state.textinfo = f"""
+<p>
+Loss: {loss_step:.7f}<br/>
+Step: {steps_done}<br/>
+Last prompt: {html.escape(batch.cond_text[0])}<br/>
+Last saved hypernetwork: {html.escape(last_saved_file)}<br/>
+Last saved image: {html.escape(last_saved_image)}<br/>
+</p>
+"""
+    except Exception:
+        print(traceback.format_exc(), file=sys.stderr)
+    finally:
+        pbar.leave = False
+        pbar.close()
+        hypernetwork.eval()
+        shared.parallel_processing_allowed = old_parallel_processing_allowed
+        report_statistics(loss_dict)
+        filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
+        hypernetwork.optimizer_name = optimizer_name
+        if shared.opts.save_optimizer_state:
+            hypernetwork.optimizer_state_dict = optimizer.state_dict()
+        save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename)
+        del optimizer
+        hypernetwork.optimizer_state_dict = None  # dereference it after saving, to save memory.
+        shared.sd_model.cond_stage_model.to(devices.device)
+        shared.sd_model.first_stage_model.to(devices.device)
+
+    return hypernetwork, filename
+
+
+def train_hypernetwork_tuning(id_task, hypernetwork_name, data_root, log_directory,
+                              create_image_every, save_hypernetwork_every, preview_from_txt2img, preview_prompt,
+                              preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed,
+                              preview_width, preview_height,
+                              move_optimizer=True,
+                              optional_new_hypernetwork_name='', load_hypernetworks_options='', load_training_options=''):
+    load_hypernetworks_options = load_hypernetworks_options.split(',')
+    load_training_options = load_training_options.split(',')
+    # images allows training previews to have infotext. Importing it at the top causes a circular import problem.
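+    # every hypernetwork-creation preset is paired with every training preset below,
+    # i.e. one full training run per (creation json, training json) combination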
+    for load_hypernetworks_option in load_hypernetworks_options:
+        for load_training_option in load_training_options:
+            internal_clean_training(hypernetwork_name if load_hypernetworks_option != '' else optional_new_hypernetwork_name, data_root, log_directory,
+                                    create_image_every, save_hypernetwork_every,
+                                    preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height,
+                                    move_optimizer,
+                                    load_hypernetworks_option, load_training_option)
+            if shared.state.interrupted:
+                return
\ No newline at end of file
diff --git a/patches/external_pr/ui.py b/patches/external_pr/ui.py
index 44a819f..51dd9be 100644
--- a/patches/external_pr/ui.py
+++ b/patches/external_pr/ui.py
@@ -1,12 +1,16 @@
import html
+import json
import os
+import random
from modules import shared, sd_hijack, devices
+from modules.call_queue import wrap_gradio_call
+from modules.hypernetworks.ui import keys
from modules.paths import script_path
from modules.ui import create_refresh_button, gr_show
from webui import wrap_gradio_gpu_call
from .textual_inversion import train_embedding as train_embedding_external
-from .hypernetwork import train_hypernetwork as train_hypernetwork_external
+from .hypernetwork import train_hypernetwork as train_hypernetwork_external, train_hypernetwork_tuning
import gradio as gr
@@ -35,6 +39,51 @@ Hypernetwork saved to {html.escape(filename)}
sd_hijack.apply_optimizations()
+def train_hypernetwork_ui_tuning(*args):
+    initial_hypernetwork = shared.loaded_hypernetwork
+
+    assert not shared.cmd_opts.lowvram, 'Training models with lowvram is not possible'
+
+    try:
+        sd_hijack.undo_optimizations()
+
+        hypernetwork, filename = train_hypernetwork_tuning(*args)
+
+        res = f"""
+Training {'interrupted' if shared.state.interrupted else 'finished'} at {hypernetwork.step} steps.
+Hypernetwork saved to {html.escape(filename)}
+"""
+        return res, ""
+    except Exception:
+        raise
+    finally:
+        shared.loaded_hypernetwork = initial_hypernetwork
+        shared.sd_model.cond_stage_model.to(devices.device)
+        shared.sd_model.first_stage_model.to(devices.device)
+        sd_hijack.apply_optimizations()
+
+
+def save_training_setting(*args):
+    save_file_name, hypernetwork_learn_rate, batch_size, gradient_step, training_width, \
+        training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method, \
+        template_file, use_beta_scheduler, beta_repeat_epoch, epoch_mult, warmup, min_lr, \
+        gamma_rate, use_beta_adamW_checkbox, save_converge_opt, generate_converge_opt, \
+        adamw_weight_decay, adamw_beta_1, adamw_beta_2, adamw_eps, show_gradient_clip_checkbox, \
+        gradient_clip_opt, optional_gradient_clip_value, optional_gradient_norm_type = args
+
+    filename = (str(random.randint(0, 1024)) if save_file_name == '' else save_file_name) + '_train.json'
+    # snapshot locals() before opening the file; otherwise the open file handle lands in the
+    # dict and json.dump raises on it. The raw args tuple is dropped as redundant.
+    settings = dict(locals())
+    settings.pop('args', None)
+    with open(filename, 'w') as file:
+        json.dump(settings, file)
+    print(f"File saved as {filename}")
+
+def save_hypernetwork_setting(*args):
+    save_file_name, enable_sizes, overwrite_old, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, dropout_structure, optional_info, weight_init_seed, normal_std = args
+    filename = (str(random.randint(0, 1024)) if save_file_name == '' else save_file_name) + '_hypernetwork.json'
+    settings = dict(locals())
+    settings.pop('args', None)  # drop the raw tuple; the named fields above carry the same data
+    with open(filename, 'w') as file:
+        json.dump(settings, file)
+    print(f"File saved as {filename}")
+
def on_train_gamma_tab(params=None):
dummy_component = gr.Label(visible=False)
with gr.Tab(label="Train_Gamma") as train_gamma:
@@ -140,8 +189,45 @@ def on_train_gamma_tab(params=None):
train_embedding = gr.Button(value="Train Embedding", variant='primary')
ti_output = gr.Text(elem_id="ti_output3", value="", show_label=False)
ti_outcome = gr.HTML(elem_id="ti_error3", value="")
+        save_training_option = gr.Button(value="Save training setting")
+        save_file_name = gr.Textbox(label="File name to save setting as", value="")
+        load_training_option = gr.Textbox(label="Load training option from saved json file. This will override settings above", value="")
+        # Full paths to .json files or simple names are recommended.
+        save_training_option.click(
+            fn=wrap_gradio_call(save_training_setting),
+            inputs=[
+                save_file_name,
+                hypernetwork_learn_rate,
+                batch_size,
+                gradient_step,
+                training_width,
+                training_height,
+                steps,
+                shuffle_tags,
+                tag_drop_out,
+                latent_sampling_method,
+                template_file,
+                use_beta_scheduler,
+                beta_repeat_epoch,
+                epoch_mult,
+                warmup,
+                min_lr,
+                gamma_rate,
+                use_beta_adamW_checkbox,
+                save_converge_opt,
+                generate_converge_opt,
+                adamw_weight_decay,
+                adamw_beta_1,
+                adamw_beta_2,
+                adamw_eps,
+                show_gradient_clip_checkbox,
+                gradient_clip_opt,
+                optional_gradient_clip_value,
+                optional_gradient_norm_type
+            ],
+            outputs=[]
+        )
train_embedding.click(
fn=wrap_gradio_gpu_call(train_embedding_external, extra_outputs=[gr.update()]),
_js="start_training_textual_inversion",
@@ -229,7 +315,8 @@ def on_train_gamma_tab(params=None):
show_gradient_clip_checkbox,
gradient_clip_opt,
optional_gradient_clip_value,
- optional_gradient_norm_type
+ optional_gradient_norm_type,
+ load_training_option
],
outputs=[
@@ -244,3 +331,64 @@ def on_train_gamma_tab(params=None):
outputs=[],
)
return [(train_gamma, "Train Gamma", "train_gamma")]
+
+def on_train_tuning(params=None):
+    dummy_component = gr.Label(visible=False)
+    with gr.Tab(label="Train_Tuning") as train_tuning:
+        gr.HTML(value="Train Hypernetwork; you must specify a directory [wiki]")
+        with gr.Row():
+            train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', elem_id="train_hypernetwork",
+                                                  choices=[x for x in shared.hypernetworks.keys()])
+            create_refresh_button(train_hypernetwork_name, shared.reload_hypernetworks,
+                                  lambda: {"choices": sorted([x for x in shared.hypernetworks.keys()])},
+                                  "refresh_train_hypernetwork_name")
+        optional_new_hypernetwork_name = gr.Textbox(label="Hypernetwork name to create, leave it empty to use selected", value="")
+        load_hypernetworks_option = gr.Textbox(
+            label="Load Hypernetwork creation options from saved json files. File names cannot contain ',' and multiple files should be separated by ','.", value="")
+        load_training_options = gr.Textbox(
+            label="Load training options from saved json files. File names cannot contain ',' and multiple files should be separated by ','.", value="")
+        move_optim_when_generate = gr.Checkbox(label="Unload Optimizer when generating preview (hypernetwork)", value=True)
+        dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images")
+        log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs",
+                                   value="textual_inversion")
+        create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable',
+                                       value=500, precision=0)
+        save_model_every = gr.Number(
+            label='Save a copy of model to log directory every N steps, 0 to disable', value=500, precision=0)
+        preview_from_txt2img = gr.Checkbox(
+            label='Read parameters (prompt, etc...) from txt2img tab when making previews', value=False)
+        with gr.Row():
+            interrupt_training = gr.Button(value="Interrupt")
+            train_hypernetwork = gr.Button(value="Train Hypernetwork", variant='primary')
+        ti_output = gr.Text(elem_id="ti_output4", value="", show_label=False)
+        ti_outcome = gr.HTML(elem_id="ti_error4", value="")
+        train_hypernetwork.click(
+            fn=wrap_gradio_gpu_call(train_hypernetwork_ui_tuning, extra_outputs=[gr.update()]),
+            _js="start_training_textual_inversion",
+            inputs=[
+                dummy_component,
+                train_hypernetwork_name,
+                dataset_directory,
+                log_directory,
+                create_image_every,
+                save_model_every,
+                preview_from_txt2img,
+                *params.txt2img_preview_params,
+                move_optim_when_generate,
+                optional_new_hypernetwork_name,
+                load_hypernetworks_option,
+                load_training_options
+            ],
+            outputs=[
+                ti_output,
+                ti_outcome,
+            ]
+        )
+
+        interrupt_training.click(
+            fn=lambda: shared.state.interrupt(),
+            inputs=[],
+            outputs=[],
+        )
+    return [(train_tuning, "Train Tuning", "train_tuning")]
\ No newline at end of file
diff --git a/patches/ui.py b/patches/ui.py
index 1900d5c..a85252e 100644
--- a/patches/ui.py
+++ b/patches/ui.py
@@ -2,9 +2,44 @@ import html
import os
from modules import shared, sd_hijack, devices
-from .hypernetwork import Hypernetwork, train_hypernetwork
+from .hypernetwork import Hypernetwork, train_hypernetwork, load_hypernetwork
import gradio as gr
+def create_hypernetwork_load(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, dropout_structure=None, optional_info=None,
+                             weight_init_seed=None, normal_std=0.01):
+    # Remove illegal characters from name.
+    name = "".join(x for x in name if (x.isalnum() or x in "._- "))
+    assert name, "Name cannot be empty!"
+    fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt")
+    if not overwrite_old:
+        assert not os.path.exists(fn), f"file {fn} already exists"
+
+    if isinstance(layer_structure, str):
+        layer_structure = [float(x.strip()) for x in layer_structure.split(",")]
+
+    if dropout_structure and isinstance(dropout_structure, str):
+        dropout_structure = [float(x.strip()) for x in dropout_structure.split(",")]
+    normal_std = float(normal_std)
+    assert normal_std > 0, "Normal Standard Deviation should be bigger than 0!"
+    hypernet = Hypernetwork(
+        name=name,
+        enable_sizes=[int(x) for x in enable_sizes],
+        layer_structure=layer_structure,
+        activation_func=activation_func,
+        weight_init=weight_init,
+        add_layer_norm=add_layer_norm,
+        use_dropout=use_dropout,
+        dropout_structure=dropout_structure if use_dropout and dropout_structure else [0] * len(layer_structure),
+        optional_info=optional_info,
+        generation_seed=weight_init_seed if weight_init_seed != -1 else None,
+        normal_std=normal_std
+    )
+    hypernet.save(fn)
+
+    # load the freshly created hypernetwork so training can start without a manual refresh
+    load_hypernetwork(name)
+
+    return hypernet
+
def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, dropout_structure=None, optional_info=None,
weight_init_seed=None, normal_std=0.01):
diff --git a/scripts/hypernetwork-extensions.py b/scripts/hypernetwork-extensions.py
index 5e64f2e..0feab6c 100644
--- a/scripts/hypernetwork-extensions.py
+++ b/scripts/hypernetwork-extensions.py
@@ -1,5 +1,6 @@
import os
+from modules.call_queue import wrap_gradio_call
from modules.hypernetworks.ui import keys
import modules.scripts as scripts
from modules import script_callbacks, shared, sd_hijack
@@ -108,8 +109,8 @@ def create_training_tab(params: script_callbacks.UiTrainTabParams = None):
def create_extension_tab(params=None):
with gr.Tab(label="Create Beta hypernetwork") as create_beta:
new_hypernetwork_name = gr.Textbox(label="Name")
- new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"],
- choices=["768", "320", "640", "1280"])
+        new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1024", "1280"],
+                                                  choices=["768", "320", "640", "1024", "1280"])
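+        # 1024 is presumably added for SD 2.x, whose OpenCLIP text encoder outputs 1024-dim
+        # embeddings; 768 covers SD 1.x, and 320/640/1280 are the UNet attention widths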
new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure",
placeholder="1st and last digit must be 1. ex:'1, 2, 1'")
new_hypernetwork_activation_func = gr.Dropdown(value="linear",
@@ -143,8 +144,27 @@ def create_extension_tab(params=None):
with gr.Column():
create_hypernetwork = gr.Button(value="Create hypernetwork", variant='primary')
+            save_setting = gr.Button(value="Save hypernetwork setting to file")
+            setting_name = gr.Textbox(label="Setting file name", value="")
ti_output = gr.Text(elem_id="ti_output2", value="", show_label=False)
ti_outcome = gr.HTML(elem_id="ti_error2", value="")
+
+        save_setting.click(
+            # setting_name is passed first because save_hypernetwork_setting unpacks the file
+            # name before the creation options; Gradio input lists must also contain components
+            # themselves, not expressions like `generation_seed if generation_seed.visible else None`
+            fn=wrap_gradio_call(external_patch_ui.save_hypernetwork_setting),
+            inputs=[
+                setting_name,
+                new_hypernetwork_sizes,
+                overwrite_old_hypernetwork,
+                new_hypernetwork_layer_structure,
+                new_hypernetwork_activation_func,
+                new_hypernetwork_initialization_option,
+                new_hypernetwork_add_layer_norm,
+                new_hypernetwork_use_dropout,
+                new_hypernetwork_dropout_structure,
+                optional_info,
+                generation_seed,
+                normal_std
+            ],
+            outputs=[]
+        )
create_hypernetwork.click(
fn=ui.create_hypernetwork,
inputs=[