From 065fcd110c9deadd5a0fc67e078df00e92bdf626 Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Tue, 17 Jan 2023 14:24:25 +0900 Subject: [PATCH 1/5] implement ResBlock its residual block --- patches/external_pr/hypernetwork.py | 3 +- patches/external_pr/ui.py | 2 +- patches/hypernetwork.py | 85 ++++++++++++++++++++++++++--- patches/ui.py | 10 ++-- scripts/hypernetwork-extensions.py | 7 ++- 5 files changed, 92 insertions(+), 15 deletions(-) diff --git a/patches/external_pr/hypernetwork.py b/patches/external_pr/hypernetwork.py index f100141..b50f083 100644 --- a/patches/external_pr/hypernetwork.py +++ b/patches/external_pr/hypernetwork.py @@ -502,9 +502,10 @@ def internal_clean_training(hypernetwork_name, data_root, log_directory, optional_info = dump_hyper['optional_info'] weight_init_seed = dump_hyper['weight_init_seed'] normal_std = dump_hyper['normal_std'] + skip_connection = dump_hyper['skip_connection'] hypernetwork = create_hypernetwork_load(hypernetwork_name, enable_sizes, overwrite_old, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, - dropout_structure, optional_info, weight_init_seed, normal_std) + dropout_structure, optional_info, weight_init_seed, normal_std, skip_connection) else: load_hypernetwork(hypernetwork_name) hypernetwork_name = hypernetwork_name.rsplit('(',1)[0] + time.strftime('%Y%m%d%H%M%S') diff --git a/patches/external_pr/ui.py b/patches/external_pr/ui.py index e43e20d..88e5f79 100644 --- a/patches/external_pr/ui.py +++ b/patches/external_pr/ui.py @@ -82,7 +82,7 @@ def save_training_setting(*args): def save_hypernetwork_setting(*args): - save_file_name, enable_sizes, overwrite_old, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, dropout_structure, optional_info, weight_init_seed, normal_std = args + save_file_name, enable_sizes, overwrite_old, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, dropout_structure, optional_info, weight_init_seed, normal_std, skip_connection = args dumped_locals = locals() dumped_locals.pop('args') filename = (str(random.randint(0, 1024)) if save_file_name == '' else save_file_name) + '_hypernetwork_' + '.json' diff --git a/patches/hypernetwork.py b/patches/hypernetwork.py index 77ff27d..ec4af84 100644 --- a/patches/hypernetwork.py +++ b/patches/hypernetwork.py @@ -31,6 +31,65 @@ from .dataset import PersonalizedBase from modules.textual_inversion.learn_schedule import LearnRateScheduler +def init_weight(layer, weight_init="Normal", normal_std=0.01, activation_func="relu"): + w, b = layer.weight.data, layer.bias.data + if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm: + normal_(w, mean=0.0, std=normal_std) + normal_(b, mean=0.0, std=0) + elif weight_init == 'XavierUniform': + xavier_uniform_(w) + zeros_(b) + elif weight_init == 'XavierNormal': + xavier_normal_(w) + zeros_(b) + elif weight_init == 'KaimingUniform': + kaiming_uniform_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu') + zeros_(b) + elif weight_init == 'KaimingNormal': + kaiming_normal_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu') + zeros_(b) + else: + raise KeyError(f"Key {weight_init} is not defined as initialization!") + + +class ResBlock(torch.nn.Module): + """Residual Block""" + def __init__(self, n_inputs, n_outputs, activation_func, weight_init, add_layer_norm, dropout_p, normal_std, device=None, state_dict=None): + super().__init__() + self.n_outputs = n_outputs + linears = [torch.nn.Linear(n_inputs, n_outputs)] + init_weight(linears[0], weight_init, normal_std, activation_func) + if add_layer_norm: + linears.append(torch.nn.LayerNorm(n_outputs)) + if dropout_p > 0: + linears.append(torch.nn.Dropout(p=dropout_p)) + if activation_func == "linear" or activation_func is None: + pass + elif activation_func in HypernetworkModule.activation_dict: + linears.append(HypernetworkModule.activation_dict[activation_func]()) + else: + raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}') + self.linear = torch.nn.Sequential(*linears) + if state_dict is not None: + self.load_state_dict(state_dict) + if device is not None: + self.to(device) + + def trainables(self, train=False): + layer_structure = [] + for layer in self.linear: + if train: + layer.train() + else: + layer.eval() + if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: + layer_structure += [layer.weight, layer.bias] + return layer_structure + + def forward(self, x, **kwargs): + return torch.nn.functional.interpolate(x, size=self.n_outputs, mode="nearest-exact") + self.linear(x) + + class HypernetworkModule(torch.nn.Module): multiplier = 1.0 @@ -46,17 +105,24 @@ class HypernetworkModule(torch.nn.Module): activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', - add_layer_norm=False, activate_output=False, dropout_structure=None, device=None, generation_seed=None, normal_std=0.01): + add_layer_norm=False, activate_output=False, dropout_structure=None, device=None, generation_seed=None, normal_std=0.01, **kwargs): super().__init__() - + skip_connection = kwargs.get('skip_connection', False) assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - assert dropout_structure is None or dropout_structure[0] == dropout_structure[-1] == 0, "Dropout Sequence should start and end with probability 0!" + assert skip_connection or dropout_structure is None or dropout_structure[0] == dropout_structure[-1] == 0, "Dropout Sequence should start and end with probability 0!" assert dropout_structure is None or len(dropout_structure) == len(layer_structure), "Dropout Sequence should match length with layer structure!" linears = [] for i in range(len(layer_structure) - 1): + if skip_connection: + n_inputs, n_outputs = int(dim * layer_structure[i]), int(dim * layer_structure[i+1]) + dropout_p = dropout_structure[i+1] + if activation_func is None: + activation_func = "linear" + linears.append(ResBlock(n_inputs, n_outputs, activation_func, weight_init, add_layer_norm, dropout_p, normal_std, device)) + continue # Add a fully-connected layer linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) @@ -144,6 +210,8 @@ class HypernetworkModule(torch.nn.Module): layer.eval() if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: layer_structure += [layer.weight, layer.bias] + elif type(layer) == ResBlock: + layer_structure += layer.trainables(train) return layer_structure def set_train(self,mode=True): @@ -176,6 +244,7 @@ class Hypernetwork: self.optimizer_state_dict = None self.dropout_structure = kwargs['dropout_structure'] if 'dropout_structure' in kwargs and use_dropout else None self.optional_info = kwargs.get('optional_info', None) + self.skip_connection = kwargs.get('skip_connection', False) generation_seed = kwargs.get('generation_seed', None) normal_std = kwargs.get('normal_std', 0.01) if self.dropout_structure is None: @@ -184,9 +253,9 @@ class Hypernetwork: for size in enable_sizes or []: self.layers[size] = ( HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure, generation_seed=generation_seed, normal_std=normal_std), + self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure, generation_seed=generation_seed, normal_std=normal_std, skip_connection=self.skip_connection), HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure, generation_seed=generation_seed, normal_std=normal_std), + self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure, generation_seed=generation_seed, normal_std=normal_std, skip_connection=self.skip_connection), ) self.eval() @@ -237,6 +306,7 @@ class Hypernetwork: state_dict['dropout_structure'] = self.dropout_structure state_dict['last_layer_dropout'] = (self.dropout_structure[-2] != 0) if self.dropout_structure is not None else self.last_layer_dropout state_dict['optional_info'] = self.optional_info if self.optional_info else None + state_dict['skip_connection'] = self.skip_connection if self.optimizer_name is not None: optimizer_saved_dict['optimizer_name'] = self.optimizer_name @@ -266,6 +336,7 @@ class Hypernetwork: self.use_dropout = True if self.dropout_structure is not None and any(self.dropout_structure) else state_dict.get('use_dropout', False) self.activate_output = state_dict.get('activate_output', True) self.last_layer_dropout = state_dict.get('last_layer_dropout', False) # Silent fix for HNs before 4918eb6 + self.skip_connection = state_dict.get('skip_connection', False) # Dropout structure should have same length as layer structure, Every digits should be in [0,1), and last digit must be 0. if self.dropout_structure is None: self.dropout_structure = parse_dropout_structure(self.layer_structure, self.use_dropout, self.last_layer_dropout) @@ -296,9 +367,9 @@ class Hypernetwork: if type(size) == int: self.layers[size] = ( HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.activate_output, self.dropout_structure), + self.add_layer_norm, self.activate_output, self.dropout_structure, skip_connection=self.skip_connection), HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.activate_output, self.dropout_structure), + self.add_layer_norm, self.activate_output, self.dropout_structure, skip_connection=self.skip_connection), ) self.name = state_dict.get('name', self.name) diff --git a/patches/ui.py b/patches/ui.py index 9864d26..4e6b252 100644 --- a/patches/ui.py +++ b/patches/ui.py @@ -5,7 +5,7 @@ from modules import shared, sd_hijack, devices from .hypernetwork import Hypernetwork, train_hypernetwork, load_hypernetwork def create_hypernetwork_load(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, dropout_structure=None, optional_info=None, - weight_init_seed=None, normal_std=0.01): + weight_init_seed=None, normal_std=0.01, skip_connection=False): # Remove illegal characters from name. name = "".join( x for x in name if (x.isalnum() or x in "._- ")) assert name, "Name cannot be empty!" @@ -31,7 +31,8 @@ def create_hypernetwork_load(name, enable_sizes, overwrite_old, layer_structure= dropout_structure=dropout_structure if use_dropout and dropout_structure else [0] * len(layer_structure), optional_info=optional_info, generation_seed=weight_init_seed if weight_init_seed != -1 else None, - normal_std=normal_std + normal_std=normal_std, + skip_connection=skip_connection ) hypernet.save(fn) shared.reload_hypernetworks() @@ -41,7 +42,7 @@ def create_hypernetwork_load(name, enable_sizes, overwrite_old, layer_structure= def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, dropout_structure=None, optional_info=None, - weight_init_seed=None, normal_std=0.01): + weight_init_seed=None, normal_std=0.01, skip_connection=False): # Remove illegal characters from name. name = "".join( x for x in name if (x.isalnum() or x in "._- ")) assert name, "Name cannot be empty!" @@ -67,7 +68,8 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, dropout_structure=dropout_structure if use_dropout and dropout_structure else [0] * len(layer_structure), optional_info=optional_info, generation_seed=weight_init_seed if weight_init_seed != -1 else None, - normal_std=normal_std + normal_std=normal_std, + skip_connection=skip_connection ) hypernet.save(fn) diff --git a/scripts/hypernetwork-extensions.py b/scripts/hypernetwork-extensions.py index 12c0f4f..713472b 100644 --- a/scripts/hypernetwork-extensions.py +++ b/scripts/hypernetwork-extensions.py @@ -135,6 +135,7 @@ def create_extension_tab(params=None): new_hypernetwork_dropout_structure = gr.Textbox("0, 0, 0", label="Enter hypernetwork Dropout structure (or empty). Recommended : 0~0.35 incrementing sequence: 0, 0.05, 0.15", placeholder="1st and last digit must be 0 and values should be between 0 and 1. ex:'0, 0.01, 0'") + skip_connection = gr.Checkbox(label="Use skip-connection. Won't work without extension!") optional_info = gr.Textbox("", label="Optional information about Hypernetwork", placeholder="Training information, dateset, etc") overwrite_old_hypernetwork = gr.Checkbox(value=False, label="Overwrite Old Hypernetwork") @@ -165,7 +166,8 @@ def create_extension_tab(params=None): new_hypernetwork_dropout_structure, optional_info, generation_seed if generation_seed.visible else None, - normal_std if normal_std.visible else 0.01], + normal_std if normal_std.visible else 0.01, + skip_connection], outputs=[ ti_output, ti_outcome, @@ -185,7 +187,8 @@ def create_extension_tab(params=None): new_hypernetwork_dropout_structure, optional_info, generation_seed if generation_seed.visible else None, - normal_std if normal_std.visible else 0.01 + normal_std if normal_std.visible else 0.01, + skip_connection ], outputs=[ new_hypernetwork_name, From ed214c45c88999a50ca8f91ee59b6a24daec4a57 Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Tue, 17 Jan 2023 15:17:00 +0900 Subject: [PATCH 2/5] Fix loading residual --- patches/hypernetwork.py | 16 ++++++++++++++-- patches/ui.py | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/patches/hypernetwork.py b/patches/hypernetwork.py index ec4af84..5a3ae62 100644 --- a/patches/hypernetwork.py +++ b/patches/hypernetwork.py @@ -61,6 +61,7 @@ class ResBlock(torch.nn.Module): init_weight(linears[0], weight_init, normal_std, activation_func) if add_layer_norm: linears.append(torch.nn.LayerNorm(n_outputs)) + init_weight(linears[1], weight_init, normal_std, activation_func) if dropout_p > 0: linears.append(torch.nn.Dropout(p=dropout_p)) if activation_func == "linear" or activation_func is None: @@ -87,7 +88,8 @@ class ResBlock(torch.nn.Module): return layer_structure def forward(self, x, **kwargs): - return torch.nn.functional.interpolate(x, size=self.n_outputs, mode="nearest-exact") + self.linear(x) + interpolated = torch.nn.functional.interpolate(x, size=self.n_outputs, mode="nearest-exact") + return interpolated + self.linear(x) @@ -107,7 +109,8 @@ class HypernetworkModule(torch.nn.Module): def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, activate_output=False, dropout_structure=None, device=None, generation_seed=None, normal_std=0.01, **kwargs): super().__init__() - skip_connection = kwargs.get('skip_connection', False) + self.skip_connection = skip_connection = kwargs.get('skip_connection', False) + assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" @@ -197,6 +200,15 @@ class HypernetworkModule(torch.nn.Module): state_dict[to] = x def forward(self, x, multiplier=None): + if self.skip_connection: + if self.training or multiplier is None or not isinstance(multiplier, (int, float)): + return self.linear(x) + else: + resnet_result = self.linear(x) + residual = resnet_result - x + if multiplier is None or not isinstance(multiplier, (int, float)): + multiplier = HypernetworkModule.multiplier + return x + multiplier * residual # interpolate if multiplier is None or not isinstance(multiplier, (int, float)): return x + self.linear(x) * (HypernetworkModule.multiplier if not self.training else 1) return x + self.linear(x) * multiplier diff --git a/patches/ui.py b/patches/ui.py index 4e6b252..3c1659c 100644 --- a/patches/ui.py +++ b/patches/ui.py @@ -56,7 +56,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, if dropout_structure and type(dropout_structure) == str: dropout_structure = [float(x.strip()) for x in dropout_structure.split(",")] normal_std = float(normal_std) - assert normal_std > 0, "Normal Standard Deviation should be bigger than 0!" + assert normal_std >= 0, "Normal Standard Deviation should be bigger than 0!" hypernet = Hypernetwork( name=name, enable_sizes=[int(x) for x in enable_sizes], From 328692830c72ad87868782b6e70eea0e72ea9fdf Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Tue, 17 Jan 2023 16:51:19 +0900 Subject: [PATCH 3/5] If someone is curious --- patches/external_pr/dataset.py | 6 +- patches/external_pr/hypernetwork.py | 10 ++- patches/external_pr/textual_inversion.py | 97 ++++++++++++++---------- patches/external_pr/ui.py | 49 +++++++----- patches/hypernetwork.py | 28 +++++-- 5 files changed, 121 insertions(+), 69 deletions(-) diff --git a/patches/external_pr/dataset.py b/patches/external_pr/dataset.py index baaaa40..1389617 100644 --- a/patches/external_pr/dataset.py +++ b/patches/external_pr/dataset.py @@ -47,7 +47,7 @@ class DatasetEntry: class PersonalizedBase(Dataset): def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, cond_model=None, device=None, template_file=None, include_cond=False, batch_size=1, gradient_step=1, - shuffle_tags=False, tag_drop_out=0, latent_sampling_method='once'): + shuffle_tags=False, tag_drop_out=0, latent_sampling_method='once', latent_sampling_std=-1): re_word = re.compile(shared.opts.dataset_filename_word_regex) if len( shared.opts.dataset_filename_word_regex) > 0 else None seed = random.randrange(sys.maxsize) @@ -128,6 +128,10 @@ class PersonalizedBase(Dataset): latent_sample = model.get_first_stage_encoding(latent_dist).squeeze().to(devices.cpu) entry = DatasetEntry(filename=path, filename_text=filename_text, latent_sample=latent_sample) elif latent_sampling_method == "random": + if latent_sampling_std != -1: + assert latent_sampling_std > 0, f"Cannnot apply negative standard deviation {latent_sampling_std}" + print(f"Applying patch, changing std from {latent_dist.std} to {latent_sampling_std}...") + latent_dist.std = latent_sampling_std entry = DatasetEntry(filename=path, filename_text=filename_text, latent_dist=latent_dist) if not (self.tag_drop_out != 0 or self.shuffle_tags): diff --git a/patches/external_pr/hypernetwork.py b/patches/external_pr/hypernetwork.py index b50f083..98544ab 100644 --- a/patches/external_pr/hypernetwork.py +++ b/patches/external_pr/hypernetwork.py @@ -51,7 +51,7 @@ def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradi use_adamw_parameter=False, adamw_weight_decay=0.01, adamw_beta_1=0.9, adamw_beta_2=0.99, adamw_eps=1e-8, use_grad_opts=False, gradient_clip_opt='None', optional_gradient_clip_value=1e01, - optional_gradient_norm_type=2, + optional_gradient_norm_type=2, latent_sampling_std=-1, load_training_options=''): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images @@ -86,6 +86,7 @@ def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradi gradient_clip_opt = dump['gradient_clip_opt'] optional_gradient_clip_value = dump['optional_gradient_clip_value'] optional_gradient_norm_type = dump['optional_gradient_norm_type'] + latent_sampling_std = dump.get('latent_sampling_std', -1) try: if use_adamw_parameter: adamw_weight_decay, adamw_beta_1, adamw_beta_2, adamw_eps = [float(x) for x in @@ -223,7 +224,8 @@ def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradi include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, - latent_sampling_method=latent_sampling_method) + latent_sampling_method=latent_sampling_method, + latent_sampling_std=latent_sampling_std) latent_sampling_method = ds.latent_sampling_method @@ -542,6 +544,7 @@ def internal_clean_training(hypernetwork_name, data_root, log_directory, gradient_clip_opt = dump['gradient_clip_opt'] optional_gradient_clip_value = dump['optional_gradient_clip_value'] optional_gradient_norm_type = dump['optional_gradient_norm_type'] + latent_sampling_std = dump.get('latent_sampling_std', -1) else: raise RuntimeError(f"Cannot load from {load_training_options}!") else: @@ -683,7 +686,8 @@ def internal_clean_training(hypernetwork_name, data_root, log_directory, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, - latent_sampling_method=latent_sampling_method) + latent_sampling_method=latent_sampling_method, + latent_sampling_std=latent_sampling_std) latent_sampling_method = ds.latent_sampling_method diff --git a/patches/external_pr/textual_inversion.py b/patches/external_pr/textual_inversion.py index 07258e9..34b079a 100644 --- a/patches/external_pr/textual_inversion.py +++ b/patches/external_pr/textual_inversion.py @@ -21,9 +21,11 @@ from modules.textual_inversion.textual_inversion import save_embedding from torch.utils.tensorboard import SummaryWriter from ..tbutils import tensorboard_add, tensorboard_setup, tensorboard_add_scaler, tensorboard_add_image -#apply OsError avoid here + +# apply OsError avoid here delayed_values = {} + def write_loss(log_directory, filename, step, epoch_len, values): if shared.opts.training_write_csv_every == 0: return @@ -55,9 +57,9 @@ def write_loss(log_directory, filename, step, epoch_len, values): **values, }) except OSError: - epoch, epoch_step = divmod(step-1, epoch_len) + epoch, epoch_step = divmod(step - 1, epoch_len) if log_directory + filename in delayed_values: - delayed_values[log_directory + filename].append((step , epoch, epoch_step, values)) + delayed_values[log_directory + filename].append((step, epoch, epoch_step, values)) else: delayed_values[log_directory + filename] = [(step, epoch, epoch_step, values)] @@ -86,15 +88,19 @@ def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, dat assert log_directory, "Log directory is empty" -def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, +def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, + training_width, training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height, - use_beta_scheduler=False, beta_repeat_epoch=4000, epoch_mult=1,warmup =10, min_lr=1e-7, gamma_rate=1, save_when_converge=False, create_when_converge=False, + use_beta_scheduler=False, beta_repeat_epoch=4000, epoch_mult=1, warmup=10, min_lr=1e-7, + gamma_rate=1, save_when_converge=False, create_when_converge=False, move_optimizer=True, - use_adamw_parameter=False, adamw_weight_decay=0.01, adamw_beta_1=0.9, adamw_beta_2=0.99,adamw_eps=1e-8, - use_grad_opts=False, gradient_clip_opt='None', optional_gradient_clip_value=1e01, optional_gradient_norm_type=2 + use_adamw_parameter=False, adamw_weight_decay=0.01, adamw_beta_1=0.9, adamw_beta_2=0.99, + adamw_eps=1e-8, + use_grad_opts=False, gradient_clip_opt='None', optional_gradient_clip_value=1e01, + optional_gradient_norm_type=2, latent_sampling_std=-1 ): save_embedding_every = save_embedding_every or 0 create_image_every = create_image_every or 0 @@ -102,20 +108,23 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st save_embedding_every, create_image_every, log_directory, name="embedding") try: if use_adamw_parameter: - adamw_weight_decay, adamw_beta_1, adamw_beta_2, adamw_eps = [float(x) for x in [adamw_weight_decay, adamw_beta_1, adamw_beta_2, adamw_eps]] + adamw_weight_decay, adamw_beta_1, adamw_beta_2, adamw_eps = [float(x) for x in + [adamw_weight_decay, adamw_beta_1, + adamw_beta_2, adamw_eps]] assert 0 <= adamw_weight_decay, "Weight decay paramter should be larger or equal than zero!" - assert (all(0 <= x <= 1 for x in [adamw_beta_1, adamw_beta_2, adamw_eps])), "Cannot use negative or >1 number for adamW parameters!" + assert (all(0 <= x <= 1 for x in [adamw_beta_1, adamw_beta_2, + adamw_eps])), "Cannot use negative or >1 number for adamW parameters!" adamW_kwarg_dict = { - 'weight_decay' : adamw_weight_decay, - 'betas' : (adamw_beta_1, adamw_beta_2), - 'eps' : adamw_eps + 'weight_decay': adamw_weight_decay, + 'betas': (adamw_beta_1, adamw_beta_2), + 'eps': adamw_eps } print('Using custom AdamW parameters') else: adamW_kwarg_dict = { - 'weight_decay' : 0.01, - 'betas' : (0.9, 0.99), - 'eps' : 1e-8 + 'weight_decay': 0.01, + 'betas': (0.9, 0.99), + 'eps': 1e-8 } if use_beta_scheduler: print("Using Beta Scheduler") @@ -134,10 +143,10 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st print(f"Generate image when converges : {create_when_converge}") else: beta_repeat_epoch = 4000 - epoch_mult=1 - warmup=10 - min_lr=1e-7 - gamma_rate=1 + epoch_mult = 1 + warmup = 10 + min_lr = 1e-7 + gamma_rate = 1 save_when_converge = False create_when_converge = False except ValueError: @@ -153,6 +162,7 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st except ValueError: raise RuntimeError(f"Cannot convert invalid gradient norm type {optional_gradient_norm_type})") assert grad_norm >= 0, f"P-norm cannot be calculated from negative number {grad_norm}" + def gradient_clipping(arg1): torch.nn.utils.clip_grad_norm_(arg1, optional_gradient_clip_value, optional_gradient_norm_type) return @@ -212,38 +222,40 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st tensorboard_writer = tensorboard_setup(log_directory) pin_memory = shared.opts.pin_memory - detach_grad = shared.opts.disable_ema # test code that removes EMA + detach_grad = shared.opts.disable_ema # test code that removes EMA if detach_grad: print("Disabling training for staged models!") shared.sd_model.cond_stage_model.requires_grad_(False) shared.sd_model.first_stage_model.requires_grad_(False) torch.cuda.empty_cache() ds = PersonalizedBase(data_root=data_root, width=training_width, - height=training_height, - repeats=shared.opts.training_image_repeats_per_epoch, - placeholder_token=embedding_name, model=shared.sd_model, - cond_model=shared.sd_model.cond_stage_model, - device=devices.device, template_file=template_file, - batch_size=batch_size, gradient_step=gradient_step, - shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, - latent_sampling_method=latent_sampling_method) + height=training_height, + repeats=shared.opts.training_image_repeats_per_epoch, + placeholder_token=embedding_name, model=shared.sd_model, + cond_model=shared.sd_model.cond_stage_model, + device=devices.device, template_file=template_file, + batch_size=batch_size, gradient_step=gradient_step, + shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, + latent_sampling_method=latent_sampling_method, + latent_sampling_std=latent_sampling_std) latent_sampling_method = ds.latent_sampling_method dl = PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, - batch_size=ds.batch_size, pin_memory=pin_memory) + batch_size=ds.batch_size, pin_memory=pin_memory) if unload: shared.parallel_processing_allowed = False shared.sd_model.first_stage_model.to(devices.cpu) embedding.vec.requires_grad_(True) - optimizer_name = 'AdamW' # hardcoded optimizer name now + optimizer_name = 'AdamW' # hardcoded optimizer name now if use_adamw_parameter: optimizer = torch.optim.AdamW(params=[embedding.vec], lr=scheduler.learn_rate, **adamW_kwarg_dict) else: optimizer = torch.optim.AdamW(params=[embedding.vec], lr=scheduler.learn_rate, weight_decay=0.0) - if os.path.exists(filename + '.optim'): # This line must be changed if Optimizer type can be different from saved optimizer. + if os.path.exists( + filename + '.optim'): # This line must be changed if Optimizer type can be different from saved optimizer. try: optimizer_saved_dict = torch.load(filename + '.optim', map_location='cpu') if embedding.checksum() == optimizer_saved_dict.get('hash', None): @@ -259,8 +271,10 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st if move_optimizer: optim_to(optimizer, devices.device) if use_beta_scheduler: - scheduler_beta = CosineAnnealingWarmUpRestarts(optimizer=optimizer, first_cycle_steps=beta_repeat_epoch, cycle_mult=epoch_mult, max_lr=scheduler.learn_rate, warmup_steps=warmup, min_lr=min_lr, gamma=gamma_rate) - scheduler_beta.last_epoch = embedding.step-1 + scheduler_beta = CosineAnnealingWarmUpRestarts(optimizer=optimizer, first_cycle_steps=beta_repeat_epoch, + cycle_mult=epoch_mult, max_lr=scheduler.learn_rate, + warmup_steps=warmup, min_lr=min_lr, gamma=gamma_rate) + scheduler_beta.last_epoch = embedding.step - 1 else: scheduler_beta = None for pg in optimizer.param_groups: @@ -310,7 +324,8 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st c = shared.sd_model.cond_stage_model(batch.cond_text) if is_training_inpainting_model: if img_c is None: - img_c = processing.txt2img_image_conditioning(shared.sd_model, c, training_width, training_height) + img_c = processing.txt2img_image_conditioning(shared.sd_model, c, training_width, + training_height) cond = {"c_concat": [img_c], "c_crossattn": [c]} else: @@ -340,7 +355,9 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st epoch_step = embedding.step % steps_per_epoch pbar.set_description(f"[Epoch {epoch_num}: {epoch_step + 1}/{steps_per_epoch}]loss: {loss_step:.7f}") - if embedding_dir is not None and ((use_beta_scheduler and scheduler_beta.is_EOC(embedding.step) and save_when_converge) or (save_embedding_every > 0 and steps_done % save_embedding_every == 0)): + if embedding_dir is not None and ( + (use_beta_scheduler and scheduler_beta.is_EOC(embedding.step) and save_when_converge) or ( + save_embedding_every > 0 and steps_done % save_embedding_every == 0)): # Before saving, change name to match current checkpoint. embedding_name_every = f'{embedding_name}-{steps_done}' last_saved_file = os.path.join(embedding_dir, f'{embedding_name_every}.pt') @@ -355,7 +372,9 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st "learn_rate": scheduler.learn_rate }) - if images_dir is not None and ((use_beta_scheduler and scheduler_beta.is_EOC(embedding.step) and create_when_converge) or (create_image_every > 0 and steps_done % create_image_every == 0)): + if images_dir is not None and ( + (use_beta_scheduler and scheduler_beta.is_EOC(embedding.step) and create_when_converge) or ( + create_image_every > 0 and steps_done % create_image_every == 0)): forced_filename = f'{embedding_name}-{steps_done}' last_saved_image = os.path.join(images_dir, forced_filename) rng_state = torch.get_rng_state() @@ -429,7 +448,8 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st checkpoint = sd_models.select_checkpoint() footer_left = checkpoint.model_name - footer_mid = '[{}]'.format(checkpoint.shorthash if hasattr(checkpoint, 'shorthash') else checkpoint.hash) + footer_mid = '[{}]'.format( + checkpoint.shorthash if hasattr(checkpoint, 'shorthash') else checkpoint.hash) footer_right = '{}v {}s'.format(vectorSize, steps_done) captioned_image = caption_image_overlay(image, title, footer_left, footer_mid, footer_right) @@ -449,7 +469,6 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st save_to_dirs=False) last_saved_image += f", prompt: {preview_text}" - shared.state.job_no = embedding.step shared.state.textinfo = f""" @@ -471,4 +490,4 @@ Last saved image: {html.escape(last_saved_image)}
pbar.close() shared.sd_model.first_stage_model.to(devices.device) shared.parallel_processing_allowed = old_parallel_processing_allowed - return embedding, filename \ No newline at end of file + return embedding, filename diff --git a/patches/external_pr/ui.py b/patches/external_pr/ui.py index 88e5f79..7230554 100644 --- a/patches/external_pr/ui.py +++ b/patches/external_pr/ui.py @@ -15,7 +15,6 @@ import gradio as gr def train_hypernetwork_ui(*args): - initial_hypernetwork = shared.loaded_hypernetwork assert not shared.cmd_opts.lowvram, 'Training models with lowvram is not possible' @@ -40,7 +39,6 @@ Hypernetwork saved to {html.escape(filename)} def train_hypernetwork_ui_tuning(*args): - initial_hypernetwork = shared.loaded_hypernetwork assert not shared.cmd_opts.lowvram, 'Training models with lowvram is not possible' @@ -69,7 +67,7 @@ def save_training_setting(*args): template_file, use_beta_scheduler, beta_repeat_epoch, epoch_mult, warmup, min_lr, \ gamma_rate, use_beta_adamW_checkbox, save_when_converge, create_when_converge, \ adamw_weight_decay, adamw_beta_1, adamw_beta_2, adamw_eps, show_gradient_clip_checkbox, \ - gradient_clip_opt, optional_gradient_clip_value, optional_gradient_norm_type = args + gradient_clip_opt, optional_gradient_clip_value, optional_gradient_norm_type, latent_sampling_std = args dumped_locals = locals() dumped_locals.pop('args') filename = (str(random.randint(0, 1024)) if save_file_name == '' else save_file_name) + '_train_' + '.json' @@ -93,6 +91,7 @@ def save_hypernetwork_setting(*args): print(f"File saved as {filename}") return filename, "" + def on_train_gamma_tab(params=None): dummy_component = gr.Label(visible=False) with gr.Tab(label="Train_Gamma") as train_gamma: @@ -123,15 +122,18 @@ def on_train_gamma_tab(params=None): show_gradient_clip_checkbox = gr.Checkbox( label='Show Gradient Clipping Options(for both)') with gr.Row(visible=False) as adamW_options: - adamw_weight_decay = gr.Textbox(label="AdamW weight decay parameter", placeholder="default = 0.01", value="0.01") + adamw_weight_decay = gr.Textbox(label="AdamW weight decay parameter", placeholder="default = 0.01", + value="0.01") adamw_beta_1 = gr.Textbox(label="AdamW beta1 parameter", placeholder="default = 0.9", value="0.9") adamw_beta_2 = gr.Textbox(label="AdamW beta2 parameter", placeholder="default = 0.99", value="0.99") adamw_eps = gr.Textbox(label="AdamW epsilon parameter", placeholder="default = 1e-8", value="1e-8") with gr.Row(visible=False) as beta_scheduler_options: use_beta_scheduler = gr.Checkbox(label='Use CosineAnnealingWarmupRestarts Scheduler') beta_repeat_epoch = gr.Textbox(label='Steps for cycle', placeholder="Cycles every nth Step", value="64") - epoch_mult = gr.Textbox(label='Step multiplier per cycle', placeholder="Step length multiplier every cycle", value="1") - warmup = gr.Textbox(label='Warmup step per cycle', placeholder="CosineAnnealing lr increase step", value="5") + epoch_mult = gr.Textbox(label='Step multiplier per cycle', placeholder="Step length multiplier every cycle", + value="1") + warmup = gr.Textbox(label='Warmup step per cycle', placeholder="CosineAnnealing lr increase step", + value="5") min_lr = gr.Textbox(label='Minimum learning rate', placeholder="restricts decay value, but does not restrict gamma rate decay", value="6e-7") @@ -144,7 +146,7 @@ def on_train_gamma_tab(params=None): gradient_clip_opt = gr.Radio(label="Gradient Clipping Options", choices=["None", "limit", "norm"]) optional_gradient_clip_value = gr.Textbox(label="Limiting value", value="1e-1") optional_gradient_norm_type = gr.Textbox(label="Norm type", value="2") - #change by feedback + # change by feedback use_beta_adamW_checkbox.change( fn=lambda show: gr_show(show), inputs=[use_beta_adamW_checkbox], @@ -165,7 +167,8 @@ def on_train_gamma_tab(params=None): inputs=[show_gradient_clip_checkbox], outputs=[gradient_clip_options], ) - move_optim_when_generate = gr.Checkbox(label="Unload Optimizer when generating preview(hypernetwork)", value=True) + move_optim_when_generate = gr.Checkbox(label="Unload Optimizer when generating preview(hypernetwork)", + value=True) batch_size = gr.Number(label='Batch size', value=1, precision=0) gradient_step = gr.Number(label='Gradient accumulation steps', value=1, precision=0) dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images") @@ -191,10 +194,12 @@ def on_train_gamma_tab(params=None): with gr.Row(): latent_sampling_method = gr.Radio(label='Choose latent sampling method', value="once", choices=['once', 'deterministic', 'random']) + latent_sampling_std_value = gr.Number(label="Standard deviation for sampling", value=-1) with gr.Row(): save_training_option = gr.Button(value="Save training setting") save_file_name = gr.Textbox(label="File name to save setting as", value="") - load_training_option = gr.Textbox(label="Load training option from saved json file. This will override settings above", value="") + load_training_option = gr.Textbox( + label="Load training option from saved json file. This will override settings above", value="") with gr.Row(): interrupt_training = gr.Button(value="Interrupt") train_hypernetwork = gr.Button(value="Train Hypernetwork", variant='primary') @@ -202,9 +207,9 @@ def on_train_gamma_tab(params=None): ti_output = gr.Text(elem_id="ti_output3", value="", show_label=False) ti_outcome = gr.HTML(elem_id="ti_error3", value="") - #Full path to .json or simple names are recommended. + # Full path to .json or simple names are recommended. save_training_option.click( - fn = wrap_gradio_call(save_training_setting), + fn=wrap_gradio_call(save_training_setting), inputs=[ save_file_name, hypernetwork_learn_rate, @@ -233,7 +238,8 @@ def on_train_gamma_tab(params=None): show_gradient_clip_checkbox, gradient_clip_opt, optional_gradient_clip_value, - optional_gradient_norm_type], + optional_gradient_norm_type, + latent_sampling_std_value], outputs=[ ti_output, ti_outcome, @@ -279,7 +285,8 @@ def on_train_gamma_tab(params=None): show_gradient_clip_checkbox, gradient_clip_opt, optional_gradient_clip_value, - optional_gradient_norm_type + optional_gradient_norm_type, + latent_sampling_std_value ], outputs=[ ti_output, @@ -327,6 +334,7 @@ def on_train_gamma_tab(params=None): gradient_clip_opt, optional_gradient_clip_value, optional_gradient_norm_type, + latent_sampling_std_value, load_training_option ], @@ -343,6 +351,7 @@ def on_train_gamma_tab(params=None): ) return [(train_gamma, "Train Gamma", "train_gamma")] + def on_train_tuning(params=None): dummy_component = gr.Label(visible=False) with gr.Tab(label="Train_Tuning") as train_tuning: @@ -354,14 +363,18 @@ def on_train_tuning(params=None): create_refresh_button(train_hypernetwork_name, shared.reload_hypernetworks, lambda: {"choices": sorted([x for x in shared.hypernetworks.keys()])}, "refresh_train_hypernetwork_name") - optional_new_hypernetwork_name = gr.Textbox(label="Hypernetwork name to create, leave it empty to use selected", value="") + optional_new_hypernetwork_name = gr.Textbox( + label="Hypernetwork name to create, leave it empty to use selected", value="") with gr.Row(): load_hypernetworks_option = gr.Textbox( - label="Load Hypernetwork creation option from saved json file", placeholder = ". filename cannot have ',' inside, and files should be splitted by ','.", value="") + label="Load Hypernetwork creation option from saved json file", + placeholder=". filename cannot have ',' inside, and files should be splitted by ','.", value="") with gr.Row(): load_training_options = gr.Textbox( - label="Load training option(s) from saved json file", placeholder = ". filename cannot have ',' inside, and files should be splitted by ','.", value="") - move_optim_when_generate = gr.Checkbox(label="Unload Optimizer when generating preview(hypernetwork)", value=True) + label="Load training option(s) from saved json file", + placeholder=". filename cannot have ',' inside, and files should be splitted by ','.", value="") + move_optim_when_generate = gr.Checkbox(label="Unload Optimizer when generating preview(hypernetwork)", + value=True) dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images") log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion") @@ -404,4 +417,4 @@ def on_train_tuning(params=None): inputs=[], outputs=[], ) - return [(train_tuning, "Train Tuning", "train_tuning")] \ No newline at end of file + return [(train_tuning, "Train Tuning", "train_tuning")] diff --git a/patches/hypernetwork.py b/patches/hypernetwork.py index 5a3ae62..f3aeed5 100644 --- a/patches/hypernetwork.py +++ b/patches/hypernetwork.py @@ -54,9 +54,13 @@ def init_weight(layer, weight_init="Normal", normal_std=0.01, activation_func="r class ResBlock(torch.nn.Module): """Residual Block""" - def __init__(self, n_inputs, n_outputs, activation_func, weight_init, add_layer_norm, dropout_p, normal_std, device=None, state_dict=None): + def __init__(self, n_inputs, n_outputs, activation_func, weight_init, add_layer_norm, dropout_p, normal_std, device=None, state_dict=None, **kwargs): super().__init__() self.n_outputs = n_outputs + self.upsample_layer = None + self.upsample = kwargs.get("upsample_model", None) + if self.upsample is "Linear": + self.upsample_layer = torch.nn.Linear(n_inputs, n_outputs, bias=False) linears = [torch.nn.Linear(n_inputs, n_outputs)] init_weight(linears[0], weight_init, normal_std, activation_func) if add_layer_norm: @@ -88,7 +92,10 @@ class ResBlock(torch.nn.Module): return layer_structure def forward(self, x, **kwargs): - interpolated = torch.nn.functional.interpolate(x, size=self.n_outputs, mode="nearest-exact") + if self.upsample_layer is None: + interpolated = torch.nn.functional.interpolate(x, size=self.n_outputs, mode="nearest-exact") + else: + interpolated = self.upsample_layer(x) return interpolated + self.linear(x) @@ -110,7 +117,7 @@ class HypernetworkModule(torch.nn.Module): add_layer_norm=False, activate_output=False, dropout_structure=None, device=None, generation_seed=None, normal_std=0.01, **kwargs): super().__init__() self.skip_connection = skip_connection = kwargs.get('skip_connection', False) - + upsample_linear = kwargs.get('upsample_linear', None) assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" @@ -124,7 +131,7 @@ class HypernetworkModule(torch.nn.Module): dropout_p = dropout_structure[i+1] if activation_func is None: activation_func = "linear" - linears.append(ResBlock(n_inputs, n_outputs, activation_func, weight_init, add_layer_norm, dropout_p, normal_std, device)) + linears.append(ResBlock(n_inputs, n_outputs, activation_func, weight_init, add_layer_norm, dropout_p, normal_std, device, upsample_model=upsample_linear)) continue # Add a fully-connected layer @@ -257,6 +264,7 @@ class Hypernetwork: self.dropout_structure = kwargs['dropout_structure'] if 'dropout_structure' in kwargs and use_dropout else None self.optional_info = kwargs.get('optional_info', None) self.skip_connection = kwargs.get('skip_connection', False) + self.upsample_linear = kwargs.get('upsample_linear', None) generation_seed = kwargs.get('generation_seed', None) normal_std = kwargs.get('normal_std', 0.01) if self.dropout_structure is None: @@ -265,9 +273,11 @@ class Hypernetwork: for size in enable_sizes or []: self.layers[size] = ( HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure, generation_seed=generation_seed, normal_std=normal_std, skip_connection=self.skip_connection), + self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure, generation_seed=generation_seed, normal_std=normal_std, skip_connection=self.skip_connection, + upsample_linear=self.upsample_linear), HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure, generation_seed=generation_seed, normal_std=normal_std, skip_connection=self.skip_connection), + self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure, generation_seed=generation_seed, normal_std=normal_std, skip_connection=self.skip_connection, + upsample_linear=self.upsample_linear), ) self.eval() @@ -319,6 +329,7 @@ class Hypernetwork: state_dict['last_layer_dropout'] = (self.dropout_structure[-2] != 0) if self.dropout_structure is not None else self.last_layer_dropout state_dict['optional_info'] = self.optional_info if self.optional_info else None state_dict['skip_connection'] = self.skip_connection + state_dict['upsample_linear'] = self.upsample_linear if self.optimizer_name is not None: optimizer_saved_dict['optimizer_name'] = self.optimizer_name @@ -349,6 +360,7 @@ class Hypernetwork: self.activate_output = state_dict.get('activate_output', True) self.last_layer_dropout = state_dict.get('last_layer_dropout', False) # Silent fix for HNs before 4918eb6 self.skip_connection = state_dict.get('skip_connection', False) + self.upsample_linear = state_dict.get('upsample_linear', False) # Dropout structure should have same length as layer structure, Every digits should be in [0,1), and last digit must be 0. if self.dropout_structure is None: self.dropout_structure = parse_dropout_structure(self.layer_structure, self.use_dropout, self.last_layer_dropout) @@ -379,9 +391,9 @@ class Hypernetwork: if type(size) == int: self.layers[size] = ( HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.activate_output, self.dropout_structure, skip_connection=self.skip_connection), + self.add_layer_norm, self.activate_output, self.dropout_structure, skip_connection=self.skip_connection, upsample_linear=self.upsample_linear), HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.activate_output, self.dropout_structure, skip_connection=self.skip_connection), + self.add_layer_norm, self.activate_output, self.dropout_structure, skip_connection=self.skip_connection, upsample_linear=self.upsample_linear), ) self.name = state_dict.get('name', self.name) From 72b7609c3f92201db00acfdd97cf88e4dcde9a2a Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Tue, 17 Jan 2023 17:18:43 +0900 Subject: [PATCH 4/5] clip_ instead --- patches/external_pr/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/patches/external_pr/dataset.py b/patches/external_pr/dataset.py index 1389617..84af89f 100644 --- a/patches/external_pr/dataset.py +++ b/patches/external_pr/dataset.py @@ -130,8 +130,8 @@ class PersonalizedBase(Dataset): elif latent_sampling_method == "random": if latent_sampling_std != -1: assert latent_sampling_std > 0, f"Cannnot apply negative standard deviation {latent_sampling_std}" - print(f"Applying patch, changing std from {latent_dist.std} to {latent_sampling_std}...") - latent_dist.std = latent_sampling_std + print(f"Applying patch, clipping std from {torch.max(latent_dist.std).item()} to {latent_sampling_std}...") + latent_dist.std.clip_(latent_sampling_std) entry = DatasetEntry(filename=path, filename_text=filename_text, latent_dist=latent_dist) if not (self.tag_drop_out != 0 or self.shuffle_tags): From 28a4272fe56df3e8bc07514c49081658ae11a6f9 Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Tue, 17 Jan 2023 17:42:01 +0900 Subject: [PATCH 5/5] prevent direct random module access --- patches/external_pr/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/patches/external_pr/dataset.py b/patches/external_pr/dataset.py index 84af89f..7f068d4 100644 --- a/patches/external_pr/dataset.py +++ b/patches/external_pr/dataset.py @@ -11,7 +11,6 @@ from torch.utils.data import Dataset, DataLoader, Sampler from torchvision import transforms from ..hnutil import get_closest -import random from collections import defaultdict from random import Random import tqdm @@ -26,6 +25,7 @@ random_state_manager = Random(None) shuffle = random_state_manager.shuffle choice = random_state_manager.choice choices = random_state_manager.choices +randrange = random_state_manager.randrange def set_rng(seed=None): @@ -50,7 +50,7 @@ class PersonalizedBase(Dataset): shuffle_tags=False, tag_drop_out=0, latent_sampling_method='once', latent_sampling_std=-1): re_word = re.compile(shared.opts.dataset_filename_word_regex) if len( shared.opts.dataset_filename_word_regex) > 0 else None - seed = random.randrange(sys.maxsize) + seed = randrange(sys.maxsize) set_rng(seed) # reset forked RNG state when we create dataset. print(f"Dataset seed was set to f{seed}") self.placeholder_token = placeholder_token @@ -158,7 +158,7 @@ class PersonalizedBase(Dataset): text = text.replace("[name]", self.placeholder_token) tags = filename_text.split(',') if self.tag_drop_out != 0: - tags = [t for t in tags if random.random() > self.tag_drop_out] + tags = [t for t in tags if random_state_manager.random() > self.tag_drop_out] if self.shuffle_tags: shuffle(tags) text = text.replace("[filewords]", ','.join(tags))