diff --git a/docs/Finetuning/top_level.md b/docs/Finetuning/top_level.md new file mode 100644 index 0000000..5530cd3 --- /dev/null +++ b/docs/Finetuning/top_level.md @@ -0,0 +1,28 @@ +# Finetuning Resource Guide + +This guide is a resource compilation to facilitate the development of robust fine-tuned models. + +- Need to add resources here. + +## Guidelines for SDXL Finetuning + +- Set the `Max resolution` to at least 1024x1024, as this is the standard resolution for SDXL. +- Fine-tuning can be done with 24GB of GPU memory at a batch size of 1. + - Train U-Net only. + - Use gradient checkpointing. + - Use the `--cache_text_encoder_outputs` option and cache latents. + - Use the Adafactor optimizer. RMSprop 8bit or Adagrad 8bit may work. AdamW 8bit doesn't seem to work. +- PyTorch 2 seems to use slightly less GPU memory than PyTorch 1. + +Example optimizer settings for Adafactor with a fixed learning rate: +``` +optimizer_type = "adafactor" +optimizer_args = [ "scale_parameter=False", "relative_step=False", "warmup_init=False" ] +lr_scheduler = "constant_with_warmup" +lr_warmup_steps = 100 +learning_rate = 4e-7 # SDXL original learning rate +``` + +## Resource Contributions + +If you have valuable resources to add, please open a PR on GitHub. \ No newline at end of file diff --git a/docs/LoRA/top_level.md b/docs/LoRA/top_level.md index 08bc81d..d2c3c6d 100644 --- a/docs/LoRA/top_level.md +++ b/docs/LoRA/top_level.md @@ -1,4 +1,4 @@ -# LoRA Development Resource Guide +# LoRA Resource Guide This guide is a resource compilation to facilitate the development of robust LoRA models. 
diff --git a/finetune_gui.py b/finetune_gui.py index 21cdfb6..18878a2 100644 --- a/finetune_gui.py +++ b/finetune_gui.py @@ -632,382 +632,393 @@ def finetune_tab(headless=False): dummy_db_true = gr.Label(value=True, visible=False) dummy_db_false = gr.Label(value=False, visible=False) dummy_headless = gr.Label(value=headless, visible=False) - gr.Markdown('Train a custom model using kohya finetune python code...') + with gr.Tab('Training'): + gr.Markdown('Train a custom model using kohya finetune python code...') - ( - button_open_config, - button_save_config, - button_save_as_config, - config_file_name, - button_load_config, - ) = gradio_config(headless=headless) - - ( - pretrained_model_name_or_path, - v2, - v_parameterization, - sdxl_checkbox, - save_model_as, - model_list, - ) = gradio_source_model(headless=headless) - - with gr.Tab('Folders'): - with gr.Row(): - train_dir = gr.Textbox( - label='Training config folder', - placeholder='folder where the training configuration files will be saved', - ) - train_dir_folder = gr.Button( - folder_symbol, - elem_id='open_folder_small', - visible=(not headless), - ) - train_dir_folder.click( - get_folder_path, - outputs=train_dir, - show_progress=False, - ) - - image_folder = gr.Textbox( - label='Training Image folder', - placeholder='folder where the training images are located', - ) - image_folder_input_folder = gr.Button( - folder_symbol, - elem_id='open_folder_small', - visible=(not headless), - ) - image_folder_input_folder.click( - get_folder_path, - outputs=image_folder, - show_progress=False, - ) - with gr.Row(): - output_dir = gr.Textbox( - label='Model output folder', - placeholder='folder where the model will be saved', - ) - output_dir_input_folder = gr.Button( - folder_symbol, - elem_id='open_folder_small', - visible=(not headless), - ) - output_dir_input_folder.click( - get_folder_path, - outputs=output_dir, - show_progress=False, - ) - - logging_dir = gr.Textbox( - label='Logging folder', - 
placeholder='Optional: enable logging and output TensorBoard log to this folder', - ) - logging_dir_input_folder = gr.Button( - folder_symbol, - elem_id='open_folder_small', - visible=(not headless), - ) - logging_dir_input_folder.click( - get_folder_path, - outputs=logging_dir, - show_progress=False, - ) - with gr.Row(): - output_name = gr.Textbox( - label='Model output name', - placeholder='Name of the model to output', - value='last', - interactive=True, - ) - train_dir.change( - remove_doublequote, - inputs=[train_dir], - outputs=[train_dir], - ) - image_folder.change( - remove_doublequote, - inputs=[image_folder], - outputs=[image_folder], - ) - output_dir.change( - remove_doublequote, - inputs=[output_dir], - outputs=[output_dir], - ) - with gr.Tab('Dataset preparation'): - with gr.Row(): - max_resolution = gr.Textbox( - label='Resolution (width,height)', value='512,512' - ) - min_bucket_reso = gr.Textbox( - label='Min bucket resolution', value='256' - ) - max_bucket_reso = gr.Textbox( - label='Max bucket resolution', value='1024' - ) - batch_size = gr.Textbox(label='Batch size', value='1') - with gr.Row(): - create_caption = gr.Checkbox( - label='Generate caption metadata', value=True - ) - create_buckets = gr.Checkbox( - label='Generate image buckets metadata', value=True - ) - use_latent_files = gr.Dropdown( - label='Use latent files', - choices=[ - 'No', - 'Yes', - ], - value='Yes', - ) - with gr.Accordion('Advanced parameters', open=False): - with gr.Row(): - caption_metadata_filename = gr.Textbox( - label='Caption metadata filename', value='meta_cap.json' - ) - latent_metadata_filename = gr.Textbox( - label='Latent metadata filename', value='meta_lat.json' - ) - with gr.Row(): - full_path = gr.Checkbox(label='Use full path', value=True) - weighted_captions = gr.Checkbox( - label='Weighted captions', value=False - ) - with gr.Tab('Training parameters'): ( + button_open_config, + button_save_config, + button_save_as_config, + config_file_name, + 
button_load_config, + ) = gradio_config(headless=headless) + + ( + pretrained_model_name_or_path, + v2, + v_parameterization, + sdxl_checkbox, + save_model_as, + model_list, + ) = gradio_source_model(headless=headless) + + with gr.Tab('Folders'): + with gr.Row(): + train_dir = gr.Textbox( + label='Training config folder', + placeholder='folder where the training configuration files will be saved', + ) + train_dir_folder = gr.Button( + folder_symbol, + elem_id='open_folder_small', + visible=(not headless), + ) + train_dir_folder.click( + get_folder_path, + outputs=train_dir, + show_progress=False, + ) + + image_folder = gr.Textbox( + label='Training Image folder', + placeholder='folder where the training images are located', + ) + image_folder_input_folder = gr.Button( + folder_symbol, + elem_id='open_folder_small', + visible=(not headless), + ) + image_folder_input_folder.click( + get_folder_path, + outputs=image_folder, + show_progress=False, + ) + with gr.Row(): + output_dir = gr.Textbox( + label='Model output folder', + placeholder='folder where the model will be saved', + ) + output_dir_input_folder = gr.Button( + folder_symbol, + elem_id='open_folder_small', + visible=(not headless), + ) + output_dir_input_folder.click( + get_folder_path, + outputs=output_dir, + show_progress=False, + ) + + logging_dir = gr.Textbox( + label='Logging folder', + placeholder='Optional: enable logging and output TensorBoard log to this folder', + ) + logging_dir_input_folder = gr.Button( + folder_symbol, + elem_id='open_folder_small', + visible=(not headless), + ) + logging_dir_input_folder.click( + get_folder_path, + outputs=logging_dir, + show_progress=False, + ) + with gr.Row(): + output_name = gr.Textbox( + label='Model output name', + placeholder='Name of the model to output', + value='last', + interactive=True, + ) + train_dir.change( + remove_doublequote, + inputs=[train_dir], + outputs=[train_dir], + ) + image_folder.change( + remove_doublequote, + inputs=[image_folder], + 
outputs=[image_folder], + ) + output_dir.change( + remove_doublequote, + inputs=[output_dir], + outputs=[output_dir], + ) + with gr.Tab('Dataset preparation'): + with gr.Row(): + max_resolution = gr.Textbox( + label='Resolution (width,height)', value='512,512' + ) + min_bucket_reso = gr.Textbox( + label='Min bucket resolution', value='256' + ) + max_bucket_reso = gr.Textbox( + label='Max bucket resolution', value='1024' + ) + batch_size = gr.Textbox(label='Batch size', value='1') + with gr.Row(): + create_caption = gr.Checkbox( + label='Generate caption metadata', value=True + ) + create_buckets = gr.Checkbox( + label='Generate image buckets metadata', value=True + ) + use_latent_files = gr.Dropdown( + label='Use latent files', + choices=[ + 'No', + 'Yes', + ], + value='Yes', + ) + with gr.Accordion('Advanced parameters', open=False): + with gr.Row(): + caption_metadata_filename = gr.Textbox( + label='Caption metadata filename', value='meta_cap.json' + ) + latent_metadata_filename = gr.Textbox( + label='Latent metadata filename', value='meta_lat.json' + ) + with gr.Row(): + full_path = gr.Checkbox(label='Use full path', value=True) + weighted_captions = gr.Checkbox( + label='Weighted captions', value=False + ) + with gr.Tab('Parameters'): + ( + learning_rate, + lr_scheduler, + lr_warmup, + train_batch_size, + epoch, + save_every_n_epochs, + mixed_precision, + save_precision, + num_cpu_threads_per_process, + seed, + caption_extension, + cache_latents, + cache_latents_to_disk, + optimizer, + optimizer_args, + ) = gradio_training(learning_rate_value='1e-5') + + # SDXL parameters + with gr.Row(visible=False) as sdxl_row: + sdxl_cache_text_encoder_outputs = gr.Checkbox( + label='(SDXL) Cache text encoder outputs', + info='Cache the outputs of the text encoders. This option is useful to reduce the GPU memory usage. 
This option cannot be used with options for shuffling or dropping the captions.', + value=False + ) + sdxl_no_half_vae = gr.Checkbox( + label='(SDXL) No half VAE', + info='Disable the half-precision (mixed-precision) VAE. VAE for SDXL seems to produce NaNs in some cases. This option is useful to avoid the NaNs.', + value=False + ) + + sdxl_checkbox.change(lambda sdxl_checkbox: gr.Row.update(visible=sdxl_checkbox), inputs=[sdxl_checkbox], outputs=[sdxl_row]) + + with gr.Row(): + dataset_repeats = gr.Textbox(label='Dataset repeats', value=40) + train_text_encoder = gr.Checkbox( + label='Train text encoder', value=True + ) + with gr.Accordion('Advanced parameters', open=False): + with gr.Row(): + gradient_accumulation_steps = gr.Number( + label='Gradient accumulate steps', value='1' + ) + ( + # use_8bit_adam, + xformers, + full_fp16, + gradient_checkpointing, + shuffle_caption, + color_aug, + flip_aug, + clip_skip, + mem_eff_attn, + save_state, + resume, + max_token_length, + max_train_epochs, + max_data_loader_n_workers, + keep_tokens, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + caption_dropout_every_n_epochs, + caption_dropout_rate, + noise_offset_type, + noise_offset, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, + additional_parameters, + vae_batch_size, + min_snr_gamma, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + use_wandb, + wandb_api_key, + scale_v_pred_loss_like_noise_pred, + ) = gradio_advanced_training(headless=headless) + color_aug.change( + color_aug_changed, + inputs=[color_aug], + outputs=[cache_latents], # Not applicable to fine_tune.py + ) + + ( + sample_every_n_steps, + sample_every_n_epochs, + sample_sampler, + sample_prompts, + ) = sample_gradio_config() + + button_run = gr.Button('Train model', variant='primary') + + button_print = gr.Button('Print training command') + + # Setup gradio tensorboard buttons + button_start_tensorboard, 
button_stop_tensorboard = gradio_tensorboard() + + button_start_tensorboard.click( + start_tensorboard, + inputs=logging_dir, + ) + + button_stop_tensorboard.click( + stop_tensorboard, + show_progress=False, + ) + + settings_list = [ + pretrained_model_name_or_path, + v2, + v_parameterization, + sdxl_checkbox, + train_dir, + image_folder, + output_dir, + logging_dir, + max_resolution, + min_bucket_reso, + max_bucket_reso, + batch_size, + flip_aug, + caption_metadata_filename, + latent_metadata_filename, + full_path, learning_rate, lr_scheduler, lr_warmup, + dataset_repeats, train_batch_size, epoch, save_every_n_epochs, mixed_precision, save_precision, - num_cpu_threads_per_process, seed, + num_cpu_threads_per_process, + train_text_encoder, + create_caption, + create_buckets, + save_model_as, caption_extension, + # use_8bit_adam, + xformers, + clip_skip, + save_state, + resume, + gradient_checkpointing, + gradient_accumulation_steps, + mem_eff_attn, + shuffle_caption, + output_name, + max_token_length, + max_train_epochs, + max_data_loader_n_workers, + full_fp16, + color_aug, + model_list, cache_latents, cache_latents_to_disk, + use_latent_files, + keep_tokens, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + caption_dropout_every_n_epochs, + caption_dropout_rate, optimizer, optimizer_args, - ) = gradio_training(learning_rate_value='1e-5') - - # SDXL parameters - with gr.Row(visible=False) as sdxl_row: - sdxl_cache_text_encoder_outputs = gr.Checkbox( - label='(SDXL) Cache text encoder outputs', - info='Cache the outputs of the text encoders. This option is useful to reduce the GPU memory usage. This option cannot be used with options for shuffling or dropping the captions.', - value=False - ) - sdxl_no_half_vae = gr.Checkbox( - label='(SDXL) No half VAE', - info='Disable the half-precision (mixed-precision) VAE. VAE for SDXL seems to produce NaNs in some cases. 
This option is useful to avoid the NaNs.', - value=False - ) - - sdxl_checkbox.change(lambda sdxl_checkbox: gr.Row.update(visible=sdxl_checkbox), inputs=[sdxl_checkbox], outputs=[sdxl_row]) - - with gr.Row(): - dataset_repeats = gr.Textbox(label='Dataset repeats', value=40) - train_text_encoder = gr.Checkbox( - label='Train text encoder', value=True - ) - with gr.Accordion('Advanced parameters', open=False): - with gr.Row(): - gradient_accumulation_steps = gr.Number( - label='Gradient accumulate steps', value='1' - ) - ( - # use_8bit_adam, - xformers, - full_fp16, - gradient_checkpointing, - shuffle_caption, - color_aug, - flip_aug, - clip_skip, - mem_eff_attn, - save_state, - resume, - max_token_length, - max_train_epochs, - max_data_loader_n_workers, - keep_tokens, - persistent_data_loader_workers, - bucket_no_upscale, - random_crop, - bucket_reso_steps, - caption_dropout_every_n_epochs, - caption_dropout_rate, - noise_offset_type, - noise_offset, - adaptive_noise_scale, - multires_noise_iterations, - multires_noise_discount, - additional_parameters, - vae_batch_size, - min_snr_gamma, - save_every_n_steps, - save_last_n_steps, - save_last_n_steps_state, - use_wandb, - wandb_api_key, - scale_v_pred_loss_like_noise_pred, - ) = gradio_advanced_training(headless=headless) - color_aug.change( - color_aug_changed, - inputs=[color_aug], - outputs=[cache_latents], # Not applicable to fine_tune.py - ) - - ( + noise_offset_type, + noise_offset, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, sample_every_n_steps, sample_every_n_epochs, sample_sampler, sample_prompts, - ) = sample_gradio_config() + additional_parameters, + vae_batch_size, + min_snr_gamma, + weighted_captions, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + use_wandb, + wandb_api_key, + scale_v_pred_loss_like_noise_pred, + sdxl_cache_text_encoder_outputs, + sdxl_no_half_vae, + ] - button_run = gr.Button('Train model', variant='primary') + 
button_run.click( + train_model, + inputs=[dummy_headless] + [dummy_db_false] + settings_list, + show_progress=False, + ) - button_print = gr.Button('Print training command') + button_print.click( + train_model, + inputs=[dummy_headless] + [dummy_db_true] + settings_list, + show_progress=False, + ) - # Setup gradio tensorboard buttons - button_start_tensorboard, button_stop_tensorboard = gradio_tensorboard() + button_open_config.click( + open_configuration, + inputs=[dummy_db_true, config_file_name] + settings_list, + outputs=[config_file_name] + settings_list, + show_progress=False, + ) - button_start_tensorboard.click( - start_tensorboard, - inputs=logging_dir, - ) + button_load_config.click( + open_configuration, + inputs=[dummy_db_false, config_file_name] + settings_list, + outputs=[config_file_name] + settings_list, + show_progress=False, + ) - button_stop_tensorboard.click( - stop_tensorboard, - show_progress=False, - ) + button_save_config.click( + save_configuration, + inputs=[dummy_db_false, config_file_name] + settings_list, + outputs=[config_file_name], + show_progress=False, + ) - settings_list = [ - pretrained_model_name_or_path, - v2, - v_parameterization, - sdxl_checkbox, - train_dir, - image_folder, - output_dir, - logging_dir, - max_resolution, - min_bucket_reso, - max_bucket_reso, - batch_size, - flip_aug, - caption_metadata_filename, - latent_metadata_filename, - full_path, - learning_rate, - lr_scheduler, - lr_warmup, - dataset_repeats, - train_batch_size, - epoch, - save_every_n_epochs, - mixed_precision, - save_precision, - seed, - num_cpu_threads_per_process, - train_text_encoder, - create_caption, - create_buckets, - save_model_as, - caption_extension, - # use_8bit_adam, - xformers, - clip_skip, - save_state, - resume, - gradient_checkpointing, - gradient_accumulation_steps, - mem_eff_attn, - shuffle_caption, - output_name, - max_token_length, - max_train_epochs, - max_data_loader_n_workers, - full_fp16, - color_aug, - model_list, - 
cache_latents, - cache_latents_to_disk, - use_latent_files, - keep_tokens, - persistent_data_loader_workers, - bucket_no_upscale, - random_crop, - bucket_reso_steps, - caption_dropout_every_n_epochs, - caption_dropout_rate, - optimizer, - optimizer_args, - noise_offset_type, - noise_offset, - adaptive_noise_scale, - multires_noise_iterations, - multires_noise_discount, - sample_every_n_steps, - sample_every_n_epochs, - sample_sampler, - sample_prompts, - additional_parameters, - vae_batch_size, - min_snr_gamma, - weighted_captions, - save_every_n_steps, - save_last_n_steps, - save_last_n_steps_state, - use_wandb, - wandb_api_key, - scale_v_pred_loss_like_noise_pred, - sdxl_cache_text_encoder_outputs, - sdxl_no_half_vae, - ] - - button_run.click( - train_model, - inputs=[dummy_headless] + [dummy_db_false] + settings_list, - show_progress=False, - ) - - button_print.click( - train_model, - inputs=[dummy_headless] + [dummy_db_true] + settings_list, - show_progress=False, - ) - - button_open_config.click( - open_configuration, - inputs=[dummy_db_true, config_file_name] + settings_list, - outputs=[config_file_name] + settings_list, - show_progress=False, - ) - - button_load_config.click( - open_configuration, - inputs=[dummy_db_false, config_file_name] + settings_list, - outputs=[config_file_name] + settings_list, - show_progress=False, - ) - - button_save_config.click( - save_configuration, - inputs=[dummy_db_false, config_file_name] + settings_list, - outputs=[config_file_name], - show_progress=False, - ) - - button_save_as_config.click( - save_configuration, - inputs=[dummy_db_true, config_file_name] + settings_list, - outputs=[config_file_name], - show_progress=False, - ) + button_save_as_config.click( + save_configuration, + inputs=[dummy_db_true, config_file_name] + settings_list, + outputs=[config_file_name], + show_progress=False, + ) + + with gr.Tab('Guides'): + gr.Markdown( + 'This section provide Various Finetuning guides and information...' 
+ ) + top_level_path = './docs/Finetuning/top_level.md' + if os.path.exists(top_level_path): + with open(os.path.join(top_level_path), 'r', encoding='utf8') as file: + guides_top_level = file.read() + '\n' + gr.Markdown(guides_top_level) def UI(**kwargs): diff --git a/presets/lora/lokr-sd15.json b/presets/lora/lokr-sd15.json index 41a3538..4c06eba 100644 --- a/presets/lora/lokr-sd15.json +++ b/presets/lora/lokr-sd15.json @@ -22,7 +22,6 @@ "dim_from_weights": false, "down_lr_weight": "", "enable_bucket": true, - "epoch": 1, "factor": -1, "flip_aug": false, "full_fp16": false, @@ -30,7 +29,6 @@ "gradient_checkpointing": false, "keep_tokens": "0", "learning_rate": 1.0, - "logging_dir": "", "lora_network_weights": "", "lr_scheduler": "cosine", "lr_scheduler_num_cycles": "", @@ -44,7 +42,6 @@ "mid_lr_weight": "", "min_snr_gamma": 10, "mixed_precision": "bf16", - "model_list": "runwayml/stable-diffusion-v1-5", "module_dropout": 0, "multires_noise_discount": 0.2, "multires_noise_iterations": 8, @@ -57,20 +54,16 @@ "num_cpu_threads_per_process": 2, "optimizer": "Prodigy", "optimizer_args": "", - "output_dir": "", - "output_name": "", "persistent_data_loader_workers": false, "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", "prior_loss_weight": 1.0, "random_crop": false, "rank_dropout": 0, - "reg_data_dir": "", "resume": "", "sample_every_n_epochs": 0, "sample_every_n_steps": 0, "sample_prompts": "", "sample_sampler": "euler_a", - "save_every_n_epochs": 1, "save_every_n_steps": 0, "save_last_n_steps": 0, "save_last_n_steps_state": 0, @@ -84,7 +77,6 @@ "stop_text_encoder_training": 0, "text_encoder_lr": 1.0, "train_batch_size": 1, - "train_data_dir": "", "train_on_input": false, "training_comment": "", "unet_lr": 1.0, diff --git a/test/config/iA3-Prodigy.json b/test/config/iA3-Prodigy.json index df8bf59..23f3bd7 100644 --- a/test/config/iA3-Prodigy.json +++ b/test/config/iA3-Prodigy.json @@ -67,8 +67,8 @@ "reg_data_dir": "", "resume": "", 
"sample_every_n_epochs": 1, - "sample_every_n_steps": null, - "sample_prompts": "a painting of man wearing a gas mask , by darius kawasaki", + "sample_every_n_steps": 0, + "sample_prompts": "a man wearing a gas mask, by darius kawasaki", "sample_sampler": "euler_a", "save_every_n_epochs": 1, "save_every_n_steps": 0,