diff --git a/sd-scripts b/sd-scripts index e2ed265..5753b8f 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit e2ed26510450cf147da1b66aea5154d04d0942ec +Subproject commit 5753b8ff6bc045c27c1c61535e35195da860269c diff --git a/tools/Untitled-1.txt b/tools/Untitled-1.txt deleted file mode 100644 index 8776ee9..0000000 --- a/tools/Untitled-1.txt +++ /dev/null @@ -1,287 +0,0 @@ -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision bf16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_fro_0.65.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 16 --device cuda --sdxl --target_fro_retained 0.5 --group_size 6 --svd_mode per_layer --dynamic_param 0.65 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision bf16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_cumulative_0.9.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 16 --device cuda --sdxl --target_fro_retained 0.5 --group_size 6 --svd_mode per_layer --dynamic_param 0.9 --dynamic_method sv_cumulative - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_fro_0.5v2.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 256 --device cuda --sdxl --dynamic_param 0.5 --dynamic_method sv_fro --verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_cumulative_0.5v2.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 768 --device cuda --sdxl --dynamic_param 0.5 --dynamic_method sv_cumulative --verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_ratio_0.5.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 768 --device cuda --sdxl --dynamic_param 0.5 --dynamic_method sv_ratio --verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_knee.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 512 --device cuda --sdxl --dynamic_method sv_knee --verbose --dynamic_param 0.5 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py ` ---save_precision fp16 ` ---save_to E:/lora/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES_sv_cumulative_knee.safetensors ` ---model_tuned E:/models/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES.safetensors ` ---model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` ---dim 512 ` ---device cuda ` ---sdxl ` ---dynamic_method sv_cumulative_knee ` ---verbose ` ---dynamic_param 0.25 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_v2.safetensors ` ---rank 4 ` ---iterations 200 ` ---lr 0.005 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---verbose_layer_debug ` ---save_weights_dtype fp16 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_64_4000steps.safetensors ` ---rank 64 ` ---initial_alpha 32 ` ---max_rank_doublings 2 ` ---max_iterations 16000 ` ---min_iterations 200 ` ---target_loss 9.9999999e-8 ` ---lr 0.05 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 - - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_16_16000steps.safetensors ` ---rank 16 ` ---initial_alpha 8 ` ---max_rank_retries 3 ` ---rank_increase_factor 1.5 ` ---max_iterations 8000 ` ---min_iterations 200 ` ---target_loss 9.9999999e-8 ` ---lr 0.05 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_16_8000steps.safetensors ` ---rank 16 ` ---initial_alpha 16 ` ---max_rank_retries 6 ` ---rank_increase_factor 2 ` ---max_iterations 8000 ` ---min_iterations 200 ` ---target_loss 9.9999999e-8 ` ---lr 0.05 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES.safetensors ` -E:/lora/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES_loha_16_8000steps.safetensors ` ---rank 16 ` ---initial_alpha 16 ` ---max_rank_retries 6 ` ---rank_increase_factor 2 ` ---max_iterations 8000 ` ---min_iterations 200 ` ---target_loss 9.9999999e-8 ` ---lr 0.1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/aetherverseXL_v10.safetensors ` -E:/lora/sdxl/aetherverseXL_v10_loha_9e-8.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 27 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 9e-8 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---advanced_projection_decay_cap_min 0.5 ` ---advanced_projection_decay_cap_max 1.05 ` ---min_progress_loss_ratio 0.000001 ` ---projection_sample_interval 1 ` ---projection_min_ema_history 100 - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/aetherverseXL_v10.safetensors ` -E:/lora/sdxl/aetherverseXL_v10_loha_9e-8.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 27 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 9e-8 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---save_every_n_layers 10 ` ---keep_n_resume_files 10 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/proteus_v06.safetensors ` -E:/lora/sdxl/proteus_v06_1e-7.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 27 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 1e-7 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---save_every_n_layers 10 ` ---keep_n_resume_files 10 - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_1e-8v3.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 29 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 1e-8 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---advanced_projection_decay_cap_min 0.5 ` ---advanced_projection_decay_cap_max 1.05 ` ---min_progress_loss_ratio 0.000001 ` ---projection_sample_interval 1 ` ---projection_min_ema_history 100 ` ---continue_training_from_loha E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_1e-8v2_resume_L422.safetensors - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES.safetensors ` -E:/lora/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES_loha_9e-8.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 27 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 1e-7 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---advanced_projection_decay_cap_min 0.5 ` ---advanced_projection_decay_cap_max 1.05 ` ---min_progress_loss_ratio 0.000001 ` ---projection_sample_interval 1 ` ---projection_min_ema_history 100 - -C:\Users\berna\Downloads\Dune_Movie_Loha2.safetensors - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py ` ---save_to E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha.safetensors ` ---model_org_path E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` ---model_tuned_path E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` ---algo loha ` ---network_alpha 64 ` ---network_dim 4 ` ---conv_alpha 64 ` ---conv_dim 4 ` ---device cuda ` ---sdxl ` ---save_precision fp16 ` ---verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_model_difference.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` ---save_dtype float16 - ---model_org_path E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` ---model_tuned_path E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` ---algo loha ` ---network_alpha 64 ` ---network_dim 4 ` ---conv_alpha 64 ` ---conv_dim 4 ` ---device cuda ` ---sdxl ` ---save_precision fp16 ` ---verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_to E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors --algo loha --sdxl --dim 32 --conv_dim 32 --dynamic_method sv_cumulative --dynamic_param 0.99 --save_precision fp16 --device cuda --verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py ^ - --model_org_path "D:\StableDiffusion\models\sdxl_base_1.0.safetensors" ^ - --model_tuned_path "D:\StableDiffusion\models\my_sdxl_finetune.safetensors" ^ - --save_to "C:\LoRA_Extractor\output\my_loha_sdxl.safetensors" ^ - --sdxl ^ - --algo loha ^ - --network_alpha 64 ^ - --network_dim 4 ^ - --conv_alpha 64 ^ - --conv_dim 4 ^ - --save_precision bf16 ^ - --device cuda ^ - --verbose - -sv_cumulative_knee - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_two_pass_energy_512.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --total_rank_budget 2048 --device cuda --sdxl --svd_mode per_layer --dynamic_param 1.0 --dynamic_method two_pass_energy --verbose --min_rank 4 --max_rank 32 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py ^ - --save_precision bf16 ^ - --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_two_pass_energy_512.safetensors ^ - --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ^ - --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors ^ - --dim 512 ^ - --device cuda ^ - --sdxl ^ - --target_fro_retained 0.5 ^ - --group_size 6 ^ - --svd_mode per_layer ^ - --dynamic_method two_pass_energy ^ - --dynamic_param 1.0 ^ - --min_rank 4 ^ - --verbose \ No newline at end of file diff --git a/tools/Untitled-2.txt b/tools/Untitled-2.txt deleted file mode 100644 index 433fb87..0000000 --- a/tools/Untitled-2.txt +++ /dev/null @@ -1,397 +0,0 @@ -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision bf16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_fro_0.65.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 16 --device cuda --sdxl --target_fro_retained 0.5 --group_size 6 --svd_mode per_layer --dynamic_param 0.65 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision bf16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_cumulative_0.9.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 16 --device cuda --sdxl --target_fro_retained 0.5 --group_size 6 --svd_mode per_layer --dynamic_param 0.9 --dynamic_method sv_cumulative - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_fro_0.5v2.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 256 --device cuda --sdxl --dynamic_param 0.5 --dynamic_method sv_fro --verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_cumulative_0.5v2.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 768 --device cuda --sdxl --dynamic_param 0.5 --dynamic_method sv_cumulative --verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_ratio_0.5.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 768 --device cuda --sdxl --dynamic_param 0.5 --dynamic_method sv_ratio --verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_sv_knee.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --dim 512 --device cuda --sdxl --dynamic_method sv_knee --verbose --dynamic_param 0.5 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py ` ---save_precision fp16 ` ---save_to E:/lora/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES_sv_cumulative_knee.safetensors ` ---model_tuned E:/models/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES.safetensors ` ---model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` ---dim 512 ` ---device cuda ` ---sdxl ` ---dynamic_method sv_fro ` ---verbose ` ---dynamic_param 0.25 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_lora_from_models-nw.py ` ---save_precision fp16 ` ---model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` ---model_tuned E:/models/sdxl/xxxRay_v11.safetensors ` ---save_to E:/lora/sdxl/xxxRay_v11_sv_cumulative_knee.safetensors ` ---dim 384 ` ---device cuda ` ---sdxl ` ---dynamic_method sv_cumulative_knee ` ---verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_lora_from_models-nw.py ` ---save_precision fp16 ` ---model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` ---model_tuned E:/models/sdxl/xxxRay_v11.safetensors ` ---save_to E:/lora/sdxl/xxxRay_v11_sv_fro_0.9_1024.safetensors ` ---dim 1024 ` ---device cuda ` ---sdxl ` ---dynamic_method sv_fro ` ---dynamic_param 0.9 ` ---verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_lora_from_models-nw.py ` ---save_precision fp16 ` ---model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` ---model_tuned E:/models/sdxl/proteus_v06.safetensors ` ---save_to E:/lora/sdxl/proteus_v06_sv_cumulative_knee_1024.safetensors ` ---dim 1024 ` ---device cuda ` ---sdxl ` ---dynamic_method sv_cumulative_knee ` ---verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_v2.safetensors ` ---rank 4 ` ---iterations 200 ` ---lr 0.005 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---verbose_layer_debug ` ---save_weights_dtype fp16 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_64_4000steps.safetensors ` ---rank 64 ` ---initial_alpha 32 ` ---max_rank_doublings 2 ` ---max_iterations 16000 ` ---min_iterations 200 ` ---target_loss 9.9999999e-8 ` ---lr 0.05 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 - - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_16_16000steps.safetensors ` ---rank 16 ` ---initial_alpha 8 ` ---max_rank_retries 3 ` ---rank_increase_factor 1.5 ` ---max_iterations 8000 ` ---min_iterations 200 ` ---target_loss 9.9999999e-8 ` ---lr 0.05 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_16_8000steps.safetensors ` ---rank 16 ` ---initial_alpha 16 ` ---max_rank_retries 6 ` ---rank_increase_factor 2 ` ---max_iterations 8000 ` ---min_iterations 200 ` ---target_loss 9.9999999e-8 ` ---lr 0.05 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES.safetensors ` -E:/lora/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES_loha_16_8000steps.safetensors ` ---rank 16 ` ---initial_alpha 16 ` ---max_rank_retries 6 ` ---rank_increase_factor 2 ` ---max_iterations 8000 ` ---min_iterations 200 ` ---target_loss 9.9999999e-8 ` ---lr 0.1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/aetherverseXL_v10.safetensors ` -E:/lora/sdxl/aetherverseXL_v10_loha_9e-8.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 27 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 9e-8 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---advanced_projection_decay_cap_min 0.5 ` ---advanced_projection_decay_cap_max 1.05 ` ---min_progress_loss_ratio 0.000001 ` ---projection_sample_interval 1 ` ---projection_min_ema_history 100 - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/aetherverseXL_v10.safetensors ` -E:/lora/sdxl/aetherverseXL_v10_loha_9e-8.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 30 ` ---rank_increase_factor 1.2 ` ---max_iterations 8000 ` ---min_iterations 200 ` ---target_loss 9e-8 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 100 ` ---save_every_n_layers 10 ` ---keep_n_resume_files 10 ` ---rank_search_strategy binary_search_min_rank - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/aetherverseXL_v10.safetensors ` -E:/lora/sdxl/aetherverseXL_v10_loha_9e-8.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 8 ` ---rank_increase_factor 2 ` ---max_iterations 8000 ` ---min_iterations 400 ` ---target_loss 9e-8 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 100 ` ---save_every_n_layers 10 ` ---keep_n_resume_files 10 ` ---skip_delta_threshold 3e-7 ` ---rank_search_strategy binary_search_min_rank - - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\lr_finder.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/xxxRay_v11.safetensors ` - --lr_finder_num_layers 16 ` - --lr_finder_min_lr 1e-8 ` - --lr_finder_max_lr 0.2 ` - --lr_finder_num_steps 120 ` - --lr_finder_iters_per_step 40 ` - --rank 8 ` - --initial_alpha 8.0 ` - --precision bf16 ` - --device cuda ` - --lr_finder_plot ` - --lr_finder_show_plot - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/xxxRay_v11.safetensors ` -E:/lora/sdxl/xxxRay_v11_loha_1e-7.safetensors ` ---rank 2 ` ---initial_alpha 2 ` ---max_rank_retries 7 ` ---rank_increase_factor 2 ` ---max_iterations 8000 ` ---min_iterations 400 ` ---target_loss 1e-7 ` ---lr 1e-01 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 100 ` ---save_every_n_layers 10 ` ---keep_n_resume_files 10 ` ---skip_delta_threshold 1e-7 ` ---rank_search_strategy binary_search_min_rank ` ---probe_aggressive_early_stop - -D:\kohya_ss\venv\Scripts\python.exe D:\kohya_ss\tools\model_diff_report.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` ---top_n_diff 15 --plot_histograms --plot_histograms_top_n 3 --output_dir ./analysis_results - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES.safetensors ` -E:/lora/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES_loha_3e-7.safetensors ` ---rank 1 ` ---initial_alpha 1 ` ---max_rank_retries 10 ` ---rank_increase_factor 2 ` ---max_iterations 8000 ` ---min_iterations 400 ` ---target_loss 3e-7 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 100 ` ---save_every_n_layers 10 ` ---keep_n_resume_files 10 ` ---skip_delta_threshold 6e-7 ` ---rank_search_strategy binary_search_min_rank ` ---probe_aggressive_early_stop - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/proteus_v06.safetensors ` -E:/lora/sdxl/proteus_v06_1e-7.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 27 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 1e-7 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---save_every_n_layers 10 ` ---keep_n_resume_files 10 ` ---rank_search_strategy binary_search_min_rank - - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` -E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_1e-8v3.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 29 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 1e-8 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---advanced_projection_decay_cap_min 0.5 ` ---advanced_projection_decay_cap_max 1.05 ` ---min_progress_loss_ratio 0.000001 ` ---projection_sample_interval 1 ` ---projection_min_ema_history 100 ` ---continue_training_from_loha E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_1e-8v2_resume_L422.safetensors - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES.safetensors ` -E:/lora/sdxl/lustifySDXLNSFW_oltFIXEDTEXTURES_loha_9e-8.safetensors ` ---rank 4 ` ---initial_alpha 4 ` ---max_rank_retries 27 ` ---rank_increase_factor 1.2 ` ---max_iterations 16000 ` ---min_iterations 400 ` ---target_loss 1e-7 ` ---lr 1e-1 ` ---device cuda ` ---precision fp32 ` ---verbose ` ---save_weights_dtype bf16 ` ---progress_check_interval 200 ` ---advanced_projection_decay_cap_min 0.5 ` ---advanced_projection_decay_cap_max 1.05 ` ---min_progress_loss_ratio 0.000001 ` ---projection_sample_interval 1 ` ---projection_min_ema_history 100 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_model_difference.py ` -E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` -E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` ---save_dtype float16 - ---model_org_path E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` ---model_tuned_path E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` ---algo loha ` ---network_alpha 64 ` ---network_dim 4 ` ---conv_alpha 64 ` ---conv_dim 4 ` ---device cuda ` ---sdxl ` ---save_precision fp16 ` ---verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_to E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors --algo loha --sdxl --dim 32 --conv_dim 32 --dynamic_method sv_cumulative --dynamic_param 0.99 --save_precision fp16 --device cuda --verbose - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py ^ - --model_org_path "D:\StableDiffusion\models\sdxl_base_1.0.safetensors" ^ - --model_tuned_path "D:\StableDiffusion\models\my_sdxl_finetune.safetensors" ^ - --save_to "C:\LoRA_Extractor\output\my_loha_sdxl.safetensors" ^ - --sdxl ^ - --algo loha ^ - --network_alpha 64 ^ - --network_dim 4 ^ - --conv_alpha 64 ^ - --conv_dim 4 ^ - --save_precision bf16 ^ - --device cuda ^ - --verbose - -sv_cumulative_knee - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py --save_precision fp16 --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_two_pass_energy_512.safetensors --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors --total_rank_budget 2048 --device cuda --sdxl --svd_mode per_layer --dynamic_param 1.0 --dynamic_method two_pass_energy --verbose --min_rank 4 --max_rank 32 - -D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\sd-scripts\networks\extract_lora_from_models-nw.py ^ - --save_precision bf16 ^ - --save_to E:/lora/sdxl/cinemaDiffusoXL_beta03_two_pass_energy_512.safetensors ^ - --model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ^ - --model_tuned E:/models/sdxl/cinemaDiffusoXL_beta03.safetensors ^ - --dim 512 ^ - --device cuda ^ - --sdxl ^ - --target_fro_retained 0.5 ^ - --group_size 6 ^ - --svd_mode per_layer ^ - --dynamic_method two_pass_energy ^ - --dynamic_param 1.0 ^ - --min_rank 4 ^ - --verbose \ No newline at end of file diff --git a/tools/extract loha and lora examples.txt b/tools/extract loha and lora examples.txt new file mode 100644 index 0000000..108b525 --- /dev/null +++ b/tools/extract loha and lora examples.txt @@ -0,0 +1,65 @@ +D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_lora_from_models-nw.py ` +--save_precision fp16 ` +--model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` +--model_tuned E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` +--save_to E:/lora/sdxl/dreamshaperXL_alpha2Xl10_sv_fro_0.9_1024.safetensors ` +--dim 1024 ` +--device cuda ` +--sdxl ` +--dynamic_method sv_fro ` +--dynamic_param 0.9 ` +--verbose + +D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_lora_from_models-nw.py ` +--save_precision fp16 ` +--model_org E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` +--model_tuned E:/models/sdxl/proteus_v06.safetensors ` +--save_to E:/lora/sdxl/proteus_v06_sv_cumulative_knee_1024.safetensors ` +--dim 1024 ` +--device cuda ` +--sdxl ` +--dynamic_method sv_cumulative_knee ` +--verbose + +D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\lr_finder.py ` +E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` +E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` + --lr_finder_num_layers 16 ` + --lr_finder_min_lr 1e-8 ` + --lr_finder_max_lr 0.2 ` + --lr_finder_num_steps 120 ` + --lr_finder_iters_per_step 40 ` + --rank 8 ` + --initial_alpha 8.0 ` + --precision bf16 ` + --device cuda ` + --lr_finder_plot ` + --lr_finder_show_plot + +D:\kohya_ss\.venv\Scripts\python.exe D:\kohya_ss\tools\extract_loha_from_tuned_model.py ` +E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` +E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` +E:/lora/sdxl/dreamshaperXL_alpha2Xl10_loha_1e-7.safetensors ` +--rank 2 ` +--initial_alpha 2 ` +--max_rank_retries 7 ` +--rank_increase_factor 2 ` +--max_iterations 8000 ` +--min_iterations 400 ` +--target_loss 1e-7 ` +--lr 1e-01 ` +--device cuda ` +--precision fp32 ` +--verbose ` +--save_weights_dtype bf16 ` +--progress_check_interval 100 ` +--save_every_n_layers 10 ` +--keep_n_resume_files 10 ` +--skip_delta_threshold 1e-7 ` +--rank_search_strategy binary_search_min_rank ` +--probe_aggressive_early_stop + +D:\kohya_ss\venv\Scripts\python.exe D:\kohya_ss\tools\model_diff_report.py ` +E:/models/sdxl/base/sd_xl_base_1.0_0.9vae.safetensors ` +E:/models/sdxl/dreamshaperXL_alpha2Xl10.safetensors ` +--top_n_diff 15 --plot_histograms --plot_histograms_top_n 3 --output_dir ./analysis_results \ No newline at end of file diff --git a/tools/extract_loha_from_tuned_model.py b/tools/extract_loha_from_model.py similarity index 100% rename from tools/extract_loha_from_tuned_model.py rename to tools/extract_loha_from_model.py diff --git a/tools/extract_loha_from_tuned_model copy.py b/tools/extract_loha_from_tuned_model copy.py deleted file mode 100644 index aec9bb6..0000000 --- a/tools/extract_loha_from_tuned_model copy.py +++ /dev/null @@ -1,662 +0,0 @@ -import argparse -import os -import torch -import torch.nn as nn -import torch.nn.functional as F -from safetensors.torch import save_file, load_file -import safetensors # Import the main library to use safetensors.safe_open -from tqdm import tqdm -import math -import json -from collections import OrderedDict -import signal -import sys -import glob - -# --- Global variables --- -extracted_loha_state_dict_global = OrderedDict() -layer_optimization_stats_global = [] -args_global = None -processed_layers_this_session_count_global = 0 -previously_completed_module_prefixes_global = set() -all_completed_module_prefixes_ever_global = set() # Tracks all module prefixes ever completed (resumed + current) -skipped_identical_count_global = 0 -skipped_other_reason_count_global = 0 -keys_scanned_this_run_global = 0 -save_attempted_on_interrupt = False -outer_pbar_global = None -main_loop_completed_scan_flag_global = False # True if the main key loop finished a full scan - -# --- optimize_loha_for_layer and get_module_shape_info_from_weight (UNCHANGED) --- -def optimize_loha_for_layer( - layer_name: str, delta_W_target: torch.Tensor, out_dim: int, in_dim_effective: int, - k_h: int, k_w: int, rank: int, initial_alpha_val: float, lr: float = 1e-3, - max_iterations: int = 1000, min_iterations: int = 100, target_loss: float = None, - weight_decay: float = 1e-4, device: str = 'cuda', dtype: torch.dtype = torch.float32, - is_conv: bool = True, verbose_layer_debug: bool = False -): - delta_W_target = delta_W_target.to(device, dtype=dtype) - if is_conv: - k_ops = k_h * k_w - hada_w1_a = nn.Parameter(torch.empty(out_dim, rank, device=device, dtype=dtype)); nn.init.kaiming_uniform_(hada_w1_a, a=math.sqrt(5)) - hada_w1_b = nn.Parameter(torch.empty(rank, in_dim_effective * k_ops, device=device, dtype=dtype)); nn.init.normal_(hada_w1_b, std=0.02) - hada_w2_a = nn.Parameter(torch.empty(out_dim, rank, device=device, dtype=dtype)); nn.init.kaiming_uniform_(hada_w2_a, a=math.sqrt(5)) - hada_w2_b = nn.Parameter(torch.empty(rank, in_dim_effective * k_ops, device=device, dtype=dtype)); nn.init.normal_(hada_w2_b, std=0.02) - else: # Linear - hada_w1_a = nn.Parameter(torch.empty(out_dim, rank, device=device, dtype=dtype)); nn.init.kaiming_uniform_(hada_w1_a, a=math.sqrt(5)) - hada_w1_b = nn.Parameter(torch.empty(rank, in_dim_effective, device=device, dtype=dtype)); nn.init.normal_(hada_w1_b, std=0.02) - hada_w2_a = nn.Parameter(torch.empty(out_dim, rank, device=device, dtype=dtype)); nn.init.kaiming_uniform_(hada_w2_a, a=math.sqrt(5)) - hada_w2_b = nn.Parameter(torch.empty(rank, in_dim_effective, device=device, dtype=dtype)); nn.init.normal_(hada_w2_b, std=0.02) - alpha_param = nn.Parameter(torch.tensor(initial_alpha_val, device=device, dtype=dtype)) - optimizer = torch.optim.AdamW([hada_w1_a, hada_w1_b, hada_w2_a, hada_w2_b, alpha_param], lr=lr, weight_decay=weight_decay) - patience_epochs = max(10, int(max_iterations * 0.05)) - scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=patience_epochs, factor=0.5, min_lr=1e-7, verbose=False) - iter_pbar = tqdm(range(max_iterations), desc=f"Opt: {layer_name}", leave=False, dynamic_ncols=True, position=1, mininterval=0.5) - final_loss = float('inf'); stopped_early_by_loss = False; iterations_actually_done = 0 - for i in iter_pbar: - iterations_actually_done = i + 1 - if save_attempted_on_interrupt: print(f"\n Interrupt during opt of {layer_name}. Stopping layer after {i} iters."); break - optimizer.zero_grad(); eff_alpha_scale = alpha_param / rank - if is_conv: - term1_flat = hada_w1_a @ hada_w1_b; term1_reshaped = term1_flat.view(out_dim, in_dim_effective, k_h, k_w) - term2_flat = hada_w2_a @ hada_w2_b; term2_reshaped = term2_flat.view(out_dim, in_dim_effective, k_h, k_w) - delta_W_loha = eff_alpha_scale * term1_reshaped * term2_reshaped - else: - term1 = hada_w1_a @ hada_w1_b; term2 = hada_w2_a @ hada_w2_b - delta_W_loha = eff_alpha_scale * term1 * term2 - loss = F.mse_loss(delta_W_loha, delta_W_target); final_loss = loss.item() - loss.backward(); optimizer.step(); scheduler.step(loss) - current_lr = optimizer.param_groups[0]['lr'] - iter_pbar.set_postfix_str(f"Loss={final_loss:.3e}, AlphaP={alpha_param.item():.2f}, LR={current_lr:.1e}", refresh=True) - if verbose_layer_debug and (i == 0 or (i + 1) % (max_iterations // 10 if max_iterations >= 10 else 1) == 0 or i == max_iterations - 1): - iter_pbar.write(f" Debug {layer_name} - Iter {i+1}/{max_iterations}: Loss: {final_loss:.6e}, LR: {current_lr:.2e}, AlphaP: {alpha_param.item():.4f}") - if target_loss is not None and i >= min_iterations -1 and final_loss <= target_loss: - if verbose_layer_debug or (args_global and args_global.verbose): iter_pbar.write(f" Target loss {target_loss:.2e} reached for {layer_name} at iter {i+1}.") - stopped_early_by_loss = True; break - if not save_attempted_on_interrupt: iter_pbar.set_description_str(f"Opt: {layer_name} (Done)"); iter_pbar.set_postfix_str(f"FinalLoss={final_loss:.2e}, It={iterations_actually_done}{', EarlyStop' if stopped_early_by_loss else ''}") - iter_pbar.close() - if save_attempted_on_interrupt and not stopped_early_by_loss and iterations_actually_done < max_iterations: - return {'final_loss': final_loss, 'stopped_early': False, 'iterations_done': iterations_actually_done, 'interrupted_mid_layer': True} - return {'hada_w1_a': hada_w1_a.data.cpu().contiguous(), 'hada_w1_b': hada_w1_b.data.cpu().contiguous(), - 'hada_w2_a': hada_w2_a.data.cpu().contiguous(), 'hada_w2_b': hada_w2_b.data.cpu().contiguous(), - 'alpha': alpha_param.data.cpu().contiguous(), 'final_loss': final_loss, - 'stopped_early': stopped_early_by_loss, 'iterations_done': iterations_actually_done, - 'interrupted_mid_layer': False} - -def get_module_shape_info_from_weight(weight_tensor: torch.Tensor): - if len(weight_tensor.shape) == 4: is_conv = True; out_dim, in_dim_effective, k_h, k_w = weight_tensor.shape; groups = 1; return out_dim, in_dim_effective, k_h, k_w, groups, is_conv - elif len(weight_tensor.shape) == 2: is_conv = False; out_dim, in_dim = weight_tensor.shape; return out_dim, in_dim, None, None, 1, is_conv - return None - -# --- NEW: Helper function to generate intermediate filenames --- -def generate_intermediate_filename(base_save_path: str, num_total_completed_layers: int) -> str: - base, ext = os.path.splitext(base_save_path) - return f"{base}_resume_L{num_total_completed_layers}{ext}" - -# --- NEW: Helper function to find the best file to resume from --- -def find_best_resume_file(intended_final_path: str) -> tuple[str | None, int]: - output_dir = os.path.dirname(intended_final_path) - if not output_dir: output_dir = "." # Current directory if no path part - base_save_name, save_ext = os.path.splitext(os.path.basename(intended_final_path)) - - potential_files = [] - # Check the main intended file first - if os.path.exists(intended_final_path): - potential_files.append(intended_final_path) - - # Check for intermediate files - intermediate_pattern = os.path.join(output_dir, f"{base_save_name}_resume_L*{save_ext}") - potential_files.extend(glob.glob(intermediate_pattern)) - - best_file_path = None - max_completed_modules = -1 - - if not potential_files: - print(" No existing main LoHA file or intermediate resume files found.") - return None, -1 - - print(f" Found potential resume files: {potential_files}") - - for file_path in potential_files: - try: - if not os.path.exists(file_path): continue # Should not happen with glob but good check - with safetensors.safe_open(file_path, framework="pt", device="cpu") as f: - metadata = f.metadata() - - if metadata and "ss_completed_loha_modules" in metadata: - num_completed = len(json.loads(metadata["ss_completed_loha_modules"])) - if num_completed > max_completed_modules: - max_completed_modules = num_completed - best_file_path = file_path - elif num_completed == max_completed_modules and best_file_path != intended_final_path and file_path == intended_final_path: - # Prefer the main file if module count is the same as an intermediate - best_file_path = file_path - - - elif max_completed_modules == -1: # If no file has metadata, consider the first one (or main one) - # This case handles files without the specific metadata, preferring the main file if it exists. - # It's a basic fallback; files with proper metadata will usually win. - if best_file_path is None or (file_path == intended_final_path and best_file_path != intended_final_path): - best_file_path = file_path # Fallback to considering the file itself if no metadata found yet - max_completed_modules = 0 # Treat as 0 if no metadata, to be potentially overridden - print(f" File {file_path} has no 'ss_completed_loha_modules' metadata. Treating as 0 completed for now.") - - - except Exception as e: - print(f" Warning: Could not read or parse metadata from {file_path}: {e}") - if best_file_path is None and file_path == intended_final_path and max_completed_modules == -1: - best_file_path = file_path # If primary file is broken, still note it as a candidate if nothing better - max_completed_modules = 0 - - - if best_file_path: - print(f" Selected '{os.path.basename(best_file_path)}' for resume (contains {max_completed_modules} completed modules in metadata).") - elif not potential_files: # Already handled above, but as a safeguard - print(f" No existing LoHA file or intermediate files found matching pattern for: {intended_final_path}") - else: - print(f" Could not determine a best file to resume from among candidates, or no valid metadata found.") - - - return best_file_path, max_completed_modules - -# --- NEW: Helper function to clean up intermediate files --- -def cleanup_intermediate_files(final_intended_path: str): - output_dir = os.path.dirname(final_intended_path) - if not output_dir: output_dir = "." - base_save_name, save_ext = os.path.splitext(os.path.basename(final_intended_path)) - intermediate_pattern = os.path.join(output_dir, f"{base_save_name}_resume_L*{save_ext}") - - cleaned_count = 0 - for file_path in glob.glob(intermediate_pattern): - try: - os.remove(file_path) - if args_global and args_global.verbose: print(f" Cleaned up intermediate file: {file_path}") - cleaned_count +=1 - except OSError as e: - print(f" Warning: Could not clean up intermediate file {file_path}: {e}") - if cleaned_count > 0: - print(f" Cleaned up {cleaned_count} intermediate file(s).") - - -# --- perform_graceful_save (MODIFIED to only require output_path_override) --- -def perform_graceful_save(output_path_to_save: str): - global extracted_loha_state_dict_global, layer_optimization_stats_global, args_global - global processed_layers_this_session_count_global, save_attempted_on_interrupt - global skipped_identical_count_global, skipped_other_reason_count_global, keys_scanned_this_run_global - global all_completed_module_prefixes_ever_global # Use this for metadata - - # Ensure all_completed_module_prefixes_ever is up-to-date before saving - # This should already be handled by adding to it when layers are processed or resumed. - current_session_processed_prefixes = {stat['name'] for stat in layer_optimization_stats_global} - # `all_completed_module_prefixes_ever_global` should already include `previously_completed_module_prefixes_global` - # and any newly processed ones. - - total_processed_ever = len(all_completed_module_prefixes_ever_global) - - if not extracted_loha_state_dict_global and not previously_completed_module_prefixes_global : # Check against all_completed for empty save - # If all_completed is also empty, it means nothing was resumed and nothing new processed - if not all_completed_module_prefixes_ever_global: - print(f"No layers were processed or loaded to save to {output_path_to_save}. Save aborted.") - return - - args_to_use = args_global - if not args_to_use: print("Error: Global args not available for saving metadata."); return - - final_save_path = output_path_to_save # Use the direct path given - - if args_to_use.save_weights_dtype == "fp16": final_save_dtype_torch = torch.float16 - elif args_to_use.save_weights_dtype == "bf16": final_save_dtype_torch = torch.bfloat16 - else: final_save_dtype_torch = torch.float32 - - final_state_dict_to_save = OrderedDict() - for k, v_tensor in extracted_loha_state_dict_global.items(): - if v_tensor.is_floating_point(): final_state_dict_to_save[k] = v_tensor.to(final_save_dtype_torch) - else: final_state_dict_to_save[k] = v_tensor - - # Metadata uses all_completed_module_prefixes_ever_global - print(f"\nAttempting to save LoHA for {total_processed_ever} unique modules in total " - f"({processed_layers_this_session_count_global} new this session) to {final_save_path}") - - eff_global_network_alpha_val = args_to_use.initial_alpha; eff_global_network_alpha_str = f"{eff_global_network_alpha_val:.8f}" - global_rank_str = str(args_to_use.rank) - conv_rank_str = str(args_to_use.conv_rank if args_to_use.conv_rank is not None else args_to_use.rank) - eff_conv_alpha_val = args_to_use.initial_conv_alpha; conv_alpha_str = f"{eff_conv_alpha_val:.8f}" - - network_args_dict = { - "algo": "loha", "dim": global_rank_str, "alpha": eff_global_network_alpha_str, - "conv_dim": conv_rank_str, "conv_alpha": conv_alpha_str, - "dropout": str(args_to_use.dropout), "rank_dropout": str(args_to_use.rank_dropout), "module_dropout": str(args_to_use.module_dropout), - "use_tucker": "false", "use_scalar": "false", "block_size": "1",} - - sf_metadata = { - "ss_network_module": "lycoris.kohya", "ss_network_rank": global_rank_str, - "ss_network_alpha": eff_global_network_alpha_str, "ss_network_algo": "loha", - "ss_network_args": json.dumps(network_args_dict), - "ss_comment": f"Extracted LoHA (Interrupt: {save_attempted_on_interrupt}). OptPrec: {args_to_use.precision}. SaveDtype: {args_to_use.save_weights_dtype}. ATOL: {args_to_use.atol_fp32_check}. Layers: {total_processed_ever}. MaxIter: {args_to_use.max_iterations}. TargetLoss: {args_to_use.target_loss}", - "ss_base_model_name": os.path.splitext(os.path.basename(args_to_use.base_model_path))[0], - "ss_ft_model_name": os.path.splitext(os.path.basename(args_to_use.ft_model_path))[0], - "ss_save_weights_dtype": args_to_use.save_weights_dtype, "ss_optimization_precision": args_to_use.precision, - "ss_completed_loha_modules": json.dumps(list(all_completed_module_prefixes_ever_global)) # Use the global cumulative set - } - - json_metadata_for_file = { - "comfyui_lora_type": "LyCORIS_LoHa", "model_name": os.path.splitext(os.path.basename(final_save_path))[0], - "base_model_path": args_to_use.base_model_path, "ft_model_path": args_to_use.ft_model_path, - "loha_extraction_settings": {k: str(v) if isinstance(v, type(os.pathsep)) else v for k,v in vars(args_to_use).items()}, - "extraction_summary":{ - "processed_layers_in_total_cumulative": total_processed_ever, # Cumulative - "processed_this_session": processed_layers_this_session_count_global, - "skipped_identical_count_this_session": skipped_identical_count_global, - "skipped_other_reason_count_this_session": skipped_other_reason_count_global, - "total_candidate_keys_scanned_in_loop_this_session": keys_scanned_this_run_global, - }, - "layer_optimization_details_this_session": layer_optimization_stats_global, - "embedded_safetensors_metadata": sf_metadata, - "interrupted_save": save_attempted_on_interrupt - } - - if final_save_path.endswith(".safetensors"): - try: - save_file(final_state_dict_to_save, final_save_path, metadata=sf_metadata) - print(f"LoHA state_dict saved to: {final_save_path}") - except Exception as e: - print(f"Error saving .safetensors file: {e}"); return - - metadata_json_file_path = os.path.splitext(final_save_path)[0] + "_extraction_metadata.json" - try: - with open(metadata_json_file_path, 'w') as f: json.dump(json_metadata_for_file, f, indent=4) - print(f"Extended metadata saved to: {metadata_json_file_path}") - except Exception as e: print(f"Could not save extended metadata JSON: {e}") - else: - # Saving to .pt might not be fully robust with this new scheme if JSON metadata is critical - print(f"Saving to .pt not fully supported with extended metadata JSON. Saving basic .pt file.") - torch.save({'state_dict': final_state_dict_to_save, 'metadata': sf_metadata}, final_save_path) - print(f"LoHA state_dict saved to: {final_save_path} (basic .pt save)") - - -# --- handle_interrupt (MODIFIED to use intermediate filenames) --- -def handle_interrupt(signum, frame): - global save_attempted_on_interrupt, outer_pbar_global, args_global, all_completed_module_prefixes_ever_global - - print("\n" + "="*30 + "\nCtrl+C (SIGINT) detected!\n" + "="*30) - if save_attempted_on_interrupt: print("Save already attempted. Force exiting."); os._exit(1); return - save_attempted_on_interrupt = True - - if outer_pbar_global: outer_pbar_global.close() # Close main progress bar - - # Close any active layer progress bar (it's trickier, this might not catch it if deep in opt) - # For simplicity, we rely on the check within optimize_loha_for_layer - - print("Attempting to save progress for processed layers...") - if args_global and args_global.save_to: - num_layers_for_filename = len(all_completed_module_prefixes_ever_global) - interrupt_save_path = generate_intermediate_filename(args_global.save_to, num_layers_for_filename) - print(f"Interrupt save will be to: {interrupt_save_path}") - perform_graceful_save(output_path_to_save=interrupt_save_path) - else: - print("Cannot perform interrupt save: args_global or save_to path not defined.") - - print("Graceful save attempt finished. Exiting.") - sys.exit(0) - -def main(cli_args): - global args_global, extracted_loha_state_dict_global, layer_optimization_stats_global - global processed_layers_this_session_count_global, save_attempted_on_interrupt, outer_pbar_global - global skipped_identical_count_global, skipped_other_reason_count_global, keys_scanned_this_run_global - global previously_completed_module_prefixes_global, all_completed_module_prefixes_ever_global - global main_loop_completed_scan_flag_global - - args_global = cli_args - signal.signal(signal.SIGINT, handle_interrupt) - - if args_global.precision == "fp16": target_opt_dtype = torch.float16 - elif args_global.precision == "bf16": target_opt_dtype = torch.bfloat16 - else: target_opt_dtype = torch.float32 - - if args_global.save_weights_dtype == "fp16": final_save_dtype = torch.float16 - elif args_global.save_weights_dtype == "bf16": final_save_dtype = torch.bfloat16 - else: final_save_dtype = torch.float32 - - print(f"Using device: {args_global.device}, Opt Dtype: {target_opt_dtype}, Save Dtype: {final_save_dtype}") - if args_global.target_loss: print(f"Target Loss: {args_global.target_loss:.2e} (after {args_global.min_iterations} min iters)") - print(f"Max Iters/Layer: {args_global.max_iterations}") - - # --- MODIFIED: Loading Existing LoHA for resuming (using find_best_resume_file) --- - chosen_resume_file = None - if not args_global.overwrite: - print(f"\nChecking for existing LoHA file or resume states for: {args_global.save_to}") - chosen_resume_file, num_modules_in_chosen_file = find_best_resume_file(args_global.save_to) - - if chosen_resume_file: - print(f" Attempting to resume from: {chosen_resume_file} ({num_modules_in_chosen_file} modules reported in metadata).") - try: - file_metadata = None - with safetensors.safe_open(chosen_resume_file, framework="pt", device="cpu") as f: - file_metadata = f.metadata() - - completed_modules_in_file = set() - if file_metadata and "ss_completed_loha_modules" in file_metadata: - try: - completed_modules_in_file = set(json.loads(file_metadata.get("ss_completed_loha_modules"))) - # Verify count if possible, though num_modules_in_chosen_file is already from this. - if len(completed_modules_in_file) != num_modules_in_chosen_file and num_modules_in_chosen_file !=0 : # 0 can be if file had no metadata but was chosen - print(f" Warning: Metadata module count ({len(completed_modules_in_file)}) differs from initial scan count ({num_modules_in_chosen_file}). Using parsed set.") - except json.JSONDecodeError: - print(" Warning: Could not parse 'ss_completed_loha_modules' metadata from chosen file. Will not load specific tensors by prefix matching.") - else: - print(" 'ss_completed_loha_modules' not found in chosen file's metadata. Will not load specific tensors by prefix matching (might load all if no prefixes known).") - - - if completed_modules_in_file: # Only load if we have a list of modules to check against - print(" Loading tensors from chosen resume file...") - loaded_sd_for_resume = load_file(chosen_resume_file, device='cpu') - - resumed_tensor_count = 0 - for key, tensor_val in loaded_sd_for_resume.items(): - module_prefix_for_check = ".".join(key.split('.')[:-1]) # e.g. lora_unet_..._block_0_fc1 - is_bias_for_completed_module = key.endswith(".bias") and module_prefix_for_check in completed_modules_in_file - - # Check if the tensor belongs to a module marked as completed - # This covers hada_w1_a, hada_w1_b etc. for LoHA layers, and biases. - if module_prefix_for_check in completed_modules_in_file or is_bias_for_completed_module : - extracted_loha_state_dict_global[key] = tensor_val - resumed_tensor_count +=1 - - previously_completed_module_prefixes_global = completed_modules_in_file - all_completed_module_prefixes_ever_global.update(previously_completed_module_prefixes_global) # Initialize with loaded - print(f" Successfully loaded {len(previously_completed_module_prefixes_global)} module prefixes " - f"with {resumed_tensor_count} tensors for resume from {os.path.basename(chosen_resume_file)}.") - del loaded_sd_for_resume - elif not completed_modules_in_file and num_modules_in_chosen_file == 0 and os.path.exists(chosen_resume_file): - # This case could mean an empty LoRA was found (e.g. from a previous failed start) - # or a file without the specific metadata was chosen by find_best_resume_file. - # We don't load anything specific but acknowledge the file existed. - print(f" Chosen resume file {os.path.basename(chosen_resume_file)} seems empty or has no LoHA module metadata. Starting new layer processing.") - - - # Optional: Load accompanying JSON metadata if it exists for the chosen_resume_file - resume_metadata_json_path = os.path.splitext(chosen_resume_file)[0] + "_extraction_metadata.json" - if os.path.exists(resume_metadata_json_path): - try: - with open(resume_metadata_json_path, 'r') as f_meta: - loaded_json_meta = json.load(f_meta) - # You could potentially load old layer_optimization_stats_global if needed for some cumulative report - # For now, we just acknowledge it. - print(f" Loaded accompanying metadata from: {os.path.basename(resume_metadata_json_path)}") - except Exception as e_json: - print(f" Could not load or parse JSON metadata from {resume_metadata_json_path}: {e_json}") - - except Exception as e: - print(f" Error loading or parsing chosen LoHA file '{chosen_resume_file}': {e}. Starting fresh for new layers.") - extracted_loha_state_dict_global.clear() - previously_completed_module_prefixes_global.clear() - all_completed_module_prefixes_ever_global.clear() - else: - print(" No suitable existing LoHA file found to resume from. Starting fresh.") - # Globals are already empty, so no action needed. - - elif args_global.overwrite and os.path.exists(args_global.save_to): - print(f"Overwriting specified output file as per --overwrite: {args_global.save_to}") - print(" Any existing intermediate resume files for this target will NOT be automatically cleaned with --overwrite until a new final save.") - extracted_loha_state_dict_global.clear() - previously_completed_module_prefixes_global.clear() - all_completed_module_prefixes_ever_global.clear() - # Note: We don't clean intermediates here because the user might want to revert. - # Cleanup happens on successful *final* save. - - print(f"\nLoading base model: {args_global.base_model_path}") - if args_global.base_model_path.endswith(".safetensors"): base_model_sd = load_file(args_global.base_model_path, device='cpu') - else: base_model_sd = torch.load(args_global.base_model_path, map_location='cpu'); base_model_sd = base_model_sd.get('state_dict', base_model_sd) - - print(f"Loading fine-tuned model: {args_global.ft_model_path}") - if args_global.ft_model_path.endswith(".safetensors"): ft_model_sd = load_file(args_global.ft_model_path, device='cpu') - else: ft_model_sd = torch.load(args_global.ft_model_path, map_location='cpu'); ft_model_sd = ft_model_sd.get('state_dict', ft_model_sd) - - # Reset session-specific counters - processed_layers_this_session_count_global = 0 - skipped_identical_count_global = 0 # For this session's scan - skipped_other_reason_count_global = 0 # For this session's scan - keys_scanned_this_run_global = 0 - layer_optimization_stats_global.clear() # For this session's stats - main_loop_completed_scan_flag_global = False - - - all_candidate_keys = [] - for k in base_model_sd.keys(): - if k.endswith('.weight') and k in ft_model_sd and (len(base_model_sd[k].shape) == 2 or len(base_model_sd[k].shape) == 4): - all_candidate_keys.append(k) - all_candidate_keys.sort() - total_candidates_to_scan = len(all_candidate_keys) - - print(f"Found {total_candidates_to_scan} candidate '.weight' keys common to both models and of suitable shape.") - - outer_pbar_global = tqdm(total=total_candidates_to_scan, desc="Scanning Layers", dynamic_ncols=True, position=0) - - try: - for key_name in all_candidate_keys: - if save_attempted_on_interrupt: break - keys_scanned_this_run_global += 1 - outer_pbar_global.update(1) - - original_module_path = key_name[:-len(".weight")] - loha_key_prefix = "" - if original_module_path.startswith("model.diffusion_model."): loha_key_prefix = "lora_unet_" + original_module_path[len("model.diffusion_model."):].replace(".", "_") - elif original_module_path.startswith("conditioner.embedders.0.transformer."): loha_key_prefix = "lora_te1_" + original_module_path[len("conditioner.embedders.0.transformer."):].replace(".", "_") - elif original_module_path.startswith("conditioner.embedders.1.model.transformer."): loha_key_prefix = "lora_te2_" + original_module_path[len("conditioner.embedders.1.model.transformer."):].replace(".", "_") - else: loha_key_prefix = "lora_" + original_module_path.replace(".", "_") - - # Check if already processed (either resumed or done in this session earlier if logic allowed re-scanning) - if loha_key_prefix in all_completed_module_prefixes_ever_global: - if args_global.verbose: - if loha_key_prefix in previously_completed_module_prefixes_global: - tqdm.write(f"Skipping {loha_key_prefix} (scan): already processed (loaded from resumed LoHA).") - # else: # This case should not happen if all_completed_module_prefixes_ever_global is managed correctly - # tqdm.write(f"Skipping {loha_key_prefix} (scan): already processed in this session (should be rare).") - outer_pbar_global.set_description_str(f"Scan {keys_scanned_this_run_global}/{total_candidates_to_scan} (Resumed: {len(previously_completed_module_prefixes_global)}, New Opt: {processed_layers_this_session_count_global})") - continue - - if args_global.max_layers is not None and args_global.max_layers > 0 and processed_layers_this_session_count_global >= args_global.max_layers: - # Still need to scan all keys to correctly determine if the job is "fully complete" later - # So, we just skip optimization but continue scanning. - if args_global.verbose and processed_layers_this_session_count_global == args_global.max_layers and (keys_scanned_this_run_global - (len(all_completed_module_prefixes_ever_global) - processed_layers_this_session_count_global) - skipped_identical_count_global - skipped_other_reason_count_global) == (args_global.max_layers +1) : # First time hitting this after max_layers - tqdm.write(f"\nReached max_layers limit ({args_global.max_layers}) for new layers this session. Continuing scan only to assess remaining layers.") - outer_pbar_global.set_description_str(f"Scan {keys_scanned_this_run_global}/{total_candidates_to_scan} (Max New Layers Reached, Opt Ths Sess: {processed_layers_this_session_count_global})") - # This key is not skipped due to being identical or other error, but due to max_layers. - # We don't increment skipped_other_reason_count_global here, as it's still a valid candidate for a future run. - continue # Continue scanning - - base_W = base_model_sd[key_name].to(dtype=torch.float32) - ft_W = ft_model_sd[key_name].to(dtype=torch.float32) - if base_W.shape != ft_W.shape: - skipped_other_reason_count_global +=1 - if args_global.verbose: tqdm.write(f"Skipping {key_name} (shape mismatch).") - continue - shape_info = get_module_shape_info_from_weight(base_W) - if shape_info is None: - skipped_other_reason_count_global +=1 - if args_global.verbose: tqdm.write(f"Skipping {key_name} (unsupported shape).") - continue - - delta_W_fp32 = (ft_W - base_W) - if torch.allclose(delta_W_fp32, torch.zeros_like(delta_W_fp32), atol=args_global.atol_fp32_check): - if args_global.verbose: tqdm.write(f"Skipping {key_name} (identical weights).") - skipped_identical_count_global += 1 - continue - - # If we reach here, this layer is a candidate for optimization in this session - max_layers_target_str = f"/{args_global.max_layers}" if args_global.max_layers is not None and args_global.max_layers > 0 else "" - outer_pbar_global.set_description_str(f"Optimizing L{processed_layers_this_session_count_global + 1}{max_layers_target_str} (Scan {keys_scanned_this_run_global}/{total_candidates_to_scan})") - if args_global.verbose: tqdm.write(f"\n Orig: {key_name} -> LoHA: {loha_key_prefix}") - - out_dim, in_dim_effective, k_h, k_w, _, is_conv = shape_info - delta_W_target_for_opt = delta_W_fp32.to(dtype=target_opt_dtype) - current_rank = args_global.conv_rank if is_conv and args_global.conv_rank is not None else args_global.rank - current_initial_alpha = args_global.initial_conv_alpha if is_conv else args_global.initial_alpha - - tqdm.write(f"Optimizing Layer {processed_layers_this_session_count_global + 1}{max_layers_target_str}: {loha_key_prefix} (Orig: {original_module_path}, Shp: {list(base_W.shape)}, R: {current_rank}, Alpha_init: {current_initial_alpha:.1f})") - try: - opt_results = optimize_loha_for_layer( - layer_name=loha_key_prefix, delta_W_target=delta_W_target_for_opt, - out_dim=out_dim, in_dim_effective=in_dim_effective, k_h=k_h, k_w=k_w, rank=current_rank, - initial_alpha_val=current_initial_alpha, lr=args_global.lr, - max_iterations=args_global.max_iterations, min_iterations=args_global.min_iterations, - target_loss=args_global.target_loss, weight_decay=args_global.weight_decay, - device=args_global.device, dtype=target_opt_dtype, is_conv=is_conv, - verbose_layer_debug=args_global.verbose_layer_debug - ) - if not opt_results.get('interrupted_mid_layer'): - for p_name, p_val in opt_results.items(): - if p_name not in ['final_loss', 'stopped_early', 'iterations_done', 'interrupted_mid_layer']: - extracted_loha_state_dict_global[f'{loha_key_prefix}.{p_name}'] = p_val.to(final_save_dtype) - - layer_optimization_stats_global.append({ - "name": loha_key_prefix, "original_name": original_module_path, - "final_loss": opt_results['final_loss'], "iterations_done": opt_results['iterations_done'], - "stopped_early_by_loss_target": opt_results['stopped_early']}) - - all_completed_module_prefixes_ever_global.add(loha_key_prefix) # Add to cumulative set - - tqdm.write(f" Layer {loha_key_prefix} Done. Loss: {opt_results['final_loss']:.4e}, Iters: {opt_results['iterations_done']}{', Stopped by Loss' if opt_results['stopped_early'] else ''}") - - if args_global.use_bias: - original_bias_key = f"{original_module_path}.bias" - # Check if bias exists in ft_model and differs from base (or base doesn't have it) - bias_differs = False - if original_bias_key in ft_model_sd: - ft_B = ft_model_sd[original_bias_key].to(dtype=torch.float32) - if original_bias_key in base_model_sd: - base_B = base_model_sd[original_bias_key].to(dtype=torch.float32) - if not torch.allclose(base_B, ft_B, atol=args_global.atol_fp32_check): - bias_differs = True - else: # Bias in FT but not in base - bias_differs = True - - if bias_differs: - extracted_loha_state_dict_global[original_bias_key] = ft_B.cpu().to(final_save_dtype) - if args_global.verbose: tqdm.write(f" Saved differing/new bias for {original_bias_key}") - # Note: Bias keys are not added to "loha_key_prefix" sets as they don't have LoHA params. - # They are just carried over if different. - - processed_layers_this_session_count_global += 1 - else: # Interrupted mid-layer - if args_global.verbose: tqdm.write(f" Opt for {loha_key_prefix} interrupted; not saving params for this layer.") - # Do not add to all_completed_module_prefixes_ever_global or increment processed_layers_this_session_count_global - except Exception as e: - print(f"\nError during optimization for {original_module_path} ({loha_key_prefix}): {e}") - import traceback; traceback.print_exc() - skipped_other_reason_count_global +=1 # Count as skipped due to error during opt - - # After the loop finishes (or breaks due to interrupt) - if not save_attempted_on_interrupt and keys_scanned_this_run_global == total_candidates_to_scan: - main_loop_completed_scan_flag_global = True - - - finally: # This will run whether the try block completes normally or an exception (like interrupt) occurs - if outer_pbar_global: - if not outer_pbar_global.disable and outer_pbar_global.n < outer_pbar_global.total: - outer_pbar_global.update(outer_pbar_global.total - outer_pbar_global.n) # Fill up the bar - outer_pbar_global.close() - - # --- Save decision logic --- - if not save_attempted_on_interrupt: # If interrupted, handler already saved - print("\n--- Final Optimization Summary (This Session) ---") - for stat in layer_optimization_stats_global: print(f"Layer: {stat['name']}, Final Loss: {stat['final_loss']:.4e}, Iters: {stat['iterations_done']}{', Stopped by Loss' if stat['stopped_early_by_loss_target'] else ''}") - - print(f"\n--- Overall Summary ---") - print(f"Total unique LoHA modules accumulated (resumed + new): {len(all_completed_module_prefixes_ever_global)}") - print(f" Processed new this session: {processed_layers_this_session_count_global}") - print(f" Skipped as identical (this session's scan): {skipped_identical_count_global}") - print(f" Skipped for other reasons (this session's scan, e.g., shape error, opt error): {skipped_other_reason_count_global}") - print(f" Total candidate keys scanned in loop (this session): {keys_scanned_this_run_global}/{total_candidates_to_scan}") - - actual_save_path: str - save_to_final_name = False - - if main_loop_completed_scan_flag_global: - # Number of layers that were found to be different and optimizable during the full scan of *this session* - num_optimizable_layers_identified_in_scan = total_candidates_to_scan - skipped_identical_count_global - skipped_other_reason_count_global - - # Check if all *such* layers are now accounted for in our cumulative set - if len(all_completed_module_prefixes_ever_global) >= num_optimizable_layers_identified_in_scan: - # This implies that all layers that showed a difference in the current model comparison - # are now present in the LoHA state dict (either from resume or processed now). - # We also need to ensure max_layers didn't prematurely stop us if it was less than this count. - if args_global.max_layers is None or processed_layers_this_session_count_global >= args_global.max_layers or len(all_completed_module_prefixes_ever_global) < (len(previously_completed_module_prefixes_global) + args_global.max_layers): - # If max_layers is not set, or if we processed up to max_layers (or didn't need to because all were done), - # and the total count matches the optimizable count from scan, then it's final. - save_to_final_name = True - else: # max_layers was hit, and it's less than total optimizable, so not final. - print(f" Scan completed, but max_layers ({args_global.max_layers}) may have limited processing before all {num_optimizable_layers_identified_in_scan} differing layers were handled.") - else: - print(f" Scan completed, but not all {num_optimizable_layers_identified_in_scan} differing layers are processed yet " - f"(current total: {len(all_completed_module_prefixes_ever_global)}).") - else: - print(" Scan did not complete fully. Saving intermediate state.") - - - if save_to_final_name: - actual_save_path = args_global.save_to - print(f"\nAll optimizable layers appear to be processed. Saving to final path: {actual_save_path}") - else: - num_layers_for_filename = len(all_completed_module_prefixes_ever_global) - actual_save_path = generate_intermediate_filename(args_global.save_to, num_layers_for_filename) - print(f"\nRun incomplete or not all differing layers processed. Saving intermediate state to: {actual_save_path}") - - perform_graceful_save(output_path_to_save=actual_save_path) - - if save_to_final_name and actual_save_path == args_global.save_to : # Ensure it's the final path - print("\nCleaning up intermediate resume files...") - cleanup_intermediate_files(args_global.save_to) - - else: # Save was attempted by interrupt handler - print("\nProcess was interrupted. Graceful save to an intermediate file was attempted by signal handler.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Extract LoHA parameters by optimizing against weight differences. Saves intermediate files like 'name_resume_L{count}.safetensors'.") - parser.add_argument("base_model_path", type=str, help="Path to the base model state_dict file (.pt, .pth, .safetensors)") - parser.add_argument("ft_model_path", type=str, help="Path to the fine-tuned model state_dict file (.pt, .pth, .safetensors)") - parser.add_argument("save_to", type=str, help="Path to save the FINAL extracted LoHA file (recommended .safetensors). Intermediate files will be based on this name.") - parser.add_argument("--overwrite", action="store_true", help="Ignore and overwrite any existing FINAL LoHA output file and its intermediate files if found at the start. Does not prevent resuming from other intermediate files if the final target does not exist.") - parser.add_argument("--rank", type=int, default=4, help="Default rank for LoHA decomposition (used for linear layers and as fallback for conv).") - parser.add_argument("--conv_rank", type=int, default=None, help="Specific rank for convolutional LoHA layers. Defaults to --rank if not set.") - parser.add_argument("--initial_alpha", type=float, default=None, help="Global initial alpha for optimization. Defaults to 'rank'.") - parser.add_argument("--initial_conv_alpha", type=float, default=None, help="Specific initial alpha for Conv LoHA. Defaults to '--initial_alpha' or conv_rank.") - parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate for LoHA optimization per layer.") - parser.add_argument("--max_iterations", type=int, default=1000, help="Maximum number of optimization iterations per layer.") - parser.add_argument("--min_iterations", type=int, default=100, help="Minimum iterations before checking target loss.") - parser.add_argument("--target_loss", type=float, default=None, help="Target MSE loss for early stopping per layer.") - parser.add_argument("--weight_decay", type=float, default=1e-5, help="Weight decay for LoHA optimization.") - parser.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu", help="Device ('cuda' or 'cpu').") - parser.add_argument("--precision", type=str, default="fp32", choices=["fp32", "fp16", "bf16"], help="Optimization precision. Default: fp32.") - parser.add_argument("--save_weights_dtype", type=str, default="bf16", choices=["fp32", "fp16", "bf16"], help="Dtype for saved LoHA weights. Default: bf16.") - parser.add_argument("--atol_fp32_check", type=float, default=1e-6, help="Tolerance for identical weight check.") - parser.add_argument("--use_bias", action="store_true", help="Save differing bias terms.") - parser.add_argument("--dropout", type=float, default=0.0, help="General dropout (metadata only).") - parser.add_argument("--rank_dropout", type=float, default=0.0, help="Rank dropout (metadata only).") - parser.add_argument("--module_dropout", type=float, default=0.0, help="Module dropout (metadata only).") - parser.add_argument("--max_layers", type=int, default=None, help="Max NEW differing layers to process this session. Scan will continue to assess all layers.") - parser.add_argument("--verbose", action="store_true", help="General verbose output.") - parser.add_argument("--verbose_layer_debug", action="store_true", help="Detailed per-iteration optimization debug output.") - - parsed_args = parser.parse_args() - if not os.path.exists(parsed_args.base_model_path): print(f"Error: Base model path not found: {parsed_args.base_model_path}"); exit(1) - if not os.path.exists(parsed_args.ft_model_path): print(f"Error: Fine-tuned model path not found: {parsed_args.ft_model_path}"); exit(1) - - save_dir = os.path.dirname(parsed_args.save_to) - if save_dir and not os.path.exists(save_dir): - try: - os.makedirs(save_dir, exist_ok=True) - print(f"Created directory: {save_dir}") - except OSError as e: - print(f"Error: Could not create directory {save_dir}: {e}"); exit(1) - - if parsed_args.initial_alpha is None: parsed_args.initial_alpha = float(parsed_args.rank) - # Ensure conv_alpha defaults correctly after initial_alpha might have defaulted to rank - if parsed_args.initial_conv_alpha is None: - # If conv_rank is set, use that for default alpha, else use the global initial_alpha (which might itself be rank) - conv_rank_for_alpha_default = parsed_args.conv_rank if parsed_args.conv_rank is not None else parsed_args.rank - parsed_args.initial_conv_alpha = float(conv_rank_for_alpha_default) if parsed_args.conv_rank is not None else parsed_args.initial_alpha - - - main(parsed_args) \ No newline at end of file diff --git a/tools/extract_lora_from_models-nw.py b/tools/extract_lora_from_models-new.py similarity index 95% rename from tools/extract_lora_from_models-nw.py rename to tools/extract_lora_from_models-new.py index 51ef86f..62e2f6a 100644 --- a/tools/extract_lora_from_models-nw.py +++ b/tools/extract_lora_from_models-new.py @@ -1,33 +1,5 @@ import sys import os - -# 1. Add sd-scripts directory to sys.path -# This block can now be potentially removed if no other sd-scripts imports are needed -# OR kept if there's a chance of re-introducing some utilities for other purposes. -# For full removal of the sd-scripts dependency for *this script's execution*, -# ensure no other `from library...` or `from networks...` exist. -# script_dir = os.path.dirname(os.path.abspath(__file__)) -# project_root = os.path.dirname(script_dir) -# sd_scripts_dir_path = os.path.join(project_root, "sd-scripts") - -# if sd_scripts_dir_path not in sys.path: -# sys.path.insert(0, sd_scripts_dir_path) - -# Now you can import from the library package and the networks package -# try: -# # model_util and sdxl_model_util REMOVED from here -# # from library.utils import setup_logging # REMOVED -# # from networks import lora # REMOVED -# except ImportError as e: -# print(f"Error importing from sd-scripts. Please check your sd-scripts folder structure.") -# # print(f"Attempted to load from: {sd_scripts_dir_path}") # If path addition is removed -# print(f"Original error: {e}") -# print("Current sys.path relevant entries:") -# for p in sys.path: -# if "sd-scripts" in p or "kohya_ss" in p: # Adjust if sd_scripts_dir_path is removed -# print(p) -# raise - import argparse import json import time diff --git a/tools/extract_lora_from_models-nw_v1.0.py b/tools/extract_lora_from_models-nw_v1.0.py deleted file mode 100644 index b1a9221..0000000 --- a/tools/extract_lora_from_models-nw_v1.0.py +++ /dev/null @@ -1,340 +0,0 @@ -import argparse -import json -import os -import time -import torch -from safetensors.torch import load_file, save_file -from tqdm import tqdm -from library import sai_model_spec, model_util, sdxl_model_util -import lora -from library.utils import setup_logging -setup_logging() -import logging -logger = logging.getLogger(__name__) - -MIN_SV = 1e-6 - -def index_sv_cumulative(S, target): - original_sum = float(torch.sum(S)) - cumulative_sums = torch.cumsum(S, dim=0) / original_sum - index = int(torch.searchsorted(cumulative_sums, target)) + 1 - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_fro(S, target): - S_squared = S.pow(2) - S_fro_sq = float(torch.sum(S_squared)) - sum_S_squared = torch.cumsum(S_squared, dim=0) / S_fro_sq - index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_ratio(S, target): - max_sv = S[0] - min_sv = max_sv / target - index = int(torch.sum(S > min_sv).item()) - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_knee(S): - """Determine rank using the knee point detection method.""" - n = len(S) - if n < 3: # Need at least 3 points to detect a knee - return 1 - - # Line coefficients from (1, S[0]) to (n, S[-1]) - a = S[0] - S[-1] - b = n - 1 - c = 1 * S[-1] - n * S[0] - - # Compute distances for each k - distances = [] - for k in range(1, n + 1): - dist = abs(a * k + b * S[k - 1] + c) / (a**2 + b**2)**0.5 - distances.append(dist) - - # Find index of maximum distance (add 1 because k starts at 1) - index = torch.argmax(torch.tensor(distances)).item() + 1 - index = max(1, min(index, n - 1)) - return index - -def index_sv_rel_decrease(S, tau=0.1): - """Determine rank based on relative decrease threshold.""" - if len(S) < 2: - return 1 - - # Compute ratios of consecutive singular values - ratios = S[1:] / S[:-1] - - # Find the smallest k where ratio < tau - for k in range(len(ratios)): - if ratios[k] < tau: - return max(1, k + 1) # k + 1 because we want rank after the drop - - # If no drop below tau, return max rank - return min(len(S), len(S) - 1) - -def save_to_file(file_name, model, state_dict, dtype, metadata=None): - if dtype is not None: - for key in list(state_dict.keys()): - if isinstance(state_dict[key], torch.Tensor): - state_dict[key] = state_dict[key].to(dtype) - if os.path.splitext(file_name)[1] == ".safetensors": - save_file(model, file_name, metadata) - else: - torch.save(model, file_name) - -def svd( - model_org=None, - model_tuned=None, - save_to=None, - dim=4, - v2=None, - sdxl=None, - conv_dim=None, - v_parameterization=None, - device=None, - save_precision=None, - clamp_quantile=0.99, - min_diff=0.01, - no_metadata=False, - load_precision=None, - load_original_model_to=None, - load_tuned_model_to=None, - dynamic_method=None, - dynamic_param=None, - verbose=False, -): - def str_to_dtype(p): - if p == "float": - return torch.float - if p == "fp16": - return torch.float16 - if p == "bf16": - return torch.bfloat16 - return None - - assert not (v2 and sdxl), "v2 and sdxl cannot be specified at the same time" - v_parameterization = v2 if v_parameterization is None else v_parameterization - - load_dtype = str_to_dtype(load_precision) if load_precision else None - save_dtype = str_to_dtype(save_precision) if save_precision else torch.float - work_device = "cpu" - - # Load models - if not sdxl: - logger.info(f"Loading original SD model: {model_org}") - text_encoder_o, _, unet_o = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_org) - text_encoders_o = [text_encoder_o] - if load_dtype: - text_encoder_o.to(load_dtype) - unet_o.to(load_dtype) - - logger.info(f"Loading tuned SD model: {model_tuned}") - text_encoder_t, _, unet_t = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_tuned) - text_encoders_t = [text_encoder_t] - if load_dtype: - text_encoder_t.to(load_dtype) - unet_t.to(load_dtype) - - model_version = model_util.get_model_version_str_for_sd1_sd2(v2, v_parameterization) - else: - device_org = load_original_model_to or "cpu" - device_tuned = load_tuned_model_to or "cpu" - - logger.info(f"Loading original SDXL model: {model_org}") - text_encoder_o1, text_encoder_o2, _, unet_o, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint( - sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_org, device_org - ) - text_encoders_o = [text_encoder_o1, text_encoder_o2] - if load_dtype: - text_encoder_o1.to(load_dtype) - text_encoder_o2.to(load_dtype) - unet_o.to(load_dtype) - - logger.info(f"Loading tuned SDXL model: {model_tuned}") - text_encoder_t1, text_encoder_t2, _, unet_t, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint( - sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_tuned, device_tuned - ) - text_encoders_t = [text_encoder_t1, text_encoder_t2] - if load_dtype: - text_encoder_t1.to(load_dtype) - text_encoder_t2.to(load_dtype) - unet_t.to(load_dtype) - - model_version = sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0 - - # Create LoRA network - kwargs = {"conv_dim": conv_dim, "conv_alpha": conv_dim} if conv_dim else {} - - # Define a small initial dimension for memory efficiency - init_dim = 4 # Small value to minimize memory usage - - # Create LoRA networks with minimal dimension - lora_network_o = lora.create_network(1.0, init_dim, init_dim, None, text_encoders_o, unet_o, **kwargs) - lora_network_t = lora.create_network(1.0, init_dim, init_dim, None, text_encoders_t, unet_t, **kwargs) - - assert len(lora_network_o.text_encoder_loras) == len(lora_network_t.text_encoder_loras), "Model versions differ (SD1.x vs SD2.x)" - - # Compute differences - diffs = {} - text_encoder_different = False - for lora_o, lora_t in zip(lora_network_o.text_encoder_loras, lora_network_t.text_encoder_loras): - lora_name = lora_o.lora_name - diff = lora_t.org_module.weight.to(work_device) - lora_o.org_module.weight.to(work_device) - lora_o.org_module.weight = None - lora_t.org_module.weight = None - - if not text_encoder_different and torch.max(torch.abs(diff)) > min_diff: - text_encoder_different = True - logger.info(f"Text encoder differs: max diff {torch.max(torch.abs(diff))} > {min_diff}") - diffs[lora_name] = diff - - for text_encoder in text_encoders_t: - del text_encoder - - if not text_encoder_different: - logger.warning("Text encoders are identical. Extracting U-Net only.") - lora_network_o.text_encoder_loras = [] - diffs.clear() - - for lora_o, lora_t in zip(lora_network_o.unet_loras, lora_network_t.unet_loras): - lora_name = lora_o.lora_name - diff = lora_t.org_module.weight.to(work_device) - lora_o.org_module.weight.to(work_device) - lora_o.org_module.weight = None - lora_t.org_module.weight = None - diffs[lora_name] = diff - - del lora_network_t, unet_t - - # Filter relevant modules - lora_names = set(lora.lora_name for lora in lora_network_o.text_encoder_loras + lora_network_o.unet_loras) - - # Extract and resize LoRA using SVD - logger.info("Extracting and resizing LoRA via SVD") - lora_weights = {} - with torch.no_grad(): - for lora_name in tqdm(lora_names): - mat = diffs[lora_name] - if device: - mat = mat.to(device) - mat = mat.to(torch.float) - - conv2d = len(mat.size()) == 4 - kernel_size = mat.size()[2:4] if conv2d else None - conv2d_3x3 = conv2d and kernel_size != (1, 1) - out_dim, in_dim = mat.size()[0:2] - - if conv2d: - mat = mat.flatten(start_dim=1) if conv2d_3x3 else mat.squeeze() - - U, S, Vh = torch.linalg.svd(mat) - - # Determine rank - max_rank = dim if not conv2d_3x3 or conv_dim is None else conv_dim - if dynamic_method: - if S[0] <= MIN_SV: - rank = 1 - elif dynamic_method == "sv_ratio": - rank = index_sv_ratio(S, dynamic_param) - elif dynamic_method == "sv_cumulative": - rank = index_sv_cumulative(S, dynamic_param) - elif dynamic_method == "sv_fro": - rank = index_sv_fro(S, dynamic_param) - elif dynamic_method == "sv_knee": - rank = index_sv_knee(S) - elif dynamic_method == "sv_rel_decrease": - rank = index_sv_rel_decrease(S, dynamic_param) - rank = min(rank, max_rank, in_dim, out_dim) - else: - rank = min(max_rank, in_dim, out_dim) - - # Truncate SVD components - U = U[:, :rank] @ torch.diag(S[:rank]) - Vh = Vh[:rank, :] - - # Clamp values - dist = torch.cat([U.flatten(), Vh.flatten()]) - hi_val = torch.quantile(dist, clamp_quantile) - U = U.clamp(-hi_val, hi_val) - Vh = Vh.clamp(-hi_val, hi_val) - - if conv2d: - U = U.reshape(out_dim, rank, 1, 1) - Vh = Vh.reshape(rank, in_dim, *kernel_size) - - U = U.to(work_device, dtype=save_dtype).contiguous() - Vh = Vh.to(work_device, dtype=save_dtype).contiguous() - lora_weights[lora_name] = (U, Vh) - - # Verbose output - if verbose: - s_sum = float(torch.sum(S)) - s_rank = float(torch.sum(S[:rank])) - fro = float(torch.sqrt(torch.sum(S.pow(2)))) - fro_rank = float(torch.sqrt(torch.sum(S[:rank].pow(2)))) - ratio = S[0] / S[rank - 1] if rank > 1 else float('inf') - logger.info(f"{lora_name:75} | sum(S) retained: {s_rank/s_sum:.1%}, fro retained: {fro_rank/fro:.1%}, max ratio: {ratio:.1f}, rank: {rank}") - - # Create state dict - lora_sd = {} - for lora_name, (up_weight, down_weight) in lora_weights.items(): - lora_sd[lora_name + ".lora_up.weight"] = up_weight - lora_sd[lora_name + ".lora_down.weight"] = down_weight - lora_sd[lora_name + ".alpha"] = torch.tensor(down_weight.size()[0], dtype=save_dtype) - - # Load and save LoRA - lora_network_save, lora_sd = lora.create_network_from_weights(1.0, None, None, text_encoders_o, unet_o, weights_sd=lora_sd) - lora_network_save.apply_to(text_encoders_o, unet_o) - info = lora_network_save.load_state_dict(lora_sd) - logger.info(f"Loaded extracted and resized LoRA weights: {info}") - - os.makedirs(os.path.dirname(save_to), exist_ok=True) - - # Metadata - net_kwargs = {"conv_dim": str(conv_dim), "conv_alpha": str(float(conv_dim))} if conv_dim else {} - metadata = { - "ss_v2": str(v2), - "ss_base_model_version": model_version, - "ss_network_module": "networks.lora", - "ss_network_dim": str(dim) if not dynamic_method else "Dynamic", - "ss_network_alpha": str(float(dim)) if not dynamic_method else "Dynamic", - "ss_network_args": json.dumps(net_kwargs), - } - if not no_metadata: - title = os.path.splitext(os.path.basename(save_to))[0] - sai_metadata = sai_model_spec.build_metadata(None, v2, v_parameterization, sdxl, True, False, time.time(), title=title) - metadata.update(sai_metadata) - - save_to_file(save_to, lora_sd, lora_sd, save_dtype, metadata) - logger.info(f"LoRA saved to: {save_to}") - -def setup_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("--v2", action="store_true", help="Load Stable Diffusion v2.x model") - parser.add_argument("--v_parameterization", action="store_true", help="Set v-parameterization metadata (defaults to v2)") - parser.add_argument("--sdxl", action="store_true", help="Load Stable Diffusion SDXL base model") - parser.add_argument("--load_precision", choices=[None, "float", "fp16", "bf16"], help="Precision for loading models") - parser.add_argument("--save_precision", choices=[None, "float", "fp16", "bf16"], default=None, help="Precision for saving LoRA") - parser.add_argument("--model_org", required=True, help="Original Stable Diffusion model (ckpt/safetensors)") - parser.add_argument("--model_tuned", required=True, help="Tuned Stable Diffusion model (ckpt/safetensors)") - parser.add_argument("--save_to", required=True, help="Output file name (ckpt/safetensors)") - parser.add_argument("--dim", type=int, default=4, help="Max dimension (rank) of LoRA for linear layers") - parser.add_argument("--conv_dim", type=int, help="Max dimension (rank) of LoRA for Conv2d-3x3") - parser.add_argument("--device", default="cuda", help="Device for computation (e.g., cuda)") - parser.add_argument("--clamp_quantile", type=float, default=0.99, help="Quantile for clamping weights") - parser.add_argument("--min_diff", type=float, default=0.01, help="Minimum weight difference to extract") - parser.add_argument("--no_metadata", action="store_true", help="Omit detailed metadata") - parser.add_argument("--load_original_model_to", help="Device for original model (SDXL only)") - parser.add_argument("--load_tuned_model_to", help="Device for tuned model (SDXL only)") - parser.add_argument("--dynamic_method", choices=[None, "sv_ratio", "sv_fro", "sv_cumulative", "sv_knee", "sv_rel_decrease"], help="Dynamic rank reduction method") - parser.add_argument("--dynamic_param", type=float, help="Parameter for dynamic rank reduction") - parser.add_argument("--verbose", action="store_true", help="Show detailed rank reduction info") - return parser - -if __name__ == "__main__": - parser = setup_parser() - args = parser.parse_args() - if args.dynamic_method and not args.dynamic_param: - raise ValueError("Dynamic method requires a dynamic parameter") - svd(**vars(args)) \ No newline at end of file diff --git a/tools/extract_lora_from_models-nw_v1.1.py b/tools/extract_lora_from_models-nw_v1.1.py deleted file mode 100644 index c4cf320..0000000 --- a/tools/extract_lora_from_models-nw_v1.1.py +++ /dev/null @@ -1,432 +0,0 @@ -import argparse -import json -import os -import time -import torch -from safetensors.torch import load_file, save_file -from tqdm import tqdm -from library import sai_model_spec, model_util, sdxl_model_util -import lora -from library.utils import setup_logging -setup_logging() -import logging -logger = logging.getLogger(__name__) - -MIN_SV = 1e-6 - -def index_sv_cumulative(S, target): - original_sum = float(torch.sum(S)) - cumulative_sums = torch.cumsum(S, dim=0) / original_sum - index = int(torch.searchsorted(cumulative_sums, target)) + 1 - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_fro(S, target): - S_squared = S.pow(2) - S_fro_sq = float(torch.sum(S_squared)) - sum_S_squared = torch.cumsum(S_squared, dim=0) / S_fro_sq - index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_ratio(S, target): - max_sv = S[0] - min_sv = max_sv / target - index = int(torch.sum(S > min_sv).item()) - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_knee_improved(S, MIN_SV_KNEE=1e-8): # MIN_SV_KNEE can be same as global MIN_SV or specific - """ - Determine rank using the knee point detection method with normalization. - Normalizes singular values and their indices to the [0,1] range - to make the knee detection scale-invariant. - """ - n = len(S) - if n < 3: # Need at least 3 points to detect a knee - return 1 - - # S is expected to be sorted in descending order. - s_max, s_min = S[0], S[-1] - - # Handle flat or nearly flat singular value spectrum - if s_max - s_min < MIN_SV_KNEE: - # If all singular values are almost the same, a knee is not well-defined. - # Returning 1 is a conservative choice for low rank. - # Alternatively, n // 2 or n - 1 could be chosen depending on desired behavior. - return 1 - - # Normalize singular values to [0, 1] - # s_normalized[0] will be 1, s_normalized[n-1] will be 0 - s_normalized = (S - s_min) / (s_max - s_min) - - # Normalize indices to [0, 1] - # x_normalized[0] will be 0, x_normalized[n-1] will be 1 - x_normalized = torch.linspace(0, 1, n, device=S.device, dtype=S.dtype) - - # The line in normalized space connects (x_norm[0], s_norm[0]) to (x_norm[n-1], s_norm[n-1]) - # This is (0, 1) to (1, 0). - # The equation of the line passing through (0,1) and (1,0) is x + y - 1 = 0. - # So, A=1, B=1, C=-1 for the line equation Ax + By + C = 0. - - # Calculate the perpendicular distance from each point (x_normalized[i], s_normalized[i]) to this line. - # Distance = |A*x_i + B*y_i + C| / sqrt(A^2 + B^2) - # Distance = |1*x_normalized + 1*s_normalized - 1| / sqrt(1^2 + 1^2) - # Distance = |x_normalized + s_normalized - 1| / sqrt(2) - - # The sqrt(2) denominator is constant and doesn't affect argmax, so it can be omitted for finding the index. - distances = (x_normalized + s_normalized - 1).abs() - - # Find the 0-based index of the point with the maximum distance. - knee_index_0based = torch.argmax(distances).item() - - # Convert 0-based index to 1-based rank. - rank = knee_index_0based + 1 - - # Clamp rank similar to original: must be > 0 and <= n-1 (typical for rank reduction) - # If knee_index_0based is n-1 (last point), rank becomes n. min(n, n-1) results in n-1. - rank = max(1, min(rank, n - 1)) - - return rank - -def index_sv_cumulative_knee(S, min_sv_threshold=1e-8): - """ - Determine rank using the knee point detection method on the normalized cumulative sum of singular values. - This method identifies a point where adding more singular values contributes diminishingly to the total sum. - """ - n = len(S) - if n < 3: # Need at least 3 points to detect a knee - return 1 - - s_sum = torch.sum(S) - # If all singular values are zero or very small, return rank 1. - if s_sum < min_sv_threshold: - return 1 - - # Calculate cumulative sum of singular values, normalized by the total sum. - # y_values[0] = S[0]/s_sum, ..., y_values[n-1] = 1.0 - y_values = torch.cumsum(S, dim=0) / s_sum - - # Normalize these y_values (cumulative sums) to the range [0,1] for knee detection. - y_min, y_max = y_values[0], y_values[n-1] # y_max is typically 1.0 - - # If the normalized cumulative sum curve is very flat (e.g., S[0] captures almost all energy), - # it implies the first few components are dominant. - if y_max - y_min < min_sv_threshold: # Using min_sv_threshold here as a sensitivity for flatness - # This condition means (S[0] + ... + S[n-1]) - S[0] is small relative to sum(S) if n>1 - # Effectively, S[1:] components are negligible. - return 1 - - # y_norm[0] = 0, y_norm[n-1] = 1 (represents the normalized cumulative sum from start to end) - y_norm = (y_values - y_min) / (y_max - y_min) - - # x_values are indices, normalized to [0, 1] - # x_norm[0] = 0, ..., x_norm[n-1] = 1 - x_norm = torch.linspace(0, 1, n, device=S.device, dtype=S.dtype) - - # The "knee" is the point on the curve (x_norm[i], y_norm[i]) that is farthest - # from the line connecting the start and end of this normalized curve. - # In this normalized space, the line connects (0,0) to (1,1). - # The equation of this line is Y = X, or X - Y = 0. - # The distance from a point (x_i, y_i) to the line X - Y = 0 is |x_i - y_i| / sqrt(1^2 + (-1)^2). - # We can maximize |x_i - y_i| (or |y_i - x_i|) as sqrt(2) is a constant factor. - distances = (y_norm - x_norm).abs() # y_norm is expected to be >= x_norm for a concave cumulative curve. - - # Find the 0-based index of the point with the maximum distance. - knee_index_0based = torch.argmax(distances).item() - - # Convert 0-based index to 1-based rank. - rank = knee_index_0based + 1 - - # Clamp rank to be between 1 and n-1 (as n elements give n-1 possible ranks for truncation). - # A rank of n means no truncation. n-1 is the highest sensible rank for reduction. - rank = max(1, min(rank, n - 1)) - - return rank - -def index_sv_rel_decrease(S, tau=0.1): - """Determine rank based on relative decrease threshold.""" - if len(S) < 2: - return 1 - - # Compute ratios of consecutive singular values - ratios = S[1:] / S[:-1] - - # Find the smallest k where ratio < tau - for k in range(len(ratios)): - if ratios[k] < tau: - return max(1, k + 1) # k + 1 because we want rank after the drop - - # If no drop below tau, return max rank - return min(len(S), len(S) - 1) - -def save_to_file(file_name, model, state_dict, dtype, metadata=None): - if dtype is not None: - for key in list(state_dict.keys()): - if isinstance(state_dict[key], torch.Tensor): - state_dict[key] = state_dict[key].to(dtype) - if os.path.splitext(file_name)[1] == ".safetensors": - save_file(model, file_name, metadata) - else: - torch.save(model, file_name) - -def svd( - model_org=None, - model_tuned=None, - save_to=None, - dim=4, - v2=None, - sdxl=None, - conv_dim=None, - v_parameterization=None, - device=None, - save_precision=None, - clamp_quantile=0.99, - min_diff=0.01, - no_metadata=False, - load_precision=None, - load_original_model_to=None, - load_tuned_model_to=None, - dynamic_method=None, - dynamic_param=None, - verbose=False, -): - def str_to_dtype(p): - if p == "float": - return torch.float - if p == "fp16": - return torch.float16 - if p == "bf16": - return torch.bfloat16 - return None - - assert not (v2 and sdxl), "v2 and sdxl cannot be specified at the same time" - v_parameterization = v2 if v_parameterization is None else v_parameterization - - load_dtype = str_to_dtype(load_precision) if load_precision else None - save_dtype = str_to_dtype(save_precision) if save_precision else torch.float - work_device = "cpu" - - # Load models - if not sdxl: - logger.info(f"Loading original SD model: {model_org}") - text_encoder_o, _, unet_o = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_org) - text_encoders_o = [text_encoder_o] - if load_dtype: - text_encoder_o.to(load_dtype) - unet_o.to(load_dtype) - - logger.info(f"Loading tuned SD model: {model_tuned}") - text_encoder_t, _, unet_t = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_tuned) - text_encoders_t = [text_encoder_t] - if load_dtype: - text_encoder_t.to(load_dtype) - unet_t.to(load_dtype) - - model_version = model_util.get_model_version_str_for_sd1_sd2(v2, v_parameterization) - else: - device_org = load_original_model_to or "cpu" - device_tuned = load_tuned_model_to or "cpu" - - logger.info(f"Loading original SDXL model: {model_org}") - text_encoder_o1, text_encoder_o2, _, unet_o, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint( - sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_org, device_org - ) - text_encoders_o = [text_encoder_o1, text_encoder_o2] - if load_dtype: - text_encoder_o1.to(load_dtype) - text_encoder_o2.to(load_dtype) - unet_o.to(load_dtype) - - logger.info(f"Loading tuned SDXL model: {model_tuned}") - text_encoder_t1, text_encoder_t2, _, unet_t, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint( - sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_tuned, device_tuned - ) - text_encoders_t = [text_encoder_t1, text_encoder_t2] - if load_dtype: - text_encoder_t1.to(load_dtype) - text_encoder_t2.to(load_dtype) - unet_t.to(load_dtype) - - model_version = sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0 - - # Create LoRA network - kwargs = {"conv_dim": conv_dim, "conv_alpha": conv_dim} if conv_dim else {} - - # Define a small initial dimension for memory efficiency - init_dim = 4 # Small value to minimize memory usage - - # Create LoRA networks with minimal dimension - lora_network_o = lora.create_network(1.0, init_dim, init_dim, None, text_encoders_o, unet_o, **kwargs) - lora_network_t = lora.create_network(1.0, init_dim, init_dim, None, text_encoders_t, unet_t, **kwargs) - - assert len(lora_network_o.text_encoder_loras) == len(lora_network_t.text_encoder_loras), "Model versions differ (SD1.x vs SD2.x)" - - # Compute differences - diffs = {} - text_encoder_different = False - for lora_o, lora_t in zip(lora_network_o.text_encoder_loras, lora_network_t.text_encoder_loras): - lora_name = lora_o.lora_name - diff = lora_t.org_module.weight.to(work_device) - lora_o.org_module.weight.to(work_device) - lora_o.org_module.weight = None - lora_t.org_module.weight = None - - if not text_encoder_different and torch.max(torch.abs(diff)) > min_diff: - text_encoder_different = True - logger.info(f"Text encoder differs: max diff {torch.max(torch.abs(diff))} > {min_diff}") - diffs[lora_name] = diff - - for text_encoder in text_encoders_t: - del text_encoder - - if not text_encoder_different: - logger.warning("Text encoders are identical. Extracting U-Net only.") - lora_network_o.text_encoder_loras = [] - diffs.clear() - - for lora_o, lora_t in zip(lora_network_o.unet_loras, lora_network_t.unet_loras): - lora_name = lora_o.lora_name - diff = lora_t.org_module.weight.to(work_device) - lora_o.org_module.weight.to(work_device) - lora_o.org_module.weight = None - lora_t.org_module.weight = None - diffs[lora_name] = diff - - del lora_network_t, unet_t - - # Filter relevant modules - lora_names = set(lora.lora_name for lora in lora_network_o.text_encoder_loras + lora_network_o.unet_loras) - - # Extract and resize LoRA using SVD - logger.info("Extracting and resizing LoRA via SVD") - lora_weights = {} - with torch.no_grad(): - for lora_name in tqdm(lora_names): - mat = diffs[lora_name] - if device: - mat = mat.to(device) - mat = mat.to(torch.float) - - conv2d = len(mat.size()) == 4 - kernel_size = mat.size()[2:4] if conv2d else None - conv2d_3x3 = conv2d and kernel_size != (1, 1) - out_dim, in_dim = mat.size()[0:2] - - if conv2d: - mat = mat.flatten(start_dim=1) if conv2d_3x3 else mat.squeeze() - - U, S, Vh = torch.linalg.svd(mat) - - # Determine rank - max_rank = dim if not conv2d_3x3 or conv_dim is None else conv_dim - if dynamic_method: - if S[0] <= MIN_SV: - rank = 1 - elif dynamic_method == "sv_ratio": - rank = index_sv_ratio(S, dynamic_param) - elif dynamic_method == "sv_cumulative": - rank = index_sv_cumulative(S, dynamic_param) - elif dynamic_method == "sv_fro": - rank = index_sv_fro(S, dynamic_param) - elif dynamic_method == "sv_knee": - rank = index_sv_knee_improved(S, MIN_SV) # Pass MIN_SV or a specific threshold - elif dynamic_method == "sv_cumulative_knee": # New method - rank = index_sv_cumulative_knee(S, MIN_SV) # Pass MIN_SV or a specific threshold - elif dynamic_method == "sv_rel_decrease": - rank = index_sv_rel_decrease(S, dynamic_param) - rank = min(rank, max_rank, in_dim, out_dim) - else: - rank = min(max_rank, in_dim, out_dim) - - # Truncate SVD components - U = U[:, :rank] @ torch.diag(S[:rank]) - Vh = Vh[:rank, :] - - # Clamp values - dist = torch.cat([U.flatten(), Vh.flatten()]) - hi_val = torch.quantile(dist, clamp_quantile) - U = U.clamp(-hi_val, hi_val) - Vh = Vh.clamp(-hi_val, hi_val) - - if conv2d: - U = U.reshape(out_dim, rank, 1, 1) - Vh = Vh.reshape(rank, in_dim, *kernel_size) - - U = U.to(work_device, dtype=save_dtype).contiguous() - Vh = Vh.to(work_device, dtype=save_dtype).contiguous() - lora_weights[lora_name] = (U, Vh) - - # Verbose output - if verbose: - s_sum = float(torch.sum(S)) - s_rank = float(torch.sum(S[:rank])) - fro = float(torch.sqrt(torch.sum(S.pow(2)))) - fro_rank = float(torch.sqrt(torch.sum(S[:rank].pow(2)))) - ratio = S[0] / S[rank - 1] if rank > 1 else float('inf') - logger.info(f"{lora_name:75} | sum(S) retained: {s_rank/s_sum:.1%}, fro retained: {fro_rank/fro:.1%}, max ratio: {ratio:.1f}, rank: {rank}") - - # Create state dict - lora_sd = {} - for lora_name, (up_weight, down_weight) in lora_weights.items(): - lora_sd[lora_name + ".lora_up.weight"] = up_weight - lora_sd[lora_name + ".lora_down.weight"] = down_weight - lora_sd[lora_name + ".alpha"] = torch.tensor(down_weight.size()[0], dtype=save_dtype) - - # Load and save LoRA - lora_network_save, lora_sd = lora.create_network_from_weights(1.0, None, None, text_encoders_o, unet_o, weights_sd=lora_sd) - lora_network_save.apply_to(text_encoders_o, unet_o) - info = lora_network_save.load_state_dict(lora_sd) - logger.info(f"Loaded extracted and resized LoRA weights: {info}") - - os.makedirs(os.path.dirname(save_to), exist_ok=True) - - # Metadata - net_kwargs = {"conv_dim": str(conv_dim), "conv_alpha": str(float(conv_dim))} if conv_dim else {} - metadata = { - "ss_v2": str(v2), - "ss_base_model_version": model_version, - "ss_network_module": "networks.lora", - "ss_network_dim": str(dim) if not dynamic_method else "Dynamic", - "ss_network_alpha": str(float(dim)) if not dynamic_method else "Dynamic", - "ss_network_args": json.dumps(net_kwargs), - } - if not no_metadata: - title = os.path.splitext(os.path.basename(save_to))[0] - sai_metadata = sai_model_spec.build_metadata(None, v2, v_parameterization, sdxl, True, False, time.time(), title=title) - metadata.update(sai_metadata) - - save_to_file(save_to, lora_sd, lora_sd, save_dtype, metadata) - logger.info(f"LoRA saved to: {save_to}") - -def setup_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("--v2", action="store_true", help="Load Stable Diffusion v2.x model") - parser.add_argument("--v_parameterization", action="store_true", help="Set v-parameterization metadata (defaults to v2)") - parser.add_argument("--sdxl", action="store_true", help="Load Stable Diffusion SDXL base model") - parser.add_argument("--load_precision", choices=[None, "float", "fp16", "bf16"], help="Precision for loading models") - parser.add_argument("--save_precision", choices=[None, "float", "fp16", "bf16"], default=None, help="Precision for saving LoRA") - parser.add_argument("--model_org", required=True, help="Original Stable Diffusion model (ckpt/safetensors)") - parser.add_argument("--model_tuned", required=True, help="Tuned Stable Diffusion model (ckpt/safetensors)") - parser.add_argument("--save_to", required=True, help="Output file name (ckpt/safetensors)") - parser.add_argument("--dim", type=int, default=4, help="Max dimension (rank) of LoRA for linear layers") - parser.add_argument("--conv_dim", type=int, help="Max dimension (rank) of LoRA for Conv2d-3x3") - parser.add_argument("--device", default="cuda", help="Device for computation (e.g., cuda)") - parser.add_argument("--clamp_quantile", type=float, default=0.99, help="Quantile for clamping weights") - parser.add_argument("--min_diff", type=float, default=0.01, help="Minimum weight difference to extract") - parser.add_argument("--no_metadata", action="store_true", help="Omit detailed metadata") - parser.add_argument("--load_original_model_to", help="Device for original model (SDXL only)") - parser.add_argument("--load_tuned_model_to", help="Device for tuned model (SDXL only)") - parser.add_argument("--dynamic_param", type=float, help="Parameter for dynamic rank reduction") - parser.add_argument("--verbose", action="store_true", help="Show detailed rank reduction info") - parser.add_argument( - "--dynamic_method", - choices=[None, "sv_ratio", "sv_fro", "sv_cumulative", "sv_knee", "sv_rel_decrease", "sv_cumulative_knee"], # Added "sv_cumulative_knee" - help="Dynamic rank reduction method" - ) - return parser - -if __name__ == "__main__": - parser = setup_parser() - args = parser.parse_args() - if args.dynamic_method and not args.dynamic_param: - raise ValueError("Dynamic method requires a dynamic parameter") - svd(**vars(args)) \ No newline at end of file diff --git a/tools/extract_lora_from_models-nw_v1.2.py b/tools/extract_lora_from_models-nw_v1.2.py deleted file mode 100644 index 277623b..0000000 --- a/tools/extract_lora_from_models-nw_v1.2.py +++ /dev/null @@ -1,545 +0,0 @@ -import sys -import os - -# 1. Add sd-scripts directory to sys.path -script_dir = os.path.dirname(os.path.abspath(__file__)) -project_root = os.path.dirname(script_dir) -sd_scripts_dir_path = os.path.join(project_root, "sd-scripts") - -if sd_scripts_dir_path not in sys.path: - sys.path.insert(0, sd_scripts_dir_path) - -# Now you can import from the library package and the networks package -try: - from library import sai_model_spec, model_util, sdxl_model_util - from library.utils import setup_logging - from networks import lora # <--- CORRECTED LORA IMPORT -except ImportError as e: - print(f"Error importing from sd-scripts. Please check your sd-scripts folder structure.") - print(f"Attempted to load from: {sd_scripts_dir_path}") - print(f"Original error: {e}") - print("Current sys.path relevant entries:") - for p in sys.path: - if "sd-scripts" in p or "kohya_ss" in p: # Print relevant paths for debugging - print(p) - # Ensure 'networks' directory exists in 'sd-scripts' and contains 'lora.py' - # Also ensure 'sd-scripts/networks/__init__.py' exists. - raise - -# --- The rest of your script --- -import argparse -import json -# import os # Already imported -import time -import torch -from safetensors.torch import load_file, save_file -from tqdm import tqdm - -setup_logging() -import logging -logger = logging.getLogger(__name__) - -MIN_SV = 1e-6 - -def index_sv_cumulative(S, target): - original_sum = float(torch.sum(S)) - cumulative_sums = torch.cumsum(S, dim=0) / original_sum - index = int(torch.searchsorted(cumulative_sums, target)) + 1 - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_fro(S, target): - S_squared = S.pow(2) - S_fro_sq = float(torch.sum(S_squared)) - sum_S_squared = torch.cumsum(S_squared, dim=0) / S_fro_sq - index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_ratio(S, target): - max_sv = S[0] - min_sv = max_sv / target - index = int(torch.sum(S > min_sv).item()) - index = max(1, min(index, len(S) - 1)) - return index - -def index_sv_knee_improved(S, MIN_SV_KNEE=1e-8): # MIN_SV_KNEE can be same as global MIN_SV or specific - """ - Determine rank using the knee point detection method with normalization. - Normalizes singular values and their indices to the [0,1] range - to make the knee detection scale-invariant. - """ - n = len(S) - if n < 3: # Need at least 3 points to detect a knee - return 1 - - # S is expected to be sorted in descending order. - s_max, s_min = S[0], S[-1] - - # Handle flat or nearly flat singular value spectrum - if s_max - s_min < MIN_SV_KNEE: - # If all singular values are almost the same, a knee is not well-defined. - # Returning 1 is a conservative choice for low rank. - # Alternatively, n // 2 or n - 1 could be chosen depending on desired behavior. - return 1 - - # Normalize singular values to [0, 1] - # s_normalized[0] will be 1, s_normalized[n-1] will be 0 - s_normalized = (S - s_min) / (s_max - s_min) - - # Normalize indices to [0, 1] - # x_normalized[0] will be 0, x_normalized[n-1] will be 1 - x_normalized = torch.linspace(0, 1, n, device=S.device, dtype=S.dtype) - - # The line in normalized space connects (x_norm[0], s_norm[0]) to (x_norm[n-1], s_norm[n-1]) - # This is (0, 1) to (1, 0). - # The equation of the line passing through (0,1) and (1,0) is x + y - 1 = 0. - # So, A=1, B=1, C=-1 for the line equation Ax + By + C = 0. - - # Calculate the perpendicular distance from each point (x_normalized[i], s_normalized[i]) to this line. - # Distance = |A*x_i + B*y_i + C| / sqrt(A^2 + B^2) - # Distance = |1*x_normalized + 1*s_normalized - 1| / sqrt(1^2 + 1^2) - # Distance = |x_normalized + s_normalized - 1| / sqrt(2) - - # The sqrt(2) denominator is constant and doesn't affect argmax, so it can be omitted for finding the index. - distances = (x_normalized + s_normalized - 1).abs() - - # Find the 0-based index of the point with the maximum distance. - knee_index_0based = torch.argmax(distances).item() - - # Convert 0-based index to 1-based rank. - rank = knee_index_0based + 1 - - # Clamp rank similar to original: must be > 0 and <= n-1 (typical for rank reduction) - # If knee_index_0based is n-1 (last point), rank becomes n. min(n, n-1) results in n-1. - rank = max(1, min(rank, n - 1)) - - return rank - -def index_sv_cumulative_knee(S, min_sv_threshold=1e-8): - """ - Determine rank using the knee point detection method on the normalized cumulative sum of singular values. - This method identifies a point where adding more singular values contributes diminishingly to the total sum. - """ - n = len(S) - if n < 3: # Need at least 3 points to detect a knee - return 1 - - s_sum = torch.sum(S) - # If all singular values are zero or very small, return rank 1. - if s_sum < min_sv_threshold: - return 1 - - # Calculate cumulative sum of singular values, normalized by the total sum. - # y_values[0] = S[0]/s_sum, ..., y_values[n-1] = 1.0 - y_values = torch.cumsum(S, dim=0) / s_sum - - # Normalize these y_values (cumulative sums) to the range [0,1] for knee detection. - y_min, y_max = y_values[0], y_values[n-1] # y_max is typically 1.0 - - # If the normalized cumulative sum curve is very flat (e.g., S[0] captures almost all energy), - # it implies the first few components are dominant. - if y_max - y_min < min_sv_threshold: # Using min_sv_threshold here as a sensitivity for flatness - # This condition means (S[0] + ... + S[n-1]) - S[0] is small relative to sum(S) if n>1 - # Effectively, S[1:] components are negligible. - return 1 - - # y_norm[0] = 0, y_norm[n-1] = 1 (represents the normalized cumulative sum from start to end) - y_norm = (y_values - y_min) / (y_max - y_min) - - # x_values are indices, normalized to [0, 1] - # x_norm[0] = 0, ..., x_norm[n-1] = 1 - x_norm = torch.linspace(0, 1, n, device=S.device, dtype=S.dtype) - - # The "knee" is the point on the curve (x_norm[i], y_norm[i]) that is farthest - # from the line connecting the start and end of this normalized curve. - # In this normalized space, the line connects (0,0) to (1,1). - # The equation of this line is Y = X, or X - Y = 0. - # The distance from a point (x_i, y_i) to the line X - Y = 0 is |x_i - y_i| / sqrt(1^2 + (-1)^2). - # We can maximize |x_i - y_i| (or |y_i - x_i|) as sqrt(2) is a constant factor. - distances = (y_norm - x_norm).abs() # y_norm is expected to be >= x_norm for a concave cumulative curve. - - # Find the 0-based index of the point with the maximum distance. - knee_index_0based = torch.argmax(distances).item() - - # Convert 0-based index to 1-based rank. - rank = knee_index_0based + 1 - - # Clamp rank to be between 1 and n-1 (as n elements give n-1 possible ranks for truncation). - # A rank of n means no truncation. n-1 is the highest sensible rank for reduction. - rank = max(1, min(rank, n - 1)) - - return rank - -def index_sv_rel_decrease(S, tau=0.1): - """Determine rank based on relative decrease threshold.""" - if len(S) < 2: - # For matrices with fewer than 2 singular values, a relative decrease - # isn't meaningful. Returning 1 is a sensible default. - return 1 - - # Compute ratios of consecutive singular values - # S is sorted descending, so S[:-1] >= S[1:] - # ratios will be <= 1.0 - ratios = S[1:] / S[:-1] # Example: S=[10,1,0.5], ratios=[0.1, 0.5] - - # Find the smallest k such that S[k+1]/S[k] < tau. - # The rank would then be k+1, as we include S[k]. - for k in range(len(ratios)): # k ranges from 0 to len(S)-2 - if ratios[k] < tau: - # We found a significant drop after the k-th singular value. - # So, we keep k+1 singular values (indices 0 to k). - # The rank is k+1. Since k >= 0, k+1 >= 1. - return k + 1 - - # If no drop below tau was found, it means all relative decreases were >= tau. - # In this case, this method suggests using all available singular values. - # The actual rank will be capped later by args.dim/conv_dim and matrix dimensions. - return len(S) - -def save_to_file(file_name, model_to_save, state_dict_content, dtype, metadata=None): # Renamed params for clarity - if dtype is not None: - for key in list(state_dict_content.keys()): - if isinstance(state_dict_content[key], torch.Tensor): - state_dict_content[key] = state_dict_content[key].to(dtype) - - # save_file from safetensors expects a state_dict as the first argument if metadata is also passed. - # torch.save would also expect the state_dict. - # The 'model' variable being passed to save_file should be the state_dict itself. - if os.path.splitext(file_name)[1] == ".safetensors": - save_file(model_to_save, file_name, metadata=metadata) # Pass metadata correctly - else: - torch.save(model_to_save, file_name) - -def svd( - model_org=None, - model_tuned=None, - save_to=None, - dim=4, - v2=None, - sdxl=None, - conv_dim=None, - v_parameterization=None, - device=None, - save_precision=None, - clamp_quantile=0.99, - min_diff=0.01, - no_metadata=False, - load_precision=None, - load_original_model_to=None, - load_tuned_model_to=None, - dynamic_method=None, - dynamic_param=None, - verbose=False, -): - def str_to_dtype(p): - if p == "float": - return torch.float - if p == "fp16": - return torch.float16 - if p == "bf16": - return torch.bfloat16 - return None - - assert not (v2 and sdxl), "v2 and sdxl cannot be specified at the same time" - v_parameterization = v2 if v_parameterization is None else v_parameterization - - load_dtype = str_to_dtype(load_precision) if load_precision else None - save_dtype = str_to_dtype(save_precision) if save_precision else torch.float - work_device = "cpu" - - # Load models - if not sdxl: - logger.info(f"Loading original SD model: {model_org}") - text_encoder_o, _, unet_o = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_org) - text_encoders_o = [text_encoder_o] - if load_dtype: - text_encoder_o.to(load_dtype) - unet_o.to(load_dtype) - - logger.info(f"Loading tuned SD model: {model_tuned}") - text_encoder_t, _, unet_t = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_tuned) - text_encoders_t = [text_encoder_t] - if load_dtype: - text_encoder_t.to(load_dtype) - unet_t.to(load_dtype) - - model_version = model_util.get_model_version_str_for_sd1_sd2(v2, v_parameterization) - else: - device_org = load_original_model_to or "cpu" - device_tuned = load_tuned_model_to or "cpu" - - logger.info(f"Loading original SDXL model: {model_org}") - text_encoder_o1, text_encoder_o2, _, unet_o, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint( - sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_org, device_org - ) - text_encoders_o = [text_encoder_o1, text_encoder_o2] - if load_dtype: - text_encoder_o1.to(load_dtype) - text_encoder_o2.to(load_dtype) - unet_o.to(load_dtype) - - logger.info(f"Loading tuned SDXL model: {model_tuned}") - text_encoder_t1, text_encoder_t2, _, unet_t, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint( - sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_tuned, device_tuned - ) - text_encoders_t = [text_encoder_t1, text_encoder_t2] - if load_dtype: - text_encoder_t1.to(load_dtype) - text_encoder_t2.to(load_dtype) - unet_t.to(load_dtype) - - model_version = sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0 - - # Create LoRA network - kwargs = {"conv_dim": conv_dim, "conv_alpha": conv_dim} if conv_dim else {} - - # Define a small initial dimension for memory efficiency - init_dim = 4 # Small value to minimize memory usage - - # Create LoRA networks with minimal dimension - lora_network_o = lora.create_network(1.0, init_dim, init_dim, None, text_encoders_o, unet_o, **kwargs) - lora_network_t = lora.create_network(1.0, init_dim, init_dim, None, text_encoders_t, unet_t, **kwargs) - - assert len(lora_network_o.text_encoder_loras) == len(lora_network_t.text_encoder_loras), "Model versions differ (SD1.x vs SD2.x)" - - # Compute differences - diffs = {} - text_encoder_different = False - for lora_o, lora_t in zip(lora_network_o.text_encoder_loras, lora_network_t.text_encoder_loras): - lora_name = lora_o.lora_name - diff = lora_t.org_module.weight.to(work_device) - lora_o.org_module.weight.to(work_device) - lora_o.org_module.weight = None - lora_t.org_module.weight = None - - if not text_encoder_different and torch.max(torch.abs(diff)) > min_diff: - text_encoder_different = True - logger.info(f"Text encoder differs: max diff {torch.max(torch.abs(diff))} > {min_diff}") - diffs[lora_name] = diff - - for text_encoder in text_encoders_t: - del text_encoder - - if not text_encoder_different: - logger.warning("Text encoders are identical. Extracting U-Net only.") - lora_network_o.text_encoder_loras = [] - diffs.clear() - - for lora_o, lora_t in zip(lora_network_o.unet_loras, lora_network_t.unet_loras): - lora_name = lora_o.lora_name - diff = lora_t.org_module.weight.to(work_device) - lora_o.org_module.weight.to(work_device) - lora_o.org_module.weight = None - lora_t.org_module.weight = None - diffs[lora_name] = diff - - del lora_network_t, unet_t - - # Filter relevant modules - lora_names = set(lora.lora_name for lora in lora_network_o.text_encoder_loras + lora_network_o.unet_loras) - - # Extract and resize LoRA using SVD - logger.info("Extracting and resizing LoRA via SVD") - lora_weights = {} - with torch.no_grad(): - for lora_name in tqdm(lora_names): - mat = diffs[lora_name] - if device: - mat = mat.to(device) - mat = mat.to(torch.float) - - conv2d = len(mat.size()) == 4 - kernel_size = mat.size()[2:4] if conv2d else None - conv2d_3x3 = conv2d and kernel_size != (1, 1) - out_dim, in_dim = mat.size()[0:2] - - if conv2d: - mat = mat.flatten(start_dim=1) if conv2d_3x3 else mat.squeeze() - - U, S, Vh = torch.linalg.svd(mat) - - # Determine rank - max_rank = dim if not conv2d_3x3 or conv_dim is None else conv_dim - if dynamic_method: - if S[0] <= MIN_SV: - rank = 1 - elif dynamic_method == "sv_ratio": - rank = index_sv_ratio(S, dynamic_param) - elif dynamic_method == "sv_cumulative": - rank = index_sv_cumulative(S, dynamic_param) - elif dynamic_method == "sv_fro": - rank = index_sv_fro(S, dynamic_param) - elif dynamic_method == "sv_knee": - rank = index_sv_knee_improved(S, MIN_SV) # Pass MIN_SV or a specific threshold - elif dynamic_method == "sv_cumulative_knee": # New method - rank = index_sv_cumulative_knee(S, MIN_SV) # Pass MIN_SV or a specific threshold - elif dynamic_method == "sv_rel_decrease": - rank = index_sv_rel_decrease(S, dynamic_param) - rank = min(rank, max_rank, in_dim, out_dim) - else: - rank = min(max_rank, in_dim, out_dim) - - rank = max(1, rank) # Ensure rank is at least 1 - - # Truncate SVD components and distribute sqrt(S) - S_k = S[:rank] - U_k = U[:, :rank] - Vh_k = Vh[:rank, :] - - # Ensure S_k values are non-negative before sqrt to avoid NaN from tiny negative SVD artifacts - S_k_non_negative = torch.clamp(S_k, min=0.0) # Use 0.0 for float tensor - s_sqrt = torch.sqrt(S_k_non_negative) - - # Distribute s_sqrt: U_final = U_k * diag(s_sqrt), Vh_final = diag(s_sqrt) * Vh_k - # Using efficient broadcasting for multiplication: - U_final = U_k * s_sqrt.unsqueeze(0) # (out_dim, rank) * (1, rank) - Vh_final = Vh_k * s_sqrt.unsqueeze(1) # (rank, in_dim_effective) * (rank, 1) - - # Clamp values (applied to U_final, Vh_final) - # The distribution of values in U_final and Vh_final might be different - # than the original U and Vh, so the effect of clamping might change. - dist = torch.cat([U_final.flatten(), Vh_final.flatten()]) - hi_val = torch.quantile(dist, clamp_quantile) - U_clamped = U_final.clamp(-hi_val, hi_val) - Vh_clamped = Vh_final.clamp(-hi_val, hi_val) - - if conv2d: - # U_clamped is (out_dim, rank) - U_clamped = U_clamped.reshape(out_dim, rank, 1, 1) - - # Vh_clamped is (rank, in_dim * possibly_kernel_dims) - # It needs to be reshaped back to (rank, in_dim, kernel_h, kernel_w) - if conv2d_3x3 : # Original mat was (out_dim, in_dim * k_h * k_w) - Vh_clamped = Vh_clamped.reshape(rank, in_dim, *kernel_size) - else: # Original mat was (out_dim, in_dim) for 1x1 conv, kernel_size is (1,1) - Vh_clamped = Vh_clamped.reshape(rank, in_dim, *kernel_size) # kernel_size is (1,1) here - - U_clamped = U_clamped.to(work_device, dtype=save_dtype).contiguous() - Vh_clamped = Vh_clamped.to(work_device, dtype=save_dtype).contiguous() - lora_weights[lora_name] = (U_clamped, Vh_clamped) - - # Verbose output (S values are pre-modification for accurate reporting of original SVD properties) - if verbose: - s_sum_total = float(torch.sum(S)) - s_sum_rank = float(torch.sum(S[:rank])) # Sum of the singular values actually used for reconstruction - - fro_orig_total = float(torch.sqrt(torch.sum(S.pow(2)))) - fro_reconstructed_rank = float(torch.sqrt(torch.sum(S[:rank].pow(2)))) # Frobenius norm of the matrix part represented by chosen rank - - # Ratio of the largest retained singular value to the smallest retained singular value - # S is sorted, S[0] is max. S[rank-1] is the smallest singular value included if rank > 0. - ratio_sv = S[0] / S[rank - 1] if rank > 0 and S[rank - 1].abs() > MIN_SV else float('inf') # Avoid division by zero or tiny number - - # Ensure denominators are not zero for percentages - sum_s_retained_percentage = (s_sum_rank / s_sum_total) if s_sum_total > MIN_SV else 1.0 - fro_retained_percentage = (fro_reconstructed_rank / fro_orig_total) if fro_orig_total > MIN_SV else 1.0 - - logger.info( - f"{lora_name:75} | rank: {rank}, " - f"sum(S) retained: {sum_s_retained_percentage:.2%}, " - f"Frobenius norm retained: {fro_retained_percentage:.2%}, " - f"max_retained_sv/min_retained_sv ratio: {ratio_sv:.2f}" - ) - - # Create state dict - lora_sd = {} - for lora_name, (up_weight, down_weight) in lora_weights.items(): - lora_sd[lora_name + ".lora_up.weight"] = up_weight - lora_sd[lora_name + ".lora_down.weight"] = down_weight - lora_sd[lora_name + ".alpha"] = torch.tensor(down_weight.size()[0], dtype=save_dtype) # alpha is rank - - # Load and save LoRA - lora_network_save, lora_sd = lora.create_network_from_weights(1.0, None, None, text_encoders_o, unet_o, weights_sd=lora_sd) - lora_network_save.apply_to(text_encoders_o, unet_o) # This applies weights, not strictly necessary if just saving sd - info = lora_network_save.load_state_dict(lora_sd) # This populates the network object with the weights from lora_sd - logger.info(f"Loaded extracted and resized LoRA weights into network object: {info}") - - - os.makedirs(os.path.dirname(save_to), exist_ok=True) - - # Metadata - net_kwargs = {"conv_dim": str(conv_dim), "conv_alpha": str(float(conv_dim))} if conv_dim else {} - # Determine network_dim and network_alpha for metadata based on dynamic method - if dynamic_method: - network_dim_meta = "Dynamic" - network_alpha_meta = "Dynamic" # Alpha is rank, which is dynamic - else: - network_dim_meta = str(dim) - network_alpha_meta = str(float(dim)) # Alpha is rank, which is dim - - metadata = { - "ss_v2": str(v2), - "ss_base_model_version": model_version, - "ss_network_module": "networks.lora", - "ss_network_dim": network_dim_meta, - "ss_network_alpha": network_alpha_meta, # Alpha is typically the rank - "ss_network_args": json.dumps(net_kwargs), - "ss_lowram": "False", # Assuming not specifically lowram mode - "ss_num_train_images": "N/A", # Not applicable for extraction - # Add other relevant metadata as per sai_model_spec or conventions - } - if not no_metadata: - title = os.path.splitext(os.path.basename(save_to))[0] - # Build sai_metadata, ensuring it includes necessary fields like 'ss_sd_model_hash' if possible - # For extraction, some training-specific metadata might not be relevant or available. - sai_metadata = sai_model_spec.build_metadata( - None, # training_info (usually from train_util or fine_tune) - can be None for extraction - v2, - v_parameterization, - sdxl, - True, # is_sd2 - False, # is_v_pred_like - time.time(), - title=title, - # model_hash=None, # Original model hash if available - # tuned_model_hash=None # Tuned model hash if available - ) - # Filter out None values from sai_metadata if any, or handle them in build_metadata - sai_metadata_cleaned = {k: v for k, v in sai_metadata.items() if v is not None} - metadata.update(sai_metadata_cleaned) - - - # Use the state_dict 'lora_sd' for saving, not the network object 'lora_network_save' - save_to_file(save_to, lora_sd, lora_sd, save_dtype, metadata) # Pass lora_sd as the model/state_dict to save - logger.info(f"LoRA saved to: {save_to}") - -def setup_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("--v2", action="store_true", help="Load Stable Diffusion v2.x model") - parser.add_argument("--v_parameterization", action="store_true", help="Set v-parameterization metadata (defaults to v2)") - parser.add_argument("--sdxl", action="store_true", help="Load Stable Diffusion SDXL base model") - parser.add_argument("--load_precision", choices=[None, "float", "fp16", "bf16"], help="Precision for loading models") - parser.add_argument("--save_precision", choices=[None, "float", "fp16", "bf16"], default=None, help="Precision for saving LoRA") - parser.add_argument("--model_org", required=True, help="Original Stable Diffusion model (ckpt/safetensors)") - parser.add_argument("--model_tuned", required=True, help="Tuned Stable Diffusion model (ckpt/safetensors)") - parser.add_argument("--save_to", required=True, help="Output file name (ckpt/safetensors)") - parser.add_argument("--dim", type=int, default=4, help="Max dimension (rank) of LoRA for linear layers") - parser.add_argument("--conv_dim", type=int, help="Max dimension (rank) of LoRA for Conv2d-3x3") - parser.add_argument("--device", default="cuda", help="Device for computation (e.g., cuda)") - parser.add_argument("--clamp_quantile", type=float, default=0.99, help="Quantile for clamping weights") - parser.add_argument("--min_diff", type=float, default=0.01, help="Minimum weight difference to extract") - parser.add_argument("--no_metadata", action="store_true", help="Omit detailed metadata") - parser.add_argument("--load_original_model_to", help="Device for original model (SDXL only)") - parser.add_argument("--load_tuned_model_to", help="Device for tuned model (SDXL only)") - parser.add_argument("--dynamic_param", type=float, help="Parameter for dynamic rank reduction") - parser.add_argument("--verbose", action="store_true", help="Show detailed rank reduction info") - parser.add_argument( - "--dynamic_method", - choices=[None, "sv_ratio", "sv_fro", "sv_cumulative", "sv_knee", "sv_rel_decrease", "sv_cumulative_knee"], # Added "sv_cumulative_knee" - help="Dynamic rank reduction method" - ) - return parser - -if __name__ == "__main__": - parser = setup_parser() - args = parser.parse_args() - methods_requiring_param = ["sv_ratio", "sv_fro", "sv_cumulative", "sv_rel_decrease"] - if args.dynamic_method in methods_requiring_param and args.dynamic_param is None: - raise ValueError(f"Dynamic method '{args.dynamic_method}' requires --dynamic_param to be set.") - - # Add a check for rank > 0 if not dynamic, or ensure dynamic methods return rank >= 1 - if not args.dynamic_method and args.dim <= 0: - raise ValueError(f"--dim (rank) must be > 0. Got {args.dim}") - if args.conv_dim is not None and args.conv_dim <=0: - raise ValueError(f"--conv_dim (rank) must be > 0 if specified. Got {args.conv_dim}") - - svd(**vars(args)) \ No newline at end of file