From fd668e3941d8a00afdfb0add8e531febceb828d4 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 8 Feb 2023 11:53:39 -0500 Subject: [PATCH] update import paths --- .gitmodules | 36 ++-- TODO.md | 2 +- cli/modules/bench.py | 9 +- cli/modules/embedding-preview.py | 1 - cli/{ => modules}/extract-lora.py | 4 +- cli/modules/grid.py | 1 - cli/modules/image-watermark.py | 2 +- cli/modules/interrogate.py | 2 - cli/modules/models-preview.py | 7 +- cli/modules/process.py | 16 +- cli/modules/prompt-ideas.py | 2 - cli/modules/prompt-promptist.py | 2 - cli/modules/sdapi.py | 1 - cli/modules/train-lora.py | 186 ++++++++++++++++++ cli/modules/train-losschart.py | 2 - cli/modules/train-lossrate.py | 2 - cli/modules/video-extract.py | 1 - extensions-builtin/sd-webui-model-converter | 2 +- extensions-builtin/seed_travel | 2 +- .../stable-diffusion-webui-images-browser | 2 +- modules/img2img.py | 2 + requirements.txt | 1 + 22 files changed, 227 insertions(+), 58 deletions(-) rename cli/{ => modules}/extract-lora.py (98%) create mode 100755 cli/modules/train-lora.py diff --git a/.gitmodules b/.gitmodules index 0302c69dd..a4d449f27 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,32 +1,32 @@ [submodule "wiki"] - path = wiki - url = https://github.com/vladmandic/automatic.wiki + path = wiki + url = https://github.com/vladmandic/automatic.wiki [submodule "extensions-builtin/sd-extension-system-info"] - path = extensions-builtin/sd-extension-system-info - url = https://github.com/vladmandic/sd-extension-system-info + path = extensions-builtin/sd-extension-system-info + url = https://github.com/vladmandic/sd-extension-system-info [submodule "extensions-builtin/sd-extension-aesthetic-scorer"] - path = extensions-builtin/sd-extension-aesthetic-scorer - url = https://github.com/vladmandic/sd-extension-aesthetic-scorer + path = extensions-builtin/sd-extension-aesthetic-scorer + url = https://github.com/vladmandic/sd-extension-aesthetic-scorer [submodule 
"extensions-builtin/sd-extension-steps-animation"] - path = extensions-builtin/sd-extension-steps-animation - url = https://github.com/vladmandic/sd-extension-steps-animation + path = extensions-builtin/sd-extension-steps-animation + url = https://github.com/vladmandic/sd-extension-steps-animation [submodule "extensions-builtin/stable-diffusion-webui-images-browser"] - path = extensions-builtin/stable-diffusion-webui-images-browser - url = https://github.com/AlUlkesh/stable-diffusion-webui-images-browser + path = extensions-builtin/stable-diffusion-webui-images-browser + url = https://github.com/AlUlkesh/stable-diffusion-webui-images-browser ignore = dirty [submodule "extensions-builtin/seed_travel"] - path = extensions-builtin/seed_travel - url = https://github.com/yownas/seed_travel + path = extensions-builtin/seed_travel + url = https://github.com/yownas/seed_travel ignore = dirty [submodule "extensions-builtin/sd-webui-model-converter"] - path = extensions-builtin/sd-webui-model-converter - url = https://github.com/Akegarasu/sd-webui-model-converter + path = extensions-builtin/sd-webui-model-converter + url = https://github.com/Akegarasu/sd-webui-model-converter ignore = dirty [submodule "extensions-builtin/sd-dynamic-thresholding"] - path = extensions-builtin/sd-dynamic-thresholding - url = https://github.com/mcmonkeyprojects/sd-dynamic-thresholding + path = extensions-builtin/sd-dynamic-thresholding + url = https://github.com/mcmonkeyprojects/sd-dynamic-thresholding ignore = dirty [submodule "modules/lora"] - path = modules/lora - url = https://github.com/kohya-ss/sd-scripts + path = modules/lora + url = https://github.com/kohya-ss/sd-scripts ignore = dirty diff --git a/TODO.md b/TODO.md index f3b9d2049..ed7ce678e 100644 --- a/TODO.md +++ b/TODO.md @@ -121,7 +121,7 @@ Cool stuff that is not integrated anywhere... 
- initial work on `lora` integration can render loras without extensions can extract lora from fine-tuned model - training is tbd + training prototype in place in `train-lora.py`, not optimized or integrated - initial work on `custom diffusion` integration no testing so far - spent quite some time making stable-diffusion compatible with upcomming `pytorch` 2.0 release diff --git a/cli/modules/bench.py b/cli/modules/bench.py index df0de38eb..801c9ccfd 100755 --- a/cli/modules/bench.py +++ b/cli/modules/bench.py @@ -6,15 +6,10 @@ import asyncio import base64 import io import json -import os -import sys import time - from PIL import Image - -sys.path.append(os.path.join(os.path.dirname(__file__), 'modules')) -import modules.sdapi as sdapi -from modules.util import Map, log +import sdapi as sdapi +from util import Map, log options = Map({ diff --git a/cli/modules/embedding-preview.py b/cli/modules/embedding-preview.py index 3d5763e14..8076d1a61 100755 --- a/cli/modules/embedding-preview.py +++ b/cli/modules/embedding-preview.py @@ -11,7 +11,6 @@ import argparse from pathlib import Path from PIL import Image from inspect import getsourcefile - from util import Map, log from sdapi import getsync, postsync from grid import grid diff --git a/cli/extract-lora.py b/cli/modules/extract-lora.py similarity index 98% rename from cli/extract-lora.py rename to cli/modules/extract-lora.py index 7d857cdfc..85bfaafaa 100755 --- a/cli/extract-lora.py +++ b/cli/modules/extract-lora.py @@ -12,11 +12,11 @@ import argparse import torch import transformers from tqdm import tqdm +from util import log -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'modules', 'lora')) +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'modules', 'lora')) import library.model_util as model_util import networks.lora as lora -from modules.util import log def svd(args): # pylint: disable=redefined-outer-name diff --git a/cli/modules/grid.py b/cli/modules/grid.py index 
83798e619..4922409b3 100755 --- a/cli/modules/grid.py +++ b/cli/modules/grid.py @@ -8,7 +8,6 @@ import argparse import math import logging from pathlib import Path - import filetype from PIL import Image, ImageDraw, ImageFont from util import log diff --git a/cli/modules/image-watermark.py b/cli/modules/image-watermark.py index 50f3553c4..73e891238 100755 --- a/cli/modules/image-watermark.py +++ b/cli/modules/image-watermark.py @@ -9,7 +9,7 @@ from imwatermark import WatermarkEncoder, WatermarkDecoder from PIL import Image from PIL.ExifTags import TAGS from PIL.TiffImagePlugin import ImageFileDirectory_v2 -from modules.util import log, Map +from util import log, Map import piexif import piexif.helper diff --git a/cli/modules/interrogate.py b/cli/modules/interrogate.py index e9790de42..96fabb66e 100755 --- a/cli/modules/interrogate.py +++ b/cli/modules/interrogate.py @@ -8,10 +8,8 @@ import base64 import sys import os import asyncio - import filetype from PIL import Image - from util import log, Map import sdapi as sdapi diff --git a/cli/modules/models-preview.py b/cli/modules/models-preview.py index 32958ff48..7be4fd6f8 100755 --- a/cli/modules/models-preview.py +++ b/cli/modules/models-preview.py @@ -6,13 +6,12 @@ import time import asyncio import argparse from pathlib import Path +from util import Map, log +from sdapi import get, post, close +from grid import grid sys.path.append(os.path.join(os.path.dirname(__file__), '..')) -sys.path.append(os.path.join(os.path.dirname(__file__), 'modules')) from generate import sd, generate -from modules.util import Map, log -from modules.sdapi import get, post, close -from modules.grid import grid default = 'sd-v15-runwayml.ckpt [cc6cb27103]' diff --git a/cli/modules/process.py b/cli/modules/process.py index 89e352b99..d6aa15f18 100755 --- a/cli/modules/process.py +++ b/cli/modules/process.py @@ -25,14 +25,12 @@ import base64 import pathlib import argparse import logging - import filetype import numpy as np import mediapipe 
as mp from PIL import Image, ImageOps from skimage.metrics import structural_similarity as ssim from scipy.stats import beta - from util import log, Map from sdapi import postsync @@ -40,6 +38,7 @@ from sdapi import postsync params = Map({ 'src': '', # source folder 'dst': '', # destination folder + 'format': '.png', # image format 'extract_face': True, # extract face from image 'extract_body': True, # extract face from image 'clear_dst': True, # remove all files from destination at the start @@ -50,15 +49,15 @@ params = Map({ 'face_pad': 0.07, # pad face image percentage 'face_model': 1, # which face model to use 0/close-up 1/standard 'face_blur_score': 1.5, # max score for face blur detection - 'face_range_score': 0.3, # min score for face dynamic range detection + 'face_range_score': 0.2, # min score for face dynamic range detection 'body_score': 0.9, # min body detection score 'body_visibility': 0.5, # min visibility score for each detected body part 'body_parts': 15, # min number of detected body parts with sufficient visibility 'body_pad': 0.2, # pad body image percentage 'body_model': 2, # body model to use 0/low 1/medium 2/high 'body_blur_score': 1.8, # max score for body blur detection - 'body_range_score': 0.3, # min score for body dynamic range detection - 'segmentation_face': True, # segmentation enabled + 'body_range_score': 0.2, # min score for body dynamic range detection + 'segmentation_face': False, # segmentation enabled 'segmentation_body': False, # segmentation enabled 'segmentation_model': 0, # segmentation model 0/general 1/landscape 'segmentation_background': (192, 192, 192), # segmentation background color @@ -263,7 +262,7 @@ def interrogate(img, fn): res = postsync('/sdapi/v1/interrogate', json) caption = res.caption if 'caption' in res else '' log.info({ 'interrogate': caption }) - file = fn.replace('.jpg', '.txt') + file = fn.replace(params.format, '.txt') f = open(file, 'w') f.write(caption) f.close() @@ -278,7 +277,7 @@ def 
process_file(f: str, dst: str = None, preview: bool = False, offline: bool = else: dir = dst base = os.path.basename(f).split('.')[0] - fn = os.path.join(dir, str(i[what]).rjust(3, '0') + '-' + what + '-' + base + '.jpg') + fn = os.path.join(dir, str(i[what]).rjust(3, '0') + '-' + what + '-' + base + params.format) # log.debug({ 'save': fn }) if not preview: img.save(fn) @@ -318,6 +317,7 @@ def process_file(f: str, dst: str = None, preview: bool = False, offline: bool = log.debug({ 'no body': f }) image.close() + return i def process_images(src: str, dst: str, args = None): params.src = src @@ -337,7 +337,7 @@ def process_images(src: str, dst: str, args = None): for root, _sub_dirs, files in os.walk(src): for f in files: process_file(os.path.join(root, f), dst) - + return i if __name__ == '__main__': # log.setLevel(logging.DEBUG) diff --git a/cli/modules/prompt-ideas.py b/cli/modules/prompt-ideas.py index 9b403f2f9..ff70123da 100755 --- a/cli/modules/prompt-ideas.py +++ b/cli/modules/prompt-ideas.py @@ -6,9 +6,7 @@ model from: import logging import argparse - from transformers import GPT2Tokenizer, GPT2LMHeadModel - from util import log diff --git a/cli/modules/prompt-promptist.py b/cli/modules/prompt-promptist.py index 1240cc8c0..60c5ee680 100755 --- a/cli/modules/prompt-promptist.py +++ b/cli/modules/prompt-promptist.py @@ -5,9 +5,7 @@ use microsoft promptist to beautify prompt """ import sys - from transformers import AutoModelForCausalLM, AutoTokenizer - from util import log diff --git a/cli/modules/sdapi.py b/cli/modules/sdapi.py index fb04ba662..0fd15ff34 100755 --- a/cli/modules/sdapi.py +++ b/cli/modules/sdapi.py @@ -9,7 +9,6 @@ import asyncio import logging import requests import sys - from util import Map, log diff --git a/cli/modules/train-lora.py b/cli/modules/train-lora.py new file mode 100755 index 000000000..835c39a28 --- /dev/null +++ b/cli/modules/train-lora.py @@ -0,0 +1,186 @@ +#!/bin/env python + +""" +Train a LoRA network on a folder of 
training images, optionally preprocessing them first +Based on: kohya-ss sd-scripts `train_network` (the `modules/lora` submodule) +""" + +import os +import sys +import argparse +import tempfile +import transformers +from pathlib import Path +from util import log, Map +from process import process_file + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'modules', 'lora')) +from train_network import train + + +options = Map({ + "v2": False, + "v_parameterization": False, + "pretrained_model_name_or_path": "/mnt/d/Models/stable-diffusion/sd-v15-runwayml.ckpt", + "train_data_dir": "/tmp/rreid/img", + "shuffle_caption": False, + "caption_extension": ".txt", + "caption_extention": None, + "keep_tokens": None, + "color_aug": False, + "flip_aug": False, + "face_crop_aug_range": None, + "random_crop": False, + "debug_dataset": False, + "resolution": "512,512", + "cache_latents": True, + "enable_bucket": False, + "min_bucket_reso": 256, + "max_bucket_reso": 1024, + "bucket_reso_steps": 64, + "bucket_no_upscale": False, + "reg_data_dir": None, + "in_json": "/tmp/rreid/rreid.json", + "dataset_repeats": 1, + "output_dir": "/mnt/d/Models/lora/", + "output_name": "lora-rreid-random-v1", + "save_precision": "fp16", + "save_every_n_epochs": 1, + "save_n_epoch_ratio": None, + "save_last_n_epochs": None, + "save_last_n_epochs_state": None, + "save_state": False, + "resume": None, + "train_batch_size": 1, + "max_token_length": None, + "use_8bit_adam": False, + "mem_eff_attn": False, + "xformers": False, + "vae": None, + "learning_rate": 1e-05, + "max_train_steps": 5000, + "max_train_epochs": None, + "max_data_loader_n_workers": 8, + "persistent_data_loader_workers": False, + "seed": 42, + "gradient_checkpointing": False, + "gradient_accumulation_steps": 1, + "mixed_precision": "fp16", + "full_fp16": False, + "clip_skip": None, + "logging_dir": None, + "log_prefix": None, + "lr_scheduler": "cosine", + "lr_warmup_steps": 0, + "prior_loss_weight": 1.0, + "no_metadata": False, + "save_model_as": "ckpt", + "unet_lr": 0.001, + "text_encoder_lr": 5e-05, + 
"lr_scheduler_num_cycles": 1, + "lr_scheduler_power": 1, + "network_weights": None, + "network_module": "networks.lora", + "network_dim": 16, + "network_alpha": 1.0, + "network_args": None, + "network_train_unet_only": False, + "network_train_text_encoder_only": False, + "training_comment": "mood-magic" +}) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description = 'train lora') + parser.add_argument('--model', type=str, default=None, required=True, help='original model to use a base for training') + parser.add_argument('--input', type=str, default=None, required=True, help='input folder with training images') + parser.add_argument('--dir', type=str, default=None, required=True, help='folder containing lora checkpoints') + parser.add_argument('--name', type=str, default=None, required=True, help='lora name') + parser.add_argument('--steps', type=int, default=5000, required=False, help='training steps') + parser.add_argument('--dim', type=int, default=16, required=False, help='network dimension') + parser.add_argument("--noprocess", default = False, action='store_true', help = "skip processing and use existing input data") + args = parser.parse_args() + if not os.path.exists(args.model) or not os.path.isfile(args.model): + log.error({ 'lora cannot find model': args.model }) + exit(1) + options.pretrained_model_name_or_path = args.model + if not os.path.exists(args.input) or not os.path.isdir(args.input): + log.error({ 'lora cannot find training dir': args.input }) + exit(1) + if not os.path.exists(args.dir) or not os.path.isdir(args.dir): + log.error({ 'lora cannot find training dir': args.dir }) + exit(1) + options.output_dir = args.dir + options.output_name = args.name + options.max_train_steps = args.steps + options.network_dim = args.dim + log.info({ 'train lora args': vars(options) }) + transformers.logging.set_verbosity_error() + + if args.noprocess: + options.train_data_dir = args.input + else: + dir = os.path.join(tempfile.gettempdir(), 
args.name, '10_processed') + Path(dir).mkdir(parents=True, exist_ok=True) + files = [] + json_data = {} + for root, _sub_dirs, folder in os.walk(args.input): + for f in folder: + files.append(os.path.join(root, f)) + for f in files: + res = process_file(f = f, dst = dir, preview = False, offline = True) + + log.info({ 'processed': res, 'inputs': len(files) }) + options.train_data_dir = args.input + dir = os.path.join(tempfile.gettempdir(), args.name) + + train(options) + + +""" +- cannot use `accelerate` with *dynamo* enabled +- cannot use `xformers` due to *faketensors* requirement +- cannot use `mem_eff_attn` due to *forwardfunc* mismatch + +TODO + +--gradient_checkpointing +--gradient_accumulation_steps=10 +--caption_extension=txt +--in_json + +WORKING + +process.py --output "/tmp/rreid/img/10_processed" /home/vlado/generative/Input/ryanreid/random --offline + +accelerate launch --no_python --quiet --num_cpu_threads_per_process=16 python /home/vlado/dev/automatic/modules/lora/train_network.py \ +--pretrained_model_name_or_path="/mnt/d/Models/stable-diffusion/sd-v15-runwayml.ckpt" \ +--train_data_dir="/tmp/rreid/img" \ +--logging_dir="/tmp/rreid/logging" \ +--output_dir="/mnt/d/Models/lora/" \ +--output_name="lora-rreid-random-v1" \ +--resolution=512,512 \ +--learning_rate=1e-5 \ +--unet_lr=1e-3 \ +--text_encoder_lr=5e-5 \ +--lr_scheduler_num_cycles=1 \ +--lr_scheduler=cosine \ +--max_train_steps=5000 \ +--network_alpha=1 \ +--network_dim=16 \ +--network_module=networks.lora \ +--save_every_n_epochs=1 \ +--save_model_as=ckpt \ +--save_precision=fp16 \ +--mixed_precision=fp16 \ +--seed=42 \ +--train_batch_size=1 \ +--cache_latents \ + +metadata { image_key: img_md: { caption: str, tags: [] } } + +abs_path = glob_images(train_data_dir, image_key) + +}} + +./train-lora.py --model /mnt/d/Models/stable-diffusion/sd-v15-runwayml.ckpt --name rreid --dir /mnt/d/Models/lora --input ~/generative/Input/ryanreid/random/ +""" \ No newline at end of file diff --git 
a/cli/modules/train-losschart.py b/cli/modules/train-losschart.py index 851f0e05e..2d9c7a3f1 100755 --- a/cli/modules/train-losschart.py +++ b/cli/modules/train-losschart.py @@ -6,12 +6,10 @@ import sys import json import pathlib import logging - import torch import numpy as np from PIL import Image, ImageFont, ImageDraw from matplotlib import pyplot as plt - from util import log, Map diff --git a/cli/modules/train-lossrate.py b/cli/modules/train-lossrate.py index f607fe0bd..4a6165ff5 100755 --- a/cli/modules/train-lossrate.py +++ b/cli/modules/train-lossrate.py @@ -5,11 +5,9 @@ auto-generate learn-rate import io import math import logging - import numpy as np from PIL import Image, ImageFont, ImageDraw from matplotlib import pyplot as plt - from util import log, Map diff --git a/cli/modules/video-extract.py b/cli/modules/video-extract.py index 5f9bc9f7a..4a68c7440 100755 --- a/cli/modules/video-extract.py +++ b/cli/modules/video-extract.py @@ -8,7 +8,6 @@ import subprocess import pathlib import argparse import filetype - from util import log, Map diff --git a/extensions-builtin/sd-webui-model-converter b/extensions-builtin/sd-webui-model-converter index 1f9b51745..7a998ede9 160000 --- a/extensions-builtin/sd-webui-model-converter +++ b/extensions-builtin/sd-webui-model-converter @@ -1 +1 @@ -Subproject commit 1f9b51745f64da7e00577c10bc95554e431b5dc4 +Subproject commit 7a998ede9cdc3b9db3ea9116e74fd2487701d544 diff --git a/extensions-builtin/seed_travel b/extensions-builtin/seed_travel index beef29d88..8f818f18f 160000 --- a/extensions-builtin/seed_travel +++ b/extensions-builtin/seed_travel @@ -1 +1 @@ -Subproject commit beef29d887866c46c7dd8203496b6a8abc3cc2ae +Subproject commit 8f818f18f258ac4e53a34c1219819da497e97be1 diff --git a/extensions-builtin/stable-diffusion-webui-images-browser b/extensions-builtin/stable-diffusion-webui-images-browser index c1a4590d5..3390e353f 160000 --- a/extensions-builtin/stable-diffusion-webui-images-browser +++ 
b/extensions-builtin/stable-diffusion-webui-images-browser @@ -1 +1 @@ -Subproject commit c1a4590d545bf68035c8e6a2f43cec04f380f2f8 +Subproject commit 3390e353fd06d63a0906219c01577d53390b5d6f diff --git a/modules/img2img.py b/modules/img2img.py index bcc158dc9..c973b7708 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -73,6 +73,8 @@ def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args): if not save_normally: os.makedirs(output_dir, exist_ok=True) + if processed_image.mode == 'RGBA': + processed_image = processed_image.convert("RGB") processed_image.save(os.path.join(output_dir, filename)) diff --git a/requirements.txt b/requirements.txt index 49c6d0b2e..16698c0c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ accelerate aenum basicsr blendmodes +bitsandbytes clean-fid colormap easydev