From 579a1e57bdb7d0ecc268ed4e2b7e98422403c991 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Thu, 23 Oct 2025 15:10:47 +0900 Subject: [PATCH 01/58] update --- config_files/config-5080.json | 42 ++++ config_files/config-5090.json | 41 ++++ epoch와-steps설정.md | 180 ++++++++++++++++ generate-captions-standalone.py | 365 +++++++++++++++++++++++++++++++ generate-captions.cmd | 1 + generate-captions.py | 369 ++++++++++++++++++++++++++++++++ run-train-extd.cmd | 31 +++ run-train.cmd | 1 + run-venv.cmd | 1 + 로컬-설치가이드.md | 190 ++++++++++++++++ 10 files changed, 1221 insertions(+) create mode 100644 config_files/config-5080.json create mode 100644 config_files/config-5090.json create mode 100644 epoch와-steps설정.md create mode 100644 generate-captions-standalone.py create mode 100644 generate-captions.cmd create mode 100644 generate-captions.py create mode 100644 run-train-extd.cmd create mode 100644 run-train.cmd create mode 100644 run-venv.cmd create mode 100644 로컬-설치가이드.md diff --git a/config_files/config-5080.json b/config_files/config-5080.json new file mode 100644 index 0000000..54b3b2d --- /dev/null +++ b/config_files/config-5080.json @@ -0,0 +1,42 @@ +{ + "general": { + "shuffle_caption": true, + "caption_extension": ".txt", + "keep_tokens": 1, + "seed": 1234 + }, + "model": { + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0", + "vae": "madebyollin/sdxl-vae-fp16-fix" + }, + "training": { + "resolution": "768,768", + "batch_size": 1, + "learning_rate": 0.00015, + "lr_scheduler": "cosine_with_restarts", + "max_train_steps": 4000, + "optimizer": "adamw8bit", + "mixed_precision": "fp16", + "gradient_checkpointing": true, + "clip_skip": 2, + "network_dim": 32, + "network_alpha": 16, + "save_precision": "fp16", + "save_every_n_steps": 1000 + }, + "folders": { + "train_data_dir": "./data/train", + "reg_data_dir": "./data/reg", + "output_dir": "./output_5080", + "logging_dir": "./logs_5080" + }, + "advanced": { + "bucket_reso_steps": 64, + 
"bucket_no_upscale": true, + "xformers": true, + "cache_latents": true, + "min_bucket_reso": 320, + "max_bucket_reso": 768 + } + } + \ No newline at end of file diff --git a/config_files/config-5090.json b/config_files/config-5090.json new file mode 100644 index 0000000..a16ddbd --- /dev/null +++ b/config_files/config-5090.json @@ -0,0 +1,41 @@ +{ + "general": { + "shuffle_caption": true, + "caption_extension": ".txt", + "keep_tokens": 1, + "seed": 42 + }, + "model": { + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-3.5", + "vae": "stabilityai/sd-vae-ft-mse" + }, + "training": { + "resolution": "1024,1024", + "batch_size": 2, + "learning_rate": 0.0001, + "lr_scheduler": "cosine_with_restarts", + "max_train_steps": 6000, + "optimizer": "adamw8bit", + "mixed_precision": "bf16", + "gradient_checkpointing": false, + "clip_skip": 2, + "network_dim": 64, + "network_alpha": 32, + "save_precision": "bf16", + "save_every_n_steps": 1000 + }, + "folders": { + "train_data_dir": "./data/train", + "reg_data_dir": "./data/reg", + "output_dir": "./output_5090", + "logging_dir": "./logs_5090" + }, + "advanced": { + "bucket_reso_steps": 64, + "bucket_no_upscale": true, + "xformers": true, + "cache_latents": true, + "min_bucket_reso": 512, + "max_bucket_reso": 1024 + } +} diff --git a/epoch와-steps설정.md b/epoch와-steps설정.md new file mode 100644 index 0000000..4999128 --- /dev/null +++ b/epoch와-steps설정.md @@ -0,0 +1,180 @@ +Max train epoch +training epochs (overrides max_train_steps). 
0 = no override +``` + +### **해석:** +- **"Max train epoch"**: 최대 학습 에포크 수 +- **"overrides max_train_steps"**: 이 값을 설정하면 max_train_steps를 **무시함** +- **"0 = no override"**: `0`으로 설정하면 max_train_steps를 **따름** + +--- + +## 🎯 사용 방법 + +### **케이스 1: Epoch 기준으로 학습** ⭐ 일반적 +``` +Max train epoch: 10 +Max train steps: 0 (또는 비워둠) +``` +**결과:** 10 에포크 학습 + +--- + +### **케이스 2: Steps 기준으로 학습** +``` +Max train epoch: 0 +Max train steps: 2000 +``` +**결과:** 2000 스텝 학습 + +--- + +### **케이스 3: 둘 다 설정 (Epoch 우선!)** +``` +Max train epoch: 10 +Max train steps: 5000 +``` +**결과:** 10 에포크만 학습 (max_train_steps **무시됨**) + +--- + +## 🔍 우선순위 정리 +``` +Max train epoch > 0 → 이것만 사용 (steps 무시) +Max train epoch = 0 → max_train_steps 사용 +``` + +--- + +## 💡 실전 설정 + +### **일반적인 LoRA 학습:** +``` +Max train epoch: 10 ← 여기만 설정 +Max train steps: 0 ← 0 또는 비워둠 +Save every N epochs: 1 +``` + +### **정밀한 스텝 컨트롤이 필요할 때:** +``` +Max train epoch: 0 ← 0으로 설정 +Max train steps: 2500 ← 여기 설정 +Save every N steps: 500 +``` + +--- + +## 📊 예시 계산 + +### **50장, 4회 반복 기준:** + +#### **설정 A: Epoch 우선** +``` +Max train epoch: 10 +Max train steps: 999999 ← 아무리 커도 무시됨 +``` +**실제 학습:** 50 × 4 × 10 = **2000 스텝** + +#### **설정 B: Steps 우선** +``` +Max train epoch: 0 +Max train steps: 1500 +``` +**실제 학습:** **1500 스텝** (7.5 에포크) + +--- + +## ⚠️ 흔한 실수 + +### ❌ **틀린 설정:** +``` +Max train epoch: 10 +Max train steps: 2000 +``` +→ Steps 값이 **무시됨!** (Epoch만 적용) + +### ✅ **올바른 설정:** + +**Epoch 쓰고 싶으면:** +``` +Max train epoch: 10 +Max train steps: 0 +``` + +**Steps 쓰고 싶으면:** +``` +Max train epoch: 0 +Max train steps: 2000 +``` + +--- + +## 🎯 **최종 답변** + +### **같은 값 넣으면 되나요?** +❌ **아니요!** + +### **어떻게 설정해야 하나요?** + +#### **대부분의 경우 (권장):** +``` +Max train epoch: 10 ← 원하는 에포크 수 +Max train steps: 0 ← 0으로! +``` + +#### **스텝 수를 정확히 지정하고 싶으면:** +``` +Max train epoch: 0 ← 0으로! 
+Max train steps: 2500 ← 원하는 스텝 수 + + +---------------- + + + +총 스텝 = 이미지 수 × 반복 횟수 × 에포크 수 + +3000 = 100 × 2 × 에포크 +3000 = 200 × 에포크 +에포크 = 3000 ÷ 200 +에포크 = 15 +``` + +--- + +## ✅ 답: **15 에포크** + +### **설정:** +``` +폴더명: 2_character_name +이미지: 100장 +Max train epoch: 15 +Max train steps: 0 +``` + +### **결과:** +``` +1 에포크 = 100 × 2 = 200 스텝 +15 에포크 = 200 × 15 = 3000 스텝 ✅ + +✅ 고정 Seed (추천!) +시드: Seed: 42 (또는 1234, 777 등 아무 숫자) + +**이유:** + +### ✅ **고정 Seed (추천!)** +``` +Seed: 42 +``` +**장점:** +- **재현성** - 똑같은 결과 재생산 가능 +- **실험 비교** - 다른 하이퍼파라미터 테스트 시 공정한 비교 +- **디버깅** - 문제 발생 시 재현 가능 +- **협업** - 다른 사람도 같은 결과 얻을 수 있음 + +**사용 케이스:** +- 대부분의 경우 ✅ +- 하이퍼파라미터 튜닝 +- 안정적인 학습 원함 + +--- \ No newline at end of file diff --git a/generate-captions-standalone.py b/generate-captions-standalone.py new file mode 100644 index 0000000..7c2fcc0 --- /dev/null +++ b/generate-captions-standalone.py @@ -0,0 +1,365 @@ +""" +독립 실행형 BLIP + WD14 하이브리드 캡션 생성기 +실사 LoRA 학습을 위한 통합 캡션 생성 스크립트 + +설치 필요: +pip install transformers pillow torch torchvision onnxruntime-gpu huggingface_hub +""" + +import os +import sys +from pathlib import Path +from tqdm import tqdm +import argparse +from PIL import Image +import torch + + +# ============================== +# ⚙️ 설정 (수정 가능) +# ============================== + +class Config: + # 데이터셋 경로 + DATASET_DIRS = [ + "./dataset/mainchar", # 메인 캐릭터 + "./dataset/bg", # 배경/보조 + ] + + # 모델 설정 + BLIP_MODEL = "Salesforce/blip-image-captioning-large" + WD14_MODEL = "SmilingWolf/wd-v1-4-moat-tagger-v2" + + # WD14 임계값 + WD14_GENERAL_THRESHOLD = 0.35 + WD14_CHARACTER_THRESHOLD = 0.85 + + # BLIP 설정 + BLIP_MAX_LENGTH = 75 + BLIP_NUM_BEAMS = 1 # 1=greedy, >1=beam search + + # 제거할 WD14 메타 태그 + REMOVE_TAGS = [ + # 메타 태그 + "1girl", "1boy", "solo", "2girls", "3girls", "multiple girls", + "looking at viewer", "facing viewer", "solo focus", + # 배경 + "simple background", "white background", "grey background", + "transparent background", "gradient background", + # 품질/메타데이터 + "highres", 
"absurdres", "lowres", "bad anatomy", + "signature", "watermark", "artist name", "dated", + "commentary", "username", + # Danbooru 메타 + "rating:safe", "rating:questionable", "rating:explicit", + "safe", "questionable", "explicit", + ] + + # 출력 설정 + OUTPUT_ENCODING = "utf-8" + OVERWRITE_EXISTING = False + CREATE_BACKUP = True + + # 디바이스 + DEVICE = "cuda" if torch.cuda.is_available() else "cpu" + + # 캡션 포맷 + # "blip_first": BLIP 문장이 먼저 + # "tags_first": WD14 태그가 먼저 + CAPTION_FORMAT = "blip_first" + + +# ============================== +# 🔧 유틸리티 함수 +# ============================== + +def normalize_tags(tags_str): + """태그 정규화: 소문자, 공백 정리, 중복 제거""" + if not tags_str: + return [] + tags = [tag.strip().lower() for tag in tags_str.split(',')] + # 중복 제거 (순서 유지) + seen = set() + unique_tags = [] + for tag in tags: + if tag and tag not in seen: + seen.add(tag) + unique_tags.append(tag) + return unique_tags + + +def remove_unwanted_tags(tags_list, remove_list): + """불필요한 태그 제거""" + remove_set = set(tag.lower() for tag in remove_list) + return [tag for tag in tags_list if tag not in remove_set] + + +def merge_captions(blip_caption, wd14_tags, remove_tags, format_type="blip_first"): + """ + BLIP 캡션과 WD14 태그 병합 + """ + # BLIP 정규화 + blip_normalized = blip_caption.strip().lower() if blip_caption else "" + + # WD14 태그 정규화 및 필터링 + wd14_normalized = normalize_tags(wd14_tags) + wd14_filtered = remove_unwanted_tags(wd14_normalized, remove_tags) + + # BLIP 문장의 단어들 (중복 제거용) + blip_words = set(blip_normalized.replace(',', ' ').split()) if blip_normalized else set() + + # WD14에서 BLIP 중복 제거 + wd14_deduped = [] + for tag in wd14_filtered: + # 단순 중복 체크 (선택적으로 비활성화 가능) + tag_words = set(tag.replace('_', ' ').split()) + if not tag_words.intersection(blip_words): + wd14_deduped.append(tag) + + # 최종 병합 + if format_type == "blip_first": + # BLIP 문장 + WD14 태그 + if blip_normalized and wd14_deduped: + merged = f"{blip_normalized}, {', '.join(wd14_deduped)}" + elif blip_normalized: + merged = 
blip_normalized + elif wd14_deduped: + merged = ', '.join(wd14_deduped) + else: + merged = "" + else: + # WD14 태그 + BLIP 문장 + if wd14_deduped and blip_normalized: + merged = f"{', '.join(wd14_deduped)}, {blip_normalized}" + elif wd14_deduped: + merged = ', '.join(wd14_deduped) + elif blip_normalized: + merged = blip_normalized + else: + merged = "" + + return merged + + +# ============================== +# 🎨 BLIP 캡션 생성 +# ============================== + +class BLIPCaptioner: + def __init__(self, model_name, device, max_length=75, num_beams=1): + from transformers import BlipProcessor, BlipForConditionalGeneration + + print(f" → BLIP 모델 로딩... ({model_name})") + self.processor = BlipProcessor.from_pretrained(model_name) + self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(device) + self.model.eval() + self.device = device + self.max_length = max_length + self.num_beams = num_beams + + def generate(self, image_path): + try: + image = Image.open(image_path).convert("RGB") + inputs = self.processor(image, return_tensors="pt").to(self.device) + + with torch.no_grad(): + outputs = self.model.generate( + **inputs, + max_length=self.max_length, + num_beams=self.num_beams, + ) + + caption = self.processor.decode(outputs[0], skip_special_tokens=True) + return caption.strip() + + except Exception as e: + print(f"⚠️ BLIP 실패 ({Path(image_path).name}): {e}") + return "" + + +# ============================== +# 🏷️ WD14 태그 생성 +# ============================== + +class WD14Tagger: + def __init__(self, model_name, device, general_thresh=0.35, character_thresh=0.85): + import onnxruntime as ort + from huggingface_hub import hf_hub_download + + print(f" → WD14 모델 로딩... 
({model_name})") + + # 모델 다운로드 + model_path = hf_hub_download(model_name, filename="model.onnx") + tags_path = hf_hub_download(model_name, filename="selected_tags.csv") + + # ONNX 세션 + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if device == "cuda" else ['CPUExecutionProvider'] + self.session = ort.InferenceSession(model_path, providers=providers) + + # 태그 로드 + import pandas as pd + self.tags_df = pd.read_csv(tags_path) + self.general_thresh = general_thresh + self.character_thresh = character_thresh + + def generate(self, image_path): + try: + import numpy as np + + # 이미지 전처리 + image = Image.open(image_path).convert("RGB") + image = image.resize((448, 448)) + image_array = np.ascontiguousarray(np.array(image)[:, :, ::-1], dtype=np.float32) # WD14 expects BGR channel order and raw 0-255 floats (no /255 scaling) + image_array = np.expand_dims(image_array, axis=0) + + # 추론 + input_name = self.session.get_inputs()[0].name + output = self.session.run(None, {input_name: image_array})[0] + + # 태그 필터링 + tags = [] + for i, score in enumerate(output[0]): + tag_type = self.tags_df.iloc[i]['category'] + threshold = self.character_thresh if tag_type == 4 else self.general_thresh + + if score >= threshold: + tag_name = self.tags_df.iloc[i]['name'].replace('_', ' ') + tags.append(tag_name) + + return ', '.join(tags) + + except Exception as e: + print(f"⚠️ WD14 실패 ({Path(image_path).name}): {e}") + return "" + + +# ============================== +# 📁 파일 처리 +# ============================== + +def get_image_files(directory): + """이미지 파일 찾기""" + extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} + image_files = [] + + dir_path = Path(directory) + for ext in extensions: + image_files.extend(dir_path.glob(f"*{ext}")) + image_files.extend(dir_path.glob(f"*{ext.upper()}")) + + return sorted(set(image_files)) # de-duplicate: on case-insensitive filesystems both globs match the same files + + +def create_backup(caption_path): + """백업 생성""" + if caption_path.exists(): + backup_dir = caption_path.parent / "caption_backup" + backup_dir.mkdir(exist_ok=True) + + import shutil + backup_path = backup_dir / caption_path.name + 
shutil.copy2(caption_path, backup_path) + + +# ============================== +# 🚀 메인 프로세스 +# ============================== + +def process_directory(directory, blip_captioner, wd14_tagger, config): + """디렉토리 처리""" + print(f"\n📁 처리 중: {directory}") + + image_files = get_image_files(directory) + + if not image_files: + print(f"⚠️ 이미지 없음: {directory}") + return 0 + + print(f"📸 {len(image_files)}개 이미지 발견") + + success_count = 0 + skip_count = 0 + + for image_path in tqdm(image_files, desc="캡션 생성"): + caption_path = image_path.with_suffix('.txt') + + # 기존 파일 확인 + if caption_path.exists() and not config.OVERWRITE_EXISTING: + skip_count += 1 + continue + + # 백업 + if config.CREATE_BACKUP and caption_path.exists(): + create_backup(caption_path) + + try: + # BLIP 생성 + blip_caption = blip_captioner.generate(image_path) + + # WD14 생성 + wd14_tags = wd14_tagger.generate(image_path) + + # 병합 + merged = merge_captions( + blip_caption, wd14_tags, + config.REMOVE_TAGS, + config.CAPTION_FORMAT + ) + + # 저장 + if merged: + with open(caption_path, 'w', encoding=config.OUTPUT_ENCODING) as f: + f.write(merged) + success_count += 1 + else: + print(f"⚠️ 빈 캡션: {image_path.name}") + + except Exception as e: + print(f"❌ 실패 ({image_path.name}): {e}") + continue + + print(f"✅ 완료: {success_count}개 생성, {skip_count}개 스킵") + return success_count + + +def main(): + parser = argparse.ArgumentParser(description="BLIP + WD14 하이브리드 캡션") + parser.add_argument("--dirs", nargs="+", help="처리할 디렉토리") + parser.add_argument("--overwrite", action="store_true", help="덮어쓰기") + parser.add_argument("--device", default=None, help="cuda/cpu") + parser.add_argument("--format", choices=["blip_first", "tags_first"], help="캡션 포맷") + + args = parser.parse_args() + + config = Config() + if args.dirs: + config.DATASET_DIRS = args.dirs + if args.overwrite: + config.OVERWRITE_EXISTING = True + if args.device: + config.DEVICE = args.device + if args.format: + config.CAPTION_FORMAT = args.format + + print("=" * 60) + print("🎨 
BLIP + WD14 하이브리드 캡션 생성기") + print("=" * 60) + print(f"📁 대상: {config.DATASET_DIRS}") + print(f"💾 덮어쓰기: {config.OVERWRITE_EXISTING}") + print(f"🖥️ 디바이스: {config.DEVICE}") + print(f"📝 포맷: {config.CAPTION_FORMAT}") + print("=" * 60) + + # 모델 로드 + print("\n🔄 모델 로딩 중...") + + try: + blip_captioner = BLIPCaptioner( + config.BLIP_MODEL, + config.DEVICE, + config.BLIP_MAX_LENGTH, + config.BLIP_NUM_BEAMS + ) + + wd14_tagger = WD14Tagger( + config.WD14_MODEL, \ No newline at end of file diff --git a/generate-captions.cmd b/generate-captions.cmd new file mode 100644 index 0000000..1d19a22 --- /dev/null +++ b/generate-captions.cmd @@ -0,0 +1 @@ +python generate-captions.py --dirs ./dataset/mainchar/2_karina --char "karina" --device 3 \ No newline at end of file diff --git a/generate-captions.py b/generate-captions.py new file mode 100644 index 0000000..5f6d36a --- /dev/null +++ b/generate-captions.py @@ -0,0 +1,369 @@ +""" +BLIP + WD14 하이브리드 캡션 생성기 +실사 LoRA 학습을 위한 통합 캡션 생성 스크립트 + +필요 환경: kohya_ss (sd-scripts) +""" + +import os +import sys +from pathlib import Path +from tqdm import tqdm +import argparse + +# Kohya_ss 모듈 임포트 +try: + # BLIP 관련 + from library.blip.blip import load_blip_model, generate_caption as blip_generate + # WD14 관련 + from library.train_util import load_model_from_onnx + from wd14_tagger import WD14Tagger +except ImportError: + print("❌ Kohya_ss 환경에서 실행해주세요!") + print("경로: sd-scripts/ 폴더 안에서 실행") + sys.exit(1) + + +# ============================== +# ⚙️ 설정 (수정 가능) +# ============================== + +class Config: + # 데이터셋 경로 + DATASET_DIRS = [ + "./dataset/mainchar", # 메인 캐릭터 + "./dataset/bg", # 배경/보조 + ] + + # 모델 설정 + BLIP_MODEL_PATH = "Salesforce/blip-image-captioning-large" + WD14_MODEL_PATH = "SmilingWolf/wd-v1-4-moat-tagger-v2" + + # WD14 임계값 + WD14_GENERAL_THRESHOLD = 0.35 + WD14_CHARACTER_THRESHOLD = 0.85 + + # BLIP 설정 + BLIP_MAX_LENGTH = 75 + BLIP_NUM_BEAMS = 1 # 1 = greedy, >1 = beam search + + # 제거할 WD14 메타 태그 + REMOVE_TAGS = [ + "1girl", "1boy", 
"solo", "looking at viewer", + "simple background", "white background", "grey background", + "highres", "absurdres", "lowres", "bad anatomy", + "signature", "watermark", "artist name", "dated", + "rating:safe", "rating:questionable", "rating:explicit", + ] + + # 출력 설정 + OUTPUT_ENCODING = "utf-8" + OVERWRITE_EXISTING = False # True면 기존 캡션 덮어쓰기 + + # 디바이스 + DEVICE = "cuda" # 또는 "cpu" + + # 백업 생성 + CREATE_BACKUP = True + + +# ============================== +# 🔧 유틸리티 함수 +# ============================== + +def normalize_tags(tags_str): + """태그 정규화: 소문자 변환, 공백 정리, 중복 제거""" + tags = [tag.strip().lower() for tag in tags_str.split(',')] + # 중복 제거 (순서 유지) + seen = set() + unique_tags = [] + for tag in tags: + if tag and tag not in seen: + seen.add(tag) + unique_tags.append(tag) + return unique_tags + + +def remove_unwanted_tags(tags_list, remove_list): + """불필요한 태그 제거""" + remove_set = set(tag.lower() for tag in remove_list) + return [tag for tag in tags_list if tag not in remove_set] + + +def merge_captions(blip_caption, wd14_tags, remove_tags): + """ + BLIP 캡션과 WD14 태그 병합 + + 형식: [BLIP 문장], [WD14 태그들] + """ + # BLIP 정규화 + blip_normalized = blip_caption.strip().lower() + + # WD14 태그 정규화 및 필터링 + wd14_normalized = normalize_tags(wd14_tags) + wd14_filtered = remove_unwanted_tags(wd14_normalized, remove_tags) + + # BLIP 문장의 단어들 추출 (중복 제거용) + blip_words = set(blip_normalized.replace(',', ' ').split()) + + # WD14에서 BLIP에 이미 포함된 단어 제거 (선택적) + # 예: BLIP "smiling girl" → WD14 "smile" 중복 제거 + wd14_deduped = [] + for tag in wd14_filtered: + # 태그가 BLIP 문장에 포함되지 않으면 추가 + if not any(word in tag or tag in word for word in blip_words): + wd14_deduped.append(tag) + + # 최종 병합: BLIP (문장) + WD14 (태그) + if wd14_deduped: + merged = f"{blip_normalized}, {', '.join(wd14_deduped)}" + else: + merged = blip_normalized + + return merged + + +# ============================== +# 🎨 캡션 생성 함수 +# ============================== + +def generate_blip_caption(image_path, model, processor, config): + """BLIP으로 
자연어 캡션 생성""" + from PIL import Image + + try: + image = Image.open(image_path).convert("RGB") + + inputs = processor(image, return_tensors="pt").to(config.DEVICE) + + outputs = model.generate( + **inputs, + max_length=config.BLIP_MAX_LENGTH, + num_beams=config.BLIP_NUM_BEAMS, + ) + + caption = processor.decode(outputs[0], skip_special_tokens=True) + return caption.strip() + + except Exception as e: + print(f"⚠️ BLIP 생성 실패 ({image_path.name}): {e}") + return "" + + +def generate_wd14_tags(image_path, tagger, config): + """WD14로 태그 생성""" + try: + tags = tagger.tag( + str(image_path), + general_threshold=config.WD14_GENERAL_THRESHOLD, + character_threshold=config.WD14_CHARACTER_THRESHOLD, + ) + + # 태그를 콤마로 연결 + tag_string = ", ".join(tags) + return tag_string + + except Exception as e: + print(f"⚠️ WD14 생성 실패 ({image_path.name}): {e}") + return "" + + +# ============================== +# 📁 파일 처리 +# ============================== + +def get_image_files(directory): + """디렉토리에서 이미지 파일 찾기""" + image_extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} + + image_files = [] + for ext in image_extensions: + image_files.extend(Path(directory).glob(f"*{ext}")) + image_files.extend(Path(directory).glob(f"*{ext.upper()}")) + + return sorted(image_files) + + +def create_backup(caption_path): + """기존 캡션 파일 백업""" + if caption_path.exists(): + backup_dir = caption_path.parent / "caption_backup" + backup_dir.mkdir(exist_ok=True) + + backup_path = backup_dir / caption_path.name + import shutil + shutil.copy2(caption_path, backup_path) + + +# ============================== +# 🚀 메인 프로세스 +# ============================== + +def process_directory(directory, blip_model, blip_processor, wd14_tagger, config): + """단일 디렉토리 처리""" + + print(f"\n📁 처리 중: {directory}") + + # 이미지 파일 찾기 + image_files = get_image_files(directory) + + if not image_files: + print(f"⚠️ 이미지 파일을 찾을 수 없습니다: {directory}") + return 0 + + print(f"📸 {len(image_files)}개 이미지 발견") + + success_count = 0 + skip_count = 0 + + for 
image_path in tqdm(image_files, desc="캡션 생성"): + + # 캡션 파일 경로 + caption_path = image_path.with_suffix('.txt') + + # 기존 파일 존재 확인 + if caption_path.exists() and not config.OVERWRITE_EXISTING: + skip_count += 1 + continue + + # 백업 생성 + if config.CREATE_BACKUP and caption_path.exists(): + create_backup(caption_path) + + try: + # 1. BLIP 캡션 생성 + blip_caption = generate_blip_caption( + image_path, blip_model, blip_processor, config + ) + + # 2. WD14 태그 생성 + wd14_tags = generate_wd14_tags(image_path, wd14_tagger, config) + + # 3. 병합 + merged_caption = merge_captions( + blip_caption, wd14_tags, config.REMOVE_TAGS + ) + # 캐릭터명 prefix 추가 (가중치 강조) + if getattr(config, "CHARACTER_PREFIX", ""): + char_token = config.CHARACTER_PREFIX.strip() + # LoRA 학습용 강조 토큰 처리 + if not char_token.endswith(")"): + char_token = f"{char_token} (1.3)" + merged_caption = f"{char_token}, {merged_caption}" + + # 4. 저장 + if merged_caption: + with open(caption_path, 'w', encoding=config.OUTPUT_ENCODING) as f: + f.write(merged_caption) + success_count += 1 + else: + print(f"⚠️ 빈 캡션: {image_path.name}") + + except Exception as e: + print(f"❌ 처리 실패 ({image_path.name}): {e}") + continue + + print(f"✅ 완료: {success_count}개 생성, {skip_count}개 스킵") + return success_count + + +def main(): + parser = argparse.ArgumentParser(description="BLIP + WD14 하이브리드 캡션 생성") + parser.add_argument( + "--dirs", + nargs="+", + default=Config.DATASET_DIRS, + help="처리할 디렉토리 목록" + ) + parser.add_argument( + "--overwrite", + action="store_true", + help="기존 캡션 덮어쓰기" + ) + parser.add_argument( + "--device", + default=Config.DEVICE, + help="디바이스 (cuda/cpu)" + ) + parser.add_argument( + "--char", + type=str, + default="", + help="모든 캡션 앞에 붙일 캐릭터명 (예: 'anya character')" + ) + + args = parser.parse_args() + config = Config() + config.DATASET_DIRS = args.dirs + config.OVERWRITE_EXISTING = args.overwrite + config.DEVICE = args.device + + print("=" * 60) + print("🎨 BLIP + WD14 하이브리드 캡션 생성기") + print("=" * 60) + print(f"📁 대상 디렉토리: 
{config.DATASET_DIRS}") + print(f"💾 덮어쓰기: {config.OVERWRITE_EXISTING}") + print(f"🖥️ 디바이스: {config.DEVICE}") + print("=" * 60) + + # 모델 로드 + print("\n🔄 모델 로딩 중...") + + try: + # BLIP 로드 + from transformers import BlipProcessor, BlipForConditionalGeneration + + print(" → BLIP 모델 로딩...") + blip_processor = BlipProcessor.from_pretrained(config.BLIP_MODEL_PATH) + blip_model = BlipForConditionalGeneration.from_pretrained( + config.BLIP_MODEL_PATH + ).to(config.DEVICE) + blip_model.eval() + + # WD14 로드 + print(" → WD14 Tagger 로딩...") + wd14_tagger = WD14Tagger( + model_dir=config.WD14_MODEL_PATH, + device=config.DEVICE + ) + + print("✅ 모델 로딩 완료!\n") + + except Exception as e: + print(f"❌ 모델 로딩 실패: {e}") + sys.exit(1) + + # 각 디렉토리 처리 + total_success = 0 + + for directory in config.DATASET_DIRS: + if not Path(directory).exists(): + print(f"⚠️ 디렉토리 없음: {directory}") + continue + + count = process_directory( + directory, blip_model, blip_processor, wd14_tagger, config + ) + total_success += count + + # 완료 메시지 + print("\n" + "=" * 60) + print(f"🎉 전체 완료!") + print(f"✅ 총 {total_success}개 캡션 생성됨") + print("=" * 60) + + # 결과 예시 출력 + print("\n📝 생성 예시:") + for directory in config.DATASET_DIRS: + txt_files = list(Path(directory).glob("*.txt")) + if txt_files: + example_file = txt_files[0] + with open(example_file, 'r', encoding='utf-8') as f: + content = f.read() + print(f"\n{example_file.name}:") + print(f" {content[:100]}...") + break + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/run-train-extd.cmd b/run-train-extd.cmd new file mode 100644 index 0000000..1977a22 --- /dev/null +++ b/run-train-extd.cmd @@ -0,0 +1,31 @@ +set CUDA_VISIBLE_DEVICES=1 + +accelerate launch --num_cpu_threads_per_process 1 --mixed_precision bf16 ^ + sdxl_train_network.py ^ + --pretrained_model_name_or_path="./models/stable-diffusion-xl-base-1.0" ^ + --train_data_dir="./train_data" ^ + --output_dir="./output_model" ^ + --logging_dir="./logs" ^ + --output_name="karina" 
^ + --network_module=networks.lora ^ + --network_dim=32 ^ + --network_alpha=16 ^ + --learning_rate=1e-4 ^ + --optimizer_type="AdamW8bit" ^ + --lr_scheduler="cosine" ^ + --lr_warmup_steps=100 ^ + --max_train_epochs=15 ^ + --save_every_n_epochs=1 ^ + --mixed_precision="bf16" ^ + --save_precision="bf16" ^ + --cache_latents ^ + --cache_latents_to_disk ^ + --cache_text_encoder_outputs --network_train_unet_only ^ + --gradient_checkpointing ^ + --xformers ^ + --seed=42 ^ + --bucket_no_upscale ^ + --min_bucket_reso=512 ^ + --max_bucket_reso=2048 ^ + --bucket_reso_steps=64 ^ + --resolution="1024,1024" \ No newline at end of file diff --git a/run-train.cmd b/run-train.cmd new file mode 100644 index 0000000..309b1aa --- /dev/null +++ b/run-train.cmd @@ -0,0 +1 @@ +accelerate launch --num_cpu_threads_per_process 8 train_network.py --config_file=config_files/config-5080.json \ No newline at end of file diff --git a/run-venv.cmd b/run-venv.cmd new file mode 100644 index 0000000..1b69d01 --- /dev/null +++ b/run-venv.cmd @@ -0,0 +1 @@ +call venv\Scripts\activate.bat \ No newline at end of file diff --git a/로컬-설치가이드.md b/로컬-설치가이드.md new file mode 100644 index 0000000..7a61528 --- /dev/null +++ b/로컬-설치가이드.md @@ -0,0 +1,190 @@ +CUDA 12.4 +CUDNN 9.1.0 + +# 1. 클론 +git clone https://github.com/kohya-ss/sd-scripts.git +cd sd-scripts + +# 2. 가상환경 생성 +python -m venv venv +.\venv\Scripts\activate # Windows +# 또는 +source venv/bin/activate # Linux/Mac + +# 3. PyTorch 설치 (CUDA 11.8 기준) +pip install torch==2.1.2 torchvision==0.16.2 --index-url https://download.pytorch.org/whl/cu118 + +# 4. 의존성 설치 +pip install --upgrade -r requirements.txt +pip install xformers==0.0.23.post1 --index-url https://download.pytorch.org/whl/cu118 + +# 5. 
Accelerate 설정 +accelerate config + +질문에 답변: + +컴퓨팅 환경: This machine +머신 타입: No distributed training +CPU only?: NO +torch dynamo?: NO +DeepSpeed?: NO +GPU ids: all (또는 0) +Mixed precision: 8GB VRAM의 경우 fp16, 12GB 이상의 경우 bf16 + + +SDXL(Stable Diffusion XL) 모델 아키텍처는 여러 개의 대용량 파일로 구성되며 두 가지 주요 구성 요소는 기본 모델과 선택적 정제(리파이너) 모델입니다. 모델 파일 크기는 사용된 파일 형식(예: .safetensors 또는 .ckpt)에 따라 달라질 수 있습니다. +공식 SDXL 1.0 파일 +공식 SDXL 1.0 릴리스의 일반적인 파일 크기는 다음과 같습니다. +기본 모델: 약 6.94GB. 이 모델은 텍스트 프롬프트에서 초기 이미지를 생성하는 데 사용됩니다. +리파이너 모델: 약 6.08GB. 리파이너는 기본 모델에서 생성된 이미지에 세부적인 정보를 추가하고 품질을 개선하는 두 번째 단계로 사용됩니다. +전체 파이프라인의 총 크기: 기본 모델과 정제 모델의 결합된 크기는 약 13GB이지만, 많은 사용자는 이제 더 빠른 워크플로를 위해 기본 모델을 주로 사용합니다. + + +3. 이미지 크기 조정 및 버킷화 +최적 해상도: 훈련하려는 기본 모델에 따라 권장 해상도가 다릅니다. +Stable Diffusion 1.5: 512x512, 512x768, 768x512 등 +SDXL: 1024x1024 +버킷화 (Bucketing): Kohya_ss는 여러 다른 해상도의 이미지를 효율적으로 처리하는 '버킷' 기능을 제공합니다. +GUI에서 사전 처리 탭 선택: Kohya GUI의 Utilities 탭에서 Prepare training data를 선택합니다. +폴더 설정: Source directory에 원본 이미지 폴더를, Destination directory에 처리된 이미지를 저장할 폴더를 지정합니다. +최소/최대 해상도 설정: Min resolution과 Max resolution을 설정하고, Use buckets를 활성화합니다. +자동 크기 조정: Process images를 실행하면 이미지가 지정된 버킷 해상도에 맞게 자동으로 리사이즈되고 크롭됩니다. + + +💡 추가 팁 +최대 3000 스텝 또는 30 에포크 권장 GitHub +50장의 학습 이미지와 4회 반복 권장 GitHub +VRAM별 배치 크기: + +8GB: batch_size 1-2 GitHub +12GB: batch_size 2-3 GitHub +더 높은 VRAM: batch_size 5+ GitHub + + +## 💡 반복 횟수 선택 가이드 + +| 이미지 수 | 권장 반복 | 폴더명 예시 | 10 에포크 시 총 스텝 | +|----------|---------|------------|-------------------| +| 10장 | 10회 | `10_character` | 1000 스텝 | +| 20장 | 5회 | `5_character` | 1000 스텝 | +| 50장 | 4회 | `4_character` | 2000 스텝 | +| 100장 | 2회 | `2_character` | 2000 스텝 | +| 200장 | 1회 | `1_character` | 2000 스텝 | + +**목표:** 총 스텝이 **1500~3000** 정도가 되도록 조절 + + + +고품질 (일관된 스타일/포즈/의상): +→ 4회 반복으로 충분 + +저품질 (다양한 각도/의상/배경): +→ 10~20회 필요할 수도 +``` + +### **2. 목표가 다름** +``` +Gemini 기준: 캐릭터 얼굴/특징 확실히 학습 +Kohya 문서: 과적합(overfitting) 방지 +``` + +### **3. 
총 스텝 수 계산 방식** +``` +50장 × 4회 × 10 에포크 = 2000 스텝 +50장 × 10회 × 5 에포크 = 2500 스텝 +50장 × 20회 × 2 에포크 = 2000 스텝 +``` +결국 **총 스텝이 비슷**하면 결과도 비슷해요! + +--- + +## 📊 실전 테스트 결과 (커뮤니티 경험) + +| 반복 횟수 | 이미지 수 | 에포크 | 총 스텝 | 결과 | +|----------|----------|-------|---------|------| +| 4회 | 50장 | 10 | 2000 | ⭐⭐⭐⭐ 균형 잡힘 | +| 10회 | 50장 | 5 | 2500 | ⭐⭐⭐⭐⭐ 강한 학습 | +| 20회 | 50장 | 3 | 3000 | ⚠️ 과적합 위험 | +| 2회 | 100장 | 10 | 2000 | ⭐⭐⭐⭐ 자연스러움 | + +--- + +## 🎯 실전 가이드 + +### **상황별 권장 설정** + +#### **📷 고품질 데이터셋 (일관된 캐릭터/스타일)** +``` +이미지: 50장 +반복: 4~6회 +에포크: 8~10 +총 스텝: 1600~3000 + +예: 4_character_name +``` + +#### **🎨 다양한 데이터셋 (여러 포즈/의상/배경)** +``` +이미지: 50장 +반복: 8~12회 +에포크: 5~8 +총 스텝: 2000~4800 + +예: 10_character_name +``` + +#### **⚡ 빠른 테스트 (품질 확인용)** +``` +이미지: 30장 +반복: 5회 +에포크: 5 +총 스텝: 750 + +예: 5_test_character +``` + +#### **🏆 프로덕션 품질 (상업용/고품질)** +``` +이미지: 100장+ +반복: 3~5회 +에포크: 10~15 +총 스텝: 3000~7500 + +예: 3_character_name +``` + +--- + +## 💡 **내 추천: 점진적 테스트** + +### **1단계: 낮게 시작** +``` +폴더: 4_character +에포크: 5 +→ 1000 스텝에서 결과 확인 +``` + +### **2단계: 필요시 증가** +``` +만족스럽지 않으면: +폴더: 8_character +에포크: 5 +→ 2000 스텝에서 재확인 +``` + +### **3단계: 최적점 찾기** +``` +과적합 보이면: 반복 횟수 줄이기 +언더피팅이면: 반복 횟수 늘리기 +``` + +--- + +## 🔬 과학적(?) 
접근 + +### **총 스텝 기준 가이드:** +``` +1000~1500 스텝: 가벼운 스타일 LoRA +2000~3000 스텝: 캐릭터 LoRA (일반) +3000~5000 스텝: 디테일한 캐릭터 +5000+ 스텝: 복잡한 컨셉/다중 캐릭터 \ No newline at end of file From 6d17d2f13430f16ecdf80e9c39be5ff6b7ff2b66 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Thu, 23 Oct 2025 22:49:20 +0900 Subject: [PATCH 02/58] update --- .github/FUNDING.yml | 3 - .github/dependabot.yml | 7 - .github/workflows/docker_publish.yml | 91 ------- .github/workflows/typos.yaml | 21 -- .gitignore | 3 + generate-captions.cmd | 1 - generate-captions.py | 369 --------------------------- 7 files changed, 3 insertions(+), 492 deletions(-) delete mode 100644 .github/FUNDING.yml delete mode 100644 .github/dependabot.yml delete mode 100644 .github/workflows/docker_publish.yml delete mode 100644 .github/workflows/typos.yaml delete mode 100644 generate-captions.cmd delete mode 100644 generate-captions.py diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index bae5e45..0000000 --- a/.github/FUNDING.yml +++ /dev/null @@ -1,3 +0,0 @@ -# These are supported funding model platforms - -github: [bmaltais] diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 64284b9..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -version: 2 -updates: - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "monthly" diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml deleted file mode 100644 index ac198d1..0000000 --- a/.github/workflows/docker_publish.yml +++ /dev/null @@ -1,91 +0,0 @@ -# Check this guide for more information about publishing to ghcr.io with GitHub Actions: -# https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#upgrading-a-workflow-that-accesses-ghcrio - -# Build the Docker image and push it to the registry -name: docker_publish - -on: - # Trigger the 
workflow on tags push that match the pattern v*, for example v1.0.0 - push: - tags: - - "v*" - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - # Only run this job on tags - docker-tag: - runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags/') - - # Sets the permissions granted to the GITHUB_TOKEN for the actions in this job. - permissions: - contents: read - packages: write - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - submodules: true - - # We require additional space due to the large size of our image. (~10GB) - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - tool-cache: true - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: true - swap-storage: true - - - name: Docker meta:${{ github.ref_name }} - id: meta - uses: docker/metadata-action@v5 - with: - images: ghcr.io/${{ github.repository_owner }}/kohya-ss-gui - flavor: | - latest=auto - prefix= - suffix= - # https://github.com/docker/metadata-action/tree/v5/?tab=readme-ov-file#tags-input - tags: | - type=semver,pattern=v{{major}} - type=semver,pattern={{raw}} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - # You may need to manage write and read access of GitHub Actions for repositories in the container settings. - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push - uses: docker/build-push-action@v6 - id: publish - with: - context: . 
- file: ./Dockerfile - push: true - target: final - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.ref_name }} - RELEASE=${{ github.run_number }} - platforms: linux/amd64 - # Cache to regietry instead of gha to avoid the capacity limit. - cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/kohya-ss-gui:cache - cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/kohya-ss-gui:cache,mode=max - sbom: true - provenance: true diff --git a/.github/workflows/typos.yaml b/.github/workflows/typos.yaml deleted file mode 100644 index a2d02db..0000000 --- a/.github/workflows/typos.yaml +++ /dev/null @@ -1,21 +0,0 @@ ---- -# yamllint disable rule:line-length -name: Typos - -on: # yamllint disable-line rule:truthy - push: - pull_request: - types: - - opened - - synchronize - - reopened - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: typos-action - uses: crate-ci/typos@v1.32.0 diff --git a/.gitignore b/.gitignore index 084abf2..71325c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea # Python .venv venv @@ -53,6 +54,8 @@ models data config.toml sd-scripts +sd-scripts/ +sd-scripts/** venv venv* .python-version \ No newline at end of file diff --git a/generate-captions.cmd b/generate-captions.cmd deleted file mode 100644 index 1d19a22..0000000 --- a/generate-captions.cmd +++ /dev/null @@ -1 +0,0 @@ -python generate-captions.py --dirs ./dataset/mainchar/2_karina --char "karina" --device 3 \ No newline at end of file diff --git a/generate-captions.py b/generate-captions.py deleted file mode 100644 index 5f6d36a..0000000 --- a/generate-captions.py +++ /dev/null @@ -1,369 +0,0 @@ -""" -BLIP + WD14 하이브리드 캡션 생성기 -실사 LoRA 학습을 위한 통합 캡션 생성 스크립트 - -필요 환경: kohya_ss (sd-scripts) -""" - -import os -import sys -from pathlib import Path -from tqdm import tqdm -import argparse - -# Kohya_ss 모듈 임포트 -try: - # BLIP 관련 - from 
library.blip.blip import load_blip_model, generate_caption as blip_generate - # WD14 관련 - from library.train_util import load_model_from_onnx - from wd14_tagger import WD14Tagger -except ImportError: - print("❌ Kohya_ss 환경에서 실행해주세요!") - print("경로: sd-scripts/ 폴더 안에서 실행") - sys.exit(1) - - -# ============================== -# ⚙️ 설정 (수정 가능) -# ============================== - -class Config: - # 데이터셋 경로 - DATASET_DIRS = [ - "./dataset/mainchar", # 메인 캐릭터 - "./dataset/bg", # 배경/보조 - ] - - # 모델 설정 - BLIP_MODEL_PATH = "Salesforce/blip-image-captioning-large" - WD14_MODEL_PATH = "SmilingWolf/wd-v1-4-moat-tagger-v2" - - # WD14 임계값 - WD14_GENERAL_THRESHOLD = 0.35 - WD14_CHARACTER_THRESHOLD = 0.85 - - # BLIP 설정 - BLIP_MAX_LENGTH = 75 - BLIP_NUM_BEAMS = 1 # 1 = greedy, >1 = beam search - - # 제거할 WD14 메타 태그 - REMOVE_TAGS = [ - "1girl", "1boy", "solo", "looking at viewer", - "simple background", "white background", "grey background", - "highres", "absurdres", "lowres", "bad anatomy", - "signature", "watermark", "artist name", "dated", - "rating:safe", "rating:questionable", "rating:explicit", - ] - - # 출력 설정 - OUTPUT_ENCODING = "utf-8" - OVERWRITE_EXISTING = False # True면 기존 캡션 덮어쓰기 - - # 디바이스 - DEVICE = "cuda" # 또는 "cpu" - - # 백업 생성 - CREATE_BACKUP = True - - -# ============================== -# 🔧 유틸리티 함수 -# ============================== - -def normalize_tags(tags_str): - """태그 정규화: 소문자 변환, 공백 정리, 중복 제거""" - tags = [tag.strip().lower() for tag in tags_str.split(',')] - # 중복 제거 (순서 유지) - seen = set() - unique_tags = [] - for tag in tags: - if tag and tag not in seen: - seen.add(tag) - unique_tags.append(tag) - return unique_tags - - -def remove_unwanted_tags(tags_list, remove_list): - """불필요한 태그 제거""" - remove_set = set(tag.lower() for tag in remove_list) - return [tag for tag in tags_list if tag not in remove_set] - - -def merge_captions(blip_caption, wd14_tags, remove_tags): - """ - BLIP 캡션과 WD14 태그 병합 - - 형식: [BLIP 문장], [WD14 태그들] - """ - # BLIP 정규화 - blip_normalized = 
blip_caption.strip().lower() - - # WD14 태그 정규화 및 필터링 - wd14_normalized = normalize_tags(wd14_tags) - wd14_filtered = remove_unwanted_tags(wd14_normalized, remove_tags) - - # BLIP 문장의 단어들 추출 (중복 제거용) - blip_words = set(blip_normalized.replace(',', ' ').split()) - - # WD14에서 BLIP에 이미 포함된 단어 제거 (선택적) - # 예: BLIP "smiling girl" → WD14 "smile" 중복 제거 - wd14_deduped = [] - for tag in wd14_filtered: - # 태그가 BLIP 문장에 포함되지 않으면 추가 - if not any(word in tag or tag in word for word in blip_words): - wd14_deduped.append(tag) - - # 최종 병합: BLIP (문장) + WD14 (태그) - if wd14_deduped: - merged = f"{blip_normalized}, {', '.join(wd14_deduped)}" - else: - merged = blip_normalized - - return merged - - -# ============================== -# 🎨 캡션 생성 함수 -# ============================== - -def generate_blip_caption(image_path, model, processor, config): - """BLIP으로 자연어 캡션 생성""" - from PIL import Image - - try: - image = Image.open(image_path).convert("RGB") - - inputs = processor(image, return_tensors="pt").to(config.DEVICE) - - outputs = model.generate( - **inputs, - max_length=config.BLIP_MAX_LENGTH, - num_beams=config.BLIP_NUM_BEAMS, - ) - - caption = processor.decode(outputs[0], skip_special_tokens=True) - return caption.strip() - - except Exception as e: - print(f"⚠️ BLIP 생성 실패 ({image_path.name}): {e}") - return "" - - -def generate_wd14_tags(image_path, tagger, config): - """WD14로 태그 생성""" - try: - tags = tagger.tag( - str(image_path), - general_threshold=config.WD14_GENERAL_THRESHOLD, - character_threshold=config.WD14_CHARACTER_THRESHOLD, - ) - - # 태그를 콤마로 연결 - tag_string = ", ".join(tags) - return tag_string - - except Exception as e: - print(f"⚠️ WD14 생성 실패 ({image_path.name}): {e}") - return "" - - -# ============================== -# 📁 파일 처리 -# ============================== - -def get_image_files(directory): - """디렉토리에서 이미지 파일 찾기""" - image_extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} - - image_files = [] - for ext in image_extensions: - 
image_files.extend(Path(directory).glob(f"*{ext}")) - image_files.extend(Path(directory).glob(f"*{ext.upper()}")) - - return sorted(image_files) - - -def create_backup(caption_path): - """기존 캡션 파일 백업""" - if caption_path.exists(): - backup_dir = caption_path.parent / "caption_backup" - backup_dir.mkdir(exist_ok=True) - - backup_path = backup_dir / caption_path.name - import shutil - shutil.copy2(caption_path, backup_path) - - -# ============================== -# 🚀 메인 프로세스 -# ============================== - -def process_directory(directory, blip_model, blip_processor, wd14_tagger, config): - """단일 디렉토리 처리""" - - print(f"\n📁 처리 중: {directory}") - - # 이미지 파일 찾기 - image_files = get_image_files(directory) - - if not image_files: - print(f"⚠️ 이미지 파일을 찾을 수 없습니다: {directory}") - return 0 - - print(f"📸 {len(image_files)}개 이미지 발견") - - success_count = 0 - skip_count = 0 - - for image_path in tqdm(image_files, desc="캡션 생성"): - - # 캡션 파일 경로 - caption_path = image_path.with_suffix('.txt') - - # 기존 파일 존재 확인 - if caption_path.exists() and not config.OVERWRITE_EXISTING: - skip_count += 1 - continue - - # 백업 생성 - if config.CREATE_BACKUP and caption_path.exists(): - create_backup(caption_path) - - try: - # 1. BLIP 캡션 생성 - blip_caption = generate_blip_caption( - image_path, blip_model, blip_processor, config - ) - - # 2. WD14 태그 생성 - wd14_tags = generate_wd14_tags(image_path, wd14_tagger, config) - - # 3. 병합 - merged_caption = merge_captions( - blip_caption, wd14_tags, config.REMOVE_TAGS - ) - # 캐릭터명 prefix 추가 (가중치 강조) - if getattr(config, "CHARACTER_PREFIX", ""): - char_token = config.CHARACTER_PREFIX.strip() - # LoRA 학습용 강조 토큰 처리 - if not char_token.endswith(")"): - char_token = f"{char_token} (1.3)" - merged_caption = f"{char_token}, {merged_caption}" - - # 4. 
저장 - if merged_caption: - with open(caption_path, 'w', encoding=config.OUTPUT_ENCODING) as f: - f.write(merged_caption) - success_count += 1 - else: - print(f"⚠️ 빈 캡션: {image_path.name}") - - except Exception as e: - print(f"❌ 처리 실패 ({image_path.name}): {e}") - continue - - print(f"✅ 완료: {success_count}개 생성, {skip_count}개 스킵") - return success_count - - -def main(): - parser = argparse.ArgumentParser(description="BLIP + WD14 하이브리드 캡션 생성") - parser.add_argument( - "--dirs", - nargs="+", - default=Config.DATASET_DIRS, - help="처리할 디렉토리 목록" - ) - parser.add_argument( - "--overwrite", - action="store_true", - help="기존 캡션 덮어쓰기" - ) - parser.add_argument( - "--device", - default=Config.DEVICE, - help="디바이스 (cuda/cpu)" - ) - parser.add_argument( - "--char", - type=str, - default="", - help="모든 캡션 앞에 붙일 캐릭터명 (예: 'anya character')" - ) - - args = parser.parse_args() - config = Config() - config.DATASET_DIRS = args.dirs - config.OVERWRITE_EXISTING = args.overwrite - config.DEVICE = args.device - - print("=" * 60) - print("🎨 BLIP + WD14 하이브리드 캡션 생성기") - print("=" * 60) - print(f"📁 대상 디렉토리: {config.DATASET_DIRS}") - print(f"💾 덮어쓰기: {config.OVERWRITE_EXISTING}") - print(f"🖥️ 디바이스: {config.DEVICE}") - print("=" * 60) - - # 모델 로드 - print("\n🔄 모델 로딩 중...") - - try: - # BLIP 로드 - from transformers import BlipProcessor, BlipForConditionalGeneration - - print(" → BLIP 모델 로딩...") - blip_processor = BlipProcessor.from_pretrained(config.BLIP_MODEL_PATH) - blip_model = BlipForConditionalGeneration.from_pretrained( - config.BLIP_MODEL_PATH - ).to(config.DEVICE) - blip_model.eval() - - # WD14 로드 - print(" → WD14 Tagger 로딩...") - wd14_tagger = WD14Tagger( - model_dir=config.WD14_MODEL_PATH, - device=config.DEVICE - ) - - print("✅ 모델 로딩 완료!\n") - - except Exception as e: - print(f"❌ 모델 로딩 실패: {e}") - sys.exit(1) - - # 각 디렉토리 처리 - total_success = 0 - - for directory in config.DATASET_DIRS: - if not Path(directory).exists(): - print(f"⚠️ 디렉토리 없음: {directory}") - continue - - count = 
process_directory( - directory, blip_model, blip_processor, wd14_tagger, config - ) - total_success += count - - # 완료 메시지 - print("\n" + "=" * 60) - print(f"🎉 전체 완료!") - print(f"✅ 총 {total_success}개 캡션 생성됨") - print("=" * 60) - - # 결과 예시 출력 - print("\n📝 생성 예시:") - for directory in config.DATASET_DIRS: - txt_files = list(Path(directory).glob("*.txt")) - if txt_files: - example_file = txt_files[0] - with open(example_file, 'r', encoding='utf-8') as f: - content = f.read() - print(f"\n{example_file.name}:") - print(f" {content[:100]}...") - break - - -if __name__ == "__main__": - main() \ No newline at end of file From f05b9bc2b14f23576a10cc2e98e8d748a900d2dc Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Thu, 23 Oct 2025 23:20:11 +0900 Subject: [PATCH 03/58] update --- generate-captions-standalone.py | 365 -------------------------------- 1 file changed, 365 deletions(-) delete mode 100644 generate-captions-standalone.py diff --git a/generate-captions-standalone.py b/generate-captions-standalone.py deleted file mode 100644 index 7c2fcc0..0000000 --- a/generate-captions-standalone.py +++ /dev/null @@ -1,365 +0,0 @@ -""" -독립 실행형 BLIP + WD14 하이브리드 캡션 생성기 -실사 LoRA 학습을 위한 통합 캡션 생성 스크립트 - -설치 필요: -pip install transformers pillow torch torchvision onnxruntime-gpu huggingface_hub -""" - -import os -import sys -from pathlib import Path -from tqdm import tqdm -import argparse -from PIL import Image -import torch - - -# ============================== -# ⚙️ 설정 (수정 가능) -# ============================== - -class Config: - # 데이터셋 경로 - DATASET_DIRS = [ - "./dataset/mainchar", # 메인 캐릭터 - "./dataset/bg", # 배경/보조 - ] - - # 모델 설정 - BLIP_MODEL = "Salesforce/blip-image-captioning-large" - WD14_MODEL = "SmilingWolf/wd-v1-4-moat-tagger-v2" - - # WD14 임계값 - WD14_GENERAL_THRESHOLD = 0.35 - WD14_CHARACTER_THRESHOLD = 0.85 - - # BLIP 설정 - BLIP_MAX_LENGTH = 75 - BLIP_NUM_BEAMS = 1 # 1=greedy, >1=beam search - - # 제거할 WD14 메타 태그 - REMOVE_TAGS = [ - # 메타 태그 - "1girl", "1boy", "solo", "2girls", 
"3girls", "multiple girls", - "looking at viewer", "facing viewer", "solo focus", - # 배경 - "simple background", "white background", "grey background", - "transparent background", "gradient background", - # 품질/메타데이터 - "highres", "absurdres", "lowres", "bad anatomy", - "signature", "watermark", "artist name", "dated", - "commentary", "username", - # Danbooru 메타 - "rating:safe", "rating:questionable", "rating:explicit", - "safe", "questionable", "explicit", - ] - - # 출력 설정 - OUTPUT_ENCODING = "utf-8" - OVERWRITE_EXISTING = False - CREATE_BACKUP = True - - # 디바이스 - DEVICE = "cuda" if torch.cuda.is_available() else "cpu" - - # 캡션 포맷 - # "blip_first": BLIP 문장이 먼저 - # "tags_first": WD14 태그가 먼저 - CAPTION_FORMAT = "blip_first" - - -# ============================== -# 🔧 유틸리티 함수 -# ============================== - -def normalize_tags(tags_str): - """태그 정규화: 소문자, 공백 정리, 중복 제거""" - if not tags_str: - return [] - tags = [tag.strip().lower() for tag in tags_str.split(',')] - # 중복 제거 (순서 유지) - seen = set() - unique_tags = [] - for tag in tags: - if tag and tag not in seen: - seen.add(tag) - unique_tags.append(tag) - return unique_tags - - -def remove_unwanted_tags(tags_list, remove_list): - """불필요한 태그 제거""" - remove_set = set(tag.lower() for tag in remove_list) - return [tag for tag in tags_list if tag not in remove_set] - - -def merge_captions(blip_caption, wd14_tags, remove_tags, format_type="blip_first"): - """ - BLIP 캡션과 WD14 태그 병합 - """ - # BLIP 정규화 - blip_normalized = blip_caption.strip().lower() if blip_caption else "" - - # WD14 태그 정규화 및 필터링 - wd14_normalized = normalize_tags(wd14_tags) - wd14_filtered = remove_unwanted_tags(wd14_normalized, remove_tags) - - # BLIP 문장의 단어들 (중복 제거용) - blip_words = set(blip_normalized.replace(',', ' ').split()) if blip_normalized else set() - - # WD14에서 BLIP 중복 제거 - wd14_deduped = [] - for tag in wd14_filtered: - # 단순 중복 체크 (선택적으로 비활성화 가능) - tag_words = set(tag.replace('_', ' ').split()) - if not tag_words.intersection(blip_words): - 
wd14_deduped.append(tag) - - # 최종 병합 - if format_type == "blip_first": - # BLIP 문장 + WD14 태그 - if blip_normalized and wd14_deduped: - merged = f"{blip_normalized}, {', '.join(wd14_deduped)}" - elif blip_normalized: - merged = blip_normalized - elif wd14_deduped: - merged = ', '.join(wd14_deduped) - else: - merged = "" - else: - # WD14 태그 + BLIP 문장 - if wd14_deduped and blip_normalized: - merged = f"{', '.join(wd14_deduped)}, {blip_normalized}" - elif wd14_deduped: - merged = ', '.join(wd14_deduped) - elif blip_normalized: - merged = blip_normalized - else: - merged = "" - - return merged - - -# ============================== -# 🎨 BLIP 캡션 생성 -# ============================== - -class BLIPCaptioner: - def __init__(self, model_name, device, max_length=75, num_beams=1): - from transformers import BlipProcessor, BlipForConditionalGeneration - - print(f" → BLIP 모델 로딩... ({model_name})") - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(device) - self.model.eval() - self.device = device - self.max_length = max_length - self.num_beams = num_beams - - def generate(self, image_path): - try: - image = Image.open(image_path).convert("RGB") - inputs = self.processor(image, return_tensors="pt").to(self.device) - - with torch.no_grad(): - outputs = self.model.generate( - **inputs, - max_length=self.max_length, - num_beams=self.num_beams, - ) - - caption = self.processor.decode(outputs[0], skip_special_tokens=True) - return caption.strip() - - except Exception as e: - print(f"⚠️ BLIP 실패 ({Path(image_path).name}): {e}") - return "" - - -# ============================== -# 🏷️ WD14 태그 생성 -# ============================== - -class WD14Tagger: - def __init__(self, model_name, device, general_thresh=0.35, character_thresh=0.85): - import onnxruntime as ort - from huggingface_hub import hf_hub_download - - print(f" → WD14 모델 로딩... 
({model_name})") - - # 모델 다운로드 - model_path = hf_hub_download(model_name, filename="model.onnx") - tags_path = hf_hub_download(model_name, filename="selected_tags.csv") - - # ONNX 세션 - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if device == "cuda" else ['CPUExecutionProvider'] - self.session = ort.InferenceSession(model_path, providers=providers) - - # 태그 로드 - import pandas as pd - self.tags_df = pd.read_csv(tags_path) - self.general_thresh = general_thresh - self.character_thresh = character_thresh - - def generate(self, image_path): - try: - import numpy as np - - # 이미지 전처리 - image = Image.open(image_path).convert("RGB") - image = image.resize((448, 448)) - image_array = np.array(image).astype(np.float32) / 255.0 - image_array = np.expand_dims(image_array, axis=0) - - # 추론 - input_name = self.session.get_inputs()[0].name - output = self.session.run(None, {input_name: image_array})[0] - - # 태그 필터링 - tags = [] - for i, score in enumerate(output[0]): - tag_type = self.tags_df.iloc[i]['category'] - threshold = self.character_thresh if tag_type == 4 else self.general_thresh - - if score >= threshold: - tag_name = self.tags_df.iloc[i]['name'].replace('_', ' ') - tags.append(tag_name) - - return ', '.join(tags) - - except Exception as e: - print(f"⚠️ WD14 실패 ({Path(image_path).name}): {e}") - return "" - - -# ============================== -# 📁 파일 처리 -# ============================== - -def get_image_files(directory): - """이미지 파일 찾기""" - extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} - image_files = [] - - dir_path = Path(directory) - for ext in extensions: - image_files.extend(dir_path.glob(f"*{ext}")) - image_files.extend(dir_path.glob(f"*{ext.upper()}")) - - return sorted(image_files) - - -def create_backup(caption_path): - """백업 생성""" - if caption_path.exists(): - backup_dir = caption_path.parent / "caption_backup" - backup_dir.mkdir(exist_ok=True) - - import shutil - backup_path = backup_dir / caption_path.name - 
shutil.copy2(caption_path, backup_path) - - -# ============================== -# 🚀 메인 프로세스 -# ============================== - -def process_directory(directory, blip_captioner, wd14_tagger, config): - """디렉토리 처리""" - print(f"\n📁 처리 중: {directory}") - - image_files = get_image_files(directory) - - if not image_files: - print(f"⚠️ 이미지 없음: {directory}") - return 0 - - print(f"📸 {len(image_files)}개 이미지 발견") - - success_count = 0 - skip_count = 0 - - for image_path in tqdm(image_files, desc="캡션 생성"): - caption_path = image_path.with_suffix('.txt') - - # 기존 파일 확인 - if caption_path.exists() and not config.OVERWRITE_EXISTING: - skip_count += 1 - continue - - # 백업 - if config.CREATE_BACKUP and caption_path.exists(): - create_backup(caption_path) - - try: - # BLIP 생성 - blip_caption = blip_captioner.generate(image_path) - - # WD14 생성 - wd14_tags = wd14_tagger.generate(image_path) - - # 병합 - merged = merge_captions( - blip_caption, wd14_tags, - config.REMOVE_TAGS, - config.CAPTION_FORMAT - ) - - # 저장 - if merged: - with open(caption_path, 'w', encoding=config.OUTPUT_ENCODING) as f: - f.write(merged) - success_count += 1 - else: - print(f"⚠️ 빈 캡션: {image_path.name}") - - except Exception as e: - print(f"❌ 실패 ({image_path.name}): {e}") - continue - - print(f"✅ 완료: {success_count}개 생성, {skip_count}개 스킵") - return success_count - - -def main(): - parser = argparse.ArgumentParser(description="BLIP + WD14 하이브리드 캡션") - parser.add_argument("--dirs", nargs="+", help="처리할 디렉토리") - parser.add_argument("--overwrite", action="store_true", help="덮어쓰기") - parser.add_argument("--device", default=None, help="cuda/cpu") - parser.add_argument("--format", choices=["blip_first", "tags_first"], help="캡션 포맷") - - args = parser.parse_args() - - config = Config() - if args.dirs: - config.DATASET_DIRS = args.dirs - if args.overwrite: - config.OVERWRITE_EXISTING = True - if args.device: - config.DEVICE = args.device - if args.format: - config.CAPTION_FORMAT = args.format - - print("=" * 60) - print("🎨 
BLIP + WD14 하이브리드 캡션 생성기") - print("=" * 60) - print(f"📁 대상: {config.DATASET_DIRS}") - print(f"💾 덮어쓰기: {config.OVERWRITE_EXISTING}") - print(f"🖥️ 디바이스: {config.DEVICE}") - print(f"📝 포맷: {config.CAPTION_FORMAT}") - print("=" * 60) - - # 모델 로드 - print("\n🔄 모델 로딩 중...") - - try: - blip_captioner = BLIPCaptioner( - config.BLIP_MODEL, - config.DEVICE, - config.BLIP_MAX_LENGTH, - config.BLIP_NUM_BEAMS - ) - - wd14_tagger = WD14Tagger( - config.WD14_MODEL, \ No newline at end of file From b7789cdaf07b7e89ca32f112f3385f302f321473 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Fri, 24 Oct 2025 05:52:53 +0900 Subject: [PATCH 04/58] update --- Dockerfile | 3 +++ run-train-extd.cmd | 31 ------------------------------- run-train-simple.cmd | 1 + run-train.cmd | 32 +++++++++++++++++++++++++++++++- 4 files changed, 35 insertions(+), 32 deletions(-) delete mode 100644 run-train-extd.cmd create mode 100644 run-train-simple.cmd diff --git a/Dockerfile b/Dockerfile index 3b5d5ae..873f3a1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,6 @@ + +# pytorch/pytorch:2.3.0-cuda12.1-cudnn8-devel + # syntax=docker/dockerfile:1 ARG UID=1000 ARG VERSION=EDGE diff --git a/run-train-extd.cmd b/run-train-extd.cmd deleted file mode 100644 index 1977a22..0000000 --- a/run-train-extd.cmd +++ /dev/null @@ -1,31 +0,0 @@ -$env:CUDA_VISIBLE_DEVICES = "1" - -accelerate launch --num_cpu_threads_per_process 1 --mixed_precision bf16 ^ - sdxl_train_network.py ^ - --pretrained_model_name_or_path="./models/stable-diffusion-xl-base-1.0" ^ - --train_data_dir="./train_data" ^ - --output_dir="./output_model" ^ - --logging_dir="./logs" ^ - --output_name="karina" ^ - --network_module=networks.lora ^ - --network_dim=32 ^ - --network_alpha=16 ^ - --learning_rate=1e-4 ^ - --optimizer_type="AdamW8bit" ^ - --lr_scheduler="cosine" ^ - --lr_warmup_steps=100 ^ - --max_train_epochs=15 ^ - --save_every_n_epochs=1 ^ - --mixed_precision="bf16" ^ - --save_precision="bf16" ^ - --cache_latents ^ - --cache_latents_to_disk ^ - 
--cache_text_encoder_outputs ^ - --gradient_checkpointing ^ - --xformers ^ - --seed=42 ^ - --bucket_no_upscale ^ - --min_bucket_reso=512 ^ - --max_bucket_reso=2048 ^ - --bucket_reso_steps=64 ^ - --resolution="1024,1024" \ No newline at end of file diff --git a/run-train-simple.cmd b/run-train-simple.cmd new file mode 100644 index 0000000..309b1aa --- /dev/null +++ b/run-train-simple.cmd @@ -0,0 +1 @@ +accelerate launch --num_cpu_threads_per_process 8 train_network.py --config_file=config_5080.json \ No newline at end of file diff --git a/run-train.cmd b/run-train.cmd index 309b1aa..9f17493 100644 --- a/run-train.cmd +++ b/run-train.cmd @@ -1 +1,31 @@ -accelerate launch --num_cpu_threads_per_process 8 train_network.py --config_file=config_5080.json \ No newline at end of file +setx CUDA_VISIBLE_DEVICES "1" + +accelerate launch --num_cpu_threads_per_process 1 --mixed_precision bf16 ^ + sdxl_train_network.py ^ + --pretrained_model_name_or_path="./models/stable-diffusion-xl-base-1.0" ^ + --train_data_dir="./dataset/mainchar" ^ + --output_dir="./output_model" ^ + --logging_dir="./logs" ^ + --output_name="karina" ^ + --network_module=networks.lora ^ + --network_dim=32 ^ + --network_alpha=16 ^ + --learning_rate=1e-4 ^ + --optimizer_type="AdamW8bit" ^ + --lr_scheduler="cosine" ^ + --lr_warmup_steps=100 ^ + --max_train_epochs=15 ^ + --save_every_n_epochs=1 ^ + --mixed_precision="bf16" ^ + --save_precision="bf16" ^ + --cache_latents ^ + --cache_latents_to_disk ^ + --cache_text_encoder_outputs ^ + --gradient_checkpointing ^ + --xformers ^ + --seed=42 ^ + --bucket_no_upscale ^ + --min_bucket_reso=512 ^ + --max_bucket_reso=2048 ^ + --bucket_reso_steps=64 ^ + --resolution="1024,1024" \ No newline at end of file From 60a4212d655826462165893408d67addcf0950a0 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Fri, 24 Oct 2025 05:53:37 +0900 Subject: [PATCH 05/58] update --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 
71325c7..6fd9a76 100644 --- a/.gitignore +++ b/.gitignore @@ -58,4 +58,7 @@ sd-scripts/ sd-scripts/** venv venv* -.python-version \ No newline at end of file +.python-version +output_model +output_model/ +output_model/** \ No newline at end of file From 7ad6f139b52ae2640337606fcdb455d9a79bbc8b Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Fri, 24 Oct 2025 22:16:44 +0900 Subject: [PATCH 06/58] update --- dataset/images/.gitkeep | 0 dataset/outputs/.gitkeep | 0 run-train.cmd | 2 +- sd-scripts | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 dataset/images/.gitkeep delete mode 100644 dataset/outputs/.gitkeep diff --git a/dataset/images/.gitkeep b/dataset/images/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/dataset/outputs/.gitkeep b/dataset/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/run-train.cmd b/run-train.cmd index 9f17493..7e59285 100644 --- a/run-train.cmd +++ b/run-train.cmd @@ -3,7 +3,7 @@ setx CUDA_VISIBLE_DEVICES "1" accelerate launch --num_cpu_threads_per_process 1 --mixed_precision bf16 ^ sdxl_train_network.py ^ --pretrained_model_name_or_path="./models/stable-diffusion-xl-base-1.0" ^ - --train_data_dir="./dataset/mainchar" ^ + --train_data_dir="./dataset/train/mainchar" ^ --output_dir="./output_model" ^ --logging_dir="./logs" ^ --output_name="karina" ^ diff --git a/sd-scripts b/sd-scripts index 3e6935a..c77ae04 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 3e6935a07edcb944407840ef74fcaf6fcad352f7 +Subproject commit c77ae0492ee24e2279eb66d3405de22237ac757f From 4c170763479bca0954864053a20518381a219cab Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Fri, 24 Oct 2025 22:56:00 +0900 Subject: [PATCH 07/58] update --- .dockerignore | 8 +- Dockerfile | 201 +++--------- README.md | 557 +++++++++++++++------------------- SECURITY.md | 14 - config_files/config-5080.json | 2 +- config_files/config-5090.json | 4 +- run-train-simple.cmd | 1 - run-train.cmd 
| 31 -- run-venv.cmd | 1 - sd-scripts | 2 +- 10 files changed, 295 insertions(+), 526 deletions(-) delete mode 100644 SECURITY.md delete mode 100644 run-train-simple.cmd delete mode 100644 run-train.cmd delete mode 100644 run-venv.cmd diff --git a/.dockerignore b/.dockerignore index 9849d33..fd39772 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,7 +3,6 @@ cudnn_windows/ bitsandbytes_windows/ bitsandbytes_windows_deprecated/ dataset/ -models/ __pycache__/ venv/ **/.hadolint.yml @@ -14,3 +13,10 @@ venv/ **/.github **/.vscode **/*.ps1 +.idea/ +.vscode/ +docs/ +examples/ +logs/ +outputs/ +#models/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 873f3a1..1bd5fa6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,182 +1,49 @@ +# CUDA 12.1 + PyTorch 2.3.0 +FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-devel -# pytorch/pytorch:2.3.0-cuda12.1-cudnn8-devel - -# syntax=docker/dockerfile:1 -ARG UID=1000 -ARG VERSION=EDGE -ARG RELEASE=0 - -######################################## -# Base stage -######################################## -FROM docker.io/library/python:3.11-slim-bookworm AS base - -# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892 -ARG TARGETARCH -ARG TARGETVARIANT - -WORKDIR /tmp - -ENV NVIDIA_VISIBLE_DEVICES=all -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility - -# Install CUDA partially -# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/#debian -# Installing the complete CUDA Toolkit system-wide usually adds around 8GB to the image size. -# Since most CUDA packages already installed through pip, there's no need to download the entire toolkit. -# Therefore, we opt to install only the essential libraries. 
-# Here is the package list for your reference: https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64 - -ADD https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb /tmp/cuda-keyring_x86_64.deb -RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \ - --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \ - dpkg -i cuda-keyring_x86_64.deb && \ - rm -f cuda-keyring_x86_64.deb && \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - # !If you experience any related issues, replace the following line with `cuda-12-8` to obtain the complete CUDA package. - cuda-nvcc-12-8 - -ENV PATH="/usr/local/cuda/bin${PATH:+:${PATH}}" -ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 -ENV CUDA_VERSION=12.8 -ENV NVIDIA_REQUIRE_CUDA=cuda>=12.8 -ENV CUDA_HOME=/usr/local/cuda - -######################################## -# Build stage -######################################## -FROM base AS build - -# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892 -ARG TARGETARCH -ARG TARGETVARIANT - +# 기본 작업 경로 설정 WORKDIR /app -# Install uv -COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ +# 필수 패키지 설치 +RUN apt-get update && apt-get install -y --no-install-recommends \ + git wget curl vim \ + && rm -rf /var/lib/apt/lists/* -ENV UV_PROJECT_ENVIRONMENT=/venv -ENV VIRTUAL_ENV=/venv -ENV UV_LINK_MODE=copy -ENV UV_PYTHON_DOWNLOADS=0 -ENV UV_INDEX=https://download.pytorch.org/whl/cu128 +# Python 패키지 캐싱 방지 +ENV PIP_NO_CACHE_DIR=1 -# Install build dependencies -RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \ - --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \ - apt-get update && apt-get upgrade -y && \ - apt-get install -y --no-install-recommends python3-launchpadlib git curl +# kohya_ss 전체 복사 (모델 포함) 
+COPY kohya_ss /app/kohya_ss -# Install big dependencies separately for layer caching -# !Please note that the version restrictions should be the same as pyproject.toml -# No packages listed should be removed in the next `uv sync` command -# If this happens, please update the version restrictions or update the uv.lock file -RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \ - uv venv --system-site-packages /venv && \ - uv pip install --no-deps \ - # torch (1.0GiB) - torch==2.7.0+cu128 \ - # triton (149.3MiB) - triton>=3.1.0 \ - # tensorflow (615.0MiB) - tensorflow>=2.16.1 \ - # onnxruntime-gpu (215.7MiB) - onnxruntime-gpu==1.19.2 +# pip 업그레이드 및 공통 유틸 설치 +RUN pip install --upgrade pip setuptools wheel \ + && pip install --no-cache-dir accelerate bitsandbytes xformers -# Install dependencies -RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \ - --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ - --mount=type=bind,source=uv.lock,target=uv.lock \ - --mount=type=bind,source=sd-scripts,target=sd-scripts,rw \ - uv sync --frozen --no-dev --no-install-project --no-editable - -# Replace pillow with pillow-simd (Only for x86) -ARG TARGETPLATFORM -RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \ - --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \ - if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ - apt-get update && apt-get install -y --no-install-recommends zlib1g-dev libjpeg62-turbo-dev build-essential && \ - uv pip uninstall pillow && \ - CC="cc -mavx2" uv pip install pillow-simd; \ +# 두 requirements.txt 모두 설치 +RUN pip install --no-cache-dir -r /app/kohya_ss/requirements.txt --use-pep517 \ + && if [ -f /app/kohya_ss/sd-scripts/requirements.txt ]; then \ + pip install --no-cache-dir -r /app/kohya_ss/sd-scripts/requirements.txt --use-pep517; \ fi 
-######################################## -# Final stage -######################################## -FROM base AS final +# 모델 파일 복사 (미리 포함시킬 가중치) +COPY kohya_ss/sd-scripts/models /app/kohya_ss/sd-scripts/models -ARG TARGETARCH -ARG TARGETVARIANT +# (선택) BLIP/WD14 등 관련 종속 추가 +RUN pip install transformers==4.44.2 accelerate==0.33.0 \ + torch torchvision torchaudio -WORKDIR /tmp +# 모델 디렉토리 확인 로그 +RUN echo "✅ Copied models:" && ls -R /app/kohya_ss/models || echo "⚠️ No models found" -# Install runtime dependencies -RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \ - --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \ - apt-get update && apt-get upgrade -y && \ - apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libjpeg62 libtcl8.6 libtk8.6 libgoogle-perftools-dev dumb-init +# 엔트리포인트 복사 및 실행 권한 +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh -# Fix missing libnvinfer7 -RUN ln -s /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer.so.7 && \ - ln -s /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7 +# 환경 변수 기본값 +ENV TRAIN_DIR=/workspace/dataset +ENV OUTPUT_DIR=/workspace/output_model -# Create user -ARG UID -RUN groupadd -g $UID $UID && \ - useradd -l -u $UID -g $UID -m -s /bin/sh -N $UID +# 볼륨 마운트 포인트 +VOLUME ["/workspace/dataset", "/workspace/output_model"] -# Create directories with correct permissions -RUN install -d -m 775 -o $UID -g 0 /dataset && \ - install -d -m 775 -o $UID -g 0 /licenses && \ - install -d -m 775 -o $UID -g 0 /app && \ - install -d -m 775 -o $UID -g 0 /venv - -# Copy licenses (OpenShift Policy) -COPY --link --chmod=775 LICENSE.md /licenses/LICENSE.md - -# Copy dependencies and code (and support arbitrary uid for OpenShift best practice) -COPY --link --chown=$UID:0 --chmod=775 --from=build /venv /venv -COPY --link --chown=$UID:0 --chmod=775 . 
/app - -ENV PATH="/venv/bin${PATH:+:${PATH}}" -ENV PYTHONPATH="/venv/lib/python3.11/site-packages" - -ENV LD_LIBRARY_PATH="/venv/lib/python3.11/site-packages/nvidia/cudnn/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" -ENV LD_PRELOAD=libtcmalloc.so -ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python - -# Rich logging -# https://rich.readthedocs.io/en/stable/console.html#interactive-mode -ENV FORCE_COLOR="true" -ENV COLUMNS="100" - -WORKDIR /app - -VOLUME [ "/dataset" ] - -# 7860: Kohya GUI -EXPOSE 7860 - -USER $UID - -STOPSIGNAL SIGINT - -# Use dumb-init as PID 1 to handle signals properly -ENTRYPOINT ["dumb-init", "--"] -CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860", "--headless", "--noverify"] - -ARG VERSION -ARG RELEASE -LABEL name="bmaltais/kohya_ss" \ - vendor="bmaltais" \ - maintainer="bmaltais" \ - # Dockerfile source repository - url="https://github.com/bmaltais/kohya_ss" \ - version=${VERSION} \ - # This should be a number, incremented with each change - release=${RELEASE} \ - io.k8s.display-name="kohya_ss" \ - summary="Kohya's GUI: This repository provides a Gradio GUI for Kohya's Stable Diffusion trainers(https://github.com/kohya-ss/sd-scripts)." \ - description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss." 
+ENTRYPOINT ["/entrypoint.sh"] diff --git a/README.md b/README.md index 252653a..2b0efa0 100644 --- a/README.md +++ b/README.md @@ -1,307 +1,250 @@ -# Kohya's GUI - -[![GitHub stars](https://img.shields.io/github/stars/bmaltais/kohya_ss?style=social)](https://github.com/bmaltais/kohya_ss/stargazers) -[![GitHub forks](https://img.shields.io/github/forks/bmaltais/kohya_ss?style=social)](https://github.com/bmaltais/kohya_ss/network/members) -[![License](https://img.shields.io/github/license/bmaltais/kohya_ss)](LICENSE.md) -[![GitHub issues](https://img.shields.io/github/issues/bmaltais/kohya_ss)](https://github.com/bmaltais/kohya_ss/issues) - -This is a GUI and CLI for training diffusion models. - -This project provides a user-friendly Gradio-based Graphical User Interface (GUI) for [Kohya's Stable Diffusion training scripts](https://github.com/kohya-ss/sd-scripts). -Stable Diffusion training empowers users to customize image generation models by fine-tuning existing models, creating unique artistic styles, -and training specialized models like LoRA (Low-Rank Adaptation). - -Key features of this GUI include: -* Easy-to-use interface for setting a wide range of training parameters. -* Automatic generation of the command-line interface (CLI) commands required to run the training scripts. -* Support for various training methods, including LoRA, Dreambooth, fine-tuning, and SDXL training. - -Support for Linux and macOS is also available. While Linux support is actively maintained through community contributions, macOS compatibility may vary. 
- -## Table of Contents - -- [Installation Options](#installation-options) - - [Local Installation Overview](#local-installation-overview) - - [`uv` vs `pip` – What's the Difference?](#uv-vs-pip--whats-the-difference) - - [Cloud Installation Overview](#cloud-installation-overview) - - [Colab](#-colab) - - [Runpod, Novita, Docker](#runpod-novita-docker) -- [Custom Path Defaults](#custom-path-defaults) - - [LoRA](#lora) - - [Sample image generation during training](#sample-image-generation-during-training) - - [Troubleshooting](#troubleshooting) - - [Page File Limit](#page-file-limit) - - [No module called tkinter](#no-module-called-tkinter) - - [LORA Training on TESLA V100 - GPU Utilization Issue](#lora-training-on-tesla-v100---gpu-utilization-issue) -- [SDXL training](#sdxl-training) -- [Masked loss](#masked-loss) -- [Guides](#guides) - - [Using Accelerate Lora Tab to Select GPU ID](#using-accelerate-lora-tab-to-select-gpu-id) - - [Starting Accelerate in GUI](#starting-accelerate-in-gui) - - [Running Multiple Instances (linux)](#running-multiple-instances-linux) - - [Monitoring Processes](#monitoring-processes) -- [Interesting Forks](#interesting-forks) -- [Contributing](#contributing) -- [License](#license) -- [Change History](#change-history) - - [v25.0.3](#v2503) - - [v25.0.2](#v2502) - - [v25.0.1](#v2501) - - [v25.0.0](#v2500) - - -## Installation Options - -You can run `kohya_ss` either **locally on your machine** or via **cloud-based solutions** like Colab or Runpod. - -- If you have a GPU-equipped PC and want full control: install it locally using `uv` or `pip`. -- If your system doesn’t meet requirements or you prefer a browser-based setup: use Colab or a paid GPU provider like Runpod or Novita. -- If you are a developer or DevOps user, Docker is also supported. - ---- - -### Local Installation Overview - -You can install `kohya_ss` locally using either the `uv` or `pip` method. 
Choose one depending on your platform and preferences: - -| Platform | Recommended Method | Instructions | -|--------------|----------------|---------------------------------------------| -| Linux | `uv` | [uv_linux.md](./docs/Installation/uv_linux.md) | -| Linux or Mac | `pip` | [pip_linux.md](./docs/Installation/pip_linux.md) | -| Windows | `uv` | [uv_windows.md](./docs/Installation/uv_windows.md) | -| Windows | `pip` | [pip_windows.md](./docs/Installation/pip_windows.md) | - -#### `uv` vs `pip` – What's the Difference? - -- `uv` is faster and isolates dependencies more cleanly, ideal if you want minimal setup hassle. -- `pip` is more traditional, easier to debug if issues arise, and works better with some IDEs or Python tooling. -- If unsure: try `uv`. If it doesn't work for you, fall back to `pip`. - -### Cloud Installation Overview - -#### 🦒 Colab - -For browser-based training without local setup, use this Colab notebook: - - -- No installation required -- Free to use (GPU availability may vary) -- Maintained by **camenduru**, not the original author - -| Colab | Info | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------ | -| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/kohya_ss-colab/blob/main/kohya_ss_colab.ipynb) | kohya_ss_gui_colab | - -> 💡 If you encounter issues, please report them on camenduru’s repo. - -**Special thanks** -I would like to express my gratitude to camenduru for their valuable contribution. - -#### Runpod, Novita, Docker - -These options are for users running training on hosted GPU infrastructure or containers. - -- **[Runpod setup](docs/runpod_setup.md)** – Ready-made GPU background training via templates. -- **[Novita setup](docs/novita_setup.md)** – Similar to Runpod, but integrated into the Novita UI. 
-- **[Docker setup](docs/docker.md)** – For developers/sysadmins using containerized environments. - - -## Custom Path Defaults with `config.toml` - -The GUI supports a configuration file named `config.toml` that allows you to set default paths for many of the input fields. This is useful for avoiding repetitive manual selection of directories every time you start the GUI. - -**Purpose of `config.toml`:** - -* Pre-fill default directory paths for pretrained models, datasets, output folders, LoRA models, etc. -* Streamline your workflow by having the GUI remember your preferred locations. - -**How to Use and Customize:** - -1. **Create your configuration file:** - * In the root directory of the `kohya_ss` repository, you'll find a file named `config example.toml`. - * Copy this file and rename the copy to `config.toml`. This `config.toml` file will be automatically loaded when the GUI starts. -2. **Edit `config.toml`:** - * Open `config.toml` with a text editor. - * The file uses TOML (Tom's Obvious, Minimal Language) format, which consists of `key = "value"` pairs. - * Modify the paths for the keys according to your local directory structure. - * **Important:** - * Use absolute paths (e.g., `C:/Users/YourName/StableDiffusion/Models` or `/home/yourname/sd-models`). - * Alternatively, you can use paths relative to the `kohya_ss` root directory. - * Ensure you use forward slashes (`/`) for paths, even on Windows, as this is generally more compatible with TOML and Python. - * Make sure the specified directories exist on your system. - -**Structure of `config.toml`:** - -The `config.toml` file can have several sections, typically corresponding to different training modes or general settings. Common keys you might want to set include: - -* `model_dir`: Default directory for loading base Stable Diffusion models. -* `lora_model_dir`: Default directory for saving and loading LoRA models. 
-* `output_dir`: Default base directory for training outputs (images, logs, model checkpoints). -* `dataset_dir`: A general default if you store all your datasets in one place. -* Specific input paths for different training tabs like Dreambooth, Finetune, LoRA, etc. (e.g., `db_model_dir`, `ft_source_model_name_or_path`). - -**Example Configurations:** - -Here's an example snippet of what your `config.toml` might look like: - -```toml -# General settings -model_dir = "C:/ai_stuff/stable-diffusion-webui/models/Stable-diffusion" -lora_model_dir = "C:/ai_stuff/stable-diffusion-webui/models/Lora" -vae_dir = "C:/ai_stuff/stable-diffusion-webui/models/VAE" -output_dir = "C:/ai_stuff/kohya_ss_outputs" -logging_dir = "C:/ai_stuff/kohya_ss_outputs/logs" - -# Dreambooth specific paths -db_model_dir = "C:/ai_stuff/stable-diffusion-webui/models/Stable-diffusion" -db_reg_image_dir = "C:/ai_stuff/datasets/dreambooth_regularization_images" -# Add other db_... paths as needed - -# Finetune specific paths -ft_model_dir = "C:/ai_stuff/stable-diffusion-webui/models/Stable-diffusion" -# Add other ft_... paths as needed - -# LoRA / LoCon specific paths -lc_model_dir = "C:/ai_stuff/stable-diffusion-webui/models/Stable-diffusion" # Base model for LoRA training -lc_output_dir = "C:/ai_stuff/kohya_ss_outputs/lora" -lc_dataset_dir = "C:/ai_stuff/datasets/my_lora_project" -# Add other lc_... paths as needed - -# You can find a comprehensive list of all available keys in the `config example.toml` file. -# Refer to it to customize paths for all supported options in the GUI. 
-``` - -**Using a Custom Config File Path:** - -If you prefer to name your configuration file differently or store it in another location, you can specify its path using the `--config` command-line argument when launching the GUI: - -* On Windows: `gui.bat --config D:/my_configs/kohya_settings.toml` -* On Linux/macOS: `./gui.sh --config /home/user/my_configs/kohya_settings.toml` - -By effectively using `config.toml`, you can significantly speed up your training setup process. Always refer to the `config example.toml` for the most up-to-date list of configurable paths. - -## LoRA - -To train a LoRA, you can currently use the `train_network.py` code. You can create a LoRA network by using the all-in-one GUI. - -Once you have created the LoRA network, you can generate images using auto1111 by installing [this extension](https://github.com/kohya-ss/sd-webui-additional-networks). - -For more detailed information on LoRA training options and advanced configurations, please refer to our LoRA documentation: -- [LoRA Training Guide](docs/LoRA/top_level.md) -- [LoRA Training Options](docs/LoRA/options.md) - -## Sample image generation during training - -A prompt file might look like this, for example: - -```txt -# prompt 1 -masterpiece, best quality, (1girl), in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy, bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28 - -# prompt 2 -masterpiece, best quality, 1boy, in business suit, standing at street, looking back --n (low quality, worst quality), bad anatomy, bad composition, poor, low effort --w 576 --h 832 --d 2 --l 5.5 --s 40 -``` - -Lines beginning with `#` are comments. You can specify options for the generated image with options like `--n` after the prompt. The following options can be used: - -- `--n`: Negative prompt up to the next option. -- `--w`: Specifies the width of the generated image. 
-- `--h`: Specifies the height of the generated image. -- `--d`: Specifies the seed of the generated image. -- `--l`: Specifies the CFG scale of the generated image. -- `--s`: Specifies the number of steps in the generation. - -The prompt weighting such as `( )` and `[ ]` is working. - -## Troubleshooting - -If you encounter any issues, refer to the troubleshooting steps below. - -### Page File Limit - -If you encounter an X error related to the page file, you may need to increase the page file size limit in Windows. - -### No module called tkinter - -If you encounter an error indicating that the module `tkinter` is not found, try reinstalling Python 3.10 on your system. - -### LORA Training on TESLA V100 - GPU Utilization Issue - -See [Troubleshooting LORA Training on TESLA V100](docs/troubleshooting_tesla_v100.md) for details. - -## SDXL training - -For detailed guidance on SDXL training, please refer to the [official sd-scripts documentation](https://github.com/kohya-ss/sd-scripts/blob/main/README.md#sdxl-training) and relevant sections in our [LoRA Training Guide](docs/LoRA/top_level.md). - -## Masked loss - -The masked loss is supported in each training script. To enable the masked loss, specify the `--masked_loss` option. - -> [!WARNING] -> The feature is not fully tested, so there may be bugs. If you find any issues, please open an Issue. - -ControlNet dataset is used to specify the mask. The mask images should be the RGB images. The pixel value 255 in R channel is treated as the mask (the loss is calculated only for the pixels with the mask), and 0 is treated as the non-mask. The pixel values 0-255 are converted to 0-1 (i.e., the pixel value 128 is treated as the half weight of the loss). See details for the dataset specification in the [LLLite documentation](./docs/train_lllite_README.md#preparing-the-dataset). 
- -## Guides - -The following are guides extracted from issues discussions - -### Using Accelerate Lora Tab to Select GPU ID - -#### Starting Accelerate in GUI - -- Open the kohya GUI on your desired port. -- Open the `Accelerate launch` tab -- Ensure the Multi-GPU checkbox is unchecked. -- Set GPU IDs to the desired GPU (like 1). - -#### Running Multiple Instances (linux) - -- For tracking multiple processes, use separate kohya GUI instances on different ports (e.g., 7860, 7861). -- Start instances using `nohup ./gui.sh --listen 0.0.0.0 --server_port --headless > log.log 2>&1 &`. - -#### Monitoring Processes - -- Open each GUI in a separate browser tab. -- For terminal access, use SSH and tools like `tmux` or `screen`. - -For more details, visit the [GitHub issue](https://github.com/bmaltais/kohya_ss/issues/2577). - -## Interesting Forks - -To finetune HunyuanDiT models or create LoRAs, visit this [fork](https://github.com/Tencent/HunyuanDiT/tree/main/kohya_ss-hydit) - -## Contributing - -Contributions are welcome! If you'd like to contribute to this project, please consider the following: -- For bug reports or feature requests, please open an issue on the [GitHub Issues page](https://github.com/bmaltais/kohya_ss/issues). -- If you'd like to submit code changes, please open a pull request. Ensure your changes are well-tested and follow the existing code style. -- For security-related concerns, please refer to our `SECURITY.md` file. - -## License - -This project is licensed under the Apache License 2.0. See the [LICENSE.md](LICENSE.md) file for details. - -## Change History - -### v25.0.3 - -- Upgrade Gradio, diffusers and huggingface-hub to latest release to fix issue with ASGI. -- Add a new method to setup and run the GUI. You will find two new script for both Windows (gui-uv.bat) and Linux (gui-uv.sh). With those scripts there is no need to run setup.bat or setup.sh anymore. - -### v25.0.2 - -- Force gradio to 5.14.0 or greater so it is updated. 
- -### v25.0.1 - -- Fix issue with requirements version causing huggingface download issues - -### v25.0.0 - -- Major update: Introduced support for flux.1 and sd3, moving the GUI to align with more recent script functionalities. -- Users preferring the pre-flux.1/sd3 version can check out tag `v24.1.7`. - ```shell - git checkout v24.1.7 - ``` -- For details on new flux.1 and sd3 parameters, refer to the [sd-scripts README](https://github.com/kohya-ss/sd-scripts/blob/sd3/README.md). +NVIDIA Studio Driver(SDR) : Windows 10/11 → 531.79 / 536.67 등 : +아래서 기종 선택하고 Studio Driver 선택하고 검색 버튼 +https://www.nvidia.com/ko-kr/geforce/drivers/ +제일 낮은 버전이 아마 괜찮을 듯 함. + +CUDA 12.3 : https://developer.nvidia.com/cuda-12-3-0-download-archive +cuDNN v9.5.0 : https://developer.nvidia.com/cudnn-9-5-0-download-archive + +cuDNN (예: C:\Program Files\NVIDIA\CUDNN\v9.5 )폴더 안에는 bin, include, lib 폴더가 있습니다. +각 폴더의 내용을 CUDA Toolkit이 설치된 경로 내의 해당 폴더에 복사합니다. +예시: cuDNN/bin을 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin으로 복사합니다. + + +4. SDXL 모델 다운로드 +SDXL 기본 해상도는 1024x1024 X이며, 다음 모델이 필요합니다: +필수 모델: +SDXL Base 모델 (.safetensors 또는 .ckpt) +Hugging Face 또는 +CivitAI에서 다운로드 + +권장: VAE 모델 (선택사항이지만 권장) +https://huggingface.co/madebyollin/sdxl-vae-fp16-fix +SDXL fp16 VAE GitHub: madebyollin/sdxl-vae-fp16-fix +모델을 원하는 폴더에 저장하세요 (예: C:/models/sdxl/) + + + + +이 저장소에는 Stable Diffusion용 훈련, 생성 및 유틸리티 스크립트가 포함되어 있습니다. + +변경 내역은 페이지 하단으로 이동했습니다. + +최신 업데이트: 2025-03-21 (버전 0.9.1) + +일본어판 README는 여기 + +개발 버전은 dev 브랜치에 있습니다. 최신 변경 사항은 dev 브랜치를 확인해 주세요. + +FLUX.1 및 SD3/SD3.5 지원은 sd3 브랜치에서 이루어집니다. 해당 모델을 훈련하려면 sd3 브랜치를 사용해 주세요. + +더 쉬운 사용법(GUI 및 PowerShell 스크립트 등)을 원하시면 bmaltais가 관리하는 저장소를 방문해 주세요. @bmaltais 님께 감사드립니다! + +이 저장소에는 다음 스크립트가 포함되어 있습니다: + +- DreamBooth 훈련 (U-Net 및 텍스트 인코더 포함) +- 미세 조정 (네이티브 훈련) (U-Net 및 텍스트 인코더 포함) +- LoRA 훈련 +- 텍스트 역전 훈련 +- 이미지 생성 +- 모델 변환 (1.x 및 2.x, Stable Diffusion ckpt/safetensors 및 Diffusers 지원) + +### 후원사 + +아래 기업들의 아낌없는 후원에 깊이 감사드립니다: + + + AiHUB Inc. 
+ + +### 프로젝트 후원 + +이 프로젝트가 도움이 되셨다면 [GitHub 후원](https://github.com/sponsors/kohya-ss/)을 통해 개발을 후원해 주시기 바랍니다. 여러분의 후원에 깊이 감사드립니다! + + +## requirements.txt 파일 안내 + +이 파일에는 PyTorch 요구 사항이 포함되어 있지 않습니다. PyTorch 버전은 환경에 따라 달라지므로 별도로 관리됩니다. 먼저 환경에 맞는 PyTorch를 설치해 주세요. 설치 방법은 아래를 참고하세요. + +스크립트는 PyTorch 2.1.2로 테스트되었습니다. PyTorch 2.2 이상도 작동합니다. 적절한 버전의 PyTorch와 xformers를 설치해 주세요. + +## 사용법 문서 링크 + +대부분의 문서는 일본어로 작성되었습니다. + +[darkstorm2150님의 영어 번역본은 여기](https://github.com/darkstorm2150/sd-scripts#links-to-usage-documentation)에서 확인하실 수 있습니다. darkstorm2150님께 감사드립니다! + +* [훈련 가이드 - 공통](sd-scripts/docs/train_README-ja.md) : 데이터 준비, 옵션 등... + * [중국어 버전](sd-scripts/docs/train_README-zh.md) +* [SDXL 훈련](sd-scripts/docs/train_SDXL-en.md) (영어 버전) +* [데이터셋 구성](sd-scripts/docs/config_README-ja.md) + * [영어 버전](sd-scripts/docs/config_README-en.md) +* [DreamBooth 훈련 가이드](sd-scripts/docs/train_db_README-ja.md) +* [단계별 미세 조정 가이드](sd-scripts/docs/fine_tune_README_ja.md): +* [LoRA 훈련](sd-scripts/docs/train_network_README-ja.md) +* [텍스트 역전 훈련](sd-scripts/docs/train_ti_README-ja.md) +* [이미지 생성](sd-scripts/docs/gen_img_README-ja.md) +* note.com [모델 변환](https://note.com/kohya_ss/n/n374f316fe4ad) + +## Windows Required Dependencies + +## Windows 필수 종속성 + +Python 3.10.6 및 Git: + +- Python 3.10.6: https://www.python.org/ftp/python/3.10.6/python-3.10.6-amd64.exe +- git: https://git-scm.com/download/win + +Python 3.10.x, 3.11.x, 3.12.x도 작동하지만 테스트되지 않았습니다. 
+ +venv가 작동하도록 PowerShell에 제한 없는 스크립트 실행 권한 부여: + +- 관리자 권한 PowerShell 창 열기 +- `Set-ExecutionPolicy Unrestricted` 입력 후 A 선택 +- 관리자 권한 PowerShell 창 닫기 + +## Windows 설치 + +일반 PowerShell 터미널을 열고 다음 명령어를 입력하세요: + +```powershell +git clone https://github.com/kohya-ss/sd-scripts.git +cd sd-scripts + +python -m venv venv +.\venv\Scripts\activate + +pip install torch==2.1.2 torchvision==0.16.2 --index-url https://download.pytorch.org/whl/cu118 +pip install --upgrade -r requirements.txt +pip install xformers==0.0.23.post1 --index-url https://download.pytorch.org/whl/cu118 + +accelerate config +``` + +`python -m venv` 명령어 실행 시 `python`만 표시된다면, `python`을 `py`로 변경하십시오. + +참고: 현재 `bitsandbytes==0.44.0`, `prodigyopt==1.0` 및 `lion-pytorch==0.0.6`이 requirements.txt에 포함되어 있습니다. 다른 버전을 사용하려면 수동으로 설치하십시오. + +이 설치는 CUDA 11.8용입니다. 다른 버전의 CUDA를 사용하는 경우, 해당 버전의 PyTorch와 xformers를 설치하십시오. 예를 들어, CUDA 12를 사용하는 경우 `pip install torch==2.1.2 torchvision==0.16.2 --index-url https://download.pytorch.org/whl/cu121` 및 `pip install xformers==0.0.23.post1 --index-url https://download.pytorch.org/whl/cu121`를 실행하십시오. + +PyTorch 2.2 이상을 사용하는 경우 `torch==2.1.2`, `torchvision==0.16.2`, `xformers==0.0.23.post1`을 적절한 버전으로 변경하십시오. + + +accelerate config에 대한 답변: + +```txt +- This machine +- No distributed training +- NO +- NO +- NO +- all +- fp16 +``` + +bf16을 사용하려면 마지막 질문에 `bf16`이라고 답변해 주세요. + +참고: 일부 사용자가 훈련 중 ``ValueError: fp16 혼합 정밀도는 GPU가 필요합니다`` 오류가 발생한다고 보고했습니다. 이 경우, 여섯 번째 질문에 `0`을 입력하세요: +``이 머신에서 훈련에 사용할 GPU(ID 기준)를 쉼표로 구분된 목록으로 입력하세요? [all]:`` + +(ID `0`의 단일 GPU가 사용됩니다.) + +## 업그레이드 + +새 버전이 출시되면 다음 명령어로 저장소를 업그레이드할 수 있습니다: + +```powershell +cd sd-scripts +git pull +.\venv\Scripts\activate +pip install --use-pep517 --upgrade -r requirements.txt +``` + +명령어가 성공적으로 완료되면 새 버전을 사용할 준비가 된 것입니다. + +### PyTorch 업그레이드 + +PyTorch를 업그레이드하려면 [Windows 설치](#windows-installation) 섹션의 `pip install` 명령어로 업그레이드할 수 있습니다. PyTorch를 업그레이드할 때 `xformers`도 함께 업그레이드해야 합니다. 
+ +## 크레딧 + +LoRA 구현은 [cloneofsimo의 저장소](https://github.com/cloneofsimo/lora)를 기반으로 합니다. 훌륭한 작업에 감사드립니다! + +Conv2d 3x3에 대한 LoRA 확장은 cloneofsimo에 의해 처음 공개되었으며, 그 효과는 KohakuBlueleaf에 의해 [LoCon](https://github.com/KohakuBlueleaf/LoCon)에서 입증되었습니다. KohakuBlueleaf님께 진심으로 감사드립니다! + +## 라이선스 + +대부분의 스크립트는 ASL 2.0 라이선스 하에 배포됩니다(Diffusers, cloneofsimo 및 LoCon의 코드 포함). 다만 프로젝트의 일부 구성 요소는 별도의 라이선스 조건이 적용됩니다: + +[Memory Efficient Attention Pytorch](https://github.com/lucidrains/memory-efficient-attention-pytorch): MIT + +[bitsandbytes](https://github.com/TimDettmers/bitsandbytes): MIT + +[BLIP](https://github.com/salesforce/BLIP): BSD-3-Clause + + +## Change History + +### Mar 21, 2025 / 2025-03-21 Version 0.9.1 + +- Fixed a bug where some of LoRA modules for CLIP Text Encoder were not trained. Thank you Nekotekina for PR [#1964](https://github.com/kohya-ss/sd-scripts/pull/1964) + - The LoRA modules for CLIP Text Encoder are now 264 modules, which is the same as before. Only 88 modules were trained in the previous version. + +### Jan 17, 2025 / 2025-01-17 Version 0.9.0 + +- __important__ The dependent libraries are updated. Please see [Upgrade](#upgrade) and update the libraries. + - bitsandbytes, transformers, accelerate and huggingface_hub are updated. + - If you encounter any issues, please report them. + +- The dev branch is merged into main. The documentation is delayed, and I apologize for that. I will gradually improve it. +- The state just before the merge is released as Version 0.8.8, so please use it if you encounter any issues. +- The following changes are included. + +#### 변경 사항 + +## 추가 정보 + +### LoRA 명명 규칙 + +`train_network.py`에서 지원하는 LoRA의 명칭을 혼동을 피하기 위해 변경하였습니다. 관련 문서도 업데이트되었습니다. 본 저장소에서 사용하는 LoRA 유형의 명칭은 다음과 같습니다. + +1. __LoRA-LierLa__ : (LoRA for __Li__ n __e__ a __r__ __La__ yers) + + LoRA for Linear layers and Conv2d layers with 1x1 kernel + +2. 
__LoRA-C3Lier__ : (LoRA for __C__ olutional layers with __3__ x3 Kernel and __Li__ n __e__ a __r__ layers) + + In addition to 1., LoRA for Conv2d layers with 3x3 kernel + +LoRA-LierLa는 `train_network.py`의 기본 LoRA 유형입니다(네트워크 인자 `conv_dim` 제외). + + +### 훈련 중 샘플 이미지 생성 + 예를 들어 프롬프트 파일은 다음과 같을 수 있습니다 + +``` +# prompt 1 +masterpiece, best quality, (1girl), in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28 + +# prompt 2 +masterpiece, best quality, 1boy, in business suit, standing at street, looking back --n (low quality, worst quality), bad anatomy,bad composition, poor, low effort --w 576 --h 832 --d 2 --l 5.5 --s 40 +``` + + `#`로 시작하는 줄은 주석입니다. 프롬프트 뒤에 `--n`과 같은 옵션을 사용하여 생성된 이미지의 옵션을 지정할 수 있습니다. 다음을 사용할 수 있습니다. + + * `--n` 다음 옵션까지 프롬프트를 음수로 지정합니다. + * `--w` 생성된 이미지의 너비를 지정합니다. + * `--h` 생성된 이미지의 높이를 지정합니다. + * `--d` 생성된 이미지의 시드(seed)를 지정합니다. + * `--l` 생성된 이미지의 CFG 스케일을 지정합니다. + * `--s` 생성 과정의 단계 수를 지정합니다. + + `( )` 및 `[ ]`와 같은 프롬프트 가중치 기능이 작동합니다. diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index 667417b..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,14 +0,0 @@ -# Security Policy - -## Supported Versions - -Versions that are currently being supported with security updates. - -| Version | Supported | -| ------- | ------------------ | -| 23.2.x | :white_check_mark: | -| < 23.1.x | :x: | - -## Reporting a Vulnerability - -Please open an issue if you discover a security issue. 
diff --git a/config_files/config-5080.json b/config_files/config-5080.json index 54b3b2d..4d53a47 100644 --- a/config_files/config-5080.json +++ b/config_files/config-5080.json @@ -3,7 +3,7 @@ "shuffle_caption": true, "caption_extension": ".txt", "keep_tokens": 1, - "seed": 1234 + "seed": 47 }, "model": { "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0", diff --git a/config_files/config-5090.json b/config_files/config-5090.json index a16ddbd..bd3a23f 100644 --- a/config_files/config-5090.json +++ b/config_files/config-5090.json @@ -3,10 +3,10 @@ "shuffle_caption": true, "caption_extension": ".txt", "keep_tokens": 1, - "seed": 42 + "seed": 47 }, "model": { - "pretrained_model_name_or_path": "stabilityai/stable-diffusion-3.5", + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0", "vae": "stabilityai/sd-vae-ft-mse" }, "training": { diff --git a/run-train-simple.cmd b/run-train-simple.cmd deleted file mode 100644 index 309b1aa..0000000 --- a/run-train-simple.cmd +++ /dev/null @@ -1 +0,0 @@ -accelerate launch --num_cpu_threads_per_process 8 train_network.py --config_file=config_5080.json \ No newline at end of file diff --git a/run-train.cmd b/run-train.cmd deleted file mode 100644 index 7e59285..0000000 --- a/run-train.cmd +++ /dev/null @@ -1,31 +0,0 @@ -setx CUDA_VISIBLE_DEVICES "1" - -accelerate launch --num_cpu_threads_per_process 1 --mixed_precision bf16 ^ - sdxl_train_network.py ^ - --pretrained_model_name_or_path="./models/stable-diffusion-xl-base-1.0" ^ - --train_data_dir="./dataset/train/mainchar" ^ - --output_dir="./output_model" ^ - --logging_dir="./logs" ^ - --output_name="karina" ^ - --network_module=networks.lora ^ - --network_dim=32 ^ - --network_alpha=16 ^ - --learning_rate=1e-4 ^ - --optimizer_type="AdamW8bit" ^ - --lr_scheduler="cosine" ^ - --lr_warmup_steps=100 ^ - --max_train_epochs=15 ^ - --save_every_n_epochs=1 ^ - --mixed_precision="bf16" ^ - --save_precision="bf16" ^ - --cache_latents ^ - 
--cache_latents_to_disk ^ - --cache_text_encoder_outputs ^ - --gradient_checkpointing ^ - --xformers ^ - --seed=42 ^ - --bucket_no_upscale ^ - --min_bucket_reso=512 ^ - --max_bucket_reso=2048 ^ - --bucket_reso_steps=64 ^ - --resolution="1024,1024" \ No newline at end of file diff --git a/run-venv.cmd b/run-venv.cmd deleted file mode 100644 index 1b69d01..0000000 --- a/run-venv.cmd +++ /dev/null @@ -1 +0,0 @@ -venv/Scripts/activate \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index c77ae04..33601e6 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit c77ae0492ee24e2279eb66d3405de22237ac757f +Subproject commit 33601e6e2facfa32139df998989ff036f1eebeb2 From aabc199d2bf346a1d031e71843143f5536d320c4 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Fri, 24 Oct 2025 23:05:40 +0900 Subject: [PATCH 08/58] update --- Dockerfile | 2 -- docker-build.cmd | 3 +++ docker-compose.yml | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 docker-build.cmd create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile index 1bd5fa6..cfe4cab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,5 +45,3 @@ ENV OUTPUT_DIR=/workspace/output_model # 볼륨 마운트 포인트 VOLUME ["/workspace/dataset", "/workspace/output_model"] - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker-build.cmd b/docker-build.cmd new file mode 100644 index 0000000..f90d83c --- /dev/null +++ b/docker-build.cmd @@ -0,0 +1,3 @@ +docker build --no-cache -t aicompanion/sxdl_train_captioner:0.0.5 . + +docker tag aicompanion/sxdl_train_captioner:0.0.5 aicompanion/sxdl_train_captioner:latest \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..82d2228 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,48 @@ +services: + sxdl_train_captioner: + build: + context: . 
+ dockerfile: Dockerfile + image: aicompanion/sxdl_train_captioner:latest + container_name: sxdl_train_captioner + + # GPU 설정 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + + ports: + - "7860:7860" + + # 볼륨 마운트 (호스트 ↔ 컨테이너) + volumes: + - ./models:/workspace/sxdl_train_captioner/dataset + - ./outputs:/workspace/sxdl_train_captioner/output_model + + environment: + # GPU 선택 (필요 시 GPU ID 지정) + # - CUDA_VISIBLE_DEVICES=1 + - HF_HOME=/workspace/sxdl_train_captioner/models + - HF_HUB_CACHE=/workspace/sxdl_train_captioner/models + - PYTHONUNBUFFERED=1 + - TZ=Asia/Seoul + - OMP_NUM_THREADS=8 + - MKL_NUM_THREADS=8 + - TMPDIR=/dev/shm + + stdin_open: true + tty: true + restart: unless-stopped + shm_size: "16gb" + + working_dir: /workspace/sxdl_train_captioner + + # GPU 자동 탐색, 기본 0번 GPU +# command: ["python", "run-gradio.py"] + + # 여러개 GPU가 있는 경우 특정 GPU 번호를 지정하거나 gradio live를 사용하려는 경우 + # command: ["python", "run-gradio.py", " --device 3", "--share"] \ No newline at end of file From 0907af82a5a8be0e1a57cda8b01149093cf17d3d Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Fri, 24 Oct 2025 23:15:45 +0900 Subject: [PATCH 09/58] update --- Dockerfile | 22 ++++++++++++---------- docker-compose.yml | 14 +++++++------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/Dockerfile b/Dockerfile index cfe4cab..d6a063c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,20 +13,20 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ENV PIP_NO_CACHE_DIR=1 # kohya_ss 전체 복사 (모델 포함) -COPY kohya_ss /app/kohya_ss +COPY . 
/app/sxdl_train_captioner # pip 업그레이드 및 공통 유틸 설치 RUN pip install --upgrade pip setuptools wheel \ && pip install --no-cache-dir accelerate bitsandbytes xformers # 두 requirements.txt 모두 설치 -RUN pip install --no-cache-dir -r /app/kohya_ss/requirements.txt --use-pep517 \ - && if [ -f /app/kohya_ss/sd-scripts/requirements.txt ]; then \ - pip install --no-cache-dir -r /app/kohya_ss/sd-scripts/requirements.txt --use-pep517; \ +RUN pip install --no-cache-dir -r /app/sxdl_train_captioner/requirements.txt --use-pep517 \ + && if [ -f /app/sxdl_train_captioner/sd-scripts/requirements.txt ]; then \ + pip install --no-cache-dir -r /app/sxdl_train_captioner/sd-scripts/requirements.txt --use-pep517; \ fi # 모델 파일 복사 (미리 포함시킬 가중치) -COPY kohya_ss/sd-scripts/models /app/kohya_ss/sd-scripts/models +#COPY ./models /app/sxdl_train_captioner/models # (선택) BLIP/WD14 등 관련 종속 추가 RUN pip install transformers==4.44.2 accelerate==0.33.0 \ @@ -35,13 +35,15 @@ RUN pip install transformers==4.44.2 accelerate==0.33.0 \ # 모델 디렉토리 확인 로그 RUN echo "✅ Copied models:" && ls -R /app/kohya_ss/models || echo "⚠️ No models found" +WORKDIR /app/sxdl_train_captioner/sd-scripts + # 엔트리포인트 복사 및 실행 권한 -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +#COPY entrypoint.sh /entrypoint.sh +#RUN chmod +x /entrypoint.sh # 환경 변수 기본값 -ENV TRAIN_DIR=/workspace/dataset -ENV OUTPUT_DIR=/workspace/output_model +ENV TRAIN_DIR=/app/sxdl_train_captioner/dataset +ENV OUTPUT_DIR=/app/sxdl_train_captioner/output_model # 볼륨 마운트 포인트 -VOLUME ["/workspace/dataset", "/workspace/output_model"] +VOLUME ["/app/sxdl_train_captioner/dataset", "/app/sxdl_train_captioner/output_model"] diff --git a/docker-compose.yml b/docker-compose.yml index 82d2228..aebdfb4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,14 +20,14 @@ services: # 볼륨 마운트 (호스트 ↔ 컨테이너) volumes: - - ./models:/workspace/sxdl_train_captioner/dataset - - ./outputs:/workspace/sxdl_train_captioner/output_model + - 
./models:/app/sxdl_train_captioner/dataset + - ./outputs:/app/sxdl_train_captioner/output_model environment: # GPU 선택 (필요 시 GPU ID 지정) - # - CUDA_VISIBLE_DEVICES=1 - - HF_HOME=/workspace/sxdl_train_captioner/models - - HF_HUB_CACHE=/workspace/sxdl_train_captioner/models + - CUDA_VISIBLE_DEVICES=3 + - HF_HOME=/app/sxdl_train_captioner/models + - HF_HUB_CACHE=/app/sxdl_train_captioner/models - PYTHONUNBUFFERED=1 - TZ=Asia/Seoul - OMP_NUM_THREADS=8 @@ -39,10 +39,10 @@ services: restart: unless-stopped shm_size: "16gb" - working_dir: /workspace/sxdl_train_captioner + working_dir: /app/sxdl_train_captioner # GPU 자동 탐색, 기본 0번 GPU -# command: ["python", "run-gradio.py"] + command: ["python", "cap-watcher.py"] # 여러개 GPU가 있는 경우 특정 GPU 번호를 지정하거나 gradio live를 사용하려는 경우 # command: ["python", "run-gradio.py", " --device 3", "--share"] \ No newline at end of file From 102ff9b9dbfaedeb319a8763364d776974769bab Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 02:18:01 +0900 Subject: [PATCH 10/58] update --- Dockerfile | 6 ++--- docker-compose.yaml | 59 --------------------------------------------- 2 files changed, 2 insertions(+), 63 deletions(-) delete mode 100644 docker-compose.yaml diff --git a/Dockerfile b/Dockerfile index d6a063c..762fe2d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,10 +21,8 @@ RUN pip install --upgrade pip setuptools wheel \ # 두 requirements.txt 모두 설치 RUN pip install --no-cache-dir -r /app/sxdl_train_captioner/requirements.txt --use-pep517 \ - && if [ -f /app/sxdl_train_captioner/sd-scripts/requirements.txt ]; then \ - pip install --no-cache-dir -r /app/sxdl_train_captioner/sd-scripts/requirements.txt --use-pep517; \ - fi - + && pip install --no-cache-dir -r /app/sxdl_train_captioner/sd-scripts/requirements.txt --use-pep517 + # 모델 파일 복사 (미리 포함시킬 가중치) #COPY ./models /app/sxdl_train_captioner/models diff --git a/docker-compose.yaml b/docker-compose.yaml deleted file mode 100644 index cadffb0..0000000 --- a/docker-compose.yaml +++ /dev/null @@ 
-1,59 +0,0 @@ -services: - kohya-ss-gui: - container_name: kohya-ss-gui - image: ghcr.io/bmaltais/kohya-ss-gui:latest - user: 1000:0 - build: - context: . - args: - - UID=1000 - cache_from: - - ghcr.io/bmaltais/kohya-ss-gui:cache - cache_to: - - type=inline - ports: - - 7860:7860 - environment: - SAFETENSORS_FAST_GPU: 1 - TENSORBOARD_PORT: ${TENSORBOARD_PORT:-6006} - tmpfs: - - /tmp - volumes: - - /tmp/.X11-unix:/tmp/.X11-unix - - ./models:/app/models - - ./dataset:/dataset - - ./dataset/images:/app/data - - ./dataset/logs:/app/logs - - ./dataset/outputs:/app/outputs - - ./dataset/regularization:/app/regularization - - ./models:/app/models - - ./.cache/config:/app/config - - ./.cache/user:/home/1000/.cache - - ./.cache/triton:/home/1000/.triton - - ./.cache/nv:/home/1000/.nv - - ./.cache/keras:/home/1000/.keras - - ./.cache/config:/home/1000/.config # For backward compatibility - deploy: - resources: - reservations: - devices: - - driver: nvidia - capabilities: [gpu] - device_ids: ["all"] - - tensorboard: - container_name: tensorboard - image: tensorflow/tensorflow:latest-gpu - ports: - # !Please change the port in .env file - - ${TENSORBOARD_PORT:-6006}:6006 - volumes: - - ./dataset/logs:/app/logs - command: tensorboard --logdir=/app/logs --bind_all - deploy: - resources: - reservations: - devices: - - driver: nvidia - capabilities: [gpu] - device_ids: ["all"] From caf6303efb641b03cf7156ad68ef8e28b60108d0 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 04:38:17 +0900 Subject: [PATCH 11/58] update --- Dockerfile | 10 +++++----- sd-scripts | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 762fe2d..13b6606 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,15 +20,15 @@ RUN pip install --upgrade pip setuptools wheel \ && pip install --no-cache-dir accelerate bitsandbytes xformers # 두 requirements.txt 모두 설치 -RUN pip install --no-cache-dir -r /app/sxdl_train_captioner/requirements.txt --use-pep517 \ - && 
pip install --no-cache-dir -r /app/sxdl_train_captioner/sd-scripts/requirements.txt --use-pep517 - +WORKDIR /app/sxdl_train_captioner +RUN pip install --no-cache-dir -r requirements.txt + # 모델 파일 복사 (미리 포함시킬 가중치) #COPY ./models /app/sxdl_train_captioner/models # (선택) BLIP/WD14 등 관련 종속 추가 -RUN pip install transformers==4.44.2 accelerate==0.33.0 \ - torch torchvision torchaudio +RUN pip install transformers==4.44.2 accelerate==0.33.0 +# torch torchvision torchaudio # 모델 디렉토리 확인 로그 RUN echo "✅ Copied models:" && ls -R /app/kohya_ss/models || echo "⚠️ No models found" diff --git a/sd-scripts b/sd-scripts index 33601e6..34d18e3 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 33601e6e2facfa32139df998989ff036f1eebeb2 +Subproject commit 34d18e3d26f31adf569a547b327ffe1e763f8850 From 8af0abafc9336bfd809f4b275a9f01d19b5ab0ed Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 04:58:51 +0900 Subject: [PATCH 12/58] Merged two requirements.txt s --- Dockerfile | 8 +--- requirements-org-bak.txt | 37 +++++++++++++++++++ requirements.txt | 80 +++++++++++++++++++++++----------------- 3 files changed, 86 insertions(+), 39 deletions(-) create mode 100644 requirements-org-bak.txt diff --git a/Dockerfile b/Dockerfile index 13b6606..b6a6c2b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,8 +16,8 @@ ENV PIP_NO_CACHE_DIR=1 COPY . 
/app/sxdl_train_captioner # pip 업그레이드 및 공통 유틸 설치 -RUN pip install --upgrade pip setuptools wheel \ - && pip install --no-cache-dir accelerate bitsandbytes xformers +RUN pip install --upgrade pip setuptools wheel +# && pip install --no-cache-dir accelerate bitsandbytes xformers # 두 requirements.txt 모두 설치 WORKDIR /app/sxdl_train_captioner @@ -26,10 +26,6 @@ RUN pip install --no-cache-dir -r requirements.txt # 모델 파일 복사 (미리 포함시킬 가중치) #COPY ./models /app/sxdl_train_captioner/models -# (선택) BLIP/WD14 등 관련 종속 추가 -RUN pip install transformers==4.44.2 accelerate==0.33.0 -# torch torchvision torchaudio - # 모델 디렉토리 확인 로그 RUN echo "✅ Copied models:" && ls -R /app/kohya_ss/models || echo "⚠️ No models found" diff --git a/requirements-org-bak.txt b/requirements-org-bak.txt new file mode 100644 index 0000000..5c09438 --- /dev/null +++ b/requirements-org-bak.txt @@ -0,0 +1,37 @@ +accelerate>=1.7.0 +aiofiles==23.2.1 +altair==4.2.2 +dadaptation==3.2 +diffusers[torch]==0.32.2 +easygui==0.98.3 +einops==0.7.0 +fairscale==0.4.13 +ftfy==6.1.1 +gradio>=5.34.1 +huggingface-hub==0.29.3 +imagesize==1.4.1 +invisible-watermark==0.2.0 +lion-pytorch==0.0.6 +lycoris_lora==3.2.0.post2 +omegaconf==2.3.0 +prodigyopt==1.1.2 +protobuf==3.20.3 +open-clip-torch==2.20.0 +opencv-python==4.10.0.84 +prodigy-plus-schedule-free==1.8.0 +pytorch-lightning==1.9.0 +pytorch-optimizer==3.5.0 +rich>=13.7.1 +safetensors==0.4.4 +schedulefree==1.4 +scipy==1.11.4 +# for T5XXL tokenizer (SD3/FLUX) +sentencepiece==0.2.0 +timm==1.0.15 +tk==0.1.0 +toml==0.10.2 +transformers==4.44.2 +voluptuous==0.13.1 +wandb==0.18.0 +# for kohya_ss sd-scripts library +-e ./sd-scripts diff --git a/requirements.txt b/requirements.txt index 0e2bd28..2ebf969 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,38 +1,52 @@ -accelerate>=1.7.0 -aiofiles==23.2.1 -altair==4.2.2 -dadaptation==3.2 -diffusers[torch]==0.32.2 -easygui==0.98.3 -einops==0.7.0 -fairscale==0.4.13 +# Core packages +accelerate==0.33.0 +transformers==4.44.0 
+diffusers[torch]==0.25.0 ftfy==6.1.1 -gradio>=5.34.1 -huggingface-hub==0.29.3 -imagesize==1.4.1 -invisible-watermark==0.2.0 -lion-pytorch==0.0.6 -lycoris_lora==3.2.0.post2 -omegaconf==2.3.0 -onnx==1.16.1 -prodigyopt==1.1.2 -protobuf==3.20.3 -open-clip-torch==2.20.0 -opencv-python==4.10.0.84 -prodigy-plus-schedule-free==1.8.0 +einops==0.7.0 pytorch-lightning==1.9.0 -pytorch-optimizer==3.5.0 -rich>=13.7.1 -safetensors==0.4.4 +lion-pytorch==0.0.6 schedulefree==1.4 -scipy==1.11.4 -# for T5XXL tokenizer (SD3/FLUX) -sentencepiece==0.2.0 -timm==1.0.15 -tk==0.1.0 +pytorch-optimizer==3.5.0 +prodigy-plus-schedule-free==1.9.0 +prodigyopt==1.1.2 +tensorboard +safetensors==0.4.4 +altair==4.2.2 +easygui==0.98.3 toml==0.10.2 -transformers==4.44.2 voluptuous==0.13.1 -wandb==0.18.0 -# for kohya_ss sd-scripts library --e ./sd-scripts +huggingface-hub==0.24.5 +imagesize==1.4.1 +numpy<=2.0 +requests==2.28.2 +timm==0.4.12 +fairscale==0.4.4 +opencv-python==4.5.5.64 +opencv-python-headless==4.5.5.64 + +# WD14 captioning (optional) +# tensorflow==2.10.1 +onnx==1.15.0 +# onnxruntime-gpu==1.17.1 +# onnxruntime==1.17.1 +onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ + +# BLIP captioning +blip==0.1.0 +# salesforce-lavis==1.0.2 # 필요 시 주석 해제 + +# NLP utils +nltk==3.9.2 +sentencepiece==0.2.0 + +# OpenCLIP for SDXL +open-clip-torch==2.20.0 + +# Logging +rich==13.7.0 + +# Kohya_ss library +xformers==0.0.26.post1 --index-url https://download.pytorch.org/whl/cu121 +triton==2.3.0 +-e . 
From 896510b5e7b075cb4370ea996113727b119ee601 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 05:25:26 +0900 Subject: [PATCH 13/58] update --- .dockerignore | 1 + .gitignore | 2 +- Dockerfile | 16 +++++++--------- docker-build.cmd | 4 ++-- docker-compose.yml | 22 ++++++++++------------ requirements.txt | 2 +- 6 files changed, 22 insertions(+), 25 deletions(-) diff --git a/.dockerignore b/.dockerignore index fd39772..ae60047 100644 --- a/.dockerignore +++ b/.dockerignore @@ -19,4 +19,5 @@ docs/ examples/ logs/ outputs/ +models/sd_xl_base_1.0.safetensors #models/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 6fd9a76..67bdb19 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,4 @@ venv* .python-version output_model output_model/ -output_model/** \ No newline at end of file +output_model/** diff --git a/Dockerfile b/Dockerfile index b6a6c2b..e62f74f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,31 +13,29 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ENV PIP_NO_CACHE_DIR=1 # kohya_ss 전체 복사 (모델 포함) -COPY . /app/sxdl_train_captioner +COPY . 
/app/sdxl_train_captioner # pip 업그레이드 및 공통 유틸 설치 RUN pip install --upgrade pip setuptools wheel # && pip install --no-cache-dir accelerate bitsandbytes xformers # 두 requirements.txt 모두 설치 -WORKDIR /app/sxdl_train_captioner +WORKDIR /app/sdxl_train_captioner RUN pip install --no-cache-dir -r requirements.txt # 모델 파일 복사 (미리 포함시킬 가중치) -#COPY ./models /app/sxdl_train_captioner/models +#COPY ./models /app/sdxl_train_captioner/models # 모델 디렉토리 확인 로그 -RUN echo "✅ Copied models:" && ls -R /app/kohya_ss/models || echo "⚠️ No models found" - -WORKDIR /app/sxdl_train_captioner/sd-scripts +RUN echo "✅ Copied models:" && ls -R /app/sdxl_train_captioner/models || echo "⚠️ No models found" # 엔트리포인트 복사 및 실행 권한 #COPY entrypoint.sh /entrypoint.sh #RUN chmod +x /entrypoint.sh # 환경 변수 기본값 -ENV TRAIN_DIR=/app/sxdl_train_captioner/dataset -ENV OUTPUT_DIR=/app/sxdl_train_captioner/output_model +#ENV TRAIN_DIR=/app/sdxl_train_captioner/dataset +#ENV OUTPUT_DIR=/app/sdxl_train_captioner/output_model # 볼륨 마운트 포인트 -VOLUME ["/app/sxdl_train_captioner/dataset", "/app/sxdl_train_captioner/output_model"] +#VOLUME ["/app/sdxl_train_captioner/dataset", "/app/sdxl_train_captioner/output_model"] diff --git a/docker-build.cmd b/docker-build.cmd index f90d83c..efa9b94 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sxdl_train_captioner:0.0.5 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.0.5 . -docker tag aicompanion/sxdl_train_captioner:0.0.5 aicompanion/sxdl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.0.5 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index aebdfb4..d37ac75 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,10 +1,10 @@ services: - sxdl_train_captioner: + sdxl_train_captioner: build: context: . 
dockerfile: Dockerfile - image: aicompanion/sxdl_train_captioner:latest - container_name: sxdl_train_captioner + image: aicompanion/sdxl_train_captioner:latest + container_name: sdxl_train_captioner # GPU 설정 deploy: @@ -20,14 +20,15 @@ services: # 볼륨 마운트 (호스트 ↔ 컨테이너) volumes: - - ./models:/app/sxdl_train_captioner/dataset - - ./outputs:/app/sxdl_train_captioner/output_model + - ./models:/app/sdxl_train_captioner/models + - ./dataset:/app/sdxl_train_captioner/dataset + - ./outputs:/app/sdxl_train_captioner/output_model environment: # GPU 선택 (필요 시 GPU ID 지정) - CUDA_VISIBLE_DEVICES=3 - - HF_HOME=/app/sxdl_train_captioner/models - - HF_HUB_CACHE=/app/sxdl_train_captioner/models + - HF_HOME=/app/sdxl_train_captioner/models + - HF_HUB_CACHE=/app/sdxl_train_captioner/models - PYTHONUNBUFFERED=1 - TZ=Asia/Seoul - OMP_NUM_THREADS=8 @@ -39,10 +40,7 @@ services: restart: unless-stopped shm_size: "16gb" - working_dir: /app/sxdl_train_captioner + working_dir: /app/sdxl_train_captioner/sd-scripts # GPU 자동 탐색, 기본 0번 GPU - command: ["python", "cap-watcher.py"] - - # 여러개 GPU가 있는 경우 특정 GPU 번호를 지정하거나 gradio live를 사용하려는 경우 - # command: ["python", "run-gradio.py", " --device 3", "--share"] \ No newline at end of file + command: ["python", "cap-watcher.py"] # "--device", "3", "--overwrite" diff --git a/requirements.txt b/requirements.txt index 2ebf969..cb0f1a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -49,4 +49,4 @@ rich==13.7.0 # Kohya_ss library xformers==0.0.26.post1 --index-url https://download.pytorch.org/whl/cu121 triton==2.3.0 --e . 
+ From 5a0e17002d2a9e02e07ba989eea2b336d90c95fd Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 05:48:54 +0900 Subject: [PATCH 14/58] update --- .dockerignore | 1 + Dockerfile | 5 ++--- docker-compose.yml | 2 +- entrypoint.sh | 9 +++++++++ sd-scripts | 2 +- 5 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 entrypoint.sh diff --git a/.dockerignore b/.dockerignore index ae60047..eafb767 100644 --- a/.dockerignore +++ b/.dockerignore @@ -19,5 +19,6 @@ docs/ examples/ logs/ outputs/ +sd-scripts/venv models/sd_xl_base_1.0.safetensors #models/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index e62f74f..6fc454e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,14 +17,13 @@ COPY . /app/sdxl_train_captioner # pip 업그레이드 및 공통 유틸 설치 RUN pip install --upgrade pip setuptools wheel -# && pip install --no-cache-dir accelerate bitsandbytes xformers # 두 requirements.txt 모두 설치 WORKDIR /app/sdxl_train_captioner -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install -r requirements.txt # 모델 파일 복사 (미리 포함시킬 가중치) -#COPY ./models /app/sdxl_train_captioner/models +COPY ./models /app/sdxl_train_captioner/models # 모델 디렉토리 확인 로그 RUN echo "✅ Copied models:" && ls -R /app/sdxl_train_captioner/models || echo "⚠️ No models found" diff --git a/docker-compose.yml b/docker-compose.yml index d37ac75..1a5bcab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -41,6 +41,6 @@ services: shm_size: "16gb" working_dir: /app/sdxl_train_captioner/sd-scripts - + command: > # GPU 자동 탐색, 기본 0번 GPU command: ["python", "cap-watcher.py"] # "--device", "3", "--overwrite" diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..0f43347 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# 모델 다운로드 +if [ ! 
-f /app/models/sd_xl_base_1.0.safetensors ]; then + wget -O /app/models/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors +fi + +# 캡셔너 실행 +cd sd-scripts +python cap-watcher.py diff --git a/sd-scripts b/sd-scripts index 34d18e3..a8d7480 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 34d18e3d26f31adf569a547b327ffe1e763f8850 +Subproject commit a8d7480869b09c3a6981e1b91a3a1778c90eb8bc From 98f7db33dc7dd44ac962a9f159af522ebd1b8471 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 05:58:28 +0900 Subject: [PATCH 15/58] update --- Dockerfile | 2 +- docker-build.cmd | 4 ++-- docker-compose.yml | 2 +- docker-up.cmd | 1 + entrypoint.sh | 4 ++-- 5 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 docker-up.cmd diff --git a/Dockerfile b/Dockerfile index 6fc454e..18da898 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ WORKDIR /app # 필수 패키지 설치 RUN apt-get update && apt-get install -y --no-install-recommends \ - git wget curl vim \ + git wget curl libgl1 libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* # Python 패키지 캐싱 방지 diff --git a/docker-build.cmd b/docker-build.cmd index efa9b94..0828efb 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.0.5 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.5.0 . 
-docker tag aicompanion/sdxl_train_captioner:0.0.5 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.5.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 1a5bcab..d37ac75 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -41,6 +41,6 @@ services: shm_size: "16gb" working_dir: /app/sdxl_train_captioner/sd-scripts - command: > + # GPU 자동 탐색, 기본 0번 GPU command: ["python", "cap-watcher.py"] # "--device", "3", "--overwrite" diff --git a/docker-up.cmd b/docker-up.cmd new file mode 100644 index 0000000..5177d11 --- /dev/null +++ b/docker-up.cmd @@ -0,0 +1 @@ +docker-compose up -d \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index 0f43347..6912398 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -5,5 +5,5 @@ if [ ! -f /app/models/sd_xl_base_1.0.safetensors ]; then fi # 캡셔너 실행 -cd sd-scripts -python cap-watcher.py +#cd sd-scripts +#python cap-watcher.py From 431e9fa4fffb38997881b6bbd3c50d0d133353c1 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 19:34:24 +0900 Subject: [PATCH 16/58] update --- Dockerfile | 2 +- docker-build.cmd | 4 ++-- docker-push.cmd | 1 + entrypoint.sh | 8 ++------ 4 files changed, 6 insertions(+), 9 deletions(-) create mode 100644 docker-push.cmd diff --git a/Dockerfile b/Dockerfile index 18da898..5428f8f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ WORKDIR /app # 필수 패키지 설치 RUN apt-get update && apt-get install -y --no-install-recommends \ - git wget curl libgl1 libglib2.0-0 \ + git wget curl libgl1 libglib2.0-0 libcudnn9 libcudnn9-dev \ && rm -rf /var/lib/apt/lists/* # Python 패키지 캐싱 방지 diff --git a/docker-build.cmd b/docker-build.cmd index 0828efb..e542b89 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.5.0 . 
+docker build --no-cache -t aicompanion/sdxl_train_captioner:0.6.0 . -docker tag aicompanion/sdxl_train_captioner:0.5.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.6.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/docker-push.cmd b/docker-push.cmd new file mode 100644 index 0000000..083a86e --- /dev/null +++ b/docker-push.cmd @@ -0,0 +1 @@ +docker push aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index 6912398..83d7913 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,9 +1,5 @@ #!/bin/bash # 모델 다운로드 -if [ ! -f /app/models/sd_xl_base_1.0.safetensors ]; then - wget -O /app/models/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors +if [ ! -f /app/sdxl_train_captioner/models/sd_xl_base_1.0.safetensors ]; then + wget -O /app/sdxl_train_captioner/models/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors fi - -# 캡셔너 실행 -#cd sd-scripts -#python cap-watcher.py From 938250bda1fe41a2f2ae24003db07e0d7e1e7d36 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 19:47:54 +0900 Subject: [PATCH 17/58] update --- Dockerfile | 11 ++++++----- entrypoint.sh | 5 ----- sd-scripts | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) delete mode 100644 entrypoint.sh diff --git a/Dockerfile b/Dockerfile index 5428f8f..6914196 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,18 +6,17 @@ WORKDIR /app # 필수 패키지 설치 RUN apt-get update && apt-get install -y --no-install-recommends \ - git wget curl libgl1 libglib2.0-0 libcudnn9 libcudnn9-dev \ + git wget curl libgl1 libglib2.0-0 libcudnn9-cuda-12 libcudnn9-dev-cuda-12 \ && rm -rf /var/lib/apt/lists/* # Python 패키지 캐싱 방지 ENV PIP_NO_CACHE_DIR=1 -# kohya_ss 전체 복사 (모델 포함) -COPY . 
/app/sdxl_train_captioner - # pip 업그레이드 및 공통 유틸 설치 RUN pip install --upgrade pip setuptools wheel +# kohya_ss 전체 복사 (모델 포함) +COPY . /app/sdxl_train_captioner # 두 requirements.txt 모두 설치 WORKDIR /app/sdxl_train_captioner RUN pip install -r requirements.txt @@ -28,9 +27,11 @@ COPY ./models /app/sdxl_train_captioner/models # 모델 디렉토리 확인 로그 RUN echo "✅ Copied models:" && ls -R /app/sdxl_train_captioner/models || echo "⚠️ No models found" +RUN chmod +x entrypoint.sh +ENTRYPOINT ["bash", "entrypoint.sh"] + # 엔트리포인트 복사 및 실행 권한 #COPY entrypoint.sh /entrypoint.sh -#RUN chmod +x /entrypoint.sh # 환경 변수 기본값 #ENV TRAIN_DIR=/app/sdxl_train_captioner/dataset diff --git a/entrypoint.sh b/entrypoint.sh deleted file mode 100644 index 83d7913..0000000 --- a/entrypoint.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -# 모델 다운로드 -if [ ! -f /app/sdxl_train_captioner/models/sd_xl_base_1.0.safetensors ]; then - wget -O /app/sdxl_train_captioner/models/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors -fi diff --git a/sd-scripts b/sd-scripts index a8d7480..23c3736 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit a8d7480869b09c3a6981e1b91a3a1778c90eb8bc +Subproject commit 23c37369c9a5c74120c9a11efc9d7772d343be35 From d793ba8221079f384f82c84e1febab6657ea531b Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 21:13:12 +0900 Subject: [PATCH 18/58] update --- .gitignore | 6 +++--- Dockerfile | 12 ++++++++++-- config_files/config-5080.json | 2 +- config_files/config-5090.json | 2 +- docker-build.cmd | 4 ++-- docker-compose.yml | 2 +- docker-down.cmd | 1 + docker-start.cmd | 1 + docker-stop.cmd | 1 + sd-scripts | 2 +- 10 files changed, 22 insertions(+), 11 deletions(-) create mode 100644 docker-down.cmd create mode 100644 docker-start.cmd create mode 100644 docker-stop.cmd diff --git a/.gitignore b/.gitignore index 67bdb19..902cfad 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 
+59,6 @@ sd-scripts/** venv venv* .python-version -output_model -output_model/ -output_model/** +output_models +output_models/ +output_models/** diff --git a/Dockerfile b/Dockerfile index 6914196..0190432 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # CUDA 12.1 + PyTorch 2.3.0 -FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-devel +FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel # 기본 작업 경로 설정 WORKDIR /app @@ -19,6 +19,13 @@ RUN pip install --upgrade pip setuptools wheel COPY . /app/sdxl_train_captioner # 두 requirements.txt 모두 설치 WORKDIR /app/sdxl_train_captioner +RUN mkdir /app/sdxl_train_captioner/logs +RUN mkdir /app/sdxl_train_captioner/output_models +RUN mkdir /app/sdxl_train_captioner/captioning/background +RUN mkdir /app/sdxl_train_captioner/captioning/mainchar +RUN mkdir /app/sdxl_train_captioner/training/background +RUN mkdir /app/sdxl_train_captioner/training/mainchar + RUN pip install -r requirements.txt # 모델 파일 복사 (미리 포함시킬 가중치) @@ -27,7 +34,8 @@ COPY ./models /app/sdxl_train_captioner/models # 모델 디렉토리 확인 로그 RUN echo "✅ Copied models:" && ls -R /app/sdxl_train_captioner/models || echo "⚠️ No models found" -RUN chmod +x entrypoint.sh +WORKDIR /app/sdxl_train_captioner/sd-scripts +RUN chmod +x ./entrypoint.sh ENTRYPOINT ["bash", "entrypoint.sh"] # 엔트리포인트 복사 및 실행 권한 diff --git a/config_files/config-5080.json b/config_files/config-5080.json index 4d53a47..897f38f 100644 --- a/config_files/config-5080.json +++ b/config_files/config-5080.json @@ -25,7 +25,7 @@ "save_every_n_steps": 1000 }, "folders": { - "train_data_dir": "./data/train", + "train_data_dir": "./data/training", "reg_data_dir": "./data/reg", "output_dir": "./output_5080", "logging_dir": "./logs_5080" diff --git a/config_files/config-5090.json b/config_files/config-5090.json index bd3a23f..cf441b5 100644 --- a/config_files/config-5090.json +++ b/config_files/config-5090.json @@ -25,7 +25,7 @@ "save_every_n_steps": 1000 }, "folders": { - "train_data_dir": "./data/train", + "train_data_dir": 
"./data/training", "reg_data_dir": "./data/reg", "output_dir": "./output_5090", "logging_dir": "./logs_5090" diff --git a/docker-build.cmd b/docker-build.cmd index e542b89..d07ed62 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.6.0 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.7.0 . -docker tag aicompanion/sdxl_train_captioner:0.6.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.7.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index d37ac75..5c2157a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,7 +22,7 @@ services: volumes: - ./models:/app/sdxl_train_captioner/models - ./dataset:/app/sdxl_train_captioner/dataset - - ./outputs:/app/sdxl_train_captioner/output_model + - ./outputs:/app/sdxl_train_captioner/output_models environment: # GPU 선택 (필요 시 GPU ID 지정) diff --git a/docker-down.cmd b/docker-down.cmd new file mode 100644 index 0000000..10bd05b --- /dev/null +++ b/docker-down.cmd @@ -0,0 +1 @@ +docker-compose down --volumes --remove-orphans sdxl_train_captioner \ No newline at end of file diff --git a/docker-start.cmd b/docker-start.cmd new file mode 100644 index 0000000..8f51fa4 --- /dev/null +++ b/docker-start.cmd @@ -0,0 +1 @@ +docker-compose start sdxl_train_captioner \ No newline at end of file diff --git a/docker-stop.cmd b/docker-stop.cmd new file mode 100644 index 0000000..2a646cf --- /dev/null +++ b/docker-stop.cmd @@ -0,0 +1 @@ +docker-compose stop sdxl_train_captioner \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index 23c3736..2431c46 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 23c37369c9a5c74120c9a11efc9d7772d343be35 +Subproject commit 2431c4673602cd7e42c6cdcafec951d811366505 From d8fec6b5bd99dadd11869ca9ff25785de3c96442 Mon Sep 17 00:00:00 2001 
From: sungjoonkim Date: Sat, 25 Oct 2025 22:06:53 +0900 Subject: [PATCH 19/58] update --- Dockerfile | 15 ++++++--------- docker-build.cmd | 4 ++-- sd-scripts | 2 +- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0190432..74339ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,21 +19,18 @@ RUN pip install --upgrade pip setuptools wheel COPY . /app/sdxl_train_captioner # 두 requirements.txt 모두 설치 WORKDIR /app/sdxl_train_captioner -RUN mkdir /app/sdxl_train_captioner/logs -RUN mkdir /app/sdxl_train_captioner/output_models -RUN mkdir /app/sdxl_train_captioner/captioning/background -RUN mkdir /app/sdxl_train_captioner/captioning/mainchar -RUN mkdir /app/sdxl_train_captioner/training/background -RUN mkdir /app/sdxl_train_captioner/training/mainchar +RUN mkdir -p /app/sdxl_train_captioner/logs +RUN mkdir -p /app/sdxl_train_captioner/output_models +RUN mkdir -p /app/sdxl_train_captioner/captioning/background +RUN mkdir -p /app/sdxl_train_captioner/captioning/mainchar +RUN mkdir -p /app/sdxl_train_captioner/training/background +RUN mkdir -p /app/sdxl_train_captioner/training/mainchar RUN pip install -r requirements.txt # 모델 파일 복사 (미리 포함시킬 가중치) COPY ./models /app/sdxl_train_captioner/models -# 모델 디렉토리 확인 로그 -RUN echo "✅ Copied models:" && ls -R /app/sdxl_train_captioner/models || echo "⚠️ No models found" - WORKDIR /app/sdxl_train_captioner/sd-scripts RUN chmod +x ./entrypoint.sh ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/docker-build.cmd b/docker-build.cmd index d07ed62..b23fdc0 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.7.0 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.0 . 
-docker tag aicompanion/sdxl_train_captioner:0.7.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.8.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index 2431c46..15081d6 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 2431c4673602cd7e42c6cdcafec951d811366505 +Subproject commit 15081d6cade4a71cffe85fc54d36787a566f4fe3 From 993c7d2c2cf3552d074aa1d9de48a0441fe9292f Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 22:11:12 +0900 Subject: [PATCH 20/58] update --- Dockerfile | 19 +++++-------------- docker-compose.yml | 1 + 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/Dockerfile b/Dockerfile index 74339ee..49c2ef5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,11 +20,12 @@ COPY . /app/sdxl_train_captioner # 두 requirements.txt 모두 설치 WORKDIR /app/sdxl_train_captioner RUN mkdir -p /app/sdxl_train_captioner/logs +RUN mkdir -p /app/sdxl_train_captioner/models RUN mkdir -p /app/sdxl_train_captioner/output_models -RUN mkdir -p /app/sdxl_train_captioner/captioning/background -RUN mkdir -p /app/sdxl_train_captioner/captioning/mainchar -RUN mkdir -p /app/sdxl_train_captioner/training/background -RUN mkdir -p /app/sdxl_train_captioner/training/mainchar +RUN mkdir -p /app/sdxl_train_captioner/dataset/captioning/background +RUN mkdir -p /app/sdxl_train_captioner/dataset/captioning/mainchar +RUN mkdir -p /app/sdxl_train_captioner/dataset/training/background +RUN mkdir -p /app/sdxl_train_captioner/dataset/training/mainchar RUN pip install -r requirements.txt @@ -34,13 +35,3 @@ COPY ./models /app/sdxl_train_captioner/models WORKDIR /app/sdxl_train_captioner/sd-scripts RUN chmod +x ./entrypoint.sh ENTRYPOINT ["bash", "entrypoint.sh"] - -# 엔트리포인트 복사 및 실행 권한 -#COPY entrypoint.sh /entrypoint.sh - -# 환경 변수 기본값 -#ENV TRAIN_DIR=/app/sdxl_train_captioner/dataset -#ENV 
OUTPUT_DIR=/app/sdxl_train_captioner/output_model - -# 볼륨 마운트 포인트 -#VOLUME ["/app/sdxl_train_captioner/dataset", "/app/sdxl_train_captioner/output_model"] diff --git a/docker-compose.yml b/docker-compose.yml index 5c2157a..c1b50a6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,7 @@ services: - ./models:/app/sdxl_train_captioner/models - ./dataset:/app/sdxl_train_captioner/dataset - ./outputs:/app/sdxl_train_captioner/output_models + - ./logs:/app/sdxl_train_captioner/logs environment: # GPU 선택 (필요 시 GPU ID 지정) From 0029ecaac4a66b672073e186fdff1c47e0a32c80 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 22:19:51 +0900 Subject: [PATCH 21/58] update --- Dockerfile | 5 +---- sd-scripts | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 49c2ef5..31bd3ea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,10 +22,7 @@ WORKDIR /app/sdxl_train_captioner RUN mkdir -p /app/sdxl_train_captioner/logs RUN mkdir -p /app/sdxl_train_captioner/models RUN mkdir -p /app/sdxl_train_captioner/output_models -RUN mkdir -p /app/sdxl_train_captioner/dataset/captioning/background -RUN mkdir -p /app/sdxl_train_captioner/dataset/captioning/mainchar -RUN mkdir -p /app/sdxl_train_captioner/dataset/training/background -RUN mkdir -p /app/sdxl_train_captioner/dataset/training/mainchar +RUN mkdir -p /app/sdxl_train_captioner/dataset RUN pip install -r requirements.txt diff --git a/sd-scripts b/sd-scripts index 15081d6..2dc567b 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 15081d6cade4a71cffe85fc54d36787a566f4fe3 +Subproject commit 2dc567b1df55a46c32d474b45aac03bb6bb6bcb0 From d7051c25ec21d691a0aef1c44410194ef2d13cf0 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 23:10:26 +0900 Subject: [PATCH 22/58] update --- Dockerfile | 2 -- sd-scripts | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 31bd3ea..ee7709e 100644 --- 
a/Dockerfile +++ b/Dockerfile @@ -19,9 +19,7 @@ RUN pip install --upgrade pip setuptools wheel COPY . /app/sdxl_train_captioner # 두 requirements.txt 모두 설치 WORKDIR /app/sdxl_train_captioner -RUN mkdir -p /app/sdxl_train_captioner/logs RUN mkdir -p /app/sdxl_train_captioner/models -RUN mkdir -p /app/sdxl_train_captioner/output_models RUN mkdir -p /app/sdxl_train_captioner/dataset RUN pip install -r requirements.txt diff --git a/sd-scripts b/sd-scripts index 2dc567b..5ffcfb2 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 2dc567b1df55a46c32d474b45aac03bb6bb6bcb0 +Subproject commit 5ffcfb27ed6de83d6148180831b39539a91e4ba6 From 95f3457d691f437239552263ebc0c4e44c130c7e Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 23:29:58 +0900 Subject: [PATCH 23/58] update --- docker-compose.yml | 2 +- sd-scripts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index c1b50a6..a5606b4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,4 +44,4 @@ services: working_dir: /app/sdxl_train_captioner/sd-scripts # GPU 자동 탐색, 기본 0번 GPU - command: ["python", "cap-watcher.py"] # "--device", "3", "--overwrite" +# command: ["python", "cap-watcher.py"] # "--device", "3", "--overwrite" diff --git a/sd-scripts b/sd-scripts index 5ffcfb2..cbe231a 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 5ffcfb27ed6de83d6148180831b39539a91e4ba6 +Subproject commit cbe231a17a257baa9c716b56208e891508cfb6b6 From 799d0afb3b7ab6a0f3e43dae954a80fd028b8884 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 25 Oct 2025 23:31:44 +0900 Subject: [PATCH 24/58] update --- docker-build.cmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-build.cmd b/docker-build.cmd index b23fdc0..be96cf1 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.0 . 
+docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.5 . -docker tag aicompanion/sdxl_train_captioner:0.8.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.8.5 aicompanion/sdxl_train_captioner:latest \ No newline at end of file From b44647aaa95970feb1e8ccd8217cd5469356caf9 Mon Sep 17 00:00:00 2001 From: Sungjoon Kim Date: Sun, 26 Oct 2025 00:01:26 +0900 Subject: [PATCH 25/58] Update README.md --- README.md | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/README.md b/README.md index 2b0efa0..50e9cc7 100644 --- a/README.md +++ b/README.md @@ -49,18 +49,6 @@ FLUX.1 및 SD3/SD3.5 지원은 sd3 브랜치에서 이루어집니다. 해당 - 이미지 생성 - 모델 변환 (1.x 및 2.x, Stable Diffusion ckpt/safetensors 및 Diffusers 지원) -### 후원사 - -아래 기업들의 아낌없는 후원에 깊이 감사드립니다: - - - AiHUB Inc. - - -### 프로젝트 후원 - -이 프로젝트가 도움이 되셨다면 [GitHub 후원](https://github.com/sponsors/kohya-ss/)을 통해 개발을 후원해 주시기 바랍니다. 여러분의 후원에 깊이 감사드립니다! - ## requirements.txt 파일 안내 @@ -248,3 +236,4 @@ masterpiece, best quality, 1boy, in business suit, standing at street, looking b * `--s` 생성 과정의 단계 수를 지정합니다. `( )` 및 `[ ]`와 같은 프롬프트 가중치 기능이 작동합니다. + From eca06f6b871e89bce95a8e7db8be5aa6620383de Mon Sep 17 00:00:00 2001 From: Sungjoon Kim Date: Sun, 26 Oct 2025 00:05:06 +0900 Subject: [PATCH 26/58] Update README.md --- README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 50e9cc7..79d5658 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,16 @@ NVIDIA Studio Driver(SDR) : Windows 10/11 → 531.79 / 536.67 등 : https://www.nvidia.com/ko-kr/geforce/drivers/ 제일 낮은 버전이 아마 괜찮을 듯 함. 
-CUDA 12.3 : https://developer.nvidia.com/cuda-12-3-0-download-archive -cuDNN v9.5.0 : https://developer.nvidia.com/cudnn-9-5-0-download-archive +CUDA 12.4 : https://developer.nvidia.com/cuda-12-4-0-download-archive +cuDNN v9.5.0 : https://developer.nvidia.com/cudnn-9-5-0-download-archive -cuDNN (예: C:\Program Files\NVIDIA\CUDNN\v9.5 )폴더 안에는 bin, include, lib 폴더가 있습니다. -각 폴더의 내용을 CUDA Toolkit이 설치된 경로 내의 해당 폴더에 복사합니다. -예시: cuDNN/bin을 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin으로 복사합니다. +## CuDNN 설치 +cuDNN (예) C:\Program Files\NVIDIA\CUDNN\v9.5\bin 폴더 안에는 Cuda Major 버전에 대응되는 라이브러리들이 있습니다. +해당폴더 하위의 파일들을 CUDA Toolkit이 설치된 경로 내의 해당 폴더에 복사합니다. + +예시: +C:\Program Files\NVIDIA\CUDNN\v9.5\bin\12.6 아래의 모든 dll 파일을 +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin으로 복사합니다. 4. SDXL 모델 다운로드 @@ -237,3 +241,4 @@ masterpiece, best quality, 1boy, in business suit, standing at street, looking b `( )` 및 `[ ]`와 같은 프롬프트 가중치 기능이 작동합니다. + From 7a498fff32991941bc874b081c84943b18207857 Mon Sep 17 00:00:00 2001 From: Sungjoon Kim Date: Sun, 26 Oct 2025 00:07:59 +0900 Subject: [PATCH 27/58] Update README.md --- README.md | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 79d5658..bd91eed 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,12 @@ NVIDIA Studio Driver(SDR) : Windows 10/11 → 531.79 / 536.67 등 : https://www.nvidia.com/ko-kr/geforce/drivers/ 제일 낮은 버전이 아마 괜찮을 듯 함. +## 1. 호환 버전 CUDA 12.4 : https://developer.nvidia.com/cuda-12-4-0-download-archive + cuDNN v9.5.0 : https://developer.nvidia.com/cudnn-9-5-0-download-archive -## CuDNN 설치 +## 2. CuDNN 설치 cuDNN (예) C:\Program Files\NVIDIA\CUDNN\v9.5\bin 폴더 안에는 Cuda Major 버전에 대응되는 라이브러리들이 있습니다. 해당폴더 하위의 파일들을 CUDA Toolkit이 설치된 경로 내의 해당 폴더에 복사합니다. @@ -15,18 +17,11 @@ C:\Program Files\NVIDIA\CUDNN\v9.5\bin\12.6 아래의 모든 dll 파일을 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin으로 복사합니다. -4.
SDXL 모델 다운로드 -SDXL 기본 해상도는 1024x1024 X이며, 다음 모델이 필요합니다: -필수 모델: -SDXL Base 모델 (.safetensors 또는 .ckpt) -Hugging Face 또는 -CivitAI에서 다운로드 - -권장: VAE 모델 (선택사항이지만 권장) -https://huggingface.co/madebyollin/sdxl-vae-fp16-fix -SDXL fp16 VAE GitHub: madebyollin/sdxl-vae-fp16-fix -모델을 원하는 폴더에 저장하세요 (예: C:/models/sdxl/) - +## 3. SDXL 모델 다운로드 +- 도커 컨테이너가 실행될 때 models 하위에 StableDiffusion XL 1.0 모델이 다운로드 됩니다. +- 만약에 해당 URL 지원이 종료 된 경우, 허깅페이스 또는 CIVITAI에서 다운로드 하세요. +- 현재 사용가능한 다운로드 주소는 아래와 같습니다. +- https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors @@ -242,3 +237,4 @@ masterpiece, best quality, 1boy, in business suit, standing at street, looking b `( )` 및 `[ ]`와 같은 프롬프트 가중치 기능이 작동합니다. + From f8198ab7a98d50700c03fffab9d0eae35b9257df Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sun, 26 Oct 2025 01:38:54 +0900 Subject: [PATCH 28/58] update --- Dockerfile | 2 +- docker-build.cmd | 4 ++-- docker-compose.yml | 2 +- requirements.txt | 2 +- sd-scripts | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index ee7709e..4c87cf9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# CUDA 12.1 + PyTorch 2.3.0 +# Python 3.11 + PyTorch 2.4.0 + CUDA 12.1 + CuDNN 9.5 FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel # 기본 작업 경로 설정 diff --git a/docker-build.cmd b/docker-build.cmd index be96cf1..a9bdf76 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.5 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.6 .
-docker tag aicompanion/sdxl_train_captioner:0.8.5 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.8.6 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index a5606b4..036c79b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,7 +22,7 @@ services: volumes: - ./models:/app/sdxl_train_captioner/models - ./dataset:/app/sdxl_train_captioner/dataset - - ./outputs:/app/sdxl_train_captioner/output_models + - ./output_models:/app/sdxl_train_captioner/output_models - ./logs:/app/sdxl_train_captioner/logs environment: diff --git a/requirements.txt b/requirements.txt index cb0f1a7..5aab427 100644 --- a/requirements.txt +++ b/requirements.txt @@ -47,6 +47,6 @@ open-clip-torch==2.20.0 rich==13.7.0 # Kohya_ss library -xformers==0.0.26.post1 --index-url https://download.pytorch.org/whl/cu121 +xformers==0.0.27.post2 --index-url https://download.pytorch.org/whl/cu121 triton==2.3.0 diff --git a/sd-scripts b/sd-scripts index cbe231a..1614bc4 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit cbe231a17a257baa9c716b56208e891508cfb6b6 +Subproject commit 1614bc4863161f270a89e466cd30aa8891192447 From 805341e2c2b3fdb4e1b0218466a443841d195ce0 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sun, 26 Oct 2025 03:23:38 +0900 Subject: [PATCH 29/58] update --- README.md | 34 ++++++++++++++++++++++++++++ config_files/config-5080.json | 42 ----------------------------------- config_files/config-5090.json | 41 ---------------------------------- docker-build.cmd | 4 ++-- sd-scripts | 2 +- 5 files changed, 37 insertions(+), 86 deletions(-) delete mode 100644 config_files/config-5080.json delete mode 100644 config_files/config-5090.json diff --git a/README.md b/README.md index bd91eed..4d6d830 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,40 @@ C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin으로 복사합니 - 현재 사용가능한 다운로드 
주소는 아래와 같습니다. - https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors +## 학습방법 1: 폴더명 규칙 사용 (자동) +./train.sh config.json 0 + +# 방법 2: 강제로 15번 반복 +./train.sh config.json 0 15 + +# 방법 3: 강제로 20번 반복 +./train.sh config.json 0 20 + + +3가지 요소 비교 +1️⃣ 폴더 식별자 (예: 15_alice) +목적: Kohya 학습 시스템이 이미지를 분류하고 관리하는 용도 +training/ +├── 15_alice/ ← "alice"는 내부 식별용 +│ ├── img1.jpg +│ └── img1.txt +└── 10_background/ ← "background"는 내부 식별용 + ├── bg1.jpg + └── bg1.txt +특징: + +학습 시 로그에만 표시됨 +LoRA 모델이나 trigger word와 무관 +단순히 폴더 구분용 + +2️⃣ --output_name (예: karina) +목적: 저장되는 LoRA 파일명 + +3️⃣ LoRA 태그명 +- 학습에 사용되는 캡션 tag + 문장에서 가장 많이 발견되는 Unique Word가 태그명이 됩니다. +- 일반적으로 캡션의 제일 앞에 배치하고 그 뒤에 콤마를 찍고 나머지를 서술합니다. + + 이 저장소에는 Stable Diffusion용 훈련, 생성 및 유틸리티 스크립트가 포함되어 있습니다. diff --git a/config_files/config-5080.json b/config_files/config-5080.json deleted file mode 100644 index 897f38f..0000000 --- a/config_files/config-5080.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "general": { - "shuffle_caption": true, - "caption_extension": ".txt", - "keep_tokens": 1, - "seed": 47 - }, - "model": { - "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0", - "vae": "stabilityai/sd-vae-ft-mse" - }, - "training": { - "resolution": "768,768", - "batch_size": 1, - "learning_rate": 0.00015, - "lr_scheduler": "cosine_with_restarts", - "max_train_steps": 4000, - "optimizer": "adamw8bit", - "mixed_precision": "fp16", - "gradient_checkpointing": true, - "clip_skip": 2, - "network_dim": 32, - "network_alpha": 16, - "save_precision": "fp16", - "save_every_n_steps": 1000 - }, - "folders": { - "train_data_dir": "./data/training", - "reg_data_dir": "./data/reg", - "output_dir": "./output_5080", - "logging_dir": "./logs_5080" - }, - "advanced": { - "bucket_reso_steps": 64, - "bucket_no_upscale": true, - "xformers": true, - "cache_latents": true, - "min_bucket_reso": 320, - "max_bucket_reso": 768 - } - } - \ No newline at end of file diff --git 
a/config_files/config-5090.json b/config_files/config-5090.json deleted file mode 100644 index cf441b5..0000000 --- a/config_files/config-5090.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "general": { - "shuffle_caption": true, - "caption_extension": ".txt", - "keep_tokens": 1, - "seed": 47 - }, - "model": { - "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0", - "vae": "stabilityai/sd-vae-ft-mse" - }, - "training": { - "resolution": "1024,1024", - "batch_size": 2, - "learning_rate": 0.0001, - "lr_scheduler": "cosine_with_restarts", - "max_train_steps": 6000, - "optimizer": "adamw8bit", - "mixed_precision": "bf16", - "gradient_checkpointing": false, - "clip_skip": 2, - "network_dim": 64, - "network_alpha": 32, - "save_precision": "bf16", - "save_every_n_steps": 1000 - }, - "folders": { - "train_data_dir": "./data/training", - "reg_data_dir": "./data/reg", - "output_dir": "./output_5090", - "logging_dir": "./logs_5090" - }, - "advanced": { - "bucket_reso_steps": 64, - "bucket_no_upscale": true, - "xformers": true, - "cache_latents": true, - "min_bucket_reso": 512, - "max_bucket_reso": 1024 - } -} diff --git a/docker-build.cmd b/docker-build.cmd index a9bdf76..be79128 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.6 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.7 . 
-docker tag aicompanion/sdxl_train_captioner:0.8.6 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.8.7 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index 1614bc4..9f1d862 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 1614bc4863161f270a89e466cd30aa8891192447 +Subproject commit 9f1d862c6065dbc2407a4a43a5a82fc83e1de985 From 6d162d2f7d4ee673aba3335b06dfae78ed2a591b Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sun, 26 Oct 2025 03:25:22 +0900 Subject: [PATCH 30/58] update --- docker-build.cmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-build.cmd b/docker-build.cmd index be79128..33a65bd 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.7 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.8 . -docker tag aicompanion/sdxl_train_captioner:0.8.7 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.8.8 aicompanion/sdxl_train_captioner:latest \ No newline at end of file From 0fbd96c63caebfdbbb66488797bf3016f07b22b4 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sun, 26 Oct 2025 14:56:45 +0900 Subject: [PATCH 31/58] update --- docker-build.cmd | 4 ++-- run-train.cmd | 5 +++++ sd-scripts | 2 +- tail-logs.cmd | 1 + 4 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 run-train.cmd create mode 100644 tail-logs.cmd diff --git a/docker-build.cmd b/docker-build.cmd index 33a65bd..8a5a905 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.8.8 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.9.3 .
-docker tag aicompanion/sdxl_train_captioner:0.8.8 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.9.3 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/run-train.cmd b/run-train.cmd new file mode 100644 index 0000000..6e22b42 --- /dev/null +++ b/run-train.cmd @@ -0,0 +1,5 @@ +@echo off +REM 첫 번째 argument를 명령어로 받아서 컨테이너에서 실행 +REM 모든 argument를 그대로 넘기려면 %* 사용 + +docker exec -it sdxl_train_captioner bash -c "cd /app/sdxl_train_captioner/sd-scripts; ./run-train.sh config-24g.json 1 2>&1 | tee /app/sdxl_train_captioner/logs/train_$(date +%%Y%%m%%d_%%H%%M%%S).log" diff --git a/sd-scripts b/sd-scripts index 9f1d862..e400a22 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 9f1d862c6065dbc2407a4a43a5a82fc83e1de985 +Subproject commit e400a224f84a6d51feb275596644acb416bd629c diff --git a/tail-logs.cmd b/tail-logs.cmd new file mode 100644 index 0000000..74a3ccb --- /dev/null +++ b/tail-logs.cmd @@ -0,0 +1 @@ +docker logs -f sdxl_train_captioner \ No newline at end of file From f888c49561c768f6c85379421f93c4bf929aff24 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sun, 26 Oct 2025 15:37:35 +0900 Subject: [PATCH 32/58] update --- MANUAL_TRAIN.md | 114 ++++++++++++++ run-train-auto.py | 368 ++++++++++++++++++++++++++++++++++++++++++++ run-train-single.py | 365 +++++++++++++++++++++++++++++++++++++++++++ sd-scripts | 2 +- 4 files changed, 848 insertions(+), 1 deletion(-) create mode 100644 MANUAL_TRAIN.md create mode 100644 run-train-auto.py create mode 100644 run-train-single.py diff --git a/MANUAL_TRAIN.md b/MANUAL_TRAIN.md new file mode 100644 index 0000000..673fda8 --- /dev/null +++ b/MANUAL_TRAIN.md @@ -0,0 +1,114 @@ +# 단일폴더 수동 학습 사용 예시 + +## 1. 기본 사용 (자동 계산) +```bash +python train_single.py --folder ../dataset/training/01_alice +``` + +## 2. Epochs만 수동 지정 +```bash +python train_single.py --folder ../dataset/training/01_alice --epochs 25 +``` + +## 3. 
세밀한 조정 +```bash +python train_single.py \ + --folder ../dataset/training/01_alice \ + --epochs 30 \ + --repeats 50 \ + --lr 0.00015 \ + --dim 64 \ + --alpha 32 +``` + +## 4. 고해상도 학습 +```bash +python train_single.py \ + --folder ../dataset/training/01_alice \ + --resolution 1024,1024 \ + --batch-size 1 +``` + +## 5. 빠른 테스트 +```bash +python train_single.py \ + --folder ../dataset/training/01_alice \ + --epochs 5 \ + --repeats 10 \ + --save-every 1 +``` + +## 6. 완전 수동 모드 +```bash +python train_single.py \ + --folder ../dataset/training/01_alice \ + --no-auto \ + --epochs 20 \ + --repeats 30 \ + --lr 0.0001 \ + --optimizer AdamW8bit \ + --scheduler cosine +``` + +## 주요 기능 +✨ 자동 + 수동 하이브리드 + +- 기본값은 자동 계산 +- 원하는 파라미터만 오버라이드 +- --no-auto 플래그로 완전 수동 제어 + +## 🎯 주요 파라미터 +| 파라미터 | 설명 | 예시 | +|---------|------|------| +| --folder | 학습 폴더 (필수) | ../dataset/training/01_alice | +| --output | 출력 이름 | alice_v2 | +| --epochs | Epoch 수 | 20 | +| --repeats | 반복 횟수 | 30 | +| --lr | Learning rate | 0.0001 | +| --dim | Network dimension | 64 | +| --alpha | Network alpha | 32 | +| --resolution | 해상도 | 1024,1024 | +| --batch-size | 배치 크기 | 2 | +| --optimizer | Optimizer | AdamW8bit, Lion, Prodigy | +| --scheduler | LR Scheduler | cosine, constant | +| --save-every | 저장 주기 | 5 | + +## 비교 +### train_batch.py (일괄 자동) +```bash +# 여러 폴더 자동 학습 +python train_batch.py +→ 01_alice, 02_bob, 03_background 모두 학습 +``` + +### train_single.py (단일 수동) +```bash +# 특정 폴더만 세밀 조정 +python train_single.py --folder ../dataset/training/mainchar/01_alice --epochs 30 --lr 0.00015 +→ alice만 커스텀 파라미터로 학습 +``` + +## 워크플로우 추천 + +### 초보자 +```bash +# 1. 먼저 일괄 자동으로 테스트 +python train_batch.py + +# 2. 
결과가 좋지 않은 캐릭터만 재학습 +python train_single.py --folder ../dataset/training/mainchar/01_alice --epochs 25 +``` + +### 고급 사용자 +```bash +# 처음부터 세밀하게 조정 +python train_single.py \ + --folder ../dataset/training/mainchar/01_alice \ + --epochs 30 \ + --repeats 50 \ + --lr 0.00012 \ + --dim 64 \ + --alpha 32 \ + --optimizer Prodigy \ + --resolution 1024,1024 +``` diff --git a/run-train-auto.py b/run-train-auto.py new file mode 100644 index 0000000..dafded4 --- /dev/null +++ b/run-train-auto.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python3 +""" +SDXL LoRA 일괄 학습 스크립트 +- 학습 폴더 하위의 여러 캐릭터/개념을 자동으로 개별 LoRA 학습 +- VRAM에 따른 자동 설정 (bf16/fp16) +- 이미지 수에 따른 최적 파라미터 자동 계산 +""" + +import os +import sys +import json +import subprocess +import argparse +from pathlib import Path + + +class TrainingConfig: + """학습 설정 관리""" + + def __init__(self, config_file, gpu_id=0, force_repeats=None): + self.config_file = config_file + self.gpu_id = gpu_id + self.force_repeats = force_repeats + + # VRAM 감지 + self.vram_size = self.get_vram_size() + + # VRAM에 따른 설정 + if self.vram_size >= 20: + self.precision = "bf16" + self.target_steps = 1800 + else: + # 16GB 이하는 fp16 config 사용 + self.config_file = "config-16g.json" + self.precision = "fp16" + self.target_steps = 1500 + + # Config 파일 로드 + self.load_config() + + def get_vram_size(self): + """NVIDIA GPU VRAM 크기 감지 (GB)""" + try: + cmd = [ + "nvidia-smi", + "--query-gpu=memory.total", + "--format=csv,noheader,nounits", + f"-i {self.gpu_id}" + ] + result = subprocess.run( + ' '.join(cmd), + shell=True, + capture_output=True, + text=True + ) + vram_mb = int(result.stdout.strip()) + vram_gb = vram_mb // 1024 + return vram_gb + except Exception as e: + print(f"⚠️ VRAM 감지 실패, 기본값(24GB) 사용: {e}") + return 24 + + def load_config(self): + """config.json 로드""" + if not os.path.exists(self.config_file): + print(f"❌ Config 파일 없음: {self.config_file}") + sys.exit(1) + + with open(self.config_file, 'r', encoding='utf-8') as f: + self.config = json.load(f) + + self.train_dir = 
self.config['folders']['train_data_dir'] + self.output_dir = self.config['folders']['output_dir'] + self.batch_size = self.config['training'].get('batch_size', 1) + + +class LoRATrainer: + """단일 LoRA 학습 실행""" + + def __init__(self, training_config): + self.config = training_config + self.image_extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} + + def find_training_folders(self): + """학습 폴더 찾기 (순서_이름 패턴)""" + train_dir = self.config.train_dir + + if not os.path.isdir(train_dir): + print(f"❌ 학습 디렉토리 없음: {train_dir}") + return [] + + folders = [] + for item in os.listdir(train_dir): + item_path = os.path.join(train_dir, item) + if not os.path.isdir(item_path): + continue + + # 패턴: 01_alice, 02_bob 등 + parts = item.split('_', 1) + if len(parts) == 2 and parts[0].isdigit(): + order = int(parts[0]) + name = parts[1] + folders.append({ + 'order': order, + 'name': name, + 'path': item_path, + 'folder': item + }) + + # 순서대로 정렬 + folders.sort(key=lambda x: x['order']) + return folders + + def count_images(self, folder_path): + """폴더 내 이미지 개수 세기""" + count = 0 + for file in os.listdir(folder_path): + if Path(file).suffix.lower() in self.image_extensions: + count += 1 + return count + + def calculate_training_params(self, image_count): + """이미지 수에 따른 최적 학습 파라미터 계산""" + batch_size = self.config.batch_size + target_steps = self.config.target_steps + + # 강제 반복 횟수가 지정되면 사용 + if self.config.force_repeats is not None: + optimal_repeats = self.config.force_repeats + else: + # 이미지 수에 따른 자동 계산 + if image_count < 20: + optimal_repeats = max(80, min(200, target_steps // (image_count * 10))) + elif image_count < 50: + optimal_repeats = max(30, min(80, target_steps // (image_count * 10))) + elif image_count < 100: + optimal_repeats = max(15, min(30, target_steps // (image_count * 10))) + else: + optimal_repeats = max(5, min(20, target_steps // (image_count * 10))) + + # Epochs 계산 + images_per_epoch = image_count * optimal_repeats + steps_per_epoch = images_per_epoch // batch_size + 
actual_epochs = max(1, round(target_steps / steps_per_epoch)) + actual_epochs = min(max(actual_epochs, 5), 30) + actual_total_steps = actual_epochs * steps_per_epoch + + return { + 'repeats': optimal_repeats, + 'epochs': actual_epochs, + 'steps_per_epoch': steps_per_epoch, + 'total_steps': actual_total_steps + } + + def train_single_lora(self, folder_info): + """단일 LoRA 학습 실행""" + name = folder_info['name'] + folder_path = folder_info['path'] + + print(f"\n{'=' * 70}") + print(f"🎯 Training LoRA: {name}") + print(f"{'=' * 70}") + + # 이미지 개수 확인 + image_count = self.count_images(folder_path) + if image_count == 0: + print(f"⚠️ 이미지 없음: {folder_path}") + print(f"{'=' * 70}\n") + return False + + # 파라미터 계산 + params = self.calculate_training_params(image_count) + + # 정보 출력 + print(f"📊 Training Configuration") + print(f"{'-' * 70}") + print(f" GPU ID: {self.config.gpu_id}") + print(f" VRAM: {self.config.vram_size}GB") + print(f" Precision: {self.config.precision}") + print(f" Config: {self.config.config_file}") + print(f" Folder: {folder_info['folder']}") + print(f" Images: {image_count}") + print(f" Repeats: {params['repeats']}" + + (" (forced)" if self.config.force_repeats else " (auto)")) + print(f" Images/epoch: {image_count * params['repeats']}") + print(f" Steps/epoch: {params['steps_per_epoch']}") + print(f" Epochs: {params['epochs']}") + print(f" Total steps: {params['total_steps']}") + print(f"{'-' * 70}\n") + + # accelerate 명령어 구성 + cmd = [ + "accelerate", "launch", + "--num_cpu_threads_per_process", "1", + "--mixed_precision", self.config.precision, + "sdxl_train_network.py", + f"--config_file={self.config.config_file}", + f"--train_data_dir={folder_path}", + f"--output_name={name}", + f"--max_train_epochs={params['epochs']}", + f"--dataset_repeats={params['repeats']}", + f"--mixed_precision={self.config.precision}" + ] + + # 실행 + try: + env = os.environ.copy() + env['CUDA_VISIBLE_DEVICES'] = str(self.config.gpu_id) + + print(f"🚀 Starting training...\n") + 
result = subprocess.run(cmd, env=env, check=True) + + print(f"\n✅ {name} 학습 완료!") + print(f"{'=' * 70}\n") + return True + + except subprocess.CalledProcessError as e: + print(f"\n❌ {name} 학습 실패: {e}") + print(f"{'=' * 70}\n") + return False + except KeyboardInterrupt: + print(f"\n⚠️ 사용자에 의해 중단됨") + return False + + def run_batch_training(self): + """일괄 학습 실행""" + folders = self.find_training_folders() + + if not folders: + print("❌ 학습 폴더를 찾을 수 없습니다!") + print(f" 경로: {self.config.train_dir}") + print(f" 패턴: 01_name, 02_name, ...") + return + + print(f"\n{'=' * 70}") + print(f"🚀 SDXL LoRA Batch Training") + print(f"{'=' * 70}") + print(f"📁 학습 폴더: {self.config.train_dir}") + print(f"💾 출력 폴더: {self.config.output_dir}") + print(f"🖥️ GPU: {self.config.gpu_id} ({self.config.vram_size}GB)") + print(f"⚡ Precision: {self.config.precision}") + print(f"📋 Config: {self.config.config_file}") + print(f"\n발견된 학습 폴더: {len(folders)}개") + print(f"{'-' * 70}") + for f in folders: + img_count = self.count_images(f['path']) + print(f" {f['order']:02d}. {f['name']:20s} ({img_count} images)") + print(f"{'=' * 70}\n") + + # 사용자 확인 + try: + response = input("학습을 시작하시겠습니까? (y/N): ") + if response.lower() not in ['y', 'yes']: + print("❌ 학습 취소됨") + return + except KeyboardInterrupt: + print("\n❌ 학습 취소됨") + return + + # 학습 실행 + results = [] + for i, folder in enumerate(folders, 1): + print(f"\n[{i}/{len(folders)}] Processing: {folder['name']}...") + success = self.train_single_lora(folder) + results.append({ + 'name': folder['name'], + 'success': success + }) + + # 실패 시 계속 진행할지 물어봄 + if not success: + try: + response = input("❓ 계속 진행하시겠습니까? 
(Y/n): ") + if response.lower() in ['n', 'no']: + print("⚠️ 나머지 학습 건너뜀") + break + except KeyboardInterrupt: + print("\n⚠️ 나머지 학습 건너뜀") + break + + # 결과 요약 + print(f"\n{'=' * 70}") + print(f"📊 Training Summary") + print(f"{'=' * 70}") + success_count = sum(1 for r in results if r['success']) + fail_count = len(results) - success_count + + for r in results: + status = "✅" if r['success'] else "❌" + print(f"{status} {r['name']}") + + print(f"{'-' * 70}") + print(f"✅ 성공: {success_count}/{len(results)}") + if fail_count > 0: + print(f"❌ 실패: {fail_count}/{len(results)}") + print(f"{'=' * 70}\n") + + +def main(): + parser = argparse.ArgumentParser( + description="SDXL LoRA 일괄 학습 스크립트", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +사용 예시: + python train_batch.py + python train_batch.py config-16g.json + python train_batch.py config-24g.json 0 15 + +폴더 구조: + training/ + ├── 01_alice/ + │ └── *.jpg + ├── 02_bob/ + │ └── *.jpg + └── 03_background/ + └── *.jpg + """ + ) + + parser.add_argument( + "config", + nargs="?", + default="config-24g.json", + help="Config 파일 (기본: config-24g.json)" + ) + + parser.add_argument( + "gpu_id", + nargs="?", + type=int, + default=0, + help="GPU ID (기본: 0)" + ) + + parser.add_argument( + "repeats", + nargs="?", + type=int, + default=None, + help="강제 반복 횟수 (기본: 자동 계산)" + ) + + args = parser.parse_args() + + try: + # 설정 로드 + training_config = TrainingConfig( + config_file=args.config, + gpu_id=args.gpu_id, + force_repeats=args.repeats + ) + + # 학습 실행 + trainer = LoRATrainer(training_config) + trainer.run_batch_training() + + except KeyboardInterrupt: + print("\n\n⚠️ 프로그램 중단됨") + sys.exit(1) + except Exception as e: + print(f"\n❌ 오류 발생: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/run-train-single.py b/run-train-single.py new file mode 100644 index 0000000..8571421 --- /dev/null +++ b/run-train-single.py @@ -0,0 +1,365 @@ 
+#!/usr/bin/env python3 +""" +SDXL LoRA 단일 학습 스크립트 (고급 사용자용) +- 특정 폴더만 선택 학습 +- 세밀한 파라미터 조정 가능 +- Config 오버라이드 +""" + +import os +import sys +import json +import subprocess +import argparse +from pathlib import Path + + +def get_vram_size(gpu_id=0): + """NVIDIA GPU VRAM 크기 감지 (GB)""" + try: + cmd = f"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits -i {gpu_id}" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + vram_mb = int(result.stdout.strip()) + return vram_mb // 1024 + except: + return 24 # 기본값 + + +def count_images(folder_path): + """폴더 내 이미지 개수 세기""" + extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} + count = 0 + for file in os.listdir(folder_path): + if Path(file).suffix.lower() in extensions: + count += 1 + return count + + +def calculate_auto_params(image_count, vram_size, batch_size=1): + """이미지 수 기반 자동 파라미터 계산""" + target_steps = 1800 if vram_size >= 20 else 1500 + + # Repeats 계산 + if image_count < 20: + repeats = max(80, min(200, target_steps // (image_count * 10))) + elif image_count < 50: + repeats = max(30, min(80, target_steps // (image_count * 10))) + elif image_count < 100: + repeats = max(15, min(30, target_steps // (image_count * 10))) + else: + repeats = max(5, min(20, target_steps // (image_count * 10))) + + # Epochs 계산 + images_per_epoch = image_count * repeats + steps_per_epoch = images_per_epoch // batch_size + epochs = max(1, round(target_steps / steps_per_epoch)) + epochs = min(max(epochs, 5), 30) + + return { + 'repeats': repeats, + 'epochs': epochs, + 'steps_per_epoch': steps_per_epoch, + 'total_steps': epochs * steps_per_epoch + } + + +def main(): + parser = argparse.ArgumentParser( + description="SDXL LoRA 단일 학습 (고급 설정)", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +사용 예시: + # 기본 (자동 계산) + python train_single.py --folder ../dataset/training/01_alice + + # 수동 파라미터 지정 + python train_single.py --folder ../dataset/training/01_alice --epochs 20 --repeats 30 + + # 
Learning rate 조정 + python train_single.py --folder ../dataset/training/01_alice --lr 0.0002 + + # Network dim 변경 + python train_single.py --folder ../dataset/training/01_alice --dim 64 --alpha 32 + + # 전체 커스텀 + python train_single.py \\ + --folder ../dataset/training/01_alice \\ + --output alice_v2 \\ + --config config-24g.json \\ + --gpu 0 \\ + --epochs 25 \\ + --repeats 40 \\ + --lr 0.00015 \\ + --dim 64 \\ + --alpha 32 \\ + --batch-size 2 + """ + ) + + # 필수 인자 + parser.add_argument( + "--folder", + required=True, + help="학습할 폴더 경로 (예: ../dataset/training/01_alice)" + ) + + # 기본 설정 + parser.add_argument( + "--config", + default="config-24g.json", + help="Config 파일 (기본: config-24g.json)" + ) + + parser.add_argument( + "--output", + help="출력 LoRA 이름 (기본: 폴더명에서 추출)" + ) + + parser.add_argument( + "--gpu", + type=int, + default=0, + help="GPU ID (기본: 0)" + ) + + # 학습 파라미터 + parser.add_argument( + "--epochs", + type=int, + help="총 Epoch 수 (기본: 자동 계산)" + ) + + parser.add_argument( + "--repeats", + type=int, + help="이미지 반복 횟수 (기본: 자동 계산)" + ) + + parser.add_argument( + "--batch-size", + type=int, + help="배치 사이즈 (기본: config 값)" + ) + + parser.add_argument( + "--lr", + type=float, + help="Learning rate (기본: config 값, 보통 1e-4)" + ) + + parser.add_argument( + "--dim", + type=int, + help="Network dimension (기본: config 값, 보통 32)" + ) + + parser.add_argument( + "--alpha", + type=int, + help="Network alpha (기본: config 값, 보통 16)" + ) + + parser.add_argument( + "--resolution", + help="해상도 (예: 1024,1024 또는 768,768)" + ) + + parser.add_argument( + "--save-every", + type=int, + help="N epoch마다 저장 (기본: config 값)" + ) + + # 고급 옵션 + parser.add_argument( + "--optimizer", + help="Optimizer (예: AdamW8bit, Lion, Prodigy)" + ) + + parser.add_argument( + "--scheduler", + help="LR Scheduler (예: cosine, constant, polynomial)" + ) + + parser.add_argument( + "--no-auto", + action="store_true", + help="자동 계산 비활성화 (epochs/repeats 수동 지정 필수)" + ) + + args = parser.parse_args() + + # 폴더 확인 + 
folder_path = Path(args.folder) + if not folder_path.exists() or not folder_path.is_dir(): + print(f"❌ 폴더를 찾을 수 없습니다: {folder_path}") + sys.exit(1) + + # 이미지 개수 + image_count = count_images(folder_path) + if image_count == 0: + print(f"❌ 이미지가 없습니다: {folder_path}") + sys.exit(1) + + # VRAM 감지 + vram_size = get_vram_size(args.gpu) + + # Config 자동 선택 + if vram_size >= 20: + precision = "bf16" + # Use the --config value as given; the previous equality check left config_file unbound for any other file name. + config_file = args.config + else: + precision = "fp16" + config_file = "config-16g.json" + print(f"⚠️ VRAM {vram_size}GB < 20GB, fp16 모드로 전환") + + # Config 로드 + if not os.path.exists(config_file): + print(f"❌ Config 파일 없음: {config_file}") + sys.exit(1) + + with open(config_file, 'r', encoding='utf-8') as f: + config = json.load(f) + + batch_size = args.batch_size or config['training'].get('batch_size', 1) + + # 출력 이름 결정 + if args.output: + output_name = args.output + else: + # 폴더명에서 추출 (01_alice → alice) + folder_name = folder_path.name + parts = folder_name.split('_', 1) + if len(parts) == 2 and parts[0].isdigit(): + output_name = parts[1] + else: + output_name = folder_name + + # 파라미터 결정 + if args.no_auto: + # 수동 모드 + if not args.epochs or not args.repeats: + print("❌ --no-auto 사용 시 --epochs와 --repeats 필수입니다") + sys.exit(1) + epochs = args.epochs + repeats = args.repeats + steps_per_epoch = (image_count * repeats) // batch_size + total_steps = epochs * steps_per_epoch + else: + # 자동 계산 (오버라이드 가능) + auto_params = calculate_auto_params(image_count, vram_size, batch_size) + epochs = args.epochs or auto_params['epochs'] + repeats = args.repeats or auto_params['repeats'] + steps_per_epoch = (image_count * repeats) // batch_size + total_steps = epochs * steps_per_epoch + + # 학습 정보 출력 + print(f"\n{'=' * 70}") + print(f"🎯 SDXL LoRA Training - Single Mode") + print(f"{'=' * 70}") + print(f"📁 Folder: {folder_path}") + print(f"💾 Output: {output_name}.safetensors") + print(f"📋 Config: {config_file}") + print(f"🖥️ GPU: {args.gpu} ({vram_size}GB
VRAM)") + print(f"⚡ Precision: {precision}") + print(f"{'-' * 70}") + print(f"📊 Training Parameters") + print(f"{'-' * 70}") + print(f" Images: {image_count}") + print(f" Repeats: {repeats}" + (" (manual)" if args.repeats else " (auto)")) + print(f" Epochs: {epochs}" + (" (manual)" if args.epochs else " (auto)")) + print(f" Batch size: {batch_size}" + (" (override)" if args.batch_size else "")) + print(f" Images/epoch: {image_count * repeats}") + print(f" Steps/epoch: {steps_per_epoch}") + print(f" Total steps: {total_steps}") + + # 오버라이드된 파라미터 표시 + overrides = [] + if args.lr: + print(f" Learning rate: {args.lr} (override)") + overrides.append(('lr', args.lr)) + if args.dim: + print(f" Network dim: {args.dim} (override)") + overrides.append(('dim', args.dim)) + if args.alpha: + print(f" Network alpha: {args.alpha} (override)") + overrides.append(('alpha', args.alpha)) + if args.resolution: + print(f" Resolution: {args.resolution} (override)") + overrides.append(('resolution', args.resolution)) + if args.optimizer: + print(f" Optimizer: {args.optimizer} (override)") + overrides.append(('optimizer', args.optimizer)) + if args.scheduler: + print(f" LR Scheduler: {args.scheduler} (override)") + overrides.append(('scheduler', args.scheduler)) + if args.save_every: + print(f" Save every: {args.save_every} epochs (override)") + overrides.append(('save_every', args.save_every)) + + print(f"{'=' * 70}\n") + + # 사용자 확인 + try: + response = input("학습을 시작하시겠습니까? 
(y/N): ") + if response.lower() not in ['y', 'yes']: + print("❌ 학습 취소됨") + sys.exit(0) + except KeyboardInterrupt: + print("\n❌ 학습 취소됨") + sys.exit(0) + + # accelerate 명령어 구성 + cmd = [ + "accelerate", "launch", + "--num_cpu_threads_per_process", "1", + "--mixed_precision", precision, + "sdxl_train_network.py", + f"--config_file={config_file}", + f"--train_data_dir={folder_path}", + f"--output_name={output_name}", + f"--max_train_epochs={epochs}", + f"--dataset_repeats={repeats}", + f"--mixed_precision={precision}" + ] + + # 오버라이드 추가 + if args.batch_size: + cmd.append(f"--train_batch_size={args.batch_size}") + if args.lr: + cmd.append(f"--learning_rate={args.lr}") + if args.dim: + cmd.append(f"--network_dim={args.dim}") + if args.alpha: + cmd.append(f"--network_alpha={args.alpha}") + if args.resolution: + cmd.append(f"--resolution={args.resolution}") + if args.optimizer: + cmd.append(f"--optimizer_type={args.optimizer}") + if args.scheduler: + cmd.append(f"--lr_scheduler={args.scheduler}") + if args.save_every: + cmd.append(f"--save_every_n_epochs={args.save_every}") + + # 환경 변수 설정 + env = os.environ.copy() + env['CUDA_VISIBLE_DEVICES'] = str(args.gpu) + + # 실행 + try: + print(f"\n🚀 Starting training...\n") + subprocess.run(cmd, env=env, check=True) + print(f"\n✅ 학습 완료: {output_name}.safetensors") + print(f"{'=' * 70}\n") + + except subprocess.CalledProcessError as e: + print(f"\n❌ 학습 실패: {e}") + sys.exit(1) + except KeyboardInterrupt: + print(f"\n⚠️ 학습 중단됨") + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index e400a22..b97fa5f 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit e400a224f84a6d51feb275596644acb416bd629c +Subproject commit b97fa5fcb884e19ab50fb21b80ea65bf1e416b5c From 21808c58303d3db62cd74fe0b82d298c9cb2f1ae Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sun, 26 Oct 2025 19:48:05 +0900 Subject: [PATCH 33/58] update --- Dockerfile | 4 ++-- docker-build.cmd | 4 
++-- requirements.txt | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4c87cf9..b14e3ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ -# Python 3.11 + PyTorch 2.4.0 + CUDA 12.1 + CuDNN 9.5 -FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel +# Python 3.11 + PyTorch 2.7.0 + CUDA 12.8 + CuDNN 9.5 +FROM pytorch/pytorch:2.7.0-cuda12.8-cudnn9-devel # 기본 작업 경로 설정 WORKDIR /app diff --git a/docker-build.cmd b/docker-build.cmd index 8a5a905..d932bff 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.9.3. +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.9.5 . -docker tag aicompanion/sdxl_train_captioner:0.9.3 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.9.5 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5aab427..b8dba16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -47,6 +47,6 @@ open-clip-torch==2.20.0 rich==13.7.0 # Kohya_ss library -xformers==0.0.27.post2 --index-url https://download.pytorch.org/whl/cu121 -triton==2.3.0 +xformers --index-url https://download.pytorch.org/whl/cu128 +triton==3.0.0 From 29f722315d8eb7e15dd3f9d38e6e417444a6302d Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sun, 26 Oct 2025 21:18:45 +0900 Subject: [PATCH 34/58] update --- MANUAL_TRAIN.md | 177 ++++++++++--- resume-train.cmd | 31 +++ run-caption-watcher.cmd | 6 + run-train.cmd => run-train-auto.cmd | 3 +- run-train-auto.py | 368 ---------------------------- run-train-single.cmd | 58 +++++ run-train-single.ps1 | 55 +++++ run-train-single.py | 365 --------------------------- sd-scripts | 2 +- 9 files changed, 297 insertions(+), 768 deletions(-) create mode 100644 resume-train.cmd create mode 100644 run-caption-watcher.cmd rename run-train.cmd => run-train-auto.cmd (60%) delete mode 100644 
run-train-auto.py create mode 100644 run-train-single.cmd create mode 100644 run-train-single.ps1 delete mode 100644 run-train-single.py diff --git a/MANUAL_TRAIN.md b/MANUAL_TRAIN.md index 673fda8..9a1d7c2 100644 --- a/MANUAL_TRAIN.md +++ b/MANUAL_TRAIN.md @@ -1,52 +1,52 @@ -# 단일폴더 수동 학습 사용 예시 +# I. 단일폴더 수동 학습 사용 예시 ## 1. 기본 사용 (자동 계산) ```bash -python train_single.py --folder ../dataset/training/01_alice +run-train-single --folder ../dataset/training/01_alice ``` ## 2. Epochs만 수동 지정 ```bash -python train_single.py --folder ../dataset/training/01_alice --epochs 25 +run-train-single --folder ../dataset/training/01_alice --epochs 25 ``` ## 3. 세밀한 조정 ```bash -python train_single.py \ - --folder ../dataset/training/01_alice \ - --epochs 30 \ - --repeats 50 \ - --lr 0.00015 \ - --dim 64 \ +run-train-single ^ + --folder ../dataset/training/01_alice ^ + --epochs 30 ^ + --repeats 50 ^ + --lr 0.00015 ^ + --dim 64 ^ --alpha 32 ``` ## 4. 고해상도 학습 ```bash -python train_single.py \ - --folder ../dataset/training/01_alice \ - --resolution 1024,1024 \ +run-train-single ^ + --folder ../dataset/training/01_alice ^ + --resolution 1024,1024 ^ --batch-size 1 ``` ## 5. 빠른 테스트 ```bash -python train_single.py \ - --folder ../dataset/training/01_alice \ - --epochs 5 \ - --repeats 10 \ +run-train-single ^ + --folder ../dataset/training/01_alice ^ + --epochs 5 ^ + --repeats 10 ^ --save-every 1 ``` ## 6. 
완전 수동 모드 ```bash -python train_single.py \ - --folder ../dataset/training/01_alice \ - --no-auto \ - --epochs 20 \ - --repeats 30 \ - --lr 0.0001 \ - --optimizer AdamW8bit \ +run-train-single ^ + --folder ../dataset/training/01_alice ^ + --no-auto ^ + --epochs 20 ^ + --repeats 30 ^ + --lr 0.0001 ^ + --optimizer AdamW8bit ^ --scheduler cosine ``` @@ -84,7 +84,7 @@ python train_batch.py ### train_single.py (단일 수동) ```bash # 특정 폴더만 세밀 조정 -python train_single.py --folder ../dataset/training/mainchar/01_alice --epochs 30 --lr 0.00015 +run-train-single --folder ../dataset/training/mainchar/01_alice --epochs 30 --lr 0.00015 → alice만 커스텀 파라미터로 학습 ``` @@ -96,19 +96,130 @@ python train_single.py --folder ../dataset/training/mainchar/01_alice --epochs 3 python train_batch.py # 2. 결과가 좋지 않은 캐릭터만 재학습 -python train_single.py --folder ../dataset/training/mainchar/01_alice --epochs 25 +run-train-single --folder ../dataset/training/mainchar/01_alice --epochs 25 ``` ### 고급 사용자 ```bash # 처음부터 세밀하게 조정 -python train_single.py \ - --folder ../dataset/training/mainchar/01_alice \ - --epochs 30 \ - --repeats 50 \ - --lr 0.00012 \ - --dim 64 \ - --alpha 32 \ - --optimizer Prodigy \ +run-train-single ^ + --folder ../dataset/training/mainchar/01_alice ^ + --epochs 30 ^ + --repeats 50 ^ + --lr 0.00012 ^ + --dim 64 ^ + --alpha 32 ^ + --optimizer Prodigy ^ --resolution 1024,1024 ``` + +# II. 단일폴더 학습재개(resume) 방법 + +## 1. 기본 Resume +```cmd +run-train-single --folder ../dataset/training/mainchar/01_alice --resume ../output_models/alice-epoch-010.safetensors +``` + +## 2. Resume + Learning Rate 조정 (Fine-tuning) +```cmd +run-train-single --folder ../dataset/training/mainchar/01_alice ^ + --folder ../dataset/training/01_alice ^ + --resume ../output_models/alice-epoch-010.safetensors ^ + --epochs 20 ^ + --lr 0.00005 +``` + +## 3. 
Resume + 더 많은 데이터 +```cmd +run-train-single --folder ../dataset/training/mainchar/01_alice ^ + --folder ../dataset/training/01_alice_more ^ + --resume ../output_models/alice-epoch-015.safetensors ^ + --epochs 10 +``` + +## 주의사항 +✅ Resume 시 동일하게 유지해야 할 것 + +- --dim (network_dim) +- --alpha (network_alpha) +- 네트워크 구조 관련 설정 + +## ⚠️ Resume 시 변경 가능한 것 + +- --epochs (더 학습) +- --lr (learning rate 조정) +- --repeats (데이터 반복) +- --optimizer (optimizer 변경) +- --scheduler (스케줄러 변경) + +## ❌ Resume 시 변경하면 안되는 것 +```cmd +# 잘못된 예 +run-train-single \ + --folder ../dataset/training/01_alice \ + --resume ../output_models/alice-epoch-010.safetensors \ + --dim 64 # ❌ 원래 32였으면 에러! +``` + +## 실전 예시 + +### 시나리오 1: 학습이 중단됨 +```cmd +# 10 epoch에서 중단 +# → 10 epoch부터 이어서 15 epoch까지 + +run-train-single ^ + --folder ../dataset/training/01_alice ^ + --resume ../output_models/alice-epoch-010.safetensors ^ + --epochs 15 +``` + +### 시나리오 2: Overfitting 방지 (LR 감소) +```cmd +# 학습률 낮춰서 Fine-tuning +run-train-single ^ + --folder ../dataset/training/01_alice ^ + --resume ../output_models/alice-epoch-015.safetensors ^ + --epochs 25 ^ + --lr 0.00005 +``` + +### 시나리오 3: 데이터 추가 후 재학습 +```cmd +# 이미지 20장 → 50장으로 증가 +run-train-single ^ + --folder ../dataset/training/01_alice_extended ^ + --resume ../output_models/alice-epoch-015.safetensors ^ + --epochs 10 ^ + --repeats 20 +``` + +## 출력 예시 +``` +====================================================================== +🎯 SDXL LoRA Training - Single Mode +====================================================================== +📁 Folder: ../dataset/training/01_alice +💾 Output: alice.safetensors +📋 Config: config-24g.json +🖥️ GPU: 0 (24GB VRAM) +⚡ Precision: bf16 +🔄 Resume from: ../output_models/alice-epoch-010.safetensors +---------------------------------------------------------------------- +📊 Training Parameters +---------------------------------------------------------------------- + Images: 25 + Repeats: 48 (auto) + Epochs: 20 (manual) + Batch size: 1 + 
Images/epoch: 1200 + Steps/epoch: 1200 + Total steps: 24000 +====================================================================== + +학습을 시작하시겠습니까? (y/N): y + +🔄 Resuming from: ../output_models/alice-epoch-010.safetensors + +🚀 Starting training... +``` \ No newline at end of file diff --git a/resume-train.cmd b/resume-train.cmd new file mode 100644 index 0000000..67a9646 --- /dev/null +++ b/resume-train.cmd @@ -0,0 +1,31 @@ +accelerate launch --num_cpu_threads_per_process 1 --mixed_precision bf16 ^ + sdxl_train_network.py ^ + --resume="../output_models" ^ + --max_train_epochs=20 ^ + --pretrained_model_name_or_path="../models/sd_xl_base_1.0.safetensors" ^ + --train_data_dir="../dataset/train/mainchar" ^ + --output_dir="../output_models" ^ + --logging_dir="../logs" ^ + --output_name="karina" ^ + --network_module=networks.lora ^ + --network_dim=32 ^ + --network_alpha=16 ^ + --learning_rate=1e-4 ^ + --optimizer_type="AdamW8bit" ^ + --lr_scheduler="cosine" ^ + --lr_warmup_steps=100 ^ + --save_every_n_epochs=1 ^ + --mixed_precision="bf16" ^ + --save_precision="bf16" ^ + --cache_latents ^ + --cache_latents_to_disk ^ + --gradient_checkpointing ^ + --xformers ^ + --seed=42 ^ + --bucket_no_upscale ^ + --min_bucket_reso=512 ^ + --max_bucket_reso=2048 ^ + --bucket_reso_steps=64 ^ + --resolution="1024,1024" ^ + --network_train_unet_only ^ + --cache_text_encoder_outputs \ No newline at end of file diff --git a/run-caption-watcher.cmd b/run-caption-watcher.cmd new file mode 100644 index 0000000..6097b88 --- /dev/null +++ b/run-caption-watcher.cmd @@ -0,0 +1,6 @@ +@echo off +setx CUDA_VISIBLE_DEVICES "3" +echo [Watcher] Starting caption watcher... 
+python cap-watcher.py --overwrite +REM --img_dir "../dataset/captioning/mainchar" --out_dir "../dataset/captioning/mainchar" +pause \ No newline at end of file diff --git a/run-train.cmd b/run-train-auto.cmd similarity index 60% rename from run-train.cmd rename to run-train-auto.cmd index 6e22b42..6152325 100644 --- a/run-train.cmd +++ b/run-train-auto.cmd @@ -1,5 +1,6 @@ @echo off REM 첫 번째 argument를 명령어로 받아서 컨테이너에서 실행 REM 모든 argument를 그대로 넘기려면 %* 사용 +docker exec -it sdxl_train_captioner bash -c "cd /app/sdxl_train_captioner/sd-scripts; ./run-train-auto.py 1 2>&1 | tee /app/sdxl_train_captioner/logs/train_$(date +%%Y%%m%%d_%%H%%M%%S).log" -docker exec -it sdxl_train_captioner bash -c "cd /app/sdxl_train_captioner/sd-scripts; ./run-train.sh config-24g.json 1 2>&1 | tee /app/sdxl_train_captioner/logs/train_$(date +%%Y%%m%%d_%%H%%M%%S).log" +pause \ No newline at end of file diff --git a/run-train-auto.py b/run-train-auto.py deleted file mode 100644 index dafded4..0000000 --- a/run-train-auto.py +++ /dev/null @@ -1,368 +0,0 @@ -#!/usr/bin/env python3 -""" -SDXL LoRA 일괄 학습 스크립트 -- 학습 폴더 하위의 여러 캐릭터/개념을 자동으로 개별 LoRA 학습 -- VRAM에 따른 자동 설정 (bf16/fp16) -- 이미지 수에 따른 최적 파라미터 자동 계산 -""" - -import os -import sys -import json -import subprocess -import argparse -from pathlib import Path - - -class TrainingConfig: - """학습 설정 관리""" - - def __init__(self, config_file, gpu_id=0, force_repeats=None): - self.config_file = config_file - self.gpu_id = gpu_id - self.force_repeats = force_repeats - - # VRAM 감지 - self.vram_size = self.get_vram_size() - - # VRAM에 따른 설정 - if self.vram_size >= 20: - self.precision = "bf16" - self.target_steps = 1800 - else: - # 16GB 이하는 fp16 config 사용 - self.config_file = "config-16g.json" - self.precision = "fp16" - self.target_steps = 1500 - - # Config 파일 로드 - self.load_config() - - def get_vram_size(self): - """NVIDIA GPU VRAM 크기 감지 (GB)""" - try: - cmd = [ - "nvidia-smi", - "--query-gpu=memory.total", - "--format=csv,noheader,nounits", - f"-i 
{self.gpu_id}" - ] - result = subprocess.run( - ' '.join(cmd), - shell=True, - capture_output=True, - text=True - ) - vram_mb = int(result.stdout.strip()) - vram_gb = vram_mb // 1024 - return vram_gb - except Exception as e: - print(f"⚠️ VRAM 감지 실패, 기본값(24GB) 사용: {e}") - return 24 - - def load_config(self): - """config.json 로드""" - if not os.path.exists(self.config_file): - print(f"❌ Config 파일 없음: {self.config_file}") - sys.exit(1) - - with open(self.config_file, 'r', encoding='utf-8') as f: - self.config = json.load(f) - - self.train_dir = self.config['folders']['train_data_dir'] - self.output_dir = self.config['folders']['output_dir'] - self.batch_size = self.config['training'].get('batch_size', 1) - - -class LoRATrainer: - """단일 LoRA 학습 실행""" - - def __init__(self, training_config): - self.config = training_config - self.image_extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} - - def find_training_folders(self): - """학습 폴더 찾기 (순서_이름 패턴)""" - train_dir = self.config.train_dir - - if not os.path.isdir(train_dir): - print(f"❌ 학습 디렉토리 없음: {train_dir}") - return [] - - folders = [] - for item in os.listdir(train_dir): - item_path = os.path.join(train_dir, item) - if not os.path.isdir(item_path): - continue - - # 패턴: 01_alice, 02_bob 등 - parts = item.split('_', 1) - if len(parts) == 2 and parts[0].isdigit(): - order = int(parts[0]) - name = parts[1] - folders.append({ - 'order': order, - 'name': name, - 'path': item_path, - 'folder': item - }) - - # 순서대로 정렬 - folders.sort(key=lambda x: x['order']) - return folders - - def count_images(self, folder_path): - """폴더 내 이미지 개수 세기""" - count = 0 - for file in os.listdir(folder_path): - if Path(file).suffix.lower() in self.image_extensions: - count += 1 - return count - - def calculate_training_params(self, image_count): - """이미지 수에 따른 최적 학습 파라미터 계산""" - batch_size = self.config.batch_size - target_steps = self.config.target_steps - - # 강제 반복 횟수가 지정되면 사용 - if self.config.force_repeats is not None: - optimal_repeats = 
self.config.force_repeats - else: - # 이미지 수에 따른 자동 계산 - if image_count < 20: - optimal_repeats = max(80, min(200, target_steps // (image_count * 10))) - elif image_count < 50: - optimal_repeats = max(30, min(80, target_steps // (image_count * 10))) - elif image_count < 100: - optimal_repeats = max(15, min(30, target_steps // (image_count * 10))) - else: - optimal_repeats = max(5, min(20, target_steps // (image_count * 10))) - - # Epochs 계산 - images_per_epoch = image_count * optimal_repeats - steps_per_epoch = images_per_epoch // batch_size - actual_epochs = max(1, round(target_steps / steps_per_epoch)) - actual_epochs = min(max(actual_epochs, 5), 30) - actual_total_steps = actual_epochs * steps_per_epoch - - return { - 'repeats': optimal_repeats, - 'epochs': actual_epochs, - 'steps_per_epoch': steps_per_epoch, - 'total_steps': actual_total_steps - } - - def train_single_lora(self, folder_info): - """단일 LoRA 학습 실행""" - name = folder_info['name'] - folder_path = folder_info['path'] - - print(f"\n{'=' * 70}") - print(f"🎯 Training LoRA: {name}") - print(f"{'=' * 70}") - - # 이미지 개수 확인 - image_count = self.count_images(folder_path) - if image_count == 0: - print(f"⚠️ 이미지 없음: {folder_path}") - print(f"{'=' * 70}\n") - return False - - # 파라미터 계산 - params = self.calculate_training_params(image_count) - - # 정보 출력 - print(f"📊 Training Configuration") - print(f"{'-' * 70}") - print(f" GPU ID: {self.config.gpu_id}") - print(f" VRAM: {self.config.vram_size}GB") - print(f" Precision: {self.config.precision}") - print(f" Config: {self.config.config_file}") - print(f" Folder: {folder_info['folder']}") - print(f" Images: {image_count}") - print(f" Repeats: {params['repeats']}" + - (" (forced)" if self.config.force_repeats else " (auto)")) - print(f" Images/epoch: {image_count * params['repeats']}") - print(f" Steps/epoch: {params['steps_per_epoch']}") - print(f" Epochs: {params['epochs']}") - print(f" Total steps: {params['total_steps']}") - print(f"{'-' * 70}\n") - - # accelerate 
명령어 구성 - cmd = [ - "accelerate", "launch", - "--num_cpu_threads_per_process", "1", - "--mixed_precision", self.config.precision, - "sdxl_train_network.py", - f"--config_file={self.config.config_file}", - f"--train_data_dir={folder_path}", - f"--output_name={name}", - f"--max_train_epochs={params['epochs']}", - f"--dataset_repeats={params['repeats']}", - f"--mixed_precision={self.config.precision}" - ] - - # 실행 - try: - env = os.environ.copy() - env['CUDA_VISIBLE_DEVICES'] = str(self.config.gpu_id) - - print(f"🚀 Starting training...\n") - result = subprocess.run(cmd, env=env, check=True) - - print(f"\n✅ {name} 학습 완료!") - print(f"{'=' * 70}\n") - return True - - except subprocess.CalledProcessError as e: - print(f"\n❌ {name} 학습 실패: {e}") - print(f"{'=' * 70}\n") - return False - except KeyboardInterrupt: - print(f"\n⚠️ 사용자에 의해 중단됨") - return False - - def run_batch_training(self): - """일괄 학습 실행""" - folders = self.find_training_folders() - - if not folders: - print("❌ 학습 폴더를 찾을 수 없습니다!") - print(f" 경로: {self.config.train_dir}") - print(f" 패턴: 01_name, 02_name, ...") - return - - print(f"\n{'=' * 70}") - print(f"🚀 SDXL LoRA Batch Training") - print(f"{'=' * 70}") - print(f"📁 학습 폴더: {self.config.train_dir}") - print(f"💾 출력 폴더: {self.config.output_dir}") - print(f"🖥️ GPU: {self.config.gpu_id} ({self.config.vram_size}GB)") - print(f"⚡ Precision: {self.config.precision}") - print(f"📋 Config: {self.config.config_file}") - print(f"\n발견된 학습 폴더: {len(folders)}개") - print(f"{'-' * 70}") - for f in folders: - img_count = self.count_images(f['path']) - print(f" {f['order']:02d}. {f['name']:20s} ({img_count} images)") - print(f"{'=' * 70}\n") - - # 사용자 확인 - try: - response = input("학습을 시작하시겠습니까? 
(y/N): ") - if response.lower() not in ['y', 'yes']: - print("❌ 학습 취소됨") - return - except KeyboardInterrupt: - print("\n❌ 학습 취소됨") - return - - # 학습 실행 - results = [] - for i, folder in enumerate(folders, 1): - print(f"\n[{i}/{len(folders)}] Processing: {folder['name']}...") - success = self.train_single_lora(folder) - results.append({ - 'name': folder['name'], - 'success': success - }) - - # 실패 시 계속 진행할지 물어봄 - if not success: - try: - response = input("❓ 계속 진행하시겠습니까? (Y/n): ") - if response.lower() in ['n', 'no']: - print("⚠️ 나머지 학습 건너뜀") - break - except KeyboardInterrupt: - print("\n⚠️ 나머지 학습 건너뜀") - break - - # 결과 요약 - print(f"\n{'=' * 70}") - print(f"📊 Training Summary") - print(f"{'=' * 70}") - success_count = sum(1 for r in results if r['success']) - fail_count = len(results) - success_count - - for r in results: - status = "✅" if r['success'] else "❌" - print(f"{status} {r['name']}") - - print(f"{'-' * 70}") - print(f"✅ 성공: {success_count}/{len(results)}") - if fail_count > 0: - print(f"❌ 실패: {fail_count}/{len(results)}") - print(f"{'=' * 70}\n") - - -def main(): - parser = argparse.ArgumentParser( - description="SDXL LoRA 일괄 학습 스크립트", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -사용 예시: - python train_batch.py - python train_batch.py config-16g.json - python train_batch.py config-24g.json 0 15 - -폴더 구조: - training/ - ├── 01_alice/ - │ └── *.jpg - ├── 02_bob/ - │ └── *.jpg - └── 03_background/ - └── *.jpg - """ - ) - - parser.add_argument( - "config", - nargs="?", - default="config-24g.json", - help="Config 파일 (기본: config-24g.json)" - ) - - parser.add_argument( - "gpu_id", - nargs="?", - type=int, - default=0, - help="GPU ID (기본: 0)" - ) - - parser.add_argument( - "repeats", - nargs="?", - type=int, - default=None, - help="강제 반복 횟수 (기본: 자동 계산)" - ) - - args = parser.parse_args() - - try: - # 설정 로드 - training_config = TrainingConfig( - config_file=args.config, - gpu_id=args.gpu_id, - force_repeats=args.repeats - ) - - # 학습 실행 - 
trainer = LoRATrainer(training_config) - trainer.run_batch_training() - - except KeyboardInterrupt: - print("\n\n⚠️ 프로그램 중단됨") - sys.exit(1) - except Exception as e: - print(f"\n❌ 오류 발생: {e}") - import traceback - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/run-train-single.cmd b/run-train-single.cmd new file mode 100644 index 0000000..76e06e9 --- /dev/null +++ b/run-train-single.cmd @@ -0,0 +1,58 @@ +@echo off +setlocal enabledelayedexpansion + +REM =================================== +REM SDXL LoRA 단일 학습 (Windows → Docker) +REM =================================== + +REM 도움말 +if "%1"=="" ( + echo Usage: run-train-single.cmd --folder FOLDER [OPTIONS] + echo. + echo Examples: + echo run-train-single.cmd --folder ../dataset/training/01_alice + echo run-train-single.cmd --folder ../dataset/training/01_alice --epochs 25 + echo run-train-single.cmd --folder ../dataset/training/01_alice --lr 0.0002 --dim 64 + echo. + echo All arguments are passed to Python script inside container. + exit /b 1 +) + +REM 현재 시간 (로그 파일명용) +for /f "tokens=1-6 delims=/:. " %%a in ("%date% %time%") do ( + set timestamp=%%a%%b%%c_%%d%%e%%f +) +set timestamp=%timestamp: =0% + +REM 모든 arguments를 하나의 문자열로 결합 +set args=%* + +REM 작은따옴표 이스케이프 (Bash에서 안전하게) +set args=%args:'='\''% + +echo =================================== +echo Starting SDXL LoRA Training +echo =================================== +echo Arguments: %args% +echo Log file: train_%timestamp%.log +echo =================================== +echo. + +REM Docker에서 실행 +docker exec -it sdxl_train_captioner bash -c "cd /app/sdxl_train_captioner/sd-scripts && python run-train-single.py %args% 2>&1 | tee /app/sdxl_train_captioner/logs/train_%timestamp%.log" + +if %ERRORLEVEL% EQU 0 ( + echo. + echo =================================== + echo Training completed successfully! + echo =================================== +) else ( + echo. 
+ echo =================================== + echo Training failed with error code: %ERRORLEVEL% + echo =================================== +) + +endlocal + +pause \ No newline at end of file diff --git a/run-train-single.ps1 b/run-train-single.ps1 new file mode 100644 index 0000000..60a20e7 --- /dev/null +++ b/run-train-single.ps1 @@ -0,0 +1,55 @@ +# =================================== +# SDXL LoRA 단일 학습 (PowerShell → Docker) +# =================================== + +param( + [Parameter(ValueFromRemainingArguments=$true)] + [string[]]$Arguments +) + +if ($Arguments.Count -eq 0) { + Write-Host "Usage: run-train-single.ps1 --folder FOLDER [OPTIONS]" -ForegroundColor Yellow + Write-Host "" + Write-Host "Examples:" -ForegroundColor Cyan + Write-Host " .\run-train-single.ps1 --folder ../dataset/training/01_alice" + Write-Host " .\run-train-single.ps1 --folder ../dataset/training/01_alice --epochs 25" + Write-Host " .\run-train-single.ps1 --folder ../dataset/training/01_alice --lr 0.0002 --dim 64" + exit 1 +} + +# 타임스탬프 +$timestamp = Get-Date -Format "yyyyMMdd_HHmmss" + +# Arguments를 문자열로 결합 +$argsString = $Arguments -join ' ' + +# 작은따옴표 이스케이프 +$argsString = $argsString -replace "'", "'\\''" + +Write-Host "===================================" -ForegroundColor Green +Write-Host "Starting SDXL LoRA Training" -ForegroundColor Green +Write-Host "===================================" -ForegroundColor Green +Write-Host "Arguments: $argsString" -ForegroundColor Cyan +Write-Host "Log file: train_$timestamp.log" -ForegroundColor Cyan +Write-Host "===================================" -ForegroundColor Green +Write-Host "" + +# Docker 명령어 +$dockerCmd = "cd /app/sdxl_train_captioner/sd-scripts && python run-train-single.py $argsString 2>&1 | tee /app/sdxl_train_captioner/logs/train_$timestamp.log" + +# 실행 +docker exec -it sdxl_train_captioner bash -c $dockerCmd + +if ($LASTEXITCODE -eq 0) { + Write-Host "" + Write-Host "===================================" -ForegroundColor Green + 
Write-Host "Training completed successfully!" -ForegroundColor Green + Write-Host "===================================" -ForegroundColor Green +} else { + Write-Host "" + Write-Host "===================================" -ForegroundColor Red + Write-Host "Training failed with error code: $LASTEXITCODE" -ForegroundColor Red + Write-Host "===================================" -ForegroundColor Red +} + +pause \ No newline at end of file diff --git a/run-train-single.py b/run-train-single.py deleted file mode 100644 index 8571421..0000000 --- a/run-train-single.py +++ /dev/null @@ -1,365 +0,0 @@ -#!/usr/bin/env python3 -""" -SDXL LoRA 단일 학습 스크립트 (고급 사용자용) -- 특정 폴더만 선택 학습 -- 세밀한 파라미터 조정 가능 -- Config 오버라이드 -""" - -import os -import sys -import json -import subprocess -import argparse -from pathlib import Path - - -def get_vram_size(gpu_id=0): - """NVIDIA GPU VRAM 크기 감지 (GB)""" - try: - cmd = f"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits -i {gpu_id}" - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - vram_mb = int(result.stdout.strip()) - return vram_mb // 1024 - except: - return 24 # 기본값 - - -def count_images(folder_path): - """폴더 내 이미지 개수 세기""" - extensions = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} - count = 0 - for file in os.listdir(folder_path): - if Path(file).suffix.lower() in extensions: - count += 1 - return count - - -def calculate_auto_params(image_count, vram_size, batch_size=1): - """이미지 수 기반 자동 파라미터 계산""" - target_steps = 1800 if vram_size >= 20 else 1500 - - # Repeats 계산 - if image_count < 20: - repeats = max(80, min(200, target_steps // (image_count * 10))) - elif image_count < 50: - repeats = max(30, min(80, target_steps // (image_count * 10))) - elif image_count < 100: - repeats = max(15, min(30, target_steps // (image_count * 10))) - else: - repeats = max(5, min(20, target_steps // (image_count * 10))) - - # Epochs 계산 - images_per_epoch = image_count * repeats - steps_per_epoch = images_per_epoch // 
batch_size - epochs = max(1, round(target_steps / steps_per_epoch)) - epochs = min(max(epochs, 5), 30) - - return { - 'repeats': repeats, - 'epochs': epochs, - 'steps_per_epoch': steps_per_epoch, - 'total_steps': epochs * steps_per_epoch - } - - -def main(): - parser = argparse.ArgumentParser( - description="SDXL LoRA 단일 학습 (고급 설정)", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -사용 예시: - # 기본 (자동 계산) - python train_single.py --folder ../dataset/training/01_alice - - # 수동 파라미터 지정 - python train_single.py --folder ../dataset/training/01_alice --epochs 20 --repeats 30 - - # Learning rate 조정 - python train_single.py --folder ../dataset/training/01_alice --lr 0.0002 - - # Network dim 변경 - python train_single.py --folder ../dataset/training/01_alice --dim 64 --alpha 32 - - # 전체 커스텀 - python train_single.py \\ - --folder ../dataset/training/01_alice \\ - --output alice_v2 \\ - --config config-24g.json \\ - --gpu 0 \\ - --epochs 25 \\ - --repeats 40 \\ - --lr 0.00015 \\ - --dim 64 \\ - --alpha 32 \\ - --batch-size 2 - """ - ) - - # 필수 인자 - parser.add_argument( - "--folder", - required=True, - help="학습할 폴더 경로 (예: ../dataset/training/01_alice)" - ) - - # 기본 설정 - parser.add_argument( - "--config", - default="config-24g.json", - help="Config 파일 (기본: config-24g.json)" - ) - - parser.add_argument( - "--output", - help="출력 LoRA 이름 (기본: 폴더명에서 추출)" - ) - - parser.add_argument( - "--gpu", - type=int, - default=0, - help="GPU ID (기본: 0)" - ) - - # 학습 파라미터 - parser.add_argument( - "--epochs", - type=int, - help="총 Epoch 수 (기본: 자동 계산)" - ) - - parser.add_argument( - "--repeats", - type=int, - help="이미지 반복 횟수 (기본: 자동 계산)" - ) - - parser.add_argument( - "--batch-size", - type=int, - help="배치 사이즈 (기본: config 값)" - ) - - parser.add_argument( - "--lr", - type=float, - help="Learning rate (기본: config 값, 보통 1e-4)" - ) - - parser.add_argument( - "--dim", - type=int, - help="Network dimension (기본: config 값, 보통 32)" - ) - - parser.add_argument( - "--alpha", - type=int, - 
help="Network alpha (기본: config 값, 보통 16)" - ) - - parser.add_argument( - "--resolution", - help="해상도 (예: 1024,1024 또는 768,768)" - ) - - parser.add_argument( - "--save-every", - type=int, - help="N epoch마다 저장 (기본: config 값)" - ) - - # 고급 옵션 - parser.add_argument( - "--optimizer", - help="Optimizer (예: AdamW8bit, Lion, Prodigy)" - ) - - parser.add_argument( - "--scheduler", - help="LR Scheduler (예: cosine, constant, polynomial)" - ) - - parser.add_argument( - "--no-auto", - action="store_true", - help="자동 계산 비활성화 (epochs/repeats 수동 지정 필수)" - ) - - args = parser.parse_args() - - # 폴더 확인 - folder_path = Path(args.folder) - if not folder_path.exists() or not folder_path.is_dir(): - print(f"❌ 폴더를 찾을 수 없습니다: {folder_path}") - sys.exit(1) - - # 이미지 개수 - image_count = count_images(folder_path) - if image_count == 0: - print(f"❌ 이미지가 없습니다: {folder_path}") - sys.exit(1) - - # VRAM 감지 - vram_size = get_vram_size(args.gpu) - - # Config 자동 선택 - if vram_size >= 20: - precision = "bf16" - if args.config == "config-24g.json": - config_file = "config-24g.json" - else: - precision = "fp16" - config_file = "config-16g.json" - print(f"⚠️ VRAM {vram_size}GB < 20GB, fp16 모드로 전환") - - # Config 로드 - if not os.path.exists(config_file): - print(f"❌ Config 파일 없음: {config_file}") - sys.exit(1) - - with open(config_file, 'r', encoding='utf-8') as f: - config = json.load(f) - - batch_size = args.batch_size or config['training'].get('batch_size', 1) - - # 출력 이름 결정 - if args.output: - output_name = args.output - else: - # 폴더명에서 추출 (01_alice → alice) - folder_name = folder_path.name - parts = folder_name.split('_', 1) - if len(parts) == 2 and parts[0].isdigit(): - output_name = parts[1] - else: - output_name = folder_name - - # 파라미터 결정 - if args.no_auto: - # 수동 모드 - if not args.epochs or not args.repeats: - print("❌ --no-auto 사용 시 --epochs와 --repeats 필수입니다") - sys.exit(1) - epochs = args.epochs - repeats = args.repeats - steps_per_epoch = (image_count * repeats) // batch_size - total_steps = 
epochs * steps_per_epoch - else: - # 자동 계산 (오버라이드 가능) - auto_params = calculate_auto_params(image_count, vram_size, batch_size) - epochs = args.epochs or auto_params['epochs'] - repeats = args.repeats or auto_params['repeats'] - steps_per_epoch = (image_count * repeats) // batch_size - total_steps = epochs * steps_per_epoch - - # 학습 정보 출력 - print(f"\n{'=' * 70}") - print(f"🎯 SDXL LoRA Training - Single Mode") - print(f"{'=' * 70}") - print(f"📁 Folder: {folder_path}") - print(f"💾 Output: {output_name}.safetensors") - print(f"📋 Config: {config_file}") - print(f"🖥️ GPU: {args.gpu} ({vram_size}GB VRAM)") - print(f"⚡ Precision: {precision}") - print(f"{'-' * 70}") - print(f"📊 Training Parameters") - print(f"{'-' * 70}") - print(f" Images: {image_count}") - print(f" Repeats: {repeats}" + (" (manual)" if args.repeats else " (auto)")) - print(f" Epochs: {epochs}" + (" (manual)" if args.epochs else " (auto)")) - print(f" Batch size: {batch_size}" + (" (override)" if args.batch_size else "")) - print(f" Images/epoch: {image_count * repeats}") - print(f" Steps/epoch: {steps_per_epoch}") - print(f" Total steps: {total_steps}") - - # 오버라이드된 파라미터 표시 - overrides = [] - if args.lr: - print(f" Learning rate: {args.lr} (override)") - overrides.append(('lr', args.lr)) - if args.dim: - print(f" Network dim: {args.dim} (override)") - overrides.append(('dim', args.dim)) - if args.alpha: - print(f" Network alpha: {args.alpha} (override)") - overrides.append(('alpha', args.alpha)) - if args.resolution: - print(f" Resolution: {args.resolution} (override)") - overrides.append(('resolution', args.resolution)) - if args.optimizer: - print(f" Optimizer: {args.optimizer} (override)") - overrides.append(('optimizer', args.optimizer)) - if args.scheduler: - print(f" LR Scheduler: {args.scheduler} (override)") - overrides.append(('scheduler', args.scheduler)) - if args.save_every: - print(f" Save every: {args.save_every} epochs (override)") - overrides.append(('save_every', args.save_every)) - - 
print(f"{'=' * 70}\n") - - # 사용자 확인 - try: - response = input("학습을 시작하시겠습니까? (y/N): ") - if response.lower() not in ['y', 'yes']: - print("❌ 학습 취소됨") - sys.exit(0) - except KeyboardInterrupt: - print("\n❌ 학습 취소됨") - sys.exit(0) - - # accelerate 명령어 구성 - cmd = [ - "accelerate", "launch", - "--num_cpu_threads_per_process", "1", - "--mixed_precision", precision, - "sdxl_train_network.py", - f"--config_file={config_file}", - f"--train_data_dir={folder_path}", - f"--output_name={output_name}", - f"--max_train_epochs={epochs}", - f"--dataset_repeats={repeats}", - f"--mixed_precision={precision}" - ] - - # 오버라이드 추가 - if args.batch_size: - cmd.append(f"--train_batch_size={args.batch_size}") - if args.lr: - cmd.append(f"--learning_rate={args.lr}") - if args.dim: - cmd.append(f"--network_dim={args.dim}") - if args.alpha: - cmd.append(f"--network_alpha={args.alpha}") - if args.resolution: - cmd.append(f"--resolution={args.resolution}") - if args.optimizer: - cmd.append(f"--optimizer_type={args.optimizer}") - if args.scheduler: - cmd.append(f"--lr_scheduler={args.scheduler}") - if args.save_every: - cmd.append(f"--save_every_n_epochs={args.save_every}") - - # 환경 변수 설정 - env = os.environ.copy() - env['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - - # 실행 - try: - print(f"\n🚀 Starting training...\n") - subprocess.run(cmd, env=env, check=True) - print(f"\n✅ 학습 완료: {output_name}.safetensors") - print(f"{'=' * 70}\n") - - except subprocess.CalledProcessError as e: - print(f"\n❌ 학습 실패: {e}") - sys.exit(1) - except KeyboardInterrupt: - print(f"\n⚠️ 학습 중단됨") - sys.exit(1) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index b97fa5f..49115c2 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit b97fa5fcb884e19ab50fb21b80ea65bf1e416b5c +Subproject commit 49115c25e9a0ffc6479b8f35d3ac76ba5352ca5b From 415fbe47905e1a43cad42f345ee347fc95de4073 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Mon, 27 Oct 2025 03:59:33 
+0900 Subject: [PATCH 35/58] update --- .dockerignore | 1 - Dockerfile | 14 +++++++++----- README.md | 2 +- docker-build.cmd | 4 ++-- docker-compose.yml | 4 ++-- run-caption-watcher.cmd | 6 ------ 6 files changed, 14 insertions(+), 17 deletions(-) delete mode 100644 run-caption-watcher.cmd diff --git a/.dockerignore b/.dockerignore index eafb767..2c5799a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -21,4 +21,3 @@ logs/ outputs/ sd-scripts/venv models/sd_xl_base_1.0.safetensors -#models/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index b14e3ce..ce4abd9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Python 3.11 + PyTorch 2.7.0 + CUDA 12.8 + CuDNN 9.5 -FROM pytorch/pytorch:2.7.0-cuda12.8-cudnn9-devel +FROM pytorch/pytorch:2.7.1-cuda12.8-cudnn9-devel # 기본 작업 경로 설정 WORKDIR /app @@ -17,16 +17,20 @@ RUN pip install --upgrade pip setuptools wheel # kohya_ss 전체 복사 (모델 포함) COPY . /app/sdxl_train_captioner -# 두 requirements.txt 모두 설치 +# requirements.txt 설치 WORKDIR /app/sdxl_train_captioner -RUN mkdir -p /app/sdxl_train_captioner/models -RUN mkdir -p /app/sdxl_train_captioner/dataset -RUN pip install -r requirements.txt +RUN pip install --no-cache-dir -r requirements.txt +RUN pip install flash-attn --no-build-isolation + +RUN mkdir -p /app/sdxl_train_captioner/dataset +RUN mkdir -p /app/sdxl_train_captioner/models # 모델 파일 복사 (미리 포함시킬 가중치) COPY ./models /app/sdxl_train_captioner/models WORKDIR /app/sdxl_train_captioner/sd-scripts + RUN chmod +x ./entrypoint.sh + ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/README.md b/README.md index 4d6d830..60dd1cb 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ cuDNN (예) C:\Program Files\NVIDIA\CUDNN\v9.5\bin 폴더 안에는 Cuda Major C:\Program Files\NVIDIA\CUDNN\v9.5\bin\12.6 아래의 모든 dll 파일을 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin으로 복사합니다. - +[README.md](../sdxl_train_captioner_runtime/README.md) ## 3. 
SDXL 모델 다운로드 - 도커 컨테이너가 실행될 때 models 하위에 StableDiffusion XL 1.0 모델이 다운로드 됩니다. - 만약에 해당 URL 지원이 종료 된 경우, 허깅페이지 또는 CIVITAI에서 다운로드 하세요. diff --git a/docker-build.cmd b/docker-build.cmd index d932bff..52065d3 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.9.5 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.9.7 . -docker tag aicompanion/sdxl_train_captioner:0.9.5 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.9.7 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 036c79b..b331a0b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,8 +15,8 @@ services: count: all capabilities: [gpu] - ports: - - "7860:7860" + # ports: + # - "7860:7860" # 볼륨 마운트 (호스트 ↔ 컨테이너) volumes: diff --git a/run-caption-watcher.cmd b/run-caption-watcher.cmd deleted file mode 100644 index 6097b88..0000000 --- a/run-caption-watcher.cmd +++ /dev/null @@ -1,6 +0,0 @@ -@echo off -setx CUDA_VISIBLE_DEVICES "3" -echo [Watcher] Starting caption watcher... 
-python cap-watcher.py --overwrite -REM --img_dir "../dataset/captioning/mainchar" --out_dir "../dataset/captioning/mainchar" -pause \ No newline at end of file From 7585c7ac96cbbf21ded834d30a144ba5b0d2ebe3 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Tue, 28 Oct 2025 17:27:53 +0900 Subject: [PATCH 36/58] update --- Dockerfile | 16 ++++++++---- doc/pytorch-cuda-ver-check.txt | 2 ++ docker-build.cmd | 4 +-- requirements.txt | 48 +++++++++++++++++----------------- 4 files changed, 39 insertions(+), 31 deletions(-) create mode 100644 doc/pytorch-cuda-ver-check.txt diff --git a/Dockerfile b/Dockerfile index ce4abd9..d13bae9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,15 @@ -# Python 3.11 + PyTorch 2.7.0 + CUDA 12.8 + CuDNN 9.5 +# Python 3.11 + PyTorch 2.7.1 + CUDA 12.8 + CuDNN 9.5 FROM pytorch/pytorch:2.7.1-cuda12.8-cudnn9-devel # 기본 작업 경로 설정 WORKDIR /app # 필수 패키지 설치 -RUN apt-get update && apt-get install -y --no-install-recommends \ - git wget curl libgl1 libglib2.0-0 libcudnn9-cuda-12 libcudnn9-dev-cuda-12 \ - && rm -rf /var/lib/apt/lists/* +RUN sed -i 's|archive.ubuntu.com|mirror.kakao.com|g' /etc/apt/sources.list && \ + apt-get update && \ + apt-get install -y apt-utils && \ + apt-get install -y --no-install-recommends git wget curl && \ + rm -rf /var/lib/apt/lists/* # Python 패키지 캐싱 방지 ENV PIP_NO_CACHE_DIR=1 @@ -20,8 +22,12 @@ COPY . /app/sdxl_train_captioner # requirements.txt 설치 WORKDIR /app/sdxl_train_captioner +# 2. 
xformers +RUN pip install xformers==0.0.31 + RUN pip install --no-cache-dir -r requirements.txt -RUN pip install flash-attn --no-build-isolation +# 문제 발생 시 버전 고정: ==2.7.4.post1 +RUN pip install flash-attn --no-build-isolation RUN mkdir -p /app/sdxl_train_captioner/dataset RUN mkdir -p /app/sdxl_train_captioner/models diff --git a/doc/pytorch-cuda-ver-check.txt b/doc/pytorch-cuda-ver-check.txt new file mode 100644 index 0000000..301b6b5 --- /dev/null +++ b/doc/pytorch-cuda-ver-check.txt @@ -0,0 +1,2 @@ +python -c "import torch; print(torch.__version__)" +python -c "import transformers; print(transformers.__version__)" \ No newline at end of file diff --git a/docker-build.cmd b/docker-build.cmd index 52065d3..256ceba 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.9.7 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:0.9.8 . -docker tag aicompanion/sdxl_train_captioner:0.9.7 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:0.9.8 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b8dba16..73f8c28 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,52 +1,52 @@ # Core packages -accelerate==0.33.0 -transformers==4.44.0 -diffusers[torch]==0.25.0 +torch==2.7.1+cu128 --index-url https://download.pytorch.org/whl/cu128 +torchvision==0.22.1+cu128 --index-url https://download.pytorch.org/whl/cu128 +torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cu128 +accelerate==0.34.2 +transformers==4.45.2 # ✅ 업데이트 +diffusers[torch]==0.34.0 # ✅ 업데이트 ftfy==6.1.1 einops==0.7.0 -pytorch-lightning==1.9.0 +# pytorch-lightning==1.9.0 # ❌ 제거! 
(범인) +lightning==2.5.5 # ✅ 최신 버전 lion-pytorch==0.0.6 schedulefree==1.4 pytorch-optimizer==3.5.0 prodigy-plus-schedule-free==1.9.0 prodigyopt==1.1.2 tensorboard -safetensors==0.4.4 +safetensors==0.4.5 # ✅ 업데이트 altair==4.2.2 easygui==0.98.3 toml==0.10.2 voluptuous==0.13.1 -huggingface-hub==0.24.5 +huggingface-hub>=0.27.0 # ✅ 업데이트 imagesize==1.4.1 -numpy<=2.0 -requests==2.28.2 -timm==0.4.12 -fairscale==0.4.4 -opencv-python==4.5.5.64 -opencv-python-headless==4.5.5.64 +numpy==1.26.4 # ✅ 구체적 버전 +requests==2.31.0 # ✅ 업데이트 +timm==1.0.21 # ✅ 업데이트 (2번째 범인) +fairscale==0.4.13 # ✅ 업데이트 (3번째 범인) +opencv-python==4.7.0.72 +opencv-python-headless==4.7.0.72 +numpy==1.26.4 -# WD14 captioning (optional) -# tensorflow==2.10.1 -onnx==1.15.0 -# onnxruntime-gpu==1.17.1 -# onnxruntime==1.17.1 +# WD14 captioning +onnx==1.16.0 # ✅ 업데이트 onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ # BLIP captioning -blip==0.1.0 -# salesforce-lavis==1.0.2 # 필요 시 주석 해제 +# blip==0.1.0 # ❌ 주석처리 (구버전, 문제 가능) +Pillow>=10.0.0 # ✅ BLIP 대신 필요하면 # NLP utils nltk==3.9.2 sentencepiece==0.2.0 # OpenCLIP for SDXL -open-clip-torch==2.20.0 +open-clip-torch==2.26.1 # ✅ 업데이트 # Logging -rich==13.7.0 +rich==13.9.4 # ✅ 업데이트 # Kohya_ss library -xformers --index-url https://download.pytorch.org/whl/cu128 -triton==3.0.0 - +triton==3.3.1 \ No newline at end of file From d22a5993e99b6c518f1ef3b7b9a8c3b7ada4754d Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Tue, 28 Oct 2025 17:40:39 +0900 Subject: [PATCH 37/58] update --- Dockerfile | 2 +- docker-compose.yml | 7 +++---- docker-up.cmd | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index d13bae9..4b6055e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ WORKDIR /app # 필수 패키지 설치 RUN sed -i 's|archive.ubuntu.com|mirror.kakao.com|g' /etc/apt/sources.list && \ apt-get update && \ - apt-get install -y apt-utils && \ + apt-get install -y apt-utils 
libgl1 libglib2.0-0 && \ apt-get install -y --no-install-recommends git wget curl && \ rm -rf /var/lib/apt/lists/* diff --git a/docker-compose.yml b/docker-compose.yml index b331a0b..60193be 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,6 +5,7 @@ services: dockerfile: Dockerfile image: aicompanion/sdxl_train_captioner:latest container_name: sdxl_train_captioner + runtime: nvidia # GPU 설정 deploy: @@ -15,9 +16,6 @@ services: count: all capabilities: [gpu] - # ports: - # - "7860:7860" - # 볼륨 마운트 (호스트 ↔ 컨테이너) volumes: - ./models:/app/sdxl_train_captioner/models @@ -27,7 +25,8 @@ services: environment: # GPU 선택 (필요 시 GPU ID 지정) - - CUDA_VISIBLE_DEVICES=3 + - NVIDIA_VISIBLE_DEVICES=all + - CUDA_VISIBLE_DEVICES=0 - HF_HOME=/app/sdxl_train_captioner/models - HF_HUB_CACHE=/app/sdxl_train_captioner/models - PYTHONUNBUFFERED=1 diff --git a/docker-up.cmd b/docker-up.cmd index 5177d11..a8d0f87 100644 --- a/docker-up.cmd +++ b/docker-up.cmd @@ -1 +1 @@ -docker-compose up -d \ No newline at end of file +docker-compose up -d --gpus all \ No newline at end of file From 086ce76d3d0c97f4023daf43baeb36c83abd7aba Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Tue, 28 Oct 2025 17:42:13 +0900 Subject: [PATCH 38/58] update --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 60193be..b79ced6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,7 +25,7 @@ services: environment: # GPU 선택 (필요 시 GPU ID 지정) - - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_VISIBLE_DEVICES=all - CUDA_VISIBLE_DEVICES=0 - HF_HOME=/app/sdxl_train_captioner/models - HF_HUB_CACHE=/app/sdxl_train_captioner/models From f01af5e8bcf9546d379622d0ee64c90ab579f720 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Tue, 28 Oct 2025 19:44:16 +0900 Subject: [PATCH 39/58] update --- Dockerfile | 6 +++-- docker-build.cmd | 4 +-- docker-up.cmd | 2 +- run-train-auto.cmd | 7 +----- run-train-single.cmd | 60 
+++----------------------------------------- sd-scripts | 2 +- 6 files changed, 12 insertions(+), 69 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4b6055e..ff9c028 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,8 +26,8 @@ WORKDIR /app/sdxl_train_captioner RUN pip install xformers==0.0.31 RUN pip install --no-cache-dir -r requirements.txt -# 문제 발생 시 버전 고정: ==2.7.4.post1 -RUN pip install flash-attn --no-build-isolation +# 문제 발생 시 버전 고정: ====2.8.0 +RUN pip install flash-attn==2.8.0.post2 --no-build-isolation RUN mkdir -p /app/sdxl_train_captioner/dataset RUN mkdir -p /app/sdxl_train_captioner/models @@ -38,5 +38,7 @@ COPY ./models /app/sdxl_train_captioner/models WORKDIR /app/sdxl_train_captioner/sd-scripts RUN chmod +x ./entrypoint.sh +RUN chmod +x ./run-train-auto.sh +RUN chmod +x ./run-train-single.sh ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/docker-build.cmd b/docker-build.cmd index 256ceba..f27e6bf 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:0.9.8 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.0 . 
-docker tag aicompanion/sdxl_train_captioner:0.9.8 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:1.0.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/docker-up.cmd b/docker-up.cmd index a8d0f87..5177d11 100644 --- a/docker-up.cmd +++ b/docker-up.cmd @@ -1 +1 @@ -docker-compose up -d --gpus all \ No newline at end of file +docker-compose up -d \ No newline at end of file diff --git a/run-train-auto.cmd b/run-train-auto.cmd index 6152325..fb5f66b 100644 --- a/run-train-auto.cmd +++ b/run-train-auto.cmd @@ -1,6 +1 @@ -@echo off -REM 첫 번째 argument를 명령어로 받아서 컨테이너에서 실행 -REM 모든 argument를 그대로 넘기려면 %* 사용 -docker exec -it sdxl_train_captioner bash -c "cd /app/sdxl_train_captioner/sd-scripts; ./run-train-auto.py 1 2>&1 | tee /app/sdxl_train_captioner/logs/train_$(date +%%Y%%m%%d_%%H%%M%%S).log" - -pause \ No newline at end of file +docker exec -it sdxl_train_captioner bash -c "/app/sdxl_train_captioner/sd-scripts/run-train-auto.sh 1" diff --git a/run-train-single.cmd b/run-train-single.cmd index 76e06e9..601cc7e 100644 --- a/run-train-single.cmd +++ b/run-train-single.cmd @@ -1,58 +1,4 @@ @echo off -setlocal enabledelayedexpansion - -REM =================================== -REM SDXL LoRA 단일 학습 (Windows → Docker) -REM =================================== - -REM 도움말 -if "%1"=="" ( - echo Usage: run-train-single.cmd --folder FOLDER [OPTIONS] - echo. - echo Examples: - echo run-train-single.cmd --folder ../dataset/training/01_alice - echo run-train-single.cmd --folder ../dataset/training/01_alice --epochs 25 - echo run-train-single.cmd --folder ../dataset/training/01_alice --lr 0.0002 --dim 64 - echo. - echo All arguments are passed to Python script inside container. - exit /b 1 -) - -REM 현재 시간 (로그 파일명용) -for /f "tokens=1-6 delims=/:. 
" %%a in ("%date% %time%") do ( - set timestamp=%%a%%b%%c_%%d%%e%%f -) -set timestamp=%timestamp: =0% - -REM 모든 arguments를 하나의 문자열로 결합 -set args=%* - -REM 작은따옴표 이스케이프 (Bash에서 안전하게) -set args=%args:'='\''% - -echo =================================== -echo Starting SDXL LoRA Training -echo =================================== -echo Arguments: %args% -echo Log file: train_%timestamp%.log -echo =================================== -echo. - -REM Docker에서 실행 -docker exec -it sdxl_train_captioner bash -c "cd /app/sdxl_train_captioner/sd-scripts && python run-train-single.py %args% 2>&1 | tee /app/sdxl_train_captioner/logs/train_%timestamp%.log" - -if %ERRORLEVEL% EQU 0 ( - echo. - echo =================================== - echo Training completed successfully! - echo =================================== -) else ( - echo. - echo =================================== - echo Training failed with error code: %ERRORLEVEL% - echo =================================== -) - -endlocal - -pause \ No newline at end of file +REM 단순히 CMD에서 인자 전달만 +docker exec -it sdxl_train_captioner bash -c "/app/sdxl_train_captioner/sd-scripts/run-train-single.sh %*" +pause diff --git a/sd-scripts b/sd-scripts index 49115c2..6a32358 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 49115c25e9a0ffc6479b8f35d3ac76ba5352ca5b +Subproject commit 6a32358a24b870cd54fb27d13c2900f2509c9936 From a5bc5882365e2fea447a40be091b510699f90e35 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Tue, 28 Oct 2025 22:15:18 +0900 Subject: [PATCH 40/58] update --- sd-scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sd-scripts b/sd-scripts index 6a32358..7869ce6 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 6a32358a24b870cd54fb27d13c2900f2509c9936 +Subproject commit 7869ce6f2c6483a1b36f59aa59cf7403110c8f60 From 241a54279f5fd5b4d1de63181b63ea72f4bfea61 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 00:52:50 +0900 Subject: [PATCH 41/58] update 
--- requirements.txt | 1 + sd-scripts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 73f8c28..abc1620 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,7 @@ easygui==0.98.3 toml==0.10.2 voluptuous==0.13.1 huggingface-hub>=0.27.0 # ✅ 업데이트 +bitsandbytes>=0.45.0 imagesize==1.4.1 numpy==1.26.4 # ✅ 구체적 버전 requests==2.31.0 # ✅ 업데이트 diff --git a/sd-scripts b/sd-scripts index 7869ce6..b56cddf 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 7869ce6f2c6483a1b36f59aa59cf7403110c8f60 +Subproject commit b56cddf8a69e216c23ec745195f55447bcdf7919 From 1165dc1394db09c67f18e327606c8bf544a60ff5 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 02:05:53 +0900 Subject: [PATCH 42/58] update --- sd-scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sd-scripts b/sd-scripts index b56cddf..0cca440 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit b56cddf8a69e216c23ec745195f55447bcdf7919 +Subproject commit 0cca440eefaf78ce3f1ef0e78716a0c32a360f95 From 164141d9a49f540de7f3a44c1e045a584b38ab60 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 02:28:45 +0900 Subject: [PATCH 43/58] update --- sd-scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sd-scripts b/sd-scripts index 0cca440..3ac5c6d 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 0cca440eefaf78ce3f1ef0e78716a0c32a360f95 +Subproject commit 3ac5c6dd560769b475c47b728ca6db06165a5937 From ac804271a72a975ac9bce5c82a44eb51127f9917 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 02:31:47 +0900 Subject: [PATCH 44/58] update --- docker-build.cmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-build.cmd b/docker-build.cmd index f27e6bf..9aa432e 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.0 
. +docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.1 . -docker tag aicompanion/sdxl_train_captioner:1.0.0 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:1.0.1 aicompanion/sdxl_train_captioner:latest \ No newline at end of file From 7fb11d6b7e79c7d9418a5db486414c8b78b41679 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 03:03:44 +0900 Subject: [PATCH 45/58] update --- sd-scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sd-scripts b/sd-scripts index 3ac5c6d..802872f 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 3ac5c6dd560769b475c47b728ca6db06165a5937 +Subproject commit 802872ffa87f1b66338f989d76a5ba82da36f9e9 From 2ee22102b317f58b4f401b19af360b501f31b51e Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 04:05:45 +0900 Subject: [PATCH 46/58] update --- sd-scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sd-scripts b/sd-scripts index 802872f..f4db2dd 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 802872ffa87f1b66338f989d76a5ba82da36f9e9 +Subproject commit f4db2dd566b6eff4a9df8c10ff6d872051c8f59e From fc781dc61f95ec3a8990ced85a9a2ff275b6e282 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 04:37:55 +0900 Subject: [PATCH 47/58] update --- docker-build.cmd | 4 ++-- sd-scripts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-build.cmd b/docker-build.cmd index 9aa432e..31ce934 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.1 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.2 . 
-docker tag aicompanion/sdxl_train_captioner:1.0.1 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:1.0.2 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index f4db2dd..22f559d 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit f4db2dd566b6eff4a9df8c10ff6d872051c8f59e +Subproject commit 22f559dbe24db93979ce508a5033a8272989a80d From 9fcdc423b1dd0dcae5707e79d1cf0f1de3b04bb1 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 15:50:00 +0900 Subject: [PATCH 48/58] update --- MANUAL_TRAIN.md | 24 +++++++------ README.md | 1 + TRAINING.md | 82 ++++++++++++++++++++++++++++++++++++++++++++ docker-build.cmd | 4 +-- resume-train.cmd | 32 +---------------- run-train-single.ps1 | 55 ----------------------------- sd-scripts | 2 +- 7 files changed, 101 insertions(+), 99 deletions(-) create mode 100644 TRAINING.md delete mode 100644 run-train-single.ps1 diff --git a/MANUAL_TRAIN.md b/MANUAL_TRAIN.md index 9a1d7c2..fe2256d 100644 --- a/MANUAL_TRAIN.md +++ b/MANUAL_TRAIN.md @@ -1,17 +1,21 @@ # I. 단일폴더 수동 학습 사용 예시 ## 1. 기본 사용 (자동 계산) -```bash +```cmd run-train-single --folder ../dataset/training/01_alice ``` ## 2. Epochs만 수동 지정 -```bash +```cmd run-train-single --folder ../dataset/training/01_alice --epochs 25 + +또는 + +run-train-single --folder ../training/mainchar/01_alic3 --epochs 17 --resume alic3-000009.safetensors ``` ## 3. 세밀한 조정 -```bash +```cmd run-train-single ^ --folder ../dataset/training/01_alice ^ --epochs 30 ^ @@ -22,7 +26,7 @@ run-train-single ^ ``` ## 4. 고해상도 학습 -```bash +```cmd run-train-single ^ --folder ../dataset/training/01_alice ^ --resolution 1024,1024 ^ @@ -30,7 +34,7 @@ run-train-single ^ ``` ## 5. 빠른 테스트 -```bash +```cmd run-train-single ^ --folder ../dataset/training/01_alice ^ --epochs 5 ^ @@ -39,7 +43,7 @@ run-train-single ^ ``` ## 6. 
완전 수동 모드 -```bash +```cmd run-train-single ^ --folder ../dataset/training/01_alice ^ --no-auto ^ @@ -75,14 +79,14 @@ run-train-single ^ ## 비교 ### train_batch.py (일괄 자동) -```bash +```cmd # 여러 폴더 자동 학습 python train_batch.py → 01_alice, 02_bob, 03_background 모두 학습 ``` ### train_single.py (단일 수동) -```bash +```cmd # 특정 폴더만 세밀 조정 run-train-single --folder ../dataset/training/mainchar/01_alice --epochs 30 --lr 0.00015 → alice만 커스텀 파라미터로 학습 @@ -91,7 +95,7 @@ run-train-single --folder ../dataset/training/mainchar/01_alice --epochs 30 --lr ## 워크플로우 추천 ### 초보자 -```bash +```cmd # 1. 먼저 일괄 자동으로 테스트 python train_batch.py @@ -100,7 +104,7 @@ run-train-single --folder ../dataset/training/mainchar/01_alice --epochs 25 ``` ### 고급 사용자 -```bash +```cmd # 처음부터 세밀하게 조정 run-train-single ^ --folder ../dataset/training/mainchar/01_alice ^ diff --git a/README.md b/README.md index 60dd1cb..dde98e7 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin으로 복사합니 ## 학습방법 1: 폴더명 규칙 사용 (자동) ./train.sh config.json 0 + # 방법 2: 강제로 15번 반복 ./train.sh config.json 0 15 diff --git a/TRAINING.md b/TRAINING.md new file mode 100644 index 0000000..042b245 --- /dev/null +++ b/TRAINING.md @@ -0,0 +1,82 @@ +🎨 폴더 명명 예시: + +```cmd +01_alice_woman (3단어와 언더스코어 2개: 클래스 포함) +- name=alice, class=woman -> --class_tokens=woman 적용. + +02_style (2단어와 언더스코어 1개: 클래스 미포함) +- name=style, class=None -> --class_tokens 미적용. +``` + +🎨 클래스 키워드 선정 기준 및 분류 예시 + +#### 클래스 키워드를 선정할 때는 **"이 대상이 없었을 때, 모델이 원래 무엇을 생성해야 하는가?"**를 생각하면 쉽습니다. + +### 1. 인물/캐릭터 학습 시 + +사람이나 캐릭터를 학습시킬 때는 성별, 연령대, 종족 등 가장 넓은 카테고리를 사용합니다. + +| 학습 대상 (ID 토큰) | 클래스 키워드 | 설명 | +|---------------------|---------------------------|-------| +| 특정 인물 (alice) | woman 또는 man | 성별을 나타내는 가장 일반적인 단어. | +| 특정 어린이 (child_A) | child 또는 kid | 연령대를 나타내는 일반적인 단어. | +| 특정 동물 캐릭터 (pet_dragon) | dragon 또는 monster | 해당 대상의 종족 또는 종류. | +| 만화체 캐릭터 (manga_hero) | person 또는 character | 스타일이 강한 경우 일반적인 '사람'이나 '캐릭터'로 정의. 
| + +### 2. 스타일/화풍 학습 시 +스타일을 학습시킬 때는 그 스타일이 적용될 가장 일반적인 대상을 클래스로 지정합니다. + +| 학습 대상 (ID 토큰) | 클래스 키워드 | 설명 | +|---------------------|---------------------------|--------| +| 특정 작가 화풍 (artstyle_A) | style 또는 art | '스타일' 자체를 클래스로 지정하는 것이 일반적입니다. | +| 특정 조명 효과 (cinema_light) | lighting 또는 effect | 모델이 **'조명'**의 개념을 잃지 않도록 합니다. | +| 특정 의상 (maid_dress_v2) | clothing 또는 dress | 의류의 일반적인 카테고리. | + +### 3. 사물/배경 학습 시 +특정 사물이나 배경을 학습시킬 때는 그 사물의 가장 넓은 범주를 사용합니다. + +| 학습 대상 (ID 토큰) | 클래스 키워드 | 설명 | +|---------------------|---------------------------|--------| +| 특정 자동차 모델 (car_v1) | car 또는 vehicle | 해당 사물의 일반적인 명칭. | +| 특정 건물 (my_house) | house 또는 building | 해당 배경의 일반적인 명칭. | +| 특정 음식 (korean_dish) | food 또는 dish | 해당 카테고리의 일반적인 명칭. | + + +### 🛑 클래스 키워드를 넣는 경우 (정규화의 중요성) + +클래스 키워드를 01_alice_woman처럼 넣는다는 것은 다음과 같은 의미가 있습니다. + +ID 토큰 정의: alice는 **woman**이다. + +정규화 활성화: Kohya_SS에게 **woman**이라는 클래스에 대한 **정규화 이미지(Regularization Images)**를 찾아서 함께 학습하라고 지시합니다. + +정규화 이미지가 없다면 클래스 키워드를 넣어도 효과가 없거나 미미합니다. 따라서 클래스 키워드를 정했다면, 해당 키워드(예: woman)로 생성된 수백~수천 장의 이미지를 정규화 폴더 (일반적으로 reg_woman 등)에 넣어 함께 학습해야 가장 좋은 품질을 얻을 수 있습니다. + + +### 🎯 SDXL Base 모델 학습 시 정규화 이미지의 역할 + +#### SDXL Base 1.0 모델을 기반으로 학습할 때, woman과 같은 일반적인 클래스 이미지(정규화 이미지)를 추가로 넣어줄 필요는 없습니다. + +### 1. SDXL의 강점: 이미 "알고 있음" +- SDXL Base 1.0은 수많은 이미지로 학습된 **대규모 모델(Foundation Model)**입니다. +- 이미 **woman (여성), car (자동차), house (집)**과 같은 일반적인 개념과 해당 개념의 다양한 형태를 매우 잘 이해하고 표현할 수 있습니다. +- LoRA 학습의 목표는 SDXL이 이미 알고 있는 **일반적인 지식(클래스 지식)**을 잊어버리지 않게 하면서, **새로운 고유 개념(ID 토큰)**만 추가로 학습시키는 것입니다. + +### 2. 정규화 이미지의 필요성 +- 일반 LoRA/Dreambooth 학습: SD 1.5 같은 구형 모델이나 미세 조정되지 않은 모델로 학습할 때는 정규화 이미지가 필수였습니다. 이는 모델이 일반적인 개념을 잊어버리는 **재앙적 망각(Catastrophic Forgetting)**을 방지하기 위함이었습니다. +- SDXL LoRA 학습: SDXL은 기본적으로 이 지식을 매우 잘 보존합니다. 따라서 **woman**이라는 클래스에 대해 수백, 수천 장의 정규화 이미지를 직접 수집하고 넣어주는 노동은 대부분의 경우 불필요합니다. + +### 3. Kohya_SS 설정 (SDXL에 특화된 방식) +- Kohya_SS를 포함한 최신 학습 툴들은 SDXL 학습 시 정규화 이미지 폴더를 비워두거나, 아예 지정하지 않는 방식을 지원합니다. 
+- 이 방식은 SDXL이 이미 방대한 내부 지식을 가지고 있기 때문에, 따로 외부 이미지를 가져와 정규화하지 않아도 충분히 안정적인 결과를 얻을 수 있다는 전제에 기반합니다. + +### ⚠️ 예외적인 경우: 정규화가 필요한 경우 + +- 대부분의 경우 woman에 대한 정규화는 불필요하지만, 매우 특수한 형태의 클래스를 학습시킬 때는 고려해 볼 수 있습니다. + +| 경우 | 예시 | 설명 | +|---------------------|---------------------------|--------| +| 강력한 스타일 학습 | artstyle_A (극도로 만화적인 스타일) | "스타일 자체가 SDXL이 '인간'으로 인식하는 방식을 크게 왜곡할 때, 일반적인 woman 이미지를 정규화하여 모델의 지식을 보호할 수 있습니다." | +| 특정 자세/배경에 과적합 우려 | 특정 포즈만 있는 pose_v1 |"pose 클래스에 대해 다양한 일반 포즈 이미지를 정규화하여, 모델이 모든 사람의 포즈를 해당 포즈로 고정시키지 않도록 방지합니다." | + +#### - 일반적으로 사람이나 평범한 사물을 학습할 때는 클래스 명만 지정(01_alice_woman)하고 정규화 이미지는 넣지 않는 것이 시간과 자원(디스크 공간)을 아끼는 가장 효율적인 방법입니다. diff --git a/docker-build.cmd b/docker-build.cmd index 31ce934..de3dfae 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.2 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.3 . -docker tag aicompanion/sdxl_train_captioner:1.0.2 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:1.0.3 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/resume-train.cmd b/resume-train.cmd index 67a9646..231f203 100644 --- a/resume-train.cmd +++ b/resume-train.cmd @@ -1,31 +1 @@ -accelerate launch --num_cpu_threads_per_process 1 --mixed_precision bf16 ^ - sdxl_train_network.py ^ - --resume="../output_models" ^ - --max_train_epochs=20 ^ - --pretrained_model_name_or_path="../models/sd_xl_base_1.0.safetensors" ^ - --train_data_dir="../dataset/train/mainchar" ^ - --output_dir="../output_models" ^ - --logging_dir="../logs" ^ - --output_name="karina" ^ - --network_module=networks.lora ^ - --network_dim=32 ^ - --network_alpha=16 ^ - --learning_rate=1e-4 ^ - --optimizer_type="AdamW8bit" ^ - --lr_scheduler="cosine" ^ - --lr_warmup_steps=100 ^ - --save_every_n_epochs=1 ^ - --mixed_precision="bf16" ^ - --save_precision="bf16" ^ - --cache_latents ^ - 
--cache_latents_to_disk ^ - --gradient_checkpointing ^ - --xformers ^ - --seed=42 ^ - --bucket_no_upscale ^ - --min_bucket_reso=512 ^ - --max_bucket_reso=2048 ^ - --bucket_reso_steps=64 ^ - --resolution="1024,1024" ^ - --network_train_unet_only ^ - --cache_text_encoder_outputs \ No newline at end of file +run-train-single --resume ../output_models/alic3-000009.safetensors --epochs 17 --folder ../training/mainchar/01_alic3 \ No newline at end of file diff --git a/run-train-single.ps1 b/run-train-single.ps1 deleted file mode 100644 index 60a20e7..0000000 --- a/run-train-single.ps1 +++ /dev/null @@ -1,55 +0,0 @@ -# =================================== -# SDXL LoRA 단일 학습 (PowerShell → Docker) -# =================================== - -param( - [Parameter(ValueFromRemainingArguments=$true)] - [string[]]$Arguments -) - -if ($Arguments.Count -eq 0) { - Write-Host "Usage: run-train-single.ps1 --folder FOLDER [OPTIONS]" -ForegroundColor Yellow - Write-Host "" - Write-Host "Examples:" -ForegroundColor Cyan - Write-Host " .\run-train-single.ps1 --folder ../dataset/training/01_alice" - Write-Host " .\run-train-single.ps1 --folder ../dataset/training/01_alice --epochs 25" - Write-Host " .\run-train-single.ps1 --folder ../dataset/training/01_alice --lr 0.0002 --dim 64" - exit 1 -} - -# 타임스탬프 -$timestamp = Get-Date -Format "yyyyMMdd_HHmmss" - -# Arguments를 문자열로 결합 -$argsString = $Arguments -join ' ' - -# 작은따옴표 이스케이프 -$argsString = $argsString -replace "'", "'\\''" - -Write-Host "===================================" -ForegroundColor Green -Write-Host "Starting SDXL LoRA Training" -ForegroundColor Green -Write-Host "===================================" -ForegroundColor Green -Write-Host "Arguments: $argsString" -ForegroundColor Cyan -Write-Host "Log file: train_$timestamp.log" -ForegroundColor Cyan -Write-Host "===================================" -ForegroundColor Green -Write-Host "" - -# Docker 명령어 -$dockerCmd = "cd /app/sdxl_train_captioner/sd-scripts && python run-train-single.py 
$argsString 2>&1 | tee /app/sdxl_train_captioner/logs/train_$timestamp.log" - -# 실행 -docker exec -it sdxl_train_captioner bash -c $dockerCmd - -if ($LASTEXITCODE -eq 0) { - Write-Host "" - Write-Host "===================================" -ForegroundColor Green - Write-Host "Training completed successfully!" -ForegroundColor Green - Write-Host "===================================" -ForegroundColor Green -} else { - Write-Host "" - Write-Host "===================================" -ForegroundColor Red - Write-Host "Training failed with error code: $LASTEXITCODE" -ForegroundColor Red - Write-Host "===================================" -ForegroundColor Red -} - -pause \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index 22f559d..8cf42e0 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 22f559dbe24db93979ce508a5033a8272989a80d +Subproject commit 8cf42e0e2fbd1ff5dc55aa0a7e63acb5f98e89de From c1ca02b7d7dbd6dcbcc895b458219538c8baf2b8 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 16:15:20 +0900 Subject: [PATCH 49/58] update --- resume-train.cmd | 2 +- sd-scripts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resume-train.cmd b/resume-train.cmd index 231f203..e6ecec9 100644 --- a/resume-train.cmd +++ b/resume-train.cmd @@ -1 +1 @@ -run-train-single --resume ../output_models/alic3-000009.safetensors --epochs 17 --folder ../training/mainchar/01_alic3 \ No newline at end of file +run-train-single --resume ../output_models/alic3-000009.safetensors --epochs 17 --folder ../dataset/training/mainchar/01_alic3_woman \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index 8cf42e0..9bd8532 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 8cf42e0e2fbd1ff5dc55aa0a7e63acb5f98e89de +Subproject commit 9bd85326377a996dfce43d4da4fd0d8e2a736654 From b09a96afee78c8b304258caf2728bfd268d6d495 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 18:00:17 +0900 Subject: 
[PATCH 50/58] update --- sd-scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sd-scripts b/sd-scripts index 9bd8532..2eee65f 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 9bd85326377a996dfce43d4da4fd0d8e2a736654 +Subproject commit 2eee65f8243799ef48ab9804e9fc953958926a24 From 6139b099e6734b9d6b40fb328df0abfe4343ba7e Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 19:23:34 +0900 Subject: [PATCH 51/58] update --- resume-train.cmd | 2 +- sd-scripts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resume-train.cmd b/resume-train.cmd index e6ecec9..13b26d5 100644 --- a/resume-train.cmd +++ b/resume-train.cmd @@ -1 +1 @@ -run-train-single --resume ../output_models/alic3-000009.safetensors --epochs 17 --folder ../dataset/training/mainchar/01_alic3_woman \ No newline at end of file +run-train-single --resume ../output_models/01_alic3_woman-000009.safetensors --epochs 17 --folder ../dataset/training/mainchar/01_alic3_woman \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index 2eee65f..b009716 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 2eee65f8243799ef48ab9804e9fc953958926a24 +Subproject commit b009716fbd17fba6ef7082d7f79ffc1c3616e487 From 6dc1c492f7bb3851324856ee8548e6274141b89f Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 20:26:49 +0900 Subject: [PATCH 52/58] update --- MANUAL_TRAIN.md | 19 ++++++++++++++++++- sd-scripts | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/MANUAL_TRAIN.md b/MANUAL_TRAIN.md index fe2256d..06f5655 100644 --- a/MANUAL_TRAIN.md +++ b/MANUAL_TRAIN.md @@ -226,4 +226,21 @@ run-train-single ^ 🔄 Resuming from: ../output_models/alice-epoch-010.safetensors 🚀 Starting training... 
-``` \ No newline at end of file +``` + + +## 💡 선택 가이드 +### State Resume을 써야 할 때 + +✅ 학습이 중단됨 (컴퓨터 꺼짐, 에러 등) +✅ Epoch 수를 늘리고 싶음 (10 → 20) +✅ 데이터는 그대로, 학습만 더 +✅ Optimizer momentum 유지가 중요 + +## Network Weights를 써야 할 때 + +✅ 새로운 데이터 추가 +✅ 다른 스타일/컨셉 학습 +✅ Fine-tuning (특정 부분만 강화) +✅ 기존 모델 기반으로 파생 모델 생성 +✅ State 폴더가 없음 \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index b009716..b56eef6 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit b009716fbd17fba6ef7082d7f79ffc1c3616e487 +Subproject commit b56eef68cba3dbbc606d7d4f732e95429e22e313 From 1241c6125892f7514cda53c05e27a0b7b1c0810f Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 22:21:19 +0900 Subject: [PATCH 53/58] update --- Dockerfile | 1 + docker-compose.yml | 3 +++ sd-scripts | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ff9c028..5d0ce8a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,6 +38,7 @@ COPY ./models /app/sdxl_train_captioner/models WORKDIR /app/sdxl_train_captioner/sd-scripts RUN chmod +x ./entrypoint.sh +RUN chmod +x ./run-tensorboard.sh RUN chmod +x ./run-train-auto.sh RUN chmod +x ./run-train-single.sh diff --git a/docker-compose.yml b/docker-compose.yml index b79ced6..8015836 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,6 +22,9 @@ services: - ./dataset:/app/sdxl_train_captioner/dataset - ./output_models:/app/sdxl_train_captioner/output_models - ./logs:/app/sdxl_train_captioner/logs + + ports: + - "6006:6006" environment: # GPU 선택 (필요 시 GPU ID 지정) diff --git a/sd-scripts b/sd-scripts index b56eef6..8ff2938 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit b56eef68cba3dbbc606d7d4f732e95429e22e313 +Subproject commit 8ff29382120bfcd8f15f2224dbf2f64ab1a1688d From e2f93ca585b7bf5eb12bf484dbf0c9a8620e940f Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Wed, 29 Oct 2025 23:42:21 +0900 Subject: [PATCH 54/58] update --- resume-train.cmd => resume-train-example.cmd 
| 0 run-tensorboard.cmd | 3 +++ sd-scripts | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) rename resume-train.cmd => resume-train-example.cmd (100%) create mode 100644 run-tensorboard.cmd diff --git a/resume-train.cmd b/resume-train-example.cmd similarity index 100% rename from resume-train.cmd rename to resume-train-example.cmd diff --git a/run-tensorboard.cmd b/run-tensorboard.cmd new file mode 100644 index 0000000..81824f5 --- /dev/null +++ b/run-tensorboard.cmd @@ -0,0 +1,3 @@ +docker exec -it sdxl_train_captioner bash -c "/app/sdxl_train_captioner/sd-scripts/run-tensorboard.sh 1" + +pause \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index 8ff2938..8dbe330 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 8ff29382120bfcd8f15f2224dbf2f64ab1a1688d +Subproject commit 8dbe330986f402258a0fa6a0818c6df8ad59071f From 5448d24e63e1cbff690ac88f6b7fc20493285687 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Thu, 30 Oct 2025 14:22:36 +0900 Subject: [PATCH 55/58] update --- docker-down.cmd | 4 +++- docker-pull.cmd | 3 +++ docker-start.cmd | 4 +++- docker-stop.cmd | 4 +++- docker-up.cmd | 4 +++- 5 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 docker-pull.cmd diff --git a/docker-down.cmd b/docker-down.cmd index 10bd05b..068dd33 100644 --- a/docker-down.cmd +++ b/docker-down.cmd @@ -1 +1,3 @@ -docker-compose down --volumes --remove-orphans sdxl_train_captioner \ No newline at end of file +docker-compose down --volumes --remove-orphans sdxl_train_captioner + +pause \ No newline at end of file diff --git a/docker-pull.cmd b/docker-pull.cmd new file mode 100644 index 0000000..28e3971 --- /dev/null +++ b/docker-pull.cmd @@ -0,0 +1,3 @@ +docker pull aicompanion/sdxl_train_captioner:latest + +pause \ No newline at end of file diff --git a/docker-start.cmd b/docker-start.cmd index 8f51fa4..267bc43 100644 --- a/docker-start.cmd +++ b/docker-start.cmd @@ -1 +1,3 @@ -docker-compose start sdxl_train_captioner \ No 
newline at end of file +docker-compose start sdxl_train_captioner + +pause \ No newline at end of file diff --git a/docker-stop.cmd b/docker-stop.cmd index 2a646cf..478efa6 100644 --- a/docker-stop.cmd +++ b/docker-stop.cmd @@ -1 +1,3 @@ -docker-compose stop sdxl_train_captioner \ No newline at end of file +docker-compose stop sdxl_train_captioner + +pause \ No newline at end of file diff --git a/docker-up.cmd b/docker-up.cmd index 5177d11..60a818a 100644 --- a/docker-up.cmd +++ b/docker-up.cmd @@ -1 +1,3 @@ -docker-compose up -d \ No newline at end of file +docker-compose up -d + +pause \ No newline at end of file From 0bcc70e61f11d464da74c667d8e5ec960b0f7f43 Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Fri, 31 Oct 2025 01:32:39 +0900 Subject: [PATCH 56/58] update --- sd-scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sd-scripts b/sd-scripts index 8dbe330..3d33f7c 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 8dbe330986f402258a0fa6a0818c6df8ad59071f +Subproject commit 3d33f7c60b76660a928518a84d9d9e952a76ffc7 From 1b5d35a99cc7cc8ee2abcc3e6e096245114cafcd Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 1 Nov 2025 19:13:55 +0900 Subject: [PATCH 57/58] update --- docker-build.cmd | 4 ++-- sd-scripts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-build.cmd b/docker-build.cmd index de3dfae..2edcbc2 100644 --- a/docker-build.cmd +++ b/docker-build.cmd @@ -1,3 +1,3 @@ -docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.3 . +docker build --no-cache -t aicompanion/sdxl_train_captioner:1.0.4 . 
-docker tag aicompanion/sdxl_train_captioner:1.0.3 aicompanion/sdxl_train_captioner:latest \ No newline at end of file +docker tag aicompanion/sdxl_train_captioner:1.0.4 aicompanion/sdxl_train_captioner:latest \ No newline at end of file diff --git a/sd-scripts b/sd-scripts index 3d33f7c..7443d08 160000 --- a/sd-scripts +++ b/sd-scripts @@ -1 +1 @@ -Subproject commit 3d33f7c60b76660a928518a84d9d9e952a76ffc7 +Subproject commit 7443d0806a05bea1c7fbcadddaa4cd8595703bb7 From 8426ed00f70dcc1c8dc62e2e8b16f4d19a0037ad Mon Sep 17 00:00:00 2001 From: sungjoonkim Date: Sat, 1 Nov 2025 19:16:20 +0900 Subject: [PATCH 58/58] update --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index 902cfad..07943d8 100644 --- a/.gitignore +++ b/.gitignore @@ -53,9 +53,6 @@ dataset/** models data config.toml -sd-scripts -sd-scripts/ -sd-scripts/** venv venv* .python-version