pull/3501/merge
Cautioncrazy 2026-04-10 22:18:39 +00:00 committed by GitHub
commit 0c46d1268d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 801 additions and 0 deletions

801
kohya_ss_colab.ipynb Normal file
View File

@@ -0,0 +1,801 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/Cautioncrazy/kohya_ss_LoRA-Trainer/blob/master/kohya_ss_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NrgcDwZxgDOe",
"collapsed": true
},
"outputs": [],
"source": [
"#@title Train with Kohya's Stable Diffusion Trainers\n",
"# Setup cell: mounts Google Drive, installs pinned dependencies, builds\n",
"# bitsandbytes 0.41.0 from source for CUDA 11.x, clones the camenduru\n",
"# kohya_ss v1.0 fork, and launches the Gradio GUI.\n",
"# NOTE(review): the final command blocks this cell until interrupted.\n",
"%cd /content\n",
"\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')\n",
"\n",
"# Pinned Python dependencies matching the kohya_ss v1.0 fork\n",
"!pip install dadaptation==3.1 diffusers[torch]==0.17.1 easygui==0.98.3 einops==0.6.0 fairscale==0.4.13 ftfy==6.1.1 gradio==3.36.1 huggingface-hub==0.14.1\n",
"!pip install lion-pytorch==0.0.6 lycoris_lora==1.8.0.dev6 open-clip-torch==2.20.0 prodigyopt==1.0 pytorch-lightning==1.9.0 safetensors==0.3.1 timm==0.6.12\n",
"!pip install tk==0.1.0 transformers==4.30.2 voluptuous==0.13.1 wandb==0.15.0 xformers==0.0.20 omegaconf\n",
"\n",
"# Build bitsandbytes from source so its CUDA kernels match the runtime\n",
"%cd /content\n",
"!git clone -b 0.41.0 https://github.com/TimDettmers/bitsandbytes\n",
"%cd /content/bitsandbytes\n",
"!CUDA_VERSION=118 make cuda11x\n",
"!python setup.py install\n",
"\n",
"%cd /content\n",
"!git clone -b v1.0 https://github.com/camenduru/kohya_ss\n",
"%cd /content/kohya_ss\n",
"\n",
"# Launch the web GUI (public Gradio share link; --headless for Colab)\n",
"!python kohya_gui.py --share --headless"
]
},
{
"cell_type": "markdown",
"source": [
"Run Kohya GUI"
],
"metadata": {
"id": "PDMNcVe-D0v4"
}
},
{
"cell_type": "code",
"source": [
"# 1. Clean up and install with a stable configuration to prevent the torch-reinstall loop\n",
"!pip install --upgrade pip\n",
"!pip install torch==2.5.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121\n",
"\n",
"# Install xformers separately without dependencies to prevent it from downgrading torch\n",
"!pip install xformers==0.0.28.post3 --no-deps\n",
"\n",
"# Install remaining requirements\n",
"!pip install accelerate==0.32.1 huggingface-hub==0.25.0 diffusers==0.25.0\n",
"!pip install transformers==4.44.0 safetensors==0.4.2 bitsandbytes==0.41.3.post2\n",
"!pip install gradio==3.36.1 easygui==0.98.3 einops==0.6.0 voluptuous==0.13.1\n",
"!pip install open-clip-torch==2.20.0 tensorboard==2.15.0\n",
"\n",
"# 2. Launch with environment variables and Low-RAM fix\n",
"%cd /content/kohya_ss\n",
"import os\n",
"import torch\n",
"\n",
"# Forces torch to be more aggressive with memory reuse\n",
"os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:512'\n",
"\n",
"# Pre-clear memory before starting\n",
"if torch.cuda.is_available():\n",
" torch.cuda.empty_cache()\n",
"\n",
"# Launch the GUI. NOTE(review): no --lowram flag is passed here -- the\n",
"# low-RAM training flags are given to the training command in a later cell.\n",
"!python kohya_gui.py --share --headless"
],
"metadata": {
"collapsed": true,
"id": "wM6vFztrpnni"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9b6261da",
"outputId": "eca83b62-3490-4044-9646-c50b1c88c328"
},
"source": [
"#@title Direct SDXL Training Command (VRAM-Load Strategy)\n",
"# Bypasses the GUI and calls sdxl_train_network.py directly. The model\n",
"# loaders are patched to load the safetensors checkpoint straight into GPU\n",
"# VRAM so the ~6.5 GB model never has to fit into Colab's limited System RAM.\n",
"import os\n",
"import torch\n",
"import gc\n",
"\n",
"# 1. System Deep Clean\n",
"gc.collect()\n",
"torch.cuda.empty_cache()\n",
"\n",
"# 2. Setup Paths\n",
"%cd /content/kohya_ss\n",
"\n",
"# 3. Apply the Direct-to-GPU Patch\n",
"# This forces the loader to use 'cuda' immediately, bypassing the System RAM (CPU) stage\n",
"# NOTE(review): brittle text patch -- it assumes the exact call text\n",
"# load_file(checkpoint_path) / load_file(model_path) appears in those library\n",
"# files; re-verify after any kohya_ss update.\n",
"!sed -i \"s/load_file(checkpoint_path)/load_file(checkpoint_path, device='cuda')/g\" library/sdxl_model_util.py\n",
"!sed -i \"s/load_file(model_path)/load_file(model_path, device='cuda')/g\" library/model_util.py\n",
"\n",
"# Tighter allocator split size than the GUI cell above (128 MB vs 512 MB)\n",
"os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:128'\n",
"\n",
"# Hardcoded per-project paths on the mounted Drive -- edit these per run\n",
"MODEL_PATH = \"/content/ponyDiffusionV6XL.safetensors\"\n",
"DATA_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project\"\n",
"OUTPUT_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/Outputs/pkmnessentialitem\"\n",
"LOG_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/log\"\n",
"OUTPUT_NAME = \"pkmn_items_v1\"\n",
"\n",
"# 4. Launch with Low-RAM strategies and 512 resolution\n",
"!accelerate launch --num_cpu_threads_per_process=1 \"./sdxl_train_network.py\" \\\n",
" --enable_bucket \\\n",
" --pretrained_model_name_or_path=\"{MODEL_PATH}\" \\\n",
" --train_data_dir=\"{DATA_DIR}\" \\\n",
" --resolution=\"512,512\" \\\n",
" --output_dir=\"{OUTPUT_DIR}\" \\\n",
" --logging_dir=\"{LOG_DIR}\" \\\n",
" --network_alpha=16 \\\n",
" --save_model_as=safetensors \\\n",
" --network_module=networks.lora \\\n",
" --network_dim=32 \\\n",
" --output_name=\"{OUTPUT_NAME}\" \\\n",
" --lr_scheduler_num_cycles=1 \\\n",
" --lr_scheduler_power=1 \\\n",
" --no_half_vae \\\n",
" --learning_rate=0.0001 \\\n",
" --unet_lr=0.0001 \\\n",
" --network_train_unet_only \\\n",
" --lr_scheduler=cosine \\\n",
" --lr_warmup_steps=200 \\\n",
" --train_batch_size=1 \\\n",
" --max_train_steps=2000 \\\n",
" --save_every_n_epochs=2 \\\n",
" --mixed_precision=fp16 \\\n",
" --save_precision=fp16 \\\n",
" --seed=0 \\\n",
" --caption_extension=.txt \\\n",
" --optimizer_type=AdamW8bit \\\n",
" --bucket_reso_steps=64 \\\n",
" --gradient_checkpointing \\\n",
" --xformers \\\n",
" --bucket_no_upscale \\\n",
" --lowram \\\n",
" --mem_eff_attn"
],
"execution_count": 38,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/kohya_ss\n",
"The following values were not passed to `accelerate launch` and had defaults used instead:\n",
"\t`--num_processes` was set to a value of `1`\n",
"\t`--num_machines` was set to a value of `1`\n",
"\t`--mixed_precision` was set to a value of `'no'`\n",
"\t`--dynamo_backend` was set to a value of `'no'`\n",
"To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n",
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
" torch.utils._pytree._register_pytree_node(\n",
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
" torch.utils._pytree._register_pytree_node(\n",
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
" torch.utils._pytree._register_pytree_node(\n",
"2026-04-10 21:58:17.017253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
"E0000 00:00:1775858297.052001 85560 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"E0000 00:00:1775858297.062480 85560 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"W0000 00:00:1775858297.079987 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1775858297.080015 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1775858297.080019 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1775858297.080023 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
" torch.utils._pytree._register_pytree_node(\n",
"/usr/local/lib/python3.12/dist-packages/timm/models/layers/__init__.py:49: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers\n",
" warnings.warn(f\"Importing from {__name__} is deprecated, please import via timm.layers\", FutureWarning)\n",
"prepare tokenizers\n",
"/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
" warnings.warn(\n",
"Using DreamBooth method.\n",
"ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: log\n",
"ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: Outputs\n",
"ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: Configs\n",
"ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: .ipynb_checkpoints\n",
"prepare images.\n",
"found directory /content/drive/Othercomputers/My Laptop/pokemon_lora_project/20_pkmnessentialitem contains 10 image files\n",
"200 train images with repeating.\n",
"0 reg images.\n",
"no regularization images / 正則化画像が見つかりませんでした\n",
"[Dataset 0]\n",
" batch_size: 1\n",
" resolution: (512, 512)\n",
" enable_bucket: True\n",
" min_bucket_reso: 256\n",
" max_bucket_reso: 1024\n",
" bucket_reso_steps: 64\n",
" bucket_no_upscale: True\n",
"\n",
" [Subset 0 of Dataset 0]\n",
" image_dir: \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/20_pkmnessentialitem\"\n",
" image_count: 10\n",
" num_repeats: 20\n",
" shuffle_caption: False\n",
" keep_tokens: 0\n",
" caption_dropout_rate: 0.0\n",
" caption_dropout_every_n_epoches: 0\n",
" caption_tag_dropout_rate: 0.0\n",
" color_aug: False\n",
" flip_aug: False\n",
" face_crop_aug_range: None\n",
" random_crop: False\n",
" token_warmup_min: 1,\n",
" token_warmup_step: 0,\n",
" is_reg: False\n",
" class_tokens: pkmnessentialitem\n",
" caption_extension: .txt\n",
"\n",
"\n",
"[Dataset 0]\n",
"loading image sizes.\n",
"100% 10/10 [00:00<00:00, 284.23it/s]\n",
"make buckets\n",
"min_bucket_reso and max_bucket_reso are ignored if bucket_no_upscale is set, because bucket reso is defined by image size automatically / bucket_no_upscaleが指定された場合は、bucketの解像度は画像サイズから自動計算されるため、min_bucket_resoとmax_bucket_resoは無視されます\n",
"number of images (including repeats) / 各bucketの画像枚数繰り返し回数を含む\n",
"bucket 0: resolution (512, 512), count: 200\n",
"mean ar error (without repeats): 0.0\n",
"Traceback (most recent call last):\n",
" File \"/content/kohya_ss/./sdxl_train_network.py\", line 167, in <module>\n",
" trainer.train(args)\n",
" File \"/content/kohya_ss/train_network.py\", line 182, in train\n",
" current_epoch = Value(\"i\", 0)\n",
" ^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3.12/multiprocessing/context.py\", line 135, in Value\n",
" return Value(typecode_or_type, *args, lock=lock,\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3.12/multiprocessing/sharedctypes.py\", line 79, in Value\n",
" lock = ctx.RLock()\n",
" ^^^^^^^^^^^\n",
" File \"/usr/lib/python3.12/multiprocessing/context.py\", line 73, in RLock\n",
" return RLock(ctx=self.get_context())\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3.12/multiprocessing/synchronize.py\", line 194, in __init__\n",
" SemLock.__init__(self, RECURSIVE_MUTEX, 1, 1, ctx=ctx)\n",
" File \"/usr/lib/python3.12/multiprocessing/synchronize.py\", line 57, in __init__\n",
" sl = self._semlock = _multiprocessing.SemLock(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"OSError: [Errno 28] No space left on device\n",
"Traceback (most recent call last):\n",
" File \"/usr/local/bin/accelerate\", line 6, in <module>\n",
" sys.exit(main())\n",
" ^^^^^^\n",
" File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/accelerate_cli.py\", line 48, in main\n",
" args.func(args)\n",
" File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/launch.py\", line 1097, in launch_command\n",
" simple_launcher(args)\n",
" File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/launch.py\", line 703, in simple_launcher\n",
" raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)\n",
"subprocess.CalledProcessError: Command '['/usr/bin/python3', './sdxl_train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=/content/ponyDiffusionV6XL.safetensors', '--train_data_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project', '--resolution=512,512', '--output_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project/Outputs/pkmnessentialitem', '--logging_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project/log', '--network_alpha=16', '--save_model_as=safetensors', '--network_module=networks.lora', '--network_dim=32', '--output_name=pkmn_items_v1', '--lr_scheduler_num_cycles=1', '--lr_scheduler_power=1', '--no_half_vae', '--learning_rate=0.0001', '--unet_lr=0.0001', '--network_train_unet_only', '--lr_scheduler=cosine', '--lr_warmup_steps=200', '--train_batch_size=1', '--max_train_steps=2000', '--save_every_n_epochs=2', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=0', '--caption_extension=.txt', '--optimizer_type=AdamW8bit', '--bucket_reso_steps=64', '--gradient_checkpointing', '--xformers', '--bucket_no_upscale', '--lowram', '--mem_eff_attn']' returned non-zero exit status 1.\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1fede1bc",
"outputId": "91d3aaba-8434-4742-cd6f-c354f507a175"
},
"source": [
"import psutil\n",
"import torch\n",
"\n",
"def print_memory_stats():\n",
"    \"\"\"Print current System RAM usage and per-GPU allocated memory in GB.\"\"\"\n",
"    # System RAM\n",
"    ram = psutil.virtual_memory()\n",
"    print(f\"System RAM Usage: {ram.used / 1024**3:.2f} GB / {ram.total / 1024**3:.2f} GB ({ram.percent}%)\")\n",
"\n",
"    # GPU RAM (allocated vs. total; reserved was computed before but never\n",
"    # used, so the dead local has been removed)\n",
"    if torch.cuda.is_available():\n",
"        for i in range(torch.cuda.device_count()):\n",
"            total_gpu = torch.cuda.get_device_properties(i).total_memory\n",
"            allocated_gpu = torch.cuda.memory_allocated(i)\n",
"            print(f\"GPU {i} Usage: {allocated_gpu / 1024**3:.2f} GB / {total_gpu / 1024**3:.2f} GB\")\n",
"\n",
"print_memory_stats()"
],
"execution_count": 37,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"System RAM Usage: 1.21 GB / 12.67 GB (57.1%)\n",
"GPU 0 Usage: 0.00 GB / 14.56 GB\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "523ddfed",
"outputId": "11e39ca9-eb83-47ac-d611-17a62b5ffd4f"
},
"source": [
"import os\n",
"import gc\n",
"import torch\n",
"from safetensors.torch import load_file\n",
"import psutil\n",
"\n",
"def verify_and_monitor():\n",
"    \"\"\"Check that the checkpoint exists, report its size and available RAM,\n",
"    then test-load it to CPU to see whether a full in-RAM load succeeds.\"\"\"\n",
"    model_path = \"/content/ponyDiffusionV6XL.safetensors\"\n",
"    if not os.path.exists(model_path):\n",
"        print(f\"ERROR: Model not found at {model_path}\")\n",
"        return\n",
"\n",
"    print(f\"Model size: {os.path.getsize(model_path) / 1024**3:.2f} GB\")\n",
"\n",
"    # Check RAM before loading\n",
"    ram = psutil.virtual_memory()\n",
"    print(f\"Initial RAM: {ram.available / 1024**3:.2f} GB available\")\n",
"\n",
"    try:\n",
"        print(\"Testing model load to CPU (this spikes RAM)...\")\n",
"        # NOTE(review): this is a full load into CPU RAM; no mmap option is\n",
"        # used here (the old comment claiming mmap was inaccurate).\n",
"        sd = load_file(model_path, device='cpu')\n",
"        print(\"Successfully read safetensors header.\")\n",
"        del sd\n",
"        # Fix: gc was used without being imported in this cell, which raised\n",
"        # NameError on a fresh kernel (Restart & Run All).\n",
"        gc.collect()\n",
"    except Exception as e:\n",
"        print(f\"Load test failed: {e}\")\n",
"\n",
"verify_and_monitor()"
],
"execution_count": 34,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Model size: 6.46 GB\n",
"Initial RAM: 5.51 GB available\n",
"Testing model load to CPU (this spikes RAM)...\n",
"Successfully read safetensors header.\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "91722104",
"outputId": "cb16f2c6-acbd-455f-c70d-22206649724e"
},
"source": [
"import torch\n",
"from safetensors.torch import load_file\n",
"import psutil\n",
"\n",
"def check_vram_load(path='/content/ponyDiffusionV6XL.safetensors'):\n",
"    \"\"\"Diagnostic: load `path` directly onto the GPU, report how much System\n",
"    RAM vs. GPU VRAM the load consumed, then free the VRAM again.\n",
"\n",
"    NOTE(review): requires a CUDA runtime with enough free VRAM (~7 GB for\n",
"    this model per the recorded run); may fail on a CPU-only runtime --\n",
"    confirm a GPU runtime is selected.\n",
"    \"\"\"\n",
"    print(f'Checking if {path} can fit in VRAM...')\n",
"    initial_vram = torch.cuda.memory_reserved() / 1024**3\n",
"    initial_ram = psutil.virtual_memory().used / 1024**3\n",
"\n",
"    try:\n",
"        # Load directly to GPU device\n",
"        state_dict = load_file(path, device='cuda')\n",
"\n",
"        final_vram = torch.cuda.memory_reserved() / 1024**3\n",
"        final_ram = psutil.virtual_memory().used / 1024**3\n",
"\n",
"        print(f'Success!')\n",
"        print(f'System RAM used: {final_ram - initial_ram:.2f} GB')\n",
"        print(f'GPU VRAM used: {final_vram - initial_vram:.2f} GB')\n",
"\n",
"        del state_dict\n",
"        torch.cuda.empty_cache()\n",
"        print('VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.')\n",
"    except Exception as e:\n",
"        print(f'Load failed: {e}')\n",
"\n",
"check_vram_load()"
],
"execution_count": 35,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Checking if /content/ponyDiffusionV6XL.safetensors can fit in VRAM...\n",
"Success!\n",
"System RAM used: -0.03 GB\n",
"GPU VRAM used: 6.99 GB\n",
"VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ba65b51c",
"outputId": "4be365ea-1e1b-4ccd-d91d-da012829108d"
},
"source": [
"# NOTE(review): this cell is an exact duplicate of the earlier\n",
"# check_vram_load cell; re-running it just repeats the same diagnostic.\n",
"import torch\n",
"from safetensors.torch import load_file\n",
"import psutil\n",
"\n",
"def check_vram_load(path='/content/ponyDiffusionV6XL.safetensors'):\n",
" print(f'Checking if {path} can fit in VRAM...')\n",
" initial_vram = torch.cuda.memory_reserved() / 1024**3\n",
" initial_ram = psutil.virtual_memory().used / 1024**3\n",
"\n",
" try:\n",
" # Load directly to GPU device\n",
" state_dict = load_file(path, device='cuda')\n",
"\n",
" final_vram = torch.cuda.memory_reserved() / 1024**3\n",
" final_ram = psutil.virtual_memory().used / 1024**3\n",
"\n",
" print(f'Success!')\n",
" print(f'System RAM used: {final_ram - initial_ram:.2f} GB')\n",
" print(f'GPU VRAM used: {final_vram - initial_vram:.2f} GB')\n",
"\n",
" del state_dict\n",
" torch.cuda.empty_cache()\n",
" print('VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.')\n",
" except Exception as e:\n",
" print(f'Load failed: {e}')\n",
"\n",
"check_vram_load()"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Checking if /content/ponyDiffusionV6XL.safetensors can fit in VRAM...\n",
"Success!\n",
"System RAM used: 0.00 GB\n",
"GPU VRAM used: 6.99 GB\n",
"VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "rei0_q-oChPx"
},
"source": [
"#@title Convert Safetensors to Diffusers (Updated)\n",
"# Download a more recent version of the conversion script\n",
"!wget -q -O convert_diffusers.py https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py\n",
"\n",
"# Convert Pony V6 (SDXL) to Diffusers format\n",
"# We remove the unrecognized --use_safetensors flag\n",
"!python3 convert_diffusers.py \\\n",
"  --checkpoint_path /content/ponyDiffusionV6XL.safetensors \\\n",
"  --dump_path /content/pony_diffusers \\\n",
"  --from_safetensors \\\n",
"  --to_safetensors \\\n",
"  --device cuda\n",
"\n",
"# Fix: the previous message claimed completion unconditionally, even when\n",
"# the shell conversion above failed or was interrupted.\n",
"print(\"\\nIf the conversion above finished without errors, use '/content/pony_diffusers' as your source model path in Kohya GUI.\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f48d0810",
"outputId": "5f684992-17ff-4e8b-cef7-917237918e12"
},
"source": [
"# NOTE(review): this cell is an exact duplicate of the earlier\n",
"# print_memory_stats cell; consider keeping only one definition.\n",
"import psutil\n",
"import torch\n",
"\n",
"def print_memory_stats():\n",
" # System RAM\n",
" ram = psutil.virtual_memory()\n",
" print(f\"System RAM Usage: {ram.used / 1024**3:.2f} GB / {ram.total / 1024**3:.2f} GB ({ram.percent}%)\")\n",
"\n",
" # GPU RAM\n",
" if torch.cuda.is_available():\n",
" for i in range(torch.cuda.device_count()):\n",
" total_gpu = torch.cuda.get_device_properties(i).total_memory\n",
" reserved_gpu = torch.cuda.memory_reserved(i)\n",
" allocated_gpu = torch.cuda.memory_allocated(i)\n",
" print(f\"GPU {i} Usage: {allocated_gpu / 1024**3:.2f} GB / {total_gpu / 1024**3:.2f} GB\")\n",
"\n",
"print_memory_stats()"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"System RAM Usage: 1.00 GB / 12.67 GB (10.5%)\n",
"GPU 0 Usage: 0.00 GB / 14.56 GB\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "06468516",
"outputId": "b39b18b4-5722-4ba8-b209-2931af026252"
},
"source": [
"import gc\n",
"import torch\n",
"\n",
"# Clear Python and Torch memory\n",
"gc.collect()\n",
"torch.cuda.empty_cache()\n",
"\n",
"# Try to drop the Linux page cache. On Colab /proc is mounted read-only, so\n",
"# this is best-effort and prints a 'Read-only file system' error (see the\n",
"# recorded output); it does not abort the cell.\n",
"!sync && echo 3 > /proc/sys/vm/drop_caches\n",
"\n",
"# Fix: the old message claimed 'System RAM and GPU memory cleared' even\n",
"# though the page-cache drop above fails on Colab.\n",
"print(\"Python and GPU caches cleared (OS page-cache drop is best-effort and may fail on Colab).\")"
],
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/bin/bash: line 1: /proc/sys/vm/drop_caches: Read-only file system\n",
"System RAM and GPU memory cleared.\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "758df299",
"outputId": "cc97681b-477a-44b5-ddc8-c27924d868f5"
},
"source": [
"# Diagnostic: pre-load the checkpoint into GPU VRAM to confirm it fits,\n",
"# then free the memory so the trainer can use it.\n",
"# NOTE(review): the inline comment below mentions mmap / piece-by-piece\n",
"# loading, but no mmap option is actually used -- load_file() here performs\n",
"# a full load to the GPU device.\n",
"import torch\n",
"from safetensors.torch import load_file\n",
"\n",
"def check_model_load(path=\"/content/ponyDiffusionV6XL.safetensors\"):\n",
" print(f\"Attempting to pre-load {path} to GPU to verify memory availability...\")\n",
" try:\n",
" # Loading with mmap=True and moving to device piece-by-piece is easier on RAM\n",
" state_dict = load_file(path, device=\"cuda\")\n",
" print(\"Successfully loaded model to GPU VRAM.\")\n",
"\n",
" # We don't actually need to keep it in memory here, we just wanted to see if it fits\n",
" del state_dict\n",
" torch.cuda.empty_cache()\n",
" print(\"GPU Memory cleared and ready for trainer.\")\n",
" except Exception as e:\n",
" print(f\"Load failed: {e}\")\n",
"\n",
"check_model_load()"
],
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Attempting to pre-load /content/ponyDiffusionV6XL.safetensors to GPU to verify memory availability...\n",
"Successfully loaded model to GPU VRAM.\n",
"GPU Memory cleared and ready for trainer.\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2c0ea383"
},
"source": [
"### Pro-Tip for Kohya GUI Settings:\n",
"Now that we verified the model can load, ensure these are set in the **Parameters** tab to prevent the training process itself from spiking the RAM:\n",
"1. **High-level settings**: Check `Low RAM` if available.\n",
"2. **Memory management**: Ensure `Gradient Checkpointing` is ON.\n",
"3. **Optimizers**: Use `AdamW8bit` or `Prodigy` (8-bit versions save massive amounts of RAM)."
]
},
{
"cell_type": "code",
"metadata": {
"id": "83390a2d"
},
"source": [
"!pip install --upgrade huggingface-hub accelerate"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# NOTE(review): duplicate of the previous cell; an unpinned upgrade here can\n",
"# override the pinned huggingface-hub/accelerate versions installed earlier.\n",
"!pip install --upgrade huggingface-hub accelerate"
],
"metadata": {
"collapsed": true,
"id": "YLcgomwon6f3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title Download Pony Diffusion V6 XL (~6.5 GB) from Civitai\n",
"# NOTE(review): some Civitai downloads require an API token -- confirm the\n",
"# link still works anonymously before relying on this cell.\n",
"!wget \"https://civitai.com/api/download/models/290640\" -O /content/ponyDiffusionV6XL.safetensors"
],
"metadata": {
"id": "1QRr5GtLir3-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "18df2cdf",
"outputId": "8fbdd5fa-e378-4ce8-8a92-ef808d2d6ba9"
},
"source": [
"#@title Convert Safetensors to Diffusers (Updated)\n",
"# NOTE(review): duplicate of the earlier conversion cell. The recorded\n",
"# output shows this run was interrupted (^C) yet the completion message\n",
"# still printed -- the final print is unconditional.\n",
"# Download a more recent version of the conversion script\n",
"!wget -q -O convert_diffusers.py https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py\n",
"\n",
"# Convert Pony V6 (SDXL) to Diffusers format\n",
"# We remove the unrecognized --use_safetensors flag\n",
"!python3 convert_diffusers.py \\\n",
"  --checkpoint_path /content/ponyDiffusionV6XL.safetensors \\\n",
"  --dump_path /content/pony_diffusers \\\n",
"  --from_safetensors \\\n",
"  --to_safetensors \\\n",
"  --device cuda\n",
"\n",
"print(\"\\nConversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\")"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
" torch.utils._pytree._register_pytree_node(\n",
"2026-04-10 20:15:04.758845: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
"E0000 00:00:1775852104.780318 58974 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"E0000 00:00:1775852104.786834 58974 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"W0000 00:00:1775852104.802905 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1775852104.802933 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1775852104.802937 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1775852104.802942 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
" torch.utils._pytree._register_pytree_node(\n",
"/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
" warnings.warn(\n",
"config.json: 4.52kB [00:00, 16.5MB/s]\n",
"tokenizer_config.json: 100% 904/904 [00:00<00:00, 6.12MB/s]\n",
"vocab.json: 862kB [00:00, 36.3MB/s]\n",
"merges.txt: 525kB [00:00, 85.8MB/s]\n",
"special_tokens_map.json: 100% 389/389 [00:00<00:00, 2.91MB/s]\n",
"tokenizer.json: 2.22MB [00:00, 133MB/s]\n",
"config.json: 4.88kB [00:00, 21.8MB/s]\n",
"^C\n",
"\n",
"Conversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zhCjdQRFTbYp"
},
"outputs": [],
"source": [
"#@title Convert Safetensors to Diffusers\n",
"# Uses the conversion script pinned to diffusers v0.17.1 to match the\n",
"# versions installed by the first setup cell.\n",
"from_safetensors_url = '' #@param {type:\"string\"}\n",
"!wget -q https://raw.githubusercontent.com/huggingface/diffusers/v0.17.1/scripts/convert_original_stable_diffusion_to_diffusers.py\n",
"!wget {from_safetensors_url} -O /content/model.safetensors\n",
"# Fix: the checkpoint is downloaded to /content/model.safetensors, but the\n",
"# converter was given the relative path 'model.safetensors', which breaks\n",
"# whenever the working directory is not /content (earlier cells %cd into\n",
"# /content/kohya_ss). Use the absolute path instead.\n",
"!python3 convert_original_stable_diffusion_to_diffusers.py --half --from_safetensors --checkpoint_path /content/model.safetensors --dump_path /content/model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "j-hterftTbYq"
},
"outputs": [],
"source": [
"#@title Push to HF.co\n",
"\n",
"import os\n",
"from huggingface_hub import create_repo, upload_folder\n",
"\n",
"# SECURITY NOTE(review): the write token is typed into a form field and gets\n",
"# saved in the notebook file -- clear it before sharing the notebook, or\n",
"# read it from an environment variable / getpass instead of a #@param.\n",
"hf_token = 'HF_WRITE_TOKEN' #@param {type:\"string\"}\n",
"repo_id = 'username/reponame' #@param {type:\"string\"}\n",
"commit_message = '\\u2764' #@param {type:\"string\"}\n",
"create_repo(repo_id, private=True, token=hf_token)\n",
"model_path = '/content/train/model' #@param {type:\"string\"}\n",
"upload_folder(folder_path=f'{model_path}', path_in_repo='', repo_id=repo_id, commit_message=commit_message, token=hf_token)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "nyYYYmtlTbYq"
},
"outputs": [],
"source": [
"#@title Push to DagsHub.com\n",
"\n",
"!pip -q install dagshub\n",
"from dagshub.upload import Repo, create_repo\n",
"\n",
"repo_id = 'reponame' #@param {type:\"string\"}\n",
"org_name = 'orgname' #@param {type:\"string\"}\n",
"commit_message = '\\u2764' #@param {type:\"string\"}\n",
"create_repo(f\"{repo_id}\", org_name=f\"{org_name}\")\n",
"repo = Repo(f\"{org_name}\", f\"{repo_id}\")\n",
"model_path = '/content/train/model' #@param {type:\"string\"}\n",
"# Fix: the upload previously hardcoded '/content/models' and silently\n",
"# ignored the model_path form field defined just above.\n",
"repo.upload(f\"{model_path}\", remote_path=\"data\", commit_message=f\"{commit_message}\", force=True)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}