Created using Colab

2026-04-10 18:18:35 -04:00 · 2026-04-10 18:18:35 -04:00 · 4fb1521430
parent 104d190479
commit 4fb1521430
1 changed files with 657 additions and 0 deletions
--- a/kohya_ss_colab.ipynb
+++ b/kohya_ss_colab.ipynb
@ -42,6 +42,604 @@
        "!python kohya_gui.py --share --headless"
      ]
    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Run Kohya Gui"
+      ],
+      "metadata": {
+        "id": "PDMNcVe-D0v4"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# 1. Clean up and install with a stable configuration to prevent the torch-reinstall loop\n",
+        "!pip install --upgrade pip\n",
+        "!pip install torch==2.5.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121\n",
+        "\n",
+        "# Install xformers separately without dependencies to prevent it from downgrading torch\n",
+        "!pip install xformers==0.0.28.post3 --no-deps\n",
+        "\n",
+        "# Install remaining requirements\n",
+        "!pip install accelerate==0.32.1 huggingface-hub==0.25.0 diffusers==0.25.0\n",
+        "!pip install transformers==4.44.0 safetensors==0.4.2 bitsandbytes==0.41.3.post2\n",
+        "!pip install gradio==3.36.1 easygui==0.98.3 einops==0.6.0 voluptuous==0.13.1\n",
+        "!pip install open-clip-torch==2.20.0 tensorboard==2.15.0\n",
+        "\n",
+        "# 2. Launch with environment variables and Low-RAM fix\n",
+        "%cd /content/kohya_ss\n",
+        "import os\n",
+        "import torch\n",
+        "\n",
+        "# Forces torch to be more aggressive with memory reuse\n",
+        "os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:512'\n",
+        "\n",
+        "# Pre-clear memory before starting\n",
+        "if torch.cuda.is_available():\n",
+        "    torch.cuda.empty_cache()\n",
+        "\n",
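+        "# Launch the GUI headless with a public share link. Note: --lowram is NOT passed here; it is a trainer option, so enable Low RAM in the GUI parameters or on the training command instead\n",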
+        "!python kohya_gui.py --share --headless"
+      ],
+      "metadata": {
+        "collapsed": true,
+        "id": "wM6vFztrpnni"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "9b6261da",
+        "outputId": "eca83b62-3490-4044-9646-c50b1c88c328"
+      },
+      "source": [
+        "#@title Direct SDXL Training Command (VRAM-Load Strategy)\n",
+        "import os\n",
+        "import torch\n",
+        "import gc\n",
+        "\n",
+        "# 1. System Deep Clean\n",
+        "gc.collect()\n",
+        "torch.cuda.empty_cache()\n",
+        "\n",
+        "# 2. Setup Paths\n",
+        "%cd /content/kohya_ss\n",
+        "\n",
+        "# 3. Apply the Direct-to-GPU Patch\n",
+        "# This forces the loader to use 'cuda' immediately, bypassing the System RAM (CPU) stage\n",
+        "!sed -i \"s/load_file(checkpoint_path)/load_file(checkpoint_path, device='cuda')/g\" library/sdxl_model_util.py\n",
+        "!sed -i \"s/load_file(model_path)/load_file(model_path, device='cuda')/g\" library/model_util.py\n",
+        "\n",
+        "os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:128'\n",
+        "\n",
+        "MODEL_PATH = \"/content/ponyDiffusionV6XL.safetensors\"\n",
+        "DATA_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project\"\n",
+        "OUTPUT_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/Outputs/pkmnessentialitem\"\n",
+        "LOG_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/log\"\n",
+        "OUTPUT_NAME = \"pkmn_items_v1\"\n",
+        "\n",
+        "# 4. Launch with Low-RAM strategies and 512 resolution\n",
+        "# NOTE: --mem_eff_attn is deliberately not combined with --xformers. In kohya's sd-scripts the\n",
+        "# mem_eff_attn path appears to be selected first when both flags are given, which would leave the\n",
+        "# separately installed xformers unused. Pass only one attention backend.\n",
+        "!accelerate launch --num_cpu_threads_per_process=1 \"./sdxl_train_network.py\" \\\n",
+        "    --enable_bucket \\\n",
+        "    --pretrained_model_name_or_path=\"{MODEL_PATH}\" \\\n",
+        "    --train_data_dir=\"{DATA_DIR}\" \\\n",
+        "    --resolution=\"512,512\" \\\n",
+        "    --output_dir=\"{OUTPUT_DIR}\" \\\n",
+        "    --logging_dir=\"{LOG_DIR}\" \\\n",
+        "    --network_alpha=16 \\\n",
+        "    --save_model_as=safetensors \\\n",
+        "    --network_module=networks.lora \\\n",
+        "    --network_dim=32 \\\n",
+        "    --output_name=\"{OUTPUT_NAME}\" \\\n",
+        "    --lr_scheduler_num_cycles=1 \\\n",
+        "    --lr_scheduler_power=1 \\\n",
+        "    --no_half_vae \\\n",
+        "    --learning_rate=0.0001 \\\n",
+        "    --unet_lr=0.0001 \\\n",
+        "    --network_train_unet_only \\\n",
+        "    --lr_scheduler=cosine \\\n",
+        "    --lr_warmup_steps=200 \\\n",
+        "    --train_batch_size=1 \\\n",
+        "    --max_train_steps=2000 \\\n",
+        "    --save_every_n_epochs=2 \\\n",
+        "    --mixed_precision=fp16 \\\n",
+        "    --save_precision=fp16 \\\n",
+        "    --seed=0 \\\n",
+        "    --caption_extension=.txt \\\n",
+        "    --optimizer_type=AdamW8bit \\\n",
+        "    --bucket_reso_steps=64 \\\n",
+        "    --gradient_checkpointing \\\n",
+        "    --xformers \\\n",
+        "    --bucket_no_upscale \\\n",
+        "    --lowram"
+      ],
+      "execution_count": 38,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "/content/kohya_ss\n",
+            "The following values were not passed to `accelerate launch` and had defaults used instead:\n",
+            "\t`--num_processes` was set to a value of `1`\n",
+            "\t`--num_machines` was set to a value of `1`\n",
+            "\t`--mixed_precision` was set to a value of `'no'`\n",
+            "\t`--dynamo_backend` was set to a value of `'no'`\n",
+            "To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n",
+            "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
+            "  torch.utils._pytree._register_pytree_node(\n",
+            "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
+            "  torch.utils._pytree._register_pytree_node(\n",
+            "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
+            "  torch.utils._pytree._register_pytree_node(\n",
+            "2026-04-10 21:58:17.017253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+            "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
+            "E0000 00:00:1775858297.052001   85560 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+            "E0000 00:00:1775858297.062480   85560 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+            "W0000 00:00:1775858297.079987   85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1775858297.080015   85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1775858297.080019   85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1775858297.080023   85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
+            "  torch.utils._pytree._register_pytree_node(\n",
+            "/usr/local/lib/python3.12/dist-packages/timm/models/layers/__init__.py:49: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers\n",
+            "  warnings.warn(f\"Importing from {__name__} is deprecated, please import via timm.layers\", FutureWarning)\n",
+            "prepare tokenizers\n",
+            "/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
+            "  warnings.warn(\n",
+            "Using DreamBooth method.\n",
+            "ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: log\n",
+            "ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: Outputs\n",
+            "ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: Configs\n",
+            "ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: .ipynb_checkpoints\n",
+            "prepare images.\n",
+            "found directory /content/drive/Othercomputers/My Laptop/pokemon_lora_project/20_pkmnessentialitem contains 10 image files\n",
+            "200 train images with repeating.\n",
+            "0 reg images.\n",
+            "no regularization images / 正則化画像が見つかりませんでした\n",
+            "[Dataset 0]\n",
+            "  batch_size: 1\n",
+            "  resolution: (512, 512)\n",
+            "  enable_bucket: True\n",
+            "  min_bucket_reso: 256\n",
+            "  max_bucket_reso: 1024\n",
+            "  bucket_reso_steps: 64\n",
+            "  bucket_no_upscale: True\n",
+            "\n",
+            "  [Subset 0 of Dataset 0]\n",
+            "    image_dir: \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/20_pkmnessentialitem\"\n",
+            "    image_count: 10\n",
+            "    num_repeats: 20\n",
+            "    shuffle_caption: False\n",
+            "    keep_tokens: 0\n",
+            "    caption_dropout_rate: 0.0\n",
+            "    caption_dropout_every_n_epoches: 0\n",
+            "    caption_tag_dropout_rate: 0.0\n",
+            "    color_aug: False\n",
+            "    flip_aug: False\n",
+            "    face_crop_aug_range: None\n",
+            "    random_crop: False\n",
+            "    token_warmup_min: 1,\n",
+            "    token_warmup_step: 0,\n",
+            "    is_reg: False\n",
+            "    class_tokens: pkmnessentialitem\n",
+            "    caption_extension: .txt\n",
+            "\n",
+            "\n",
+            "[Dataset 0]\n",
+            "loading image sizes.\n",
+            "100% 10/10 [00:00<00:00, 284.23it/s]\n",
+            "make buckets\n",
+            "min_bucket_reso and max_bucket_reso are ignored if bucket_no_upscale is set, because bucket reso is defined by image size automatically / bucket_no_upscaleが指定された場合は、bucketの解像度は画像サイズから自動計算されるため、min_bucket_resoとmax_bucket_resoは無視されます\n",
+            "number of images (including repeats) / 各bucketの画像枚数（繰り返し回数を含む）\n",
+            "bucket 0: resolution (512, 512), count: 200\n",
+            "mean ar error (without repeats): 0.0\n",
+            "Traceback (most recent call last):\n",
+            "  File \"/content/kohya_ss/./sdxl_train_network.py\", line 167, in <module>\n",
+            "    trainer.train(args)\n",
+            "  File \"/content/kohya_ss/train_network.py\", line 182, in train\n",
+            "    current_epoch = Value(\"i\", 0)\n",
+            "                    ^^^^^^^^^^^^^\n",
+            "  File \"/usr/lib/python3.12/multiprocessing/context.py\", line 135, in Value\n",
+            "    return Value(typecode_or_type, *args, lock=lock,\n",
+            "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+            "  File \"/usr/lib/python3.12/multiprocessing/sharedctypes.py\", line 79, in Value\n",
+            "    lock = ctx.RLock()\n",
+            "           ^^^^^^^^^^^\n",
+            "  File \"/usr/lib/python3.12/multiprocessing/context.py\", line 73, in RLock\n",
+            "    return RLock(ctx=self.get_context())\n",
+            "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+            "  File \"/usr/lib/python3.12/multiprocessing/synchronize.py\", line 194, in __init__\n",
+            "    SemLock.__init__(self, RECURSIVE_MUTEX, 1, 1, ctx=ctx)\n",
+            "  File \"/usr/lib/python3.12/multiprocessing/synchronize.py\", line 57, in __init__\n",
+            "    sl = self._semlock = _multiprocessing.SemLock(\n",
+            "                         ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+            "OSError: [Errno 28] No space left on device\n",
+            "Traceback (most recent call last):\n",
+            "  File \"/usr/local/bin/accelerate\", line 6, in <module>\n",
+            "    sys.exit(main())\n",
+            "             ^^^^^^\n",
+            "  File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/accelerate_cli.py\", line 48, in main\n",
+            "    args.func(args)\n",
+            "  File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/launch.py\", line 1097, in launch_command\n",
+            "    simple_launcher(args)\n",
+            "  File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/launch.py\", line 703, in simple_launcher\n",
+            "    raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)\n",
+            "subprocess.CalledProcessError: Command '['/usr/bin/python3', './sdxl_train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=/content/ponyDiffusionV6XL.safetensors', '--train_data_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project', '--resolution=512,512', '--output_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project/Outputs/pkmnessentialitem', '--logging_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project/log', '--network_alpha=16', '--save_model_as=safetensors', '--network_module=networks.lora', '--network_dim=32', '--output_name=pkmn_items_v1', '--lr_scheduler_num_cycles=1', '--lr_scheduler_power=1', '--no_half_vae', '--learning_rate=0.0001', '--unet_lr=0.0001', '--network_train_unet_only', '--lr_scheduler=cosine', '--lr_warmup_steps=200', '--train_batch_size=1', '--max_train_steps=2000', '--save_every_n_epochs=2', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=0', '--caption_extension=.txt', '--optimizer_type=AdamW8bit', '--bucket_reso_steps=64', '--gradient_checkpointing', '--xformers', '--bucket_no_upscale', '--lowram', '--mem_eff_attn']' returned non-zero exit status 1.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1fede1bc",
+        "outputId": "91d3aaba-8434-4742-cd6f-c354f507a175"
+      },
+      "source": [
+        "import psutil\n",
+        "import torch\n",
+        "\n",
+        "def print_memory_stats():\n",
+        "    \"\"\"Print system RAM usage and, for each visible CUDA device, allocated/reserved/total memory in GB.\"\"\"\n",
+        "    gib = 1024**3\n",
+        "\n",
+        "    # System RAM. psutil derives `percent` from `available`, not from `used`, so both are shown;\n",
+        "    # otherwise the used figure and the percentage can look contradictory.\n",
+        "    ram = psutil.virtual_memory()\n",
+        "    print(f\"System RAM Usage: {ram.used / gib:.2f} GB / {ram.total / gib:.2f} GB ({ram.percent}%, {ram.available / gib:.2f} GB available)\")\n",
+        "\n",
+        "    # GPU RAM: `allocated` is memory held by live tensors, `reserved` is what PyTorch's caching allocator holds\n",
+        "    if torch.cuda.is_available():\n",
+        "        for i in range(torch.cuda.device_count()):\n",
+        "            total_gpu = torch.cuda.get_device_properties(i).total_memory\n",
+        "            reserved_gpu = torch.cuda.memory_reserved(i)\n",
+        "            allocated_gpu = torch.cuda.memory_allocated(i)\n",
+        "            print(f\"GPU {i} Usage: {allocated_gpu / gib:.2f} GB / {total_gpu / gib:.2f} GB (reserved by PyTorch: {reserved_gpu / gib:.2f} GB)\")\n",
+        "\n",
+        "print_memory_stats()"
+      ],
+      "execution_count": 37,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "System RAM Usage: 1.21 GB / 12.67 GB (57.1%)\n",
+            "GPU 0 Usage: 0.00 GB / 14.56 GB\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "523ddfed",
+        "outputId": "11e39ca9-eb83-47ac-d611-17a62b5ffd4f"
+      },
+      "source": [
+        "import gc\n",
+        "import os\n",
+        "import torch\n",
+        "from safetensors.torch import load_file\n",
+        "import psutil\n",
+        "\n",
+        "def verify_and_monitor():\n",
+        "    \"\"\"Check that the checkpoint exists, print its size and the available RAM, then test-load it on the CPU.\"\"\"\n",
+        "    model_path = \"/content/ponyDiffusionV6XL.safetensors\"\n",
+        "    if not os.path.exists(model_path):\n",
+        "        print(f\"ERROR: Model not found at {model_path}\")\n",
+        "        return\n",
+        "\n",
+        "    print(f\"Model size: {os.path.getsize(model_path) / 1024**3:.2f} GB\")\n",
+        "\n",
+        "    # Check RAM before loading\n",
+        "    ram = psutil.virtual_memory()\n",
+        "    print(f\"Initial RAM: {ram.available / 1024**3:.2f} GB available\")\n",
+        "\n",
+        "    try:\n",
+        "        print(\"Testing model load to CPU (this spikes RAM)...\")\n",
+        "        # load_file returns the checkpoint's tensors as a dict placed on the requested device (CPU here)\n",
+        "        sd = load_file(model_path, device='cpu')\n",
+        "        print(\"Successfully read safetensors header.\")\n",
+        "        # Drop the state dict right away so this test does not keep holding the memory.\n",
+        "        # `gc` is imported in this cell so the cleanup also works on a fresh kernel.\n",
+        "        del sd\n",
+        "        gc.collect()\n",
+        "    except Exception as e:\n",
+        "        print(f\"Load test failed: {e}\")\n",
+        "\n",
+        "verify_and_monitor()"
+      ],
+      "execution_count": 34,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Model size: 6.46 GB\n",
+            "Initial RAM: 5.51 GB available\n",
+            "Testing model load to CPU (this spikes RAM)...\n",
+            "Successfully read safetensors header.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "91722104",
+        "outputId": "cb16f2c6-acbd-455f-c70d-22206649724e"
+      },
+      "source": [
+        "import torch\n",
+        "from safetensors.torch import load_file\n",
+        "import psutil\n",
+        "\n",
+        "def check_vram_load(path='/content/ponyDiffusionV6XL.safetensors'):\n",
+        "    print(f'Checking if {path} can fit in VRAM...')\n",
+        "    initial_vram = torch.cuda.memory_reserved() / 1024**3\n",
+        "    initial_ram = psutil.virtual_memory().used / 1024**3\n",
+        "\n",
+        "    try:\n",
+        "        # Load directly to GPU device\n",
+        "        state_dict = load_file(path, device='cuda')\n",
+        "\n",
+        "        final_vram = torch.cuda.memory_reserved() / 1024**3\n",
+        "        final_ram = psutil.virtual_memory().used / 1024**3\n",
+        "\n",
+        "        print(f'Success!')\n",
+        "        print(f'System RAM used: {final_ram - initial_ram:.2f} GB')\n",
+        "        print(f'GPU VRAM used: {final_vram - initial_vram:.2f} GB')\n",
+        "\n",
+        "        del state_dict\n",
+        "        torch.cuda.empty_cache()\n",
+        "        print('VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.')\n",
+        "    except Exception as e:\n",
+        "        print(f'Load failed: {e}')\n",
+        "\n",
+        "check_vram_load()"
+      ],
+      "execution_count": 35,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Checking if /content/ponyDiffusionV6XL.safetensors can fit in VRAM...\n",
+            "Success!\n",
+            "System RAM used: -0.03 GB\n",
+            "GPU VRAM used: 6.99 GB\n",
+            "VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ba65b51c",
+        "outputId": "4be365ea-1e1b-4ccd-d91d-da012829108d"
+      },
+      "source": [
+        "import torch\n",
+        "from safetensors.torch import load_file\n",
+        "import psutil\n",
+        "\n",
+        "def check_vram_load(path='/content/ponyDiffusionV6XL.safetensors'):\n",
+        "    print(f'Checking if {path} can fit in VRAM...')\n",
+        "    initial_vram = torch.cuda.memory_reserved() / 1024**3\n",
+        "    initial_ram = psutil.virtual_memory().used / 1024**3\n",
+        "\n",
+        "    try:\n",
+        "        # Load directly to GPU device\n",
+        "        state_dict = load_file(path, device='cuda')\n",
+        "\n",
+        "        final_vram = torch.cuda.memory_reserved() / 1024**3\n",
+        "        final_ram = psutil.virtual_memory().used / 1024**3\n",
+        "\n",
+        "        print(f'Success!')\n",
+        "        print(f'System RAM used: {final_ram - initial_ram:.2f} GB')\n",
+        "        print(f'GPU VRAM used: {final_vram - initial_vram:.2f} GB')\n",
+        "\n",
+        "        del state_dict\n",
+        "        torch.cuda.empty_cache()\n",
+        "        print('VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.')\n",
+        "    except Exception as e:\n",
+        "        print(f'Load failed: {e}')\n",
+        "\n",
+        "check_vram_load()"
+      ],
+      "execution_count": 21,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Checking if /content/ponyDiffusionV6XL.safetensors can fit in VRAM...\n",
+            "Success!\n",
+            "System RAM used: 0.00 GB\n",
+            "GPU VRAM used: 6.99 GB\n",
+            "VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "rei0_q-oChPx"
+      },
+      "source": [
+        "#@title Convert Safetensors to Diffusers (Updated)\n",
+        "# Download a more recent version of the conversion script\n",
+        "!wget -q -O convert_diffusers.py https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py\n",
+        "\n",
+        "# Convert Pony V6 (SDXL) to Diffusers format\n",
+        "# We remove the unrecognized --use_safetensors flag\n",
+        "!python3 convert_diffusers.py \\\n",
+        "    --checkpoint_path /content/ponyDiffusionV6XL.safetensors \\\n",
+        "    --dump_path /content/pony_diffusers \\\n",
+        "    --from_safetensors \\\n",
+        "    --to_safetensors \\\n",
+        "    --device cuda\n",
+        "\n",
+        "# IPython stores the exit status of the most recent `!` command in `_exit_code`.\n",
+        "# Only announce success when the converter really finished; a crash or an interrupt must not report success.\n",
+        "if _exit_code == 0:\n",
+        "    print(\"\\nConversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\")\n",
+        "else:\n",
+        "    print(f\"\\nConversion failed or was interrupted (exit code {_exit_code}); '/content/pony_diffusers' may be missing or incomplete.\")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "f48d0810",
+        "outputId": "5f684992-17ff-4e8b-cef7-917237918e12"
+      },
+      "source": [
+        "import psutil\n",
+        "import torch\n",
+        "\n",
+        "def print_memory_stats():\n",
+        "    \"\"\"Print system RAM usage and, for each visible CUDA device, allocated/reserved/total memory in GB.\"\"\"\n",
+        "    gib = 1024**3\n",
+        "\n",
+        "    # System RAM. psutil derives `percent` from `available`, not from `used`, so both are shown;\n",
+        "    # otherwise the used figure and the percentage can look contradictory.\n",
+        "    ram = psutil.virtual_memory()\n",
+        "    print(f\"System RAM Usage: {ram.used / gib:.2f} GB / {ram.total / gib:.2f} GB ({ram.percent}%, {ram.available / gib:.2f} GB available)\")\n",
+        "\n",
+        "    # GPU RAM: `allocated` is memory held by live tensors, `reserved` is what PyTorch's caching allocator holds\n",
+        "    if torch.cuda.is_available():\n",
+        "        for i in range(torch.cuda.device_count()):\n",
+        "            total_gpu = torch.cuda.get_device_properties(i).total_memory\n",
+        "            reserved_gpu = torch.cuda.memory_reserved(i)\n",
+        "            allocated_gpu = torch.cuda.memory_allocated(i)\n",
+        "            print(f\"GPU {i} Usage: {allocated_gpu / gib:.2f} GB / {total_gpu / gib:.2f} GB (reserved by PyTorch: {reserved_gpu / gib:.2f} GB)\")\n",
+        "\n",
+        "print_memory_stats()"
+      ],
+      "execution_count": 16,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "System RAM Usage: 1.00 GB / 12.67 GB (10.5%)\n",
+            "GPU 0 Usage: 0.00 GB / 14.56 GB\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "06468516",
+        "outputId": "b39b18b4-5722-4ba8-b209-2931af026252"
+      },
+      "source": [
+        "import gc\n",
+        "import os\n",
+        "import torch\n",
+        "\n",
+        "# Clear Python and Torch memory: collect unreachable objects, then release PyTorch's unused cached GPU memory\n",
+        "gc.collect()\n",
+        "torch.cuda.empty_cache()\n",
+        "\n",
+        "# Try to clear the Linux page cache. On Colab /proc/sys/vm/drop_caches is read-only, so the write\n",
+        "# can fail; report what actually happened instead of always claiming that system RAM was cleared.\n",
+        "os.sync()\n",
+        "try:\n",
+        "    with open(\"/proc/sys/vm/drop_caches\", \"w\") as drop_caches:\n",
+        "        drop_caches.write(\"3\\n\")\n",
+        "except OSError as err:\n",
+        "    print(f\"Could not drop the Linux page cache: {err}\")\n",
+        "    print(\"Python garbage and cached GPU memory cleared; system page cache left untouched.\")\n",
+        "else:\n",
+        "    print(\"System RAM and GPU memory cleared.\")"
+      ],
+      "execution_count": 15,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "/bin/bash: line 1: /proc/sys/vm/drop_caches: Read-only file system\n",
+            "System RAM and GPU memory cleared.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "758df299",
+        "outputId": "cc97681b-477a-44b5-ddc8-c27924d868f5"
+      },
+      "source": [
+        "import torch\n",
+        "from safetensors.torch import load_file\n",
+        "\n",
+        "def check_model_load(path=\"/content/ponyDiffusionV6XL.safetensors\"):\n",
+        "    print(f\"Attempting to pre-load {path} to GPU to verify memory availability...\")\n",
+        "    try:\n",
+        "        # Load the checkpoint's tensors straight onto the GPU (device=\"cuda\") rather than into system RAM first\n",
+        "        state_dict = load_file(path, device=\"cuda\")\n",
+        "        print(\"Successfully loaded model to GPU VRAM.\")\n",
+        "\n",
+        "        # We don't actually need to keep it in memory here, we just wanted to see if it fits\n",
+        "        del state_dict\n",
+        "        torch.cuda.empty_cache()\n",
+        "        print(\"GPU Memory cleared and ready for trainer.\")\n",
+        "    except Exception as e:\n",
+        "        print(f\"Load failed: {e}\")\n",
+        "\n",
+        "check_model_load()"
+      ],
+      "execution_count": 18,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Attempting to pre-load /content/ponyDiffusionV6XL.safetensors to GPU to verify memory availability...\n",
+            "Successfully loaded model to GPU VRAM.\n",
+            "GPU Memory cleared and ready for trainer.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2c0ea383"
+      },
+      "source": [
+        "### Pro-Tip for Kohya GUI Settings:\n",
+        "Now that we verified the model can load, ensure these are set in the **Parameters** tab to prevent the training process itself from spiking the RAM:\n",
+        "1. **High-level settings**: Check `Low RAM` if available.\n",
+        "2. **Memory management**: Ensure `Gradient Checkpointing` is ON.\n",
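+        "3. **Optimizers**: Use `AdamW8bit` (its 8-bit optimizer states need far less GPU memory than standard `AdamW`). `Prodigy` can also be used, but it is not an 8-bit optimizer and does not give the same memory savings."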
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "83390a2d"
+      },
+      "source": [
+        "# Keep these at the versions pinned in the install cell. An unpinned --upgrade replaces them with the\n",
+        "# latest releases, and newer huggingface-hub releases are not compatible with diffusers==0.25.0.\n",
+        "!pip install huggingface-hub==0.25.0 accelerate==0.32.1"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
    {
      "cell_type": "code",
      "source": [
@ -65,6 +663,65 @@
      "execution_count": null,
      "outputs": []
    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "18df2cdf",
+        "outputId": "8fbdd5fa-e378-4ce8-8a92-ef808d2d6ba9"
+      },
+      "source": [
+        "#@title Convert Safetensors to Diffusers (Updated)\n",
+        "# Download a more recent version of the conversion script\n",
+        "!wget -q -O convert_diffusers.py https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py\n",
+        "\n",
+        "# Convert Pony V6 (SDXL) to Diffusers format\n",
+        "# We remove the unrecognized --use_safetensors flag\n",
+        "!python3 convert_diffusers.py \\\n",
+        "    --checkpoint_path /content/ponyDiffusionV6XL.safetensors \\\n",
+        "    --dump_path /content/pony_diffusers \\\n",
+        "    --from_safetensors \\\n",
+        "    --to_safetensors \\\n",
+        "    --device cuda\n",
+        "\n",
+        "# IPython stores the exit status of the most recent `!` command in `_exit_code`.\n",
+        "# Only announce success when the converter really finished; a crash or an interrupt must not report success.\n",
+        "if _exit_code == 0:\n",
+        "    print(\"\\nConversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\")\n",
+        "else:\n",
+        "    print(f\"\\nConversion failed or was interrupted (exit code {_exit_code}); '/content/pony_diffusers' may be missing or incomplete.\")"
+      ],
+      "execution_count": 14,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
+            "  torch.utils._pytree._register_pytree_node(\n",
+            "2026-04-10 20:15:04.758845: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+            "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
+            "E0000 00:00:1775852104.780318   58974 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+            "E0000 00:00:1775852104.786834   58974 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+            "W0000 00:00:1775852104.802905   58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1775852104.802933   58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1775852104.802937   58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "W0000 00:00:1775852104.802942   58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+            "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
+            "  torch.utils._pytree._register_pytree_node(\n",
+            "/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
+            "  warnings.warn(\n",
+            "config.json: 4.52kB [00:00, 16.5MB/s]\n",
+            "tokenizer_config.json: 100% 904/904 [00:00<00:00, 6.12MB/s]\n",
+            "vocab.json: 862kB [00:00, 36.3MB/s]\n",
+            "merges.txt: 525kB [00:00, 85.8MB/s]\n",
+            "special_tokens_map.json: 100% 389/389 [00:00<00:00, 2.91MB/s]\n",
+            "tokenizer.json: 2.22MB [00:00, 133MB/s]\n",
+            "config.json: 4.88kB [00:00, 21.8MB/s]\n",
+            "^C\n",
+            "\n",
+            "Conversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\n"
+          ]
+        }
+      ]
+    },
    {
      "cell_type": "code",
      "execution_count": null,