diff --git a/kohya_ss_colab.ipynb b/kohya_ss_colab.ipynb index a188b1e..f6fe4ba 100644 --- a/kohya_ss_colab.ipynb +++ b/kohya_ss_colab.ipynb @@ -42,6 +42,604 @@ "!python kohya_gui.py --share --headless" ] }, + { + "cell_type": "markdown", + "source": [ + "Run Kohya Gui" + ], + "metadata": { + "id": "PDMNcVe-D0v4" + } + }, + { + "cell_type": "code", + "source": [ + "# 1. Clean up and install with a stable configuration to prevent the torch-reinstall loop\n", + "!pip install --upgrade pip\n", + "!pip install torch==2.5.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121\n", + "\n", + "# Install xformers separately without dependencies to prevent it from downgrading torch\n", + "!pip install xformers==0.0.28.post3 --no-deps\n", + "\n", + "# Install remaining requirements\n", + "!pip install accelerate==0.32.1 huggingface-hub==0.25.0 diffusers==0.25.0\n", + "!pip install transformers==4.44.0 safetensors==0.4.2 bitsandbytes==0.41.3.post2\n", + "!pip install gradio==3.36.1 easygui==0.98.3 einops==0.6.0 voluptuous==0.13.1\n", + "!pip install open-clip-torch==2.20.0 tensorboard==2.15.0\n", + "\n", + "# 2. 
Launch with environment variables and Low-RAM fix\n", + "%cd /content/kohya_ss\n", + "import os\n", + "import torch\n", + "\n", + "# Forces torch to be more aggressive with memory reuse\n", + "os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:512'\n", + "\n", + "# Pre-clear memory before starting\n", + "if torch.cuda.is_available():\n", + " torch.cuda.empty_cache()\n", + "\n", + "# Launching with --lowram to ensure the trainer doesn't spike System RAM during load\n", + "!python kohya_gui.py --share --headless" + ], + "metadata": { + "collapsed": true, + "id": "wM6vFztrpnni" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9b6261da", + "outputId": "eca83b62-3490-4044-9646-c50b1c88c328" + }, + "source": [ + "#@title Direct SDXL Training Command (VRAM-Load Strategy)\n", + "import os\n", + "import torch\n", + "import gc\n", + "\n", + "# 1. System Deep Clean\n", + "gc.collect()\n", + "torch.cuda.empty_cache()\n", + "\n", + "# 2. Setup Paths\n", + "%cd /content/kohya_ss\n", + "\n", + "# 3. 
Apply the Direct-to-GPU Patch\n", + "# This forces the loader to use 'cuda' immediately, bypassing the System RAM (CPU) stage\n", + "!sed -i \"s/load_file(checkpoint_path)/load_file(checkpoint_path, device='cuda')/g\" library/sdxl_model_util.py\n", + "!sed -i \"s/load_file(model_path)/load_file(model_path, device='cuda')/g\" library/model_util.py\n", + "\n", + "os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:128'\n", + "\n", + "MODEL_PATH = \"/content/ponyDiffusionV6XL.safetensors\"\n", + "DATA_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project\"\n", + "OUTPUT_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/Outputs/pkmnessentialitem\"\n", + "LOG_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/log\"\n", + "OUTPUT_NAME = \"pkmn_items_v1\"\n", + "\n", + "# 4. Launch with Low-RAM strategies and 512 resolution\n", + "!accelerate launch --num_cpu_threads_per_process=1 \"./sdxl_train_network.py\" \\\n", + " --enable_bucket \\\n", + " --pretrained_model_name_or_path=\"{MODEL_PATH}\" \\\n", + " --train_data_dir=\"{DATA_DIR}\" \\\n", + " --resolution=\"512,512\" \\\n", + " --output_dir=\"{OUTPUT_DIR}\" \\\n", + " --logging_dir=\"{LOG_DIR}\" \\\n", + " --network_alpha=16 \\\n", + " --save_model_as=safetensors \\\n", + " --network_module=networks.lora \\\n", + " --network_dim=32 \\\n", + " --output_name=\"{OUTPUT_NAME}\" \\\n", + " --lr_scheduler_num_cycles=1 \\\n", + " --lr_scheduler_power=1 \\\n", + " --no_half_vae \\\n", + " --learning_rate=0.0001 \\\n", + " --unet_lr=0.0001 \\\n", + " --network_train_unet_only \\\n", + " --lr_scheduler=cosine \\\n", + " --lr_warmup_steps=200 \\\n", + " --train_batch_size=1 \\\n", + " --max_train_steps=2000 \\\n", + " --save_every_n_epochs=2 \\\n", + " --mixed_precision=fp16 \\\n", + " --save_precision=fp16 \\\n", + " --seed=0 \\\n", + " --caption_extension=.txt \\\n", + " --optimizer_type=AdamW8bit \\\n", + " --bucket_reso_steps=64 \\\n", + " 
--gradient_checkpointing \\\n", + " --xformers \\\n", + " --bucket_no_upscale \\\n", + " --lowram \\\n", + " --mem_eff_attn" + ], + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/kohya_ss\n", + "The following values were not passed to `accelerate launch` and had defaults used instead:\n", + "\t`--num_processes` was set to a value of `1`\n", + "\t`--num_machines` was set to a value of `1`\n", + "\t`--mixed_precision` was set to a value of `'no'`\n", + "\t`--dynamo_backend` was set to a value of `'no'`\n", + "To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n", + "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n", + " torch.utils._pytree._register_pytree_node(\n", + "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n", + " torch.utils._pytree._register_pytree_node(\n", + "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. 
Please use `torch.utils._pytree.register_pytree_node` instead.\n", + " torch.utils._pytree._register_pytree_node(\n", + "2026-04-10 21:58:17.017253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1775858297.052001 85560 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1775858297.062480 85560 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "W0000 00:00:1775858297.079987 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1775858297.080015 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1775858297.080019 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1775858297.080023 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. 
Please use `torch.utils._pytree.register_pytree_node` instead.\n", + " torch.utils._pytree._register_pytree_node(\n", + "/usr/local/lib/python3.12/dist-packages/timm/models/layers/__init__.py:49: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers\n", + " warnings.warn(f\"Importing from {__name__} is deprecated, please import via timm.layers\", FutureWarning)\n", + "prepare tokenizers\n", + "/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", + " warnings.warn(\n", + "Using DreamBooth method.\n", + "ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: log\n", + "ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: Outputs\n", + "ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: Configs\n", + "ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: .ipynb_checkpoints\n", + "prepare images.\n", + "found directory /content/drive/Othercomputers/My Laptop/pokemon_lora_project/20_pkmnessentialitem contains 10 image files\n", + "200 train images with repeating.\n", + "0 reg images.\n", + "no regularization images / 正則化画像が見つかりませんでした\n", + "[Dataset 0]\n", + " batch_size: 1\n", + " resolution: (512, 512)\n", + " enable_bucket: True\n", + " min_bucket_reso: 256\n", + " max_bucket_reso: 1024\n", + " bucket_reso_steps: 64\n", + " bucket_no_upscale: True\n", + "\n", + " [Subset 0 of Dataset 0]\n", + " image_dir: \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/20_pkmnessentialitem\"\n", + " image_count: 10\n", + " num_repeats: 20\n", + " shuffle_caption: False\n", + " keep_tokens: 0\n", + " caption_dropout_rate: 0.0\n", + " caption_dropout_every_n_epoches: 0\n", + " 
caption_tag_dropout_rate: 0.0\n", + " color_aug: False\n", + " flip_aug: False\n", + " face_crop_aug_range: None\n", + " random_crop: False\n", + " token_warmup_min: 1,\n", + " token_warmup_step: 0,\n", + " is_reg: False\n", + " class_tokens: pkmnessentialitem\n", + " caption_extension: .txt\n", + "\n", + "\n", + "[Dataset 0]\n", + "loading image sizes.\n", + "100% 10/10 [00:00<00:00, 284.23it/s]\n", + "make buckets\n", + "min_bucket_reso and max_bucket_reso are ignored if bucket_no_upscale is set, because bucket reso is defined by image size automatically / bucket_no_upscaleが指定された場合は、bucketの解像度は画像サイズから自動計算されるため、min_bucket_resoとmax_bucket_resoは無視されます\n", + "number of images (including repeats) / 各bucketの画像枚数(繰り返し回数を含む)\n", + "bucket 0: resolution (512, 512), count: 200\n", + "mean ar error (without repeats): 0.0\n", + "Traceback (most recent call last):\n", + " File \"/content/kohya_ss/./sdxl_train_network.py\", line 167, in \n", + " trainer.train(args)\n", + " File \"/content/kohya_ss/train_network.py\", line 182, in train\n", + " current_epoch = Value(\"i\", 0)\n", + " ^^^^^^^^^^^^^\n", + " File \"/usr/lib/python3.12/multiprocessing/context.py\", line 135, in Value\n", + " return Value(typecode_or_type, *args, lock=lock,\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/lib/python3.12/multiprocessing/sharedctypes.py\", line 79, in Value\n", + " lock = ctx.RLock()\n", + " ^^^^^^^^^^^\n", + " File \"/usr/lib/python3.12/multiprocessing/context.py\", line 73, in RLock\n", + " return RLock(ctx=self.get_context())\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/lib/python3.12/multiprocessing/synchronize.py\", line 194, in __init__\n", + " SemLock.__init__(self, RECURSIVE_MUTEX, 1, 1, ctx=ctx)\n", + " File \"/usr/lib/python3.12/multiprocessing/synchronize.py\", line 57, in __init__\n", + " sl = self._semlock = _multiprocessing.SemLock(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "OSError: [Errno 28] No space left on device\n", + "Traceback 
(most recent call last):\n", + " File \"/usr/local/bin/accelerate\", line 6, in \n", + " sys.exit(main())\n", + " ^^^^^^\n", + " File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/accelerate_cli.py\", line 48, in main\n", + " args.func(args)\n", + " File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/launch.py\", line 1097, in launch_command\n", + " simple_launcher(args)\n", + " File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/launch.py\", line 703, in simple_launcher\n", + " raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)\n", + "subprocess.CalledProcessError: Command '['/usr/bin/python3', './sdxl_train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=/content/ponyDiffusionV6XL.safetensors', '--train_data_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project', '--resolution=512,512', '--output_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project/Outputs/pkmnessentialitem', '--logging_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project/log', '--network_alpha=16', '--save_model_as=safetensors', '--network_module=networks.lora', '--network_dim=32', '--output_name=pkmn_items_v1', '--lr_scheduler_num_cycles=1', '--lr_scheduler_power=1', '--no_half_vae', '--learning_rate=0.0001', '--unet_lr=0.0001', '--network_train_unet_only', '--lr_scheduler=cosine', '--lr_warmup_steps=200', '--train_batch_size=1', '--max_train_steps=2000', '--save_every_n_epochs=2', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=0', '--caption_extension=.txt', '--optimizer_type=AdamW8bit', '--bucket_reso_steps=64', '--gradient_checkpointing', '--xformers', '--bucket_no_upscale', '--lowram', '--mem_eff_attn']' returned non-zero exit status 1.\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1fede1bc", + "outputId": "91d3aaba-8434-4742-cd6f-c354f507a175" + }, + "source": [ + "import 
psutil\n", + "import torch\n", + "\n", + "def print_memory_stats():\n", + " # System RAM\n", + " ram = psutil.virtual_memory()\n", + " print(f\"System RAM Usage: {ram.used / 1024**3:.2f} GB / {ram.total / 1024**3:.2f} GB ({ram.percent}%)\")\n", + "\n", + " # GPU RAM\n", + " if torch.cuda.is_available():\n", + " for i in range(torch.cuda.device_count()):\n", + " total_gpu = torch.cuda.get_device_properties(i).total_memory\n", + " reserved_gpu = torch.cuda.memory_reserved(i)\n", + " allocated_gpu = torch.cuda.memory_allocated(i)\n", + " print(f\"GPU {i} Usage: {allocated_gpu / 1024**3:.2f} GB / {total_gpu / 1024**3:.2f} GB\")\n", + "\n", + "print_memory_stats()" + ], + "execution_count": 37, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "System RAM Usage: 1.21 GB / 12.67 GB (57.1%)\n", + "GPU 0 Usage: 0.00 GB / 14.56 GB\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "523ddfed", + "outputId": "11e39ca9-eb83-47ac-d611-17a62b5ffd4f" + }, + "source": [ + "import os\n", + "import torch\n", + "from safetensors.torch import load_file\n", + "import psutil\n", + "import gc\n", + "def verify_and_monitor():\n", + " model_path = \"/content/ponyDiffusionV6XL.safetensors\"\n", + " if not os.path.exists(model_path):\n", + " print(f\"ERROR: Model not found at {model_path}\")\n", + " return\n", + "\n", + " print(f\"Model size: {os.path.getsize(model_path) / 1024**3:.2f} GB\")\n", + "\n", + " # Check RAM before loading\n", + " ram = psutil.virtual_memory()\n", + " print(f\"Initial RAM: {ram.available / 1024**3:.2f} GB available\")\n", + "\n", + " try:\n", + " print(\"Testing model load to CPU (this spikes RAM)...\")\n", + " # Using mmap to see if we can avoid a full RAM load\n", + " sd = load_file(model_path, device='cpu')\n", + " print(\"Successfully read safetensors header.\")\n", + " del sd\n", + " gc.collect()\n", + " except Exception as e:\n", + " print(f\"Load test failed: 
{e}\")\n", + "\n", + "verify_and_monitor()" + ], + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Model size: 6.46 GB\n", + "Initial RAM: 5.51 GB available\n", + "Testing model load to CPU (this spikes RAM)...\n", + "Successfully read safetensors header.\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "91722104", + "outputId": "cb16f2c6-acbd-455f-c70d-22206649724e" + }, + "source": [ + "import torch\n", + "from safetensors.torch import load_file\n", + "import psutil\n", + "\n", + "def check_vram_load(path='/content/ponyDiffusionV6XL.safetensors'):\n", + " print(f'Checking if {path} can fit in VRAM...')\n", + " initial_vram = torch.cuda.memory_reserved() / 1024**3\n", + " initial_ram = psutil.virtual_memory().used / 1024**3\n", + "\n", + " try:\n", + " # Load directly to GPU device\n", + " state_dict = load_file(path, device='cuda')\n", + "\n", + " final_vram = torch.cuda.memory_reserved() / 1024**3\n", + " final_ram = psutil.virtual_memory().used / 1024**3\n", + "\n", + " print(f'Success!')\n", + " print(f'System RAM used: {final_ram - initial_ram:.2f} GB')\n", + " print(f'GPU VRAM used: {final_vram - initial_vram:.2f} GB')\n", + "\n", + " del state_dict\n", + " torch.cuda.empty_cache()\n", + " print('VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.')\n", + " except Exception as e:\n", + " print(f'Load failed: {e}')\n", + "\n", + "check_vram_load()" + ], + "execution_count": 35, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Checking if /content/ponyDiffusionV6XL.safetensors can fit in VRAM...\n", + "Success!\n", + "System RAM used: -0.03 GB\n", + "GPU VRAM used: 6.99 GB\n", + "VRAM cleared. 
If RAM usage stayed low, the VRAM-load strategy works.\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ba65b51c", + "outputId": "4be365ea-1e1b-4ccd-d91d-da012829108d" + }, + "source": [ + "import torch\n", + "from safetensors.torch import load_file\n", + "import psutil\n", + "\n", + "def check_vram_load(path='/content/ponyDiffusionV6XL.safetensors'):\n", + " print(f'Checking if {path} can fit in VRAM...')\n", + " initial_vram = torch.cuda.memory_reserved() / 1024**3\n", + " initial_ram = psutil.virtual_memory().used / 1024**3\n", + "\n", + " try:\n", + " # Load directly to GPU device\n", + " state_dict = load_file(path, device='cuda')\n", + "\n", + " final_vram = torch.cuda.memory_reserved() / 1024**3\n", + " final_ram = psutil.virtual_memory().used / 1024**3\n", + "\n", + " print(f'Success!')\n", + " print(f'System RAM used: {final_ram - initial_ram:.2f} GB')\n", + " print(f'GPU VRAM used: {final_vram - initial_vram:.2f} GB')\n", + "\n", + " del state_dict\n", + " torch.cuda.empty_cache()\n", + " print('VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.')\n", + " except Exception as e:\n", + " print(f'Load failed: {e}')\n", + "\n", + "check_vram_load()" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Checking if /content/ponyDiffusionV6XL.safetensors can fit in VRAM...\n", + "Success!\n", + "System RAM used: 0.00 GB\n", + "GPU VRAM used: 6.99 GB\n", + "VRAM cleared. 
If RAM usage stayed low, the VRAM-load strategy works.\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "rei0_q-oChPx" + }, + "source": [ + "#@title Convert Safetensors to Diffusers (Updated)\n", + "# Download a more recent version of the conversion script\n", + "!wget -q -O convert_diffusers.py https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py\n", + "\n", + "# Convert Pony V6 (SDXL) to Diffusers format\n", + "# We remove the unrecognized --use_safetensors flag\n", + "!python3 convert_diffusers.py \\\n", + " --checkpoint_path /content/ponyDiffusionV6XL.safetensors \\\n", + " --dump_path /content/pony_diffusers \\\n", + " --from_safetensors \\\n", + " --to_safetensors \\\n", + " --device cuda\n", + "\n", + "print(\"\\nConversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "f48d0810", + "outputId": "5f684992-17ff-4e8b-cef7-917237918e12" + }, + "source": [ + "import psutil\n", + "import torch\n", + "\n", + "def print_memory_stats():\n", + " # System RAM\n", + " ram = psutil.virtual_memory()\n", + " print(f\"System RAM Usage: {ram.used / 1024**3:.2f} GB / {ram.total / 1024**3:.2f} GB ({ram.percent}%)\")\n", + "\n", + " # GPU RAM\n", + " if torch.cuda.is_available():\n", + " for i in range(torch.cuda.device_count()):\n", + " total_gpu = torch.cuda.get_device_properties(i).total_memory\n", + " reserved_gpu = torch.cuda.memory_reserved(i)\n", + " allocated_gpu = torch.cuda.memory_allocated(i)\n", + " print(f\"GPU {i} Usage: {allocated_gpu / 1024**3:.2f} GB / {total_gpu / 1024**3:.2f} GB\")\n", + "\n", + "print_memory_stats()" + ], + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "System RAM Usage: 1.00 GB / 12.67 GB (10.5%)\n", 
+ "GPU 0 Usage: 0.00 GB / 14.56 GB\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "06468516", + "outputId": "b39b18b4-5722-4ba8-b209-2931af026252" + }, + "source": [ + "import gc\n", + "import torch\n", + "\n", + "# Clear Python and Torch memory\n", + "gc.collect()\n", + "torch.cuda.empty_cache()\n", + "\n", + "# Clear Linux system cache (RAM)\n", + "!sync && echo 3 > /proc/sys/vm/drop_caches\n", + "\n", + "print(\"System RAM and GPU memory cleared.\")" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/bash: line 1: /proc/sys/vm/drop_caches: Read-only file system\n", + "System RAM and GPU memory cleared.\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "758df299", + "outputId": "cc97681b-477a-44b5-ddc8-c27924d868f5" + }, + "source": [ + "import torch\n", + "from safetensors.torch import load_file\n", + "\n", + "def check_model_load(path=\"/content/ponyDiffusionV6XL.safetensors\"):\n", + " print(f\"Attempting to pre-load {path} to GPU to verify memory availability...\")\n", + " try:\n", + " # Loading with mmap=True and moving to device piece-by-piece is easier on RAM\n", + " state_dict = load_file(path, device=\"cuda\")\n", + " print(\"Successfully loaded model to GPU VRAM.\")\n", + "\n", + " # We don't actually need to keep it in memory here, we just wanted to see if it fits\n", + " del state_dict\n", + " torch.cuda.empty_cache()\n", + " print(\"GPU Memory cleared and ready for trainer.\")\n", + " except Exception as e:\n", + " print(f\"Load failed: {e}\")\n", + "\n", + "check_model_load()" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Attempting to pre-load /content/ponyDiffusionV6XL.safetensors to GPU to verify memory availability...\n", + "Successfully loaded model to GPU 
VRAM.\n", + "GPU Memory cleared and ready for trainer.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2c0ea383" + }, + "source": [ + "### Pro-Tip for Kohya GUI Settings:\n", + "Now that we verified the model can load, ensure these are set in the **Parameters** tab to prevent the training process itself from spiking the RAM:\n", + "1. **High-level settings**: Check `Low RAM` if available.\n", + "2. **Memory management**: Ensure `Gradient Checkpointing` is ON.\n", + "3. **Optimizers**: Use `AdamW8bit` or `Prodigy` (8-bit versions save massive amounts of RAM)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "83390a2d" + }, + "source": [ + "!pip install --upgrade huggingface-hub accelerate" + ], + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ @@ -65,6 +663,65 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "18df2cdf", + "outputId": "8fbdd5fa-e378-4ce8-8a92-ef808d2d6ba9" + }, + "source": [ + "#@title Convert Safetensors to Diffusers (Updated)\n", + "# Download a more recent version of the conversion script\n", + "!wget -q -O convert_diffusers.py https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py\n", + "\n", + "# Convert Pony V6 (SDXL) to Diffusers format\n", + "# We remove the unrecognized --use_safetensors flag\n", + "!python3 convert_diffusers.py \\\n", + " --checkpoint_path /content/ponyDiffusionV6XL.safetensors \\\n", + " --dump_path /content/pony_diffusers \\\n", + " --from_safetensors \\\n", + " --to_safetensors \\\n", + " --device cuda\n", + "\n", + "print(\"\\nConversion complete! 
In Kohya GUI, use '/content/pony_diffusers' as your source model path.\")" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n", + " torch.utils._pytree._register_pytree_node(\n", + "2026-04-10 20:15:04.758845: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1775852104.780318 58974 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1775852104.786834 58974 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "W0000 00:00:1775852104.802905 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1775852104.802933 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1775852104.802937 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1775852104.802942 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. 
Please use `torch.utils._pytree.register_pytree_node` instead.\n", + " torch.utils._pytree._register_pytree_node(\n", + "/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", + " warnings.warn(\n", + "config.json: 4.52kB [00:00, 16.5MB/s]\n", + "tokenizer_config.json: 100% 904/904 [00:00<00:00, 6.12MB/s]\n", + "vocab.json: 862kB [00:00, 36.3MB/s]\n", + "merges.txt: 525kB [00:00, 85.8MB/s]\n", + "special_tokens_map.json: 100% 389/389 [00:00<00:00, 2.91MB/s]\n", + "tokenizer.json: 2.22MB [00:00, 133MB/s]\n", + "config.json: 4.88kB [00:00, 21.8MB/s]\n", + "^C\n", + "\n", + "Conversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\n" + ] + } + ] + }, { "cell_type": "code", "execution_count": null,