mirror of https://github.com/bmaltais/kohya_ss
Created using Colab
parent
104d190479
commit
4fb1521430
|
|
@ -42,6 +42,604 @@
|
|||
"!python kohya_gui.py --share --headless"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Run Kohya GUI"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "PDMNcVe-D0v4"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# 1. Clean up and install with a stable configuration to prevent the torch-reinstall loop\n",
|
||||
"!pip install --upgrade pip\n",
|
||||
"!pip install torch==2.5.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121\n",
|
||||
"\n",
|
||||
"# Install xformers separately without dependencies to prevent it from downgrading torch\n",
|
||||
"!pip install xformers==0.0.28.post3 --no-deps\n",
|
||||
"\n",
|
||||
"# Install remaining requirements\n",
|
||||
"!pip install accelerate==0.32.1 huggingface-hub==0.25.0 diffusers==0.25.0\n",
|
||||
"!pip install transformers==4.44.0 safetensors==0.4.2 bitsandbytes==0.41.3.post2\n",
|
||||
"!pip install gradio==3.36.1 easygui==0.98.3 einops==0.6.0 voluptuous==0.13.1\n",
|
||||
"!pip install open-clip-torch==2.20.0 tensorboard==2.15.0\n",
|
||||
"\n",
|
||||
"# 2. Launch with environment variables and Low-RAM fix\n",
|
||||
"%cd /content/kohya_ss\n",
|
||||
"import os\n",
|
||||
"import torch\n",
|
||||
"\n",
|
||||
"# Forces torch to be more aggressive with memory reuse\n",
|
||||
"os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:512'\n",
|
||||
"\n",
|
||||
"# Pre-clear memory before starting\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" torch.cuda.empty_cache()\n",
|
||||
"\n",
|
||||
"# Launch the GUI with --share --headless. Note: --lowram is NOT passed on this command line, so enable the Low RAM option in the GUI's training parameters\n",
|
||||
"!python kohya_gui.py --share --headless"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"id": "wM6vFztrpnni"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "9b6261da",
|
||||
"outputId": "eca83b62-3490-4044-9646-c50b1c88c328"
|
||||
},
|
||||
"source": [
|
||||
"#@title Direct SDXL Training Command (VRAM-Load Strategy)\n",
|
||||
"import os\n",
|
||||
"import torch\n",
|
||||
"import gc\n",
|
||||
"\n",
|
||||
"# 1. System Deep Clean\n",
|
||||
"gc.collect()\n",
|
||||
"torch.cuda.empty_cache()\n",
|
||||
"\n",
|
||||
"# 2. Setup Paths\n",
|
||||
"%cd /content/kohya_ss\n",
|
||||
"\n",
|
||||
"# 3. Apply the Direct-to-GPU Patch\n",
|
||||
"# This forces the loader to use 'cuda' immediately, bypassing the System RAM (CPU) stage\n",
|
||||
"!sed -i \"s/load_file(checkpoint_path)/load_file(checkpoint_path, device='cuda')/g\" library/sdxl_model_util.py\n",
|
||||
"!sed -i \"s/load_file(model_path)/load_file(model_path, device='cuda')/g\" library/model_util.py\n",
|
||||
"\n",
|
||||
"os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:128'\n",
|
||||
"\n",
|
||||
"MODEL_PATH = \"/content/ponyDiffusionV6XL.safetensors\"\n",
|
||||
"DATA_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project\"\n",
|
||||
"OUTPUT_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/Outputs/pkmnessentialitem\"\n",
|
||||
"LOG_DIR = \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/log\"\n",
|
||||
"OUTPUT_NAME = \"pkmn_items_v1\"\n",
|
||||
"\n",
|
||||
"# 4. Launch with Low-RAM strategies and 512 resolution\n",
|
||||
"!accelerate launch --num_cpu_threads_per_process=1 \"./sdxl_train_network.py\" \\\n",
|
||||
" --enable_bucket \\\n",
|
||||
" --pretrained_model_name_or_path=\"{MODEL_PATH}\" \\\n",
|
||||
" --train_data_dir=\"{DATA_DIR}\" \\\n",
|
||||
" --resolution=\"512,512\" \\\n",
|
||||
" --output_dir=\"{OUTPUT_DIR}\" \\\n",
|
||||
" --logging_dir=\"{LOG_DIR}\" \\\n",
|
||||
" --network_alpha=16 \\\n",
|
||||
" --save_model_as=safetensors \\\n",
|
||||
" --network_module=networks.lora \\\n",
|
||||
" --network_dim=32 \\\n",
|
||||
" --output_name=\"{OUTPUT_NAME}\" \\\n",
|
||||
" --lr_scheduler_num_cycles=1 \\\n",
|
||||
" --lr_scheduler_power=1 \\\n",
|
||||
" --no_half_vae \\\n",
|
||||
" --learning_rate=0.0001 \\\n",
|
||||
" --unet_lr=0.0001 \\\n",
|
||||
" --network_train_unet_only \\\n",
|
||||
" --lr_scheduler=cosine \\\n",
|
||||
" --lr_warmup_steps=200 \\\n",
|
||||
" --train_batch_size=1 \\\n",
|
||||
" --max_train_steps=2000 \\\n",
|
||||
" --save_every_n_epochs=2 \\\n",
|
||||
" --mixed_precision=fp16 \\\n",
|
||||
" --save_precision=fp16 \\\n",
|
||||
" --seed=0 \\\n",
|
||||
" --caption_extension=.txt \\\n",
|
||||
" --optimizer_type=AdamW8bit \\\n",
|
||||
" --bucket_reso_steps=64 \\\n",
|
||||
" --gradient_checkpointing \\\n",
|
||||
" --xformers \\\n",
|
||||
" --bucket_no_upscale \\\n",
|
||||
" --lowram \\\n",
|
||||
" --mem_eff_attn"
|
||||
],
|
||||
"execution_count": 38,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"/content/kohya_ss\n",
|
||||
"The following values were not passed to `accelerate launch` and had defaults used instead:\n",
|
||||
"\t`--num_processes` was set to a value of `1`\n",
|
||||
"\t`--num_machines` was set to a value of `1`\n",
|
||||
"\t`--mixed_precision` was set to a value of `'no'`\n",
|
||||
"\t`--dynamo_backend` was set to a value of `'no'`\n",
|
||||
"To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n",
|
||||
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
|
||||
" torch.utils._pytree._register_pytree_node(\n",
|
||||
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
|
||||
" torch.utils._pytree._register_pytree_node(\n",
|
||||
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
|
||||
" torch.utils._pytree._register_pytree_node(\n",
|
||||
"2026-04-10 21:58:17.017253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
||||
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
||||
"E0000 00:00:1775858297.052001 85560 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
||||
"E0000 00:00:1775858297.062480 85560 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
||||
"W0000 00:00:1775858297.079987 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
||||
"W0000 00:00:1775858297.080015 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
||||
"W0000 00:00:1775858297.080019 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
||||
"W0000 00:00:1775858297.080023 85560 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
||||
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
|
||||
" torch.utils._pytree._register_pytree_node(\n",
|
||||
"/usr/local/lib/python3.12/dist-packages/timm/models/layers/__init__.py:49: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers\n",
|
||||
" warnings.warn(f\"Importing from {__name__} is deprecated, please import via timm.layers\", FutureWarning)\n",
|
||||
"prepare tokenizers\n",
|
||||
"/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
|
||||
" warnings.warn(\n",
|
||||
"Using DreamBooth method.\n",
|
||||
"ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: log\n",
|
||||
"ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: Outputs\n",
|
||||
"ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: Configs\n",
|
||||
"ignore directory without repeats / 繰り返し回数のないディレクトリを無視します: .ipynb_checkpoints\n",
|
||||
"prepare images.\n",
|
||||
"found directory /content/drive/Othercomputers/My Laptop/pokemon_lora_project/20_pkmnessentialitem contains 10 image files\n",
|
||||
"200 train images with repeating.\n",
|
||||
"0 reg images.\n",
|
||||
"no regularization images / 正則化画像が見つかりませんでした\n",
|
||||
"[Dataset 0]\n",
|
||||
" batch_size: 1\n",
|
||||
" resolution: (512, 512)\n",
|
||||
" enable_bucket: True\n",
|
||||
" min_bucket_reso: 256\n",
|
||||
" max_bucket_reso: 1024\n",
|
||||
" bucket_reso_steps: 64\n",
|
||||
" bucket_no_upscale: True\n",
|
||||
"\n",
|
||||
" [Subset 0 of Dataset 0]\n",
|
||||
" image_dir: \"/content/drive/Othercomputers/My Laptop/pokemon_lora_project/20_pkmnessentialitem\"\n",
|
||||
" image_count: 10\n",
|
||||
" num_repeats: 20\n",
|
||||
" shuffle_caption: False\n",
|
||||
" keep_tokens: 0\n",
|
||||
" caption_dropout_rate: 0.0\n",
|
||||
" caption_dropout_every_n_epoches: 0\n",
|
||||
" caption_tag_dropout_rate: 0.0\n",
|
||||
" color_aug: False\n",
|
||||
" flip_aug: False\n",
|
||||
" face_crop_aug_range: None\n",
|
||||
" random_crop: False\n",
|
||||
" token_warmup_min: 1,\n",
|
||||
" token_warmup_step: 0,\n",
|
||||
" is_reg: False\n",
|
||||
" class_tokens: pkmnessentialitem\n",
|
||||
" caption_extension: .txt\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"[Dataset 0]\n",
|
||||
"loading image sizes.\n",
|
||||
"100% 10/10 [00:00<00:00, 284.23it/s]\n",
|
||||
"make buckets\n",
|
||||
"min_bucket_reso and max_bucket_reso are ignored if bucket_no_upscale is set, because bucket reso is defined by image size automatically / bucket_no_upscaleが指定された場合は、bucketの解像度は画像サイズから自動計算されるため、min_bucket_resoとmax_bucket_resoは無視されます\n",
|
||||
"number of images (including repeats) / 各bucketの画像枚数(繰り返し回数を含む)\n",
|
||||
"bucket 0: resolution (512, 512), count: 200\n",
|
||||
"mean ar error (without repeats): 0.0\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/content/kohya_ss/./sdxl_train_network.py\", line 167, in <module>\n",
|
||||
" trainer.train(args)\n",
|
||||
" File \"/content/kohya_ss/train_network.py\", line 182, in train\n",
|
||||
" current_epoch = Value(\"i\", 0)\n",
|
||||
" ^^^^^^^^^^^^^\n",
|
||||
" File \"/usr/lib/python3.12/multiprocessing/context.py\", line 135, in Value\n",
|
||||
" return Value(typecode_or_type, *args, lock=lock,\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/usr/lib/python3.12/multiprocessing/sharedctypes.py\", line 79, in Value\n",
|
||||
" lock = ctx.RLock()\n",
|
||||
" ^^^^^^^^^^^\n",
|
||||
" File \"/usr/lib/python3.12/multiprocessing/context.py\", line 73, in RLock\n",
|
||||
" return RLock(ctx=self.get_context())\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/usr/lib/python3.12/multiprocessing/synchronize.py\", line 194, in __init__\n",
|
||||
" SemLock.__init__(self, RECURSIVE_MUTEX, 1, 1, ctx=ctx)\n",
|
||||
" File \"/usr/lib/python3.12/multiprocessing/synchronize.py\", line 57, in __init__\n",
|
||||
" sl = self._semlock = _multiprocessing.SemLock(\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
"OSError: [Errno 28] No space left on device\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/usr/local/bin/accelerate\", line 6, in <module>\n",
|
||||
" sys.exit(main())\n",
|
||||
" ^^^^^^\n",
|
||||
" File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/accelerate_cli.py\", line 48, in main\n",
|
||||
" args.func(args)\n",
|
||||
" File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/launch.py\", line 1097, in launch_command\n",
|
||||
" simple_launcher(args)\n",
|
||||
" File \"/usr/local/lib/python3.12/dist-packages/accelerate/commands/launch.py\", line 703, in simple_launcher\n",
|
||||
" raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)\n",
|
||||
"subprocess.CalledProcessError: Command '['/usr/bin/python3', './sdxl_train_network.py', '--enable_bucket', '--pretrained_model_name_or_path=/content/ponyDiffusionV6XL.safetensors', '--train_data_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project', '--resolution=512,512', '--output_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project/Outputs/pkmnessentialitem', '--logging_dir=/content/drive/Othercomputers/My Laptop/pokemon_lora_project/log', '--network_alpha=16', '--save_model_as=safetensors', '--network_module=networks.lora', '--network_dim=32', '--output_name=pkmn_items_v1', '--lr_scheduler_num_cycles=1', '--lr_scheduler_power=1', '--no_half_vae', '--learning_rate=0.0001', '--unet_lr=0.0001', '--network_train_unet_only', '--lr_scheduler=cosine', '--lr_warmup_steps=200', '--train_batch_size=1', '--max_train_steps=2000', '--save_every_n_epochs=2', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=0', '--caption_extension=.txt', '--optimizer_type=AdamW8bit', '--bucket_reso_steps=64', '--gradient_checkpointing', '--xformers', '--bucket_no_upscale', '--lowram', '--mem_eff_attn']' returned non-zero exit status 1.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "1fede1bc",
|
||||
"outputId": "91d3aaba-8434-4742-cd6f-c354f507a175"
|
||||
},
|
||||
"source": [
|
||||
"import psutil\n",
|
||||
"import torch\n",
|
||||
"\n",
|
||||
"def print_memory_stats():\n",
|
||||
" # System RAM\n",
|
||||
" ram = psutil.virtual_memory()\n",
|
||||
" print(f\"System RAM Usage: {ram.used / 1024**3:.2f} GB / {ram.total / 1024**3:.2f} GB ({ram.percent}%)\")\n",
|
||||
"\n",
|
||||
" # GPU RAM\n",
|
||||
" if torch.cuda.is_available():\n",
|
||||
" for i in range(torch.cuda.device_count()):\n",
|
||||
" total_gpu = torch.cuda.get_device_properties(i).total_memory\n",
|
||||
" reserved_gpu = torch.cuda.memory_reserved(i)\n",
|
||||
" allocated_gpu = torch.cuda.memory_allocated(i)\n",
|
||||
" print(f\"GPU {i} Usage: {allocated_gpu / 1024**3:.2f} GB / {total_gpu / 1024**3:.2f} GB\")\n",
|
||||
"\n",
|
||||
"print_memory_stats()"
|
||||
],
|
||||
"execution_count": 37,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"System RAM Usage: 1.21 GB / 12.67 GB (57.1%)\n",
|
||||
"GPU 0 Usage: 0.00 GB / 14.56 GB\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "523ddfed",
|
||||
"outputId": "11e39ca9-eb83-47ac-d611-17a62b5ffd4f"
|
||||
},
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import torch\n",
|
||||
"from safetensors.torch import load_file\n",
|
||||
"import psutil\n",
|
||||
"\n",
|
||||
"def verify_and_monitor():\n",
|
||||
" model_path = \"/content/ponyDiffusionV6XL.safetensors\"\n",
|
||||
" if not os.path.exists(model_path):\n",
|
||||
" print(f\"ERROR: Model not found at {model_path}\")\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" print(f\"Model size: {os.path.getsize(model_path) / 1024**3:.2f} GB\")\n",
|
||||
"\n",
|
||||
" # Check RAM before loading\n",
|
||||
" ram = psutil.virtual_memory()\n",
|
||||
" print(f\"Initial RAM: {ram.available / 1024**3:.2f} GB available\")\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" print(\"Testing model load to CPU (this spikes RAM)...\")\n",
|
||||
" # Load the whole state dict on the CPU to check whether system RAM can hold it\n",
|
||||
" sd = load_file(model_path, device='cpu')\n",
|
||||
" print(\"Successfully read safetensors header.\")\n",
|
||||
" del sd\n",
|
||||
" gc.collect()\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Load test failed: {e}\")\n",
|
||||
"\n",
|
||||
"verify_and_monitor()"
|
||||
],
|
||||
"execution_count": 34,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Model size: 6.46 GB\n",
|
||||
"Initial RAM: 5.51 GB available\n",
|
||||
"Testing model load to CPU (this spikes RAM)...\n",
|
||||
"Successfully read safetensors header.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "91722104",
|
||||
"outputId": "cb16f2c6-acbd-455f-c70d-22206649724e"
|
||||
},
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from safetensors.torch import load_file\n",
|
||||
"import psutil\n",
|
||||
"\n",
|
||||
"def check_vram_load(path='/content/ponyDiffusionV6XL.safetensors'):\n",
|
||||
" print(f'Checking if {path} can fit in VRAM...')\n",
|
||||
" initial_vram = torch.cuda.memory_reserved() / 1024**3\n",
|
||||
" initial_ram = psutil.virtual_memory().used / 1024**3\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" # Load directly to GPU device\n",
|
||||
" state_dict = load_file(path, device='cuda')\n",
|
||||
"\n",
|
||||
" final_vram = torch.cuda.memory_reserved() / 1024**3\n",
|
||||
" final_ram = psutil.virtual_memory().used / 1024**3\n",
|
||||
"\n",
|
||||
" print(f'Success!')\n",
|
||||
" print(f'System RAM used: {final_ram - initial_ram:.2f} GB')\n",
|
||||
" print(f'GPU VRAM used: {final_vram - initial_vram:.2f} GB')\n",
|
||||
"\n",
|
||||
" del state_dict\n",
|
||||
" torch.cuda.empty_cache()\n",
|
||||
" print('VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.')\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f'Load failed: {e}')\n",
|
||||
"\n",
|
||||
"check_vram_load()"
|
||||
],
|
||||
"execution_count": 35,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Checking if /content/ponyDiffusionV6XL.safetensors can fit in VRAM...\n",
|
||||
"Success!\n",
|
||||
"System RAM used: -0.03 GB\n",
|
||||
"GPU VRAM used: 6.99 GB\n",
|
||||
"VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "ba65b51c",
|
||||
"outputId": "4be365ea-1e1b-4ccd-d91d-da012829108d"
|
||||
},
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from safetensors.torch import load_file\n",
|
||||
"import psutil\n",
|
||||
"\n",
|
||||
"def check_vram_load(path='/content/ponyDiffusionV6XL.safetensors'):\n",
|
||||
" print(f'Checking if {path} can fit in VRAM...')\n",
|
||||
" initial_vram = torch.cuda.memory_reserved() / 1024**3\n",
|
||||
" initial_ram = psutil.virtual_memory().used / 1024**3\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" # Load directly to GPU device\n",
|
||||
" state_dict = load_file(path, device='cuda')\n",
|
||||
"\n",
|
||||
" final_vram = torch.cuda.memory_reserved() / 1024**3\n",
|
||||
" final_ram = psutil.virtual_memory().used / 1024**3\n",
|
||||
"\n",
|
||||
" print(f'Success!')\n",
|
||||
" print(f'System RAM used: {final_ram - initial_ram:.2f} GB')\n",
|
||||
" print(f'GPU VRAM used: {final_vram - initial_vram:.2f} GB')\n",
|
||||
"\n",
|
||||
" del state_dict\n",
|
||||
" torch.cuda.empty_cache()\n",
|
||||
" print('VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.')\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f'Load failed: {e}')\n",
|
||||
"\n",
|
||||
"check_vram_load()"
|
||||
],
|
||||
"execution_count": 21,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Checking if /content/ponyDiffusionV6XL.safetensors can fit in VRAM...\n",
|
||||
"Success!\n",
|
||||
"System RAM used: 0.00 GB\n",
|
||||
"GPU VRAM used: 6.99 GB\n",
|
||||
"VRAM cleared. If RAM usage stayed low, the VRAM-load strategy works.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "rei0_q-oChPx"
|
||||
},
|
||||
"source": [
|
||||
"#@title Convert Safetensors to Diffusers (Updated)\n",
|
||||
"# Download a more recent version of the conversion script\n",
|
||||
"!wget -q -O convert_diffusers.py https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py\n",
|
||||
"\n",
|
||||
"# Convert Pony V6 (SDXL) to Diffusers format\n",
|
||||
"# We remove the unrecognized --use_safetensors flag\n",
|
||||
"!python3 convert_diffusers.py \\\n",
|
||||
" --checkpoint_path /content/ponyDiffusionV6XL.safetensors \\\n",
|
||||
" --dump_path /content/pony_diffusers \\\n",
|
||||
" --from_safetensors \\\n",
|
||||
" --to_safetensors \\\n",
|
||||
" --device cuda\n",
|
||||
"\n",
|
||||
"print(\"\\nConversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\")"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "f48d0810",
|
||||
"outputId": "5f684992-17ff-4e8b-cef7-917237918e12"
|
||||
},
|
||||
"source": [
|
||||
"import psutil\n",
|
||||
"import torch\n",
|
||||
"\n",
|
||||
"def print_memory_stats():\n",
|
||||
" # System RAM\n",
|
||||
" ram = psutil.virtual_memory()\n",
|
||||
" print(f\"System RAM Usage: {ram.used / 1024**3:.2f} GB / {ram.total / 1024**3:.2f} GB ({ram.percent}%)\")\n",
|
||||
"\n",
|
||||
" # GPU RAM\n",
|
||||
" if torch.cuda.is_available():\n",
|
||||
" for i in range(torch.cuda.device_count()):\n",
|
||||
" total_gpu = torch.cuda.get_device_properties(i).total_memory\n",
|
||||
" reserved_gpu = torch.cuda.memory_reserved(i)\n",
|
||||
" allocated_gpu = torch.cuda.memory_allocated(i)\n",
|
||||
" print(f\"GPU {i} Usage: {allocated_gpu / 1024**3:.2f} GB / {total_gpu / 1024**3:.2f} GB\")\n",
|
||||
"\n",
|
||||
"print_memory_stats()"
|
||||
],
|
||||
"execution_count": 16,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"System RAM Usage: 1.00 GB / 12.67 GB (10.5%)\n",
|
||||
"GPU 0 Usage: 0.00 GB / 14.56 GB\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "06468516",
|
||||
"outputId": "b39b18b4-5722-4ba8-b209-2931af026252"
|
||||
},
|
||||
"source": [
|
||||
"import gc\n",
|
||||
"import torch\n",
|
||||
"\n",
|
||||
"# Clear Python and Torch memory\n",
|
||||
"gc.collect()\n",
|
||||
"torch.cuda.empty_cache()\n",
|
||||
"\n",
|
||||
"# Clear Linux system cache (RAM)\n",
|
||||
"!sync && echo 3 > /proc/sys/vm/drop_caches\n",
|
||||
"\n",
|
||||
"print(\"System RAM and GPU memory cleared.\")"
|
||||
],
|
||||
"execution_count": 15,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"/bin/bash: line 1: /proc/sys/vm/drop_caches: Read-only file system\n",
|
||||
"System RAM and GPU memory cleared.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "758df299",
|
||||
"outputId": "cc97681b-477a-44b5-ddc8-c27924d868f5"
|
||||
},
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from safetensors.torch import load_file\n",
|
||||
"\n",
|
||||
"def check_model_load(path=\"/content/ponyDiffusionV6XL.safetensors\"):\n",
|
||||
" print(f\"Attempting to pre-load {path} to GPU to verify memory availability...\")\n",
|
||||
" try:\n",
|
||||
" # Load the tensors straight onto the GPU via load_file's device argument (no mmap option is passed), which is easier on system RAM\n",
|
||||
" state_dict = load_file(path, device=\"cuda\")\n",
|
||||
" print(\"Successfully loaded model to GPU VRAM.\")\n",
|
||||
"\n",
|
||||
" # We don't actually need to keep it in memory here, we just wanted to see if it fits\n",
|
||||
" del state_dict\n",
|
||||
" torch.cuda.empty_cache()\n",
|
||||
" print(\"GPU Memory cleared and ready for trainer.\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Load failed: {e}\")\n",
|
||||
"\n",
|
||||
"check_model_load()"
|
||||
],
|
||||
"execution_count": 18,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Attempting to pre-load /content/ponyDiffusionV6XL.safetensors to GPU to verify memory availability...\n",
|
||||
"Successfully loaded model to GPU VRAM.\n",
|
||||
"GPU Memory cleared and ready for trainer.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "2c0ea383"
|
||||
},
|
||||
"source": [
|
||||
"### Pro-Tip for Kohya GUI Settings:\n",
|
||||
"Now that we have verified the model can load, ensure these are set in the **Parameters** tab to prevent the training process itself from spiking system RAM:\n",
|
||||
"1. **High-level settings**: Check `Low RAM` if available.\n",
|
||||
"2. **Memory management**: Ensure `Gradient Checkpointing` is ON.\n",
|
||||
"3. **Optimizers**: Use `AdamW8bit` or `Prodigy` (8-bit optimizers such as `AdamW8bit` keep their state in 8 bits, which saves a large amount of memory)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "83390a2d"
|
||||
},
|
||||
"source": [
|
||||
"!pip install --upgrade huggingface-hub accelerate"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
|
|
@ -65,6 +663,65 @@
|
|||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "18df2cdf",
|
||||
"outputId": "8fbdd5fa-e378-4ce8-8a92-ef808d2d6ba9"
|
||||
},
|
||||
"source": [
|
||||
"#@title Convert Safetensors to Diffusers (Updated)\n",
|
||||
"# Download a more recent version of the conversion script\n",
|
||||
"!wget -q -O convert_diffusers.py https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py\n",
|
||||
"\n",
|
||||
"# Convert Pony V6 (SDXL) to Diffusers format\n",
|
||||
"# We remove the unrecognized --use_safetensors flag\n",
|
||||
"!python3 convert_diffusers.py \\\n",
|
||||
" --checkpoint_path /content/ponyDiffusionV6XL.safetensors \\\n",
|
||||
" --dump_path /content/pony_diffusers \\\n",
|
||||
" --from_safetensors \\\n",
|
||||
" --to_safetensors \\\n",
|
||||
" --device cuda\n",
|
||||
"\n",
|
||||
"print(\"\\nConversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\")"
|
||||
],
|
||||
"execution_count": 14,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
|
||||
" torch.utils._pytree._register_pytree_node(\n",
|
||||
"2026-04-10 20:15:04.758845: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
||||
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
||||
"E0000 00:00:1775852104.780318 58974 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
||||
"E0000 00:00:1775852104.786834 58974 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
||||
"W0000 00:00:1775852104.802905 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
||||
"W0000 00:00:1775852104.802933 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
||||
"W0000 00:00:1775852104.802937 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
||||
"W0000 00:00:1775852104.802942 58974 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
||||
"/usr/local/lib/python3.12/dist-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
|
||||
" torch.utils._pytree._register_pytree_node(\n",
|
||||
"/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
|
||||
" warnings.warn(\n",
|
||||
"config.json: 4.52kB [00:00, 16.5MB/s]\n",
|
||||
"tokenizer_config.json: 100% 904/904 [00:00<00:00, 6.12MB/s]\n",
|
||||
"vocab.json: 862kB [00:00, 36.3MB/s]\n",
|
||||
"merges.txt: 525kB [00:00, 85.8MB/s]\n",
|
||||
"special_tokens_map.json: 100% 389/389 [00:00<00:00, 2.91MB/s]\n",
|
||||
"tokenizer.json: 2.22MB [00:00, 133MB/s]\n",
|
||||
"config.json: 4.88kB [00:00, 21.8MB/s]\n",
|
||||
"^C\n",
|
||||
"\n",
|
||||
"Conversion complete! In Kohya GUI, use '/content/pony_diffusers' as your source model path.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
|
|
|||
Loading…
Reference in New Issue