Merge pull request #3252 from bmaltais/dev

v25.1.0
pull/3263/head v25.1.0
bmaltais 2025-05-25 18:12:14 -04:00 committed by GitHub
commit 17075c4d25
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 2982 additions and 2220 deletions

View File

@ -18,4 +18,4 @@ jobs:
- uses: actions/checkout@v4
- name: typos-action
uses: crate-ci/typos@v1.31.1
uses: crate-ci/typos@v1.31.2

View File

@ -1 +1 @@
3.10
3.11

View File

@ -1 +1 @@
v25.0.3
v25.1.0

View File

@ -3,7 +3,47 @@ ARG UID=1000
ARG VERSION=EDGE
ARG RELEASE=0
FROM python:3.10-slim as build
########################################
# Base stage
########################################
FROM docker.io/library/python:3.11-slim-bullseye AS base
# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT
WORKDIR /tmp
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
# Install CUDA partially
# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/#debian
# Installing the complete CUDA Toolkit system-wide usually adds around 8GB to the image size.
# Since most CUDA packages are already installed through pip, there's no need to download the entire toolkit.
# Therefore, we opt to install only the essential libraries.
# Here is the package list for your reference: https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb /tmp/cuda-keyring_x86_64.deb
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
dpkg -i cuda-keyring_x86_64.deb && \
rm -f cuda-keyring_x86_64.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
# !If you experience any related issues, replace the following line with `cuda-12-4` to obtain the complete CUDA package.
cuda-nvcc-12-4
ENV PATH="/usr/local/cuda/bin${PATH:+:${PATH}}"
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64
ENV CUDA_VERSION=12.4
ENV NVIDIA_REQUIRE_CUDA=cuda>=12.4
ENV CUDA_HOME=/usr/local/cuda
########################################
# Build stage
########################################
FROM base AS build
# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
@ -11,10 +51,14 @@ ARG TARGETVARIANT
WORKDIR /app
# Install under /root/.local
ENV PIP_USER="true"
ARG PIP_NO_WARN_SCRIPT_LOCATION=0
ARG PIP_ROOT_USER_ACTION="ignore"
# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ENV UV_PROJECT_ENVIRONMENT=/venv
ENV VIRTUAL_ENV=/venv
ENV UV_LINK_MODE=copy
ENV UV_PYTHON_DOWNLOADS=0
ENV UV_INDEX=https://download.pytorch.org/whl/cu124
# Install build dependencies
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
@ -22,23 +66,28 @@ RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/v
apt-get update && apt-get upgrade -y && \
apt-get install -y --no-install-recommends python3-launchpadlib git curl
# Install PyTorch
# The versions must align and be in sync with the requirements_linux_docker.txt
# hadolint ignore=SC2102
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
pip install -U --extra-index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.nvidia.com \
torch==2.1.2 torchvision==0.16.2 \
xformers==0.0.23.post1 \
ninja \
pip setuptools wheel
# Install big dependencies separately for layer caching
# !Please note that the version restrictions should be the same as pyproject.toml
# No packages listed should be removed in the next `uv sync` command
# If this happens, please update the version restrictions or update the uv.lock file
RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \
uv venv --system-site-packages /venv && \
uv pip install --no-deps \
# torch (866.2MiB)
torch==2.5.1+cu124 \
# triton (199.8MiB)
triton==3.1.0 \
# tensorflow (615.0MiB)
tensorflow>=2.16.1 \
# onnxruntime-gpu (215.7MiB)
onnxruntime-gpu==1.19.2
# Install requirements
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
--mount=source=requirements_linux_docker.txt,target=requirements_linux_docker.txt \
--mount=source=requirements.txt,target=requirements.txt \
--mount=source=setup/docker_setup.py,target=setup.py \
--mount=source=sd-scripts,target=sd-scripts,rw \
pip install -r requirements_linux_docker.txt -r requirements.txt
# Install dependencies
RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=sd-scripts,target=sd-scripts,rw \
uv sync --frozen --no-dev --no-install-project --no-editable
# Replace pillow with pillow-simd (Only for x86)
ARG TARGETPLATFORM
@ -46,44 +95,24 @@ RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/v
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
apt-get update && apt-get install -y --no-install-recommends zlib1g-dev libjpeg62-turbo-dev build-essential && \
pip uninstall -y pillow && \
CC="cc -mavx2" pip install -U --force-reinstall pillow-simd; \
uv pip uninstall pillow && \
CC="cc -mavx2" uv pip install pillow-simd; \
fi
FROM python:3.10-slim as final
########################################
# Final stage
########################################
FROM base AS final
ARG TARGETARCH
ARG TARGETVARIANT
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
WORKDIR /tmp
ENV CUDA_VERSION=12.1.1
ENV NV_CUDA_CUDART_VERSION=12.1.105-1
ENV NVIDIA_REQUIRE_CUDA=cuda>=12.1
ENV NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-1
# Install CUDA partially
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb .
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
dpkg -i cuda-keyring_1.0-1_all.deb && \
rm cuda-keyring_1.0-1_all.deb && \
sed -i 's/^Components: main$/& contrib/' /etc/apt/sources.list.d/debian.sources && \
apt-get update && \
apt-get install -y --no-install-recommends \
# Installing the whole CUDA typically increases the image size by approximately **8GB**.
# To decrease the image size, we opt to install only the necessary libraries.
# Here is the package list for your reference: https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64
# !If you experience any related issues, replace the following line with `cuda-12-1` to obtain the complete CUDA package.
cuda-cudart-12-1=${NV_CUDA_CUDART_VERSION} ${NV_CUDA_COMPAT_PACKAGE} libcusparse-12-1 libnvjitlink-12-1
# Install runtime dependencies
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
apt-get update && \
apt-get update && apt-get upgrade -y && \
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libjpeg62 libtcl8.6 libtk8.6 libgoogle-perftools-dev dumb-init
# Fix missing libnvinfer7
@ -98,20 +127,23 @@ RUN groupadd -g $UID $UID && \
# Create directories with correct permissions
RUN install -d -m 775 -o $UID -g 0 /dataset && \
install -d -m 775 -o $UID -g 0 /licenses && \
install -d -m 775 -o $UID -g 0 /app
install -d -m 775 -o $UID -g 0 /app && \
install -d -m 775 -o $UID -g 0 /venv
# Copy licenses (OpenShift Policy)
COPY --link --chmod=775 LICENSE.md /licenses/LICENSE.md
# Copy dependencies and code (and support arbitrary uid for OpenShift best practice)
COPY --link --chown=$UID:0 --chmod=775 --from=build /root/.local /home/$UID/.local
COPY --link --chown=$UID:0 --chmod=775 --from=build /venv /venv
COPY --link --chown=$UID:0 --chmod=775 . /app
ENV PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:/home/$UID/.local/bin:$PATH"
ENV PYTHONPATH="${PYTHONPATH}:/home/$UID/.local/lib/python3.10/site-packages"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV PATH="/venv/bin${PATH:+:${PATH}}"
ENV PYTHONPATH="/venv/lib/python3.11/site-packages"
ENV LD_LIBRARY_PATH="/venv/lib/python3.11/site-packages/nvidia/cudnn/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV LD_PRELOAD=libtcmalloc.so
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
# Rich logging
# https://rich.readthedocs.io/en/stable/console.html#interactive-mode
ENV FORCE_COLOR="true"
@ -130,7 +162,7 @@ STOPSIGNAL SIGINT
# Use dumb-init as PID 1 to handle signals properly
ENTRYPOINT ["dumb-init", "--"]
CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860", "--headless"]
CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860", "--headless", "--noverify"]
ARG VERSION
ARG RELEASE
@ -144,4 +176,4 @@ LABEL name="bmaltais/kohya_ss" \
release=${RELEASE} \
io.k8s.display-name="kohya_ss" \
summary="Kohya's GUI: This repository provides a Gradio GUI for Kohya's Stable Diffusion trainers(https://github.com/kohya-ss/sd-scripts)." \
description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss."
description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss."

417
README.md
View File

@ -1,8 +1,18 @@
# Kohya's GUI
This repository primarily provides a Gradio GUI for [Kohya's Stable Diffusion trainers](https://github.com/kohya-ss/sd-scripts). However, support for Linux OS is also offered through community contributions. macOS support is not optimal at the moment but might work if the conditions are favorable.
[![GitHub stars](https://img.shields.io/github/stars/bmaltais/kohya_ss?style=social)](https://github.com/bmaltais/kohya_ss/stargazers)
[![GitHub forks](https://img.shields.io/github/forks/bmaltais/kohya_ss?style=social)](https://github.com/bmaltais/kohya_ss/network/members)
[![License](https://img.shields.io/github/license/bmaltais/kohya_ss)](LICENSE.md)
[![GitHub issues](https://img.shields.io/github/issues/bmaltais/kohya_ss)](https://github.com/bmaltais/kohya_ss/issues)
The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model.
This project provides a user-friendly Gradio-based Graphical User Interface (GUI) for [Kohya's Stable Diffusion training scripts](https://github.com/kohya-ss/sd-scripts). Stable Diffusion training empowers users to customize image generation models by fine-tuning existing models, creating unique artistic styles, and training specialized models like LoRA (Low-Rank Adaptation).
Key features of this GUI include:
* Easy-to-use interface for setting a wide range of training parameters.
* Automatic generation of the command-line interface (CLI) commands required to run the training scripts.
* Support for various training methods, including LoRA, Dreambooth, fine-tuning, and SDXL training.
Support for Linux and macOS is also available. While Linux support is actively maintained through community contributions, macOS compatibility may vary.
## Table of Contents
@ -13,10 +23,13 @@ The GUI allows you to set the training parameters and generate and run the requi
- [Windows](#windows)
- [Windows Pre-requirements](#windows-pre-requirements)
- [Setup Windows](#setup-windows)
- [Optional: CUDNN 8.9.6.50](#optional-cudnn-89650)
- [Using uv based package manager](#using-uv-based-package-manager)
- [Using the pip package manager](#using-the-pip-package-manager)
- [Linux and macOS](#linux-and-macos)
- [Linux Pre-requirements](#linux-pre-requirements)
- [Setup Linux](#setup-linux)
- [Using uv based package manager](#using-uv-based-package-manager-1)
- [Using pip based package manager](#using-pip-based-package-manager)
- [Install Location](#install-location)
- [Runpod](#runpod)
- [Manual installation](#manual-installation)
@ -61,7 +74,7 @@ The GUI allows you to set the training parameters and generate and run the requi
- [v25.0.2](#v2502)
- [v25.0.1](#v2501)
- [v25.0.0](#v2500)
## 🦒 Colab
This Colab notebook was not created or maintained by me; however, it appears to function effectively. The source can be found at: <https://github.com/camenduru/kohya_ss-colab>.
@ -72,24 +85,29 @@ I would like to express my gratitude to camenduru for their valuable contributio
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------ |
| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/kohya_ss-colab/blob/main/kohya_ss_colab.ipynb) | kohya_ss_gui_colab |
## Installation
## Installation Methods
### Windows
This project offers two primary methods for installing and running the GUI: using the `uv` package manager (recommended for ease of use and automatic updates) or using the traditional `pip` package manager. Below, you'll find details on both approaches. Please read this section to decide which method best suits your needs before proceeding to the OS-specific installation prerequisites.
#### Windows Pre-requirements
**Key Differences:**
To install the necessary dependencies on a Windows system, follow these steps:
* **`uv` method:**
* Simplifies the setup process.
* Automatically handles updates when you run `gui-uv.bat` (Windows) or `gui-uv.sh` (Linux).
* No need to run `setup.bat` or `setup.sh` after the initial clone.
* This is the recommended method for most users on Windows and Linux.
* **Not recommended for Runpod or macOS installations.** For these, please use the `pip` method.
* **`pip` method:**
* The traditional method, requiring manual execution of `setup.bat` (Windows) or `setup.sh` (Linux) after cloning and for updates.
* Necessary for environments like Runpod and macOS where the `uv` scripts are not intended to be used.
1. Install [Python 3.10.11](https://www.python.org/ftp/python/3.10.11/python-3.10.11-amd64.exe).
- During the installation process, ensure that you select the option to add Python to the 'PATH' environment variable.
Subsequent sections will detail the specific commands for each method.
2. Install [CUDA 12.4 toolkit](https://developer.nvidia.com/cuda-12-4-0-download-archive?target_os=Windows&target_arch=x86_64).
### Using `uv` (Recommended)
This method utilizes the `uv` package manager for a streamlined setup and automatic updates. It is the preferred approach for most users on Windows and Linux.
3. Install [Git](https://git-scm.com/download/win).
4. Install the [Visual Studio 2015, 2017, 2019, and 2022 redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
#### Setup Windows
> [!NOTE]
> This method is not intended for Runpod or macOS installation. Use the "pip based package manager" setup instead.
To set up the project, follow these steps:
@ -107,11 +125,36 @@ To set up the project, follow these steps:
cd kohya_ss
```
4. If you want to use the new uv based version of the script to run the GUI, you do not need to follow this step. On the other hand, if you want to use the legacy "pip" based method, please follow this next step.
For Linux, the steps are similar (clone and change directory as above).
Run one of the following setup scripts by executing the following command:
### Using `pip` (Traditional Method)
This method uses the traditional `pip` package manager and requires manual script execution for setup and updates. It is necessary for environments like Runpod or macOS, or if you prefer managing your environment with `pip`.
For systems with only Python 3.10.11 installed:
Regardless of your OS, start with these steps:
1. Open a terminal and navigate to the desired installation directory.
2. Clone the repository by running the following command:
```shell
git clone --recursive https://github.com/bmaltais/kohya_ss.git
```
3. Change into the `kohya_ss` directory:
```shell
cd kohya_ss
```
Then, proceed with OS-specific instructions:
**For Windows:**
* If you want to use the new uv based version of the script to run the GUI, you do not need to follow this step. On the other hand, if you want to use the legacy "pip" based method, please follow this next step.
Run one of the following setup scripts by executing the following command:
For systems with only Python 3.10.11 installed:
```shell
.\setup.bat
@ -123,13 +166,64 @@ To set up the project, follow these steps:
.\setup-3.10.bat
```
During the accelerate config step, use the default values as proposed during the configuration unless you know your hardware demands otherwise. The amount of VRAM on your GPU does not impact the values used.
During the accelerate config step, use the default values as proposed during the configuration unless you know your hardware demands otherwise. The amount of VRAM on your GPU does not impact the values used.
#### Optional: CUDNN 8.9.6.50
* Optional: CUDNN 8.9.6.50
The following steps are optional but will improve the learning speed for owners of NVIDIA 30X0/40X0 GPUs. These steps enable larger training batch sizes and faster training speeds.
The following steps are optional but will improve the learning speed for owners of NVIDIA 30X0/40X0 GPUs. These steps enable larger training batch sizes and faster training speeds.
1. Run `.\setup.bat` and select `2. (Optional) Install cudnn files (if you want to use the latest supported cudnn version)`.
Run `.\setup.bat` and select `2. (Optional) Install cudnn files (if you want to use the latest supported cudnn version)`.
**For Linux and macOS:**
* If you want to use the new uv based version of the script to run the GUI, you do not need to follow this step. On the other hand, if you want to use the legacy "pip" based method, please follow this next step.
If you encounter permission issues, make the `setup.sh` script executable by running the following command:
```shell
chmod +x ./setup.sh
```
Run the setup script by executing the following command:
```shell
./setup.sh
```
> [!NOTE]
> If you need additional options or information about the runpod environment, you can use `setup.sh -h` or `setup.sh --help` to display the help message.
## Prerequisites
Before you begin, ensure you have the following software and hardware:
* **Python:** Version 3.10.x or 3.11.x. (Python 3.11.9 is used in Windows pre-requirements, Python 3.10.9+ for Linux).
* **Git:** For cloning the repository and managing updates.
* **NVIDIA CUDA Toolkit:** Version 12.4 or compatible (as per installation steps).
* **NVIDIA GPU:** A compatible NVIDIA graphics card is required. VRAM requirements vary depending on the model and training parameters.
* **(Optional but Recommended) NVIDIA cuDNN:** For accelerated performance on compatible NVIDIA GPUs. (Often included with CUDA Toolkit or installed separately).
* **For Windows Users:** Visual Studio 2015, 2017, 2019, and 2022 Redistributable.
## Installation
### Windows
#### Windows Pre-requirements
To install the necessary dependencies on a Windows system, follow these steps:
1. Install [Python 3.11.9](https://www.python.org/ftp/python/3.11.9/python-3.11.9-amd64.exe).
- During the installation process, ensure that you select the option to add Python to the 'PATH' environment variable.
2. Install [CUDA 12.4 toolkit](https://developer.nvidia.com/cuda-12-4-0-download-archive?target_os=Windows&target_arch=x86_64).
3. Install [Git](https://git-scm.com/download/win).
4. Install the [Visual Studio 2015, 2017, 2019, and 2022 redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
#### Setup Windows
For detailed setup instructions using either `uv` or `pip`, please refer to the 'Installation Methods' section above. Ensure you have met the Windows Pre-requirements before proceeding with either method.
### Linux and macOS
@ -149,40 +243,12 @@ To install the necessary dependencies on a Linux system, ensure that you fulfill
#### Setup Linux
To set up the project on Linux or macOS, perform the following steps:
1. Open a terminal and navigate to the desired installation directory.
2. Clone the repository by running the following command:
```shell
git clone --recursive https://github.com/bmaltais/kohya_ss.git
```
3. Change into the `kohya_ss` directory:
```shell
cd kohya_ss
```
4. If you want to use the new uv based version of the script to run the GUI, you do not need to follow this step. On the other hand, if you want to use the legacy "pip" based method, please follow this next step.
If you encounter permission issues, make the `setup.sh` script executable by running the following command:
```shell
chmod +x ./setup.sh
```
Run the setup script by executing the following command:
```shell
./setup.sh
```
Note: If you need additional options or information about the runpod environment, you can use `setup.sh -h` or `setup.sh --help` to display the help message.
For detailed setup instructions using either `uv` or `pip`, please refer to the 'Installation Methods' section above. Ensure you have met the Linux Pre-requirements before proceeding with either method.
#### Install Location
Note: The information below regarding install location applies to both `uv` and `pip` installation methods described in the 'Installation Methods' section.
The default installation location on Linux is the directory where the script is located. If a previous installation is detected in that location, the setup will proceed there. Otherwise, the installation will fall back to `/opt/kohya_ss`. If `/opt` is not writable, the fallback location will be `$HOME/kohya_ss`. Finally, if none of the previous options are viable, the installation will be performed in the current directory.
For macOS and other non-Linux systems, the installation process will attempt to detect the previous installation directory based on where the script is run. If a previous installation is not found, the default location will be `$HOME/kohya_ss`. You can override this behavior by specifying a custom installation directory using the `-d` or `--dir` option when running the setup script.
@ -191,128 +257,15 @@ If you choose to use the interactive mode, the default values for the accelerate
### Runpod
#### Manual installation
To install the necessary components for Runpod and run kohya_ss, follow these steps:
1. Select the Runpod pytorch 2.2.0 template. This is important. Other templates may not work.
2. SSH into the Runpod.
3. Clone the repository by running the following command:
```shell
cd /workspace
git clone --recursive https://github.com/bmaltais/kohya_ss.git
```
4. Run the setup script:
```shell
cd kohya_ss
./setup-runpod.sh
```
5. Run the GUI with:
```shell
./gui.sh --share --headless
```
or with this if you expose 7860 directly via the runpod configuration:
```shell
./gui.sh --listen=0.0.0.0 --headless
```
6. Connect to the public URL displayed after the installation process is completed.
#### Pre-built Runpod template
To run from a pre-built Runpod template, you can:
1. Open the Runpod template by clicking on <https://runpod.io/gsc?template=ya6013lj5a&ref=w18gds2n>.
2. Deploy the template on the desired host.
3. Once deployed, connect to the Runpod on HTTP 3010 to access the kohya_ss GUI. You can also connect to auto1111 on HTTP 3000.
See [Runpod Installation Guide](docs/installation_runpod.md) for details.
### Novita
#### Pre-built Novita template
1. Open the Novita template by clicking on <https://novita.ai/gpus-console?templateId=312>.
2. Deploy the template on the desired host.
3. Once deployed, connect to the Novita on HTTP 7860 to access the kohya_ss GUI.
See [Novita Installation Guide](docs/installation_novita.md) for details.
### Docker
#### Get your Docker ready for GPU support
##### Windows
Once you have installed [**Docker Desktop**](https://www.docker.com/products/docker-desktop/), [**CUDA Toolkit**](https://developer.nvidia.com/cuda-downloads), [**NVIDIA Windows Driver**](https://www.nvidia.com.tw/Download/index.aspx), and ensured that your Docker is running with [**WSL2**](https://docs.docker.com/desktop/wsl/#turn-on-docker-desktop-wsl-2), you are ready to go.
Here is the official documentation for further reference.
<https://docs.nvidia.com/cuda/wsl-user-guide/index.html#nvidia-compute-software-support-on-wsl-2>
<https://docs.docker.com/desktop/wsl/use-wsl/#gpu-support>
##### Linux, OSX
Install an NVIDIA GPU Driver if you do not already have one installed.
<https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html>
Install the NVIDIA Container Toolkit with this guide.
<https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>
#### Design of our Dockerfile
- It is required that all training data is stored in the `dataset` subdirectory, which is mounted into the container at `/dataset`.
- Please note that the file picker functionality is not available. Instead, you will need to manually input the folder path and configuration file path.
- TensorBoard has been separated from the project.
- TensorBoard is not included in the Docker image.
- The "Start TensorBoard" button has been hidden.
- TensorBoard is launched from a distinct container [as shown here](/docker-compose.yaml#L41).
- The browser won't be launched automatically. You will need to manually open the browser and navigate to [http://localhost:7860/](http://localhost:7860/) and [http://localhost:6006/](http://localhost:6006/)
- This Dockerfile has been designed to be easily disposable. You can discard the container at any time and restart it with the new code version.
#### Use the pre-built Docker image
```bash
git clone --recursive https://github.com/bmaltais/kohya_ss.git
cd kohya_ss
docker compose up -d
```
To update the system, do `docker compose down && docker compose up -d --pull always`
#### Local docker build
> [!IMPORTANT]
> Clone the Git repository ***recursively*** to include submodules:
> `git clone --recursive https://github.com/bmaltais/kohya_ss.git`
```bash
git clone --recursive https://github.com/bmaltais/kohya_ss.git
cd kohya_ss
docker compose up -d --build
```
> [!NOTE]
> Building the image may take up to 20 minutes to complete.
To update the system, ***checkout to the new code version*** and rebuild using `docker compose down && docker compose up -d --build --pull always`
> If you are running on Linux, an alternative Docker container port with fewer limitations is available [here](https://github.com/P2Enjoy/kohya_ss-docker).
#### ashleykleynhans runpod docker builds
You may want to use the following repositories when running on runpod:
- Standalone Kohya_ss template: <https://github.com/ashleykleynhans/kohya-docker>
- Auto1111 + Kohya_ss GUI template: <https://github.com/ashleykleynhans/stable-diffusion-docker>
See [Docker Installation Guide](docs/installation_docker.md) for details.
## Upgrading
@ -320,41 +273,51 @@ To upgrade your installation to a new version, follow the instructions below.
### Windows Upgrade
If a new release becomes available, you can upgrade your repository by running the following commands from the root directory of the project:
If a new release becomes available, you can upgrade your repository by following these steps:
1. Pull the latest changes from the repository:
* **If you are using the `uv`-based installation (`gui-uv.bat`):**
1. Pull the latest changes from the repository:
```powershell
git pull
```
2. Updates to the Python environment are handled automatically when you next run the `gui-uv.bat` script. No separate setup script execution is needed.
```powershell
git pull
```
2. Run the setup script (if you do not use the uv script. If you use the uv versions of the scripts the updates are done for you automatically.):
```powershell
.\setup.bat
```
* **If you are using the `pip`-based installation (`gui.bat` or `gui.ps1`):**
1. Pull the latest changes from the repository:
```powershell
git pull
```
2. Run the setup script to update dependencies:
```powershell
.\setup.bat
```
### Linux and macOS Upgrade
To upgrade your installation on Linux or macOS, follow these steps:
1. Open a terminal and navigate to the root directory of the project.
* **If you are using the `uv`-based installation (`gui-uv.sh`):**
1. Open a terminal and navigate to the root directory of the project.
2. Pull the latest changes from the repository:
```bash
git pull
```
3. Updates to the Python environment are handled automatically when you next run the `gui-uv.sh` script. No separate setup script execution is needed.
2. Pull the latest changes from the repository:
```bash
git pull
```
3. Refresh and update everything (if you do not use the uv based script. If you use the uv versions of the scripts the updates are done for you automatically.):
```bash
./setup.sh
```
* **If you are using the `pip`-based installation (`gui.sh`):**
1. Open a terminal and navigate to the root directory of the project.
2. Pull the latest changes from the repository:
```bash
git pull
```
3. Refresh and update everything by running the setup script:
```bash
./setup.sh
```
## Starting GUI Service
To launch the GUI service, you can use the provided scripts or run the `kohya_gui.py` script directly. Use the command line arguments listed below to configure the underlying service.
To launch the GUI service, use the script corresponding to your chosen installation method (`uv` or `pip`), or run the `kohya_gui.py` script directly. Use the command line arguments listed below to configure the underlying service.
```text
--help show this help message and exit
@ -380,9 +343,9 @@ To launch the GUI service, you can use the provided scripts or run the `kohya_gu
--noverify Disable requirements verification
```
### Launching the GUI on Windows (non uv based method)
### Launching the GUI on Windows (pip method)
On Windows, you can use either the `gui.ps1` or `gui.bat` script located in the root directory. Choose the script that suits your preference and run it in a terminal, providing the desired command line arguments. Here's an example:
If you installed using the `pip` method, use either the `gui.ps1` or `gui.bat` script located in the root directory. Choose the script that suits your preference and run it in a terminal, providing the desired command line arguments. Here's an example:
```powershell
gui.ps1 --listen 127.0.0.1 --server_port 7860 --inbrowser --share
@ -394,9 +357,11 @@ or
gui.bat --listen 127.0.0.1 --server_port 7860 --inbrowser --share
```
### Alternative: UV-based Method
### Launching the GUI on Windows (uv method)
To use the UV-based method for running the GUI, which does not require running `setup.bat`, follow these steps:
If you installed using the `uv` method, use the `gui-uv.bat` script to start the GUI. Follow these steps:
When you run `gui-uv.bat`, it will first check if `uv` is installed on your system. If `uv` is not found, the script will prompt you, asking if you'd like to attempt an automatic installation. You can choose 'Y' to let the script try to install `uv` for you, or 'N' to cancel. If you cancel, you'll need to install `uv` manually from [https://astral.sh/uv](https://astral.sh/uv) before running `gui-uv.bat` again.
```cmd
.\gui-uv.bat
@ -408,19 +373,21 @@ or
.\gui-uv.bat --listen 127.0.0.1 --server_port 7860 --inbrowser --share
```
This method provides an alternative way to run the GUI with the latest features, including support for Flux.1 and SD3, and eliminates the need for the setup script.
This script utilizes the `uv` managed environment.
### Launching the GUI on Linux and macOS
To launch the GUI on Linux or macOS, run the `gui.sh` script located in the root directory. Provide the desired command line arguments as follows:
If you installed using the `pip` method on Linux or macOS, run the `gui.sh` script located in the root directory. Provide the desired command line arguments as follows:
```bash
./gui.sh --listen 127.0.0.1 --server_port 7860 --inbrowser --share
```
### Alternative: UV-based Method for Linux
### Launching the GUI on Linux (uv method)
To use the UV-based method for running the GUI, which does not require running `setup.sh`, follow these steps:
If you installed using the `uv` method on Linux, use the `gui-uv.sh` script to start the GUI. Follow these steps:
When you run `gui-uv.sh`, it will first check if `uv` is installed on your system. If `uv` is not found, the script will prompt you, asking if you'd like to attempt an automatic installation. You can choose 'Y' (or 'y') to let the script try to install `uv` for you, or 'N' (or 'n') to cancel. If you cancel, you'll need to install `uv` manually from [https://astral.sh/uv](https://astral.sh/uv) before running `gui-uv.sh` again.
```shell
./gui-uv.sh --listen 127.0.0.1 --server_port 7860 --inbrowser --share
@ -432,7 +399,7 @@ If you are running on a headless server, use:
./gui-uv.sh --headless --listen 127.0.0.1 --server_port 7860 --inbrowser --share
```
This method provides an alternative way to run the GUI with the latest features, including support for Flux.1 and SD3, and eliminates the need for the setup script.
This script utilizes the `uv` managed environment.
## Custom Path Defaults
@ -454,6 +421,10 @@ To train a LoRA, you can currently use the `train_network.py` code. You can crea
Once you have created the LoRA network, you can generate images using auto1111 by installing [this extension](https://github.com/kohya-ss/sd-webui-additional-networks).
For more detailed information on LoRA training options and advanced configurations, please refer to our LoRA documentation:
- [LoRA Training Guide](docs/LoRA/top_level.md)
- [LoRA Training Options](docs/LoRA/options.md)
## Sample image generation during training
A prompt file might look like this, for example:
@ -491,24 +462,18 @@ If you encounter an error indicating that the module `tkinter` is not found, try
### LORA Training on TESLA V100 - GPU Utilization Issue
#### Issue Summary
When training LORA on a TESLA V100, users reported low GPU utilization. Additionally, there was difficulty in specifying GPUs other than the default for training.
#### Potential Solutions
- **GPU Selection:** Users can specify GPU IDs in the setup configuration to select the desired GPUs for training.
- **Improving GPU Load:** Utilizing `adamW8bit` optimizer and increasing the batch size can help achieve 70-80% GPU utilization without exceeding GPU memory limits.
See [Troubleshooting LORA Training on TESLA V100](docs/troubleshooting_tesla_v100.md) for details.
## SDXL training
The documentation in this section will be moved to a separate document later.
For detailed guidance on SDXL training, please refer to the [official sd-scripts documentation](https://github.com/kohya-ss/sd-scripts/blob/main/README.md#sdxl-training) and relevant sections in our [LoRA Training Guide](docs/LoRA/top_level.md).
## Masked loss
The masked loss is supported in each training script. To enable the masked loss, specify the `--masked_loss` option.
The feature is not fully tested, so there may be bugs. If you find any issues, please open an Issue.
> [!WARNING]
> The feature is not fully tested, so there may be bugs. If you find any issues, please open an Issue.
ControlNet dataset is used to specify the mask. The mask images should be the RGB images. The pixel value 255 in R channel is treated as the mask (the loss is calculated only for the pixels with the mask), and 0 is treated as the non-mask. The pixel values 0-255 are converted to 0-1 (i.e., the pixel value 128 is treated as the half weight of the loss). See details for the dataset specification in the [LLLite documentation](./docs/train_lllite_README.md#preparing-the-dataset).
@ -541,6 +506,17 @@ For more details, visit the [GitHub issue](https://github.com/bmaltais/kohya_ss/
To finetune HunyuanDiT models or create LoRAs, visit this [fork](https://github.com/Tencent/HunyuanDiT/tree/main/kohya_ss-hydit)
## Contributing
Contributions are welcome! If you'd like to contribute to this project, please consider the following:
- For bug reports or feature requests, please open an issue on the [GitHub Issues page](https://github.com/bmaltais/kohya_ss/issues).
- If you'd like to submit code changes, please open a pull request. Ensure your changes are well-tested and follow the existing code style.
- For security-related concerns, please refer to our `SECURITY.md` file.
## License
This project is licensed under the Apache License 2.0. See the [LICENSE.md](LICENSE.md) file for details.
## Change History
### v25.0.3
@ -558,12 +534,9 @@ To finetune HunyuanDiT models or create LoRAs, visit this [fork](https://github.
### v25.0.0
This is a SIGNIFICANT upgrade. I am going into uncharted territory here because kohya has not merged any of the recent flux.1 and sd3 updates to his code in his main branch yet... but I feel updates to his code have pretty much dried up, and I think his code is probably ready for prime time. So instead of keeping my GUI in the cave man ages, I am opting to move the code for the GUI with support for flux.1 and sd3 to the main branch of my project. Perhaps this will bite me in the proverbial ass... but for those who would rather stay on the older pre "flux.1 and sd3" updates, you can always do:
```shell
git checkout v24.1.7
```
after cloning the repo.
For all the info regarding the new flux.1 and sd3 parameters, see <https://github.com/kohya-ss/sd-scripts/blob/sd3/README.md> for more details.
- Major update: Introduced support for flux.1 and sd3, moving the GUI to align with more recent script functionalities.
- Users preferring the pre-flux.1/sd3 version can check out tag `v24.1.7`.
```shell
git checkout v24.1.7
```
- For details on new flux.1 and sd3 parameters, refer to the [sd-scripts README](https://github.com/kohya-ss/sd-scripts/blob/sd3/README.md).

View File

@ -1,110 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.dreambooth_gui import dreambooth_tab
from kohya_gui.utilities import utilities_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Build and launch the Dreambooth training GUI.

    Keyword Args:
        language: optional locale code forwarded to add_javascript for UI localization.
        headless: run without file-picker dialogs (server has no display).
        username / password: when BOTH are non-empty, enable Gradio basic auth.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
        share: create a public Gradio share link.
        listen: interface/IP address to bind the server to.
    """
    # Inject localization JavaScript for the requested language (no-op for None).
    add_javascript(kwargs.get("language"))
    css = ""

    headless = kwargs.get("headless", False)
    log.info(f"headless: {headless}")

    # Load the optional project stylesheet when present.
    if os.path.exists("./assets/style.css"):
        with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file:
            log.info("Load CSS...")
            css += file.read() + "\n"

    interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default())

    with interface:
        with gr.Tab("Dreambooth"):
            # dreambooth_tab returns the shared directory inputs so the
            # Utilities tab can operate on the same paths.
            (
                train_data_dir_input,
                reg_data_dir_input,
                output_dir_input,
                logging_dir_input,
            ) = dreambooth_tab(headless=headless)
        with gr.Tab("Utilities"):
            utilities_tab(
                train_data_dir_input=train_data_dir_input,
                reg_data_dir_input=reg_data_dir_input,
                output_dir_input=output_dir_input,
                logging_dir_input=logging_dir_input,
                enable_copy_info_button=True,
                headless=headless,
            )

    # Show the interface
    # Only forward launch options that were actually provided/enabled;
    # Gradio applies its own defaults for anything omitted.
    launch_kwargs = {}
    username = kwargs.get("username")
    password = kwargs.get("password")
    server_port = kwargs.get("server_port", 0)
    inbrowser = kwargs.get("inbrowser", False)
    share = kwargs.get("share", False)
    server_name = kwargs.get("listen")

    launch_kwargs["server_name"] = server_name
    if username and password:
        launch_kwargs["auth"] = (username, password)
    if server_port > 0:
        launch_kwargs["server_port"] = server_port
    if inbrowser:
        launch_kwargs["inbrowser"] = inbrowser
    if share:
        launch_kwargs["share"] = share
    interface.launch(**launch_kwargs)
if __name__ == "__main__":
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--listen",
        type=str,
        default="127.0.0.1",
        help="IP to listen on for connections to Gradio",
    )
    parser.add_argument(
        "--username", type=str, default="", help="Username for authentication"
    )
    parser.add_argument(
        "--password", type=str, default="", help="Password for authentication"
    )
    parser.add_argument(
        "--server_port",
        type=int,
        default=0,
        help="Port to run the server listener on",
    )
    parser.add_argument("--inbrowser", action="store_true", help="Open in browser")
    parser.add_argument("--share", action="store_true", help="Share the gradio UI")
    parser.add_argument(
        "--headless", action="store_true", help="Is the server headless"
    )
    parser.add_argument(
        "--language", type=str, default=None, help="Set custom language"
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        share=args.share,
        listen=args.listen,
        headless=args.headless,
        language=args.language,
    )

View File

@ -1,97 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.utilities import utilities_tab
from kohya_gui.finetune_gui import finetune_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Assemble the fine-tuning GUI and start the Gradio server.

    Keyword Args:
        language: optional locale code forwarded to add_javascript.
        headless: run without file-picker dialogs.
        username / password: when BOTH are non-empty, enable Gradio basic auth.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
        share: create a public Gradio share link.
        listen: interface/IP address to bind the server to.
    """
    add_javascript(kwargs.get("language"))

    headless = kwargs.get("headless", False)
    log.info(f"headless: {headless}")

    # Pull in the optional project stylesheet when present.
    css = ""
    if os.path.exists("./assets/style.css"):
        with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file:
            log.info("Load CSS...")
            css += file.read() + "\n"

    interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default())
    with interface:
        with gr.Tab("Finetune"):
            finetune_tab(headless=headless)
        with gr.Tab("Utilities"):
            utilities_tab(enable_dreambooth_tab=False, headless=headless)

    # Show the interface
    # Build the launch options incrementally, forwarding only what was enabled.
    launch_kwargs = {"server_name": kwargs.get("listen")}

    username = kwargs.get("username")
    password = kwargs.get("password")
    if username and password:
        launch_kwargs["auth"] = (username, password)

    port = kwargs.get("server_port", 0)
    if port > 0:
        launch_kwargs["server_port"] = port

    inbrowser = kwargs.get("inbrowser", False)
    if inbrowser:
        launch_kwargs["inbrowser"] = inbrowser

    share = kwargs.get("share", False)
    if share:
        launch_kwargs["share"] = share

    interface.launch(**launch_kwargs)
if __name__ == "__main__":
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--listen",
        type=str,
        default="127.0.0.1",
        help="IP to listen on for connections to Gradio",
    )
    parser.add_argument(
        "--username", type=str, default="", help="Username for authentication"
    )
    parser.add_argument(
        "--password", type=str, default="", help="Password for authentication"
    )
    parser.add_argument(
        "--server_port",
        type=int,
        default=0,
        help="Port to run the server listener on",
    )
    parser.add_argument("--inbrowser", action="store_true", help="Open in browser")
    parser.add_argument("--share", action="store_true", help="Share the gradio UI")
    parser.add_argument(
        "--headless", action="store_true", help="Is the server headless"
    )
    parser.add_argument(
        "--language", type=str, default=None, help="Set custom language"
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        share=args.share,
        listen=args.listen,
        headless=args.headless,
        language=args.language,
    )

View File

@ -1,118 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.utilities import utilities_tab
from kohya_gui.lora_gui import lora_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Build and launch the LoRA training GUI.

    Keyword Args:
        language: optional locale code forwarded to add_javascript.
        headless: run without file-picker dialogs.
        username / password: when BOTH are non-empty, enable Gradio basic auth.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
        share: create a public Gradio share link.
        listen: interface/IP address to bind the server to.
    """
    try:
        # NOTE: the previous version wrapped everything in `while True:` with no
        # break, which would rebuild and relaunch the interface forever if
        # launch() ever returned. One build + one launch is the intent.
        add_javascript(kwargs.get("language"))
        css = ""

        headless = kwargs.get("headless", False)
        log.info(f"headless: {headless}")

        # Load the optional project stylesheet when present.
        if os.path.exists("./assets/style.css"):
            with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file:
                log.info("Load CSS...")
                css += file.read() + "\n"

        interface = gr.Blocks(
            css=css, title="Kohya_ss GUI", theme=gr.themes.Default()
        )

        with interface:
            with gr.Tab("LoRA"):
                # lora_tab returns the shared directory inputs so the
                # Utilities tab can operate on the same paths.
                (
                    train_data_dir_input,
                    reg_data_dir_input,
                    output_dir_input,
                    logging_dir_input,
                ) = lora_tab(headless=headless)
            with gr.Tab("Utilities"):
                utilities_tab(
                    train_data_dir_input=train_data_dir_input,
                    reg_data_dir_input=reg_data_dir_input,
                    output_dir_input=output_dir_input,
                    logging_dir_input=logging_dir_input,
                    enable_copy_info_button=True,
                    headless=headless,
                )

        # Show the interface
        # Only forward launch options that were actually provided/enabled.
        launch_kwargs = {}
        username = kwargs.get("username")
        password = kwargs.get("password")
        server_port = kwargs.get("server_port", 0)
        inbrowser = kwargs.get("inbrowser", False)
        share = kwargs.get("share", False)
        server_name = kwargs.get("listen")

        launch_kwargs["server_name"] = server_name
        if username and password:
            launch_kwargs["auth"] = (username, password)
        if server_port > 0:
            launch_kwargs["server_port"] = server_port
        if inbrowser:
            launch_kwargs["inbrowser"] = inbrowser
        if share:
            launch_kwargs["share"] = share
        log.info(launch_kwargs)
        interface.launch(**launch_kwargs)
    except KeyboardInterrupt:
        # Code to execute when Ctrl+C is pressed
        print("You pressed Ctrl+C!")
if __name__ == "__main__":
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--listen",
        type=str,
        default="127.0.0.1",
        help="IP to listen on for connections to Gradio",
    )
    parser.add_argument(
        "--username", type=str, default="", help="Username for authentication"
    )
    parser.add_argument(
        "--password", type=str, default="", help="Password for authentication"
    )
    parser.add_argument(
        "--server_port",
        type=int,
        default=0,
        help="Port to run the server listener on",
    )
    parser.add_argument("--inbrowser", action="store_true", help="Open in browser")
    parser.add_argument("--share", action="store_true", help="Share the gradio UI")
    parser.add_argument(
        "--headless", action="store_true", help="Is the server headless"
    )
    parser.add_argument(
        "--language", type=str, default=None, help="Set custom language"
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        share=args.share,
        listen=args.listen,
        headless=args.headless,
        language=args.language,
    )

View File

@ -1,110 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.textual_inversion_gui import ti_tab
from kohya_gui.utilities import utilities_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Build and launch the Textual Inversion training GUI.

    Keyword Args:
        language: optional locale code forwarded to add_javascript for UI localization.
        headless: run without file-picker dialogs (server has no display).
        username / password: when BOTH are non-empty, enable Gradio basic auth.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
        share: create a public Gradio share link.
        listen: interface/IP address to bind the server to.
    """
    # Inject localization JavaScript for the requested language (no-op for None).
    add_javascript(kwargs.get("language"))
    css = ""

    headless = kwargs.get("headless", False)
    log.info(f"headless: {headless}")

    # Load the optional project stylesheet when present.
    if os.path.exists("./assets/style.css"):
        with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file:
            log.info("Load CSS...")
            css += file.read() + "\n"

    interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default())

    with interface:
        with gr.Tab("Dreambooth TI"):
            # ti_tab returns the shared directory inputs so the
            # Utilities tab can operate on the same paths.
            (
                train_data_dir_input,
                reg_data_dir_input,
                output_dir_input,
                logging_dir_input,
            ) = ti_tab(headless=headless)
        with gr.Tab("Utilities"):
            utilities_tab(
                train_data_dir_input=train_data_dir_input,
                reg_data_dir_input=reg_data_dir_input,
                output_dir_input=output_dir_input,
                logging_dir_input=logging_dir_input,
                enable_copy_info_button=True,
                headless=headless,
            )

    # Show the interface
    # Only forward launch options that were actually provided/enabled.
    launch_kwargs = {}
    username = kwargs.get("username")
    password = kwargs.get("password")
    server_port = kwargs.get("server_port", 0)
    inbrowser = kwargs.get("inbrowser", False)
    share = kwargs.get("share", False)
    server_name = kwargs.get("listen")

    launch_kwargs["server_name"] = server_name
    if username and password:
        launch_kwargs["auth"] = (username, password)
    if server_port > 0:
        launch_kwargs["server_port"] = server_port
    if inbrowser:
        launch_kwargs["inbrowser"] = inbrowser
    if share:
        launch_kwargs["share"] = share
    interface.launch(**launch_kwargs)
if __name__ == "__main__":
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--listen",
        type=str,
        default="127.0.0.1",
        help="IP to listen on for connections to Gradio",
    )
    parser.add_argument(
        "--username", type=str, default="", help="Username for authentication"
    )
    parser.add_argument(
        "--password", type=str, default="", help="Password for authentication"
    )
    parser.add_argument(
        "--server_port",
        type=int,
        default=0,
        help="Port to run the server listener on",
    )
    parser.add_argument("--inbrowser", action="store_true", help="Open in browser")
    parser.add_argument("--share", action="store_true", help="Share the gradio UI")
    parser.add_argument(
        "--headless", action="store_true", help="Is the server headless"
    )
    parser.add_argument(
        "--language", type=str, default=None, help="Set custom language"
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        share=args.share,
        listen=args.listen,
        headless=args.headless,
        language=args.language,
    )

View File

@ -1,69 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.utilities import utilities_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Build and launch the standalone utilities GUI.

    Keyword Args:
        username / password: enable Gradio basic auth when a username is supplied.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
    """
    css = ''
    # Load the optional project stylesheet when present.
    if os.path.exists('./assets/style.css'):
        with open(os.path.join('./assets/style.css'), 'r', encoding='utf8') as file:
            # Use the module logger instead of print(), consistent with the
            # other kohya_gui entry points.
            log.info('Load CSS...')
            css += file.read() + '\n'

    interface = gr.Blocks(css=css)

    with interface:
        utilities_tab()

    # Show the interface
    launch_kwargs = {}
    username = kwargs.get('username', '')
    password = kwargs.get('password', '')
    # Enable auth only for a non-empty username. The old check
    # (`not kwargs.get('username', None) == ''`) also fired when the kwarg was
    # absent (None), passing auth=(None, None) to Gradio.
    if username:
        launch_kwargs['auth'] = (username, password)
    if kwargs.get('server_port', 0) > 0:
        launch_kwargs['server_port'] = kwargs.get('server_port', 0)
    if kwargs.get('inbrowser', False):
        launch_kwargs['inbrowser'] = kwargs.get('inbrowser', False)
    log.info(launch_kwargs)
    interface.launch(**launch_kwargs)
if __name__ == '__main__':
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--username', type=str, default='', help='Username for authentication'
    )
    parser.add_argument(
        '--password', type=str, default='', help='Password for authentication'
    )
    parser.add_argument(
        '--server_port',
        type=int,
        default=0,
        help='Port to run the server listener on',
    )
    parser.add_argument(
        '--inbrowser', action='store_true', help='Open in browser'
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
    )

View File

@ -0,0 +1,67 @@
### Docker
#### Get your Docker ready for GPU support
##### Windows
Once you have installed [**Docker Desktop**](https://www.docker.com/products/docker-desktop/), [**CUDA Toolkit**](https://developer.nvidia.com/cuda-downloads), [**NVIDIA Windows Driver**](https://www.nvidia.com.tw/Download/index.aspx), and ensured that your Docker is running with [**WSL2**](https://docs.docker.com/desktop/wsl/#turn-on-docker-desktop-wsl-2), you are ready to go.
Here is the official documentation for further reference.
<https://docs.nvidia.com/cuda/wsl-user-guide/index.html#nvidia-compute-software-support-on-wsl-2>
<https://docs.docker.com/desktop/wsl/use-wsl/#gpu-support>
##### Linux, OSX
Install an NVIDIA GPU Driver if you do not already have one installed.
<https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html>
Install the NVIDIA Container Toolkit with this guide.
<https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>
#### Design of our Dockerfile
- It is required that all training data is stored in the `dataset` subdirectory, which is mounted into the container at `/dataset`.
- Please note that the file picker functionality is not available. Instead, you will need to manually input the folder path and configuration file path.
- TensorBoard has been separated from the project.
- TensorBoard is not included in the Docker image.
- The "Start TensorBoard" button has been hidden.
- TensorBoard is launched from a distinct container [as shown here](/docker-compose.yaml#L41).
- The browser won't be launched automatically. You will need to manually open the browser and navigate to [http://localhost:7860/](http://localhost:7860/) and [http://localhost:6006/](http://localhost:6006/)
- This Dockerfile has been designed to be easily disposable. You can discard the container at any time and restart it with the new code version.
#### Use the pre-built Docker image
```bash
git clone --recursive https://github.com/bmaltais/kohya_ss.git
cd kohya_ss
docker compose up -d
```
To update the system, do `docker compose down && docker compose up -d --pull always`
#### Local docker build
> [!IMPORTANT]
> Clone the Git repository ***recursively*** to include submodules:
> `git clone --recursive https://github.com/bmaltais/kohya_ss.git`
```bash
git clone --recursive https://github.com/bmaltais/kohya_ss.git
cd kohya_ss
docker compose up -d --build
```
> [!NOTE]
> Building the image may take up to 20 minutes to complete.
To update the system, ***checkout to the new code version*** and rebuild using `docker compose down && docker compose up -d --build --pull always`
> [!NOTE]
> If you are running on Linux, an alternative Docker container port with fewer limitations is available [here](https://github.com/P2Enjoy/kohya_ss-docker).
#### ashleykleynhans runpod docker builds
You may want to use the following repositories when running on runpod:
- Standalone Kohya_ss template: <https://github.com/ashleykleynhans/kohya-docker>
- Auto1111 + Kohya_ss GUI template: <https://github.com/ashleykleynhans/stable-diffusion-docker>

View File

@ -0,0 +1,9 @@
### Novita
#### Pre-built Novita template
1. Open the Novita template by clicking on <https://novita.ai/gpus-console?templateId=312>.
2. Deploy the template on the desired host.
3. Once deployed, connect to the Novita on HTTP 7860 to access the kohya_ss GUI.

View File

@ -0,0 +1,47 @@
### Runpod
#### Manual installation
To install the necessary components for Runpod and run kohya_ss, follow these steps:
1. Select the Runpod pytorch 2.2.0 template. This is important. Other templates may not work.
2. SSH into the Runpod.
3. Clone the repository by running the following command:
```shell
cd /workspace
git clone --recursive https://github.com/bmaltais/kohya_ss.git
```
4. Run the setup script:
```shell
cd kohya_ss
./setup-runpod.sh
```
5. Run the GUI with:
```shell
./gui.sh --share --headless
```
or with this if you expose 7860 directly via the runpod configuration:
```shell
./gui.sh --listen=0.0.0.0 --headless
```
6. Connect to the public URL displayed after the installation process is completed.
#### Pre-built Runpod template
To run from a pre-built Runpod template, you can:
1. Open the Runpod template by clicking on <https://runpod.io/gsc?template=ya6013lj5a&ref=w18gds2n>.
2. Deploy the template on the desired host.
3. Once deployed, connect to the Runpod on HTTP 3010 to access the kohya_ss GUI. You can also connect to auto1111 on HTTP 3000.

View File

@ -0,0 +1,10 @@
### LORA Training on TESLA V100 - GPU Utilization Issue
#### Issue Summary
When training LORA on a TESLA V100, users reported low GPU utilization. Additionally, there was difficulty in specifying GPUs other than the default for training.
#### Potential Solutions
- **GPU Selection:** Users can specify GPU IDs in the setup configuration to select the desired GPUs for training.
- **Improving GPU Load:** Utilizing `adamW8bit` optimizer and increasing the batch size can help achieve 70-80% GPU utilization without exceeding GPU memory limits.

View File

@ -2,8 +2,19 @@
set VIRTUAL_ENV=.venv
echo VIRTUAL_ENV is set to %VIRTUAL_ENV%
:: Install uv latest version
pip install --upgrade uv -q
:: Check if uv is installed
setlocal enabledelayedexpansion
where uv >nul 2>nul
if %errorlevel% neq 0 (
set /p INSTALL_UV="uv is not installed. We can try to install it for you, or you can install it manually from https://astral.sh/uv before running this script again. Would you like to attempt automatic installation now? (Y/N) "
if /i "!INSTALL_UV!"=="Y" (
pip install --upgrade uv -q
) else (
echo Okay, please install uv manually from https://astral.sh/uv and then re-run this script. Exiting.
exit /b 1
)
)
endlocal
set PATH=%PATH%;%~dp0venv\Lib\site-packages\torch\lib

View File

@ -39,9 +39,14 @@ cd "$SCRIPT_DIR" || exit 1
# Check if uv is already installed
if ! command -v uv &> /dev/null; then
# Setup uv
read -p "uv is not installed. We can try to install it for you, or you can install it manually from https://astral.sh/uv before running this script again. Would you like to attempt automatic installation now? [Y/n]: " install_uv
if [[ "$install_uv" =~ ^[Yy]$ ]]; then
curl -LsSf https://astral.sh/uv/install.sh | sh
source $HOME/.local/bin/env
else
echo "Okay, please install uv manually from https://astral.sh/uv and then re-run this script. Exiting."
exit 1
fi
fi
git submodule update --init --recursive

View File

@ -13,10 +13,17 @@ log = setup_logging()
def load_model():
# Set the device to GPU if available, otherwise use CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
if hasattr(torch, 'cuda') and torch.cuda.is_available():
device = 'cuda'
elif hasattr(torch, 'mps') and torch.mps.is_available():
device = 'mps'
else:
device = 'cpu'
# Initialize the BLIP2 processor
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
log.debug('Processor initialized: %s', processor)
# Initialize the BLIP2 model
model = Blip2ForConditionalGeneration.from_pretrained(

View File

@ -213,6 +213,7 @@ class BasicTraining:
"PagedLion8bit",
"Prodigy",
"prodigyplus.ProdigyPlusScheduleFree",
"pytorch_optimizer.CAME",
"RAdamScheduleFree",
"SGDNesterov",
"SGDNesterov8bit",
@ -229,13 +230,7 @@ class BasicTraining:
"""
with gr.Row():
# Initialize the maximum gradient norm slider
self.max_grad_norm = gr.Slider(
label="Max grad norm",
value=self.config.get("basic.max_grad_norm", 1.0),
minimum=0.0,
maximum=1.0,
interactive=True,
)
self.max_grad_norm = gr.Number(label='Max grad norm', value=1.0, interactive=True)
# Initialize the learning rate scheduler extra arguments textbox
self.lr_scheduler_args = gr.Textbox(
label="LR scheduler extra arguments",

View File

@ -278,11 +278,11 @@ class SourceModel:
def toggle_checkboxes(v2, v_parameterization, sdxl_checkbox, sd3_checkbox, flux1_checkbox):
# Check if all checkboxes are unchecked
if not v2 and not v_parameterization and not sdxl_checkbox and not sd3_checkbox and not flux1_checkbox:
if not v2 and not sdxl_checkbox and not sd3_checkbox and not flux1_checkbox:
# If all unchecked, return new interactive checkboxes
return (
gr.Checkbox(interactive=True), # v2 checkbox
gr.Checkbox(interactive=True), # v_parameterization checkbox
gr.Checkbox(interactive=False, value=False), # v_parameterization checkbox
gr.Checkbox(interactive=True), # sdxl_checkbox
gr.Checkbox(interactive=True), # sd3_checkbox
gr.Checkbox(interactive=True), # sd3_checkbox
@ -291,7 +291,7 @@ class SourceModel:
# If any checkbox is checked, return checkboxes with current interactive state
return (
gr.Checkbox(interactive=v2), # v2 checkbox
gr.Checkbox(interactive=v_parameterization), # v_parameterization checkbox
gr.Checkbox(interactive=sdxl_checkbox), # v_parameterization checkbox
gr.Checkbox(interactive=sdxl_checkbox), # sdxl_checkbox
gr.Checkbox(interactive=sd3_checkbox), # sd3_checkbox
gr.Checkbox(interactive=flux1_checkbox), # flux1_checkbox

View File

@ -132,6 +132,7 @@ def save_configuration(
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
max_grad_norm,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@ -342,6 +343,7 @@ def open_configuration(
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
max_grad_norm,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@ -547,6 +549,7 @@ def train_model(
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
max_grad_norm,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@ -864,12 +867,12 @@ def train_model(
sd3_checkbox and sd3_cache_text_encoder_outputs_to_disk
) or (flux1_checkbox and flux1_cache_text_encoder_outputs_to_disk)
no_half_vae = sdxl and sdxl_no_half_vae
if max_data_loader_n_workers == "" or None:
if max_data_loader_n_workers in ("", None):
max_data_loader_n_workers = 0
else:
max_data_loader_n_workers = int(max_data_loader_n_workers)
if max_train_steps == "" or None:
if max_train_steps in ("", None):
max_train_steps = 0
else:
max_train_steps = int(max_train_steps)
@ -890,7 +893,7 @@ def train_model(
# def save_huggingface_to_toml(self, toml_file_path: str):
config_toml_data = {
# Update the values in the TOML data
"adaptive_noise_scale": adaptive_noise_scale if not 0 else None,
"adaptive_noise_scale": adaptive_noise_scale if adaptive_noise_scale != 0 else None,
"async_upload": async_upload,
"bucket_no_upscale": bucket_no_upscale,
"bucket_reso_steps": bucket_reso_steps,
@ -953,6 +956,7 @@ def train_model(
"lr_warmup_steps": lr_warmup_steps,
"masked_loss": masked_loss,
"max_bucket_reso": max_bucket_reso,
"max_grad_norm": max_grad_norm,
"max_timestep": max_timestep if max_timestep != 0 else None,
"max_token_length": int(max_token_length),
"max_train_epochs": (
@ -970,10 +974,10 @@ def train_model(
"min_timestep": min_timestep if min_timestep != 0 else None,
"mixed_precision": mixed_precision,
"multires_noise_discount": multires_noise_discount,
"multires_noise_iterations": multires_noise_iterations if not 0 else None,
"multires_noise_iterations": multires_noise_iterations if multires_noise_iterations != 0 else None,
"no_half_vae": no_half_vae,
"no_token_padding": no_token_padding,
"noise_offset": noise_offset if not 0 else None,
"noise_offset": noise_offset if noise_offset != 0 else None,
"noise_offset_random_strength": noise_offset_random_strength,
"noise_offset_type": noise_offset_type,
"optimizer_args": (
@ -1329,6 +1333,7 @@ def dreambooth_tab(
advanced_training.keep_tokens,
basic_training.lr_scheduler_num_cycles,
basic_training.lr_scheduler_power,
basic_training.max_grad_norm,
advanced_training.persistent_data_loader_workers,
advanced_training.bucket_no_upscale,
advanced_training.random_crop,

View File

@ -933,12 +933,12 @@ def train_model(
) or (flux1_checkbox and flux1_cache_text_encoder_outputs_to_disk)
no_half_vae = sdxl_checkbox and sdxl_no_half_vae
if max_data_loader_n_workers == "" or None:
if max_data_loader_n_workers in ("", None):
max_data_loader_n_workers = 0
else:
max_data_loader_n_workers = int(max_data_loader_n_workers)
if max_train_steps == "" or None:
if max_train_steps in ("", None):
max_train_steps = 0
else:
max_train_steps = int(max_train_steps)

File diff suppressed because it is too large Load Diff

View File

@ -739,12 +739,12 @@ def train_model(
else:
run_cmd.append(rf"{scriptdir}/sd-scripts/train_textual_inversion.py")
if max_data_loader_n_workers == "" or None:
if max_data_loader_n_workers in ("", None):
max_data_loader_n_workers = 0
else:
max_data_loader_n_workers = int(max_data_loader_n_workers)
if max_train_steps == "" or None:
if max_train_steps in ("", None):
max_train_steps = 0
else:
max_train_steps = int(max_train_steps)

View File

@ -1,15 +1,16 @@
[project]
name = "kohya-ss"
version = "0.1.0"
description = "Add your description here"
version = "25.1.0"
description = "Kohya_ss GUI"
readme = "README.md"
requires-python = ">=3.10,<3.11"
requires-python = ">=3.11,<3.13"
dependencies = [
"accelerate==0.33.0",
"aiofiles==23.2.1",
"altair==4.2.2",
"bitsandbytes==0.44.0",
"dadaptation==3.2",
"deepspeed; sys_platform == 'linux'",
"diffusers[torch]==0.32.2",
"easygui==0.98.3",
"einops==0.7.0",
@ -21,7 +22,7 @@ dependencies = [
"invisible-watermark==0.2.0",
"library",
"lion-pytorch==0.0.6",
"lycoris-lora==3.1.0",
"lycoris-lora==3.2.0.post2",
"omegaconf==2.3.0",
"onnx==1.16.1",
"onnxruntime-gpu==1.19.2",
@ -39,20 +40,37 @@ dependencies = [
"sentencepiece==0.2.0",
"tensorboard>=2.18.0",
"tensorflow>=2.16.1",
"tensorflow-io-gcs-filesystem==0.31.0",
"timm==0.6.12",
"tensorflow-io-gcs-filesystem==0.31.0; sys_platform == 'win32'",
"tensorflow-io-gcs-filesystem>=0.37.1; sys_platform == 'linux'",
"timm==0.6.7",
"tk==0.1.0",
"toml==0.10.2",
"torch>=2.5.0",
"torchvision>=0.20.0",
"transformers==4.44.2",
"triton==3.1.0; sys_platform == 'linux'",
"voluptuous==0.13.1",
"wandb==0.18.0",
"xformers>=0.0.28.post2",
]
[tool.uv.sources]
torch = [
{ index = "pytorch-cu124", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu124", marker = "sys_platform == 'win32'" }
]
torchvision = [
{ index = "pytorch-cu124", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu124", marker = "sys_platform == 'win32'" }
]
library = { path = "sd-scripts" }
[[tool.uv.index]]
name = "pytorch-cu124"
url = "https://download.pytorch.org/whl/cu124"
explicit = true
[[tool.uv.index]]
name = "pytorch-cu126"
url = "https://download.pytorch.org/whl/cu126"
explicit = true

View File

@ -2,6 +2,7 @@ accelerate==0.33.0
aiofiles==23.2.1
altair==4.2.2
dadaptation==3.2
deepspeed==0.16.7
diffusers[torch]==0.32.2
easygui==0.98.3
einops==0.7.0
@ -12,7 +13,7 @@ huggingface-hub==0.29.3
imagesize==1.4.1
invisible-watermark==0.2.0
lion-pytorch==0.0.6
lycoris_lora==3.1.0
lycoris_lora==3.2.0.post2
omegaconf==2.3.0
onnx==1.16.1
prodigyopt==1.1.2
@ -28,7 +29,7 @@ schedulefree==1.4
scipy==1.11.4
# for T5XXL tokenizer (SD3/FLUX)
sentencepiece==0.2.0
timm==0.6.12
timm==0.6.7
tk==0.1.0
toml==0.10.2
transformers==4.44.2

View File

@ -1,8 +1,8 @@
# Custom index URL for specific packages
--extra-index-url https://download.pytorch.org/whl/cu124
--extra-index-url https://download.pytorch.org/whl/cu126
torch==2.5.0+cu124
torchvision==0.20.0+cu124
torch==2.5.0+cu126
torchvision>=0.20.0+cu126
xformers==0.0.28.post2
bitsandbytes==0.44.0

View File

@ -1,4 +0,0 @@
xformers>=0.0.20
bitsandbytes==0.44.0
accelerate==0.33.0
tensorboard

View File

@ -1,5 +1,10 @@
torch==2.0.0 torchvision==0.15.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
xformers bitsandbytes==0.43.3
tensorflow-macos tensorflow-metal tensorboard==2.14.1
--extra-index-url https://download.pytorch.org/whl/nightly/cpu
torch==2.8.0.*
torchvision==0.22.*
xformers==0.0.29.*
git+https://github.com/bitsandbytes-foundation/bitsandbytes.git/#0.45.5
tensorflow-macos
tensorflow-metal
tensorboard==2.14.1
onnxruntime==1.17.1
-r requirements.txt

@ -1 +1 @@
Subproject commit b11c053b8fcd1c4532dc3a37e70109e08aafa2ec
Subproject commit 5753b8ff6bc045c27c1c61535e35195da860269c

View File

@ -96,6 +96,9 @@ def check_torch():
log.debug("XPU is available, logging XPU info...")
log_xpu_info(torch, ipex)
# Log a warning if no GPU is available
elif hasattr(torch, "mps") and torch.mps.is_available():
log.info("MPS is available, logging MPS info...")
log_mps_info(torch)
else:
log.warning("Torch reports GPU not available")
@ -130,6 +133,15 @@ def log_cuda_info(torch):
f"Torch detected GPU: {props.name} VRAM {round(props.total_memory / 1024 / 1024)}MB Arch {props.major}.{props.minor} Cores {props.multi_processor_count}"
)
def log_mps_info(torch):
    """Log information about Apple Silicon (MPS) availability.

    Args:
        torch: the imported ``torch`` module (passed in so the caller controls
            when/whether torch is imported).
    """
    # recommended_max_memory() is in bytes; convert to MB for display.
    max_recommended_mem = round(torch.mps.recommended_max_memory() / 1024**2)
    log.info(
        f"Torch detected Apple MPS: {max_recommended_mem}MB Unified Memory Available"
    )
    log.warning('MPS support is still experimental, proceed with caution.')
def log_xpu_info(torch, ipex):
"""Log information about Intel XPU-enabled GPUs."""
# Log the Intel Extension for PyTorch (IPEX) version if available

View File

@ -12,7 +12,7 @@
"cache_latents_to_disk": false,
"caption_dropout_every_n_epochs": 0,
"caption_dropout_rate": 0.05,
"caption_extension": "",
"caption_extension": ".txt",
"clip_g": "",
"clip_l": "",
"clip_skip": 2,

123
test/config/t5clrs.json Normal file
View File

@ -0,0 +1,123 @@
{
"LoRA_type": "Standard",
"LyCORIS_preset": "full",
"adaptive_noise_scale": 0,
"additional_parameters": " --optimizer_args \"weight_decay=0.01\" \"d_coef=1\" \"use_bias_correction=True\" \"safeguard_warmup=False\" \"betas=0.9,0.99\"",
"block_alphas": "",
"block_dims": "",
"block_lr_zero_threshold": "",
"bucket_no_upscale": true,
"bucket_reso_steps": 256,
"cache_latents": true,
"cache_latents_to_disk": true,
"caption_dropout_every_n_epochs": 0.0,
"caption_dropout_rate": 0,
"caption_extension": ".txt",
"clip_skip": 2,
"color_aug": false,
"constrain": 0.0,
"conv_alpha": 1,
"conv_block_alphas": "",
"conv_block_dims": "",
"conv_dim": 1,
"debiased_estimation_loss": false,
"decompose_both": false,
"dim_from_weights": false,
"down_lr_weight": "",
"enable_bucket": true,
"epoch": 50,
"factor": -1,
"flip_aug": false,
"fp8_base": false,
"full_bf16": true,
"full_fp16": false,
"gradient_accumulation_steps": "1",
"gradient_checkpointing": true,
"keep_tokens": 4,
"learning_rate": 1.0,
"logging_dir": "E:/work/LoRa_work/logging",
"lora_network_weights": "",
"lr_scheduler": "cosine",
"lr_scheduler_args": "",
"lr_scheduler_num_cycles": "",
"lr_scheduler_power": "",
"lr_warmup": 0,
"max_bucket_reso": 2048,
"max_data_loader_n_workers": "0",
"max_grad_norm": 1,
"max_resolution": "1024,1024",
"max_timestep": 1000,
"max_token_length": "75",
"max_train_epochs": "50",
"max_train_steps": "",
"mem_eff_attn": false,
"mid_lr_weight": "",
"min_bucket_reso": 512,
"min_snr_gamma": 0,
"min_timestep": 0,
"mixed_precision": "bf16",
"model_list": "custom",
"module_dropout": 0,
"multires_noise_discount": 0,
"multires_noise_iterations": 0,
"network_alpha": 2,
"network_dim": 16,
"network_dropout": 0,
"no_token_padding": false,
"noise_offset": 0,
"noise_offset_type": "Original",
"num_cpu_threads_per_process": 2,
"optimizer": "Prodigy",
"optimizer_args": "",
"output_dir": "E:/work/output",
"output_name": "zunko_pony_v1",
"persistent_data_loader_workers": false,
"pretrained_model_name_or_path": "E:/models/ponyDiffusionV6XL_v6StartWithThisOne.safetensors",
"prior_loss_weight": 1.0,
"random_crop": false,
"rank_dropout": 0,
"rank_dropout_scale": false,
"reg_data_dir": "",
"rescaled": false,
"resume": "",
"sample_every_n_epochs": 0,
"sample_every_n_steps": 0,
"sample_prompts": "",
"sample_sampler": "euler_a",
"save_every_n_epochs": 10,
"save_every_n_steps": 0,
"save_last_n_steps": 0,
"save_last_n_steps_state": 0,
"save_model_as": "safetensors",
"save_precision": "bf16",
"save_state": false,
"scale_v_pred_loss_like_noise_pred": false,
"scale_weight_norms": 0,
"sdxl": true,
"sdxl_cache_text_encoder_outputs": false,
"sdxl_no_half_vae": true,
"seed": "1055217506",
"shuffle_caption": true,
"stop_text_encoder_training": 0,
"text_encoder_lr": 0.0,
"train_batch_size": 3,
"train_data_dir": "E:/work/input/zunko_dataset",
"train_norm": false,
"train_on_input": true,
"training_comment": "",
"unet_lr": 0.0,
"unit": 1,
"up_lr_weight": "",
"use_cp": false,
"use_scalar": false,
"use_tucker": false,
"use_wandb": false,
"v2": false,
"v_parameterization": false,
"v_pred_like_loss": 0,
"vae": "",
"vae_batch_size": 0,
"wandb_api_key": "",
"weighted_captions": false,
"xformers": "xformers"
}

View File

@ -0,0 +1,360 @@
# extract approximating LoRA by svd from two SD models
# The code is based on https://github.com/cloneofsimo/lora/blob/develop/lora_diffusion/cli_svd.py
# Thanks to cloneofsimo!
import argparse
import json
import os
import time
import torch
from safetensors.torch import load_file, save_file
from tqdm import tqdm
from library import sai_model_spec, model_util, sdxl_model_util
import lora
from library.utils import setup_logging
setup_logging()
import logging
logger = logging.getLogger(__name__)
# CLAMP_QUANTILE = 0.99
# MIN_DIFF = 1e-1
def save_to_file(file_name, model, state_dict, dtype):
    """Save ``state_dict`` to ``file_name``, optionally casting tensors to ``dtype``.

    Args:
        file_name: destination path; ``.safetensors`` selects the safetensors
            format, anything else uses ``torch.save``.
        model: kept for backward compatibility with existing callers (callers
            conventionally pass the same mapping as ``state_dict``); unused.
        state_dict: mapping of parameter name -> tensor to be saved.
        dtype: target torch dtype, or None to keep tensors as-is.
    """
    if dtype is not None:
        # Cast in place so the converted tensors are what actually gets written.
        for key in list(state_dict.keys()):
            if type(state_dict[key]) == torch.Tensor:
                state_dict[key] = state_dict[key].to(dtype)

    # BUGFIX: previously saved `model`, discarding the dtype conversion applied
    # to `state_dict` whenever the two objects differed.
    if os.path.splitext(file_name)[1] == ".safetensors":
        save_file(state_dict, file_name)
    else:
        torch.save(state_dict, file_name)
def svd(
    model_org=None,
    model_tuned=None,
    save_to=None,
    dim=4,
    v2=None,
    sdxl=None,
    conv_dim=None,
    v_parameterization=None,
    device=None,
    save_precision=None,
    clamp_quantile=0.99,
    min_diff=0.01,
    no_metadata=False,
    load_precision=None,
    load_original_model_to=None,
    load_tuned_model_to=None,
):
    """Extract an approximating LoRA from the weight difference of two models.

    Loads an original and a tuned SD/SDXL checkpoint, computes per-module
    weight differences (tuned - original), low-rank-approximates each
    difference via truncated SVD, and saves the result as a LoRA weights file.

    Args:
        model_org: path to the original checkpoint (ckpt or safetensors).
        model_tuned: path to the tuned checkpoint.
        save_to: destination file name (.safetensors or ckpt).
        dim: LoRA rank for Linear / Conv2d-1x1 modules (also used as alpha).
        v2: load as Stable Diffusion 2.x (mutually exclusive with sdxl).
        sdxl: load as SDXL base (mutually exclusive with v2).
        conv_dim: rank for Conv2d-3x3 modules; None disables them.
        v_parameterization: metadata flag; defaults to `v2` when None.
        device: torch device used for the SVD computation (e.g. "cuda").
        save_precision: "float" | "fp16" | "bf16" | None (keep as-is).
        clamp_quantile: quantile used to clamp the factor matrices.
        min_diff: max-abs-diff threshold deciding whether the text encoder
            changed at all; below it only the U-Net LoRA is extracted.
        no_metadata: skip sai modelspec metadata when True.
        load_original_model_to: SDXL only; device to load the original model.
        load_tuned_model_to: SDXL only; device to load the tuned model.
    """

    def str_to_dtype(p):
        # Map a precision name to a torch dtype; None means "keep as-is".
        if p == "float":
            return torch.float
        if p == "fp16":
            return torch.float16
        if p == "bf16":
            return torch.bfloat16
        return None

    assert v2 != sdxl or (not v2 and not sdxl), "v2 and sdxl cannot be specified at the same time / v2とsdxlは同時に指定できません"
    if v_parameterization is None:
        v_parameterization = v2

    load_dtype = str_to_dtype(load_precision) if load_precision else None
    save_dtype = str_to_dtype(save_precision)
    work_device = "cpu"

    # load models
    if not sdxl:
        logger.info(f"loading original SD model : {model_org}")
        text_encoder_o, _, unet_o = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_org)
        text_encoders_o = [text_encoder_o]
        if load_dtype is not None:
            text_encoder_o = text_encoder_o.to(load_dtype)
            unet_o = unet_o.to(load_dtype)

        logger.info(f"loading tuned SD model : {model_tuned}")
        text_encoder_t, _, unet_t = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_tuned)
        text_encoders_t = [text_encoder_t]
        if load_dtype is not None:
            text_encoder_t = text_encoder_t.to(load_dtype)
            unet_t = unet_t.to(load_dtype)

        model_version = model_util.get_model_version_str_for_sd1_sd2(v2, v_parameterization)
    else:
        device_org = load_original_model_to if load_original_model_to else "cpu"
        device_tuned = load_tuned_model_to if load_tuned_model_to else "cpu"

        logger.info(f"loading original SDXL model : {model_org}")
        text_encoder_o1, text_encoder_o2, _, unet_o, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint(
            sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_org, device_org
        )
        text_encoders_o = [text_encoder_o1, text_encoder_o2]
        if load_dtype is not None:
            text_encoder_o1 = text_encoder_o1.to(load_dtype)
            text_encoder_o2 = text_encoder_o2.to(load_dtype)
            unet_o = unet_o.to(load_dtype)

        # BUGFIX: this message previously said "original" for the tuned model.
        logger.info(f"loading tuned SDXL model : {model_tuned}")
        text_encoder_t1, text_encoder_t2, _, unet_t, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint(
            sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_tuned, device_tuned
        )
        text_encoders_t = [text_encoder_t1, text_encoder_t2]
        if load_dtype is not None:
            text_encoder_t1 = text_encoder_t1.to(load_dtype)
            text_encoder_t2 = text_encoder_t2.to(load_dtype)
            unet_t = unet_t.to(load_dtype)

        model_version = sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0

    # create LoRA network to extract weights: Use dim (rank) as alpha
    if conv_dim is None:
        kwargs = {}
    else:
        kwargs = {"conv_dim": conv_dim, "conv_alpha": conv_dim}

    lora_network_o = lora.create_network(1.0, dim, dim, None, text_encoders_o, unet_o, **kwargs)
    lora_network_t = lora.create_network(1.0, dim, dim, None, text_encoders_t, unet_t, **kwargs)
    assert len(lora_network_o.text_encoder_loras) == len(
        lora_network_t.text_encoder_loras
    ), f"model version is different (SD1.x vs SD2.x) / それぞれのモデルのバージョンが違いますSD1.xベースとSD2.xベース "

    # get diffs
    diffs = {}
    text_encoder_different = False
    for i, (lora_o, lora_t) in enumerate(zip(lora_network_o.text_encoder_loras, lora_network_t.text_encoder_loras)):
        lora_name = lora_o.lora_name
        module_o = lora_o.org_module
        module_t = lora_t.org_module
        diff = module_t.weight.to(work_device) - module_o.weight.to(work_device)

        # clear weight to save memory
        module_o.weight = None
        module_t.weight = None

        # Text Encoder might be same
        if not text_encoder_different and torch.max(torch.abs(diff)) > min_diff:
            text_encoder_different = True
            logger.info(f"Text encoder is different. {torch.max(torch.abs(diff))} > {min_diff}")

        diffs[lora_name] = diff

    # clear target Text Encoder to save memory
    # NOTE(review): `del` on the loop variable only drops that binding; the
    # list `text_encoders_t` still holds the encoders. Kept as-is to preserve
    # the original (best-effort) memory behavior.
    for text_encoder in text_encoders_t:
        del text_encoder

    if not text_encoder_different:
        logger.warning("Text encoder is same. Extract U-Net only.")
        lora_network_o.text_encoder_loras = []
        diffs = {}  # clear diffs

    for i, (lora_o, lora_t) in enumerate(zip(lora_network_o.unet_loras, lora_network_t.unet_loras)):
        lora_name = lora_o.lora_name
        module_o = lora_o.org_module
        module_t = lora_t.org_module
        diff = module_t.weight.to(work_device) - module_o.weight.to(work_device)

        # clear weight to save memory
        module_o.weight = None
        module_t.weight = None

        diffs[lora_name] = diff

    # clear LoRA network, target U-Net to save memory
    del lora_network_o
    del lora_network_t
    del unet_t

    # make LoRA with svd
    logger.info("calculating by svd")
    lora_weights = {}
    with torch.no_grad():
        for lora_name, mat in tqdm(list(diffs.items())):
            # BUGFIX: previously referenced the module-global `args` here
            # (`if args.device: mat = mat.to(args.device)`), which raised a
            # NameError when svd() was called as a library function. Device
            # placement is handled below via the `device` parameter.
            mat = mat.to(torch.float)  # calc by float

            # if conv_dim is None, diffs do not include LoRAs for conv2d-3x3
            conv2d = len(mat.size()) == 4
            kernel_size = None if not conv2d else mat.size()[2:4]
            conv2d_3x3 = conv2d and kernel_size != (1, 1)
            rank = dim if not conv2d_3x3 or conv_dim is None else conv_dim
            out_dim, in_dim = mat.size()[0:2]

            if device:
                mat = mat.to(device)

            # logger.info(lora_name, mat.size(), mat.device, rank, in_dim, out_dim)
            rank = min(rank, in_dim, out_dim)  # LoRA rank cannot exceed the original dim

            if conv2d:
                if conv2d_3x3:
                    mat = mat.flatten(start_dim=1)
                else:
                    mat = mat.squeeze()

            U, S, Vh = torch.linalg.svd(mat)

            U = U[:, :rank]
            S = S[:rank]
            U = U @ torch.diag(S)

            Vh = Vh[:rank, :]

            # Clamp extreme factor values at the requested quantile to avoid
            # outliers dominating the low-rank approximation.
            dist = torch.cat([U.flatten(), Vh.flatten()])
            hi_val = torch.quantile(dist, clamp_quantile)
            low_val = -hi_val

            U = U.clamp(low_val, hi_val)
            Vh = Vh.clamp(low_val, hi_val)

            if conv2d:
                U = U.reshape(out_dim, rank, 1, 1)
                Vh = Vh.reshape(rank, in_dim, kernel_size[0], kernel_size[1])

            U = U.to(work_device, dtype=save_dtype).contiguous()
            Vh = Vh.to(work_device, dtype=save_dtype).contiguous()

            lora_weights[lora_name] = (U, Vh)

    # make state dict for LoRA
    lora_sd = {}
    for lora_name, (up_weight, down_weight) in lora_weights.items():
        lora_sd[lora_name + ".lora_up.weight"] = up_weight
        lora_sd[lora_name + ".lora_down.weight"] = down_weight
        lora_sd[lora_name + ".alpha"] = torch.tensor(down_weight.size()[0])

    # load state dict to LoRA and save it
    lora_network_save, lora_sd = lora.create_network_from_weights(1.0, None, None, text_encoders_o, unet_o, weights_sd=lora_sd)
    lora_network_save.apply_to(text_encoders_o, unet_o)  # create internal module references for state_dict

    info = lora_network_save.load_state_dict(lora_sd)
    logger.info(f"Loading extracted LoRA weights: {info}")

    dir_name = os.path.dirname(save_to)
    if dir_name and not os.path.exists(dir_name):
        os.makedirs(dir_name, exist_ok=True)

    # minimum metadata
    net_kwargs = {}
    if conv_dim is not None:
        net_kwargs["conv_dim"] = str(conv_dim)
        net_kwargs["conv_alpha"] = str(float(conv_dim))

    metadata = {
        "ss_v2": str(v2),
        "ss_base_model_version": model_version,
        "ss_network_module": "networks.lora",
        "ss_network_dim": str(dim),
        "ss_network_alpha": str(float(dim)),
        "ss_network_args": json.dumps(net_kwargs),
    }

    if not no_metadata:
        title = os.path.splitext(os.path.basename(save_to))[0]
        sai_metadata = sai_model_spec.build_metadata(None, v2, v_parameterization, sdxl, True, False, time.time(), title=title)
        metadata.update(sai_metadata)

    lora_network_save.save_weights(save_to, save_dtype, metadata)
    logger.info(f"LoRA weights are saved to: {save_to}")
def setup_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the LoRA extraction script.

    Returns:
        argparse.ArgumentParser: parser whose options mirror the keyword
        arguments of ``svd`` (the parsed namespace is forwarded via
        ``svd(**vars(args))``).
    """
    parser = argparse.ArgumentParser()
    # Model-family flags (v2 and sdxl are mutually exclusive; enforced in svd).
    parser.add_argument("--v2", action="store_true", help="load Stable Diffusion v2.x model / Stable Diffusion 2.xのモデルを読み込む")
    parser.add_argument(
        "--v_parameterization",
        action="store_true",
        default=None,
        help="make LoRA metadata for v-parameterization (default is same to v2) / 作成するLoRAのメタデータにv-parameterization用と設定する省略時はv2と同じ",
    )
    parser.add_argument(
        "--sdxl", action="store_true", help="load Stable Diffusion SDXL base model / Stable Diffusion SDXL baseのモデルを読み込む"
    )
    # Precision controls for loading and saving.
    parser.add_argument(
        "--load_precision",
        type=str,
        default=None,
        choices=[None, "float", "fp16", "bf16"],
        help="precision in loading, model default if omitted / 読み込み時に精度を変更して読み込む、省略時はモデルファイルによる"
    )
    parser.add_argument(
        "--save_precision",
        type=str,
        default=None,
        choices=[None, "float", "fp16", "bf16"],
        help="precision in saving, same to merging if omitted / 保存時に精度を変更して保存する、省略時はfloat",
    )
    # Input/output paths (all three required).
    parser.add_argument(
        "--model_org",
        type=str,
        default=None,
        required=True,
        help="Stable Diffusion original model: ckpt or safetensors file / 元モデル、ckptまたはsafetensors",
    )
    parser.add_argument(
        "--model_tuned",
        type=str,
        default=None,
        required=True,
        help="Stable Diffusion tuned model, LoRA is difference of `original to tuned`: ckpt or safetensors file / 派生モデル生成されるLoRAは元→派生の差分になります、ckptまたはsafetensors",
    )
    parser.add_argument(
        "--save_to",
        type=str,
        default=None,
        required=True,
        help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors",
    )
    # LoRA rank settings.
    parser.add_argument("--dim", type=int, default=4, help="dimension (rank) of LoRA (default 4) / LoRAの次元数rankデフォルト4")
    parser.add_argument(
        "--conv_dim",
        type=int,
        default=None,
        help="dimension (rank) of LoRA for Conv2d-3x3 (default None, disabled) / LoRAのConv2d-3x3の次元数rankデフォルトNone、適用なし",
    )
    # Extraction tuning knobs.
    parser.add_argument("--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う")
    parser.add_argument(
        "--clamp_quantile",
        type=float,
        default=0.99,
        help="Quantile clamping value, float, (0-1). Default = 0.99 / 値をクランプするための分位点、float、(0-1)。デフォルトは0.99",
    )
    parser.add_argument(
        "--min_diff",
        type=float,
        default=0.01,
        help="Minimum difference between finetuned model and base to consider them different enough to extract, float, (0-1). Default = 0.01 /"
        + "LoRAを抽出するために元モデルと派生モデルの差分の最小値、float、(0-1)。デフォルトは0.01",
    )
    parser.add_argument(
        "--no_metadata",
        action="store_true",
        help="do not save sai modelspec metadata (minimum ss_metadata for LoRA is saved) / "
        + "sai modelspecのメタデータを保存しないLoRAの最低限のss_metadataは保存される",
    )
    # SDXL-only load-device overrides.
    parser.add_argument(
        "--load_original_model_to",
        type=str,
        default=None,
        help="location to load original model, cpu or cuda, cuda:0, etc, default is cpu, only for SDXL / 元モデル読み込み先、cpuまたはcuda、cuda:0など、省略時はcpu、SDXLのみ有効",
    )
    parser.add_argument(
        "--load_tuned_model_to",
        type=str,
        default=None,
        help="location to load tuned model, cpu or cuda, cuda:0, etc, default is cpu, only for SDXL / 派生モデル読み込み先、cpuまたはcuda、cuda:0など、省略時はcpu、SDXLのみ有効",
    )

    return parser
if __name__ == "__main__":
    # Parse CLI options and forward them verbatim to svd().
    args = setup_parser().parse_args()
    svd(**vars(args))

2350
uv.lock

File diff suppressed because it is too large Load Diff