Merge pull request #3252 from bmaltais/dev

v25.1.0
pull/3263/head v25.1.0
bmaltais 2025-05-25 18:12:14 -04:00 committed by GitHub
commit 17075c4d25
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 2982 additions and 2220 deletions

View File

@ -18,4 +18,4 @@ jobs:
- uses: actions/checkout@v4
- name: typos-action
uses: crate-ci/typos@v1.31.1
uses: crate-ci/typos@v1.31.2

View File

@ -1 +1 @@
3.10
3.11

View File

@ -1 +1 @@
v25.0.3
v25.1.0

View File

@ -3,7 +3,47 @@ ARG UID=1000
ARG VERSION=EDGE
ARG RELEASE=0
FROM python:3.10-slim as build
########################################
# Base stage
########################################
FROM docker.io/library/python:3.11-slim-bullseye AS base
# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT
WORKDIR /tmp
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
# Install CUDA partially
# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/#debian
# Installing the complete CUDA Toolkit system-wide usually adds around 8GB to the image size.
# Since most CUDA packages are already installed through pip, there's no need to download the entire toolkit.
# Therefore, we opt to install only the essential libraries.
# Here is the package list for your reference: https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb /tmp/cuda-keyring_x86_64.deb
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
dpkg -i cuda-keyring_x86_64.deb && \
rm -f cuda-keyring_x86_64.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
# !If you experience any related issues, replace the following line with `cuda-12-4` to obtain the complete CUDA package.
cuda-nvcc-12-4
ENV PATH="/usr/local/cuda/bin${PATH:+:${PATH}}"
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64
ENV CUDA_VERSION=12.4
ENV NVIDIA_REQUIRE_CUDA=cuda>=12.4
ENV CUDA_HOME=/usr/local/cuda
########################################
# Build stage
########################################
FROM base AS build
# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
@ -11,10 +51,14 @@ ARG TARGETVARIANT
WORKDIR /app
# Install under /root/.local
ENV PIP_USER="true"
ARG PIP_NO_WARN_SCRIPT_LOCATION=0
ARG PIP_ROOT_USER_ACTION="ignore"
# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ENV UV_PROJECT_ENVIRONMENT=/venv
ENV VIRTUAL_ENV=/venv
ENV UV_LINK_MODE=copy
ENV UV_PYTHON_DOWNLOADS=0
ENV UV_INDEX=https://download.pytorch.org/whl/cu124
# Install build dependencies
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
@ -22,23 +66,28 @@ RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/v
apt-get update && apt-get upgrade -y && \
apt-get install -y --no-install-recommends python3-launchpadlib git curl
# Install PyTorch
# The versions must align and be in sync with the requirements_linux_docker.txt
# hadolint ignore=SC2102
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
pip install -U --extra-index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.nvidia.com \
torch==2.1.2 torchvision==0.16.2 \
xformers==0.0.23.post1 \
ninja \
pip setuptools wheel
# Install big dependencies separately for layer caching
# !Please note that the version restrictions should be the same as pyproject.toml
# No packages listed should be removed in the next `uv sync` command
# If this happens, please update the version restrictions or update the uv.lock file
RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \
uv venv --system-site-packages /venv && \
uv pip install --no-deps \
# torch (866.2MiB)
torch==2.5.1+cu124 \
# triton (199.8MiB)
triton==3.1.0 \
# tensorflow (615.0MiB)
tensorflow>=2.16.1 \
# onnxruntime-gpu (215.7MiB)
onnxruntime-gpu==1.19.2
# Install requirements
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
--mount=source=requirements_linux_docker.txt,target=requirements_linux_docker.txt \
--mount=source=requirements.txt,target=requirements.txt \
--mount=source=setup/docker_setup.py,target=setup.py \
--mount=source=sd-scripts,target=sd-scripts,rw \
pip install -r requirements_linux_docker.txt -r requirements.txt
# Install dependencies
RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=sd-scripts,target=sd-scripts,rw \
uv sync --frozen --no-dev --no-install-project --no-editable
# Replace pillow with pillow-simd (Only for x86)
ARG TARGETPLATFORM
@ -46,44 +95,24 @@ RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/v
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
apt-get update && apt-get install -y --no-install-recommends zlib1g-dev libjpeg62-turbo-dev build-essential && \
pip uninstall -y pillow && \
CC="cc -mavx2" pip install -U --force-reinstall pillow-simd; \
uv pip uninstall pillow && \
CC="cc -mavx2" uv pip install pillow-simd; \
fi
FROM python:3.10-slim as final
########################################
# Final stage
########################################
FROM base AS final
ARG TARGETARCH
ARG TARGETVARIANT
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
WORKDIR /tmp
ENV CUDA_VERSION=12.1.1
ENV NV_CUDA_CUDART_VERSION=12.1.105-1
ENV NVIDIA_REQUIRE_CUDA=cuda>=12.1
ENV NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-1
# Install CUDA partially
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb .
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
dpkg -i cuda-keyring_1.0-1_all.deb && \
rm cuda-keyring_1.0-1_all.deb && \
sed -i 's/^Components: main$/& contrib/' /etc/apt/sources.list.d/debian.sources && \
apt-get update && \
apt-get install -y --no-install-recommends \
# Installing the whole CUDA typically increases the image size by approximately **8GB**.
# To decrease the image size, we opt to install only the necessary libraries.
# Here is the package list for your reference: https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64
# !If you experience any related issues, replace the following line with `cuda-12-1` to obtain the complete CUDA package.
cuda-cudart-12-1=${NV_CUDA_CUDART_VERSION} ${NV_CUDA_COMPAT_PACKAGE} libcusparse-12-1 libnvjitlink-12-1
# Install runtime dependencies
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
apt-get update && \
apt-get update && apt-get upgrade -y && \
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libjpeg62 libtcl8.6 libtk8.6 libgoogle-perftools-dev dumb-init
# Fix missing libnvinfer7
@ -98,20 +127,23 @@ RUN groupadd -g $UID $UID && \
# Create directories with correct permissions
RUN install -d -m 775 -o $UID -g 0 /dataset && \
install -d -m 775 -o $UID -g 0 /licenses && \
install -d -m 775 -o $UID -g 0 /app
install -d -m 775 -o $UID -g 0 /app && \
install -d -m 775 -o $UID -g 0 /venv
# Copy licenses (OpenShift Policy)
COPY --link --chmod=775 LICENSE.md /licenses/LICENSE.md
# Copy dependencies and code (and support arbitrary uid for OpenShift best practice)
COPY --link --chown=$UID:0 --chmod=775 --from=build /root/.local /home/$UID/.local
COPY --link --chown=$UID:0 --chmod=775 --from=build /venv /venv
COPY --link --chown=$UID:0 --chmod=775 . /app
ENV PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:/home/$UID/.local/bin:$PATH"
ENV PYTHONPATH="${PYTHONPATH}:/home/$UID/.local/lib/python3.10/site-packages"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV PATH="/venv/bin${PATH:+:${PATH}}"
ENV PYTHONPATH="/venv/lib/python3.11/site-packages"
ENV LD_LIBRARY_PATH="/venv/lib/python3.11/site-packages/nvidia/cudnn/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV LD_PRELOAD=libtcmalloc.so
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
# Rich logging
# https://rich.readthedocs.io/en/stable/console.html#interactive-mode
ENV FORCE_COLOR="true"
@ -130,7 +162,7 @@ STOPSIGNAL SIGINT
# Use dumb-init as PID 1 to handle signals properly
ENTRYPOINT ["dumb-init", "--"]
CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860", "--headless"]
CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860", "--headless", "--noverify"]
ARG VERSION
ARG RELEASE
@ -144,4 +176,4 @@ LABEL name="bmaltais/kohya_ss" \
release=${RELEASE} \
io.k8s.display-name="kohya_ss" \
summary="Kohya's GUI: This repository provides a Gradio GUI for Kohya's Stable Diffusion trainers(https://github.com/kohya-ss/sd-scripts)." \
description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss."
description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss."

417
README.md
View File

@ -1,8 +1,18 @@
# Kohya's GUI
This repository primarily provides a Gradio GUI for [Kohya's Stable Diffusion trainers](https://github.com/kohya-ss/sd-scripts). However, support for Linux OS is also offered through community contributions. macOS support is not optimal at the moment but might work if the conditions are favorable.
[![GitHub stars](https://img.shields.io/github/stars/bmaltais/kohya_ss?style=social)](https://github.com/bmaltais/kohya_ss/stargazers)
[![GitHub forks](https://img.shields.io/github/forks/bmaltais/kohya_ss?style=social)](https://github.com/bmaltais/kohya_ss/network/members)
[![License](https://img.shields.io/github/license/bmaltais/kohya_ss)](LICENSE.md)
[![GitHub issues](https://img.shields.io/github/issues/bmaltais/kohya_ss)](https://github.com/bmaltais/kohya_ss/issues)
The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model.
This project provides a user-friendly Gradio-based Graphical User Interface (GUI) for [Kohya's Stable Diffusion training scripts](https://github.com/kohya-ss/sd-scripts). Stable Diffusion training empowers users to customize image generation models by fine-tuning existing models, creating unique artistic styles, and training specialized models like LoRA (Low-Rank Adaptation).
Key features of this GUI include:
* Easy-to-use interface for setting a wide range of training parameters.
* Automatic generation of the command-line interface (CLI) commands required to run the training scripts.
* Support for various training methods, including LoRA, Dreambooth, fine-tuning, and SDXL training.
Support for Linux and macOS is also available. While Linux support is actively maintained through community contributions, macOS compatibility may vary.
## Table of Contents
@ -13,10 +23,13 @@ The GUI allows you to set the training parameters and generate and run the requi
- [Windows](#windows)
- [Windows Pre-requirements](#windows-pre-requirements)
- [Setup Windows](#setup-windows)
- [Optional: CUDNN 8.9.6.50](#optional-cudnn-89650)
- [Using uv based package manager](#using-uv-based-package-manager)
- [Using the pip package manager](#using-the-pip-package-manager)
- [Linux and macOS](#linux-and-macos)
- [Linux Pre-requirements](#linux-pre-requirements)
- [Setup Linux](#setup-linux)
- [Using uv based package manager](#using-uv-based-package-manager-1)
- [Using pip based package manager](#using-pip-based-package-manager)
- [Install Location](#install-location)
- [Runpod](#runpod)
- [Manual installation](#manual-installation)
@ -61,7 +74,7 @@ The GUI allows you to set the training parameters and generate and run the requi
- [v25.0.2](#v2502)
- [v25.0.1](#v2501)
- [v25.0.0](#v2500)
## 🦒 Colab
This Colab notebook was not created or maintained by me; however, it appears to function effectively. The source can be found at: <https://github.com/camenduru/kohya_ss-colab>.
@ -72,24 +85,29 @@ I would like to express my gratitude to camenduru for their valuable contributio
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------ |
| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/kohya_ss-colab/blob/main/kohya_ss_colab.ipynb) | kohya_ss_gui_colab |
## Installation
## Installation Methods
### Windows
This project offers two primary methods for installing and running the GUI: using the `uv` package manager (recommended for ease of use and automatic updates) or using the traditional `pip` package manager. Below, you'll find details on both approaches. Please read this section to decide which method best suits your needs before proceeding to the OS-specific installation prerequisites.
#### Windows Pre-requirements
**Key Differences:**
To install the necessary dependencies on a Windows system, follow these steps:
* **`uv` method:**
* Simplifies the setup process.
* Automatically handles updates when you run `gui-uv.bat` (Windows) or `gui-uv.sh` (Linux).
* No need to run `setup.bat` or `setup.sh` after the initial clone.
* This is the recommended method for most users on Windows and Linux.
* **Not recommended for Runpod or macOS installations.** For these, please use the `pip` method.
* **`pip` method:**
* The traditional method, requiring manual execution of `setup.bat` (Windows) or `setup.sh` (Linux) after cloning and for updates.
* Necessary for environments like Runpod and macOS where the `uv` scripts are not intended to be used.
1. Install [Python 3.10.11](https://www.python.org/ftp/python/3.10.11/python-3.10.11-amd64.exe).
- During the installation process, ensure that you select the option to add Python to the 'PATH' environment variable.
Subsequent sections will detail the specific commands for each method.
2. Install [CUDA 12.4 toolkit](https://developer.nvidia.com/cuda-12-4-0-download-archive?target_os=Windows&target_arch=x86_64).
### Using `uv` (Recommended)
This method utilizes the `uv` package manager for a streamlined setup and automatic updates. It is the preferred approach for most users on Windows and Linux.
3. Install [Git](https://git-scm.com/download/win).
4. Install the [Visual Studio 2015, 2017, 2019, and 2022 redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
#### Setup Windows
> [!NOTE]
> This method is not intended for Runpod or macOS installation. Use the "pip based package manager" setup instead.
To set up the project, follow these steps:
@ -107,11 +125,36 @@ To set up the project, follow these steps:
cd kohya_ss
```
4. If you want to use the new uv based version of the script to run the GUI, you do not need to follow this step. On the other hand, if you want to use the legacy "pip" based method, please follow this next step.
For Linux, the steps are similar (clone and change directory as above).
Run one of the following setup scripts by executing the following command:
### Using `pip` (Traditional Method)
This method uses the traditional `pip` package manager and requires manual script execution for setup and updates. It is necessary for environments like Runpod or macOS, or if you prefer managing your environment with `pip`.
For systems with only Python 3.10.11 installed:
Regardless of your OS, start with these steps:
1. Open a terminal and navigate to the desired installation directory.
2. Clone the repository by running the following command:
```shell
git clone --recursive https://github.com/bmaltais/kohya_ss.git
```
3. Change into the `kohya_ss` directory:
```shell
cd kohya_ss
```
Then, proceed with OS-specific instructions:
**For Windows:**
* If you want to use the new uv based version of the script to run the GUI, you do not need to follow this step. On the other hand, if you want to use the legacy "pip" based method, please follow this next step.
Run one of the following setup scripts by executing the following command:
For systems with only Python 3.10.11 installed:
```shell
.\setup.bat
@ -123,13 +166,64 @@ To set up the project, follow these steps:
.\setup-3.10.bat
```
During the accelerate config step, use the default values as proposed during the configuration unless you know your hardware demands otherwise. The amount of VRAM on your GPU does not impact the values used.
During the accelerate config step, use the default values as proposed during the configuration unless you know your hardware demands otherwise. The amount of VRAM on your GPU does not impact the values used.
#### Optional: CUDNN 8.9.6.50
* Optional: CUDNN 8.9.6.50
The following steps are optional but will improve the learning speed for owners of NVIDIA 30X0/40X0 GPUs. These steps enable larger training batch sizes and faster training speeds.
The following steps are optional but will improve the learning speed for owners of NVIDIA 30X0/40X0 GPUs. These steps enable larger training batch sizes and faster training speeds.
1. Run `.\setup.bat` and select `2. (Optional) Install cudnn files (if you want to use the latest supported cudnn version)`.
Run `.\setup.bat` and select `2. (Optional) Install cudnn files (if you want to use the latest supported cudnn version)`.
**For Linux and macOS:**
* If you want to use the new uv based version of the script to run the GUI, you do not need to follow this step. On the other hand, if you want to use the legacy "pip" based method, please follow this next step.
If you encounter permission issues, make the `setup.sh` script executable by running the following command:
```shell
chmod +x ./setup.sh
```
Run the setup script by executing the following command:
```shell
./setup.sh
```
> [!NOTE]
> If you need additional options or information about the runpod environment, you can use `setup.sh -h` or `setup.sh --help` to display the help message.
## Prerequisites
Before you begin, ensure you have the following software and hardware:
* **Python:** Version 3.10.x or 3.11.x. (Python 3.11.9 is used in Windows pre-requirements, Python 3.10.9+ for Linux).
* **Git:** For cloning the repository and managing updates.
* **NVIDIA CUDA Toolkit:** Version 12.4 or compatible (as per installation steps).
* **NVIDIA GPU:** A compatible NVIDIA graphics card is required. VRAM requirements vary depending on the model and training parameters.
* **(Optional but Recommended) NVIDIA cuDNN:** For accelerated performance on compatible NVIDIA GPUs. (Often included with CUDA Toolkit or installed separately).
* **For Windows Users:** Visual Studio 2015, 2017, 2019, and 2022 Redistributable.
## Installation
### Windows
#### Windows Pre-requirements
To install the necessary dependencies on a Windows system, follow these steps:
1. Install [Python 3.11.9](https://www.python.org/ftp/python/3.11.9/python-3.11.9-amd64.exe).
- During the installation process, ensure that you select the option to add Python to the 'PATH' environment variable.
2. Install [CUDA 12.4 toolkit](https://developer.nvidia.com/cuda-12-4-0-download-archive?target_os=Windows&target_arch=x86_64).
3. Install [Git](https://git-scm.com/download/win).
4. Install the [Visual Studio 2015, 2017, 2019, and 2022 redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe).
#### Setup Windows
For detailed setup instructions using either `uv` or `pip`, please refer to the 'Installation Methods' section above. Ensure you have met the Windows Pre-requirements before proceeding with either method.
### Linux and macOS
@ -149,40 +243,12 @@ To install the necessary dependencies on a Linux system, ensure that you fulfill
#### Setup Linux
To set up the project on Linux or macOS, perform the following steps:
1. Open a terminal and navigate to the desired installation directory.
2. Clone the repository by running the following command:
```shell
git clone --recursive https://github.com/bmaltais/kohya_ss.git
```
3. Change into the `kohya_ss` directory:
```shell
cd kohya_ss
```
4. If you want to use the new uv based version of the script to run the GUI, you do not need to follow this step. On the other hand, if you want to use the legacy "pip" based method, please follow this next step.
If you encounter permission issues, make the `setup.sh` script executable by running the following command:
```shell
chmod +x ./setup.sh
```
Run the setup script by executing the following command:
```shell
./setup.sh
```
Note: If you need additional options or information about the runpod environment, you can use `setup.sh -h` or `setup.sh --help` to display the help message.
For detailed setup instructions using either `uv` or `pip`, please refer to the 'Installation Methods' section above. Ensure you have met the Linux Pre-requirements before proceeding with either method.
#### Install Location
Note: The information below regarding install location applies to both `uv` and `pip` installation methods described in the 'Installation Methods' section.
The default installation location on Linux is the directory where the script is located. If a previous installation is detected in that location, the setup will proceed there. Otherwise, the installation will fall back to `/opt/kohya_ss`. If `/opt` is not writable, the fallback location will be `$HOME/kohya_ss`. Finally, if none of the previous options are viable, the installation will be performed in the current directory.
For macOS and other non-Linux systems, the installation process will attempt to detect the previous installation directory based on where the script is run. If a previous installation is not found, the default location will be `$HOME/kohya_ss`. You can override this behavior by specifying a custom installation directory using the `-d` or `--dir` option when running the setup script.
@ -191,128 +257,15 @@ If you choose to use the interactive mode, the default values for the accelerate
### Runpod
#### Manual installation
To install the necessary components for Runpod and run kohya_ss, follow these steps:
1. Select the Runpod pytorch 2.2.0 template. This is important. Other templates may not work.
2. SSH into the Runpod.
3. Clone the repository by running the following command:
```shell
cd /workspace
git clone --recursive https://github.com/bmaltais/kohya_ss.git
```
4. Run the setup script:
```shell
cd kohya_ss
./setup-runpod.sh
```
5. Run the GUI with:
```shell
./gui.sh --share --headless
```
or with this if you expose 7860 directly via the runpod configuration:
```shell
./gui.sh --listen=0.0.0.0 --headless
```
6. Connect to the public URL displayed after the installation process is completed.
#### Pre-built Runpod template
To run from a pre-built Runpod template, you can:
1. Open the Runpod template by clicking on <https://runpod.io/gsc?template=ya6013lj5a&ref=w18gds2n>.
2. Deploy the template on the desired host.
3. Once deployed, connect to the Runpod on HTTP 3010 to access the kohya_ss GUI. You can also connect to auto1111 on HTTP 3000.
See [Runpod Installation Guide](docs/installation_runpod.md) for details.
### Novita
#### Pre-built Novita template
1. Open the Novita template by clicking on <https://novita.ai/gpus-console?templateId=312>.
2. Deploy the template on the desired host.
3. Once deployed, connect to the Novita on HTTP 7860 to access the kohya_ss GUI.
See [Novita Installation Guide](docs/installation_novita.md) for details.
### Docker
#### Get your Docker ready for GPU support
##### Windows
Once you have installed [**Docker Desktop**](https://www.docker.com/products/docker-desktop/), [**CUDA Toolkit**](https://developer.nvidia.com/cuda-downloads), [**NVIDIA Windows Driver**](https://www.nvidia.com.tw/Download/index.aspx), and ensured that your Docker is running with [**WSL2**](https://docs.docker.com/desktop/wsl/#turn-on-docker-desktop-wsl-2), you are ready to go.
Here is the official documentation for further reference.
<https://docs.nvidia.com/cuda/wsl-user-guide/index.html#nvidia-compute-software-support-on-wsl-2>
<https://docs.docker.com/desktop/wsl/use-wsl/#gpu-support>
##### Linux, OSX
Install an NVIDIA GPU Driver if you do not already have one installed.
<https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html>
Install the NVIDIA Container Toolkit with this guide.
<https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>
#### Design of our Dockerfile
- It is required that all training data is stored in the `dataset` subdirectory, which is mounted into the container at `/dataset`.
- Please note that the file picker functionality is not available. Instead, you will need to manually input the folder path and configuration file path.
- TensorBoard has been separated from the project.
- TensorBoard is not included in the Docker image.
- The "Start TensorBoard" button has been hidden.
- TensorBoard is launched from a distinct container [as shown here](/docker-compose.yaml#L41).
- The browser won't be launched automatically. You will need to manually open the browser and navigate to [http://localhost:7860/](http://localhost:7860/) and [http://localhost:6006/](http://localhost:6006/)
- This Dockerfile has been designed to be easily disposable. You can discard the container at any time and restart it with the new code version.
#### Use the pre-built Docker image
```bash
git clone --recursive https://github.com/bmaltais/kohya_ss.git
cd kohya_ss
docker compose up -d
```
To update the system, do `docker compose down && docker compose up -d --pull always`
#### Local docker build
> [!IMPORTANT]
> Clone the Git repository ***recursively*** to include submodules:
> `git clone --recursive https://github.com/bmaltais/kohya_ss.git`
```bash
git clone --recursive https://github.com/bmaltais/kohya_ss.git
cd kohya_ss
docker compose up -d --build
```
> [!NOTE]
> Building the image may take up to 20 minutes to complete.
To update the system, ***checkout to the new code version*** and rebuild using `docker compose down && docker compose up -d --build --pull always`
> If you are running on Linux, an alternative Docker container port with fewer limitations is available [here](https://github.com/P2Enjoy/kohya_ss-docker).
#### ashleykleynhans runpod docker builds
You may want to use the following repositories when running on runpod:
- Standalone Kohya_ss template: <https://github.com/ashleykleynhans/kohya-docker>
- Auto1111 + Kohya_ss GUI template: <https://github.com/ashleykleynhans/stable-diffusion-docker>
See [Docker Installation Guide](docs/installation_docker.md) for details.
## Upgrading
@ -320,41 +273,51 @@ To upgrade your installation to a new version, follow the instructions below.
### Windows Upgrade
If a new release becomes available, you can upgrade your repository by running the following commands from the root directory of the project:
If a new release becomes available, you can upgrade your repository by following these steps:
1. Pull the latest changes from the repository:
* **If you are using the `uv`-based installation (`gui-uv.bat`):**
1. Pull the latest changes from the repository:
```powershell
git pull
```
2. Updates to the Python environment are handled automatically when you next run the `gui-uv.bat` script. No separate setup script execution is needed.
```powershell
git pull
```
2. Run the setup script (if you do not use the uv script. If you use the uv versions of the scripts the updates are done for you automatically.):
```powershell
.\setup.bat
```
* **If you are using the `pip`-based installation (`gui.bat` or `gui.ps1`):**
1. Pull the latest changes from the repository:
```powershell
git pull
```
2. Run the setup script to update dependencies:
```powershell
.\setup.bat
```
### Linux and macOS Upgrade
To upgrade your installation on Linux or macOS, follow these steps:
1. Open a terminal and navigate to the root directory of the project.
* **If you are using the `uv`-based installation (`gui-uv.sh`):**
1. Open a terminal and navigate to the root directory of the project.
2. Pull the latest changes from the repository:
```bash
git pull
```
3. Updates to the Python environment are handled automatically when you next run the `gui-uv.sh` script. No separate setup script execution is needed.
2. Pull the latest changes from the repository:
```bash
git pull
```
3. Refresh and update everything (if you do not use the uv based script. If you use the uv versions of the scripts the updates are done for you automatically.):
```bash
./setup.sh
```
* **If you are using the `pip`-based installation (`gui.sh`):**
1. Open a terminal and navigate to the root directory of the project.
2. Pull the latest changes from the repository:
```bash
git pull
```
3. Refresh and update everything by running the setup script:
```bash
./setup.sh
```
## Starting GUI Service
To launch the GUI service, you can use the provided scripts or run the `kohya_gui.py` script directly. Use the command line arguments listed below to configure the underlying service.
To launch the GUI service, use the script corresponding to your chosen installation method (`uv` or `pip`), or run the `kohya_gui.py` script directly. Use the command line arguments listed below to configure the underlying service.
```text
--help show this help message and exit
@ -380,9 +343,9 @@ To launch the GUI service, you can use the provided scripts or run the `kohya_gu
--noverify Disable requirements verification
```
### Launching the GUI on Windows (non uv based method)
### Launching the GUI on Windows (pip method)
On Windows, you can use either the `gui.ps1` or `gui.bat` script located in the root directory. Choose the script that suits your preference and run it in a terminal, providing the desired command line arguments. Here's an example:
If you installed using the `pip` method, use either the `gui.ps1` or `gui.bat` script located in the root directory. Choose the script that suits your preference and run it in a terminal, providing the desired command line arguments. Here's an example:
```powershell
gui.ps1 --listen 127.0.0.1 --server_port 7860 --inbrowser --share
@ -394,9 +357,11 @@ or
gui.bat --listen 127.0.0.1 --server_port 7860 --inbrowser --share
```
### Alternative: UV-based Method
### Launching the GUI on Windows (uv method)
To use the UV-based method for running the GUI, which does not require running `setup.bat`, follow these steps:
If you installed using the `uv` method, use the `gui-uv.bat` script to start the GUI. Follow these steps:
When you run `gui-uv.bat`, it will first check if `uv` is installed on your system. If `uv` is not found, the script will prompt you, asking if you'd like to attempt an automatic installation. You can choose 'Y' to let the script try to install `uv` for you, or 'N' to cancel. If you cancel, you'll need to install `uv` manually from [https://astral.sh/uv](https://astral.sh/uv) before running `gui-uv.bat` again.
```cmd
.\gui-uv.bat
@ -408,19 +373,21 @@ or
.\gui-uv.bat --listen 127.0.0.1 --server_port 7860 --inbrowser --share
```
This method provides an alternative way to run the GUI with the latest features, including support for Flux.1 and SD3, and eliminates the need for the setup script.
This script utilizes the `uv` managed environment.
### Launching the GUI on Linux and macOS
To launch the GUI on Linux or macOS, run the `gui.sh` script located in the root directory. Provide the desired command line arguments as follows:
If you installed using the `pip` method on Linux or macOS, run the `gui.sh` script located in the root directory. Provide the desired command line arguments as follows:
```bash
./gui.sh --listen 127.0.0.1 --server_port 7860 --inbrowser --share
```
### Alternative: UV-based Method for Linux
### Launching the GUI on Linux (uv method)
To use the UV-based method for running the GUI, which does not require running `setup.sh`, follow these steps:
If you installed using the `uv` method on Linux, use the `gui-uv.sh` script to start the GUI. Follow these steps:
When you run `gui-uv.sh`, it will first check if `uv` is installed on your system. If `uv` is not found, the script will prompt you, asking if you'd like to attempt an automatic installation. You can choose 'Y' (or 'y') to let the script try to install `uv` for you, or 'N' (or 'n') to cancel. If you cancel, you'll need to install `uv` manually from [https://astral.sh/uv](https://astral.sh/uv) before running `gui-uv.sh` again.
```shell
./gui-uv.sh --listen 127.0.0.1 --server_port 7860 --inbrowser --share
@ -432,7 +399,7 @@ If you are running on a headless server, use:
./gui-uv.sh --headless --listen 127.0.0.1 --server_port 7860 --inbrowser --share
```
This method provides an alternative way to run the GUI with the latest features, including support for Flux.1 and SD3, and eliminates the need for the setup script.
This script utilizes the `uv` managed environment.
## Custom Path Defaults
@ -454,6 +421,10 @@ To train a LoRA, you can currently use the `train_network.py` code. You can crea
Once you have created the LoRA network, you can generate images using auto1111 by installing [this extension](https://github.com/kohya-ss/sd-webui-additional-networks).
For more detailed information on LoRA training options and advanced configurations, please refer to our LoRA documentation:
- [LoRA Training Guide](docs/LoRA/top_level.md)
- [LoRA Training Options](docs/LoRA/options.md)
## Sample image generation during training
A prompt file might look like this, for example:
@ -491,24 +462,18 @@ If you encounter an error indicating that the module `tkinter` is not found, try
### LORA Training on TESLA V100 - GPU Utilization Issue
#### Issue Summary
When training LORA on a TESLA V100, users reported low GPU utilization. Additionally, there was difficulty in specifying GPUs other than the default for training.
#### Potential Solutions
- **GPU Selection:** Users can specify GPU IDs in the setup configuration to select the desired GPUs for training.
- **Improving GPU Load:** Utilizing `adamW8bit` optimizer and increasing the batch size can help achieve 70-80% GPU utilization without exceeding GPU memory limits.
See [Troubleshooting LORA Training on TESLA V100](docs/troubleshooting_tesla_v100.md) for details.
## SDXL training
The documentation in this section will be moved to a separate document later.
For detailed guidance on SDXL training, please refer to the [official sd-scripts documentation](https://github.com/kohya-ss/sd-scripts/blob/main/README.md#sdxl-training) and relevant sections in our [LoRA Training Guide](docs/LoRA/top_level.md).
## Masked loss
The masked loss is supported in each training script. To enable the masked loss, specify the `--masked_loss` option.
The feature is not fully tested, so there may be bugs. If you find any issues, please open an Issue.
> [!WARNING]
> The feature is not fully tested, so there may be bugs. If you find any issues, please open an Issue.
ControlNet dataset is used to specify the mask. The mask images should be the RGB images. The pixel value 255 in R channel is treated as the mask (the loss is calculated only for the pixels with the mask), and 0 is treated as the non-mask. The pixel values 0-255 are converted to 0-1 (i.e., the pixel value 128 is treated as the half weight of the loss). See details for the dataset specification in the [LLLite documentation](./docs/train_lllite_README.md#preparing-the-dataset).
@ -541,6 +506,17 @@ For more details, visit the [GitHub issue](https://github.com/bmaltais/kohya_ss/
To finetune HunyuanDiT models or create LoRAs, visit this [fork](https://github.com/Tencent/HunyuanDiT/tree/main/kohya_ss-hydit)
## Contributing
Contributions are welcome! If you'd like to contribute to this project, please consider the following:
- For bug reports or feature requests, please open an issue on the [GitHub Issues page](https://github.com/bmaltais/kohya_ss/issues).
- If you'd like to submit code changes, please open a pull request. Ensure your changes are well-tested and follow the existing code style.
- For security-related concerns, please refer to our `SECURITY.md` file.
## License
This project is licensed under the Apache License 2.0. See the [LICENSE.md](LICENSE.md) file for details.
## Change History
### v25.0.3
@ -558,12 +534,9 @@ To finetune HunyuanDiT models or create LoRAs, visit this [fork](https://github.
### v25.0.0
This is a SIGNIFICANT upgrade. I am going into uncharted territory here because kohya has not merged any of the recent flux.1 and sd3 updates to his code in his main branch yet... but I feel updates to his code have pretty much dried up, and I think his code is probably ready for prime time. So instead of keeping my GUI in the cave man ages, I am opting to move the code for the GUI with support for flux.1 and sd3 to the main branch of my project. Perhaps this will bite me in the proverbial ass... but for those who would rather stay on the older pre "flux.1 and sd3" updates, you can always do:
```shell
git checkout v24.1.7
```
after cloning the repo.
For all the info regarding the new flux.1 and sd3 parameters, see <https://github.com/kohya-ss/sd-scripts/blob/sd3/README.md> for more details.
- Major update: Introduced support for flux.1 and sd3, moving the GUI to align with more recent script functionalities.
- Users preferring the pre-flux.1/sd3 version can check out tag `v24.1.7`.
```shell
git checkout v24.1.7
```
- For details on new flux.1 and sd3 parameters, refer to the [sd-scripts README](https://github.com/kohya-ss/sd-scripts/blob/sd3/README.md).

View File

@ -1,110 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.dreambooth_gui import dreambooth_tab
from kohya_gui.utilities import utilities_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Build and launch the Dreambooth training GUI.

    Keyword Args:
        language: optional locale code forwarded to add_javascript for UI localization.
        headless: run without file-picker dialogs (server has no display).
        username / password: when BOTH are non-empty, enable Gradio basic auth.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
        share: create a public Gradio share link.
        listen: interface/IP address to bind the server to.
    """
    # Inject localization JavaScript for the requested language (no-op for None).
    add_javascript(kwargs.get("language"))
    css = ""

    headless = kwargs.get("headless", False)
    log.info(f"headless: {headless}")

    # Load the optional project stylesheet when present.
    if os.path.exists("./assets/style.css"):
        with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file:
            log.info("Load CSS...")
            css += file.read() + "\n"

    interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default())

    with interface:
        with gr.Tab("Dreambooth"):
            # dreambooth_tab returns the shared directory inputs so the
            # Utilities tab can operate on the same paths.
            (
                train_data_dir_input,
                reg_data_dir_input,
                output_dir_input,
                logging_dir_input,
            ) = dreambooth_tab(headless=headless)
        with gr.Tab("Utilities"):
            utilities_tab(
                train_data_dir_input=train_data_dir_input,
                reg_data_dir_input=reg_data_dir_input,
                output_dir_input=output_dir_input,
                logging_dir_input=logging_dir_input,
                enable_copy_info_button=True,
                headless=headless,
            )

    # Show the interface
    # Only forward launch options that were actually provided/enabled;
    # Gradio applies its own defaults for anything omitted.
    launch_kwargs = {}
    username = kwargs.get("username")
    password = kwargs.get("password")
    server_port = kwargs.get("server_port", 0)
    inbrowser = kwargs.get("inbrowser", False)
    share = kwargs.get("share", False)
    server_name = kwargs.get("listen")

    launch_kwargs["server_name"] = server_name
    if username and password:
        launch_kwargs["auth"] = (username, password)
    if server_port > 0:
        launch_kwargs["server_port"] = server_port
    if inbrowser:
        launch_kwargs["inbrowser"] = inbrowser
    if share:
        launch_kwargs["share"] = share
    interface.launch(**launch_kwargs)
if __name__ == "__main__":
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--listen",
        type=str,
        default="127.0.0.1",
        help="IP to listen on for connections to Gradio",
    )
    parser.add_argument(
        "--username", type=str, default="", help="Username for authentication"
    )
    parser.add_argument(
        "--password", type=str, default="", help="Password for authentication"
    )
    parser.add_argument(
        "--server_port",
        type=int,
        default=0,
        help="Port to run the server listener on",
    )
    parser.add_argument("--inbrowser", action="store_true", help="Open in browser")
    parser.add_argument("--share", action="store_true", help="Share the gradio UI")
    parser.add_argument(
        "--headless", action="store_true", help="Is the server headless"
    )
    parser.add_argument(
        "--language", type=str, default=None, help="Set custom language"
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        share=args.share,
        listen=args.listen,
        headless=args.headless,
        language=args.language,
    )

View File

@ -1,97 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.utilities import utilities_tab
from kohya_gui.finetune_gui import finetune_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Assemble the fine-tuning GUI and start the Gradio server.

    Keyword Args:
        language: optional locale code forwarded to add_javascript.
        headless: run without file-picker dialogs.
        username / password: when BOTH are non-empty, enable Gradio basic auth.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
        share: create a public Gradio share link.
        listen: interface/IP address to bind the server to.
    """
    add_javascript(kwargs.get("language"))

    headless = kwargs.get("headless", False)
    log.info(f"headless: {headless}")

    # Pull in the optional project stylesheet when present.
    css = ""
    if os.path.exists("./assets/style.css"):
        with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file:
            log.info("Load CSS...")
            css += file.read() + "\n"

    interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default())
    with interface:
        with gr.Tab("Finetune"):
            finetune_tab(headless=headless)
        with gr.Tab("Utilities"):
            utilities_tab(enable_dreambooth_tab=False, headless=headless)

    # Show the interface
    # Build the launch options incrementally, forwarding only what was enabled.
    launch_kwargs = {"server_name": kwargs.get("listen")}

    username = kwargs.get("username")
    password = kwargs.get("password")
    if username and password:
        launch_kwargs["auth"] = (username, password)

    port = kwargs.get("server_port", 0)
    if port > 0:
        launch_kwargs["server_port"] = port

    inbrowser = kwargs.get("inbrowser", False)
    if inbrowser:
        launch_kwargs["inbrowser"] = inbrowser

    share = kwargs.get("share", False)
    if share:
        launch_kwargs["share"] = share

    interface.launch(**launch_kwargs)
if __name__ == "__main__":
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--listen",
        type=str,
        default="127.0.0.1",
        help="IP to listen on for connections to Gradio",
    )
    parser.add_argument(
        "--username", type=str, default="", help="Username for authentication"
    )
    parser.add_argument(
        "--password", type=str, default="", help="Password for authentication"
    )
    parser.add_argument(
        "--server_port",
        type=int,
        default=0,
        help="Port to run the server listener on",
    )
    parser.add_argument("--inbrowser", action="store_true", help="Open in browser")
    parser.add_argument("--share", action="store_true", help="Share the gradio UI")
    parser.add_argument(
        "--headless", action="store_true", help="Is the server headless"
    )
    parser.add_argument(
        "--language", type=str, default=None, help="Set custom language"
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        share=args.share,
        listen=args.listen,
        headless=args.headless,
        language=args.language,
    )

View File

@ -1,118 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.utilities import utilities_tab
from kohya_gui.lora_gui import lora_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Build and launch the LoRA training GUI.

    Keyword Args:
        language: optional locale code forwarded to add_javascript.
        headless: run without file-picker dialogs.
        username / password: when BOTH are non-empty, enable Gradio basic auth.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
        share: create a public Gradio share link.
        listen: interface/IP address to bind the server to.
    """
    try:
        # NOTE: the previous version wrapped everything in `while True:` with no
        # break, which would rebuild and relaunch the interface forever if
        # launch() ever returned. One build + one launch is the intent.
        add_javascript(kwargs.get("language"))
        css = ""

        headless = kwargs.get("headless", False)
        log.info(f"headless: {headless}")

        # Load the optional project stylesheet when present.
        if os.path.exists("./assets/style.css"):
            with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file:
                log.info("Load CSS...")
                css += file.read() + "\n"

        interface = gr.Blocks(
            css=css, title="Kohya_ss GUI", theme=gr.themes.Default()
        )

        with interface:
            with gr.Tab("LoRA"):
                # lora_tab returns the shared directory inputs so the
                # Utilities tab can operate on the same paths.
                (
                    train_data_dir_input,
                    reg_data_dir_input,
                    output_dir_input,
                    logging_dir_input,
                ) = lora_tab(headless=headless)
            with gr.Tab("Utilities"):
                utilities_tab(
                    train_data_dir_input=train_data_dir_input,
                    reg_data_dir_input=reg_data_dir_input,
                    output_dir_input=output_dir_input,
                    logging_dir_input=logging_dir_input,
                    enable_copy_info_button=True,
                    headless=headless,
                )

        # Show the interface
        # Only forward launch options that were actually provided/enabled.
        launch_kwargs = {}
        username = kwargs.get("username")
        password = kwargs.get("password")
        server_port = kwargs.get("server_port", 0)
        inbrowser = kwargs.get("inbrowser", False)
        share = kwargs.get("share", False)
        server_name = kwargs.get("listen")

        launch_kwargs["server_name"] = server_name
        if username and password:
            launch_kwargs["auth"] = (username, password)
        if server_port > 0:
            launch_kwargs["server_port"] = server_port
        if inbrowser:
            launch_kwargs["inbrowser"] = inbrowser
        if share:
            launch_kwargs["share"] = share
        log.info(launch_kwargs)
        interface.launch(**launch_kwargs)
    except KeyboardInterrupt:
        # Code to execute when Ctrl+C is pressed
        print("You pressed Ctrl+C!")
if __name__ == "__main__":
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--listen",
        type=str,
        default="127.0.0.1",
        help="IP to listen on for connections to Gradio",
    )
    parser.add_argument(
        "--username", type=str, default="", help="Username for authentication"
    )
    parser.add_argument(
        "--password", type=str, default="", help="Password for authentication"
    )
    parser.add_argument(
        "--server_port",
        type=int,
        default=0,
        help="Port to run the server listener on",
    )
    parser.add_argument("--inbrowser", action="store_true", help="Open in browser")
    parser.add_argument("--share", action="store_true", help="Share the gradio UI")
    parser.add_argument(
        "--headless", action="store_true", help="Is the server headless"
    )
    parser.add_argument(
        "--language", type=str, default=None, help="Set custom language"
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        share=args.share,
        listen=args.listen,
        headless=args.headless,
        language=args.language,
    )

View File

@ -1,110 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.textual_inversion_gui import ti_tab
from kohya_gui.utilities import utilities_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Build and launch the Textual Inversion training GUI.

    Keyword Args:
        language: optional locale code forwarded to add_javascript for UI localization.
        headless: run without file-picker dialogs (server has no display).
        username / password: when BOTH are non-empty, enable Gradio basic auth.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
        share: create a public Gradio share link.
        listen: interface/IP address to bind the server to.
    """
    # Inject localization JavaScript for the requested language (no-op for None).
    add_javascript(kwargs.get("language"))
    css = ""

    headless = kwargs.get("headless", False)
    log.info(f"headless: {headless}")

    # Load the optional project stylesheet when present.
    if os.path.exists("./assets/style.css"):
        with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file:
            log.info("Load CSS...")
            css += file.read() + "\n"

    interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default())

    with interface:
        with gr.Tab("Dreambooth TI"):
            # ti_tab returns the shared directory inputs so the
            # Utilities tab can operate on the same paths.
            (
                train_data_dir_input,
                reg_data_dir_input,
                output_dir_input,
                logging_dir_input,
            ) = ti_tab(headless=headless)
        with gr.Tab("Utilities"):
            utilities_tab(
                train_data_dir_input=train_data_dir_input,
                reg_data_dir_input=reg_data_dir_input,
                output_dir_input=output_dir_input,
                logging_dir_input=logging_dir_input,
                enable_copy_info_button=True,
                headless=headless,
            )

    # Show the interface
    # Only forward launch options that were actually provided/enabled.
    launch_kwargs = {}
    username = kwargs.get("username")
    password = kwargs.get("password")
    server_port = kwargs.get("server_port", 0)
    inbrowser = kwargs.get("inbrowser", False)
    share = kwargs.get("share", False)
    server_name = kwargs.get("listen")

    launch_kwargs["server_name"] = server_name
    if username and password:
        launch_kwargs["auth"] = (username, password)
    if server_port > 0:
        launch_kwargs["server_port"] = server_port
    if inbrowser:
        launch_kwargs["inbrowser"] = inbrowser
    if share:
        launch_kwargs["share"] = share
    interface.launch(**launch_kwargs)
if __name__ == "__main__":
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--listen",
        type=str,
        default="127.0.0.1",
        help="IP to listen on for connections to Gradio",
    )
    parser.add_argument(
        "--username", type=str, default="", help="Username for authentication"
    )
    parser.add_argument(
        "--password", type=str, default="", help="Password for authentication"
    )
    parser.add_argument(
        "--server_port",
        type=int,
        default=0,
        help="Port to run the server listener on",
    )
    parser.add_argument("--inbrowser", action="store_true", help="Open in browser")
    parser.add_argument("--share", action="store_true", help="Share the gradio UI")
    parser.add_argument(
        "--headless", action="store_true", help="Is the server headless"
    )
    parser.add_argument(
        "--language", type=str, default=None, help="Set custom language"
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        share=args.share,
        listen=args.listen,
        headless=args.headless,
        language=args.language,
    )

View File

@ -1,69 +0,0 @@
import argparse
import gradio as gr
import os
from kohya_gui.utilities import utilities_tab
from kohya_gui.custom_logging import setup_logging
from kohya_gui.localization_ext import add_javascript
# Set up logging
log = setup_logging()
def UI(**kwargs):
    """Build and launch the standalone utilities GUI.

    Keyword Args:
        username / password: enable Gradio basic auth when a username is supplied.
        server_port: port to listen on; 0 lets Gradio choose its default.
        inbrowser: open a browser tab after launch.
    """
    css = ''
    # Load the optional project stylesheet when present.
    if os.path.exists('./assets/style.css'):
        with open(os.path.join('./assets/style.css'), 'r', encoding='utf8') as file:
            # Use the module logger instead of print(), consistent with the
            # other kohya_gui entry points.
            log.info('Load CSS...')
            css += file.read() + '\n'

    interface = gr.Blocks(css=css)

    with interface:
        utilities_tab()

    # Show the interface
    launch_kwargs = {}
    username = kwargs.get('username', '')
    password = kwargs.get('password', '')
    # Enable auth only for a non-empty username. The old check
    # (`not kwargs.get('username', None) == ''`) also fired when the kwarg was
    # absent (None), passing auth=(None, None) to Gradio.
    if username:
        launch_kwargs['auth'] = (username, password)
    if kwargs.get('server_port', 0) > 0:
        launch_kwargs['server_port'] = kwargs.get('server_port', 0)
    if kwargs.get('inbrowser', False):
        launch_kwargs['inbrowser'] = kwargs.get('inbrowser', False)
    log.info(launch_kwargs)
    interface.launch(**launch_kwargs)
if __name__ == '__main__':
    # torch.cuda.set_per_process_memory_fraction(0.48)
    # Command-line entry point: collect server/auth options and start the GUI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--username', type=str, default='', help='Username for authentication'
    )
    parser.add_argument(
        '--password', type=str, default='', help='Password for authentication'
    )
    parser.add_argument(
        '--server_port',
        type=int,
        default=0,
        help='Port to run the server listener on',
    )
    parser.add_argument(
        '--inbrowser', action='store_true', help='Open in browser'
    )

    args = parser.parse_args()

    # Forward parsed options to UI() as keyword arguments.
    UI(
        username=args.username,
        password=args.password,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
    )

View File

@ -0,0 +1,67 @@
### Docker
#### Get your Docker ready for GPU support
##### Windows
Once you have installed [**Docker Desktop**](https://www.docker.com/products/docker-desktop/), [**CUDA Toolkit**](https://developer.nvidia.com/cuda-downloads), [**NVIDIA Windows Driver**](https://www.nvidia.com.tw/Download/index.aspx), and ensured that your Docker is running with [**WSL2**](https://docs.docker.com/desktop/wsl/#turn-on-docker-desktop-wsl-2), you are ready to go.
Here is the official documentation for further reference.
<https://docs.nvidia.com/cuda/wsl-user-guide/index.html#nvidia-compute-software-support-on-wsl-2>
<https://docs.docker.com/desktop/wsl/use-wsl/#gpu-support>
##### Linux, OSX
Install an NVIDIA GPU Driver if you do not already have one installed.
<https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html>
Install the NVIDIA Container Toolkit with this guide.
<https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>
#### Design of our Dockerfile
- It is required that all training data is stored in the `dataset` subdirectory, which is mounted into the container at `/dataset`.
- Please note that the file picker functionality is not available. Instead, you will need to manually input the folder path and configuration file path.
- TensorBoard has been separated from the project.
- TensorBoard is not included in the Docker image.
- The "Start TensorBoard" button has been hidden.
- TensorBoard is launched from a distinct container [as shown here](/docker-compose.yaml#L41).
- The browser won't be launched automatically. You will need to manually open the browser and navigate to [http://localhost:7860/](http://localhost:7860/) and [http://localhost:6006/](http://localhost:6006/)
- This Dockerfile has been designed to be easily disposable. You can discard the container at any time and restart it with the new code version.
#### Use the pre-built Docker image
```bash
git clone --recursive https://github.com/bmaltais/kohya_ss.git
cd kohya_ss
docker compose up -d
```
To update the system, do `docker compose down && docker compose up -d --pull always`
#### Local docker build
> [!IMPORTANT]
> Clone the Git repository ***recursively*** to include submodules:
> `git clone --recursive https://github.com/bmaltais/kohya_ss.git`
```bash
git clone --recursive https://github.com/bmaltais/kohya_ss.git
cd kohya_ss
docker compose up -d --build
```
> [!NOTE]
> Building the image may take up to 20 minutes to complete.
To update the system, ***checkout to the new code version*** and rebuild using `docker compose down && docker compose up -d --build --pull always`
> [!NOTE]
> If you are running on Linux, an alternative Docker container port with fewer limitations is available [here](https://github.com/P2Enjoy/kohya_ss-docker).
#### ashleykleynhans runpod docker builds
You may want to use the following repositories when running on runpod:
- Standalone Kohya_ss template: <https://github.com/ashleykleynhans/kohya-docker>
- Auto1111 + Kohya_ss GUI template: <https://github.com/ashleykleynhans/stable-diffusion-docker>

View File

@ -0,0 +1,9 @@
### Novita
#### Pre-built Novita template
1. Open the Novita template by clicking on <https://novita.ai/gpus-console?templateId=312>.
2. Deploy the template on the desired host.
3. Once deployed, connect to the Novita on HTTP 7860 to access the kohya_ss GUI.

View File

@ -0,0 +1,47 @@
### Runpod
#### Manual installation
To install the necessary components for Runpod and run kohya_ss, follow these steps:
1. Select the Runpod pytorch 2.2.0 template. This is important. Other templates may not work.
2. SSH into the Runpod.
3. Clone the repository by running the following command:
```shell
cd /workspace
git clone --recursive https://github.com/bmaltais/kohya_ss.git
```
4. Run the setup script:
```shell
cd kohya_ss
./setup-runpod.sh
```
5. Run the GUI with:
```shell
./gui.sh --share --headless
```
or with this if you expose 7860 directly via the runpod configuration:
```shell
./gui.sh --listen=0.0.0.0 --headless
```
6. Connect to the public URL displayed after the installation process is completed.
#### Pre-built Runpod template
To run from a pre-built Runpod template, you can:
1. Open the Runpod template by clicking on <https://runpod.io/gsc?template=ya6013lj5a&ref=w18gds2n>.
2. Deploy the template on the desired host.
3. Once deployed, connect to the Runpod on HTTP 3010 to access the kohya_ss GUI. You can also connect to auto1111 on HTTP 3000.

View File

@ -0,0 +1,10 @@
### LORA Training on TESLA V100 - GPU Utilization Issue
#### Issue Summary
When training LORA on a TESLA V100, users reported low GPU utilization. Additionally, there was difficulty in specifying GPUs other than the default for training.
#### Potential Solutions
- **GPU Selection:** Users can specify GPU IDs in the setup configuration to select the desired GPUs for training.
- **Improving GPU Load:** Utilizing `adamW8bit` optimizer and increasing the batch size can help achieve 70-80% GPU utilization without exceeding GPU memory limits.

View File

@ -2,8 +2,19 @@
set VIRTUAL_ENV=.venv
echo VIRTUAL_ENV is set to %VIRTUAL_ENV%
:: Install uv latest version
pip install --upgrade uv -q
:: Check if uv is installed
setlocal enabledelayedexpansion
where uv >nul 2>nul
if %errorlevel% neq 0 (
set /p INSTALL_UV="uv is not installed. We can try to install it for you, or you can install it manually from https://astral.sh/uv before running this script again. Would you like to attempt automatic installation now? (Y/N) "
if /i "!INSTALL_UV!"=="Y" (
pip install --upgrade uv -q
) else (
echo Okay, please install uv manually from https://astral.sh/uv and then re-run this script. Exiting.
exit /b 1
)
)
endlocal
set PATH=%PATH%;%~dp0venv\Lib\site-packages\torch\lib

View File

@ -39,9 +39,14 @@ cd "$SCRIPT_DIR" || exit 1
# Check if uv is already installed
if ! command -v uv &> /dev/null; then
# Setup uv
read -p "uv is not installed. We can try to install it for you, or you can install it manually from https://astral.sh/uv before running this script again. Would you like to attempt automatic installation now? [Y/n]: " install_uv
if [[ "$install_uv" =~ ^[Yy]$ ]]; then
curl -LsSf https://astral.sh/uv/install.sh | sh
source $HOME/.local/bin/env
else
echo "Okay, please install uv manually from https://astral.sh/uv and then re-run this script. Exiting."
exit 1
fi
fi
git submodule update --init --recursive

View File

@ -13,10 +13,17 @@ log = setup_logging()
def load_model():
# Set the device to GPU if available, otherwise use CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
if hasattr(torch, 'cuda') and torch.cuda.is_available():
device = 'cuda'
elif hasattr(torch, 'mps') and torch.mps.is_available():
device = 'mps'
else:
device = 'cpu'
# Initialize the BLIP2 processor
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
log.debug('Processor initialized: %s', processor)
# Initialize the BLIP2 model
model = Blip2ForConditionalGeneration.from_pretrained(

View File

@ -213,6 +213,7 @@ class BasicTraining:
"PagedLion8bit",
"Prodigy",
"prodigyplus.ProdigyPlusScheduleFree",
"pytorch_optimizer.CAME",
"RAdamScheduleFree",
"SGDNesterov",
"SGDNesterov8bit",
@ -229,13 +230,7 @@ class BasicTraining:
"""
with gr.Row():
# Initialize the maximum gradient norm slider
self.max_grad_norm = gr.Slider(
label="Max grad norm",
value=self.config.get("basic.max_grad_norm", 1.0),
minimum=0.0,
maximum=1.0,
interactive=True,
)
self.max_grad_norm = gr.Number(label='Max grad norm', value=1.0, interactive=True)
# Initialize the learning rate scheduler extra arguments textbox
self.lr_scheduler_args = gr.Textbox(
label="LR scheduler extra arguments",

View File

@ -278,11 +278,11 @@ class SourceModel:
def toggle_checkboxes(v2, v_parameterization, sdxl_checkbox, sd3_checkbox, flux1_checkbox):
# Check if all checkboxes are unchecked
if not v2 and not v_parameterization and not sdxl_checkbox and not sd3_checkbox and not flux1_checkbox:
if not v2 and not sdxl_checkbox and not sd3_checkbox and not flux1_checkbox:
# If all unchecked, return new interactive checkboxes
return (
gr.Checkbox(interactive=True), # v2 checkbox
gr.Checkbox(interactive=True), # v_parameterization checkbox
gr.Checkbox(interactive=False, value=False), # v_parameterization checkbox
gr.Checkbox(interactive=True), # sdxl_checkbox
gr.Checkbox(interactive=True), # sd3_checkbox
gr.Checkbox(interactive=True), # sd3_checkbox
@ -291,7 +291,7 @@ class SourceModel:
# If any checkbox is checked, return checkboxes with current interactive state
return (
gr.Checkbox(interactive=v2), # v2 checkbox
gr.Checkbox(interactive=v_parameterization), # v_parameterization checkbox
gr.Checkbox(interactive=sdxl_checkbox), # v_parameterization checkbox
gr.Checkbox(interactive=sdxl_checkbox), # sdxl_checkbox
gr.Checkbox(interactive=sd3_checkbox), # sd3_checkbox
gr.Checkbox(interactive=flux1_checkbox), # flux1_checkbox

View File

@ -132,6 +132,7 @@ def save_configuration(
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
max_grad_norm,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@ -342,6 +343,7 @@ def open_configuration(
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
max_grad_norm,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@ -547,6 +549,7 @@ def train_model(
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
max_grad_norm,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@ -864,12 +867,12 @@ def train_model(
sd3_checkbox and sd3_cache_text_encoder_outputs_to_disk
) or (flux1_checkbox and flux1_cache_text_encoder_outputs_to_disk)
no_half_vae = sdxl and sdxl_no_half_vae
if max_data_loader_n_workers == "" or None:
if max_data_loader_n_workers in ("", None):
max_data_loader_n_workers = 0
else:
max_data_loader_n_workers = int(max_data_loader_n_workers)
if max_train_steps == "" or None:
if max_train_steps in ("", None):
max_train_steps = 0
else:
max_train_steps = int(max_train_steps)
@ -890,7 +893,7 @@ def train_model(
# def save_huggingface_to_toml(self, toml_file_path: str):
config_toml_data = {
# Update the values in the TOML data
"adaptive_noise_scale": adaptive_noise_scale if not 0 else None,
"adaptive_noise_scale": adaptive_noise_scale if adaptive_noise_scale != 0 else None,
"async_upload": async_upload,
"bucket_no_upscale": bucket_no_upscale,
"bucket_reso_steps": bucket_reso_steps,
@ -953,6 +956,7 @@ def train_model(
"lr_warmup_steps": lr_warmup_steps,
"masked_loss": masked_loss,
"max_bucket_reso": max_bucket_reso,
"max_grad_norm": max_grad_norm,
"max_timestep": max_timestep if max_timestep != 0 else None,
"max_token_length": int(max_token_length),
"max_train_epochs": (
@ -970,10 +974,10 @@ def train_model(
"min_timestep": min_timestep if min_timestep != 0 else None,
"mixed_precision": mixed_precision,
"multires_noise_discount": multires_noise_discount,
"multires_noise_iterations": multires_noise_iterations if not 0 else None,
"multires_noise_iterations": multires_noise_iterations if multires_noise_iterations != 0 else None,
"no_half_vae": no_half_vae,
"no_token_padding": no_token_padding,
"noise_offset": noise_offset if not 0 else None,
"noise_offset": noise_offset if noise_offset != 0 else None,
"noise_offset_random_strength": noise_offset_random_strength,
"noise_offset_type": noise_offset_type,
"optimizer_args": (
@ -1329,6 +1333,7 @@ def dreambooth_tab(
advanced_training.keep_tokens,
basic_training.lr_scheduler_num_cycles,
basic_training.lr_scheduler_power,
basic_training.max_grad_norm,
advanced_training.persistent_data_loader_workers,
advanced_training.bucket_no_upscale,
advanced_training.random_crop,

View File

@ -933,12 +933,12 @@ def train_model(
) or (flux1_checkbox and flux1_cache_text_encoder_outputs_to_disk)
no_half_vae = sdxl_checkbox and sdxl_no_half_vae
if max_data_loader_n_workers == "" or None:
if max_data_loader_n_workers in ("", None):
max_data_loader_n_workers = 0
else:
max_data_loader_n_workers = int(max_data_loader_n_workers)
if max_train_steps == "" or None:
if max_train_steps in ("", None):
max_train_steps = 0
else:
max_train_steps = int(max_train_steps)

File diff suppressed because it is too large Load Diff

View File

@ -739,12 +739,12 @@ def train_model(
else:
run_cmd.append(rf"{scriptdir}/sd-scripts/train_textual_inversion.py")
if max_data_loader_n_workers == "" or None:
if max_data_loader_n_workers in ("", None):
max_data_loader_n_workers = 0
else:
max_data_loader_n_workers = int(max_data_loader_n_workers)
if max_train_steps == "" or None:
if max_train_steps in ("", None):
max_train_steps = 0
else:
max_train_steps = int(max_train_steps)

View File

@ -1,15 +1,16 @@
[project]
name = "kohya-ss"
version = "0.1.0"
description = "Add your description here"
version = "25.1.0"
description = "Kohya_ss GUI"
readme = "README.md"
requires-python = ">=3.10,<3.11"
requires-python = ">=3.11,<3.13"
dependencies = [
"accelerate==0.33.0",
"aiofiles==23.2.1",
"altair==4.2.2",
"bitsandbytes==0.44.0",
"dadaptation==3.2",
"deepspeed; sys_platform == 'linux'",
"diffusers[torch]==0.32.2",
"easygui==0.98.3",
"einops==0.7.0",
@ -21,7 +22,7 @@ dependencies = [
"invisible-watermark==0.2.0",
"library",
"lion-pytorch==0.0.6",
"lycoris-lora==3.1.0",
"lycoris-lora==3.2.0.post2",
"omegaconf==2.3.0",
"onnx==1.16.1",
"onnxruntime-gpu==1.19.2",
@ -39,20 +40,37 @@ dependencies = [
"sentencepiece==0.2.0",
"tensorboard>=2.18.0",
"tensorflow>=2.16.1",
"tensorflow-io-gcs-filesystem==0.31.0",
"timm==0.6.12",
"tensorflow-io-gcs-filesystem==0.31.0; sys_platform == 'win32'",
"tensorflow-io-gcs-filesystem>=0.37.1; sys_platform == 'linux'",
"timm==0.6.7",
"tk==0.1.0",
"toml==0.10.2",
"torch>=2.5.0",
"torchvision>=0.20.0",
"transformers==4.44.2",
"triton==3.1.0; sys_platform == 'linux'",
"voluptuous==0.13.1",
"wandb==0.18.0",
"xformers>=0.0.28.post2",
]
[tool.uv.sources]
torch = [
{ index = "pytorch-cu124", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu124", marker = "sys_platform == 'win32'" }
]
torchvision = [
{ index = "pytorch-cu124", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu124", marker = "sys_platform == 'win32'" }
]
library = { path = "sd-scripts" }
[[tool.uv.index]]
name = "pytorch-cu124"
url = "https://download.pytorch.org/whl/cu124"
explicit = true
[[tool.uv.index]]
name = "pytorch-cu126"
url = "https://download.pytorch.org/whl/cu126"
explicit = true

View File

@ -2,6 +2,7 @@ accelerate==0.33.0
aiofiles==23.2.1
altair==4.2.2
dadaptation==3.2
deepspeed==0.16.7
diffusers[torch]==0.32.2
easygui==0.98.3
einops==0.7.0
@ -12,7 +13,7 @@ huggingface-hub==0.29.3
imagesize==1.4.1
invisible-watermark==0.2.0
lion-pytorch==0.0.6
lycoris_lora==3.1.0
lycoris_lora==3.2.0.post2
omegaconf==2.3.0
onnx==1.16.1
prodigyopt==1.1.2
@ -28,7 +29,7 @@ schedulefree==1.4
scipy==1.11.4
# for T5XXL tokenizer (SD3/FLUX)
sentencepiece==0.2.0
timm==0.6.12
timm==0.6.7
tk==0.1.0
toml==0.10.2
transformers==4.44.2

View File

@ -1,8 +1,8 @@
# Custom index URL for specific packages
--extra-index-url https://download.pytorch.org/whl/cu124
--extra-index-url https://download.pytorch.org/whl/cu126
torch==2.5.0+cu124
torchvision==0.20.0+cu124
torch==2.5.0+cu126
torchvision>=0.20.0+cu126
xformers==0.0.28.post2
bitsandbytes==0.44.0

View File

@ -1,4 +0,0 @@
xformers>=0.0.20
bitsandbytes==0.44.0
accelerate==0.33.0
tensorboard

View File

@ -1,5 +1,10 @@
torch==2.0.0 torchvision==0.15.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
xformers bitsandbytes==0.43.3
tensorflow-macos tensorflow-metal tensorboard==2.14.1
--extra-index-url https://download.pytorch.org/whl/nightly/cpu
torch==2.8.0.*
torchvision==0.22.*
xformers==0.0.29.*
git+https://github.com/bitsandbytes-foundation/bitsandbytes.git/#0.45.5
tensorflow-macos
tensorflow-metal
tensorboard==2.14.1
onnxruntime==1.17.1
-r requirements.txt

@ -1 +1 @@
Subproject commit b11c053b8fcd1c4532dc3a37e70109e08aafa2ec
Subproject commit 5753b8ff6bc045c27c1c61535e35195da860269c

View File

@ -96,6 +96,9 @@ def check_torch():
log.debug("XPU is available, logging XPU info...")
log_xpu_info(torch, ipex)
# Log a warning if no GPU is available
elif hasattr(torch, "mps") and torch.mps.is_available():
log.info("MPS is available, logging MPS info...")
log_mps_info(torch)
else:
log.warning("Torch reports GPU not available")
@ -130,6 +133,15 @@ def log_cuda_info(torch):
f"Torch detected GPU: {props.name} VRAM {round(props.total_memory / 1024 / 1024)}MB Arch {props.major}.{props.minor} Cores {props.multi_processor_count}"
)
def log_mps_info(torch):
    """Log information about Apple Silicon (MPS) availability.

    Args:
        torch: the imported ``torch`` module (passed in so the caller controls
            when/whether torch is imported).
    """
    # recommended_max_memory() is in bytes; convert to MB for display.
    max_recommended_mem = round(torch.mps.recommended_max_memory() / 1024**2)
    log.info(
        f"Torch detected Apple MPS: {max_recommended_mem}MB Unified Memory Available"
    )
    log.warning('MPS support is still experimental, proceed with caution.')
def log_xpu_info(torch, ipex):
"""Log information about Intel XPU-enabled GPUs."""
# Log the Intel Extension for PyTorch (IPEX) version if available

View File

@ -12,7 +12,7 @@
"cache_latents_to_disk": false,
"caption_dropout_every_n_epochs": 0,
"caption_dropout_rate": 0.05,
"caption_extension": "",
"caption_extension": ".txt",
"clip_g": "",
"clip_l": "",
"clip_skip": 2,

123
test/config/t5clrs.json Normal file
View File

@ -0,0 +1,123 @@
{
"LoRA_type": "Standard",
"LyCORIS_preset": "full",
"adaptive_noise_scale": 0,
"additional_parameters": " --optimizer_args \"weight_decay=0.01\" \"d_coef=1\" \"use_bias_correction=True\" \"safeguard_warmup=False\" \"betas=0.9,0.99\"",
"block_alphas": "",
"block_dims": "",
"block_lr_zero_threshold": "",
"bucket_no_upscale": true,
"bucket_reso_steps": 256,
"cache_latents": true,
"cache_latents_to_disk": true,
"caption_dropout_every_n_epochs": 0.0,
"caption_dropout_rate": 0,
"caption_extension": ".txt",
"clip_skip": 2,
"color_aug": false,
"constrain": 0.0,
"conv_alpha": 1,
"conv_block_alphas": "",
"conv_block_dims": "",
"conv_dim": 1,
"debiased_estimation_loss": false,
"decompose_both": false,
"dim_from_weights": false,
"down_lr_weight": "",
"enable_bucket": true,
"epoch": 50,
"factor": -1,
"flip_aug": false,
"fp8_base": false,
"full_bf16": true,
"full_fp16": false,
"gradient_accumulation_steps": "1",
"gradient_checkpointing": true,
"keep_tokens": 4,
"learning_rate": 1.0,
"logging_dir": "E:/work/LoRa_work/logging",
"lora_network_weights": "",
"lr_scheduler": "cosine",
"lr_scheduler_args": "",
"lr_scheduler_num_cycles": "",
"lr_scheduler_power": "",
"lr_warmup": 0,
"max_bucket_reso": 2048,
"max_data_loader_n_workers": "0",
"max_grad_norm": 1,
"max_resolution": "1024,1024",
"max_timestep": 1000,
"max_token_length": "75",
"max_train_epochs": "50",
"max_train_steps": "",
"mem_eff_attn": false,
"mid_lr_weight": "",
"min_bucket_reso": 512,
"min_snr_gamma": 0,
"min_timestep": 0,
"mixed_precision": "bf16",
"model_list": "custom",
"module_dropout": 0,
"multires_noise_discount": 0,
"multires_noise_iterations": 0,
"network_alpha": 2,
"network_dim": 16,
"network_dropout": 0,
"no_token_padding": false,
"noise_offset": 0,
"noise_offset_type": "Original",
"num_cpu_threads_per_process": 2,
"optimizer": "Prodigy",
"optimizer_args": "",
"output_dir": "E:/work/output",
"output_name": "zunko_pony_v1",
"persistent_data_loader_workers": false,
"pretrained_model_name_or_path": "E:/models/ponyDiffusionV6XL_v6StartWithThisOne.safetensors",
"prior_loss_weight": 1.0,
"random_crop": false,
"rank_dropout": 0,
"rank_dropout_scale": false,
"reg_data_dir": "",
"rescaled": false,
"resume": "",
"sample_every_n_epochs": 0,
"sample_every_n_steps": 0,
"sample_prompts": "",
"sample_sampler": "euler_a",
"save_every_n_epochs": 10,
"save_every_n_steps": 0,
"save_last_n_steps": 0,
"save_last_n_steps_state": 0,
"save_model_as": "safetensors",
"save_precision": "bf16",
"save_state": false,
"scale_v_pred_loss_like_noise_pred": false,
"scale_weight_norms": 0,
"sdxl": true,
"sdxl_cache_text_encoder_outputs": false,
"sdxl_no_half_vae": true,
"seed": "1055217506",
"shuffle_caption": true,
"stop_text_encoder_training": 0,
"text_encoder_lr": 0.0,
"train_batch_size": 3,
"train_data_dir": "E:/work/input/zunko_dataset",
"train_norm": false,
"train_on_input": true,
"training_comment": "",
"unet_lr": 0.0,
"unit": 1,
"up_lr_weight": "",
"use_cp": false,
"use_scalar": false,
"use_tucker": false,
"use_wandb": false,
"v2": false,
"v_parameterization": false,
"v_pred_like_loss": 0,
"vae": "",
"vae_batch_size": 0,
"wandb_api_key": "",
"weighted_captions": false,
"xformers": "xformers"
}

View File

@ -0,0 +1,360 @@
# extract approximating LoRA by svd from two SD models
# The code is based on https://github.com/cloneofsimo/lora/blob/develop/lora_diffusion/cli_svd.py
# Thanks to cloneofsimo!
import argparse
import json
import os
import time
import torch
from safetensors.torch import load_file, save_file
from tqdm import tqdm
from library import sai_model_spec, model_util, sdxl_model_util
import lora
from library.utils import setup_logging
setup_logging()
import logging
logger = logging.getLogger(__name__)
# CLAMP_QUANTILE = 0.99
# MIN_DIFF = 1e-1
def save_to_file(file_name, model, state_dict, dtype):
    """Save ``state_dict`` to ``file_name``, optionally casting tensors to ``dtype``.

    Args:
        file_name: destination path; ``.safetensors`` selects the safetensors
            format, anything else uses ``torch.save``.
        model: kept for backward compatibility with existing callers (callers
            conventionally pass the same mapping as ``state_dict``); unused.
        state_dict: mapping of parameter name -> tensor to be saved.
        dtype: target torch dtype, or None to keep tensors as-is.
    """
    if dtype is not None:
        # Cast in place so the converted tensors are what actually gets written.
        for key in list(state_dict.keys()):
            if type(state_dict[key]) == torch.Tensor:
                state_dict[key] = state_dict[key].to(dtype)

    # BUGFIX: previously saved `model`, discarding the dtype conversion applied
    # to `state_dict` whenever the two objects differed.
    if os.path.splitext(file_name)[1] == ".safetensors":
        save_file(state_dict, file_name)
    else:
        torch.save(state_dict, file_name)
def svd(
    model_org=None,
    model_tuned=None,
    save_to=None,
    dim=4,
    v2=None,
    sdxl=None,
    conv_dim=None,
    v_parameterization=None,
    device=None,
    save_precision=None,
    clamp_quantile=0.99,
    min_diff=0.01,
    no_metadata=False,
    load_precision=None,
    load_original_model_to=None,
    load_tuned_model_to=None,
):
    """Extract an approximating LoRA from the weight difference of two models.

    Loads an original and a tuned SD/SDXL checkpoint, computes per-module
    weight differences (tuned - original), low-rank-approximates each
    difference via truncated SVD, and saves the result as a LoRA weights file.

    Args:
        model_org: path to the original checkpoint (ckpt or safetensors).
        model_tuned: path to the tuned checkpoint.
        save_to: destination file name (.safetensors or ckpt).
        dim: LoRA rank for Linear / Conv2d-1x1 modules (also used as alpha).
        v2: load as Stable Diffusion 2.x (mutually exclusive with sdxl).
        sdxl: load as SDXL base (mutually exclusive with v2).
        conv_dim: rank for Conv2d-3x3 modules; None disables them.
        v_parameterization: metadata flag; defaults to `v2` when None.
        device: torch device used for the SVD computation (e.g. "cuda").
        save_precision: "float" | "fp16" | "bf16" | None (keep as-is).
        clamp_quantile: quantile used to clamp the factor matrices.
        min_diff: max-abs-diff threshold deciding whether the text encoder
            changed at all; below it only the U-Net LoRA is extracted.
        no_metadata: skip sai modelspec metadata when True.
        load_original_model_to: SDXL only; device to load the original model.
        load_tuned_model_to: SDXL only; device to load the tuned model.
    """

    def str_to_dtype(p):
        # Map a precision name to a torch dtype; None means "keep as-is".
        if p == "float":
            return torch.float
        if p == "fp16":
            return torch.float16
        if p == "bf16":
            return torch.bfloat16
        return None

    assert v2 != sdxl or (not v2 and not sdxl), "v2 and sdxl cannot be specified at the same time / v2とsdxlは同時に指定できません"
    if v_parameterization is None:
        v_parameterization = v2

    load_dtype = str_to_dtype(load_precision) if load_precision else None
    save_dtype = str_to_dtype(save_precision)
    work_device = "cpu"

    # load models
    if not sdxl:
        logger.info(f"loading original SD model : {model_org}")
        text_encoder_o, _, unet_o = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_org)
        text_encoders_o = [text_encoder_o]
        if load_dtype is not None:
            text_encoder_o = text_encoder_o.to(load_dtype)
            unet_o = unet_o.to(load_dtype)

        logger.info(f"loading tuned SD model : {model_tuned}")
        text_encoder_t, _, unet_t = model_util.load_models_from_stable_diffusion_checkpoint(v2, model_tuned)
        text_encoders_t = [text_encoder_t]
        if load_dtype is not None:
            text_encoder_t = text_encoder_t.to(load_dtype)
            unet_t = unet_t.to(load_dtype)

        model_version = model_util.get_model_version_str_for_sd1_sd2(v2, v_parameterization)
    else:
        device_org = load_original_model_to if load_original_model_to else "cpu"
        device_tuned = load_tuned_model_to if load_tuned_model_to else "cpu"

        logger.info(f"loading original SDXL model : {model_org}")
        text_encoder_o1, text_encoder_o2, _, unet_o, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint(
            sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_org, device_org
        )
        text_encoders_o = [text_encoder_o1, text_encoder_o2]
        if load_dtype is not None:
            text_encoder_o1 = text_encoder_o1.to(load_dtype)
            text_encoder_o2 = text_encoder_o2.to(load_dtype)
            unet_o = unet_o.to(load_dtype)

        # BUGFIX: this message previously said "original" for the tuned model.
        logger.info(f"loading tuned SDXL model : {model_tuned}")
        text_encoder_t1, text_encoder_t2, _, unet_t, _, _ = sdxl_model_util.load_models_from_sdxl_checkpoint(
            sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0, model_tuned, device_tuned
        )
        text_encoders_t = [text_encoder_t1, text_encoder_t2]
        if load_dtype is not None:
            text_encoder_t1 = text_encoder_t1.to(load_dtype)
            text_encoder_t2 = text_encoder_t2.to(load_dtype)
            unet_t = unet_t.to(load_dtype)

        model_version = sdxl_model_util.MODEL_VERSION_SDXL_BASE_V1_0

    # create LoRA network to extract weights: Use dim (rank) as alpha
    if conv_dim is None:
        kwargs = {}
    else:
        kwargs = {"conv_dim": conv_dim, "conv_alpha": conv_dim}

    lora_network_o = lora.create_network(1.0, dim, dim, None, text_encoders_o, unet_o, **kwargs)
    lora_network_t = lora.create_network(1.0, dim, dim, None, text_encoders_t, unet_t, **kwargs)
    assert len(lora_network_o.text_encoder_loras) == len(
        lora_network_t.text_encoder_loras
    ), f"model version is different (SD1.x vs SD2.x) / それぞれのモデルのバージョンが違いますSD1.xベースとSD2.xベース "

    # get diffs
    diffs = {}
    text_encoder_different = False
    for i, (lora_o, lora_t) in enumerate(zip(lora_network_o.text_encoder_loras, lora_network_t.text_encoder_loras)):
        lora_name = lora_o.lora_name
        module_o = lora_o.org_module
        module_t = lora_t.org_module
        diff = module_t.weight.to(work_device) - module_o.weight.to(work_device)

        # clear weight to save memory
        module_o.weight = None
        module_t.weight = None

        # Text Encoder might be same
        if not text_encoder_different and torch.max(torch.abs(diff)) > min_diff:
            text_encoder_different = True
            logger.info(f"Text encoder is different. {torch.max(torch.abs(diff))} > {min_diff}")

        diffs[lora_name] = diff

    # clear target Text Encoder to save memory
    # NOTE(review): `del` on the loop variable only drops that binding; the
    # list `text_encoders_t` still holds the encoders. Kept as-is to preserve
    # the original (best-effort) memory behavior.
    for text_encoder in text_encoders_t:
        del text_encoder

    if not text_encoder_different:
        logger.warning("Text encoder is same. Extract U-Net only.")
        lora_network_o.text_encoder_loras = []
        diffs = {}  # clear diffs

    for i, (lora_o, lora_t) in enumerate(zip(lora_network_o.unet_loras, lora_network_t.unet_loras)):
        lora_name = lora_o.lora_name
        module_o = lora_o.org_module
        module_t = lora_t.org_module
        diff = module_t.weight.to(work_device) - module_o.weight.to(work_device)

        # clear weight to save memory
        module_o.weight = None
        module_t.weight = None

        diffs[lora_name] = diff

    # clear LoRA network, target U-Net to save memory
    del lora_network_o
    del lora_network_t
    del unet_t

    # make LoRA with svd
    logger.info("calculating by svd")
    lora_weights = {}
    with torch.no_grad():
        for lora_name, mat in tqdm(list(diffs.items())):
            # BUGFIX: previously referenced the module-global `args` here
            # (`if args.device: mat = mat.to(args.device)`), which raised a
            # NameError when svd() was called as a library function. Device
            # placement is handled below via the `device` parameter.
            mat = mat.to(torch.float)  # calc by float

            # if conv_dim is None, diffs do not include LoRAs for conv2d-3x3
            conv2d = len(mat.size()) == 4
            kernel_size = None if not conv2d else mat.size()[2:4]
            conv2d_3x3 = conv2d and kernel_size != (1, 1)
            rank = dim if not conv2d_3x3 or conv_dim is None else conv_dim
            out_dim, in_dim = mat.size()[0:2]

            if device:
                mat = mat.to(device)

            # logger.info(lora_name, mat.size(), mat.device, rank, in_dim, out_dim)
            rank = min(rank, in_dim, out_dim)  # LoRA rank cannot exceed the original dim

            if conv2d:
                if conv2d_3x3:
                    mat = mat.flatten(start_dim=1)
                else:
                    mat = mat.squeeze()

            U, S, Vh = torch.linalg.svd(mat)

            U = U[:, :rank]
            S = S[:rank]
            U = U @ torch.diag(S)

            Vh = Vh[:rank, :]

            # Clamp extreme factor values at the requested quantile to avoid
            # outliers dominating the low-rank approximation.
            dist = torch.cat([U.flatten(), Vh.flatten()])
            hi_val = torch.quantile(dist, clamp_quantile)
            low_val = -hi_val

            U = U.clamp(low_val, hi_val)
            Vh = Vh.clamp(low_val, hi_val)

            if conv2d:
                U = U.reshape(out_dim, rank, 1, 1)
                Vh = Vh.reshape(rank, in_dim, kernel_size[0], kernel_size[1])

            U = U.to(work_device, dtype=save_dtype).contiguous()
            Vh = Vh.to(work_device, dtype=save_dtype).contiguous()

            lora_weights[lora_name] = (U, Vh)

    # make state dict for LoRA
    lora_sd = {}
    for lora_name, (up_weight, down_weight) in lora_weights.items():
        lora_sd[lora_name + ".lora_up.weight"] = up_weight
        lora_sd[lora_name + ".lora_down.weight"] = down_weight
        lora_sd[lora_name + ".alpha"] = torch.tensor(down_weight.size()[0])

    # load state dict to LoRA and save it
    lora_network_save, lora_sd = lora.create_network_from_weights(1.0, None, None, text_encoders_o, unet_o, weights_sd=lora_sd)
    lora_network_save.apply_to(text_encoders_o, unet_o)  # create internal module references for state_dict

    info = lora_network_save.load_state_dict(lora_sd)
    logger.info(f"Loading extracted LoRA weights: {info}")

    dir_name = os.path.dirname(save_to)
    if dir_name and not os.path.exists(dir_name):
        os.makedirs(dir_name, exist_ok=True)

    # minimum metadata
    net_kwargs = {}
    if conv_dim is not None:
        net_kwargs["conv_dim"] = str(conv_dim)
        net_kwargs["conv_alpha"] = str(float(conv_dim))

    metadata = {
        "ss_v2": str(v2),
        "ss_base_model_version": model_version,
        "ss_network_module": "networks.lora",
        "ss_network_dim": str(dim),
        "ss_network_alpha": str(float(dim)),
        "ss_network_args": json.dumps(net_kwargs),
    }

    if not no_metadata:
        title = os.path.splitext(os.path.basename(save_to))[0]
        sai_metadata = sai_model_spec.build_metadata(None, v2, v_parameterization, sdxl, True, False, time.time(), title=title)
        metadata.update(sai_metadata)

    lora_network_save.save_weights(save_to, save_dtype, metadata)
    logger.info(f"LoRA weights are saved to: {save_to}")
def setup_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the LoRA extraction script.

    Returns:
        argparse.ArgumentParser: parser whose options mirror the keyword
        arguments of ``svd`` (the parsed namespace is forwarded via
        ``svd(**vars(args))``).
    """
    parser = argparse.ArgumentParser()
    # Model-family flags (v2 and sdxl are mutually exclusive; enforced in svd).
    parser.add_argument("--v2", action="store_true", help="load Stable Diffusion v2.x model / Stable Diffusion 2.xのモデルを読み込む")
    parser.add_argument(
        "--v_parameterization",
        action="store_true",
        default=None,
        help="make LoRA metadata for v-parameterization (default is same to v2) / 作成するLoRAのメタデータにv-parameterization用と設定する省略時はv2と同じ",
    )
    parser.add_argument(
        "--sdxl", action="store_true", help="load Stable Diffusion SDXL base model / Stable Diffusion SDXL baseのモデルを読み込む"
    )
    # Precision controls for loading and saving.
    parser.add_argument(
        "--load_precision",
        type=str,
        default=None,
        choices=[None, "float", "fp16", "bf16"],
        help="precision in loading, model default if omitted / 読み込み時に精度を変更して読み込む、省略時はモデルファイルによる"
    )
    parser.add_argument(
        "--save_precision",
        type=str,
        default=None,
        choices=[None, "float", "fp16", "bf16"],
        help="precision in saving, same to merging if omitted / 保存時に精度を変更して保存する、省略時はfloat",
    )
    # Input/output paths (all three required).
    parser.add_argument(
        "--model_org",
        type=str,
        default=None,
        required=True,
        help="Stable Diffusion original model: ckpt or safetensors file / 元モデル、ckptまたはsafetensors",
    )
    parser.add_argument(
        "--model_tuned",
        type=str,
        default=None,
        required=True,
        help="Stable Diffusion tuned model, LoRA is difference of `original to tuned`: ckpt or safetensors file / 派生モデル生成されるLoRAは元→派生の差分になります、ckptまたはsafetensors",
    )
    parser.add_argument(
        "--save_to",
        type=str,
        default=None,
        required=True,
        help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors",
    )
    # LoRA rank settings.
    parser.add_argument("--dim", type=int, default=4, help="dimension (rank) of LoRA (default 4) / LoRAの次元数rankデフォルト4")
    parser.add_argument(
        "--conv_dim",
        type=int,
        default=None,
        help="dimension (rank) of LoRA for Conv2d-3x3 (default None, disabled) / LoRAのConv2d-3x3の次元数rankデフォルトNone、適用なし",
    )
    # Extraction tuning knobs.
    parser.add_argument("--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う")
    parser.add_argument(
        "--clamp_quantile",
        type=float,
        default=0.99,
        help="Quantile clamping value, float, (0-1). Default = 0.99 / 値をクランプするための分位点、float、(0-1)。デフォルトは0.99",
    )
    parser.add_argument(
        "--min_diff",
        type=float,
        default=0.01,
        help="Minimum difference between finetuned model and base to consider them different enough to extract, float, (0-1). Default = 0.01 /"
        + "LoRAを抽出するために元モデルと派生モデルの差分の最小値、float、(0-1)。デフォルトは0.01",
    )
    parser.add_argument(
        "--no_metadata",
        action="store_true",
        help="do not save sai modelspec metadata (minimum ss_metadata for LoRA is saved) / "
        + "sai modelspecのメタデータを保存しないLoRAの最低限のss_metadataは保存される",
    )
    # SDXL-only load-device overrides.
    parser.add_argument(
        "--load_original_model_to",
        type=str,
        default=None,
        help="location to load original model, cpu or cuda, cuda:0, etc, default is cpu, only for SDXL / 元モデル読み込み先、cpuまたはcuda、cuda:0など、省略時はcpu、SDXLのみ有効",
    )
    parser.add_argument(
        "--load_tuned_model_to",
        type=str,
        default=None,
        help="location to load tuned model, cpu or cuda, cuda:0, etc, default is cpu, only for SDXL / 派生モデル読み込み先、cpuまたはcuda、cuda:0など、省略時はcpu、SDXLのみ有効",
    )

    return parser
if __name__ == "__main__":
    # Parse CLI options and forward them verbatim to svd().
    args = setup_parser().parse_args()
    svd(**vars(args))

2350
uv.lock

File diff suppressed because it is too large Load Diff