pull/1995/head
bmaltais 2024-02-24 10:40:44 -05:00
commit 822d94c904
8 changed files with 144 additions and 64 deletions

View File

@ -5,3 +5,11 @@ bitsandbytes_windows_deprecated/
dataset/
__pycache__/
venv/
**/.hadolint.yml
**/*.log
**/.git
**/.gitignore
**/.env
**/.github
**/.vscode
**/*.ps1

6
.hadolint.yml Normal file
View File

@ -0,0 +1,6 @@
ignored:
- DL3042 # Avoid use of cache directory with pip. Use `pip install --no-cache-dir <package>`
- DL3013 # Pin versions in pip. Instead of `pip install <package>` use `pip install <package>==<version>`
- DL3008 # Pin versions in apt get install. Instead of `apt-get install <package>` use `apt-get install <package>=<version>`
- DL4006 # Set the SHELL option -o pipefail before RUN with a pipe in it
- SC2015 # Note that A && B || C is not if-then-else. C may run when A is true.

View File

@ -1,54 +1,118 @@
FROM nvcr.io/nvidia/pytorch:23.04-py3 as base
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/London
# syntax=docker/dockerfile:1
ARG UID=1000
ARG VERSION=EDGE
ARG RELEASE=0
RUN apt update && apt-get install -y software-properties-common
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
add-apt-repository ppa:deadsnakes/ppa && \
apt update && \
apt-get install -y git curl libgl1 libglib2.0-0 libgoogle-perftools-dev \
python3.10-dev python3.10-tk python3-html5lib python3-apt python3-pip python3.10-distutils && \
rm -rf /var/lib/apt/lists/*
FROM python:3.10-slim as build
# Set python 3.10 and cuda 11.8 as default
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 3 && \
update-alternatives --set python3 /usr/bin/python3.10 && \
update-alternatives --set cuda /usr/local/cuda-11.8
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3
# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT
WORKDIR /app
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install wheel
# Todo: Install torch 2.1.0 for cu121 support (only available as nightly as of writing)
## RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --pre torch ninja setuptools --extra-index-url https://download.pytorch.org/whl/nightly/cu121
# Install under /root/.local
ENV PIP_USER="true"
ARG PIP_NO_WARN_SCRIPT_LOCATION=0
ARG PIP_ROOT_USER_ACTION="ignore"
# Todo: Install xformers nightly for Torch 2.1.0 support
## RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
# Install build dependencies
RUN apt-get update && apt-get upgrade -y && \
apt-get install -y --no-install-recommends python3-launchpadlib git curl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install PyTorch and TensorFlow
# The versions must align and be in sync with the requirements_linux_docker.txt
# hadolint ignore=SC2102
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
pip install -U --extra-index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.nvidia.com \
torch==2.1.2 torchvision==0.16.2 \
xformers==0.0.23.post1 \
# Why [and-cuda]: https://github.com/tensorflow/tensorflow/issues/61468#issuecomment-1759462485
tensorflow[and-cuda]==2.14.0 \
ninja \
pip setuptools wheel
# Install requirements
COPY ./requirements.txt ./requirements_linux_docker.txt ./
COPY ./setup/docker_setup.py ./setup.py
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -r ./requirements_linux_docker.txt
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -r ./requirements.txt
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
--mount=source=requirements_linux_docker.txt,target=requirements_linux_docker.txt \
--mount=source=requirements.txt,target=requirements.txt \
--mount=source=setup/docker_setup.py,target=setup.py \
pip install -r requirements_linux_docker.txt -r requirements.txt
# Replace pillow with pillow-simd
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip uninstall -y pillow && \
CC="cc -mavx2" python3 -m pip install -U --force-reinstall pillow-simd
# Replace pillow with pillow-simd (Only for x86)
ARG TARGETPLATFORM
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
apt-get update && apt-get install -y --no-install-recommends zlib1g-dev libjpeg62-turbo-dev build-essential && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
pip uninstall -y pillow && \
CC="cc -mavx2" pip install -U --force-reinstall pillow-simd; \
fi
FROM python:3.10-slim as final
ARG UID
ARG VERSION
ARG RELEASE
LABEL name="bmaltais/kohya_ss" \
vendor="bmaltais" \
maintainer="bmaltais" \
# Dockerfile source repository
url="https://github.com/bmaltais/kohya_ss" \
version=${VERSION} \
# This should be a number, incremented with each change
release=${RELEASE} \
io.k8s.display-name="kohya_ss" \
summary="Kohya's GUI: This repository provides a Gradio GUI for Kohya's Stable Diffusion trainers(https://github.com/kohya-ss/sd-scripts)." \
description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss."
# Install runtime dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libjpeg62 libtcl8.6 libtk8.6 libgoogle-perftools-dev dumb-init && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Fix missing libnvinfer7
USER root
RUN ln -s /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer.so.7 && \
ln -s /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7
RUN useradd -m -s /bin/bash appuser && \
chown -R appuser: /app
USER appuser
COPY --chown=appuser . .
# Create user
RUN groupadd -g $UID $UID && \
useradd -l -u $UID -g $UID -m -s /bin/sh -N $UID
STOPSIGNAL SIGINT
# Create directories with correct permissions
RUN install -d -m 775 -o $UID -g 0 /dataset && \
install -d -m 775 -o $UID -g 0 /licenses && \
install -d -m 775 -o $UID -g 0 /app
# Copy dist and support arbitrary user ids (OpenShift best practice)
COPY --chown=$UID:0 --chmod=775 \
--from=build /root/.local /home/$UID/.local
WORKDIR /app
COPY --chown=$UID:0 --chmod=775 . .
# Copy licenses (OpenShift Policy)
COPY --chmod=775 LICENSE.md /licenses/LICENSE.md
ENV PATH="/home/$UID/.local/bin:$PATH"
ENV PYTHONPATH="${PYTHONPATH}:/home/$UID/.local/lib/python3.10/site-packages"
ENV LD_PRELOAD=libtcmalloc.so
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
ENV PATH="$PATH:/home/appuser/.local/bin"
CMD python3 "./kohya_gui.py" ${CLI_ARGS} --listen 0.0.0.0 --server_port 7860
VOLUME [ "/dataset" ]
# 7860: Kohya GUI
# 6006: TensorBoard
EXPOSE 7860 6006
USER $UID
STOPSIGNAL SIGINT
# Use dumb-init as PID 1 to handle signals properly
ENTRYPOINT ["dumb-init", "--"]
CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860"]

View File

@ -3,10 +3,12 @@ services:
kohya-ss-gui:
container_name: kohya-ss-gui
image: kohya-ss-gui:latest
user: 1000:0
build:
context: .
args:
- UID=1000
ports:
- 127.0.0.1:3000:3000
- 7860:7860
- 6006:6006
tty: true
@ -16,15 +18,15 @@ services:
SAFETENSORS_FAST_GPU: 1
DISPLAY: $DISPLAY
tmpfs:
- /tmp
- /tmp
volumes:
- ./dataset:/dataset
- ./.cache/user:/home/appuser/.cache
- ./.cache/triton:/home/appuser/.triton
- ./.cache/config:/app/appuser/.config
- ./.cache/nv:/home/appuser/.nv
- ./.cache/keras:/home/appuser/.keras
- /tmp/.X11-unix:/tmp/.X11-unix
- ./dataset:/dataset
- ./.cache/user:/home/1000/.cache
- ./.cache/triton:/home/1000/.triton
- ./.cache/nv:/home/1000/.nv
- ./.cache/keras:/home/1000/.keras
- ./.cache/config:/home/1000/.config
deploy:
resources:
reservations:

View File

@ -34,7 +34,7 @@ U-Net is divided into "Down" (left half), "Mid" (bottom) and "Up" (right half).
And it consists of 25 blocks in total: 12 Down blocks, 1 Mid block, and 12 Up blocks. The neural net added here is simply called "UNet" in Kohya_ss.
### RoLA Learning Object 2: Text Encoder
### LoRA Learning Object 2: Text Encoder
This isn't the only time LoRA adds neural nets.
@ -177,15 +177,15 @@ The default value is 0.0001.
### LR Scheduler:
You can change the learning rate in the middle of learning. A scheduler is a setting for how to change the learning rate.
You can change the learning rate in the middle of learning. A scheduler is a setting for how to change the learning rate. Possible values include:
adafactor: Select this to set the optimizer (described later) to Adafactor . Learn while automatically adjusting the learning rate according to the situation to save VRAM
constant: the learning rate does not change from beginning to end
constant_with_warmup: Start with a learning rate of 0 and gradually increase it toward the set value of Learning rate during warm-up, and use the set value of Learning rate during main learning.
cosine : Gradually decrease the learning rate toward 0 while drawing a wave (cosine curve)
cosine _with_restarts: repeat cosine many times (see also description of LR number of cycles)
linear: Start at the Learning rate setting and decrease linearly towards 0
polynomial: Same behavior as linear, but a bit more complicated to reduce (see also LR power description)
- `adafactor`: Select this to set the optimizer (described later) to Adafactor. Learns while automatically adjusting the learning rate according to the situation, to save VRAM.
- `constant`: the learning rate does not change from beginning to end
- `constant_with_warmup`: Start with a learning rate of 0 and gradually increase it toward the set value of Learning rate during warm-up, and use the set value of Learning rate during main learning.
- `cosine`: Gradually decrease the learning rate toward 0 while following a wave (cosine curve)
- `cosine_with_restarts`: repeats the cosine schedule many times (see also the description of LR number of cycles)
- `linear`: Start at the Learning rate setting and decrease linearly towards 0
- `polynomial`: Same behavior as linear, but a bit more complicated to reduce (see also LR power description)
Set to constant if you want the learning rate to be fixed at the Learning rate setting.
The default is `cosine`.
@ -204,13 +204,13 @@ Default is 10.
### Optimizer
The optimizer is a setting for "how to update the neural net weights during training ". Various methods have been proposed for smart learning, but the most commonly used in LoRA learning is ``AdamW'' (32-bit) or ``AdamW8bit''. AdamW8bit uses less VRAM and has enough accuracy, so if you get lost, use this.
The optimizer is a setting for "how to update the neural net weights during training". Various methods have been proposed for smart learning, but the most commonly used in LoRA learning are "AdamW" (32-bit) and "AdamW8bit". AdamW8bit uses less VRAM and has sufficient accuracy, so if you are unsure, use this.
In addition, "Adafactor", which adjusts the learning rate appropriately according to the progress of learning while incorporating Adam's method, is also often used (Learning rate setting is ignored when using Adafactor).
``DAdapt'' is an optimizer that adjusts the learning rate , and ``Lion'' is a relatively new optimizer , but it has not been fully verified yet. There is a report that "SGDNesterov" has good learning accuracy but slows down.
"DAdapt" is an optimizer that adjusts the learning rate, and "Lion" is a relatively new optimizer that has not been fully verified yet. There is a report that "SGDNesterov" has good learning accuracy but is slower.
The default is AdamW8bit. There is no problem basically as it is.
The default is "AdamW8bit". There is basically no problem leaving it as is.
### Optimizer extra arguments
@ -785,4 +785,4 @@ Here are some commonly used settings:
Default is blank. When the field is blank, the description example is displayed in faint color, so please refer to it.

View File

@ -172,7 +172,7 @@ class BasicTraining:
info="Set to 0 to not train the Text Encoder 2"
)
self.lr_warmup = gr.Slider(
label="LR warmup (% of steps)",
label="LR warmup (% of total steps)",
value=lr_warmup_value,
minimum=0,
maximum=100,
@ -199,7 +199,7 @@ class BasicTraining:
maximum=100,
value=0,
step=1,
label="Stop text encoder training",
label="Stop text encoder training (% of total steps)",
)
with gr.Row(visible=not finetuning):
self.enable_bucket = gr.Checkbox(label="Enable buckets", value=True)

View File

@ -1,5 +1,5 @@
xformers==0.0.20
xformers>=0.0.20
bitsandbytes==0.41.1
accelerate==0.19.0
accelerate==0.25.0
tensorboard==2.14.1
tensorflow==2.14.0
tensorflow==2.14.0

View File

@ -1,3 +1,3 @@
from setuptools import setup, find_packages
setup(name="library", version="1.0.3", packages=find_packages())
setup()