chore(docker): rewrite Dockerfile

Signed-off-by: 陳鈞 <jim60105@gmail.com>
pull/1976/head
陳鈞 2024-02-17 22:47:34 +08:00
parent f8d2673641
commit 83bf2e0649
No known key found for this signature in database
GPG Key ID: 96A7988FC3596AF7
6 changed files with 102 additions and 49 deletions

View File

@ -5,3 +5,11 @@ bitsandbytes_windows_deprecated/
dataset/
__pycache__/
venv/
**/.hadolint.yml
**/*.log
**/.git
**/.gitignore
**/.env
**/.github
**/.vscode
**/*.ps1

6
.hadolint.yml Normal file
View File

@ -0,0 +1,6 @@
ignored:
- DL3042 # Avoid use of cache directory with pip. Use `pip install --no-cache-dir <package>`
- DL3013 # Pin versions in pip. Instead of `pip install <package>` use `pip install <package>==<version>`
- DL3008 # Pin versions in apt get install. Instead of `apt-get install <package>` use `apt-get install <package>=<version>`
- DL4006 # Set the SHELL option -o pipefail before RUN with a pipe in it
- SC2015 # Note that A && B || C is not if-then-else. C may run when A is true.

View File

@ -1,54 +1,91 @@
FROM nvcr.io/nvidia/pytorch:23.04-py3 as base
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/London
# syntax=docker/dockerfile:1
ARG UID=1000
RUN apt update && apt-get install -y software-properties-common
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
add-apt-repository ppa:deadsnakes/ppa && \
apt update && \
apt-get install -y git curl libgl1 libglib2.0-0 libgoogle-perftools-dev \
python3.10-dev python3.10-tk python3-html5lib python3-apt python3-pip python3.10-distutils && \
rm -rf /var/lib/apt/lists/*
FROM python:3.10 as build
# Set python 3.10 and cuda 11.8 as default
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 3 && \
update-alternatives --set python3 /usr/bin/python3.10 && \
update-alternatives --set cuda /usr/local/cuda-11.8
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3
# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
ARG TARGETARCH
ARG TARGETVARIANT
WORKDIR /app
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install wheel
# Todo: Install torch 2.1.0 for cu121 support (only available as nightly as of writing)
## RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --pre torch ninja setuptools --extra-index-url https://download.pytorch.org/whl/nightly/cu121
# Install under /root/.local
ENV PIP_USER="true"
ARG PIP_NO_WARN_SCRIPT_LOCATION=0
ARG PIP_ROOT_USER_ACTION="ignore"
# Todo: Install xformers nightly for Torch 2.1.0 support
## RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
# Install build dependencies
RUN apt-get update && apt-get upgrade -y && \
apt-get install -y --no-install-recommends python3-launchpadlib git curl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install PyTorch and TensorFlow
# The versions must align and be in sync with the requirements_linux_docker.txt
# hadolint ignore=SC2102
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
pip install -U --extra-index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.nvidia.com \
torch==2.1.2 torchvision==0.16.2 \
xformers==0.0.23.post1 \
# Why [and-cuda]: https://github.com/tensorflow/tensorflow/issues/61468#issuecomment-1759462485
tensorflow[and-cuda]==2.14.0 \
ninja \
pip setuptools wheel
# Install requirements
COPY ./requirements.txt ./requirements_linux_docker.txt ./
COPY ./setup/docker_setup.py ./setup.py
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -r ./requirements_linux_docker.txt
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -r ./requirements.txt
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
--mount=source=requirements_linux_docker.txt,target=requirements_linux_docker.txt \
--mount=source=requirements.txt,target=requirements.txt \
--mount=source=setup/docker_setup.py,target=setup.py \
pip install -r requirements_linux_docker.txt -r requirements.txt && \
# Replace pillow with pillow-simd
pip uninstall -y pillow && \
CC="cc -mavx2" pip install -U --force-reinstall pillow-simd
# Replace pillow with pillow-simd
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip uninstall -y pillow && \
CC="cc -mavx2" python3 -m pip install -U --force-reinstall pillow-simd
FROM python:3.10 as final
ARG UID
WORKDIR /app
# Install runtime dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libgoogle-perftools-dev dumb-init && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Fix missing libnvinfer7
USER root
RUN ln -s /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer.so.7 && \
ln -s /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7
RUN useradd -m -s /bin/bash appuser && \
chown -R appuser: /app
USER appuser
COPY --chown=appuser . .
# Create user
RUN groupadd -g $UID $UID && \
useradd -l -u $UID -g $UID -m -s /bin/sh -N $UID
STOPSIGNAL SIGINT
# Copy dist and support arbitrary user ids (OpenShift best practice)
COPY --chown=$UID:0 --chmod=775 \
--from=build /root/.local /home/$UID/.local
COPY --chown=$UID:0 --chmod=775 . .
ENV PATH="/home/$UID/.local/bin:$PATH"
ENV PYTHONPATH="${PYTHONPATH}:/home/$UID/.local/lib/python3.10/site-packages"
ENV LD_PRELOAD=libtcmalloc.so
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
ENV PATH="$PATH:/home/appuser/.local/bin"
CMD python3 "./kohya_gui.py" ${CLI_ARGS} --listen 0.0.0.0 --server_port 7860
# Create directories with correct permissions
RUN install -d -m 775 -o $UID -g 0 /dataset && \
install -d -m 775 -o $UID -g 0 /licenses && \
install -d -m 775 -o $UID -g 0 /app
# Copy licenses (OpenShift Policy)
COPY --chmod=775 LICENSE.md /licenses/LICENSE.md
VOLUME [ "/dataset" ]
USER $UID
STOPSIGNAL SIGINT
# Use dumb-init as PID 1 to handle signals properly
ENTRYPOINT ["dumb-init", "--"]
CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860"]

View File

@ -3,10 +3,12 @@ services:
kohya-ss-gui:
container_name: kohya-ss-gui
image: kohya-ss-gui:latest
user: 1000:0
build:
context: .
args:
- UID=1000
ports:
- 127.0.0.1:3000:3000
- 7860:7860
- 6006:6006
tty: true
@ -16,15 +18,15 @@ services:
SAFETENSORS_FAST_GPU: 1
DISPLAY: $DISPLAY
tmpfs:
- /tmp
- /tmp
volumes:
- ./dataset:/dataset
- ./.cache/user:/home/appuser/.cache
- ./.cache/triton:/home/appuser/.triton
- ./.cache/config:/app/appuser/.config
- ./.cache/nv:/home/appuser/.nv
- ./.cache/keras:/home/appuser/.keras
- /tmp/.X11-unix:/tmp/.X11-unix
- ./dataset:/dataset
- ./.cache/user:/home/1000/.cache
- ./.cache/triton:/home/1000/.triton
- ./.cache/nv:/home/1000/.nv
- ./.cache/keras:/home/1000/.keras
- ./.cache/config:/home/1000/.config
deploy:
resources:
reservations:

View File

@ -1,5 +1,5 @@
xformers==0.0.20
xformers>=0.0.20
bitsandbytes==0.41.1
accelerate==0.19.0
accelerate==0.25.0
tensorboard==2.14.1
tensorflow==2.14.0
tensorflow==2.14.0

View File

@ -1,3 +1,3 @@
from setuptools import setup, find_packages
setup(name="library", version="1.0.3", packages=find_packages())
setup()