246 lines
8.5 KiB
Bash
Executable File
246 lines
8.5 KiB
Bash
Executable File
#!/bin/bash
#
# Prepares and launches local ComfyUI containers under supervisord.
#
# This first section resolves paths, makes sure the ECR repository exists,
# pulls the public base image and computes the per-container memory limit.
#
# Required environment (from the caller or /etc/environment):
#   ESD_VERSION - tag of the esd-inference base image to pull
#   ACCOUNT_ID  - AWS account that owns the release repository

set -euxo pipefail

# Pick up machine-wide settings when the file is present.
if [[ -f "/etc/environment" ]]; then
  # shellcheck disable=SC1091
  source /etc/environment
fi

# Fail early with a clear message instead of an "unbound variable" error (or a
# malformed image name) further down.
: "${ESD_VERSION:?ESD_VERSION must be set}"
: "${ACCOUNT_ID:?ACCOUNT_ID must be set}"

export SERVICE_TYPE="comfy"
export CONTAINER_NAME="esd_container"

# Assign first, export second: `export VAR=$(cmd)` would hide a failing cmd.
AWS_REGION=$(aws configure get region)
export AWS_REGION
export COMMON_FILES_PREFIX="aws-gcr-solutions-$AWS_REGION/stable-diffusion-aws-extension-github-mainline"

CUR_PATH=$(realpath ./)
CONTAINER_PATH=$(realpath ./container)

# Remove any existing lock entries (presumably left over from an earlier run).
sudo rm -rf "$CONTAINER_PATH/sync_lock"
sudo rm -rf "$CONTAINER_PATH/s5cmd_lock"

SUPERVISORD_FILE="$CONTAINER_PATH/supervisord.conf"
START_SH=$(realpath ./build_scripts/inference/start.sh)
START_PY=$(realpath ./build_scripts/comfy/serve.py)
COMFY_PROXY=$(realpath ./build_scripts/comfy/comfy_proxy.py)
COMFY_EXT=$(realpath ./build_scripts/comfy/ComfyUI-AWS-Extension)
IMAGE_SH=$(realpath ./docker_image.sh)

# Create the ECR repository unless describe-repositories can already see it.
if aws ecr describe-repositories --region "$AWS_REGION" --repository-names "$CONTAINER_NAME" >/dev/null 2>&1; then
  echo "ECR repository '$CONTAINER_NAME' already exists."
else
  echo "ECR repository '$CONTAINER_NAME' does not exist. Creating..."
  # --repository-name must be followed directly by its value.
  aws ecr create-repository --repository-name "$CONTAINER_NAME" --region "$AWS_REGION" | jq .
  echo "ECR repository '$CONTAINER_NAME' created successfully."
fi

# Log in to the registry that hosts the base image, then pull it.
aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "366590864501.dkr.ecr.$AWS_REGION.amazonaws.com"
PUBLIC_BASE_IMAGE="366590864501.dkr.ecr.$AWS_REGION.amazonaws.com/esd-inference:$ESD_VERSION"
docker pull "$PUBLIC_BASE_IMAGE"

export release_image="$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$CONTAINER_NAME"

echo "Starting container..."

# MemTotal is reported in kB; each container may use all of it minus 2048 MB.
total_memory=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
total_memory_mb=$((total_memory / 1024))
echo "total_memory_mb: $total_memory_mb"
export limit_memory_mb=$((total_memory_mb - 2048))
echo "limit_memory_mb: $limit_memory_mb"
|
|
|
|
#######################################
# Generates everything supervisord needs to run one ComfyUI container:
# the workflow-name file, a Dockerfile (only when missing), an executable
# launcher script, and a [program:comfy_<port>] section appended to the
# supervisord configuration.
# Globals:
#   read:    CONTAINER_PATH, CUR_PATH, SUPERVISORD_FILE, PUBLIC_BASE_IMAGE,
#            ACCOUNT_ID, AWS_REGION, CONTAINER_NAME, SERVICE_TYPE, ESD_VERSION,
#            COMFY_BUCKET_NAME, COMFY_API_URL, COMFY_API_TOKEN,
#            limit_memory_mb, START_SH, START_PY, COMFY_PROXY, COMFY_EXT
#   written: PROGRAM_NAME, MASTER_PROCESS (both exported)
# Arguments:
#   $1 - host port published to the container's port 8188; port 10000 marks
#        the master process.
#######################################
generate_process() {
  # All working variables are local so the caller's own variables (notably its
  # init_port loop counter) are never overwritten.
  local port=$1
  local workflow_file workflow_name dockerfile launcher aws_dir registry repository

  export PROGRAM_NAME="comfy_$port"
  workflow_file="$CONTAINER_PATH/$PROGRAM_NAME"
  dockerfile="$CONTAINER_PATH/$PROGRAM_NAME.Dockerfile"
  launcher="$CONTAINER_PATH/$PROGRAM_NAME.sh"
  registry="$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com"
  repository="$registry/$CONTAINER_NAME"

  export MASTER_PROCESS=false
  if [ "$port" -eq "10000" ]; then
    export MASTER_PROCESS=true
  fi

  # Keep a previously stored workflow name; otherwise the master port starts
  # on "default" and every other port on "local".
  workflow_name=""
  if [[ -f "$workflow_file" ]]; then
    workflow_name=$(<"$workflow_file")
  fi
  if [[ -z "$workflow_name" ]]; then
    if [[ "$MASTER_PROCESS" == "true" ]]; then
      workflow_name="default"
    else
      workflow_name="local"
    fi
  fi
  printf '%s\n' "$workflow_name" > "$workflow_file"

  # An existing Dockerfile is left alone. The quoted delimiter writes the
  # template verbatim, so the commented-out RUN example keeps its line breaks.
  if [[ ! -f "$dockerfile" ]]; then
    cat > "$dockerfile" <<'DOCKERFILE'
ARG BASE_IMAGE
FROM $BASE_IMAGE

#RUN apt-get update -y && \
#    apt-get install ffmpeg -y && \
#    rm -rf /var/lib/apt/lists/*

WORKDIR /home/ubuntu/ComfyUI
DOCKERFILE
  fi

  aws_dir=$(realpath ~/.aws)

  # Launcher script: plain $VAR is expanded now (generation time), \$VAR is
  # left for the launcher to expand when supervisord runs it.
  cat > "$launcher" <<LAUNCHER
#!/bin/bash
set -euxo pipefail

WORKFLOW_NAME=\$(cat "$workflow_file")

if [ "\$WORKFLOW_NAME" = "default" ] || [ "\$WORKFLOW_NAME" = "local" ]; then
  BASE_IMAGE="$PUBLIC_BASE_IMAGE"
else
  BASE_IMAGE="$repository:\$WORKFLOW_NAME"
  aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$registry"
  docker pull "\$BASE_IMAGE"
fi

sudo mkdir -p "$CONTAINER_PATH/output/$PROGRAM_NAME"
sudo mkdir -p "$CONTAINER_PATH/temp/$PROGRAM_NAME"

sudo chmod -R 777 "$CONTAINER_PATH/output/$PROGRAM_NAME"
sudo chmod -R 777 "$CONTAINER_PATH/temp/$PROGRAM_NAME"

docker build -f "$dockerfile" --build-arg BASE_IMAGE="\$BASE_IMAGE" -t "$PROGRAM_NAME" .
docker stop "$PROGRAM_NAME" || true
docker rm "$PROGRAM_NAME" || true
docker run -v "$aws_dir:/root/.aws" \\
  -v "$CONTAINER_PATH:/container" \\
  -v "$CONTAINER_PATH/conda:/home/ubuntu/conda" \\
  -v "$START_SH:/start.sh:ro" \\
  -v "$START_PY:/serve.py:ro" \\
  -v "$COMFY_PROXY:/comfy_proxy.py:ro" \\
  -v "$COMFY_EXT:/ComfyUI-AWS-Extension:ro" \\
  --gpus all \\
  -e IMAGE_HASH="$repository" \\
  -e ACCOUNT_ID="$ACCOUNT_ID" \\
  -e BASE_IMAGE="\$BASE_IMAGE" \\
  -e SERVICE_TYPE="$SERVICE_TYPE" \\
  -e ON_EC2=true \\
  -e DISABLE_AUTO_SYNC=false \\
  -e COMFY_ENDPOINT=name \\
  -e S3_BUCKET_NAME="$COMFY_BUCKET_NAME" \\
  -e AWS_REGION="$AWS_REGION" \\
  -e AWS_DEFAULT_REGION="$AWS_REGION" \\
  -e COMFY_API_URL="$COMFY_API_URL" \\
  -e COMFY_API_TOKEN="$COMFY_API_TOKEN" \\
  -e ESD_VERSION="$ESD_VERSION" \\
  -e COMFY_BUCKET_NAME="$COMFY_BUCKET_NAME" \\
  -e MASTER_PROCESS="$MASTER_PROCESS" \\
  -e PROGRAM_NAME="$PROGRAM_NAME" \\
  -e WORKFLOW_NAME_FILE="/container/$PROGRAM_NAME" \\
  --name "$PROGRAM_NAME" \\
  -p "$port:8188" \\
  --memory ${limit_memory_mb}mb \\
  "$PROGRAM_NAME"
LAUNCHER
  chmod +x "$launcher"

  # Register the launcher with supervisord; stdout and stderr share one log.
  cat >> "$SUPERVISORD_FILE" <<PROGRAM
[program:$PROGRAM_NAME]
directory=$CUR_PATH
command=$launcher
startretries=2
stdout_logfile=$CONTAINER_PATH/$PROGRAM_NAME.log
stderr_logfile=$CONTAINER_PATH/$PROGRAM_NAME.log

PROGRAM
}
|
|
|
|
#######################################
# Queues an s5cmd copy for a shared library that is not yet present in the
# host-side conda lib directory.
# Globals:
#   CONTAINER_PATH (read), COMMON_FILES_PREFIX (read)
# Arguments:
#   $1 - file name of the shared library
# Outputs:
#   Appends one "cp" command to /tmp/s5cmd.txt when the file is missing.
#######################################
download_so() {
  local file_name=$1
  local lib_dir="$CONTAINER_PATH/conda/lib"

  # Test the directory the copy actually targets. /home/ubuntu/conda is only
  # where this directory is mounted inside the containers, so checking that
  # path on the host would re-queue a library that is already in place.
  if [[ ! -f "$lib_dir/$file_name" ]]; then
    echo "cp s3://$COMMON_FILES_PREFIX/so/$file_name $lib_dir/" >> /tmp/s5cmd.txt
  fi
}
|
|
|
|
echo "---------------------------------------------------------------------------------"
# Initialise the default workflow for all users. Skipped once the default
# workflow already has a ComfyUI venv directory.
if [[ ! -d "$CONTAINER_PATH/workflows/default/ComfyUI/venv" ]]; then
  tar_file="$CONTAINER_PATH/default.tar"

  # Download the archive only when no local copy exists.
  if [[ ! -f "$tar_file" ]]; then
    mkdir -p "$CONTAINER_PATH/workflows"
    start_at=$(date +%s)
    s5cmd cp "s3://$COMMON_FILES_PREFIX/$ESD_VERSION/comfy.tar" "$tar_file"
    end_at=$(date +%s)
    export DOWNLOAD_FILE_SECONDS=$((end_at - start_at))
  fi

  # Unpack the same archive into fresh "default" and "local" workflow trees.
  start_at=$(date +%s)
  for workflow in default local; do
    rm -rf "${CONTAINER_PATH:?}/workflows/$workflow"
    mkdir -p "$CONTAINER_PATH/workflows/$workflow"
    tar --overwrite -xf "$tar_file" -C "$CONTAINER_PATH/workflows/$workflow/"
  done
  end_at=$(date +%s)
  export DECOMPRESS_SECONDS=$((end_at - start_at))

  rm -rf "$CONTAINER_PATH/workflows/local/ComfyUI/custom_nodes/ComfyUI-Manager"

  cat > /tmp/models.txt <<MODELS
cp s3://$COMMON_FILES_PREFIX/models/vae-ft-mse-840000-ema-pruned.safetensors models/vae/
cp s3://$COMMON_FILES_PREFIX/models/majicmixRealistic_v7.safetensors models/checkpoints/
cp s3://$COMMON_FILES_PREFIX/models/v1-5-pruned-emaonly.ckpt models/checkpoints/
cp s3://$COMMON_FILES_PREFIX/models/mm_sd_v15_v2.ckpt models/animatediff_models/
MODELS

  # The destinations above are relative to the default ComfyUI directory. The
  # subshell keeps the directory change from leaking into the rest of the
  # script, so later steps see the same working directory on every run.
  (
    cd "$CONTAINER_PATH/workflows/default/ComfyUI"
    s5cmd run /tmp/models.txt
  )
fi

# Queue the shared libraries that are still missing, then fetch them in one
# s5cmd batch; the queue file only exists when something was queued.
rm -rf /tmp/s5cmd.txt
for so_file in \
  "libcufft.so.10" \
  "libcurand.so.10" \
  "libcublasLt.so.11" \
  "libonnxruntime_providers_cuda.so" \
  "libcublas.so.11" \
  "libcudart.so.11.0"; do
  download_so "$so_file"
done
if [[ -f "/tmp/s5cmd.txt" ]]; then
  s5cmd run /tmp/s5cmd.txt
fi

# Start the supervisord configuration from scratch: global sections plus the
# "image" program; generate_process appends one section per container below.
cat > "$SUPERVISORD_FILE" <<SUPERVISOR
[supervisord]
nodaemon=true
autostart=true
autorestart=true
directory=$CUR_PATH

[inet_http_server]
port=127.0.0.1:9001

[rpcinterface:supervisor]
supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface

[supervisorctl]
logfile=/dev/stdout

[program:image]
directory=$CUR_PATH
command=$IMAGE_SH
startretries=1
stdout_logfile=$CONTAINER_PATH/image.log
stderr_logfile=$CONTAINER_PATH/image.log

SUPERVISOR

# ${VAR:-} keeps `set -u` from aborting before the message can be printed.
if [[ -z "${PROCESS_NUMBER:-}" ]]; then
  echo "PROCESS_NUMBER not set"
  exit 1
fi
if [[ ! "$PROCESS_NUMBER" =~ ^[0-9]+$ ]]; then
  echo "PROCESS_NUMBER must be a non-negative integer: $PROCESS_NUMBER" >&2
  exit 1
fi

# One process per consecutive port starting at 10000, PROCESS_NUMBER + 1 in
# total.
first_port=10000
USER_TOTAL=$((PROCESS_NUMBER + 1))
for ((offset = 0; offset < USER_TOTAL; offset++)); do
  generate_process "$((first_port + offset))"
done

# Ask a supervisord that may still be running to shut down; failure (for
# example when none is running) is deliberately ignored.
supervisorctl -c "$SUPERVISORD_FILE" shutdown || true

# nodaemon=true keeps supervisord in the foreground, so this line only returns
# once supervisord has stopped.
supervisord -c "$SUPERVISORD_FILE" | grep -v 'uncaptured python exception'

# NOTE(review): the script always ends with a non-zero status, presumably so a
# service manager treats the stop as a failure and restarts it - confirm.
exit 1
|