stable-diffusion-aws-extension/build_scripts/inference/serve.sh

373 lines
13 KiB
Bash
Executable File

#!/bin/bash
# -------------------- common init --------------------
# Refuse to start unless every mandatory setting is present in the environment.
for required_name in ESD_VERSION S3_BUCKET_NAME SERVICE_TYPE; do
  if [ -z "${!required_name}" ]; then
    echo "${required_name} is not set"
    exit 1
  fi
done

# Fixed settings shared with the launch helpers and their child processes.
export WEBUI_PORT=8080
export TAR_FILE="esd.tar"
export S3_LOCATION="${ENDPOINT_NAME}-${ESD_VERSION}"

# Six random [a-z0-9] characters distinguish this container from its siblings.
random_string=$(LC_ALL=C tr -dc 'a-z0-9' < /dev/urandom | fold -w 6 | head -n 1)
export ENDPOINT_INSTANCE_ID="${ENDPOINT_NAME}-${random_string}"

# The installer branch follows the image: any IMAGE_URL containing "dev"
# selects the dev branch and turns on dev mode, everything else uses main.
case "$IMAGE_URL" in
  *dev*)
    export ESD_CODE_BRANCH="dev"
    # Enable dev mode: a failing command writes /error_lock and stops the
    # script; a lock left behind by an earlier run blocks this start too.
    trap 'echo "error_lock" > /error_lock; exit 1' ERR
    if [ -f "/error_lock" ]; then
      echo "start failed, please check the log"
      sleep 30
      exit 1
    fi
    ;;
  *)
    export ESD_CODE_BRANCH="main"
    ;;
esac

# Physical core count = cores per socket * sockets, as reported by lscpu.
cores=$(lscpu | awk '/^Core\(s\) per socket:/ {print $4}')
sockets=$(lscpu | awk '/^Socket\(s\):/ {print $2}')
export CUP_CORE_NUMS=$((cores * sockets))

separator="---------------------------------------------------------------------------------"
printf '%s\n' \
  "$separator" \
  "whoami: $(whoami)" \
  "Current shell: $SHELL" \
  "Running in $(bash --version)" \
  "$separator"

# Seconds elapsed between CREATED_AT and now.
echo "CREATED_AT: $CREATED_AT"
created_time_seconds=$(date -d "$CREATED_AT" +%s)
current_time=$(date "+%Y-%m-%dT%H:%M:%S.%6N")
current_time_seconds=$(date -d "$current_time" +%s)
export INSTANCE_INIT_SECONDS=$(( current_time_seconds - created_time_seconds ))
echo "NOW_AT: $current_time"
echo "Init from Create: $INSTANCE_INIT_SECONDS seconds"
echo "$separator"

# NOTE(review): this prints every environment variable to the log; confirm
# that none of them carry secrets.
printenv
echo "$separator"
nvidia-smi
echo "$separator"
# -------------------- common functions --------------------
# Download two CUDA libraries into the conda lib directory and put that
# directory at the front of the dynamic loader path.
# Globals:
#   AWS_REGION         (written) set to us-west-2 for the copies, then reset
#   AWS_DEFAULT_REGION (read)    value AWS_REGION is reset to afterwards
#   LD_LIBRARY_PATH    (written) /home/ubuntu/conda/lib is prepended
# Outputs: a progress banner on stdout
set_conda(){
  local -r conda_path="aws-gcr-solutions-us-west-2/extension-for-stable-diffusion-on-aws/1.5.0-g5/conda"
  local -r lib_dir="/home/ubuntu/conda/lib"
  local previous_region="${AWS_REGION:-}"
  local lib

  echo "---------------------------------------------------------------------------------"
  echo "set conda environment..."

  # s5cmd is pointed at us-west-2 for these copies (the bucket name suggests
  # that is where the libraries live).
  export AWS_REGION="us-west-2"
  for lib in libcufft.so.10 libcurand.so.10; do
    s5cmd --log=error cp "s3://$conda_path/$lib" "$lib_dir/"
  done
  export LD_LIBRARY_PATH="$lib_dir:$LD_LIBRARY_PATH"

  # Switch back to the endpoint's own region. When AWS_DEFAULT_REGION is not
  # set, keep the caller's previous AWS_REGION instead of exporting it empty.
  export AWS_REGION="${AWS_DEFAULT_REGION:-$previous_region}"
}
# Log and recursively delete one path.
# Arguments: $1 - file or directory to delete; a missing path is not an error
# Outputs:   "rm <path>" on stdout
remove_unused(){
  echo "rm $1"
  # "--" ends option parsing so a path starting with "-" is still a path.
  rm -rf -- "$1"
}
# Ask torch how many CUDA devices are visible and publish the answer.
# Globals: CUDA_DEVICE_COUNT (written, exported)
# Outputs: a separator line and "CUDA_DEVICE_COUNT: <n>" on stdout
get_device_count(){
  printf '%s\n' "---------------------------------------------------------------------------------"
  CUDA_DEVICE_COUNT=$(python -c "import torch; print(torch.cuda.device_count())")
  export CUDA_DEVICE_COUNT
  printf '%s\n' "CUDA_DEVICE_COUNT: $CUDA_DEVICE_COUNT"
}
# -------------------- sd functions --------------------
# Strip files the WebUI tree does not need at runtime: extension docs, infra
# and test code, VCS metadata, licence/readme files and images.
# Arguments: $1 - tree to clean (optional, default /home/ubuntu/stable-diffusion-webui)
# Outputs:   progress messages plus one "rm <path>" line per deleted path
sd_remove_unused_list(){
  local root="${1:-/home/ubuntu/stable-diffusion-webui}"
  local ext_dir="$root/extensions/stable-diffusion-aws-extension"
  local path

  echo "---------------------------------------------------------------------------------"
  echo "deleting big unused files..."
  for path in \
    "$ext_dir/docs" \
    "$ext_dir/infrastructure" \
    "$ext_dir/middleware_api" \
    "$ext_dir/test" \
    "$root/repositories/BLIP/BLIP.gif" \
    "$root/repositories/generative-models/assets/" \
    "$root/repositories/stable-diffusion-stability-ai/assets/"; do
    remove_unused "$path"
  done

  echo "deleting git dir..."
  # -prune keeps find from descending into a directory that is about to be
  # removed; NUL-delimited output with "read -r -d ''" preserves every path
  # byte for byte (spaces, backslashes, leading blanks).
  while IFS= read -r -d '' path; do
    remove_unused "$path"
  done < <(find "$root" -type d \( -name '.git' -o -name '.github' \) -prune -print0)

  echo "deleting unused files..."
  # One traversal covers all of the file-name patterns.
  while IFS= read -r -d '' path; do
    remove_unused "$path"
  done < <(find "$root" -type f \( \
      -name '.gitignore' -o -name 'README.md' -o -name 'CHANGELOG.md' \
      -o -name 'CODE_OF_CONDUCT.md' -o -name 'LICENSE.md' -o -name 'NOTICE.md' \
      -o -name 'CODEOWNERS' -o -name 'LICENSE.txt' -o -name 'LICENSE' \
      -o -name '*.gif' -o -name '*.png' -o -name '*.jpg' \
    \) -print0)
}
# Wait for the WebUI to answer /ping with HTTP 200, then snapshot the installed
# tree to S3: files over 2520k are uploaded individually, everything under
# 2530k goes into one tarball, and the conda and insightface directories are
# synced as well.
# Globals: WEBUI_PORT, S3_BUCKET_NAME, S3_LOCATION, TAR_FILE (all read)
# Outputs: progress messages on stdout
# Returns: non-zero if a temp file cannot be created or the WebUI directory
#          cannot be entered
sd_listen_ready() {
  local -r webui_dir="/home/ubuntu/stable-diffusion-webui"
  local -r s3_prefix="s3://$S3_BUCKET_NAME/$S3_LOCATION"
  local response_code start_at end_at cost upload_files filelist file key

  # Poll every 2 seconds. The comparison is a string match so an empty or
  # non-numeric curl result simply means "not ready yet".
  while true; do
    response_code=$(curl -o /dev/null -s -w "%{http_code}\n" "localhost:${WEBUI_PORT:-8080}/ping")
    [[ "$response_code" == "200" ]] && break
    sleep 2
  done
  echo "Server is ready!"
  start_at=$(date +%s)

  echo "collection big files..."
  upload_files=$(mktemp) || return 1
  # Read find's output line by line so paths containing spaces or glob
  # characters stay intact. %q leaves ordinary paths unchanged and
  # backslash-escapes the rest, which keeps each command line unambiguous.
  while IFS= read -r file; do
    # S3 key = path relative to /home/ubuntu
    key="${file#/home/ubuntu/}"
    printf 'sync %q %q\n' "$file" "$s3_prefix/$key" >> "$upload_files"
  done < <(find "$webui_dir" -type f -size +2520k)

  echo "tar files..."
  filelist=$(mktemp) || return 1
  # Never build the archive from an unexpected working directory.
  cd "$webui_dir" || return 1
  find "./" \( -type f -o -type l \) -size -2530k > "$filelist"
  tar -cf "$TAR_FILE" -T "$filelist"
  {
    echo "sync $TAR_FILE $s3_prefix/"
    echo "sync /home/ubuntu/conda/* $s3_prefix/conda/"
    # for ReActor
    echo "sync $webui_dir/models/insightface/* $s3_prefix/insightface/"
  } >> "$upload_files"

  echo "upload files..."
  s5cmd run "$upload_files"
  end_at=$(date +%s)
  cost=$((end_at-start_at))
  echo "sync endpoint files: $cost seconds"
  rm -f -- "$upload_files" "$filelist"
}
# Download install_sd.sh from the branch named by ESD_CODE_BRANCH and run it
# from /home/ubuntu.
# Globals: ESD_CODE_BRANCH (read)
# Returns: non-zero when the download fails, otherwise the installer's status.
#          Exits the script with 1 if /home/ubuntu cannot be entered.
# NOTE(review): this executes a remote script with no integrity check; pin a
# commit or verify a checksum if the branch is not fully trusted.
sd_build_for_launch(){
  cd /home/ubuntu || exit 1

  local -r installer_url="https://raw.githubusercontent.com/awslabs/stable-diffusion-aws-extension/$ESD_CODE_BRANCH/install_sd.sh"
  local installer status

  installer=$(mktemp) || return 1
  # --fail makes curl return non-zero on HTTP errors, so an error page or an
  # empty response is never handed to bash as if it were the installer.
  if ! curl -fsSL "$installer_url" -o "$installer"; then
    echo "failed to download $installer_url" >&2
    rm -f -- "$installer"
    return 1
  fi

  # Feed the script on stdin, exactly as a "curl | bash" pipeline would.
  bash < "$installer"
  status=$?
  rm -f -- "$installer"
  return "$status"
}
# Start the Stable Diffusion WebUI API server in the foreground, with the
# metrics collector running in the background.
# On ml.p4d.24xlarge the server is started with plain python; on every other
# instance type it is started through "accelerate launch".
# Globals: INSTANCE_TYPE, WEBUI_PORT, CUP_CORE_NUMS (all read)
# Returns: the exit status of the server process. Exits the script with 1 if
#          the WebUI directory cannot be entered.
sd_accelerate_launch(){
  echo "---------------------------------------------------------------------------------"
  echo "accelerate sd launch..."
  cd /home/ubuntu/stable-diffusion-webui || exit 1
  source venv/bin/activate
  get_device_count
  python /metrics.py &

  # One flag list shared by both launch modes so they cannot drift apart.
  local -a launch_args=(
    --enable-insecure-extension-access --api --api-log --log-startup
    --listen --port "${WEBUI_PORT:-8080}" --xformers --no-half-vae
    --no-download-sd-model --no-hashing --nowebui --skip-torch-cuda-test
    --skip-load-model-at-start --disable-safe-unpickle
    --skip-prepare-environment --skip-python-version-check --skip-install
    --skip-version-check --disable-nan-check
  )

  if [ "$INSTANCE_TYPE" == "ml.p4d.24xlarge" ]; then
    python launch.py "${launch_args[@]}"
    # Stop here: once this server exits, a second one must not be started
    # through accelerate.
    return
  fi

  accelerate launch --num_cpu_threads_per_process="$CUP_CORE_NUMS" launch.py "${launch_args[@]}"
}
# Restore a previously uploaded WebUI installation from S3 and start it.
# Steps: sync the endpoint prefix into /home/ubuntu, unpack the tarball over
# the WebUI directory, recreate an empty models tree (plus insightface from
# S3) and launch the server.
# Globals: S3_BUCKET_NAME, S3_LOCATION, TAR_FILE (read); LD_LIBRARY_PATH (written)
# Returns: 1 if the tarball cannot be extracted, otherwise the server's status
sd_launch_from_s3(){
  local -r home_dir="/home/ubuntu"
  local -r webui_dir="$home_dir/stable-diffusion-webui"
  local -r s3_prefix="s3://$S3_BUCKET_NAME/$S3_LOCATION"
  # The sync below downloads the tarball into $home_dir, so address it there
  # instead of relying on the current working directory.
  local -r tarball="$home_dir/$TAR_FILE"
  local start_at end_at cost

  start_at=$(date +%s)
  s5cmd --log=error sync "$s3_prefix/*" "$home_dir/"
  end_at=$(date +%s)
  cost=$((end_at-start_at))
  echo "download file: $cost seconds"

  echo "set conda environment..."
  export LD_LIBRARY_PATH="$home_dir/conda/lib:$LD_LIBRARY_PATH"

  start_at=$(date +%s)
  # Clear models first so the archive's own "models" entry can be extracted.
  rm -rf -- "$webui_dir/models"
  if ! tar --overwrite -xf "$tarball" -C "$webui_dir/"; then
    echo "failed to extract $tarball" >&2
    return 1
  fi
  rm -rf -- "$tarball"
  end_at=$(date +%s)
  cost=$((end_at-start_at))
  echo "decompress file: $cost seconds"

  # remove soft link
  rm -rf -- "$webui_dir/models"
  s5cmd --log=error sync "$s3_prefix/insightface/*" "$webui_dir/models/insightface/"

  cd "$webui_dir/" || exit 1
  mkdir -p models/VAE models/Stable-diffusion models/Lora models/hypernetworks

  sd_accelerate_launch
}
# Build the WebUI inside the container and start it.
# The preparation steps run one after another; sd_listen_ready is then put in
# the background while sd_accelerate_launch runs the server in the foreground.
# Returns: the status of sd_accelerate_launch
sd_launch_from_local(){
  local prepare_step
  for prepare_step in set_conda sd_build_for_launch sd_remove_unused_list; do
    "$prepare_step"
  done

  sd_listen_ready &
  sd_accelerate_launch
}
# -------------------- comfy functions --------------------
# Strip files the ComfyUI tree does not need at runtime: VCS metadata,
# licence/readme files and images.
# Arguments: $1 - tree to clean (optional, default /home/ubuntu/ComfyUI)
# Outputs:   progress messages plus one "rm <path>" line per deleted path
comfy_remove_unused_list(){
  local root="${1:-/home/ubuntu/ComfyUI}"
  local path

  echo "---------------------------------------------------------------------------------"
  echo "deleting big unused files..."
  # remove_unused /home/ubuntu/stable-diffusion-webui/extensions/stable-diffusion-aws-extension/docs

  echo "deleting git dir..."
  # -prune keeps find from descending into a directory that is about to be
  # removed; NUL-delimited output with "read -r -d ''" preserves every path
  # byte for byte (spaces, backslashes, leading blanks).
  while IFS= read -r -d '' path; do
    remove_unused "$path"
  done < <(find "$root" -type d \( -name '.git' -o -name '.github' \) -prune -print0)

  echo "deleting unused files..."
  # One traversal covers all of the file-name patterns.
  while IFS= read -r -d '' path; do
    remove_unused "$path"
  done < <(find "$root" -type f \( \
      -name '.gitignore' -o -name 'README.md' -o -name 'CHANGELOG.md' \
      -o -name 'CODE_OF_CONDUCT.md' -o -name 'LICENSE.md' -o -name 'NOTICE.md' \
      -o -name 'CODEOWNERS' -o -name 'LICENSE.txt' -o -name 'LICENSE' \
      -o -name '*.gif' -o -name '*.png' -o -name '*.jpg' \
    \) -print0)
}
# Download install_comfy.sh from the branch named by ESD_CODE_BRANCH and run
# it from /home/ubuntu.
# Globals: ESD_CODE_BRANCH (read)
# Returns: non-zero when the download fails, otherwise the installer's status.
#          Exits the script with 1 if /home/ubuntu cannot be entered.
# NOTE(review): this executes a remote script with no integrity check; pin a
# commit or verify a checksum if the branch is not fully trusted.
comfy_build_for_launch(){
  cd /home/ubuntu || exit 1

  local -r installer_url="https://raw.githubusercontent.com/awslabs/stable-diffusion-aws-extension/$ESD_CODE_BRANCH/install_comfy.sh"
  local installer status

  installer=$(mktemp) || return 1
  # --fail makes curl return non-zero on HTTP errors, so an error page or an
  # empty response is never handed to bash as if it were the installer.
  if ! curl -fsSL "$installer_url" -o "$installer"; then
    echo "failed to download $installer_url" >&2
    rm -f -- "$installer"
    return 1
  fi

  # Feed the script on stdin, exactly as a "curl | bash" pipeline would.
  bash < "$installer"
  status=$?
  rm -f -- "$installer"
  return "$status"
}
# Wait for the Comfy server to answer /ping on port 8080 with HTTP 200, then
# snapshot the installed tree to S3: files over 2520k are uploaded
# individually, everything under 2530k goes into one tarball, and the conda
# directory is synced as well.
# Globals: S3_BUCKET_NAME, S3_LOCATION, TAR_FILE (all read)
# Outputs: progress messages on stdout
# Returns: non-zero if a temp file cannot be created or the ComfyUI directory
#          cannot be entered
comfy_listen_ready() {
  local -r comfy_dir="/home/ubuntu/ComfyUI"
  local -r s3_prefix="s3://$S3_BUCKET_NAME/$S3_LOCATION"
  local response_code start_at end_at cost upload_files filelist file key

  # Poll every 2 seconds. The comparison is a string match so an empty or
  # non-numeric curl result simply means "not ready yet".
  while true; do
    response_code=$(curl -o /dev/null -s -w "%{http_code}\n" localhost:8080/ping)
    [[ "$response_code" == "200" ]] && break
    sleep 2
  done
  echo "Comfy Server is ready!"
  start_at=$(date +%s)

  echo "collection big files..."
  upload_files=$(mktemp) || return 1
  # Read find's output line by line so paths containing spaces or glob
  # characters stay intact. %q leaves ordinary paths unchanged and
  # backslash-escapes the rest, which keeps each command line unambiguous.
  while IFS= read -r file; do
    # S3 key = path relative to /home/ubuntu
    key="${file#/home/ubuntu/}"
    printf 'sync %q %q\n' "$file" "$s3_prefix/$key" >> "$upload_files"
  done < <(find "$comfy_dir" -type f -size +2520k)

  echo "tar files..."
  filelist=$(mktemp) || return 1
  # Never build the archive from an unexpected working directory.
  cd "$comfy_dir" || return 1
  find "./" \( -type f -o -type l \) -size -2530k > "$filelist"
  tar -cf "$TAR_FILE" -T "$filelist"
  {
    echo "sync $TAR_FILE $s3_prefix/"
    echo "sync /home/ubuntu/conda/* $s3_prefix/conda/"
  } >> "$upload_files"

  echo "upload files..."
  s5cmd run "$upload_files"
  end_at=$(date +%s)
  cost=$((end_at-start_at))
  echo "sync endpoint files: $cost seconds"
  rm -f -- "$upload_files" "$filelist"
}
# Start the Comfy server (serve.py) in the foreground from its virtualenv,
# with the metrics collector running in the background.
# Returns: the exit status of "python serve.py". Exits the script with 1 if
#          /home/ubuntu/ComfyUI cannot be entered.
comfy_accelerate_launch(){
  printf '%s\n' \
    "---------------------------------------------------------------------------------" \
    "accelerate comfy launch..."

  if ! cd /home/ubuntu/ComfyUI; then
    exit 1
  fi
  source venv/bin/activate
  get_device_count

  python /metrics.py &
  # todo maybe need optimize
  python serve.py
}
# Restore a previously uploaded ComfyUI installation from S3 and start it.
# Steps: sync the endpoint prefix into /home/ubuntu, unpack the tarball over
# the ComfyUI directory and launch the server.
# Globals: S3_BUCKET_NAME, S3_LOCATION, TAR_FILE (read); LD_LIBRARY_PATH (written)
# Returns: 1 if the tarball cannot be extracted, otherwise the server's status
comfy_launch_from_s3(){
  local -r home_dir="/home/ubuntu"
  local -r comfy_dir="$home_dir/ComfyUI"
  # The sync below downloads the tarball into $home_dir, so address it there
  # instead of relying on the current working directory.
  local -r tarball="$home_dir/$TAR_FILE"
  local start_at end_at cost

  start_at=$(date +%s)
  s5cmd --log=error sync "s3://$S3_BUCKET_NAME/$S3_LOCATION/*" "$home_dir/"
  end_at=$(date +%s)
  cost=$((end_at-start_at))
  echo "download file: $cost seconds"

  echo "set conda environment..."
  export LD_LIBRARY_PATH="$home_dir/conda/lib:$LD_LIBRARY_PATH"

  start_at=$(date +%s)
  if ! tar --overwrite -xf "$tarball" -C "$comfy_dir/"; then
    echo "failed to extract $tarball" >&2
    return 1
  fi
  rm -rf -- "$tarball"
  end_at=$(date +%s)
  cost=$((end_at-start_at))
  echo "decompress file: $cost seconds"

  comfy_accelerate_launch
}
# Build ComfyUI inside the container and start it.
# The preparation steps run one after another; comfy_listen_ready is then put
# in the background while comfy_accelerate_launch runs the server in the
# foreground.
# Returns: the status of comfy_accelerate_launch
comfy_launch_from_local(){
  local prepare_step
  for prepare_step in set_conda comfy_build_for_launch comfy_remove_unused_list; do
    "$prepare_step"
  done

  comfy_listen_ready &
  comfy_accelerate_launch
}
# -------------------- startup --------------------
# Entry point. Three start modes, tried in this order:
#   1. FULL_IMAGE=true     - everything is baked into the image; just launch.
#   2. S3 cache present    - restore the installation uploaded by an earlier
#                            container of this endpoint/version, then launch.
#   3. otherwise           - install locally, launch, and upload the result.
# SERVICE_TYPE "sd" selects the Stable Diffusion WebUI, anything else Comfy.
if [ "$FULL_IMAGE" == "true" ]; then
  echo "Running on full docker image..."
  if [ "$SERVICE_TYPE" == "sd" ]; then
    export LD_LIBRARY_PATH=/home/ubuntu/conda/lib:$LD_LIBRARY_PATH
    # wget -P /home/ubuntu/stable-diffusion-webui/models/Stable-diffusion/ https://aws-gcr-solutions.s3.cn-north-1.amazonaws.com.cn/stable-diffusion-aws-extension-github-mainline/models/v1-5-pruned-emaonly.safetensors
    sd_accelerate_launch
  else
    comfy_accelerate_launch
  fi
  exit 0
fi

echo "Checking s3://$S3_BUCKET_NAME/$S3_LOCATION files..."
output=$(s5cmd ls "s3://$S3_BUCKET_NAME/")
# -F: S3_LOCATION contains a version such as "1.5.0", whose dots must match
# literally. "--": it may begin with "-" when ENDPOINT_NAME is empty.
if grep -qF -- "$S3_LOCATION" <<< "$output"; then
  if [ "$SERVICE_TYPE" == "sd" ]; then
    sd_launch_from_s3
  else
    comfy_launch_from_s3
  fi
  # The launcher only returns once the server has stopped (or the restore
  # failed). Exit with its status here so control does not fall through to the
  # "No files in S3" path and run a full local install.
  exit $?
fi

echo "No files in S3, just install the environment and launch from local..."
if [ "$SERVICE_TYPE" == "sd" ]; then
  sd_launch_from_local
else
  comfy_launch_from_local
fi
# todo https://aws-gcr-solutions-us-west-2.s3.us-west-2.amazonaws.com/extension-for-stable-diffusion-on-aws/1.5.0-g5/creator/inswapper_128.onnx