improved workflow lock

pull/800/head
Jingyi 2024-05-24 17:40:29 +08:00
parent bd40efaf19
commit 0629d964c3
5 changed files with 219 additions and 187 deletions

1
.gitignore vendored
View File

@ -50,3 +50,4 @@ test/**/.env
.DS_Store
/container/
/.env
supervisord.*

View File

@ -851,9 +851,14 @@ if is_on_ec2:
try:
json_data = await request.json()
if 'name' not in json_data or not json_data['name']:
raise ValueError("name is required")
return web.Response(status=200, content_type='application/json',
body=json.dumps({"result": False, "message": f"name is required"}))
workflow_name = json_data['name']
if workflow_name == 'default':
return web.Response(status=200, content_type='application/json',
body=json.dumps({"result": False, "message": f"{workflow_name} is not allowed"}))
payload_json = ''
if 'payload_json' in json_data:
@ -865,20 +870,23 @@ if is_on_ec2:
start_time = time.time()
s5cmd_syn_model_command = (f's5cmd sync '
s5cmd_sync_command = (f's5cmd sync '
f'--delete=true '
f'--exclude="*comfy.tar" '
f'--exclude="*.log" '
f'--exclude="*__pycache__*" '
f'--exclude="*.cache*" '
f'--exclude="*/ComfyUI/input/*" '
f'--exclude="*/ComfyUI/output/*" '
f'"/home/ubuntu/*" '
f'"s3://{bucket_name}/comfy/workflows/{workflow_name}/"')
logger.info(f"sync workflows files start {s5cmd_syn_model_command}")
os.system(s5cmd_syn_model_command)
os.system(f'echo "lock" > lock && s5cmd sync lock s3://{bucket_name}/comfy/workflows/{workflow_name}/lock')
action_unlock('release')
s5cmd_lock_command = (f'echo "lock" > lock && '
f's5cmd sync lock s3://{bucket_name}/comfy/workflows/{workflow_name}/lock')
logger.info(f"sync workflows files start {s5cmd_sync_command}")
subprocess.check_output(s5cmd_sync_command, shell=True)
subprocess.check_output(s5cmd_lock_command, shell=True)
end_time = time.time()
cost_time = end_time - start_time
@ -891,12 +899,54 @@ if is_on_ec2:
response = get_response.json()
logger.info(f"release workflow response is {response}")
action_unlock('release')
return web.Response(status=200, content_type='application/json',
body=json.dumps({"result": True, "message": "success", "cost_time": cost_time}))
except Exception as e:
action_unlock('release')
logger.info(e)
return web.Response(status=500, content_type='application/json',
body=json.dumps({"result": False, "message": e}))
body=json.dumps({"result": False, "message": 'Release workflow failed'}))
@server.PromptServer.instance.routes.put("/workflows")
async def switch_workflow(request):
    """Switch this host to another released workflow.

    Persists the requested workflow name into WORKFLOW_NAME_FILE and kills
    the python3 processes so the supervisor restarts them with the new
    workflow. Always answers JSON: status 200 with result true/false for
    expected outcomes, status 500 on unexpected errors.
    """
    # Switching is forbidden while a release is in progress on this host.
    if is_action_lock('release'):
        return web.Response(status=200, content_type='application/json',
                            body=json.dumps(
                                {"result": False, "message": "release is not allowed during release workflow"}))
    try:
        json_data = await request.json()
        if 'name' not in json_data or not json_data['name']:
            raise ValueError("name is required")
        workflow_name = json_data['name']
        if workflow_name == os.getenv('WORKFLOW_NAME'):
            return web.Response(status=200, content_type='application/json',
                                body=json.dumps({"result": False, "message": "workflow is already in use"}))
        if workflow_name == 'default' and not is_master_process:
            return web.Response(status=200, content_type='application/json',
                                body=json.dumps({"result": False, "message": "slave can not use default workflow"}))
        if workflow_name != 'default':
            # A successfully released workflow is marked by a "lock" object in S3.
            if not check_file_exists(f"comfy/workflows/{workflow_name}/lock"):
                return web.Response(status=200, content_type='application/json',
                                    body=json.dumps({"result": False, "message": f"{workflow_name} not exists"}))
        name_file = os.getenv('WORKFLOW_NAME_FILE')
        # Write the name directly instead of `echo {name} > file` through a
        # shell: workflow_name comes from the request body, so interpolating
        # it into a shell=True command string is a shell-injection hole.
        with open(name_file, 'w') as f:
            f.write(f"{workflow_name}\n")
        # Kill the comfy processes; the supervisor restarts them and they
        # pick up the new name from WORKFLOW_NAME_FILE.
        subprocess.run(["pkill", "-f", "python3"])
        return web.Response(status=200, content_type='application/json',
                            body=json.dumps({"result": True, "message": "Please wait to restart"}))
    except Exception as e:
        logger.info(e)
        return web.Response(status=500, content_type='application/json',
                            body=json.dumps({"result": False, "message": 'Switch workflow failed'}))
def check_file_exists(key):

Binary file not shown.

View File

@ -320,65 +320,66 @@ comfy_launch_from_public_s3(){
comfy_launch
}
# -------------------- startup --------------------
ec2_start_process(){
if [ -n "$ON_EC2" ]; then
set -euxo pipefail
echo "---------------------------------------------------------------------------------"
export LD_LIBRARY_PATH=$LD_PRELOAD
set_conda
pip install supervisor
chown -R root:root "/home/ubuntu/ComfyUI"
chmod -R +x venv
WORKFLOW_NAME=$(cat "$WORKFLOW_NAME_FILE")
export WORKFLOW_DIR="/container/workflows/$WORKFLOW_NAME"
SUPERVISOR_CONF="[supervisord]
nodaemon=true
directory=/home/ubuntu/ComfyUI
autostart=true
autorestart=true
if [ ! -d "$WORKFLOW_DIR/ComfyUI/venv" ]; then
mkdir -p "$WORKFLOW_DIR"
[inet_http_server]
port = 127.0.0.1:9001
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
logfile=/dev/stdout
"
echo "$SUPERVISOR_CONF" > /etc/supervisord.conf
init_port=8187
for i in $(seq 1 "$PROCESS_NUMBER"); do
init_port=$((init_port + 1))
MASTER_PROCESS=false
if [ "$init_port" -eq "8188" ]; then
MASTER_PROCESS=true
if [ "$WORKFLOW_NAME" = "default" ]; then
if [ ! -f "/container/$WORKFLOW_NAME.tar" ]; then
start_at=$(date +%s)
s5cmd cp "s3://aws-gcr-solutions-$AWS_REGION/stable-diffusion-aws-extension-github-mainline/$ESD_VERSION/$SERVICE_TYPE.tar" "/container/$WORKFLOW_NAME.tar"
end_at=$(date +%s)
export DOWNLOAD_FILE_SECONDS=$((end_at-start_at))
fi
PROGRAM_NAME="comfy_$init_port"
start_at=$(date +%s)
tar --overwrite -xf "/container/$WORKFLOW_NAME.tar" -C "$WORKFLOW_DIR"
end_at=$(date +%s)
export DECOMPRESS_SECONDS=$((end_at-start_at))
# shellcheck disable=SC2129
echo "[program:$PROGRAM_NAME]" >> /etc/supervisord.conf
echo "command=/home/ubuntu/ComfyUI/venv/bin/python3 main.py --listen 0.0.0.0 --port $init_port --cuda-malloc --output-directory /home/ubuntu/ComfyUI/output/$init_port --temp-directory /home/ubuntu/ComfyUI/temp/$init_port" >> /etc/supervisord.conf
echo "startretries=3" >> /etc/supervisord.conf
echo "stdout_logfile=/home/ubuntu/ComfyUI/$PROGRAM_NAME.log" >> /etc/supervisord.conf
echo "stderr_logfile=/home/ubuntu/ComfyUI/$PROGRAM_NAME.log" >> /etc/supervisord.conf
echo "environment=MASTER_PROCESS=$MASTER_PROCESS,PROGRAM_NAME=$PROGRAM_NAME" >> /etc/supervisord.conf
echo "" >> /etc/supervisord.conf
done
cd "$WORKFLOW_DIR/ComfyUI"
mkdir -p models/vae/
wget --quiet -O models/vae/vae-ft-mse-840000-ema-pruned.safetensors "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors"
mkdir -p models/checkpoints/
wget --quiet -O models/checkpoints/majicmixRealistic_v7.safetensors "https://huggingface.co/GreenGrape/231209/resolve/045ebfc504c47ba8ccc424f1869c65a223d1f5cc/majicmixRealistic_v7.safetensors"
mkdir -p models/animatediff_models/
wget --quiet -O models/animatediff_models/mm_sd_v15_v2.ckpt "https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt"
wget --quiet -O models/checkpoints/v1-5-pruned-emaonly.ckpt "https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt"
else
start_at=$(date +%s)
s5cmd --log=error sync "s3://$COMFY_BUCKET_NAME/comfy/workflows/$WORKFLOW_NAME/*" "$WORKFLOW_DIR/"
end_at=$(date +%s)
export DOWNLOAD_FILE_SECONDS=$((end_at-start_at))
echo "download file: $DOWNLOAD_FILE_SECONDS seconds"
cd "$WORKFLOW_DIR/ComfyUI"
fi
fi
echo "---------------------------------------------------------------------------------"
cat /etc/supervisord.conf
echo "---------------------------------------------------------------------------------"
rm -rf /home/ubuntu/ComfyUI
supervisord -c /etc/supervisord.conf | grep -v 'uncaptured python exception'
ln -s "$WORKFLOW_DIR/ComfyUI" /home/ubuntu/ComfyUI
cd /home/ubuntu/ComfyUI || exit 1
# if /comfy_proxy.py exists
if [ -f "/comfy_proxy.py" ]; then
cp /comfy_proxy.py /home/ubuntu/ComfyUI/custom_nodes/
fi
rm -rf web/extensions/ComfyLiterals
chmod -R +x venv
source venv/bin/activate
chmod -R 777 /home/ubuntu/ComfyUI
venv/bin/python3 main.py --listen 0.0.0.0 --port 8188 --cuda-malloc
exit 1
}
fi
if [ -n "$WORKFLOW_NAME" ]; then
cd /home/ubuntu || exit 1
@ -410,114 +411,12 @@ if [ -n "$WORKFLOW_NAME" ]; then
chmod -R +x venv
source venv/bin/activate
# on EC2
if [ -n "$ON_EC2" ]; then
ec2_start_process
exit 1
fi
# on SageMaker
python /metrics.py &
python3 serve.py
exit 1
fi
if [ -n "$ON_EC2" ]; then
set -euxo pipefail
if [ "$SERVICE_TYPE" == "sd" ]; then
cd /home/ubuntu || exit 1
if [ -d "/home/ubuntu/stable-diffusion-webui/venv" ]; then
cd /home/ubuntu/stable-diffusion-webui || exit 1
chmod -R +x venv
source venv/bin/activate
chmod -R 777 /home/ubuntu
python3 launch.py --enable-insecure-extension-access --skip-torch-cuda-test --no-half --listen --no-download-sd-model
exit 1
fi
echo "downloading comfy file $CACHE_PUBLIC_SD ..."
start_at=$(date +%s)
s5cmd cp "s3://$CACHE_PUBLIC_SD" /home/ubuntu/
end_at=$(date +%s)
export DOWNLOAD_FILE_SECONDS=$((end_at-start_at))
echo "download file: $DOWNLOAD_FILE_SECONDS seconds"
echo "decompressing sd file..."
start_at=$(date +%s)
tar --overwrite -xf "$SERVICE_TYPE.tar" -C /home/ubuntu/
end_at=$(date +%s)
export DECOMPRESS_SECONDS=$((end_at-start_at))
echo "decompress file: $DECOMPRESS_SECONDS seconds"
cd /home/ubuntu/stable-diffusion-webui/extensions || exit 1
git clone https://github.com/zixaphir/Stable-Diffusion-Webui-Civitai-Helper.git
cd ../
export AWS_REGION=us-east-1
wget https://raw.githubusercontent.com/awslabs/stable-diffusion-aws-extension/dev/workshop/sd_models.txt
s5cmd run sd_models.txt
chmod -R +x venv
source venv/bin/activate
chmod -R 777 /home/ubuntu/stable-diffusion-webui
python3 launch.py --enable-insecure-extension-access --skip-torch-cuda-test --no-half --listen --no-download-sd-model
else
cd /home/ubuntu || exit 1
if [ -d "/home/ubuntu/ComfyUI/venv" ]; then
cd /home/ubuntu/ComfyUI || exit 1
rm -rf web/extensions/ComfyLiterals
chmod -R +x venv
source venv/bin/activate
ec2_start_process
exit 1
fi
echo "downloading comfy file $CACHE_PUBLIC_COMFY ..."
start_at=$(date +%s)
s5cmd cp "s3://$CACHE_PUBLIC_COMFY" /home/ubuntu/
end_at=$(date +%s)
export DOWNLOAD_FILE_SECONDS=$((end_at-start_at))
echo "download file: $DOWNLOAD_FILE_SECONDS seconds"
echo "decompressing comfy file..."
start_at=$(date +%s)
tar --overwrite -xf "$SERVICE_TYPE.tar" -C /home/ubuntu/
end_at=$(date +%s)
export DECOMPRESS_SECONDS=$((end_at-start_at))
echo "decompress file: $DECOMPRESS_SECONDS seconds"
cd /home/ubuntu/ComfyUI || exit 1
rm -rf web/extensions/ComfyLiterals
chmod -R +x venv
source venv/bin/activate
pip install dynamicprompts
pip install ultralytics
mkdir -p models/vae/
wget --quiet -O models/vae/vae-ft-mse-840000-ema-pruned.safetensors "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors"
mkdir -p models/checkpoints/
wget --quiet -O models/checkpoints/majicmixRealistic_v7.safetensors "https://huggingface.co/GreenGrape/231209/resolve/045ebfc504c47ba8ccc424f1869c65a223d1f5cc/majicmixRealistic_v7.safetensors"
mkdir -p models/animatediff_models/
wget --quiet -O models/animatediff_models/mm_sd_v15_v2.ckpt "https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt"
wget --quiet -O models/checkpoints/v1-5-pruned-emaonly.ckpt "https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt"
chmod -R 777 /home/ubuntu/ComfyUI
ec2_start_process
fi
exit 1
fi
if [ -f "/initiated_lock" ]; then
echo "already initiated, start service directly..."
if [ "$SERVICE_TYPE" == "sd" ]; then

View File

@ -6,13 +6,12 @@ if [ -f "/etc/environment" ]; then
source /etc/environment
fi
SERVICE_TYPE="comfy"
export SERVICE_TYPE="comfy"
export CONTAINER_NAME="esd_container"
export ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text)
export AWS_REGION=$(aws configure get region)
image="$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$CONTAINER_NAME:latest"
export image="$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$CONTAINER_NAME:latest"
docker stop "$CONTAINER_NAME" || true
docker rm "$CONTAINER_NAME" || true
@ -36,7 +35,7 @@ docker build -f Dockerfile \
image_hash=$(docker inspect "$image" | jq -r ".[0].Id")
image_hash=${image_hash:7}
release_image="$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$CONTAINER_NAME:$image_hash"
export release_image="$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$CONTAINER_NAME:$image_hash"
docker tag "$image" "$release_image"
aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com"
@ -58,29 +57,112 @@ fi
total_memory=$(cat /proc/meminfo | grep 'MemTotal' | awk '{print $2}')
total_memory_mb=$((total_memory / 1024))
echo "total_memory_mb: $total_memory_mb"
limit_memory_mb=$((total_memory_mb - 2048))
export limit_memory_mb=$((total_memory_mb - 2048))
echo "limit_memory_mb: $limit_memory_mb"
# -v ./build_scripts/comfy/comfy_proxy.py:/home/ubuntu/ComfyUI/custom_nodes/comfy_proxy.py \
docker run -v ~/.aws:/root/.aws \
-v "$local_volume":/home/ubuntu \
-v ./build_scripts/inference/start.sh:/start.sh \
-v ./build_scripts/comfy/comfy_proxy.py:/home/ubuntu/ComfyUI/custom_nodes/comfy_proxy.py \
--gpus all \
-e "IMAGE_HASH=$release_image" \
-e "ESD_VERSION=$ESD_VERSION" \
-e "SERVICE_TYPE=$SERVICE_TYPE" \
-e "ON_EC2=true" \
-e "S3_BUCKET_NAME=$COMFY_BUCKET_NAME" \
-e "AWS_REGION=$AWS_REGION" \
-e "AWS_DEFAULT_REGION=$AWS_REGION" \
-e "COMFY_API_URL=$COMFY_API_URL" \
-e "COMFY_API_TOKEN=$COMFY_API_TOKEN" \
-e "COMFY_ENDPOINT=$COMFY_ENDPOINT" \
-e "COMFY_BUCKET_NAME=$COMFY_BUCKET_NAME" \
-e "PROCESS_NUMBER=$PROCESS_NUMBER" \
-e "WORKFLOW_NAME=$WORKFLOW_NAME" \
--name "$CONTAINER_NAME" \
-p 8188-8288:8188-8288 \
--memory "${limit_memory_mb}mb" \
"$image"
generate_process(){
  # Emit one container start script plus its supervisord [program] section
  # for a comfy process bound to host port $1 (container port 8188).
  init_port=$1
  export PROGRAM_NAME="comfy_$init_port"

  # Per-process workflow name file: reuse a previously stored name, else
  # fall back to $WORKFLOW_NAME, else "default".
  comfy_workflow_file="./container/$PROGRAM_NAME"
  WORKFLOW_NAME_TMP=""
  if [ -f "$comfy_workflow_file" ]; then
    WORKFLOW_NAME_TMP=$(cat "$comfy_workflow_file")
  fi
  if [ -z "$WORKFLOW_NAME_TMP" ]; then
    WORKFLOW_NAME_TMP="$WORKFLOW_NAME"
  fi
  if [ -z "$WORKFLOW_NAME_TMP" ]; then
    WORKFLOW_NAME_TMP="default"
  fi
  echo "$WORKFLOW_NAME_TMP" > "$comfy_workflow_file"

  # The process on port 8188 is the master.
  export MASTER_PROCESS=false
  if [ "$init_port" -eq "8188" ]; then
    export MASTER_PROCESS=true
  fi

  # Resolve absolute paths: the generated script and supervisord run with
  # an unpredictable working directory, so relative mounts would break.
  CONTAINER_PATH=$(realpath ./container)
  START_SH=$(realpath ./build_scripts/inference/start.sh)
  COMFY_PROXY=$(realpath ./build_scripts/comfy/comfy_proxy.py)
  AWS_PATH=$(realpath ~/.aws)
  START_HANDLER="#!/bin/bash
set -euxo pipefail
docker stop $PROGRAM_NAME || true
docker rm $PROGRAM_NAME || true
docker run -v $AWS_PATH:/root/.aws \\
-v $CONTAINER_PATH:/container \\
-v $START_SH:/start.sh \\
-v $COMFY_PROXY:/comfy_proxy.py \\
--gpus all \\
-e IMAGE_HASH=$release_image \\
-e SERVICE_TYPE=$SERVICE_TYPE \\
-e ON_EC2=true \\
-e S3_BUCKET_NAME=$COMFY_BUCKET_NAME \\
-e AWS_REGION=$AWS_REGION \\
-e AWS_DEFAULT_REGION=$AWS_REGION \\
-e COMFY_API_URL=$COMFY_API_URL \\
-e COMFY_API_TOKEN=$COMFY_API_TOKEN \\
-e ESD_VERSION=$ESD_VERSION \\
-e COMFY_ENDPOINT=$COMFY_ENDPOINT \\
-e COMFY_BUCKET_NAME=$COMFY_BUCKET_NAME \\
-e MASTER_PROCESS=$MASTER_PROCESS \\
-e PROGRAM_NAME=$PROGRAM_NAME \\
-e WORKFLOW_NAME_FILE=/container/$PROGRAM_NAME \\
--name $PROGRAM_NAME \\
-p $init_port:8188 \\
--memory ${limit_memory_mb}mb \\
$image
"
  echo "$START_HANDLER" > "./container/$PROGRAM_NAME.sh"
  chmod +x "./container/$PROGRAM_NAME.sh"

  # Append the [program] section. Use the absolute script path: supervisord
  # would resolve a relative "command=./container/..." against its own cwd,
  # which is not guaranteed to be the project root.
  # shellcheck disable=SC2129
  echo "[program:$PROGRAM_NAME]" >> /tmp/supervisord.conf
  echo "command=$CONTAINER_PATH/$PROGRAM_NAME.sh" >> /tmp/supervisord.conf
  echo "startretries=1" >> /tmp/supervisord.conf
  echo "stdout_logfile=/dev/stdout" >> /tmp/supervisord.conf
  echo "stderr_logfile=/dev/stderr" >> /tmp/supervisord.conf
  echo "" >> /tmp/supervisord.conf
}
echo "---------------------------------------------------------------------------------"
# Base supervisord config; one [program] section per comfy process is
# appended by generate_process below.
SUPERVISOR_CONF="[supervisord]
nodaemon=true
autostart=true
autorestart=true
[inet_http_server]
port = 127.0.0.1:9001
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
logfile=/dev/stdout
"
echo "$SUPERVISOR_CONF" > /tmp/supervisord.conf
# Allocate sequential host ports starting at 8188 (8188 is the master
# process), one per requested comfy process.
init_port=8187
for i in $(seq 1 "$PROCESS_NUMBER"); do
init_port=$((init_port + 1))
generate_process $init_port
done
echo "---------------------------------------------------------------------------------"
cat /tmp/supervisord.conf
echo "---------------------------------------------------------------------------------"
# Stop any previous supervisord instance (best-effort) before starting a
# fresh one with the regenerated config; the grep drops a known noisy
# supervisor log line.
supervisorctl -c /tmp/supervisord.conf shutdown || true
sudo systemctl restart supervisor.service
supervisord -c /tmp/supervisord.conf | grep -v 'uncaptured python exception'
exit 1