improved wf delete

pull/800/head
Jingyi 2024-05-23 13:31:49 +08:00
parent 5eb8e4ba58
commit 3946cd0411
5 changed files with 83 additions and 21 deletions

View File

@ -5,7 +5,9 @@ FROM 366590864501.dkr.ecr.$AWS_REGION.amazonaws.com/esd-inference:$ESD_VERSION
# TODO BYOC
#RUN apt-get update -y && \
# apt-get install ffmpeg -y && \
# rm -rf /var/lib/apt/lists/* \
# rm -rf /var/lib/apt/lists/*
WORKDIR /home/ubuntu/ComfyUI
COPY build_scripts/inference/start.sh /
RUN chmod +x /start.sh

View File

@ -87,6 +87,7 @@ if is_on_ec2:
# Wait limits read from the environment. os.environ.get() returns a str when the
# variable is set but the int default when it is not, so both are normalised to
# int here; a non-numeric value raises ValueError at import time.
# NOTE(review): presumably seconds (86400 = 24h) — confirm at the use sites.
max_wait_time = int(os.environ.get('MAX_WAIT_TIME', 86400))
msg_max_wait_time = int(os.environ.get('MSG_MAX_WAIT_TIME', 86400))
# True only when MASTER_PROCESS is exactly the string 'true'.
is_master_process = os.getenv('MASTER_PROCESS') == 'true'
# None when PROGRAM_NAME is not set in the environment.
program_name = os.getenv('PROGRAM_NAME')
# NOTE(review): presumably file-name fragments excluded from syncing — confirm
# against the code that reads this list.
no_need_sync_files = ['.autosave', '.cache', '.autosave1', '~', '.swp']
# Starts empty; whatever fills it is not visible in this view.
need_resend_msg_result = []
@ -714,7 +715,11 @@ if is_on_ec2:
# Restart handler: calls supervisor.restart() over XML-RPC on localhost:9001 and
# answers with a 200 JSON body {"result": true} when that call returns.
# NOTE(review): this span comes from a rendered diff whose +/- markers were
# stripped; the `sudo reboot` line is presumably the pre-change line that the
# supervisor calls replace — confirm against the real file.
async def restart(self):
logger.info(f"start to reboot {self}")
try:
subprocess.run(["sudo", "reboot"])
# Function-scope import of the stdlib XML-RPC client.
from xmlrpc.client import ServerProxy
# NOTE(review): ServerProxy calls are synchronous and are not awaited, so the
# coroutine blocks while supervisord answers.
server = ServerProxy('http://localhost:9001/RPC2')
server.supervisor.restart()
# server.supervisor.shutdown()
return web.Response(status=200, content_type='application/json', body=json.dumps({"result": True}))
except Exception as e:
# NOTE(review): any failure is only logged at info level; no response is
# returned on this path within the lines shown here.
logger.info(f"error reboot {e}")
pass

View File

@ -328,31 +328,56 @@ ec2_start_process(){
export LD_LIBRARY_PATH=$LD_PRELOAD
set_conda
pip install supervisor
chown -R root:root "/home/ubuntu/ComfyUI"
chmod -R +x venv
SUPERVISOR_CONF="[supervisord]
nodaemon=true
directory=/home/ubuntu/ComfyUI
autostart=true
autorestart=true
[inet_http_server]
port = 127.0.0.1:9001
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
logfile=/dev/stdout
"
echo "$SUPERVISOR_CONF" > /etc/supervisord.conf
init_port=8187
for i in $(seq 1 "$PROCESS_NUMBER"); do
init_port=$((init_port + 1))
MASTER_PROCESS=false
if [ "$init_port" -eq "8188" ]; then
MASTER_PROCESS=true
else
MASTER_PROCESS=false
fi
if [ "$i" -eq "$PROCESS_NUMBER" ]; then
export MASTER_PROCESS=$MASTER_PROCESS && python3 main.py --listen 0.0.0.0 \
--port "$init_port" \
--cuda-malloc \
--output-directory "/home/ubuntu/ComfyUI/output/$init_port" \
--temp-directory "/home/ubuntu/ComfyUI/temp/$init_port"
exit 1
fi
PROGRAM_NAME="comfy_$init_port"
export MASTER_PROCESS=$MASTER_PROCESS && nohup python3 main.py --listen 0.0.0.0 \
--port "$init_port" \
--cuda-malloc \
--output-directory "/home/ubuntu/ComfyUI/output/$init_port" \
--temp-directory "/home/ubuntu/ComfyUI/temp/$init_port" &
# shellcheck disable=SC2129
echo "[program:$PROGRAM_NAME]" >> /etc/supervisord.conf
echo "command=/home/ubuntu/ComfyUI/venv/bin/python3 main.py --listen 0.0.0.0 --port $init_port --cuda-malloc --output-directory /home/ubuntu/ComfyUI/output/$init_port --temp-directory /home/ubuntu/ComfyUI/temp/$init_port" >> /etc/supervisord.conf
echo "startretries=3" >> /etc/supervisord.conf
echo "stdout_logfile=/dev/stdout" >> /etc/supervisord.conf
echo "stderr_logfile=/dev/stderr" >> /etc/supervisord.conf
echo "environment=MASTER_PROCESS=$MASTER_PROCESS,PROGRAM_NAME=$PROGRAM_NAME" >> /etc/supervisord.conf
echo "" >> /etc/supervisord.conf
done
echo "---------------------------------------------------------------------------------"
cat /etc/supervisord.conf
echo "---------------------------------------------------------------------------------"
supervisord -c /etc/supervisord.conf | grep -v 'uncaptured python exception'
exit 1
}
if [ -n "$WORKFLOW_NAME" ]; then
@ -407,7 +432,7 @@ if [ -n "$ON_EC2" ]; then
cd /home/ubuntu/stable-diffusion-webui || exit 1
chmod -R +x venv
source venv/bin/activate
chmod -R 777 /home/ubuntu/stable-diffusion-webui
chmod -R 777 /home/ubuntu
python3 launch.py --enable-insecure-extension-access --skip-torch-cuda-test --no-half --listen --no-download-sd-model
exit 1
fi

View File

@ -55,6 +55,12 @@ else
export WORKFLOW_NAME=""
fi
# Compute the container memory limit as host RAM minus 2048 MB of headroom.
# MemTotal in /proc/meminfo is reported in kB, hence the division by 1024.
total_memory=$(awk '/^MemTotal:/ {print $2; exit}' /proc/meminfo)
total_memory_mb=$((total_memory / 1024))
echo "total_memory_mb: $total_memory_mb"
limit_memory_mb=$((total_memory_mb - 2048))
# On a host with 2048 MB or less the subtraction yields zero or a negative
# number, which would not set a meaningful limit; fall back to half of the
# host's memory instead.
if [ "$limit_memory_mb" -le 0 ]; then
    limit_memory_mb=$((total_memory_mb / 2))
fi
echo "limit_memory_mb: $limit_memory_mb"
# -v ./build_scripts/comfy/comfy_proxy.py:/home/ubuntu/ComfyUI/custom_nodes/comfy_proxy.py \
docker run -v ~/.aws:/root/.aws \
-v "$local_volume":/home/ubuntu \
@ -76,4 +82,5 @@ docker run -v ~/.aws:/root/.aws \
-e "WORKFLOW_NAME=$WORKFLOW_NAME" \
--name "$CONTAINER_NAME" \
-p 8188-8288:8188-8288 \
--memory "${limit_memory_mb}mb" \
"$image"

View File

@ -60,7 +60,7 @@ class TestComfySnapshotEpCreateE2E:
"endpoint_name": f'snapshot-{config.endpoint_name}',
"service_type": "comfy",
"endpoint_type": "Async",
"instance_type": 'ml.g5.8xlarge',
"instance_type": 'ml.g5.4xlarge',
"workflow_name": 'workflow1',
"initial_instance_count": 1,
"autoscaling_enabled": False,
@ -73,9 +73,32 @@ class TestComfySnapshotEpCreateE2E:
resp = self.api.create_endpoint(headers=headers, data=data)
assert 'data' in resp.json(), resp.dumps()
assert resp.json()["data"]["endpoint_status"] == "Creating", resp.dumps()
def test_3_list_endpoints_status(self):
def test_3_create_comfy_snapshot_endpoint_rt(self):
    """Request a Real-time comfy snapshot endpoint and expect 'data' in the reply."""
    request_headers = {"x-api-key": config.api_key, "username": config.username}

    # Key order is kept as in the original payload.
    payload = {
        "endpoint_name": f'snapshot-{config.endpoint_name}',
        "service_type": "comfy",
        "endpoint_type": "Real-time",
        "instance_type": 'ml.g5.8xlarge',
        "workflow_name": 'workflow1',
        "initial_instance_count": 1,
        "autoscaling_enabled": False,
        "assign_to_roles": [config.role_comfy_real_time],
        "creator": config.username
    }

    # Only send a custom image URI when one is configured.
    if config.custom_docker_image_uri:
        payload["custom_docker_image_uri"] = config.custom_docker_image_uri

    resp = self.api.create_endpoint(headers=request_headers, data=payload)

    assert 'data' in resp.json(), resp.dumps()
def test_4_list_endpoints_status(self):
headers = {
"x-api-key": config.api_key,
"username": config.username