improved workflow

pull/779/head
Jingyi Niu 2024-05-17 22:20:39 +08:00
parent 396765d2bd
commit 8a702d7ec2
24 changed files with 201 additions and 132 deletions

View File

@ -17,6 +17,7 @@ class InvocationsRequest(BaseModel):
merge_checkpoint_payload: Optional[dict]
param_s3: Optional[str] = None
payload_string: Optional[str] = None
workflow: Optional[str] = None
port: Optional[str] = "8080"

View File

@ -154,7 +154,7 @@ def update_execute_job_table(prompt_id, key, value):
async def send_request(request_obj, comfy_app: ComfyApp, need_async: bool):
try:
record_metric(comfy_app)
record_metric(comfy_app, request_obj)
logger.info(f"Starting on {comfy_app.port} {need_async} {request_obj}")
comfy_app.busy = True
@ -185,7 +185,7 @@ async def send_request(request_obj, comfy_app: ComfyApp, need_async: bool):
if response.status_code != 200:
raise HTTPException(status_code=response.status_code,
detail=f"COMFY service returned an error: {response.text}")
return wrap_response(start_time, response, comfy_app)
return wrap_response(start_time, response, comfy_app, request_obj)
except Exception as e:
logger.error(f"send_request error {e}")
raise HTTPException(status_code=500, detail=f"COMFY service not available for internal multi reqs {e}")
@ -253,80 +253,100 @@ def ping():
return {'status': 'Healthy'}
def wrap_response(start_time, response, comfy_app: ComfyApp):
def wrap_response(start_time, response, comfy_app: ComfyApp, request_obj):
    """Augment the Comfy HTTP response body with request/serving metadata.

    Adds the request start time, the endpoint name and instance id taken from
    the environment, the GPU device id of the serving Comfy instance and, when
    the request carries a non-empty 'workflow' entry, that workflow tag.

    Returns the decoded and decorated JSON body as a dict.
    """
    body = response.json()
    body['start_time'] = start_time
    body['endpoint_name'] = os.getenv('ENDPOINT_NAME')
    body['endpoint_instance_id'] = os.getenv('ENDPOINT_INSTANCE_ID')
    body['device_id'] = comfy_app.device_id
    # Propagate the workflow remark only when present and truthy.
    # NOTE(review): membership/subscript access assumes request_obj behaves
    # like a mapping here — confirm against the caller.
    has_workflow = 'workflow' in request_obj and request_obj['workflow']
    if has_workflow:
        body['workflow'] = request_obj['workflow']
    return body
def record_metric(comfy_app: ComfyApp):
def record_metric(comfy_app: ComfyApp, request_obj):
    """Emit per-invocation CloudWatch metrics for the Comfy endpoint.

    Publishes, in the 'ESD' namespace:
      * InferenceTotal — once per endpoint+instance, and once per
        endpoint+instance+GPU (InstanceGPU dimension from comfy_app.device_id);
      * InferenceEndpointReceived — once per service ('Comfy') and once per
        endpoint; when the request carries a non-empty 'workflow' entry, an
        additional per-workflow counter is appended.

    Args:
        comfy_app: the Comfy instance serving this request; only its
            device_id is read here.
        request_obj: the incoming request; only its optional 'workflow'
            entry is inspected.
    """
    # NOTE(review): datetime.datetime.utcnow() is deprecated from Python
    # 3.12; consider datetime.datetime.now(datetime.timezone.utc) when the
    # runtime is upgraded.
    data = [
        {
            'MetricName': 'InferenceTotal',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
                {
                    'Name': 'Instance',
                    'Value': endpoint_instance_id
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceTotal',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
                {
                    'Name': 'Instance',
                    'Value': endpoint_instance_id
                },
                {
                    'Name': 'InstanceGPU',
                    'Value': f"GPU{comfy_app.device_id}"
                }
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Service',
                    'Value': 'Comfy'
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
    ]
    # Optional per-workflow received counter.
    if 'workflow' in request_obj and request_obj['workflow']:
        data.append({
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Workflow',
                    'Value': request_obj['workflow']
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        })
    response = cloudwatch.put_metric_data(
        Namespace='ESD',
        MetricData=data
    )
    logger.info(f"record_metric response: {response}")

View File

@ -12,7 +12,7 @@ import {
SCHEMA_EXECUTE_NEED_SYNC,
SCHEMA_EXECUTE_PROMPT_ID,
SCHEMA_INFER_TYPE,
SCHEMA_MESSAGE,
SCHEMA_MESSAGE, SCHEMA_WORKFLOW,
} from '../../shared/schema';
@ -290,6 +290,7 @@ export class CreateExecuteApi {
},
endpoint_name: SCHEMA_ENDPOINT_NAME,
need_sync: SCHEMA_EXECUTE_NEED_SYNC,
workflow: SCHEMA_WORKFLOW,
number: {
type: JsonSchemaType.STRING,
minLength: 1,

View File

@ -12,7 +12,7 @@ import {
SCHEMA_INFERENCE,
SCHEMA_INFERENCE_ASYNC_MODEL,
SCHEMA_INFERENCE_REAL_TIME_MODEL,
SCHEMA_MESSAGE,
SCHEMA_MESSAGE, SCHEMA_WORKFLOW,
} from '../../shared/schema';
import { ApiValidators } from '../../shared/validator';
@ -148,6 +148,7 @@ export class CreateInferenceJobApi {
type: {
type: JsonSchemaType.STRING,
},
workflow: SCHEMA_WORKFLOW,
api_params_s3_location: {
type: JsonSchemaType.STRING,
format: 'uri',
@ -225,6 +226,7 @@ export class CreateInferenceJobApi {
type: JsonSchemaType.STRING,
},
inference_type: SCHEMA_INFER_TYPE,
workflow: SCHEMA_WORKFLOW,
payload_string: {
type: JsonSchemaType.STRING,
},

View File

@ -441,6 +441,11 @@ export const SCHEMA_EXECUTE_PROMPT_PATH: JsonSchema = {
description: 'Prompt Path',
};
// Optional free-form tag naming the workflow an inference/execute request
// belongs to; echoed back in responses and used as a CloudWatch
// 'Workflow' metric dimension by the serving containers.
export const SCHEMA_WORKFLOW: JsonSchema = {
type: JsonSchemaType.STRING,
description: 'Workflow remark',
};
export const SCHEMA_INFERENCE: Record<string, JsonSchema> = {
img_presigned_urls: {
type: JsonSchemaType.ARRAY,

View File

@ -16,9 +16,9 @@ from sagemaker.serializers import JSONSerializer
from common.ddb_service.client import DynamoDbUtilsService
from common.excepts import BadRequestException
from common.response import ok, created, internal_server_error
from common.util import s3_scan_files, generate_presigned_url_for_keys, record_latency_metrics, \
record_count_metrics
from common.response import ok, created
from common.util import s3_scan_files, generate_presigned_url_for_keys, \
record_latency_metrics, record_count_metrics
from libs.comfy_data_types import ComfyExecuteTable, InferenceResult
from libs.enums import ComfyExecuteType, EndpointStatus, ServiceType
from libs.utils import get_endpoint_by_name, response_error
@ -38,7 +38,9 @@ sqs_url = os.environ.get('MERGE_SQS_URL')
index_name = "endpoint_name-startTime-index"
predictors = {}
multi_gpu_instance_type_list = ['ml.p5.48xlarge', 'ml.p4d.24xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', 'ml.p3dn.24xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.g4dn.12xlarge', 'ml.g5.12xlarge', 'ml.g5.24xlarge', 'ml.g5.48xlarge']
# SageMaker instance types that expose more than one GPU.
# NOTE(review): the consumers of this list are outside this chunk —
# presumably it gates per-GPU scheduling; confirm against callers.
multi_gpu_instance_type_list = ['ml.p5.48xlarge', 'ml.p4d.24xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
                                'ml.p3dn.24xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.g4dn.12xlarge',
                                'ml.g5.12xlarge', 'ml.g5.24xlarge', 'ml.g5.48xlarge']
@dataclass

View File

@ -7,8 +7,8 @@ import boto3
from aws_lambda_powertools import Tracer
from common.ddb_service.client import DynamoDbUtilsService
from common.util import s3_scan_files, load_json_from_s3, record_count_metrics, record_latency_metrics, \
record_queue_latency_metrics
from common.util import s3_scan_files, load_json_from_s3, record_count_metrics, \
record_latency_metrics, record_queue_latency_metrics
from libs.comfy_data_types import InferenceResult
from libs.enums import ServiceType

View File

@ -73,7 +73,10 @@ def handler(event, context):
parse_sagemaker_result(sagemaker_out, create_time, inference_id, task_type, endpoint_name)
record_count_metrics(ep_name=endpoint_name, metric_name='InferenceSucceed')
record_count_metrics(ep_name=endpoint_name,
metric_name='InferenceSucceed',
workflow=workflow
)
record_latency_metrics(start_time=sagemaker_out['start_time'],
ep_name=endpoint_name,
workflow=workflow,

View File

@ -73,6 +73,7 @@ def inference_start(job: InferenceJob, username):
payload = InvocationRequest(
id=job.InferenceJobId,
task=job.taskType,
workflow=job.workflow,
username=username,
models=models,
param_s3=job.params['input_body_s3'],

View File

@ -194,3 +194,4 @@ class InvocationRequest:
models: Optional[dict]
param_s3: Optional[str] = None
payload_string: Optional[str] = None
workflow: Optional[str] = None

View File

@ -56,65 +56,82 @@ def update_execute_job_table(prompt_id, key, value):
raise e
def record_metric():
def record_metric(req: InvocationsRequest):
    """Emit per-invocation CloudWatch metrics for the Stable-Diffusion endpoint.

    Publishes, in the 'ESD' namespace:
      * InferenceTotal — once per endpoint and once per endpoint+instance;
      * InferenceEndpointReceived — once per service ('Stable-Diffusion')
        and once per endpoint; when req.workflow is set, an additional
        per-workflow counter is appended.

    Args:
        req: the incoming invocation request; only its optional workflow
            field is inspected here.
    """
    # NOTE(review): datetime.datetime.utcnow() is deprecated from Python
    # 3.12; consider datetime.datetime.now(datetime.timezone.utc) when the
    # runtime is upgraded.
    data = [
        {
            'MetricName': 'InferenceTotal',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceTotal',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
                {
                    'Name': 'Instance',
                    'Value': endpoint_instance_id
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Service',
                    'Value': 'Stable-Diffusion'
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
    ]
    # Optional per-workflow received counter.
    if req.workflow:
        data.append({
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Workflow',
                    'Value': req.workflow
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        })
    response = cloudwatch.put_metric_data(
        Namespace='ESD',
        MetricData=data
    )
    logger.info(f"record_metric response: {response}")
@ -279,7 +296,7 @@ def sagemaker_api(_, app: FastAPI):
update_execute_job_table(req.id, 'startTime', start_time)
record_metric()
record_metric(req)
with condition:
try:

View File

@ -48,6 +48,7 @@ class TestTxt2ImgInferenceAsyncAfterComfyE2E:
}
data = {"endpoint_name": f"{self.ep_name}",
"need_reboot": True,
"workflow": 'comfy_txt2img',
"prepare_id": prepare_id,
"prepare_type": "models"}
resp = self.api.prepare(data=data, headers=headers)

View File

@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn1E2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'cn1',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn2E2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'cn2',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn3E2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'cn2',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'sd_txt2img',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestTxt2ImgReactorAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'sd_reactor_txt2img',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestEsiInferenceAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.ESI.value,
"workflow": 'sd_esi',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestImg2ImgInferenceAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.IMG2IMG.value,
"workflow": 'sd_img2img',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestRembgInferenceAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.REMBG.value,
"workflow": 'sd_rembg',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -157,6 +157,7 @@ class TestTurboE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'sd_turbo_xl',
"models": {
"Stable-diffusion": [filename],
"embeddings": []

View File

@ -46,6 +46,7 @@ class TestMutilGPUsSingleTask:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'single_gpu',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -73,6 +73,7 @@ class TestMutilTaskGPUs:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'gpus',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -243,6 +243,7 @@ def comfy_execute_create(n, api, endpoint_name, wait_succeed=True,
prompt_id = str(uuid.uuid4())
workflow = json.load(f)
workflow['prompt_id'] = prompt_id
workflow['workflow'] = 'latency_compare_comfy'
workflow['endpoint_name'] = endpoint_name
resp = api.create_execute(headers=headers, data=workflow)
@ -292,6 +293,7 @@ def sd_inference_create(n, api, endpoint_name: str, workflow: str = './data/api_
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'latency_compare_sd',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []
@ -352,6 +354,7 @@ def sd_inference_esi(api, workflow: str = './data/api_params/extra-single-image-
data = {
"inference_type": "Async",
"task_type": InferenceType.ESI.value,
"workflow": 'esi',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []
@ -411,6 +414,7 @@ def sd_inference_rembg(api, workflow: str = './data/api_params/rembg-api-params.
data = {
"inference_type": "Async",
"task_type": InferenceType.REMBG.value,
"workflow": 'rembg',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []