improved workflow

pull/779/head
Jingyi Niu 2024-05-17 22:20:39 +08:00
parent 396765d2bd
commit 8a702d7ec2
24 changed files with 201 additions and 132 deletions

View File

@ -17,6 +17,7 @@ class InvocationsRequest(BaseModel):
merge_checkpoint_payload: Optional[dict]
param_s3: Optional[str] = None
payload_string: Optional[str] = None
workflow: Optional[str] = None
port: Optional[str] = "8080"

View File

@ -154,7 +154,7 @@ def update_execute_job_table(prompt_id, key, value):
async def send_request(request_obj, comfy_app: ComfyApp, need_async: bool):
try:
record_metric(comfy_app)
record_metric(comfy_app, request_obj)
logger.info(f"Starting on {comfy_app.port} {need_async} {request_obj}")
comfy_app.busy = True
@ -185,7 +185,7 @@ async def send_request(request_obj, comfy_app: ComfyApp, need_async: bool):
if response.status_code != 200:
raise HTTPException(status_code=response.status_code,
detail=f"COMFY service returned an error: {response.text}")
return wrap_response(start_time, response, comfy_app)
return wrap_response(start_time, response, comfy_app, request_obj)
except Exception as e:
logger.error(f"send_request error {e}")
raise HTTPException(status_code=500, detail=f"COMFY service not available for internal multi reqs {e}")
@ -253,80 +253,100 @@ def ping():
return {'status': 'Healthy'}
def wrap_response(start_time, response, comfy_app: ComfyApp):
def wrap_response(start_time, response, comfy_app: ComfyApp, request_obj):
    """Augment the Comfy HTTP response body with request/serving metadata.

    Adds the request start time, the endpoint name and instance id taken from
    the environment, the GPU device id of the serving Comfy instance and, when
    the request carries a non-empty 'workflow' entry, that workflow tag.

    Returns the decoded and decorated JSON body as a dict.
    """
    body = response.json()
    body['start_time'] = start_time
    body['endpoint_name'] = os.getenv('ENDPOINT_NAME')
    body['endpoint_instance_id'] = os.getenv('ENDPOINT_INSTANCE_ID')
    body['device_id'] = comfy_app.device_id
    # Propagate the workflow remark only when present and truthy.
    # NOTE(review): membership/subscript access assumes request_obj behaves
    # like a mapping here — confirm against the caller.
    has_workflow = 'workflow' in request_obj and request_obj['workflow']
    if has_workflow:
        body['workflow'] = request_obj['workflow']
    return body
def record_metric(comfy_app: ComfyApp):
def record_metric(comfy_app: ComfyApp, request_obj):
    """Emit per-invocation CloudWatch metrics for the Comfy endpoint.

    Publishes, in the 'ESD' namespace:
      * InferenceTotal — once per endpoint+instance, and once per
        endpoint+instance+GPU (InstanceGPU dimension from comfy_app.device_id);
      * InferenceEndpointReceived — once per service ('Comfy') and once per
        endpoint; when the request carries a non-empty 'workflow' entry, an
        additional per-workflow counter is appended.

    Args:
        comfy_app: the Comfy instance serving this request; only its
            device_id is read here.
        request_obj: the incoming request; only its optional 'workflow'
            entry is inspected.
    """
    # NOTE(review): datetime.datetime.utcnow() is deprecated from Python
    # 3.12; consider datetime.datetime.now(datetime.timezone.utc) when the
    # runtime is upgraded.
    data = [
        {
            'MetricName': 'InferenceTotal',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
                {
                    'Name': 'Instance',
                    'Value': endpoint_instance_id
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceTotal',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
                {
                    'Name': 'Instance',
                    'Value': endpoint_instance_id
                },
                {
                    'Name': 'InstanceGPU',
                    'Value': f"GPU{comfy_app.device_id}"
                }
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Service',
                    'Value': 'Comfy'
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
    ]
    # Optional per-workflow received counter.
    if 'workflow' in request_obj and request_obj['workflow']:
        data.append({
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Workflow',
                    'Value': request_obj['workflow']
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        })
    response = cloudwatch.put_metric_data(
        Namespace='ESD',
        MetricData=data
    )
    logger.info(f"record_metric response: {response}")

View File

@ -12,7 +12,7 @@ import {
SCHEMA_EXECUTE_NEED_SYNC,
SCHEMA_EXECUTE_PROMPT_ID,
SCHEMA_INFER_TYPE,
SCHEMA_MESSAGE,
SCHEMA_MESSAGE, SCHEMA_WORKFLOW,
} from '../../shared/schema';
@ -290,6 +290,7 @@ export class CreateExecuteApi {
},
endpoint_name: SCHEMA_ENDPOINT_NAME,
need_sync: SCHEMA_EXECUTE_NEED_SYNC,
workflow: SCHEMA_WORKFLOW,
number: {
type: JsonSchemaType.STRING,
minLength: 1,

View File

@ -12,7 +12,7 @@ import {
SCHEMA_INFERENCE,
SCHEMA_INFERENCE_ASYNC_MODEL,
SCHEMA_INFERENCE_REAL_TIME_MODEL,
SCHEMA_MESSAGE,
SCHEMA_MESSAGE, SCHEMA_WORKFLOW,
} from '../../shared/schema';
import { ApiValidators } from '../../shared/validator';
@ -148,6 +148,7 @@ export class CreateInferenceJobApi {
type: {
type: JsonSchemaType.STRING,
},
workflow: SCHEMA_WORKFLOW,
api_params_s3_location: {
type: JsonSchemaType.STRING,
format: 'uri',
@ -225,6 +226,7 @@ export class CreateInferenceJobApi {
type: JsonSchemaType.STRING,
},
inference_type: SCHEMA_INFER_TYPE,
workflow: SCHEMA_WORKFLOW,
payload_string: {
type: JsonSchemaType.STRING,
},

View File

@ -441,6 +441,11 @@ export const SCHEMA_EXECUTE_PROMPT_PATH: JsonSchema = {
description: 'Prompt Path',
};
// Optional free-form tag naming the workflow an inference/execute request
// belongs to; echoed back in responses and used as a CloudWatch
// 'Workflow' metric dimension by the serving containers.
export const SCHEMA_WORKFLOW: JsonSchema = {
type: JsonSchemaType.STRING,
description: 'Workflow remark',
};
export const SCHEMA_INFERENCE: Record<string, JsonSchema> = {
img_presigned_urls: {
type: JsonSchemaType.ARRAY,

View File

@ -16,9 +16,9 @@ from sagemaker.serializers import JSONSerializer
from common.ddb_service.client import DynamoDbUtilsService
from common.excepts import BadRequestException
from common.response import ok, created, internal_server_error
from common.util import s3_scan_files, generate_presigned_url_for_keys, record_latency_metrics, \
record_count_metrics
from common.response import ok, created
from common.util import s3_scan_files, generate_presigned_url_for_keys, \
record_latency_metrics, record_count_metrics
from libs.comfy_data_types import ComfyExecuteTable, InferenceResult
from libs.enums import ComfyExecuteType, EndpointStatus, ServiceType
from libs.utils import get_endpoint_by_name, response_error
@ -38,7 +38,9 @@ sqs_url = os.environ.get('MERGE_SQS_URL')
index_name = "endpoint_name-startTime-index"
predictors = {}
multi_gpu_instance_type_list = ['ml.p5.48xlarge', 'ml.p4d.24xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', 'ml.p3dn.24xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.g4dn.12xlarge', 'ml.g5.12xlarge', 'ml.g5.24xlarge', 'ml.g5.48xlarge']
# SageMaker instance types that expose more than one GPU.
# NOTE(review): the consumers of this list are outside this chunk —
# presumably it gates per-GPU scheduling; confirm against callers.
multi_gpu_instance_type_list = ['ml.p5.48xlarge', 'ml.p4d.24xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
                                'ml.p3dn.24xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.g4dn.12xlarge',
                                'ml.g5.12xlarge', 'ml.g5.24xlarge', 'ml.g5.48xlarge']
@dataclass

View File

@ -7,8 +7,8 @@ import boto3
from aws_lambda_powertools import Tracer
from common.ddb_service.client import DynamoDbUtilsService
from common.util import s3_scan_files, load_json_from_s3, record_count_metrics, record_latency_metrics, \
record_queue_latency_metrics
from common.util import s3_scan_files, load_json_from_s3, record_count_metrics, \
record_latency_metrics, record_queue_latency_metrics
from libs.comfy_data_types import InferenceResult
from libs.enums import ServiceType

View File

@ -73,7 +73,10 @@ def handler(event, context):
parse_sagemaker_result(sagemaker_out, create_time, inference_id, task_type, endpoint_name)
record_count_metrics(ep_name=endpoint_name, metric_name='InferenceSucceed')
record_count_metrics(ep_name=endpoint_name,
metric_name='InferenceSucceed',
workflow=workflow
)
record_latency_metrics(start_time=sagemaker_out['start_time'],
ep_name=endpoint_name,
workflow=workflow,

View File

@ -73,6 +73,7 @@ def inference_start(job: InferenceJob, username):
payload = InvocationRequest(
id=job.InferenceJobId,
task=job.taskType,
workflow=job.workflow,
username=username,
models=models,
param_s3=job.params['input_body_s3'],

View File

@ -194,3 +194,4 @@ class InvocationRequest:
models: Optional[dict]
param_s3: Optional[str] = None
payload_string: Optional[str] = None
workflow: Optional[str] = None

View File

@ -56,65 +56,82 @@ def update_execute_job_table(prompt_id, key, value):
raise e
def record_metric():
def record_metric(req: InvocationsRequest):
    """Emit per-invocation CloudWatch metrics for the Stable-Diffusion endpoint.

    Publishes, in the 'ESD' namespace:
      * InferenceTotal — once per endpoint and once per endpoint+instance;
      * InferenceEndpointReceived — once per service ('Stable-Diffusion')
        and once per endpoint; when req.workflow is set, an additional
        per-workflow counter is appended.

    Args:
        req: the incoming invocation request; only its optional workflow
            field is inspected here.
    """
    # NOTE(review): datetime.datetime.utcnow() is deprecated from Python
    # 3.12; consider datetime.datetime.now(datetime.timezone.utc) when the
    # runtime is upgraded.
    data = [
        {
            'MetricName': 'InferenceTotal',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceTotal',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
                {
                    'Name': 'Instance',
                    'Value': endpoint_instance_id
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Service',
                    'Value': 'Stable-Diffusion'
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
        {
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Endpoint',
                    'Value': endpoint_name
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        },
    ]
    # Optional per-workflow received counter.
    if req.workflow:
        data.append({
            'MetricName': 'InferenceEndpointReceived',
            'Dimensions': [
                {
                    'Name': 'Workflow',
                    'Value': req.workflow
                },
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': 1,
            'Unit': 'Count'
        })
    response = cloudwatch.put_metric_data(
        Namespace='ESD',
        MetricData=data
    )
    logger.info(f"record_metric response: {response}")
@ -279,7 +296,7 @@ def sagemaker_api(_, app: FastAPI):
update_execute_job_table(req.id, 'startTime', start_time)
record_metric()
record_metric(req)
with condition:
try:

View File

@ -48,6 +48,7 @@ class TestTxt2ImgInferenceAsyncAfterComfyE2E:
}
data = {"endpoint_name": f"{self.ep_name}",
"need_reboot": True,
"workflow": 'comfy_txt2img',
"prepare_id": prepare_id,
"prepare_type": "models"}
resp = self.api.prepare(data=data, headers=headers)

View File

@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn1E2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'cn1',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn2E2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'cn2',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn3E2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'cn2',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'sd_txt2img',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestTxt2ImgReactorAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'sd_reactor_txt2img',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestEsiInferenceAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.ESI.value,
"workflow": 'sd_esi',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestImg2ImgInferenceAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.IMG2IMG.value,
"workflow": 'sd_img2img',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -59,6 +59,7 @@ class TestRembgInferenceAsyncE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.REMBG.value,
"workflow": 'sd_rembg',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -157,6 +157,7 @@ class TestTurboE2E:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'sd_turbo_xl',
"models": {
"Stable-diffusion": [filename],
"embeddings": []

View File

@ -46,6 +46,7 @@ class TestMutilGPUsSingleTask:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'single_gpu',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -73,6 +73,7 @@ class TestMutilTaskGPUs:
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'gpus',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []

View File

@ -243,6 +243,7 @@ def comfy_execute_create(n, api, endpoint_name, wait_succeed=True,
prompt_id = str(uuid.uuid4())
workflow = json.load(f)
workflow['prompt_id'] = prompt_id
workflow['workflow'] = 'latency_compare_comfy'
workflow['endpoint_name'] = endpoint_name
resp = api.create_execute(headers=headers, data=workflow)
@ -292,6 +293,7 @@ def sd_inference_create(n, api, endpoint_name: str, workflow: str = './data/api_
data = {
"inference_type": "Async",
"task_type": InferenceType.TXT2IMG.value,
"workflow": 'latency_compare_sd',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []
@ -352,6 +354,7 @@ def sd_inference_esi(api, workflow: str = './data/api_params/extra-single-image-
data = {
"inference_type": "Async",
"task_type": InferenceType.ESI.value,
"workflow": 'esi',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []
@ -411,6 +414,7 @@ def sd_inference_rembg(api, workflow: str = './data/api_params/rembg-api-params.
data = {
"inference_type": "Async",
"task_type": InferenceType.REMBG.value,
"workflow": 'rembg',
"models": {
"Stable-diffusion": [config.default_model_id],
"embeddings": []