From 8a702d7ec22c844f6848c7e2a9ba7a09299a853e Mon Sep 17 00:00:00 2001 From: Jingyi Niu Date: Fri, 17 May 2024 22:20:39 +0800 Subject: [PATCH] improved workflow --- aws_extension/models.py | 1 + build_scripts/comfy/serve.py | 152 ++++++++++-------- infrastructure/src/api/comfy/create_excute.ts | 3 +- .../api/inferences/create-inference-job.ts | 4 +- infrastructure/src/shared/schema.ts | 5 + middleware_api/comfy/execute.py | 10 +- middleware_api/comfy/execute_async_events.py | 4 +- .../inferences/inference_async_events.py | 5 +- .../inferences/start_inference_job.py | 1 + middleware_api/libs/data_types.py | 1 + scripts/api.py | 131 ++++++++------- .../test_08_comfy_inference_txt2img_async.py | 1 + .../test_10_txt2img_async_cn_1.py | 1 + .../test_11_txt2img_async_cn_2.py | 1 + .../test_12_txt2img_async_cn_3.py | 1 + .../test_13_txt2img_async.py | 1 + .../test_15_txt2img_reactor_async.py | 1 + .../test_20_esi_async.py | 1 + .../test_30_img2img_async.py | 1 + .../test_40_rembg_async.py | 1 + .../test_41_sd_xl_turbo.py | 1 + .../bak_test_05_sd_mutil_gpus_single_task.py | 1 + .../bak_test_06_sd_mutil_gpus_tasks.py | 1 + test/utils/helper.py | 4 + 24 files changed, 201 insertions(+), 132 deletions(-) diff --git a/aws_extension/models.py b/aws_extension/models.py index f1c52f7a..896f1e51 100644 --- a/aws_extension/models.py +++ b/aws_extension/models.py @@ -17,6 +17,7 @@ class InvocationsRequest(BaseModel): merge_checkpoint_payload: Optional[dict] param_s3: Optional[str] = None payload_string: Optional[str] = None + workflow: Optional[str] = None port: Optional[str] = "8080" diff --git a/build_scripts/comfy/serve.py b/build_scripts/comfy/serve.py index f6d35f60..f31b4d91 100644 --- a/build_scripts/comfy/serve.py +++ b/build_scripts/comfy/serve.py @@ -154,7 +154,7 @@ def update_execute_job_table(prompt_id, key, value): async def send_request(request_obj, comfy_app: ComfyApp, need_async: bool): try: - record_metric(comfy_app) + record_metric(comfy_app, request_obj) 
logger.info(f"Starting on {comfy_app.port} {need_async} {request_obj}") comfy_app.busy = True @@ -185,7 +185,7 @@ async def send_request(request_obj, comfy_app: ComfyApp, need_async: bool): if response.status_code != 200: raise HTTPException(status_code=response.status_code, detail=f"COMFY service returned an error: {response.text}") - return wrap_response(start_time, response, comfy_app) + return wrap_response(start_time, response, comfy_app, request_obj) except Exception as e: logger.error(f"send_request error {e}") raise HTTPException(status_code=500, detail=f"COMFY service not available for internal multi reqs {e}") @@ -253,80 +253,100 @@ def ping(): return {'status': 'Healthy'} -def wrap_response(start_time, response, comfy_app: ComfyApp): +def wrap_response(start_time, response, comfy_app: ComfyApp, request_obj): data = response.json() data['start_time'] = start_time data['endpoint_name'] = os.getenv('ENDPOINT_NAME') data['endpoint_instance_id'] = os.getenv('ENDPOINT_INSTANCE_ID') data['device_id'] = comfy_app.device_id + + if 'workflow' in request_obj and request_obj['workflow']: + data['workflow'] = request_obj['workflow'] + return data -def record_metric(comfy_app: ComfyApp): +def record_metric(comfy_app: ComfyApp, request_obj): + data = [ + { + 'MetricName': 'InferenceTotal', + 'Dimensions': [ + { + 'Name': 'Endpoint', + 'Value': endpoint_name + }, + { + 'Name': 'Instance', + 'Value': endpoint_instance_id + }, + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }, + { + 'MetricName': 'InferenceTotal', + 'Dimensions': [ + { + 'Name': 'Endpoint', + 'Value': endpoint_name + }, + { + 'Name': 'Instance', + 'Value': endpoint_instance_id + }, + { + 'Name': 'InstanceGPU', + 'Value': f"GPU{comfy_app.device_id}" + } + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }, + { + 'MetricName': 'InferenceEndpointReceived', + 'Dimensions': [ + { + 'Name': 'Service', + 'Value': 'Comfy' + }, + ], + 'Timestamp': 
datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }, + { + 'MetricName': 'InferenceEndpointReceived', + 'Dimensions': [ + { + 'Name': 'Endpoint', + 'Value': endpoint_name + }, + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }, + ] + + if 'workflow' in request_obj and request_obj['workflow']: + data.append({ + 'MetricName': 'InferenceEndpointReceived', + 'Dimensions': [ + { + 'Name': 'Workflow', + 'Value': request_obj['workflow'] + }, + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }) + response = cloudwatch.put_metric_data( Namespace='ESD', - MetricData=[ - { - 'MetricName': 'InferenceTotal', - 'Dimensions': [ - { - 'Name': 'Endpoint', - 'Value': endpoint_name - }, - { - 'Name': 'Instance', - 'Value': endpoint_instance_id - }, - ], - 'Timestamp': datetime.datetime.utcnow(), - 'Value': 1, - 'Unit': 'Count' - }, - { - 'MetricName': 'InferenceTotal', - 'Dimensions': [ - { - 'Name': 'Endpoint', - 'Value': endpoint_name - }, - { - 'Name': 'Instance', - 'Value': endpoint_instance_id - }, - { - 'Name': 'InstanceGPU', - 'Value': f"GPU{comfy_app.device_id}" - } - ], - 'Timestamp': datetime.datetime.utcnow(), - 'Value': 1, - 'Unit': 'Count' - }, - { - 'MetricName': 'InferenceEndpointReceived', - 'Dimensions': [ - { - 'Name': 'Service', - 'Value': 'Comfy' - }, - ], - 'Timestamp': datetime.datetime.utcnow(), - 'Value': 1, - 'Unit': 'Count' - }, - { - 'MetricName': 'InferenceEndpointReceived', - 'Dimensions': [ - { - 'Name': 'Endpoint', - 'Value': endpoint_name - }, - ], - 'Timestamp': datetime.datetime.utcnow(), - 'Value': 1, - 'Unit': 'Count' - }, - ] + MetricData=data ) logger.info(f"record_metric response: {response}") diff --git a/infrastructure/src/api/comfy/create_excute.ts b/infrastructure/src/api/comfy/create_excute.ts index 779c8b31..9ef6c375 100644 --- a/infrastructure/src/api/comfy/create_excute.ts +++ b/infrastructure/src/api/comfy/create_excute.ts @@ -12,7 +12,7 @@ import { 
SCHEMA_EXECUTE_NEED_SYNC, SCHEMA_EXECUTE_PROMPT_ID, SCHEMA_INFER_TYPE, - SCHEMA_MESSAGE, + SCHEMA_MESSAGE, SCHEMA_WORKFLOW, } from '../../shared/schema'; @@ -290,6 +290,7 @@ export class CreateExecuteApi { }, endpoint_name: SCHEMA_ENDPOINT_NAME, need_sync: SCHEMA_EXECUTE_NEED_SYNC, + workflow: SCHEMA_WORKFLOW, number: { type: JsonSchemaType.STRING, minLength: 1, diff --git a/infrastructure/src/api/inferences/create-inference-job.ts b/infrastructure/src/api/inferences/create-inference-job.ts index 7abf5013..41fe0495 100644 --- a/infrastructure/src/api/inferences/create-inference-job.ts +++ b/infrastructure/src/api/inferences/create-inference-job.ts @@ -12,7 +12,7 @@ import { SCHEMA_INFERENCE, SCHEMA_INFERENCE_ASYNC_MODEL, SCHEMA_INFERENCE_REAL_TIME_MODEL, - SCHEMA_MESSAGE, + SCHEMA_MESSAGE, SCHEMA_WORKFLOW, } from '../../shared/schema'; import { ApiValidators } from '../../shared/validator'; @@ -148,6 +148,7 @@ export class CreateInferenceJobApi { type: { type: JsonSchemaType.STRING, }, + workflow: SCHEMA_WORKFLOW, api_params_s3_location: { type: JsonSchemaType.STRING, format: 'uri', @@ -225,6 +226,7 @@ export class CreateInferenceJobApi { type: JsonSchemaType.STRING, }, inference_type: SCHEMA_INFER_TYPE, + workflow: SCHEMA_WORKFLOW, payload_string: { type: JsonSchemaType.STRING, }, diff --git a/infrastructure/src/shared/schema.ts b/infrastructure/src/shared/schema.ts index 6b95cdfc..bb01dfc8 100644 --- a/infrastructure/src/shared/schema.ts +++ b/infrastructure/src/shared/schema.ts @@ -441,6 +441,11 @@ export const SCHEMA_EXECUTE_PROMPT_PATH: JsonSchema = { description: 'Prompt Path', }; +export const SCHEMA_WORKFLOW: JsonSchema = { + type: JsonSchemaType.STRING, + description: 'Workflow remark', +}; + export const SCHEMA_INFERENCE: Record = { img_presigned_urls: { type: JsonSchemaType.ARRAY, diff --git a/middleware_api/comfy/execute.py b/middleware_api/comfy/execute.py index 1929a637..b718fae1 100644 --- a/middleware_api/comfy/execute.py +++ 
b/middleware_api/comfy/execute.py @@ -16,9 +16,9 @@ from sagemaker.serializers import JSONSerializer from common.ddb_service.client import DynamoDbUtilsService from common.excepts import BadRequestException -from common.response import ok, created, internal_server_error -from common.util import s3_scan_files, generate_presigned_url_for_keys, record_latency_metrics, \ - record_count_metrics +from common.response import ok, created +from common.util import s3_scan_files, generate_presigned_url_for_keys, \ + record_latency_metrics, record_count_metrics from libs.comfy_data_types import ComfyExecuteTable, InferenceResult from libs.enums import ComfyExecuteType, EndpointStatus, ServiceType from libs.utils import get_endpoint_by_name, response_error @@ -38,7 +38,9 @@ sqs_url = os.environ.get('MERGE_SQS_URL') index_name = "endpoint_name-startTime-index" predictors = {} -multi_gpu_instance_type_list = ['ml.p5.48xlarge', 'ml.p4d.24xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', 'ml.p3dn.24xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.g4dn.12xlarge', 'ml.g5.12xlarge', 'ml.g5.24xlarge', 'ml.g5.48xlarge'] +multi_gpu_instance_type_list = ['ml.p5.48xlarge', 'ml.p4d.24xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', + 'ml.p3dn.24xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.g4dn.12xlarge', + 'ml.g5.12xlarge', 'ml.g5.24xlarge', 'ml.g5.48xlarge'] @dataclass diff --git a/middleware_api/comfy/execute_async_events.py b/middleware_api/comfy/execute_async_events.py index dac0f92e..0052df95 100644 --- a/middleware_api/comfy/execute_async_events.py +++ b/middleware_api/comfy/execute_async_events.py @@ -7,8 +7,8 @@ import boto3 from aws_lambda_powertools import Tracer from common.ddb_service.client import DynamoDbUtilsService -from common.util import s3_scan_files, load_json_from_s3, record_count_metrics, record_latency_metrics, \ - record_queue_latency_metrics +from common.util import s3_scan_files, load_json_from_s3, record_count_metrics, \ + record_latency_metrics, 
record_queue_latency_metrics from libs.comfy_data_types import InferenceResult from libs.enums import ServiceType diff --git a/middleware_api/inferences/inference_async_events.py b/middleware_api/inferences/inference_async_events.py index 28dee20e..e5f8dd05 100644 --- a/middleware_api/inferences/inference_async_events.py +++ b/middleware_api/inferences/inference_async_events.py @@ -73,7 +73,10 @@ def handler(event, context): parse_sagemaker_result(sagemaker_out, create_time, inference_id, task_type, endpoint_name) - record_count_metrics(ep_name=endpoint_name, metric_name='InferenceSucceed') + record_count_metrics(ep_name=endpoint_name, + metric_name='InferenceSucceed', + workflow=workflow + ) record_latency_metrics(start_time=sagemaker_out['start_time'], ep_name=endpoint_name, workflow=workflow, diff --git a/middleware_api/inferences/start_inference_job.py b/middleware_api/inferences/start_inference_job.py index 95508665..a2d99c21 100644 --- a/middleware_api/inferences/start_inference_job.py +++ b/middleware_api/inferences/start_inference_job.py @@ -73,6 +73,7 @@ def inference_start(job: InferenceJob, username): payload = InvocationRequest( id=job.InferenceJobId, task=job.taskType, + workflow=job.workflow, username=username, models=models, param_s3=job.params['input_body_s3'], diff --git a/middleware_api/libs/data_types.py b/middleware_api/libs/data_types.py index a440c4cc..6626c559 100644 --- a/middleware_api/libs/data_types.py +++ b/middleware_api/libs/data_types.py @@ -194,3 +194,4 @@ class InvocationRequest: models: Optional[dict] param_s3: Optional[str] = None payload_string: Optional[str] = None + workflow: Optional[str] = None diff --git a/scripts/api.py b/scripts/api.py index 3a3e5832..bca65644 100644 --- a/scripts/api.py +++ b/scripts/api.py @@ -56,65 +56,82 @@ def update_execute_job_table(prompt_id, key, value): raise e -def record_metric(): +def record_metric(req: InvocationsRequest): + data = [ + { + 'MetricName': 'InferenceTotal', + 'Dimensions': [ + { 
+ 'Name': 'Endpoint', + 'Value': endpoint_name + }, + + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }, + { + 'MetricName': 'InferenceTotal', + 'Dimensions': [ + { + 'Name': 'Endpoint', + 'Value': endpoint_name + }, + { + 'Name': 'Instance', + 'Value': endpoint_instance_id + }, + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }, + { + 'MetricName': 'InferenceEndpointReceived', + 'Dimensions': [ + { + 'Name': 'Service', + 'Value': 'Stable-Diffusion' + }, + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }, + { + 'MetricName': 'InferenceEndpointReceived', + 'Dimensions': [ + { + 'Name': 'Endpoint', + 'Value': endpoint_name + }, + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }, + ] + + if req.workflow: + data.append({ + 'MetricName': 'InferenceEndpointReceived', + 'Dimensions': [ + { + 'Name': 'Workflow', + 'Value': req.workflow + }, + ], + 'Timestamp': datetime.datetime.utcnow(), + 'Value': 1, + 'Unit': 'Count' + }) + response = cloudwatch.put_metric_data( Namespace='ESD', - MetricData=[ - { - 'MetricName': 'InferenceTotal', - 'Dimensions': [ - { - 'Name': 'Endpoint', - 'Value': endpoint_name - }, - - ], - 'Timestamp': datetime.datetime.utcnow(), - 'Value': 1, - 'Unit': 'Count' - }, - { - 'MetricName': 'InferenceTotal', - 'Dimensions': [ - { - 'Name': 'Endpoint', - 'Value': endpoint_name - }, - { - 'Name': 'Instance', - 'Value': endpoint_instance_id - }, - ], - 'Timestamp': datetime.datetime.utcnow(), - 'Value': 1, - 'Unit': 'Count' - }, - { - 'MetricName': 'InferenceEndpointReceived', - 'Dimensions': [ - { - 'Name': 'Service', - 'Value': 'Stable-Diffusion' - }, - ], - 'Timestamp': datetime.datetime.utcnow(), - 'Value': 1, - 'Unit': 'Count' - }, - { - 'MetricName': 'InferenceEndpointReceived', - 'Dimensions': [ - { - 'Name': 'Endpoint', - 'Value': endpoint_name - }, - ], - 'Timestamp': datetime.datetime.utcnow(), - 'Value': 1, - 
'Unit': 'Count' - }, - ] + MetricData=data ) + logger.info(f"record_metric response: {response}") @@ -279,7 +296,7 @@ def sagemaker_api(_, app: FastAPI): update_execute_job_table(req.id, 'startTime', start_time) - record_metric() + record_metric(req) with condition: try: diff --git a/test/test_06_api_inference/test_08_comfy_inference_txt2img_async.py b/test/test_06_api_inference/test_08_comfy_inference_txt2img_async.py index 7bd24eac..c0fbbef2 100644 --- a/test/test_06_api_inference/test_08_comfy_inference_txt2img_async.py +++ b/test/test_06_api_inference/test_08_comfy_inference_txt2img_async.py @@ -48,6 +48,7 @@ class TestTxt2ImgInferenceAsyncAfterComfyE2E: } data = {"endpoint_name": f"{self.ep_name}", "need_reboot": True, + "workflow": 'comfy_txt2img', "prepare_id": prepare_id, "prepare_type": "models"} resp = self.api.prepare(data=data, headers=headers) diff --git a/test/test_06_api_inference/test_10_txt2img_async_cn_1.py b/test/test_06_api_inference/test_10_txt2img_async_cn_1.py index 7ff36f91..4a2a58fd 100644 --- a/test/test_06_api_inference/test_10_txt2img_async_cn_1.py +++ b/test/test_06_api_inference/test_10_txt2img_async_cn_1.py @@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn1E2E: data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'cn1', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git a/test/test_06_api_inference/test_11_txt2img_async_cn_2.py b/test/test_06_api_inference/test_11_txt2img_async_cn_2.py index 1c85d2e3..2f19f188 100644 --- a/test/test_06_api_inference/test_11_txt2img_async_cn_2.py +++ b/test/test_06_api_inference/test_11_txt2img_async_cn_2.py @@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn2E2E: data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'cn2', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git a/test/test_06_api_inference/test_12_txt2img_async_cn_3.py 
b/test/test_06_api_inference/test_12_txt2img_async_cn_3.py index 633f1c0c..8f814406 100644 --- a/test/test_06_api_inference/test_12_txt2img_async_cn_3.py +++ b/test/test_06_api_inference/test_12_txt2img_async_cn_3.py @@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncCn3E2E: data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'cn3', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git a/test/test_06_api_inference/test_13_txt2img_async.py b/test/test_06_api_inference/test_13_txt2img_async.py index 4ae6277c..b7b3a266 100644 --- a/test/test_06_api_inference/test_13_txt2img_async.py +++ b/test/test_06_api_inference/test_13_txt2img_async.py @@ -59,6 +59,7 @@ class TestTxt2ImgInferenceAsyncE2E: data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'sd_txt2img', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git a/test/test_06_api_inference/test_15_txt2img_reactor_async.py b/test/test_06_api_inference/test_15_txt2img_reactor_async.py index e108f7f6..4d496469 100644 --- a/test/test_06_api_inference/test_15_txt2img_reactor_async.py +++ b/test/test_06_api_inference/test_15_txt2img_reactor_async.py @@ -59,6 +59,7 @@ class TestTxt2ImgReactorAsyncE2E: data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'sd_reactor_txt2img', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git a/test/test_06_api_inference/test_20_esi_async.py b/test/test_06_api_inference/test_20_esi_async.py index 2f84f4d2..2df5f365 100644 --- a/test/test_06_api_inference/test_20_esi_async.py +++ b/test/test_06_api_inference/test_20_esi_async.py @@ -59,6 +59,7 @@ class TestEsiInferenceAsyncE2E: data = { "inference_type": "Async", "task_type": InferenceType.ESI.value, + "workflow": 'sd_esi', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git 
a/test/test_06_api_inference/test_30_img2img_async.py b/test/test_06_api_inference/test_30_img2img_async.py index 27077b48..2d67c9f5 100644 --- a/test/test_06_api_inference/test_30_img2img_async.py +++ b/test/test_06_api_inference/test_30_img2img_async.py @@ -59,6 +59,7 @@ class TestImg2ImgInferenceAsyncE2E: data = { "inference_type": "Async", "task_type": InferenceType.IMG2IMG.value, + "workflow": 'sd_img2img', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git a/test/test_06_api_inference/test_40_rembg_async.py b/test/test_06_api_inference/test_40_rembg_async.py index 0df9b986..d56d0eda 100644 --- a/test/test_06_api_inference/test_40_rembg_async.py +++ b/test/test_06_api_inference/test_40_rembg_async.py @@ -59,6 +59,7 @@ class TestRembgInferenceAsyncE2E: data = { "inference_type": "Async", "task_type": InferenceType.REMBG.value, + "workflow": 'sd_rembg', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git a/test/test_06_api_inference/test_41_sd_xl_turbo.py b/test/test_06_api_inference/test_41_sd_xl_turbo.py index 34a3f85a..d6dbf5dc 100644 --- a/test/test_06_api_inference/test_41_sd_xl_turbo.py +++ b/test/test_06_api_inference/test_41_sd_xl_turbo.py @@ -157,6 +157,7 @@ class TestTurboE2E: data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'sd_turbo_xl', "models": { "Stable-diffusion": [filename], "embeddings": [] diff --git a/test/test_10_local_only/bak_test_05_sd_mutil_gpus_single_task.py b/test/test_10_local_only/bak_test_05_sd_mutil_gpus_single_task.py index 93cc9234..0b7b36dd 100644 --- a/test/test_10_local_only/bak_test_05_sd_mutil_gpus_single_task.py +++ b/test/test_10_local_only/bak_test_05_sd_mutil_gpus_single_task.py @@ -46,6 +46,7 @@ class TestMutilGPUsSingleTask: data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'single_gpu', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] 
diff --git a/test/test_10_local_only/bak_test_06_sd_mutil_gpus_tasks.py b/test/test_10_local_only/bak_test_06_sd_mutil_gpus_tasks.py index 6af363fb..3e5cf304 100644 --- a/test/test_10_local_only/bak_test_06_sd_mutil_gpus_tasks.py +++ b/test/test_10_local_only/bak_test_06_sd_mutil_gpus_tasks.py @@ -73,6 +73,7 @@ class TestMutilTaskGPUs: data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'gpus', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] diff --git a/test/utils/helper.py b/test/utils/helper.py index 5c57c775..b3462ed9 100644 --- a/test/utils/helper.py +++ b/test/utils/helper.py @@ -243,6 +243,7 @@ def comfy_execute_create(n, api, endpoint_name, wait_succeed=True, prompt_id = str(uuid.uuid4()) workflow = json.load(f) workflow['prompt_id'] = prompt_id + workflow['workflow'] = 'latency_compare_comfy' workflow['endpoint_name'] = endpoint_name resp = api.create_execute(headers=headers, data=workflow) @@ -292,6 +293,7 @@ def sd_inference_create(n, api, endpoint_name: str, workflow: str = './data/api_ data = { "inference_type": "Async", "task_type": InferenceType.TXT2IMG.value, + "workflow": 'latency_compare_sd', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] @@ -352,6 +354,7 @@ def sd_inference_esi(api, workflow: str = './data/api_params/extra-single-image- data = { "inference_type": "Async", "task_type": InferenceType.ESI.value, + "workflow": 'esi', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": [] @@ -411,6 +414,7 @@ def sd_inference_rembg(api, workflow: str = './data/api_params/rembg-api-params. data = { "inference_type": "Async", "task_type": InferenceType.REMBG.value, + "workflow": 'rembg', "models": { "Stable-diffusion": [config.default_model_id], "embeddings": []