105 lines
3.7 KiB
Python
105 lines
3.7 KiB
Python
import json
|
|
import logging
|
|
import os
|
|
|
|
import boto3
|
|
from aws_lambda_powertools import Tracer
|
|
|
|
from common.const import PERMISSION_TRAIN_ALL
|
|
from common.ddb_service.client import DynamoDbUtilsService
|
|
from common.response import ok, forbidden
|
|
from common.util import get_query_param
|
|
from libs.data_types import DatasetInfo
|
|
from libs.utils import get_permissions_by_username, get_user_roles, check_user_permissions, permissions_check, \
|
|
response_error, decode_last_key, encode_last_key
|
|
|
|
# Powertools tracer; its capture_lambda_handler decorator wraps the entry point below.
tracer = Tracer()

# Deployment configuration, read once at import time. Each value is None when
# the corresponding environment variable is not set.
dataset_info_table = os.getenv('DATASET_INFO_TABLE')
bucket_name = os.getenv('S3_BUCKET_NAME')
user_table = os.getenv('MULTI_USER_TABLE')

# DynamoDB resource and the table handle that the handler scans.
ddb = boto3.resource('dynamodb')
table = ddb.Table(dataset_info_table)

# Module logger; falls back to ERROR when LOG_LEVEL is unset or empty.
logger = logging.getLogger(__name__)
logger.setLevel(os.getenv('LOG_LEVEL') or logging.ERROR)

# Project DynamoDB helper, passed to the user/permission lookups in the handler.
ddb_service = DynamoDbUtilsService(logger=logger)
|
|
|
|
|
|
# GET /datasets
@tracer.capture_lambda_handler
def handler(event, context):
    """Lambda entry point for ``GET /datasets``: list the datasets the caller may see.

    Query parameters read from ``event`` via ``get_query_param``:
        exclusive_start_key: encoded pagination cursor; decoded with
            ``decode_last_key`` and passed to the scan as ``ExclusiveStartKey``.
        dataset_status: when given, only datasets whose status value equals
            it are returned.
        limit: ``Limit`` passed to the DynamoDB scan (default 10).

    Returns:
        ``ok(...)`` whose data holds ``datasets`` (ordered by
        ``sort_datasets``) and ``last_evaluated_key``; ``forbidden(...)`` when
        the requestor has neither 'all' nor 'list' under the 'train'
        permission; ``response_error(e)`` for any exception raised on the way.
    """
    try:
        logger.info(json.dumps(event))
        # The returned value is used as the requestor's username below.
        # NOTE(review): presumably raises when the caller lacks
        # PERMISSION_TRAIN_ALL -- confirm against libs.utils.
        requestor_name = permissions_check(event, [PERMISSION_TRAIN_ALL])

        exclusive_start_key = get_query_param(event, 'exclusive_start_key')
        dataset_status = get_query_param(event, 'dataset_status')
        limit = int(get_query_param(event, 'limit', 10))

        scan_kwargs = {
            'Limit': limit,
        }
        if exclusive_start_key:
            scan_kwargs['ExclusiveStartKey'] = decode_last_key(exclusive_start_key)

        # Gate on the 'train' permission before doing any further lookups.
        requestor_permissions = get_permissions_by_username(ddb_service, user_table, requestor_name)
        if 'train' not in requestor_permissions or \
                ('all' not in requestor_permissions['train'] and 'list' not in requestor_permissions['train']):
            return forbidden(message='user has no permission to train')

        # Roles are only needed for the per-dataset visibility check, so they
        # are fetched after the gate to avoid a wasted lookup on rejection.
        requestor_roles = get_user_roles(ddb_service=ddb_service, user_table_name=user_table, username=requestor_name)

        # NOTE(review): the scan `Limit` caps the number of items DynamoDB
        # evaluates, while the status and visibility filters below run on the
        # returned page. A page can therefore hold fewer than `limit` datasets
        # (even none) while `last_evaluated_key` still points at more data.
        response = table.scan(**scan_kwargs)
        scan_rows = response.get('Items', [])
        last_evaluated_key = encode_last_key(response.get('LastEvaluatedKey'))
        if not scan_rows:
            return ok(data={
                'datasets': [],
                'last_evaluated_key': last_evaluated_key
            })

        datasets = []
        for row in scan_rows:
            dataset_info = DatasetInfo(**row)
            # Lazy %-formatting: the object is only rendered if INFO is enabled.
            logger.info('dataset_info: %s', dataset_info)

            if dataset_status and dataset_info.dataset_status.value != dataset_status:
                continue

            # Decide visibility first so hidden rows are never serialised.
            if _can_view_dataset(dataset_info, requestor_permissions, requestor_roles, requestor_name):
                datasets.append(_to_dataset_dto(dataset_info))

        data = {
            'datasets': sort_datasets(datasets),
            'last_evaluated_key': last_evaluated_key
        }

        # NOTE(review): decimal=True presumably enables Decimal-aware JSON
        # encoding of DynamoDB numbers -- confirm in common.response.
        return ok(data=data, decimal=True)
    except Exception as e:
        return response_error(e)


def _can_view_dataset(dataset_info, requestor_permissions, requestor_roles, requestor_name):
    """Tell whether the requestor is allowed to see ``dataset_info``."""
    allowed_roles_or_users = dataset_info.allowed_roles_or_users
    if allowed_roles_or_users:
        return check_user_permissions(allowed_roles_or_users, requestor_roles, requestor_name)

    # superuser can view the legacy data (rows without an access list)
    return 'user' in requestor_permissions and 'all' in requestor_permissions['user']


def _to_dataset_dto(dataset_info):
    """Build the response dictionary for one dataset row."""
    return {
        'datasetName': dataset_info.dataset_name,
        's3': f's3://{bucket_name}/{dataset_info.get_s3_key()}',
        'status': dataset_info.dataset_status.value,
        'timestamp': dataset_info.timestamp,
        # Spread last, so keys in params override the fixed keys above.
        # `or {}` keeps a row whose params is None from failing the whole page.
        **(dataset_info.params or {})
    }
|
|
|
|
|
|
def sort_datasets(data):
    """Order dataset entries by descending 'timestamp'.

    An empty input is handed back as the same object. Otherwise a new list is
    returned and the input is left untouched; entries with equal timestamps
    keep their relative order.
    """
    if not len(data):
        return data

    def _timestamp_of(entry):
        return entry['timestamp']

    ordered = list(data)
    ordered.sort(key=_timestamp_of, reverse=True)
    return ordered
|