465 lines
17 KiB
Python
465 lines
17 KiB
Python
import os
|
|
import requests
|
|
import boto3
|
|
import botocore
|
|
import boto3.s3.transfer as s3transfer
|
|
import sys
|
|
from urllib.parse import urlparse
|
|
import requests
|
|
import json
|
|
import gradio as gr
|
|
|
|
sys.path.append(os.getcwd())
|
|
# from modules.timer import Timer
|
|
import tarfile
|
|
|
|
import shutil
|
|
from pathlib import Path
|
|
import psutil
|
|
|
|
class ModelsRef:
    """
    Description:
        Keep a reference counter per model name.

        The counters live in a plain dict (``self.models_ref``). A model's
        counter is 0 the first time it is added and grows by one on every
        later add, so a lower value means "referenced less often".
    """

    def __init__(self):
        # Maps model name -> reference counter.
        self.models_ref = {}

    def get_models_ref_dict(self):
        """Return the underlying name -> counter dict (the live object, not a copy)."""
        return self.models_ref

    def add_models_ref(self, model_name):
        """Register ``model_name`` with counter 0, or add 1 if it is already known."""
        if model_name in self.models_ref:
            self.models_ref[model_name] += 1
        else:
            self.models_ref[model_name] = 0

    def remove_model_ref(self, model_name):
        """Forget ``model_name``; names that are not tracked are ignored."""
        # pop() rather than a truthiness test on the counter: a freshly added
        # model has counter 0, which is falsy and would never be removed.
        self.models_ref.pop(model_name, None)

    def get_models_ref(self, model_name):
        """Return the counter of ``model_name``, or None if it is not tracked."""
        return self.models_ref.get(model_name)

    def get_least_ref_model(self):
        """Return ``(name, counter)`` for the lowest counter, or ``(None, None)`` when empty."""
        if not self.models_ref:
            return None, None
        # min() returns the first of several equal counters, which is the
        # same entry a stable ascending sort would put first.
        return min(self.models_ref.items(), key=lambda item: item[1])

    def pop_least_ref_model(self):
        """Remove and return ``(name, counter)`` for the lowest counter, or ``(None, None)`` when empty."""
        if not self.models_ref:
            return None, None
        least_ref_model, least_counter = min(self.models_ref.items(), key=lambda item: item[1])
        del self.models_ref[least_ref_model]
        return least_ref_model, least_counter

    def get_sorted_models(self, key_list=None):
        """
        Description:
            Order models by ascending reference counter.
        Args:
            key_list: optional iterable of model names to order. Names that
                are not tracked are treated as having counter -1, so they
                come first.
        Returns:
            Without ``key_list``: a list of ``(name, counter)`` tuples for all
            tracked models. With ``key_list``: a list of names only.
        """
        if key_list is None:
            return sorted(self.models_ref.items(), key=lambda item: item[1])

        counters = {key: self.models_ref.get(key, -1) for key in key_list}
        return sorted(counters, key=counters.get)
|
|
|
|
# sd_models_Ref = ModelsRef()
|
|
# cn_models_Ref = ModelsRef()
|
|
# lora_models_Ref = ModelsRef()
|
|
# hyper_models_Ref = ModelsRef()
|
|
# embedding_Ref = ModelsRef()
|
|
|
|
def upload_folder_to_s3(local_folder_path, bucket_name, s3_folder_path):
    """
    Description:
        Upload every file found under a local folder to an S3 bucket.

        The object key of each file is ``s3_folder_path`` joined with the
        file's local path as produced by ``os.walk`` (so it includes
        ``local_folder_path`` itself), always with ``/`` as separator.
    Args:
        local_folder_path (str): Folder to walk recursively.
        bucket_name (str): Destination bucket.
        s3_folder_path (str): Key prefix the files are stored under.
    """
    import posixpath

    s3_client = boto3.client('s3')
    for root, _dirs, files in os.walk(local_folder_path):
        for file_name in files:
            local_file_path = os.path.join(root, file_name)
            # S3 keys always use "/". Drop any drive letter / leading
            # separator as well: os.path.join() would otherwise throw the
            # destination prefix away for an absolute local path.
            relative_key = os.path.splitdrive(local_file_path)[1].replace(os.sep, '/').lstrip('/')
            s3_file_path = posixpath.join(s3_folder_path, relative_key)
            s3_client.upload_file(local_file_path, bucket_name, s3_file_path)
|
|
|
|
|
|
def upload_folder_to_s3_by_tar(local_folder_path, bucket_name, s3_folder_path):
    """
    Description:
        Pack a local folder into ``<folder name>.tar`` in the current working
        directory, upload that archive to S3 and delete the local archive.
    Args:
        local_folder_path (str): Folder to archive.
        bucket_name (str): Destination bucket.
        s3_folder_path (str): Key prefix; the archive is stored as
            ``<s3_folder_path>/<folder name>.tar``.
    """
    import posixpath

    # Ignore trailing separators so "some/dir/" still yields "dir.tar"
    # instead of an archive literally called ".tar".
    folder_name = os.path.basename(local_folder_path.rstrip(os.sep + (os.altsep or '')))
    tar_name = f"{folder_name}.tar"
    try:
        tar(mode='c', archive=tar_name, sfiles=local_folder_path, verbose=True)
        s3_client = boto3.client('s3')
        # posixpath: S3 keys use "/" whatever the local OS separator is.
        s3_client.upload_file(tar_name, bucket_name, posixpath.join(s3_folder_path, tar_name))
    finally:
        # Clean up the temporary archive even when archiving or uploading fails.
        if os.path.exists(tar_name):
            rm(tar_name, recursive=True)
|
|
|
|
|
|
def upload_file_to_s3(file_name, bucket, directory=None, object_name=None):
    """
    Description:
        Upload one local file to S3 and report success as a boolean.
    Args:
        file_name (str): Local path of the file to upload.
        bucket (str): Destination bucket.
        directory (str): Optional key prefix, joined to the object name with "/".
        object_name (str): Object name to use; defaults to ``file_name``.
    Returns:
        True when the upload succeeded; False when any exception was raised
        (the error is printed, not propagated).
    """
    key = file_name if object_name is None else object_name
    if directory:
        key = f"{directory}/{key}"

    try:
        boto3.client('s3').upload_file(file_name, bucket, key)
        print(f"File {file_name} uploaded to {bucket}/{key}")
    except Exception as e:
        print(f"Error occurred while uploading {file_name} to {bucket}/{key}: {e}")
        return False
    else:
        return True
|
|
|
|
def upload_file_to_s3_by_presign_url(local_path, s3_presign_url):
    """
    Description:
        PUT the content of a local file to a presigned S3 URL.
    Args:
        local_path (str): File to upload; it is streamed in binary mode.
        s3_presign_url (str): Presigned URL that accepts the PUT request.
    Raises:
        requests.HTTPError: if the response status signals an error.
    """
    # The context manager closes the file handle once the request is done;
    # passing a bare open() result left it open.
    with open(local_path, "rb") as file_obj:
        response = requests.put(s3_presign_url, data=file_obj)
    response.raise_for_status()
|
|
|
|
def upload_multipart_files_to_s3_by_signed_url(local_path, signed_urls, part_size):
    """
    Description:
        Upload a local file in consecutive chunks, one PUT per presigned URL.
    Args:
        local_path (str): File to upload.
        signed_urls (list): Presigned part URLs, in part order.
        part_size (int): Number of bytes read from the file for each part.
    Returns:
        A list of ``{'ETag': ..., 'PartNumber': ...}`` dicts, one per uploaded
        part, with part numbers starting at 1.
    Raises:
        gr.Error: if any part fails to upload; the original exception is
            attached as ``__cause__``.
    """
    parts = []
    with open(local_path, "rb") as f:
        try:
            for part_number, signed_url in enumerate(signed_urls, start=1):
                file_data = f.read(part_size)
                response = requests.put(signed_url, data=file_data)
                response.raise_for_status()
                parts.append({
                    'ETag': response.headers['ETag'],
                    'PartNumber': part_number,
                })
                print(f'model upload part {part_number}: {response}')
        except Exception as e:
            print(e)
            # gr.Error is an exception type: merely constructing it has no
            # effect, so it is raised here, chained to the real cause.
            raise gr.Error(
                f'Upload file {local_path} failed, please try again. '
                'If still not work, contact your admin'
            ) from e
    return parts
|
|
|
|
|
|
def download_folder_from_s3(bucket_name, s3_folder_path, local_folder_path):
    """
    Description:
        Download every object whose key starts with ``s3_folder_path``.

        Each object is stored below ``local_folder_path`` using the directory
        part of its key with the first path component dropped.
    Args:
        bucket_name (str): Source bucket.
        s3_folder_path (str): Key prefix used to select objects.
        local_folder_path (str): Local root directory for the downloads.
    """
    s3_resource = boto3.resource('s3')
    bucket = s3_resource.Bucket(bucket_name)
    for obj in bucket.objects.filter(Prefix=s3_folder_path):
        # Drop the first component of the key's directory part.
        obj_dirname = os.sep.join(os.path.dirname(obj.key).split("/")[1:])
        obj_basename = os.path.basename(obj.key)
        local_sub_folder_path = os.path.join(local_folder_path, obj_dirname)
        os.makedirs(local_sub_folder_path, exist_ok=True)
        if not obj_basename:
            # A key ending in "/" has no file name; downloading it would
            # target the directory itself, so only the directory is created.
            continue
        bucket.download_file(obj.key, os.path.join(local_sub_folder_path, obj_basename))
|
|
|
|
|
|
def download_folder_from_s3_by_tar(bucket_name, s3_tar_path, local_tar_path, target_dir="."):
    """
    Description:
        Download a tar archive from S3, extract it and delete the archive.
    Args:
        bucket_name (str): Source bucket.
        s3_tar_path (str): Key of the archive in the bucket.
        local_tar_path (str): Where the archive is stored while extracting.
        target_dir (str): Directory the archive is extracted into. Default is ".".
    """
    s3_client = boto3.client('s3')
    s3_client.download_file(bucket_name, s3_tar_path, local_tar_path)
    try:
        tar(mode='x', archive=local_tar_path, verbose=True, change_dir=target_dir)
    finally:
        # Remove the downloaded archive even if extraction fails.
        rm(local_tar_path, recursive=True)
|
|
|
|
|
|
def download_file_from_s3(bucket_name, s3_file_path, local_file_path):
    """
    Description:
        Download a single S3 object to a local file.
    Args:
        bucket_name (str): Source bucket.
        s3_file_path (str): Key of the object to download.
        local_file_path (str): Local path the object is written to.
    """
    boto3.client('s3').download_file(bucket_name, s3_file_path, local_file_path)
|
|
|
|
|
|
def get_bucket_name_from_s3_url(s3_path) -> str:
    """Return the network-location part of an S3 URL, i.e. the bucket in ``s3://bucket/key``."""
    return urlparse(s3_path, allow_fragments=False).netloc
|
|
|
|
def get_bucket_name_from_s3_path(s3_path) -> str:
    """Return everything before the first "/" once the "s3://" text has been removed."""
    bucket, _, _ = s3_path.replace("s3://", "").partition("/")
    return bucket
|
|
|
|
def get_path_from_s3_path(s3_path) -> str:
    """
    Description:
        Return the object key of an S3 path, i.e. everything after the bucket.
    Args:
        s3_path (str): ``s3://bucket/key`` or ``bucket/key``.
    Returns:
        The part after the first "/" following the bucket name; an empty
        string when there is no key.
    """
    scheme = "s3://"
    # Strip the scheme only where it is a prefix: a blanket replace() would
    # also rewrite keys that happen to contain the text "s3://".
    if s3_path.startswith(scheme):
        s3_path = s3_path[len(scheme):]
    return s3_path.partition("/")[2]
|
|
|
|
def fast_upload(session, bucketname, s3dir, filelist, progress_func=None, workers=10):
    """
    Description:
        Upload several files to S3 concurrently through a transfer manager.
    Args:
        session: Object providing ``client('s3', config=...)`` (the script
            entry point passes a ``boto3.Session()``).
        bucketname (str): Destination bucket.
        s3dir (str): Key prefix; each file is stored as ``<s3dir>/<file name>``.
        filelist (list): Local paths of the files to upload.
        progress_func: Optional callback wrapped in a
            ``ProgressCallbackInvoker`` and subscribed to every upload.
        workers (int): Used both as connection-pool size and as maximum
            transfer concurrency. Default is 10.
    """
    import posixpath

    botocore_config = botocore.config.Config(max_pool_connections=workers)
    s3client = session.client('s3', config=botocore_config)
    transfer_config = s3transfer.TransferConfig(
        use_threads=True,
        max_concurrency=workers,
    )
    s3t = s3transfer.create_transfer_manager(s3client, transfer_config)
    try:
        for src in filelist:
            # posixpath: S3 keys use "/" whatever the local OS separator is.
            dst = posixpath.join(s3dir, os.path.basename(src))
            s3t.upload(
                src, bucketname, dst,
                subscribers=[
                    s3transfer.ProgressCallbackInvoker(progress_func),
                ] if progress_func else None,
            )
            # NOTE(review): the object returned by upload() is discarded, so
            # per-file failures are presumably not surfaced here - confirm.
    finally:
        # Wait for all the upload tasks to finish, also when submitting one
        # of them raised.
        s3t.shutdown()
|
|
|
|
def save_variable_to_json(variable_name, variable_value, filename='sagemaker_ui.json'):
    """
    Description:
        Store one key/value pair in a JSON file, keeping the keys already in it.
    Args:
        variable_name (str): Key to set.
        variable_value: Value to store under the key.
        filename (str): JSON file to update; it is created when missing.
            Default is 'sagemaker_ui.json'.
    """
    if os.path.exists(filename):
        with open(filename, 'r') as existing_file:
            settings = json.load(existing_file)
    else:
        settings = {}

    settings[variable_name] = variable_value

    with open(filename, 'w') as target_file:
        json.dump(settings, target_file)
|
|
|
|
def get_variable_from_json(variable_name, filename='sagemaker_ui.json'):
    """
    Description:
        Read one key from a JSON file.

        When the file does not exist it is first created with empty
        "api_gateway_url" and "api_token" entries.
    Args:
        variable_name (str): Key to look up.
        filename (str): JSON file to read. Default is 'sagemaker_ui.json'.
    Returns:
        The stored value, or None when the key is absent.
    """
    if not os.path.exists(filename):
        with open(filename, 'w') as new_file:
            json.dump({"api_gateway_url": "", "api_token": ""}, new_file)

    with open(filename, 'r') as settings_file:
        settings = json.load(settings_file)
    return settings.get(variable_name)
|
|
|
|
"""
|
|
Description: The functions below replace the previous shell-command implementations that went through os.system, which is not OS-agnostic and not recommended.
|
|
"""
|
|
def tar(mode, archive, sfiles=None, verbose=False, change_dir=None):
    """
    Description:
        Create or extract an uncompressed tar archive.
    Args:
        mode: 'c' for create or 'x' for extract
        archive: the archive file name
        sfiles: when creating, a list/tuple of paths to add, or a single path
            (a directory is walked and all files below it are added; a file is
            added as is). When extracting, a member name or a list of member
            names; None extracts all members.
        verbose: whether to print the names of the files as they are being processed
        change_dir: directory to extract into; None to use the current
            directory. Only used in extract mode.
    Raises:
        ValueError: if mode is neither 'c' nor 'x'.
    Usage:
        # Create a new archive
        tar(mode='c', archive='archive.tar', sfiles=['file1.txt', 'file2.txt'])

        # Extract files from an archive
        tar(mode='x', archive='archive.tar')

        # Create a new archive with verbose mode and input directory
        tar(mode='c', archive='archive.tar', sfiles='./some_directory', verbose=True)

        # Extract files from an archive with verbose mode and change directory
        tar(mode='x', archive='archive.tar', verbose=True, change_dir='./some_directory')
    """
    if mode == 'c':
        with tarfile.open(archive, mode='w') as archive_file:
            if isinstance(sfiles, (list, tuple)):
                for file in sfiles:
                    if verbose:
                        print(f"Adding {file} to {archive}")
                    archive_file.add(file)
            elif os.path.isfile(sfiles):
                # A single file path: os.walk() would yield nothing for it
                # and silently produce an empty archive.
                if verbose:
                    print(f"Adding {sfiles} to {archive}")
                archive_file.add(sfiles)
            else:
                # Take it as a folder name and add every file below it.
                for folder_path, _subfolders, files in os.walk(sfiles):
                    for file in files:
                        file_path = os.path.join(folder_path, file)
                        if verbose:
                            print(f"Adding {file_path} to {archive}")
                        archive_file.add(file_path)

    elif mode == 'x':
        with tarfile.open(archive, mode='r') as archive_file:
            if not sfiles:
                # Extract every member when none are specified.
                members = archive_file.getnames()
            elif isinstance(sfiles, str):
                # A bare string is one member name, not a sequence of
                # single-character names.
                members = [sfiles]
            else:
                members = sfiles
            # NOTE(review): members are extracted under the names stored in
            # the archive; if archives can come from untrusted sources, check
            # the tarfile extraction-filter documentation.
            for file in members:
                if verbose:
                    print(f"Extracting {file} from {archive}")
                if change_dir:
                    archive_file.extract(file, path=change_dir)
                else:
                    archive_file.extract(file)

    else:
        raise ValueError(f"Unsupported tar mode {mode!r}; expected 'c' or 'x'")
|
|
|
|
def rm(path, force=False, recursive=False):
    """
    Description:
        Remove a file or directory.
    Args:
        path (str): Path of the file or directory to remove.
        force (bool): If True, ignore non-existent files and errors. Default is False.
        recursive (bool): If True, remove directories and their contents recursively. Default is False.
    Raises:
        ValueError: if the path does not exist, or is a directory and
            recursive is False (unless force is True).
    Usage:
        # Remove the file
        rm(dst)
        # Remove a directory recursively
        rm("directory/path", recursive=True)
    """
    path_obj = Path(path)

    try:
        # Test for a symlink first: a link is always removed as a link, even
        # when it points at a directory (shutil.rmtree refuses symlinks).
        if path_obj.is_symlink() or path_obj.is_file():
            path_obj.unlink()
        elif path_obj.is_dir():
            if not recursive:
                raise ValueError("Cannot remove directory without recursive=True")
            shutil.rmtree(path)
        else:
            raise ValueError("File or directory does not exist")
    except Exception:
        if not force:
            # Bare raise keeps the original traceback intact.
            raise
|
|
|
|
def cp(src, dst, recursive=False, dereference=False, preserve=True):
    """
    Description:
        Copy a file or directory from source path to destination path.
    Args:
        src (str): Source file or directory path.
        dst (str): Destination file or directory path.
        recursive (bool): If True, copy directory and its contents recursively. Default is False.
        dereference (bool): If True, always dereference symbolic links. Default is False.
        preserve (bool): If True, preserve file metadata. Default is True.
    Raises:
        ValueError: if the source is neither a file nor a directory copied
            with recursive=True.
    Usage:
        src = "source/file/path.txt"
        dst = "destination/file/path.txt"

        # Copy the file
        cp(src, dst)

        # Copy a directory recursively and dereference symlinks
        cp("source/directory", "destination/directory", recursive=True, dereference=True)
    """
    src_path = Path(src)
    dst_path = Path(dst)
    # copy2 keeps file metadata, copy does not. The choice is passed to
    # copytree explicitly because its default copy function is copy2, which
    # would ignore preserve=False for the files of a directory.
    copy_file = shutil.copy2 if preserve else shutil.copy
    try:
        if dereference:
            src_path = src_path.resolve()

        if src_path.is_dir() and recursive:
            shutil.copytree(src_path, dst_path, copy_function=copy_file, symlinks=not dereference)
        elif src_path.is_file():
            copy_file(src_path, dst_path)
        else:
            raise ValueError("Source must be a file or a directory with recursive=True")
    except shutil.SameFileError:
        print("Source and destination represents the same file.")
|
|
|
|
|
|
def mv(src, dest, force=False):
    """
    Description:
        Move or rename files and directories.
    Args:
        src (str): Source file or directory path.
        dest (str): Destination file or directory path.
        force (bool): If True, overwrite the destination if it exists. Default is False.
    Raises:
        FileNotFoundError: if the source does not exist.
        FileExistsError: if the destination exists and force is False.
    Usage:
        # Rename a file
        mv('old_name.txt', 'new_name.txt')

        # Move a file to a new directory
        mv('file.txt', 'new_directory/file.txt')

        # Move a directory to another directory
        mv('source_directory', 'destination_directory')

        # Force move (overwrite) a file or directory
        mv('source_file.txt', 'existing_destination_file.txt', force=True)
    """
    src_path = Path(src)
    dest_path = Path(dest)

    if not src_path.exists():
        raise FileNotFoundError(f"Source path '{src}' does not exist")

    if dest_path.exists():
        if not force:
            raise FileExistsError(f"Destination path '{dest}' already exists and 'force' is not set")
        if src_path.samefile(dest_path):
            # Source and destination are the same file: deleting the
            # destination first would destroy the source, so do nothing.
            return
        # An existing destination is replaced as a whole.
        if dest_path.is_file():
            dest_path.unlink()
        elif dest_path.is_dir():
            shutil.rmtree(dest_path)

    if src_path.is_file() or src_path.is_dir():
        shutil.move(src, dest)
|
|
|
|
def format_size(size, human_readable):
    """
    Description:
        Render a byte count as a string.
    Args:
        size (int): Size in bytes.
        human_readable (bool): If True, scale by powers of 1024 and append a
            unit letter (B, K, M, G, T, P, E) with one decimal place;
            otherwise return the plain number as a string.
    Returns:
        The formatted size string.
    """
    if not human_readable:
        return str(size)

    for unit in ['B', 'K', 'M', 'G', 'T', 'P']:
        if size < 1024:
            return f"{size:.1f}{unit}"
        size /= 1024
    # Values of 1024 P and above used to fall off the loop and return None;
    # report them in the next unit instead.
    return f"{size:.1f}E"
|
|
|
|
def df(show_all=False, human_readable=False):
    """
    Description:
        Get disk usage statistics.
    Args:
        show_all (bool): If True, include all filesystems. Default is False.
        human_readable (bool): If True, format sizes in human readable format. Default is False.
    Returns:
        A list with one dict per partition whose usage could be read, with
        the keys 'filesystem', 'total', 'used', 'free', 'percent' and
        'mountpoint'.
    Usage:
        filesystems = df(show_all=True, human_readable=True)
        for filesystem in filesystems:
            print(f"Filesystem: {filesystem['filesystem']}")
            print(f"Total: {filesystem['total']}")
            print(f"Used: {filesystem['used']}")
            print(f"Free: {filesystem['free']}")
            print(f"Percent: {filesystem['percent']}%")
            print(f"Mountpoint: {filesystem['mountpoint']}")
    """
    result = []

    for partition in psutil.disk_partitions(all=show_all):
        try:
            usage = psutil.disk_usage(partition.mountpoint)
        except OSError:
            # A mount point that cannot be queried (for example one that is
            # not accessible to this user) is skipped so that it does not
            # abort the whole listing.
            continue
        result.append({
            'filesystem': partition.device,
            'total': format_size(usage.total, human_readable),
            'used': format_size(usage.used, human_readable),
            'free': format_size(usage.free, human_readable),
            'percent': usage.percent,
            'mountpoint': partition.mountpoint,
        })

    return result
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: archive a local sample folder, upload the archive to
    # S3, then download it again and extract it into the current directory.
    # (sys is already imported at the top of the module.)
    demo_bucket = 'aws-gcr-csdc-atl-exp-us-west-2'

    # Alternative entry points that take their arguments from the command line:
    # upload_file_to_s3(sys.argv[1], 'aws-gcr-csdc-atl-exp-us-west-2', sys.argv[2])
    # fast_upload(boto3.Session(), 'aws-gcr-csdc-atl-exp-us-west-2', sys.argv[2], [sys.argv[1]])
    upload_folder_to_s3_by_tar(
        'models/dreambooth/sagemaker_test/samples',
        demo_bucket,
        'aigc-webui-test-samples',
    )
    download_folder_from_s3_by_tar(
        demo_bucket,
        'aigc-webui-test-samples/samples.tar',
        'samples.tar',
    )
|