From 824ebcfc6e1e3c67bfdff05a9a2d401048c0b2b9 Mon Sep 17 00:00:00 2001
From: Ning <lvning19860909@gmail.com>
Date: Thu, 25 Jan 2024 15:28:05 +0800
Subject: [PATCH] chore: remove Dreambooth (db)

---
 build_scripts/README.md                       |  2 +-
 .../training/sagemaker_entrypoint.py          | 75 -------------------
 docs/Environment-Preconfiguration.md          |  1 -
 docs/en/user-guide/dreambooth-guide.md        | 40 ----------
 docs/mkdocs.en.yml                            |  1 -
 docs/mkdocs.zh.yml                            |  1 -
 .../features-and-benefits.md                  |  3 +-
 docs/zh/user-guide/dreambooth-guide.md        | 42 -----------
 docs/zh/user-guide/preparation.md             | 11 +--
 .../lambda/trainings/start_training_job.py    |  1 -
 utils.py                                      |  2 -
 utils_cn.py                                   |  2 -
 12 files changed, 4 insertions(+), 177 deletions(-)
 delete mode 100644 docs/en/user-guide/dreambooth-guide.md
 delete mode 100644 docs/zh/user-guide/dreambooth-guide.md

diff --git a/build_scripts/README.md b/build_scripts/README.md
index aa823c5f..30cdf2d1 100644
--- a/build_scripts/README.md
+++ b/build_scripts/README.md
@@ -18,7 +18,7 @@ accelerate launch --num_cpu_threads_per_process=6 launch.py --api
 
 # How to build images
 
-### Build public images for aigc-webui-utils which is used for light-weight CPU operations, like create_model in Dreambooth, merge_checkpoint.
+### Build public images for aigc-webui-utils, which is used for light-weight CPU operations such as create_model and merge_checkpoint.
 
 ```
 sh build_and_push.sh Dockerfile.utils.from_scratch aigc-webui-utils
diff --git a/build_scripts/training/sagemaker_entrypoint.py b/build_scripts/training/sagemaker_entrypoint.py
index e9d06fe5..10f5d088 100644
--- a/build_scripts/training/sagemaker_entrypoint.py
+++ b/build_scripts/training/sagemaker_entrypoint.py
@@ -61,8 +61,6 @@ def sync_status_from_s3_in_sagemaker(bucket_name, webui_status_file_path, sagema
             pickle.dump(status, sagemaker_status_file)
         upload_file_to_s3('sagemaker_status.pickle', bucket_name, sagemaker_status_file_path)
 
-def train(model_dir):
-    start_training(model_dir)
 
 def check_and_upload(local_path, bucket, s3_path):
     while True:
@@ -135,77 +133,6 @@ def download_data(data_list, s3_data_path_list, s3_input_path):
             logger.info(f"Download data from s3 {input_bucket_name} {input_path} to {target_dir} {local_tar_path}")
             download_folder_from_s3_by_tar(input_bucket_name, input_path, local_tar_path, target_dir)
 
-def prepare_for_training(s3_model_path, model_name, s3_input_path, data_tar_list, class_data_tar_list):
-    model_bucket_name = get_bucket_name_from_s3_path(s3_model_path)
-    s3_model_path = os.path.join(get_path_from_s3_path(s3_model_path), f'{model_name}.tar')
-    logger.info(f"Download src model from s3 {model_bucket_name} {s3_model_path} {model_name}.tar")
-    print(f"Download src model from s3 {model_bucket_name} {s3_model_path} {model_name}.tar")
-    download_folder_from_s3_by_tar(model_bucket_name, s3_model_path, f'{model_name}.tar')
-
-    input_bucket_name = get_bucket_name_from_s3_path(s3_input_path)
-    input_path = os.path.join(get_path_from_s3_path(s3_input_path), "db_config.tar")
-    logger.info(f"Download db_config from s3 {input_bucket_name} {input_path} db_config.tar")
-    download_folder_from_s3_by_tar(input_bucket_name, input_path, "db_config.tar")
-    download_db_config_path = f"models/sagemaker_dreambooth/{model_name}/db_config_cloud.json"
-    target_db_config_path = f"models/dreambooth/{model_name}/db_config.json"
-    logger.info(f"Move db_config to correct position {download_db_config_path} {target_db_config_path}")
-    # os.system(f"mv {download_db_config_path} {target_db_config_path}")
-    mv(download_db_config_path, target_db_config_path, force=True)
-    with open(target_db_config_path) as db_config_file:
-        db_config = json.load(db_config_file)
-        logger.info(db_config)
-    data_list = []
-    class_data_list = []
-    for concept in db_config["concepts_list"]:
-        data_list.append(concept["instance_data_dir"])
-        class_data_list.append(concept["class_data_dir"])
-    # hack_db_config(db_config, db_config_path, model_name, data_tar_list, class_data_tar_list)
-    download_data(data_list, data_tar_list, s3_input_path)
-    download_data(class_data_list, class_data_tar_list, s3_input_path)
-
-def prepare_for_training_v2(s3_model_path, model_name, s3_input_path, s3_data_path_list, s3_class_data_path_list):
-    model_bucket_name = get_bucket_name_from_s3_path(s3_model_path)
-    s3_model_path = os.path.join(get_path_from_s3_path(s3_model_path), f'{model_name}.tar')
-    logger.info(f"Download src model from s3 {model_bucket_name} {s3_model_path} {model_name}.tar")
-    print(f"Download src model from s3 {model_bucket_name} {s3_model_path} {model_name}.tar")
-    download_folder_from_s3_by_tar(model_bucket_name, s3_model_path, f'{model_name}.tar')
-
-    # input_bucket_name = get_bucket_name_from_s3_path(s3_input_path)
-    input_path = os.path.join(get_path_from_s3_path(s3_input_path), "db_config.tar")
-    logger.info(f"Download db_config from s3 {input_bucket_name} {input_path} db_config.tar")
-    download_folder_from_s3_by_tar(input_bucket_name, input_path, "db_config.tar")
-    download_db_config_path = f"models/sagemaker_dreambooth/{model_name}/db_config_cloud.json"
-    target_db_config_path = f"models/dreambooth/{model_name}/db_config.json"
-    logger.info(f"Move db_config to correct position {download_db_config_path} {target_db_config_path}")
-    # os.system(f"mv {download_db_config_path} {target_db_config_path}")
-    mv(download_db_config_path, target_db_config_path)
-    with open(target_db_config_path) as db_config_file:
-        db_config = json.load(db_config_file)
-    data_list = []
-    class_data_list = []
-    for concept in db_config["concepts_list"]:
-        data_list.append(concept["instance_data_dir"])
-        class_data_list.append(concept["class_data_dir"])
-    # hack_db_config(db_config, db_config_path, model_name, data_tar_list, class_data_tar_list)
-
-    for s3_data_path, local_data_path in zip(data_list, s3_data_path_list):
-        if len(local_data_path) == 0:
-            continue
-        target_dir = local_data_path
-        os.makedirs(target_dir, exist_ok=True)
-        input_bucket_name = get_bucket_name_from_s3_path(s3_data_path)
-        input_path = get_path_from_s3_path(s3_data_path)
-        logger.info(f"Download data from s3 {input_bucket_name} {input_path} to {target_dir}")
-        download_folder_from_s3_by_tar(input_bucket_name, input_path, target_dir)
-    for s3_class_data_path, local_class_data_path in zip(class_data_list, s3_class_data_path_list):
-        if len(local_class_data_path) == 0:
-            continue
-        target_dir = local_class_data_path
-        os.makedirs(target_dir, exist_ok=True)
-        input_bucket_name = get_bucket_name_from_s3_path(s3_class_data_path)
-        input_path = get_path_from_s3_path(s3_class_data_path)
-        logger.info(f"Download data from s3 {input_bucket_name} {input_path} to {target_dir}")
-        download_folder_from_s3_by_tar(input_bucket_name, input_path, target_dir)
 
 def sync_status(job_id, bucket_name, model_dir):
     local_samples_dir = f'models/dreambooth/{model_dir}/samples'
@@ -228,10 +155,8 @@ def main(s3_input_path, s3_output_path, params):
     s3_class_data_path_list = params["class_data_tar_list"]
     # s3_data_path_list = params["s3_data_path_list"]
     # s3_class_data_path_list = params["s3_class_data_path_list"]
-    prepare_for_training(s3_model_path, model_name, s3_input_path, s3_data_path_list, s3_class_data_path_list)
     os.system("df -h")
     # sync_status(job_id, bucket_name, model_dir)
-    train(model_name)
     os.system("df -h")
     os.system("ls -R models")
     upload_model_to_s3_v2(model_name, s3_output_path, model_type)
diff --git a/docs/Environment-Preconfiguration.md b/docs/Environment-Preconfiguration.md
index a3d7452e..c257ec45 100644
--- a/docs/Environment-Preconfiguration.md
+++ b/docs/Environment-Preconfiguration.md
@@ -29,7 +29,6 @@ wget https://raw.githubusercontent.com/awslabs/stable-diffusion-aws-extension/ma
    * stable-diffusion-webui
    * stable-diffusion-aws-extension
    * sd-webui-controlnet
-   * sd_dreambooth_extension
 ```bash
 sh install.sh
 ```
diff --git a/docs/en/user-guide/dreambooth-guide.md b/docs/en/user-guide/dreambooth-guide.md
deleted file mode 100644
index 54f6fb86..00000000
--- a/docs/en/user-guide/dreambooth-guide.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Dreambooth Guide
-You can open **Dreambooth** tab, by combining the use with native Dreambooth, the tab **Create from Cloud** and **Select from Cloud** that newly added by the solution, you can achieve  cloud-based model creating and training in Dreambooth.
-
-
-## Create Model
-1. Open **Dreambooth** tab, **Model** subtab **Create From Cloud**.
-![Creat model tab](../images/open-create-model-tab.png)
-2. Enter a model name in the **Name** text box.
-
-    !!! Important "Notice"
-        Please note the naming format requirements: the name can only contain alphanumeric characters and dashes ("-").
-
-3. Select one checkpoint under **Source Checkpoint** dropdown list.
-> **Note：** The checkpoint files here include two sources: files starting with "local" are locally stored checkpoint files, while those starting with "cloud" are checkpoint files stored on Amazon S3. For first-time use, it is recommended to select a local checkpoint file.
-
-4. Click **Create Model From Cloud** to start model creation on cloud. **Model Creation Jobs Details** field will instantly update with the progress of the model creation job. When the status changes to *Complete*, it indicates that the model creation is finished.
-
-## Train Model
-1. Open **Dreambooth** tab, **Model** subtab, **Select From Cloud**.
-2. Fresh and select the model from **Model** drop down list that need to train.
-3. Set corresponding parameters in **Input** session.
-    - Set training parameters
-        - Checking *Lora* can accelerate the training process.
-        - The *Training Steps Per Image (Epochs)* represents the number of iterations for training a single image and can be left at the default value.
-    ![Input setting](../images/dreambooth-input-settings.png) 
-    - Set the concepts that need to be trained. A total of four concepts can be set, and we will use the first concept as an example.
-        - In the *Dataset Directory* field, enter the path to the images required for training. It can be a path on a web server or an S3 path. For S3 paths, you can obtain them by uploading the data through AWS Dataset Management or by uploading them to S3 on your own. The path should start with “s3://".
-        - In the *Instance Prompt* section under *Training Prompts*, enter the keywords for the concept. These keywords will be used to generate the concept during the training process in txt2img. Therefore, avoid using common English words (as they might get confused with other concepts in the base model).
-    ![Input concepts](../images/dreambooth-input-concepts.png) 
-    - You need to check **Save Checkpoint to Subdirectory** to save the model to a subdirectory. 
-    - If you need to save the lora model separately (the model file will be smaller, but it needs to be used with the SD basic model), please check **Generate lora weights for extra networks**.
-    ![Input saving](../images/dreambooth-lora-save.png) 
-
-4. Click **SageMaker Train** to start model training task. The **Training Job Details** section will be updated in real-time with the status of the model training job. When the status changes to *Complete*, an email notification will be sent to the email address provided during the initial deployment of the solution, indicating that the model training is complete.
-5. Future steps. For example: Navigate to **txt2img** tab **Amazon SageMaker Inference** panel, check trained model by refreshing **Stable Diffusion Checkpoint** dropdown list.  
-
-
-
-
-
diff --git a/docs/mkdocs.en.yml b/docs/mkdocs.en.yml
index bd23d3a5..f2d5f4d9 100644
--- a/docs/mkdocs.en.yml
+++ b/docs/mkdocs.en.yml
@@ -33,7 +33,6 @@ nav:
         - txt2img guide: user-guide/txt2img-guide.md
         - img2img guide: user-guide/img2img-guide.md
         - controlNet guide: user-guide/controlnet-guide.md
-        - Dreambooth guide: user-guide/dreambooth-guide.md
         #- Checkpoint Merger guide: user-guide/checkpoint-merge-guide.md
     - Developer guide:
         - Source code: developer-guide/source.md
diff --git a/docs/mkdocs.zh.yml b/docs/mkdocs.zh.yml
index fe66a3f6..dec35e6e 100644
--- a/docs/mkdocs.zh.yml
+++ b/docs/mkdocs.zh.yml
@@ -36,7 +36,6 @@ nav:
         - img2img指南: user-guide/img2img-guide.md
         - controlNet指南: user-guide/controlnet-guide.md
         # - Merge Checkpoint: user-guide/checkpoint-merge-guide.md
-        - Dreambooth指南: user-guide/dreambooth-guide.md
     - 开发者指南:
         - 源代码: developer-guide/source.md
         - API调用指南: developer-guide/api.md
diff --git a/docs/zh/solution-overview/features-and-benefits.md b/docs/zh/solution-overview/features-and-benefits.md
index db4a9177..8aeba530 100644
--- a/docs/zh/solution-overview/features-and-benefits.md
+++ b/docs/zh/solution-overview/features-and-benefits.md
@@ -10,7 +10,6 @@
 | [img2img](https://github.com/AUTOMATIC1111/stable-diffusion-webui){:target="_blank"}  | V1.7.0  | 支持除batch外的所有功能|
 | [LoRa](https://github.com/AUTOMATIC1111/stable-diffusion-webui){:target="_blank"}  | V1.2.1  | |
 | [ControlNet](https://github.com/Mikubill/sd-webui-controlnet){:target="_blank"}  | V1.1.410  | 支持SDXL + ControlNet推理 |
-| [Dreambooth](https://github.com/d8ahazard/sd_dreambooth_extension){:target="_blank"}  | V1.0.14  |
 | [Tiled Diffusion & VAE](https://github.com/pkuliyi2015/multidiffusion-upscaler-for-automatic1111.git){:target="_blank"}  | f9f8073e64f4e682838f255215039ba7884553bf  |
 | [Extras](https://github.com/AUTOMATIC1111/stable-diffusion-webui){:target="_blank"}  | V1.7.0  | API|
 | [rembg](https://github.com/AUTOMATIC1111/stable-diffusion-webui-rembg.git){:target="_blank"}  | 3d9eedbbf0d585207f97d5b21e42f32c0042df70  | API
@@ -20,5 +19,5 @@
 ## 产品优势
 
 * **安装便捷**。本解决方案使用 CloudFormation 一键部署亚马逊云科技中间件，搭配社区原生 Stable Diffusion WebUI 插件安装形式一键安装，即可赋能用户快速使用 Amazon SageMaker 云上资源，进行推理、训练和调优工作。
-* **社区原生**。该方案以插件形式实现，用户无需改变现有Web用户界面的使用习惯。此外，该方案的代码是开源的，采用非侵入式设计，有助于用户快速跟上社区相关功能的迭代，例如备受欢迎的Dreambooth、ControlNet和LoRa等插件。
+* **社区原生**。该方案以插件形式实现，用户无需改变现有Web用户界面的使用习惯。此外，该方案的代码是开源的，采用非侵入式设计，有助于用户快速跟上社区相关功能的迭代，例如备受欢迎的ControlNet和LoRa等插件。
 * **可扩展性强**。本解决方案将WebUI界面与后端分离，WebUI可以在支持的终端启动而没有GPU的限制；原有训练，推理等任务通过插件所提供的功能迁移到Amazon SageMaker，为用户提供弹性计算资源、降低成本、提高灵活性和可扩展性。
diff --git a/docs/zh/user-guide/dreambooth-guide.md b/docs/zh/user-guide/dreambooth-guide.md
deleted file mode 100644
index f80f7d2f..00000000
--- a/docs/zh/user-guide/dreambooth-guide.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# 使用Dreambooth进行云上模型训练
-您可以打开**Dreambooth**标签页，通过结合使用**Dreambooth**原生区域及解决方案新增面板**Amazon SageMaker Inference**，实现调用云上资源的**Dreambooth**云上模型训练工作。 
-
-
-## 创建模型
-
-1. 打开**Dreambooth**标签页，模块**Model**的标签页**Create From Cloud**。
-![Creat model tab](../images/open-create-model-tab.png)
-2. 在**Name**文本框输入所需创建模型名称。
-
-    !!! Important "提示"
-        请注意命名格式要求，只能包含字母数字和“-”。
-
-3. 在**Source Checkpoint**下拉列表，选择一个checkpoint文件。
-> **补充：** 此处checkpoint文件包含两个来源的文件：一是以local开头的是本地存储的checkpoint文件，另一是以cloud开头的是存储在S3上的checkpoint文件。首次使用建议选择local checkpoint文件。
-
-4. 点击**Create Model From Cloud**按钮，开始创建模型。**Model Creation Jobs Details**区域会即时增加本次模型创建的工作状态。当状态变成**Complete**，即完成模型创建。
- 
-
-
-
-## 训练模型
-1. 打开**Dreambooth**标签页，模块**Model**的标签页**Select From Cloud**。
-2. 刷新**Model**列表，选择需要训练的模型。
-3. 在**Input**模块设置相应的参数。  
-    - 设置训练参数  
-        - 勾选*Lora*可以加速训练过程。
-        - *Training Steps Per Image (Epochs)* 代表了单张图片训练的迭代次数，可以使用默认值。
-   ![Input setting](../images/dreambooth-input-settings.png) 
-    - 设置需要训练的概念。总共可以设置四个概念，我们以第一个概念为例。
-        - *Dataset Directory* 处填写训练所需的图片路径，可以是web server上的路径也可以是s3路径，s3路径可以从**AWS Dataset Management**中上传数据后获取或者自行上传s3后获取，需以s3://开头。
-        - *Training Prompts* 下的 *Instance Prompt* 处填写概念的关键词，关键词用于后续txt2img中生成本次训练所产生的概念，因此不要是常见的英文单词（会和基础模型中的其他概念混淆）。
-   ![Input concepts](../images/dreambooth-input-concepts.png) 
-    - 需要选择将模型保存到子目录 **Save Checkpoint to Subdirectory**。
-    - 如果需要单独保存lora模型(这样模型文件会比较小,但需要配合SD基础模型使用)，请勾选**Generate lora weights for extra networks**。
-   ![Input saving](../images/dreambooth-lora-save.png) 
-
-4. 点击**SageMaker Train**，启动模型训练任务。**Training Job Details**区域会即时增加本次模型训练的工作状态。当状态变成**Complete**，同时您部署解决方案时预留的邮箱将会收到邮件通知，即完成模型训练。
-5. 后续工作。可以进入**txt2img**标签页的**Amazon SageMaker Inference**面板，刷新**Stable Diffusion Checkpoint**，即可看到已训练好的模型。
-
-
-
diff --git a/docs/zh/user-guide/preparation.md b/docs/zh/user-guide/preparation.md
index 55147c83..c1a6f23f 100644
--- a/docs/zh/user-guide/preparation.md
+++ b/docs/zh/user-guide/preparation.md
@@ -4,13 +4,13 @@
 您需要已经成功完成解决方案的部署。
 
 *可选项*
-为了保证AWS Extension插件同第三方插件之间（dreambooth，controlnet）版本的兼容性，您可以运行以下命令检查AWS Extension所支持的第三方插件的最小支持版本。
+为了保证AWS Extension插件同第三方插件之间（controlnet）版本的兼容性，您可以运行以下命令检查AWS Extension所支持的第三方插件的最小支持版本。
 ```bash
 ./preflight.sh
 ```
 ![preflight](../images/preflight.png)
 
-如果您对第三方插件的版本没有特别的要求，您可以通过执行以下命令安装版本兼容的第三方插件（dreambooth，controlnet）。
+如果您对第三方插件的版本没有特别的要求，您可以通过执行以下命令安装版本兼容的第三方插件（controlnet）。
 
 ```bash
 # Clone sd-webui-controlnet
@@ -21,13 +21,6 @@ cd sd-webui-controlnet
 git reset --hard 7c674f8364227d63e1628fc29fa8619d33c56674
 cd ..
 
-# Clone sd_dreambooth_extension
-git clone https://github.com/d8ahazard/sd_dreambooth_extension.git
-
-# Go to sd_dreambooth_extension directory and reset to specific commit
-cd sd_dreambooth_extension
-git reset --hard 926ae204ef5de17efca2059c334b6098492a0641
-cd ..
 ```
 
 <!-- ### 操作步骤
diff --git a/middleware_api/lambda/trainings/start_training_job.py b/middleware_api/lambda/trainings/start_training_job.py
index e9a6213b..19846041 100644
--- a/middleware_api/lambda/trainings/start_training_job.py
+++ b/middleware_api/lambda/trainings/start_training_job.py
@@ -16,7 +16,6 @@ model_table = os.environ.get('MODEL_TABLE')
 checkpoint_table = os.environ.get('CHECKPOINT_TABLE')
 instance_type = os.environ.get('INSTANCE_TYPE')
 sagemaker_role_arn = os.environ.get('TRAIN_JOB_ROLE')
-# e.g. "648149843064.dkr.ecr.us-east-1.amazonaws.com/dreambooth-training-repo"
 image_uri = os.environ.get('TRAIN_ECR_URL')
 region = os.environ.get('AWS_REGION')
 training_stepfunction_arn = os.environ.get('TRAINING_SAGEMAKER_ARN')
diff --git a/utils.py b/utils.py
index d287a58e..ca63a3fb 100644
--- a/utils.py
+++ b/utils.py
@@ -499,7 +499,5 @@ if __name__ == '__main__':
 
     # upload_file_to_s3(sys.argv[1], 'aws-gcr-csdc-atl-exp-us-west-2', sys.argv[2])
     # fast_upload(boto3.Session(), 'aws-gcr-csdc-atl-exp-us-west-2', sys.argv[2], [sys.argv[1]])
-    upload_folder_to_s3_by_tar('models/dreambooth/sagemaker_test/samples', 'aws-gcr-csdc-atl-exp-us-west-2',
-                               'aigc-webui-test-samples')
     download_folder_from_s3_by_tar('aws-gcr-csdc-atl-exp-us-west-2', 'aigc-webui-test-samples/samples.tar',
                                    'samples.tar')
diff --git a/utils_cn.py b/utils_cn.py
index 17edf971..8448a454 100644
--- a/utils_cn.py
+++ b/utils_cn.py
@@ -178,7 +178,5 @@ if __name__ == '__main__':
 
     # upload_file_to_s3(sys.argv[1], 'aws-gcr-csdc-atl-exp-us-west-2', sys.argv[2])
     # fast_upload(boto3.Session(), 'aws-gcr-csdc-atl-exp-us-west-2', sys.argv[2], [sys.argv[1]])
-    # upload_folder_to_s3_by_tar('models/dreambooth/sagemaker_test/samples', 'aws-gcr-csdc-atl-exp-us-west-2',
-    #                            'aigc-webui-test-samples', '')
     # download_folder_from_s3_by_tar('aws-gcr-csdc-atl-exp-us-west-2', 'aigc-webui-test-samples/samples.tar',
     #                                'samples.tar')