From 4b2918a86f686e059cfef3e63de1cc508786a969 Mon Sep 17 00:00:00 2001 From: toshiaki1729 <116595002+toshiaki1729@users.noreply.github.com> Date: Thu, 10 Nov 2022 22:19:10 +0900 Subject: [PATCH 1/2] display simple loading state --- dataset_tag_editor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dataset_tag_editor.py b/dataset_tag_editor.py index f526abf..f0f5ee8 100644 --- a/dataset_tag_editor.py +++ b/dataset_tag_editor.py @@ -141,9 +141,12 @@ class DatasetTagEditor: def load_dataset(self, img_dir: str, recursive: bool = False): self.clear() + print(f'Loading dataset from {img_dir}') try: filepath_set = get_filepath_set(dir=img_dir, recursive=recursive) - except: + except Exception as e: + print(e) + print('Loading dataset has been aborted.') return self.dataset_dir = img_dir @@ -168,6 +171,7 @@ class DatasetTagEditor: self.construct_tag_counts() self.set_img_filter_img_path() + print(f'Loading dataset has been Completed') def save_dataset(self, backup: bool) -> Tuple[int, int, str]: From c5939b68768b016f1fc484c78b9bf099b1a30da5 Mon Sep 17 00:00:00 2001 From: toshiaki1729 <116595002+toshiaki1729@users.noreply.github.com> Date: Thu, 10 Nov 2022 23:34:01 +0900 Subject: [PATCH 2/2] load all images supported by Pillow, the same way as modules/textual_inversion/dataset in webUI. This may be slow on extremely large datasets.
--- dataset_tag_editor.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/dataset_tag_editor.py b/dataset_tag_editor.py index f0f5ee8..765a394 100644 --- a/dataset_tag_editor.py +++ b/dataset_tag_editor.py @@ -3,6 +3,7 @@ import re from typing import Optional, List, Tuple, Set from modules import shared from modules.textual_inversion.dataset import re_numbers_at_start +from PIL import Image re_tags = re.compile(r'^(.+) \[\d+\]$') @@ -154,20 +155,27 @@ class DatasetTagEditor: for img_path in filepath_set: img_dir = os.path.dirname(img_path) img_filename, img_ext = os.path.splitext(os.path.basename(img_path)) - if img_ext == '.png': - text_filename = os.path.join(img_dir, img_filename+'.txt') - # from modules/textual_inversion/dataset.py - if os.path.exists(text_filename) and os.path.isfile(text_filename): - with open(text_filename, "r", encoding="utf8") as ftxt: - filename_text = ftxt.read() - else: - filename_text = img_filename - filename_text = re.sub(re_numbers_at_start, '', filename_text) - if self.re_word: - tokens = self.re_word.findall(filename_text) - filename_text = (shared.opts.dataset_filename_join_string or "").join(tokens) - - self.set_tags_by_image_path(img_path, [t.strip() for t in filename_text.split(',')]) + if img_ext == '.txt': + continue + try: + img = Image.open(img_path) + except: + img.close() + continue + + text_filename = os.path.join(img_dir, img_filename+'.txt') + # from modules/textual_inversion/dataset.py + if os.path.exists(text_filename) and os.path.isfile(text_filename): + with open(text_filename, "r", encoding="utf8") as ftxt: + filename_text = ftxt.read() + else: + filename_text = img_filename + filename_text = re.sub(re_numbers_at_start, '', filename_text) + if self.re_word: + tokens = self.re_word.findall(filename_text) + filename_text = (shared.opts.dataset_filename_join_string or "").join(tokens) + + self.set_tags_by_image_path(img_path, [t.strip() for t in 
filename_text.split(',')]) self.construct_tag_counts() self.set_img_filter_img_path()