255 lines
9.6 KiB
Python
255 lines
9.6 KiB
Python
import os
|
|
import re
|
|
from typing import List
|
|
|
|
import PIL
|
|
from PIL.Image import Image
|
|
|
|
|
|
|
|
def clean_string(s):
    """
    Remove non-alphanumeric characters except whitespace, and normalize spacing.

    Letters and digits from any script are kept (so "café" stays "café");
    punctuation, symbols and underscores are dropped without inserting a
    space, so "my_photo-01" becomes "myphoto01".

    Args:
        s: The string to clean.

    Returns: A cleaned string. When the cleaned text consists of one word
        repeated several times, that word is returned once. Leading and
        trailing whitespace is removed and whitespace runs become one space.
    """
    # \w is Unicode-aware for str patterns, so accented and non-Latin letters
    # survive; "_" is part of \w and is removed explicitly.
    cleaned = re.sub(r'[^\w\s]|_', '', s)
    words = cleaned.split()
    # Collapse a sentence that is just the same word repeated
    if len(set(words)) == 1:
        return words[0]
    # Re-joining the whitespace-split words trims the ends and collapses runs
    return ' '.join(words)
|
|
|
|
|
|
class ImageData:
    """
    An image file on disk together with its caption, tags and UI state.

    The caption lives in a ``.txt`` file that shares the image's base name.
    Tags are the comma-separated pieces of the caption.
    """

    # Class-level defaults. __init__ rebinds image_path, caption, tags and id
    # on every instance, so the shared list below is never mutated through an
    # instance created the normal way.
    image_path: str = ""
    temp_image_path: str = ""
    caption: str = ""
    tags: List[str] = []
    selected: bool = False
    filtered: bool = False
    id = None

    def __init__(self, image_path):
        """
        Args:
            image_path: Path of the image file this object describes.
        """
        self.image_path = image_path
        self.caption = self.read_caption()
        self.tags = self.split_caption()
        # Random 64-hex-character id (32 bytes from os.urandom)
        self.id = os.urandom(32).hex()

    def _caption_path(self):
        """Return the path of the ``.txt`` caption file next to the image."""
        return os.path.splitext(self.image_path)[0] + '.txt'

    def read_caption(self):
        """
        Return the caption for this image.

        Returns: The stripped contents of the caption file when it exists,
            otherwise the image's base name passed through ``clean_string``.
        """
        caption_path = self._caption_path()
        if os.path.exists(caption_path):
            with open(caption_path, 'r', encoding="utf8") as file:
                return file.read().strip()
        image_name = os.path.splitext(os.path.basename(self.image_path))[0]
        return clean_string(image_name)

    def split_caption(self):
        """
        Split ``self.caption`` on commas into tags.

        Returns: The stripped, non-empty pieces in their original order.
        """
        stripped = (piece.strip() for piece in self.caption.split(","))
        return [piece for piece in stripped if piece != ""]

    def update_image(self, image: "Image", save_file: bool = False):
        """
        Write ``image`` to disk as a PNG.

        Args:
            image: Object whose ``save(path)`` method writes the image.
            save_file: When True, save as ``<base>.png``, delete the previous
                file if it is a different file, and point ``image_path`` at
                the PNG. When False, save to ``<base>_temp.png`` and record
                that path in ``temp_image_path``; ``image_path`` is untouched.
        """
        base = os.path.splitext(self.image_path)[0]
        if not save_file:
            temp_path = base + '_temp.png'
            image.save(temp_path)
            self.temp_image_path = temp_path
            return

        previous_path = self.image_path
        new_path = base + '.png'
        # Save before deleting anything: if the write fails, the original
        # image is still on disk and image_path still points at it.
        image.save(new_path)
        # samefile guards case-insensitive filesystems, where "pic.PNG" and
        # "pic.png" differ as strings but name the file that was just written.
        if (new_path != previous_path
                and os.path.exists(previous_path)
                and not os.path.samefile(previous_path, new_path)):
            os.remove(previous_path)
        self.image_path = new_path

    def update_caption(self, caption: str, save_file: bool = False):
        """
        Replace the caption and re-derive the tags from it.

        Args:
            caption: The new caption text.
            save_file: When True, also overwrite the ``.txt`` caption file.
        """
        if save_file:
            with open(self._caption_path(), 'w', encoding="utf8") as file:
                file.write(caption)
        self.caption = caption
        self.tags = self.split_caption()

    def get_image(self):
        """
        Open the image at ``image_path``.

        Returns: A new image converted to RGB mode.
        """
        # convert() returns a separate image, so the opened file can be
        # closed as soon as the conversion is done.
        with PIL.Image.open(self.image_path) as opened:
            return opened.convert("RGB")
|
|
|
|
|
|
class FileManager:
    """
    Singleton that holds the loaded ImageData objects and the tag filters.

    Every ``FileManager()`` call returns the same instance.
    """

    file_path: str = ""
    _instance = None
    files: List["ImageData"] = []
    included_tags: List[str] = []
    excluded_tags: List[str] = []
    included_strings: List[str] = []
    excluded_strings: List[str] = []
    current_image = None

    def __new__(cls):
        """Create the shared instance on first use and return it afterwards."""
        if FileManager._instance is None:
            FileManager._instance = object.__new__(cls)
        return FileManager._instance

    def __init__(self):
        # Python runs __init__ on every FileManager() call, even though
        # __new__ returned the existing instance. Create the list only once
        # so that later calls do not discard the files already loaded.
        if "files" not in self.__dict__:
            self.files = []

    def clear(self):
        """Drop all files, all filter criteria and the current image."""
        self.files = []
        self.included_tags = []
        self.excluded_tags = []
        self.included_strings = []
        self.excluded_strings = []
        self.current_image = None

    def load_files(self):
        """
        Reset the manager and load every image found under ``file_path``.

        Walks ``file_path`` recursively, wraps each path accepted by
        ``is_image`` in an ImageData, then recomputes the filter flags.
        Paths containing "_backup" anywhere (directory or file name) are
        skipped. Note that the filter criteria are cleared as well.
        """
        from extensions.sd_smartprocess.smartprocess import is_image

        self.clear()
        # Walk through all files in the directory that contains the images
        for root, _dirs, names in os.walk(self.file_path):
            for name in names:
                path = os.path.join(root, name)
                if is_image(path) and "_backup" not in path:
                    self.files.append(ImageData(path))
        self.update_filters()

    def _pick(self, keep, for_gallery):
        """Return the files accepted by ``keep``, as objects or gallery tuples."""
        if for_gallery:
            return [(file.image_path, file.caption) for file in self.files if keep(file)]
        return [file for file in self.files if keep(file)]

    def filtered_files(self, for_gallery: bool = False) -> list:
        """
        Return the files whose ``filtered`` flag is set.

        Args:
            for_gallery: When True, return ``(image_path, caption)`` tuples
                instead of ImageData objects.
        """
        return self._pick(lambda file: file.filtered, for_gallery)

    def all_files(self, for_gallery: bool = False) -> list:
        """
        Return every file.

        Args:
            for_gallery: When True, return ``(image_path, caption)`` tuples.
                When False, the internal list itself is returned, not a copy.
        """
        if not for_gallery:
            return self.files
        return self._pick(lambda file: True, for_gallery)

    def selected_files(self, for_gallery: bool = False) -> list:
        """
        Return the files whose ``selected`` flag is set.

        Args:
            for_gallery: When True, return ``(image_path, caption)`` tuples
                instead of ImageData objects.
        """
        return self._pick(lambda file: file.selected, for_gallery)

    def filtered_and_selected_files(self, for_gallery: bool = False) -> list:
        """
        Return the files that are both filtered and selected.

        Args:
            for_gallery: When True, return ``(image_path, caption)`` tuples
                instead of ImageData objects.
        """
        return self._pick(lambda file: file.selected and file.filtered, for_gallery)

    def update_files(self, files: List["ImageData"]):
        """Apply ``update_file`` to each entry of ``files`` in order."""
        for file in files:
            self.update_file(file)

    def update_file(self, file: "ImageData"):
        """
        Replace the stored file that has the same ``id`` as ``file``.

        If no stored file has that id, ``file`` is appended instead.
        """
        for index, existing_file in enumerate(self.files):
            if existing_file.id == file.id:
                self.files[index] = file
                break
        else:
            # The file was not found in the list, append it
            self.files.append(file)

    @staticmethod
    def _matches_pattern(pattern, string):
        """Return True when ``string`` matches one filter string."""
        if '*' in pattern:
            # Each '*' becomes '.*' and the result is anchored at both ends.
            # Nothing else is escaped, so other regex metacharacters in the
            # pattern keep their regex meaning.
            return re.match('^' + pattern.replace('*', '.*') + '$', string) is not None
        if " " not in pattern:
            # A single word must equal one of the space-separated words
            return pattern in string.split(" ")
        # A multi-word pattern matches as a plain substring
        return pattern in string

    @staticmethod
    def _tag_included(tag, filter_tags, filter_strings):
        """Return True when ``tag`` passes the inclusion criteria."""
        if not filter_tags and not filter_strings:
            # No inclusion criteria at all: everything is included
            return True
        if tag in filter_tags:
            return True
        return any(FileManager._matches_pattern(item, tag) for item in filter_strings)

    @staticmethod
    def _tag_excluded(tag, filter_tags, filter_strings):
        """Return True when ``tag`` hits any exclusion criterion."""
        if tag in filter_tags:
            return True
        return any(FileManager._matches_pattern(item, tag) for item in filter_strings)

    def update_filters(self):
        """
        Recompute the ``filtered`` flag of every file from the current criteria.

        A tag survives when it is not excluded and is included:

        - Excluded: the tag is in ``excluded_tags`` or matches an entry of
          ``excluded_strings``. Exclusion is checked first.
        - Included: both ``included_tags`` and ``included_strings`` are empty,
          or the tag is in ``included_tags``, or it matches an entry of
          ``included_strings``.

        A file's ``filtered`` flag is True when at least one of its tags
        survives. Excluding a tag therefore hides a file only if none of its
        other tags survive, and a file with no tags is never flagged.

        Filter strings are matched as follows:

        - Containing ``*``: converted to an anchored regular expression with
          every ``*`` replaced by ``.*`` (``trip*`` matches tags starting
          with "trip"). The rest of the string is not escaped.
        - Without ``*`` and without a space: must equal one of the
          space-separated words of the tag.
        - Without ``*`` but with a space: must occur in the tag as a substring.

        Matching is case-sensitive. Nothing is returned; the flags are updated
        in place on the objects in ``self.files``.
        """
        for file in self.files:
            file.filtered = any(
                not self._tag_excluded(tag, self.excluded_tags, self.excluded_strings)
                and self._tag_included(tag, self.included_tags, self.included_strings)
                for tag in file.tags
            )

    def all_captions(self):
        """Return the caption of every file, in list order."""
        return [file.caption for file in self.files]
|