sd_smartprocess/file_manager.py

255 lines
9.6 KiB
Python

import os
import re
from typing import List
import PIL
from PIL.Image import Image
def clean_string(s):
    """
    Reduce a string to ASCII letters, digits and single spaces.

    Every character that is not an ASCII letter, a digit or whitespace is
    dropped (not replaced by a space, so "foo_bar" becomes "foobar"). If the
    remaining text is one word repeated any number of times, only that word
    is returned. Otherwise runs of whitespace collapse to a single space and
    the ends are trimmed.

    Args:
        s: The string to clean.

    Returns: A cleaned string (possibly empty).
    """
    stripped = re.sub(r'[^a-zA-Z0-9\s]', '', s)
    words = stripped.split()

    # "cat cat cat" -> "cat": a single distinct word stands for the whole text.
    if len(set(words)) == 1:
        return words[0]

    return re.sub(r'\s+', ' ', stripped).strip()
class ImageData:
    """
    One image file on disk together with its caption, tags and UI flags.

    The caption is read from a ``.txt`` file that shares the image's stem; the
    tags are the comma-separated pieces of that caption.
    """
    # Path of the image file this entry describes.
    image_path: str = ""
    # Path of the last unsaved preview written by update_image(save_file=False).
    temp_image_path: str = ""
    # Full caption text.
    caption: str = ""
    # Comma-separated pieces of the caption; assigned per instance in __init__
    # (deliberately no shared mutable class-level default).
    tags: List[str]
    # Flags read by FileManager; `filtered` is written by FileManager.update_filters().
    selected: bool = False
    filtered: bool = False
    # Random hex identifier assigned in __init__.
    id = None

    def __init__(self, image_path):
        """
        Build the entry for ``image_path`` and load its caption and tags.

        Args:
            image_path: Path to the image file.
        """
        self.image_path = image_path
        self.caption = self.read_caption()
        self.tags = self.split_caption()
        # 32 random bytes rendered as 64 hex characters.
        self.id = os.urandom(32).hex()

    def read_caption(self):
        """
        Return the caption for this image.

        If a ``.txt`` file with the same stem as the image exists, its stripped
        contents are returned. Otherwise the caption is derived from the image
        file name (without extension) via ``clean_string``.
        """
        caption_path = os.path.splitext(self.image_path)[0] + '.txt'
        if os.path.exists(caption_path):
            with open(caption_path, 'r', encoding="utf8") as file:
                return file.read().strip()
        image_name = os.path.splitext(os.path.basename(self.image_path))[0]
        return clean_string(image_name)

    def split_caption(self):
        """Split ``self.caption`` on commas into stripped, non-empty tags."""
        stripped = (tag.strip() for tag in self.caption.split(","))
        return [tag for tag in stripped if tag != ""]

    def update_image(self, image: "Image", save_file: bool = False):
        """
        Write ``image`` to disk as a PNG next to the current image.

        Args:
            image: Object exposing ``save(path)`` (a PIL image).
            save_file: When False, the image is written to ``<stem>_temp.png``
                and only ``temp_image_path`` is updated. When True, the image
                is written to ``<stem>.png``, the previous file is removed if
                it was a different file, and ``image_path`` is updated.

        Raises:
            Whatever ``image.save`` raises. On failure the original file and
            ``image_path`` are left untouched.
        """
        stem = os.path.splitext(self.image_path)[0]

        if not save_file:
            self.temp_image_path = stem + '_temp.png'
            image.save(self.temp_image_path)
            return

        old_path = self.image_path
        new_path = stem + '.png'

        # Save BEFORE deleting: if saving fails the original must survive.
        image.save(new_path)

        # samefile() guards case-insensitive filesystems, where "x.PNG" and
        # "x.png" differ as strings but name the file that was just written.
        if (
            new_path != old_path
            and os.path.exists(old_path)
            and os.path.exists(new_path)
            and not os.path.samefile(old_path, new_path)
        ):
            os.remove(old_path)
        self.image_path = new_path

    def update_caption(self, caption: str, save_file: bool = False):
        """
        Replace the caption and recompute the tags.

        Args:
            caption: New caption text.
            save_file: When True, also write the caption to the ``.txt`` file
                that shares the image's stem (UTF-8, overwriting it).
        """
        if save_file:
            caption_path = os.path.splitext(self.image_path)[0] + '.txt'
            with open(caption_path, 'w', encoding="utf8") as file:
                file.write(caption)
        self.caption = caption
        self.tags = self.split_caption()

    def get_image(self):
        """
        Load ``image_path`` (not ``temp_image_path``) and return it as RGB.

        The file is opened in a context manager so its handle is released as
        soon as the converted copy has been produced.
        """
        with PIL.Image.open(self.image_path) as img:
            return img.convert("RGB")
class FileManager:
    """
    Singleton registry of loaded images and the filter criteria applied to them.

    Every ``FileManager()`` call returns the same object. State is initialised
    only the first time, so obtaining the singleton again never discards the
    files that were already loaded; use ``clear()`` to reset explicitly.
    """
    # Directory walked by load_files(); never assigned inside this class
    # (presumably set by callers before load_files() is invoked).
    file_path: str = ""
    # The single shared instance, created lazily by __new__.
    _instance = None
    # Becomes True on the instance once __init__ has populated it.
    _initialized: bool = False
    # Per-instance state, assigned in __init__ (no shared mutable class defaults).
    files: List["ImageData"]
    included_tags: List[str]
    excluded_tags: List[str]
    included_strings: List[str]
    excluded_strings: List[str]
    # Only reset by clear() within this class.
    current_image = None

    def __new__(cls):
        """Create the shared instance on first use and return it every time."""
        if FileManager._instance is None:
            FileManager._instance = object.__new__(cls)
        return FileManager._instance

    def __init__(self):
        """
        Initialise the singleton's state exactly once.

        Python calls ``__init__`` after every ``FileManager()`` even though
        ``__new__`` hands back the existing object, so without this guard each
        call would silently empty ``files``.
        """
        if self._initialized:
            return
        self._initialized = True
        self.files = []
        self.included_tags = []
        self.excluded_tags = []
        self.included_strings = []
        self.excluded_strings = []

    def clear(self):
        """Drop all loaded files, all filter criteria and the current image."""
        self.files = []
        self.included_tags = []
        self.excluded_tags = []
        self.included_strings = []
        self.excluded_strings = []
        self.current_image = None

    def load_files(self):
        """
        Reload ``files`` from ``file_path``.

        Clears the current state (including filter criteria), walks
        ``file_path`` recursively and adds an ``ImageData`` for every path that
        ``is_image`` accepts and whose full path does not contain "_backup".
        Finally recomputes the ``filtered`` flags.
        """
        # Imported here rather than at module level, as in the original code.
        from extensions.sd_smartprocess.smartprocess import is_image
        self.clear()
        for root, _dirs, names in os.walk(self.file_path):
            for name in names:
                path = os.path.join(root, name)
                # The "_backup" test applies to the whole path, directories included.
                if is_image(path) and "_backup" not in path:
                    self.files.append(ImageData(path))
        self.update_filters()

    def filtered_files(self, for_gallery: bool = False) -> list:
        """
        Return the files whose ``filtered`` flag is set.

        Returns ``ImageData`` objects, or ``(image_path, caption)`` tuples when
        ``for_gallery`` is True.
        """
        if for_gallery:
            return [(file.image_path, file.caption) for file in self.files if file.filtered]
        return [file for file in self.files if file.filtered]

    def all_files(self, for_gallery: bool = False) -> list:
        """
        Return every loaded file.

        Returns the internal ``files`` list itself (not a copy), or a new list
        of ``(image_path, caption)`` tuples when ``for_gallery`` is True.
        """
        if for_gallery:
            return [(file.image_path, file.caption) for file in self.files]
        return self.files

    def selected_files(self, for_gallery: bool = False) -> list:
        """
        Return the files whose ``selected`` flag is set.

        Returns ``ImageData`` objects, or ``(image_path, caption)`` tuples when
        ``for_gallery`` is True.
        """
        if for_gallery:
            return [(file.image_path, file.caption) for file in self.files if file.selected]
        return [file for file in self.files if file.selected]

    def filtered_and_selected_files(self, for_gallery: bool = False) -> list:
        """
        Return the files that are both ``filtered`` and ``selected``.

        Returns ``ImageData`` objects, or ``(image_path, caption)`` tuples when
        ``for_gallery`` is True.
        """
        if for_gallery:
            return [(file.image_path, file.caption) for file in self.files if file.selected and file.filtered]
        return [file for file in self.files if file.filtered and file.selected]

    def update_files(self, files: List["ImageData"]):
        """Apply ``update_file`` to each entry of ``files``."""
        for file in files:
            self.update_file(file)

    def update_file(self, file: "ImageData"):
        """
        Replace the stored entry that has the same ``id`` as ``file``.

        If no stored entry has that id, ``file`` is appended instead.
        """
        for index, existing_file in enumerate(self.files):
            if existing_file.id == file.id:
                self.files[index] = file
                break
        else:
            # Loop finished without a match: this is a new file.
            self.files.append(file)

    @staticmethod
    def _matches_pattern(pattern, string):
        """
        Return True when the tag ``string`` satisfies the filter ``pattern``.

        - A pattern containing ``*`` becomes an anchored regular expression in
          which each ``*`` is replaced by ``.*``; any other regex syntax in the
          pattern stays active. If that expression is not a valid regex, the
          pattern is retried with everything except ``*`` taken literally.
        - A pattern without ``*`` and without a space must equal one of the
          space-separated words of ``string``.
        - A pattern without ``*`` that contains a space must occur in
          ``string`` as a substring.
        """
        if '*' in pattern:
            try:
                return re.match('^' + pattern.replace('*', '.*') + '$', string) is not None
            except re.error:
                # User-typed filters such as "(trip*" are not valid regexes;
                # match them as plain wildcards instead of raising.
                literal = '.*'.join(re.escape(part) for part in pattern.split('*'))
                return re.match('^' + literal + '$', string) is not None
        if " " not in pattern:
            return pattern in string.split(" ")
        return pattern in string

    @staticmethod
    def _should_include(tag, filter_tags, filter_strings):
        """
        Return True when ``tag`` passes the inclusion criteria.

        With no criteria at all every tag passes; otherwise the tag must be in
        ``filter_tags`` or match at least one of ``filter_strings``.
        """
        if len(filter_tags) == 0 and len(filter_strings) == 0:
            return True
        if tag in filter_tags:
            return True
        return any(FileManager._matches_pattern(candidate, tag) for candidate in filter_strings)

    @staticmethod
    def _should_exclude(tag, filter_tags, filter_strings):
        """Return True when ``tag`` is in ``filter_tags`` or matches any of ``filter_strings``."""
        if tag in filter_tags:
            return True
        return any(FileManager._matches_pattern(candidate, tag) for candidate in filter_strings)

    def update_filters(self):
        """
        Recompute the ``filtered`` flag of every loaded file.

        A tag survives when it is not excluded (by ``excluded_tags`` /
        ``excluded_strings``) and is included (by ``included_tags`` /
        ``included_strings``; with both empty, every tag is included). A file
        is marked ``filtered`` when at least one of its tags survives. The
        method takes no arguments and returns nothing; it only writes
        ``file.filtered``.

        Matching rules:
        1. Tag lists: exact, case-sensitive membership.
           - To keep files tagged 'holiday', add 'holiday' to included_tags.
           - To drop the tag 'work', add 'work' to excluded_tags.
        2. Filter strings containing '*': '*' matches any sequence of
           characters (including none) and the whole tag must match. Other
           regex syntax in such a string is interpreted by Python's 're'.
           - 'trip*' matches tags starting with 'trip'.
           - '*2023' matches tags ending with '2023'.
        3. Filter strings without '*': matched literally, not as a regex.
           Without a space the string must equal a whole word of the tag;
           with a space it must be a substring of the tag.

        Note:
        - Exclusion takes precedence over inclusion for a given tag.
        - A file with no tags is never marked ``filtered``.
        """
        for file in self.files:
            file.filtered = any(
                not self._should_exclude(tag, self.excluded_tags, self.excluded_strings)
                and self._should_include(tag, self.included_tags, self.included_strings)
                for tag in file.tags
            )

    def all_captions(self):
        """Return the caption of every loaded file, in load order."""
        return [file.caption for file in self.files]