# sd_smartprocess/file_manager.py

import os
import re
from typing import List

import PIL
from PIL.Image import Image


def clean_string(s):
    """
    Reduce a string to ASCII letters, digits and single spaces.
    Args:
        s: The string to clean.

    Returns: The cleaned string. When every remaining word is the same word,
        only a single copy of that word is returned.
    """
    # Drop everything that is neither an ASCII alphanumeric nor whitespace
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', s)
    words = alnum_only.split()
    # A "sentence" made of one word repeated collapses to that word
    if len(set(words)) == 1:
        return words[0]
    # Re-joining the whitespace-split words trims the ends and squeezes
    # every run of whitespace down to one space
    return ' '.join(words)


class ImageData:
    """
    An image file on disk together with its caption, tags and gallery flags.

    The caption lives in a ``.txt`` file that shares the image's path stem.
    When no such file exists the caption is derived from the image file name
    via ``clean_string``. Tags are the non-empty, comma-separated pieces of
    the caption.
    """
    # Class-level defaults. __init__ rebinds image_path, caption, tags and id
    # on every instance, so the shared list below is never mutated by this class.
    image_path: str = ""
    temp_image_path: str = ""
    caption: str = ""
    tags: List[str] = []
    selected: bool = False
    filtered: bool = False
    id = None

    def __init__(self, image_path):
        """
        Args:
            image_path: Path of the image file this object describes.
        """
        self.image_path = image_path
        self.caption = self.read_caption()
        self.tags = self.split_caption()
        # Generate a random id (64 hex characters)
        self.id = os.urandom(32).hex()

    def read_caption(self):
        """
        Load the caption for this image.

        Returns: The stripped content of the sibling ``.txt`` file if it
            exists, otherwise the cleaned image file name (without extension).
        """
        caption_path = os.path.splitext(self.image_path)[0] + '.txt'
        if os.path.exists(caption_path):
            with open(caption_path, 'r', encoding="utf8") as file:
                return file.read().strip()
        image_name = os.path.splitext(os.path.basename(self.image_path))[0]
        return clean_string(image_name)

    def split_caption(self):
        """
        Split the current caption into tags.

        Returns: The comma-separated pieces of ``self.caption``, stripped of
            surrounding whitespace, with empty pieces removed.
        """
        stripped = (tag.strip() for tag in self.caption.split(","))
        return [tag for tag in stripped if tag != ""]

    def update_image(self, image: "Image", save_file: bool = False):
        """
        Write new pixel data for this image as a PNG.

        Args:
            image: The image to save; only its ``save(path)`` method is used.
            save_file: If True, the image is saved as ``<stem>.png``,
                ``image_path`` is pointed at it and the previous file is
                removed when it had a different path. If False, the image is
                saved to ``<stem>_temp.png`` (recorded in ``temp_image_path``)
                and ``image_path`` is left untouched.
        """
        stem = os.path.splitext(self.image_path)[0]
        if not save_file:
            self.temp_image_path = stem + '_temp.png'
            image.save(self.temp_image_path)
            return

        previous_path = self.image_path
        new_path = stem + '.png'
        # Write the replacement BEFORE deleting the original: if save() fails
        # the original file and image_path must both stay intact.
        image.save(new_path)
        self.image_path = new_path
        if previous_path != new_path and os.path.exists(previous_path):
            os.remove(previous_path)

    def update_caption(self, caption: str, save_file: bool = False):
        """
        Replace the caption and re-derive the tags from it.

        Args:
            caption: The new caption text.
            save_file: If True, the caption is also written to the ``.txt``
                file that shares the image's path stem.
        """
        if save_file:
            caption_txt_filename = os.path.splitext(self.image_path)[0] + '.txt'
            with open(caption_txt_filename, 'w', encoding="utf8") as file:
                file.write(caption)
        self.caption = caption
        self.tags = self.split_caption()

    def get_image(self):
        """
        Open ``image_path`` and return its content as an RGB image.

        Returns: A new RGB ``PIL.Image.Image``.
        """
        # convert() returns an independent copy, so the file handle opened by
        # PIL can be released as soon as the conversion is done.
        with PIL.Image.open(self.image_path) as source:
            return source.convert("RGB")


class FileManager:
    """
    Process-wide singleton that holds the loaded images and the active filters.

    ``FileManager()`` always returns the same object. ``file_path`` is the
    directory scanned by ``load_files``; the four ``included_*`` /
    ``excluded_*`` lists are the criteria applied by ``update_filters``.
    """
    file_path: str = ""
    _instance = None
    # Set on the instance after the first __init__ so later FileManager()
    # calls do not reset state.
    _initialized: bool = False
    files: "List[ImageData]" = []
    included_tags: List[str] = []
    excluded_tags: List[str] = []
    included_strings: List[str] = []
    excluded_strings: List[str] = []
    current_image = None

    def __new__(cls):
        """Create the shared instance on first use and return it afterwards."""
        if FileManager._instance is None:
            FileManager._instance = object.__new__(cls)
        return FileManager._instance

    def __init__(self):
        """
        Initialize the singleton's state exactly once.

        Python calls ``__init__`` every time ``FileManager()`` is evaluated,
        even though ``__new__`` hands back the existing instance; without the
        guard each call would wipe the already loaded ``files``.
        """
        if self._initialized:
            return
        self.clear()
        self._initialized = True

    def clear(self):
        """Drop all loaded files, all filter criteria and the current image."""
        self.files = []
        self.included_tags = []
        self.excluded_tags = []
        self.included_strings = []
        self.excluded_strings = []
        self.current_image = None

    def load_files(self):
        """
        Reset the manager and load every image found under ``file_path``.

        The directory tree is walked recursively. A file is loaded when
        ``is_image`` accepts it and its full path does not contain
        ``"_backup"``. Filters are re-evaluated afterwards.
        """
        # Imported here rather than at module level, as in the original code.
        from extensions.sd_smartprocess.smartprocess import is_image

        self.clear()
        # Walk through all files in the directory that contains the images
        for root, _dirs, names in os.walk(self.file_path):
            for name in names:
                path = os.path.join(root, name)
                if is_image(path) and "_backup" not in path:
                    self.files.append(ImageData(path))
        self.update_filters()

    @staticmethod
    def _for_gallery(files):
        """Convert file objects to ``(image_path, caption)`` tuples."""
        return [(file.image_path, file.caption) for file in files]

    def filtered_files(self, for_gallery: bool = False):
        """
        Return the files whose ``filtered`` flag is set.

        Args:
            for_gallery: If True, return ``(image_path, caption)`` tuples
                instead of the file objects.
        """
        matching = [file for file in self.files if file.filtered]
        return self._for_gallery(matching) if for_gallery else matching

    def all_files(self, for_gallery: bool = False) -> "List[ImageData]":
        """
        Return every loaded file.

        Args:
            for_gallery: If True, return ``(image_path, caption)`` tuples
                instead; otherwise the internal ``files`` list itself is
                returned (not a copy).
        """
        return self._for_gallery(self.files) if for_gallery else self.files

    def selected_files(self, for_gallery: bool = False) -> "List[ImageData]":
        """
        Return the files whose ``selected`` flag is set.

        Args:
            for_gallery: If True, return ``(image_path, caption)`` tuples
                instead of the file objects.
        """
        matching = [file for file in self.files if file.selected]
        return self._for_gallery(matching) if for_gallery else matching

    def filtered_and_selected_files(self, for_gallery: bool = False) -> "List[ImageData]":
        """
        Return the files that are both ``filtered`` and ``selected``.

        Args:
            for_gallery: If True, return ``(image_path, caption)`` tuples
                instead of the file objects.
        """
        matching = [file for file in self.files if file.filtered and file.selected]
        return self._for_gallery(matching) if for_gallery else matching

    def update_files(self, files: "List[ImageData]"):
        """Apply ``update_file`` to each entry of ``files`` in order."""
        for file in files:
            self.update_file(file)

    def update_file(self, file: "ImageData"):
        """
        Replace the stored file that has the same ``id`` as ``file``.

        If no stored file has that id, ``file`` is appended instead.
        """
        for index, existing_file in enumerate(self.files):
            if existing_file.id == file.id:
                self.files[index] = file
                return
        # The file was not found in the list, append it
        self.files.append(file)

    @staticmethod
    def _matches_pattern(pattern, string):
        """
        Test one filter string against one tag.

        - With ``*`` in the pattern: each ``*`` becomes ``.*`` and the whole
          tag must match. The rest of the pattern is NOT escaped, so other
          regex metacharacters keep their regex meaning.
        - Without ``*`` and without a space: the pattern must equal one of
          the space-separated words of the tag.
        - Without ``*`` but with a space: plain substring test.
        """
        if '*' in pattern:
            return re.match('^' + pattern.replace('*', '.*') + '$', string) is not None
        if " " not in pattern:
            return pattern in string.split(" ")
        return pattern in string

    def _tag_passes(self, tag):
        """Return True if ``tag`` survives the exclude and include criteria."""
        # Exclusion always wins over inclusion.
        if tag in self.excluded_tags:
            return False
        if any(self._matches_pattern(pattern, tag) for pattern in self.excluded_strings):
            return False
        # With no include criteria at all, every non-excluded tag passes.
        if len(self.included_tags) == 0 and len(self.included_strings) == 0:
            return True
        if tag in self.included_tags:
            return True
        return any(self._matches_pattern(pattern, tag) for pattern in self.included_strings)

    def update_filters(self):
        """
        Recompute the ``filtered`` flag of every loaded file.

        Each tag of a file is checked on its own:

        1. The tag is rejected if it is listed in ``excluded_tags`` or matches
           any entry of ``excluded_strings``.
        2. Otherwise it is accepted if no include criteria are set, if it is
           listed in ``included_tags``, or if it matches any entry of
           ``included_strings``.

        ``file.filtered`` becomes True when at least one tag is accepted, so a
        file without tags is never flagged. Nothing is returned; the flags are
        updated in place.

        Filter strings (``included_strings`` / ``excluded_strings``):
        - ``trip*`` matches tags starting with ``trip``; ``*2023`` matches
          tags ending with ``2023`` (``*`` matches any run of characters,
          including none, and the whole tag must match).
        - ``beach`` (no ``*``, no space) matches a tag containing the word
          ``beach`` between spaces, e.g. ``sunny beach``.
        - ``red car`` (contains a space) matches any tag containing that
          substring.
        - Strings without ``*`` are never treated as regular expressions.

        Note:
        - Matching is case-sensitive.
        - NOTE(review): exclusion works per tag, not per file. A file tagged
          ``work, holiday`` still passes when only ``work`` is excluded,
          because ``holiday`` survives. Confirm this is the intended behavior.
        """
        for file in self.files:
            file.filtered = any(self._tag_passes(tag) for tag in file.tags)

    def all_captions(self):
        """Return the caption of every loaded file, in load order."""
        return [file.caption for file in self.files]