Merge pull request #921 from zanllp/fix/cleanup-orphaned-images-on-path-removal

fix: cleanup orphaned images when removing extra paths
2026-02-18 14:23:51 +08:00 · 2026-02-18 14:23:51 +08:00 · fb2a8878af
parent 20dd9b09a2 a5b99223e3
commit fb2a8878af
5 changed files with 900 additions and 2 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@ -1,7 +1,8 @@
 {
  "permissions": {
    "allow": [
-      "Bash(git checkout:*)"
+      "Bash(git checkout:*)",
+      "Bash(tree:*)"
    ]
  }
 }
--- a/scripts/iib/api.py
+++ b/scripts/iib/api.py
@ -1484,7 +1484,14 @@ def infinite_image_browsing_api(app: FastAPI, **kwargs):
    async def delete_extra_path(extra_path: ExtraPathModel):
        path = to_abs_path(extra_path.path)
        conn = DataBase.get_conn()
-        ExtraPath.remove(conn, path, extra_path.types, img_search_dirs=get_img_search_dirs())
+        ExtraPath.remove(
+            conn,
+            path,
+            extra_path.types,
+            img_search_dirs=get_img_search_dirs(),
+            all_scanned_paths=mem["all_scanned_paths"],
+        )
+        update_extra_paths(conn)

    
    @app.post(
--- a/scripts/iib/db/datamodel.py
+++ b/scripts/iib/db/datamodel.py
@ -1223,6 +1223,7 @@ class ExtraPath:
        path: str,
        types: List[str] = None,
        img_search_dirs: Optional[List[str]] = [],
+        all_scanned_paths: Optional[List[str]] = [],
    ):
        with closing(conn.cursor()) as cur:
            path = os.path.normpath(path)
@ -1245,6 +1246,56 @@ class ExtraPath:
                Folder.remove_folder(conn, path)
            conn.commit()

+            # Clean up orphaned images that are no longer under any scanned path
+            if all_scanned_paths:
+                remaining_paths = [
+                    os.path.normpath(p) for p in all_scanned_paths
+                    if os.path.normpath(p) != path
+                ]
+                cls._cleanup_orphaned_images(conn, path, remaining_paths)
+
+    @classmethod
+    def _cleanup_orphaned_images(
+        cls,
+        conn,
+        removed_path: str,
+        remaining_paths: List[str],
+    ):
+        """
+        Clean up images under removed_path that are not covered by any remaining_paths.
+        An image is orphaned if it's under removed_path but not under any of the remaining paths.
+        """
+        with closing(conn.cursor()) as cur:
+            # Find all images under the removed path
+            cur.execute(
+                "SELECT id, path FROM image WHERE path LIKE ?",
+                (removed_path + os.sep + "%",)
+            )
+            rows = cur.fetchall()
+
+            if not rows:
+                return
+
+            orphaned_ids = []
+            for row in rows:
+                img_id, img_path = row[0], row[1]
+                img_path_normalized = os.path.normpath(img_path)
+
+                # Check if this image is still covered by any remaining path
+                is_still_owned = False
+                for remaining_path in remaining_paths:
+                    # Image is owned if its path starts with the remaining path
+                    if img_path_normalized.startswith(remaining_path + os.sep) or img_path_normalized == remaining_path:
+                        is_still_owned = True
+                        break
+
+                if not is_still_owned:
+                    orphaned_ids.append(img_id)
+
+            # Batch remove orphaned images
+            if orphaned_ids:
+                Image.safe_batch_remove(conn, orphaned_ids)
+
    @classmethod
    def create_table(cls, conn):
        with closing(conn.cursor()) as cur:
--- a/skills/iib-api/SKILL.md
+++ b/skills/iib-api/SKILL.md
@ -0,0 +1,268 @@
+---
+name: IIB API
+description: Access IIB (Infinite Image Browsing) APIs for image searching, browsing, tagging, and AI-powered organization.
+---
+
+# IIB (Infinite Image Browsing) API Skill
+
+IIB is a powerful image/video browsing, searching, and management tool with support for parsing metadata from multiple AI generation tools.
+
+## Starting the Service
+
+### Method 1: Standalone Mode (Recommended)
+
+```bash
+# Basic startup
+python app.py --port 8000 --host 127.0.0.1
+
+# With extra scan paths
+python app.py --port 8000 --extra_paths /path/to/images /another/path
+
+# Update index on startup
+python app.py --port 8000 --extra_paths /path/to/images --update_image_index
+
+# Enable CORS for external access
+python app.py --port 8000 --allow_cors
+
+# Full example
+python app.py --port 8000 --host 0.0.0.0 --allow_cors --extra_paths /my/images --update_image_index
+```
+
+### Method 2: As SD WebUI Extension
+
+Place the project in the `extensions/sd-webui-infinite-image-browsing` directory, then start SD WebUI as usual.
+
+API Base URL: `http://localhost:7860/infinite_image_browsing`
+
+### Method 3: Python Code Integration
+
+```python
+from app import launch_app, async_launch_app, AppUtils
+from fastapi import FastAPI
+
+# Option A: Direct launch
+launch_app(port=8000, extra_paths=["/my/images"], allow_cors=True)
+
+# Option B: Mount to existing FastAPI app
+app = FastAPI()
+app_utils = AppUtils(extra_paths=["/my/images"], allow_cors=True)
+app_utils.wrap_app(app)
+
+# Option C: Async launch for Jupyter Notebook
+import asyncio
+await async_launch_app(port=8000, extra_paths=["/my/images"])
+```
+
+### Environment Variables
+
+```bash
+# Authentication key (optional, enables API authentication)
+export IIB_SECRET_KEY="your_secret_key"
+
+# AI features configuration (required for clustering, smart organization)
+export OPENAI_API_KEY="sk-xxx"
+export OPENAI_BASE_URL="https://api.openai.com/v1"  # or compatible endpoint
+export AI_MODEL="gpt-4o-mini"
+export EMBEDDING_MODEL="text-embedding-3-small"
+
+# Access control
+export IIB_ACCESS_CONTROL_ALLOWED_PATHS="/path1,/path2"
+export IIB_ACCESS_CONTROL_PERMISSION="read-write"  # read-only | read-write | write-only
+```
+
+---
+
+## Core Feature: Image Search
+
+IIB provides multiple image search methods - this is its core capability.
+
+> **Note:** The examples below use Python for illustration, but you can use any language (Node.js, Go, Rust, etc.) that supports HTTP requests. The API is language-agnostic REST.
+
+### 1. Substring Search (Fuzzy Search)
+
+Search images by text in file path or generation parameters.
+
+```python
+import requests
+
+BASE_URL = "http://localhost:8000/infinite_image_browsing"
+
+# Search images containing "landscape"
+resp = requests.post(f"{BASE_URL}/db/search_by_substr", json={
+    "surstr": "landscape",      # Search keyword
+    "cursor": "",               # Pagination cursor, empty for first page
+    "regexp": "",               # Regular expression (optional)
+    "size": 100,                # Results per page
+    "folder_paths": [],         # Limit to specific directories (optional)
+    "media_type": "image"       # "all" | "image" | "video"
+})
+
+result = resp.json()
+for file in result["files"]:
+    print(file["fullpath"], file["size"])
+
+# Pagination
+if result["cursor"]["has_next"]:
+    next_resp = requests.post(f"{BASE_URL}/db/search_by_substr", json={
+        "surstr": "landscape",
+        "cursor": result["cursor"]["next"],
+        "regexp": "",
+        "size": 100
+    })
+```
+
+### 2. Regular Expression Search
+
+Use regex for precise pattern matching.
+
+```python
+# Search images with filenames starting with numbers
+resp = requests.post(f"{BASE_URL}/db/search_by_substr", json={
+    "surstr": "",
+    "cursor": "",
+    "regexp": r"^\d+.*\.png$",  # Regex pattern
+    "size": 100
+})
+
+# Search images with specific prompt format
+resp = requests.post(f"{BASE_URL}/db/search_by_substr", json={
+    "surstr": "",
+    "cursor": "",
+    "regexp": r"masterpiece.*1girl.*blue eyes",
+    "size": 100
+})
+```
+
+### 3. Tag-based Search
+
+Search by custom tags with AND/OR/NOT logic.
+
+```python
+# First get all tags
+tags_resp = requests.get(f"{BASE_URL}/db/basic_info")
+all_tags = tags_resp.json()["tags"]
+# tags format: [{"id": 1, "name": "favorites", "type": "custom"}, ...]
+
+# Search: (tag_id=1 AND tag_id=2) OR tag_id=3, excluding tag_id=4
+resp = requests.post(f"{BASE_URL}/db/match_images_by_tags", json={
+    "and_tags": [1, 2],         # Must have all these tags
+    "or_tags": [3],             # Have any of these
+    "not_tags": [4],            # Exclude these tags
+    "cursor": "",
+    "size": 100,
+    "folder_paths": [],         # Limit to directories (optional)
+    "random_sort": False        # Random order
+})
+```
+
+### 4. Directory Browsing
+
+List files and subdirectories in a folder.
+
+```python
+# List directory contents
+resp = requests.get(f"{BASE_URL}/files", params={
+    "folder_path": "/path/to/images"
+})
+
+files = resp.json()["files"]
+for f in files:
+    if f["type"] == "dir":
+        print(f"[DIR] {f['name']}")
+    else:
+        print(f"[FILE] {f['name']} - {f['size']}")
+```
+
+### 5. Random Images
+
+Get random images from the database.
+
+```python
+resp = requests.get(f"{BASE_URL}/db/random_images")
+random_images = resp.json()  # Returns 128 random images
+```
+
+### 6. AI Semantic Clustering
+
+Cluster images by semantic similarity of generation parameters.
+
+```python
+# Start clustering job
+start_resp = requests.post(f"{BASE_URL}/db/cluster_iib_output_job_start", json={
+    "folder_paths": ["/path/to/images"],
+    "threshold": 0.85,          # Similarity threshold
+    "min_cluster_size": 3,      # Minimum cluster size
+    "lang": "en",               # Title language
+    "recursive": True           # Include subdirectories
+})
+job_id = start_resp.json()["job_id"]
+
+# Poll for completion
+import time
+while True:
+    status = requests.get(f"{BASE_URL}/db/cluster_iib_output_job_status",
+                          params={"job_id": job_id}).json()
+    if status.get("status") == "completed":
+        clusters = status["result"]["clusters"]
+        for c in clusters:
+            print(f"Topic: {c['title']}, Count: {c['size']}")
+            print(f"  Keywords: {c['keywords']}")
+            print(f"  Files: {c['paths'][:3]}...")
+        break
+    time.sleep(2)
+```
+
+---
+
+## Common Operations
+
+### Batch Tagging
+
+```python
+# Create a tag
+tag = requests.post(f"{BASE_URL}/db/add_custom_tag",
+                    json={"tag_name": "favorites"}).json()
+
+# Batch add tag to images
+requests.post(f"{BASE_URL}/db/batch_update_image_tag", json={
+    "img_paths": ["/path/to/img1.png", "/path/to/img2.png"],
+    "action": "add",
+    "tag_id": tag["id"]
+})
+```
+
+### Get Image Generation Parameters
+
+```python
+# Single image
+geninfo = requests.get(f"{BASE_URL}/image_geninfo",
+                       params={"path": "/path/to/image.png"}).text
+
+# Batch get
+batch_info = requests.post(f"{BASE_URL}/image_geninfo_batch", json={
+    "paths": ["/path/to/img1.png", "/path/to/img2.png"]
+}).json()
+```
+
+### Smart File Organization
+
+```python
+# Start organization job
+job = requests.post(f"{BASE_URL}/db/organize_files_start", json={
+    "folder_paths": ["/messy/folder"],
+    "dest_folder": "/organized/folder",
+    "threshold": 0.85,
+    "lang": "en"
+}).json()
+
+# Once the job has finished (poll GET /db/organize_files_status with the job_id), confirm it
+requests.post(f"{BASE_URL}/db/organize_files_confirm", json={
+    "job_id": job["job_id"]
+})
+```
+
+---
+
+## Reference Documentation
+
+See detailed API documentation: [references/api-reference.md](references/api-reference.md)
--- a/skills/iib-api/references/api-reference.md
+++ b/skills/iib-api/references/api-reference.md
@ -0,0 +1,571 @@
+# IIB API Reference
+
+Complete API endpoint reference documentation.
+
+Base path: `/infinite_image_browsing`
+
+---
+
+## 1. File System Operations
+
+### GET /files
+List directory files.
+
+**Parameters:**
+- `folder_path` (string): Directory path
+
+**Response:**
+```json
+{
+  "files": [{
+    "type": "file|dir",
+    "date": 1234567890.0,
+    "created_time": 1234567890.0,
+    "size": "1.2 MB",
+    "bytes": 1258291,
+    "name": "image.png",
+    "fullpath": "/path/to/image.png",
+    "is_under_scanned_path": true
+  }]
+}
+```
+
+### POST /batch_get_files_info
+Batch get file information.
+
+**Request body:**
+```json
+{ "paths": ["/path/to/file1", "/path/to/file2"] }
+```
+
+### POST /delete_files
+Delete files or empty folders.
+
+**Request body:**
+```json
+{ "file_paths": ["/path/to/file1", "/path/to/file2"] }
+```
+
+### POST /mkdirs
+Create directory.
+
+**Request body:**
+```json
+{ "dest_folder": "/path/to/new/folder" }
+```
+
+### POST /copy_files
+Copy files.
+
+**Request body:**
+```json
+{
+  "file_paths": ["/path/to/file1"],
+  "dest": "/destination/folder",
+  "create_dest_folder": false,
+  "continue_on_error": false
+}
+```
+
+### POST /move_files
+Move files.
+
+**Request body:**
+```json
+{
+  "file_paths": ["/path/to/file1"],
+  "dest": "/destination/folder",
+  "create_dest_folder": false,
+  "continue_on_error": false
+}
+```
+
+### POST /db/rename
+Rename file.
+
+**Request body:**
+```json
+{ "path": "/path/to/file", "name": "new_name.png" }
+```
+
+### POST /flatten_folder
+Flatten folder (move files from subdirectories to root).
+
+**Request body:**
+```json
+{
+  "folder_path": "/path/to/folder",
+  "dry_run": true
+}
+```
+
+### POST /zip
+Create ZIP archive.
+
+**Request body:**
+```json
+{
+  "paths": ["/path/to/file1", "/path/to/file2"],
+  "compress": true,
+  "pack_only": false
+}
+```
+
+### POST /check_path_exists
+Check if paths exist.
+
+**Request body:**
+```json
+{ "paths": ["/path1", "/path2"] }
+```
+
+---
+
+## 2. Media File Access
+
+### GET /image-thumbnail
+Get image thumbnail.
+
+**Parameters:**
+- `path` (string): Image path
+- `t` (string): Timestamp (for caching)
+- `size` (string, default "256x256"): Thumbnail size
+
+**Response:** WebP image
+
+### GET /file
+Get original file.
+
+**Parameters:**
+- `path` (string): File path
+- `t` (string): Timestamp
+- `disposition` (string, optional): Download filename
+
+### GET /stream_video
+Stream video with HTTP Range support.
+
+**Parameters:**
+- `path` (string): Video path
+
+### GET /video_cover
+Get video cover thumbnail.
+
+**Parameters:**
+- `path` (string): Video path
+- `mt` (string): Modified time
+
+---
+
+## 3. Image Metadata
+
+### GET /image_geninfo
+Get image generation info (SD prompt, etc.).
+
+**Parameters:**
+- `path` (string): Image path
+
+**Response:** Generation parameter text
+
+### POST /image_geninfo_batch
+Batch get generation info.
+
+**Request body:**
+```json
+{ "paths": ["/path/to/img1.png", "/path/to/img2.png"] }
+```
+
+### GET /image_exif
+Get image EXIF data.
+
+**Parameters:**
+- `path` (string): Image path
+
+---
+
+## 4. Database & Search
+
+### GET /db/basic_info
+Get database basic info.
+
+**Response:**
+```json
+{
+  "img_count": 10000,
+  "tags": [{"id": 1, "name": "tag1", "type": "custom", "color": "#ff0000"}],
+  "expired": false,
+  "expired_dirs": []
+}
+```
+
+### GET /db/random_images
+Get random images (128 images).
+
+### POST /db/update_image_data
+Refresh image index (incremental update).
+
+### POST /db/rebuild_index
+Full rebuild of image index.
+
+### POST /db/search_by_substr
+Substring search.
+
+**Request body:**
+```json
+{
+  "surstr": "search term",
+  "cursor": "",
+  "regexp": "",
+  "folder_paths": [],
+  "size": 200,
+  "path_only": false,
+  "media_type": "all"
+}
+```
+
+**Response:**
+```json
+{
+  "files": [{...FileInfoDict...}],
+  "cursor": { "has_next": true, "next": "cursor_string" }
+}
+```
+
+### POST /db/match_images_by_tags
+Tag-based search.
+
+**Request body:**
+```json
+{
+  "and_tags": [1, 2],
+  "or_tags": [3],
+  "not_tags": [4],
+  "cursor": "",
+  "folder_paths": [],
+  "size": 200,
+  "random_sort": false
+}
+```
+
+---
+
+## 5. Tag Management
+
+### GET /db/img_selected_custom_tag
+Get image's custom tags.
+
+**Parameters:**
+- `path` (string): Image path
+
+### POST /db/get_image_tags
+Batch get image tags.
+
+**Request body:**
+```json
+{ "paths": ["/path/to/img1.png"] }
+```
+
+### POST /db/add_custom_tag
+Add custom tag.
+
+**Request body:**
+```json
+{ "tag_name": "my_tag" }
+```
+
+**Response:**
+```json
+{ "id": 1, "name": "my_tag", "type": "custom", "color": "" }
+```
+
+### POST /db/toggle_custom_tag_to_img
+Toggle image tag (add if missing, remove if present).
+
+**Request body:**
+```json
+{ "img_path": "/path/to/image.png", "tag_id": 1 }
+```
+
+### POST /db/batch_update_image_tag
+Batch update tags.
+
+**Request body:**
+```json
+{
+  "img_paths": ["/path/to/img1.png", "/path/to/img2.png"],
+  "action": "add",
+  "tag_id": 1
+}
+```
+
+### POST /db/remove_custom_tag
+Delete custom tag.
+
+**Request body:**
+```json
+{ "tag_id": 1 }
+```
+
+### POST /db/update_tag
+Update tag properties.
+
+**Request body:**
+```json
+{ "id": 1, "color": "#ff0000" }
+```
+
+---
+
+## 6. AI Features
+
+### POST /ai-chat
+General AI chat interface (OpenAI compatible).
+
+**Request body:**
+```json
+{
+  "messages": [
+    { "role": "system", "content": "You are a helpful assistant." },
+    { "role": "user", "content": "Hello!" }
+  ],
+  "temperature": 0.7,
+  "max_tokens": null,
+  "stream": false
+}
+```
+
+### POST /db/build_iib_output_embeddings
+Build image embeddings.
+
+**Request body:**
+```json
+{
+  "folder": "/path/to/folder",
+  "model": "text-embedding-3-small",
+  "force": false,
+  "batch_size": 100,
+  "max_chars": 2000,
+  "recursive": false
+}
+```
+
+### POST /db/cluster_iib_output_job_start
+Start clustering background job.
+
+**Request body:**
+```json
+{
+  "folder": "/path/to/folder",
+  "folder_paths": [],
+  "model": "text-embedding-3-small",
+  "threshold": 0.85,
+  "min_cluster_size": 3,
+  "title_model": "gpt-4o-mini",
+  "lang": "en",
+  "recursive": false
+}
+```
+
+**Response:**
+```json
+{ "job_id": "uuid-string" }
+```
+
+### GET /db/cluster_iib_output_job_status
+Query clustering job status.
+
+**Parameters:**
+- `job_id` (string): Job ID
+
+**Response:**
+```json
+{
+  "job_id": "uuid",
+  "status": "running|completed|failed",
+  "progress": 0.5,
+  "result": {
+    "clusters": [{
+      "id": "c1",
+      "title": "Landscape Photos",
+      "size": 50,
+      "keywords": ["landscape", "nature"],
+      "paths": ["/path/to/img1.png", ...]
+    }]
+  }
+}
+```
+
+---
+
+## 7. Smart File Organization
+
+### POST /db/organize_files_start
+Start file organization job.
+
+**Request body:**
+```json
+{
+  "folder_paths": ["/path/to/source"],
+  "dest_folder": "/path/to/destination",
+  "threshold": 0.90,
+  "min_cluster_size": 2,
+  "lang": "en",
+  "recursive": false,
+  "folder_naming": "title",
+  "action": "move",
+  "handle_noise": "unsorted",
+  "noise_folder_name": "Unsorted"
+}
+```
+
+**Parameter details:**
+- `folder_naming`: "title" | "keywords" | "id"
+- `action`: "move" | "copy"
+- `handle_noise`: "skip" | "unsorted" | "leave"
+
+### GET /db/organize_files_status
+Query organization job status.
+
+**Parameters:**
+- `job_id` (string): Job ID
+
+### POST /db/organize_files_confirm
+Confirm and execute organization.
+
+**Request body:**
+```json
+{
+  "job_id": "uuid",
+  "folder_edits": [
+    { "cluster_id": "c1", "new_folder_name": "Custom Name" }
+  ],
+  "skip_cluster_ids": ["c2", "c3"]
+}
+```
+
+---
+
+## 8. Tag Graph
+
+### POST /db/cluster_tag_graph
+Build tag relationship graph.
+
+**Request body:**
+```json
+{
+  "folder_paths": ["/path/to/folder"],
+  "lang": "en"
+}
+```
+
+---
+
+## 9. Extra Paths Management
+
+### GET /db/extra_paths
+Get extra paths list.
+
+### POST /db/extra_paths
+Add extra path.
+
+**Request body:**
+```json
+{ "path": "/new/scan/path", "types": ["scan"] }
+```
+
+### DELETE /db/extra_paths
+Remove extra path.
+
+**Request body:**
+```json
+{ "path": "/path/to/remove", "types": ["scan"] }
+```
+
+### POST /db/alias_extra_path
+Set path alias.
+
+**Request body:**
+```json
+{ "path": "/path", "alias": "My Alias" }
+```
+
+---
+
+## 10. System APIs
+
+### GET /hello
+Health check. Returns `"hello"`
+
+### GET /version
+Get version info.
+
+### GET /global_setting
+Get global settings.
+
+### POST /app_fe_setting
+Save frontend setting.
+
+### DELETE /app_fe_setting
+Delete frontend setting.
+
+### POST /open_folder
+Open file browser.
+
+### POST /open_with_default_app
+Open file with default application.
+
+### POST /shutdown
+Shutdown application (requires `--enable_shutdown`).
+
+---
+
+## Data Models
+
+### FileInfoDict
+```typescript
+{
+  type: "file" | "dir"
+  date: number           // Modified timestamp
+  created_time: number   // Created timestamp
+  size: string           // Human readable size "1.2 MB"
+  bytes: number          // Raw byte count
+  name: string           // Filename
+  fullpath: string       // Full path
+  is_under_scanned_path: boolean
+}
+```
+
+### Cursor
+```typescript
+{
+  has_next: boolean
+  next: string           // Next page cursor
+}
+```
+
+### Tag
+```typescript
+{
+  id: number
+  name: string
+  type: "custom" | "auto"
+  color: string
+}
+```
+
+---
+
+## Error Handling
+
+| Status Code | Meaning |
+|-------------|---------|
+| 200 | Success |
+| 400 | Bad request / Invalid parameters |
+| 401 | Authentication failed |
+| 403 | Permission denied |
+| 404 | Resource not found |
+| 500 | Server error |
+
+Error response format:
+```json
+{ "detail": "error message" }
+```