From c062b3bed5a3add5091fc5008944f7204cba89b3 Mon Sep 17 00:00:00 2001 From: zanllp Date: Tue, 17 Feb 2026 19:36:21 +0800 Subject: [PATCH] feat: AI reuses existing folder names when organizing files When running AI organize on a folder that already contains subfolders, the AI will now consider reusing existing folder names if the theme matches, instead of always generating new titles. This prevents duplicate folders from being created when re-organizing the same directory multiple times. - Add existing_folder_names parameter to ClusterIibOutputReq - Pass existing folder names from dest directory to AI title generation - Update AI prompt to prioritize reusing matching folder names Co-Authored-By: Claude --- README-zh.md | 8 -------- README.md | 8 -------- scripts/iib/organize_files.py | 12 +++++++++++- scripts/iib/topic_cluster.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/README-zh.md b/README-zh.md index 05cd77d..083e271 100644 --- a/README-zh.md +++ b/README-zh.md @@ -78,14 +78,6 @@ AI 驱动的自动文件整理 - **后台处理**:大文件夹在后台异步处理,不影响继续使用 - **灵活配置**:支持移动/复制、设置最小聚类大小、递归处理子文件夹 -**使用方法:** -1. 进入需要整理的文件夹 -2. 点击地址栏中的「智能整理」按钮 -3. 配置选项(目标文件夹、最小聚类大小等) -4. 等待 AI 分析完成 -5. 预览整理方案 -6. 确认执行 - > **前置条件**:与自然语言搜索相同 - 需要配置 `OPENAI_BASE_URL`、`OPENAI_API_KEY`,以及 Python 依赖 `numpy`、`hnswlib` > > 📸 查看下方[智能整理预览](#智能整理-1)获取截图和视频演示 diff --git a/README.md b/README.md index ff75073..1221852 100644 --- a/README.md +++ b/README.md @@ -93,14 +93,6 @@ AI-powered automatic file organization - **Background Processing**: Large folders are processed in the background, you can continue working - **Flexible Options**: Choose between move or copy, set minimum cluster size, include subfolders recursively -**How to use:** -1. Navigate to the folder you want to organize -2. Click the "Smart Organize" button in the address bar -3. Configure options (target folder, min cluster size, etc.) -4. Wait for AI analysis to complete -5. Preview the proposed organization -6. Confirm to execute - > **Requirements**: Same as Topic Search - requires `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and Python dependencies `numpy`, `hnswlib` > > 📸 See [Smart Organize Preview](#smart-organize) below for screenshots and video demo. diff --git a/scripts/iib/organize_files.py b/scripts/iib/organize_files.py index 4b410f4..5c2f79e 100644 --- a/scripts/iib/organize_files.py +++ b/scripts/iib/organize_files.py @@ -349,13 +349,23 @@ def mount_organize_routes( }) # 1. Start cluster job using topic_cluster API - logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive})") + # Get existing folder names in dest directory for AI to consider reusing + dest_folder = req.dest_folder or req.folder_paths[0] + dest_folder = os.path.abspath(dest_folder) + existing_folder_names = [] + if os.path.isdir(dest_folder): + for item in os.listdir(dest_folder): + item_path = os.path.join(dest_folder, item) + if os.path.isdir(item_path): + existing_folder_names.append(item) + logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive}, existing_folders={len(existing_folder_names)})") cluster_job_id = await start_cluster_job_func( folder_paths=req.folder_paths, threshold=req.threshold, min_cluster_size=req.min_cluster_size, lang=req.lang, recursive=req.recursive, + existing_folder_names=existing_folder_names, ) # 2. Poll cluster job status until done diff --git a/scripts/iib/topic_cluster.py b/scripts/iib/topic_cluster.py index 4f5eb75..bcf90ac 100644 --- a/scripts/iib/topic_cluster.py +++ b/scripts/iib/topic_cluster.py @@ -520,9 +520,12 @@ def _call_chat_title_sync( prompt_samples: List[str], output_lang: str, existing_keywords: Optional[List[str]] = None, + existing_folder_names: Optional[List[str]] = None, ) -> Optional[Dict]: """ Ask LLM to generate a short topic title and a few keywords. Returns dict or None. + If existing_folder_names is provided, AI will prefer reusing an existing folder name + if the theme matches, instead of generating a new title. """ logger.info("[chat_title] === _call_chat_title_sync START ===") logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang) @@ -566,6 +569,16 @@ def _call_chat_title_sync( "- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n" "\n" ) + # Add existing folder names hint for file organization scenarios + if existing_folder_names: + folder_list = existing_folder_names[:100] # Limit to 100 folders + sys += ( + "IMPORTANT - Title Reuse:\n" + "The following titles have been used before. " + "If the theme matches one of them, you MUST reuse that exact title. " + "Only create a new title if the theme is clearly different from ALL existing ones.\n" + f"Existing titles: {', '.join(folder_list)}\n\n" + ) if existing_keywords: # Dynamic keyword selection based on total unique count unique_count = len(existing_keywords) @@ -744,6 +757,7 @@ async def _call_chat_title( prompt_samples: List[str], output_lang: str, existing_keywords: Optional[List[str]] = None, + existing_folder_names: Optional[List[str]] = None, ) -> Dict: """ Same rationale as embeddings: @@ -758,6 +772,7 @@ async def _call_chat_title( prompt_samples=prompt_samples, output_lang=output_lang, existing_keywords=existing_keywords, + existing_folder_names=existing_folder_names, ) if not isinstance(ret, dict): raise HTTPException(status_code=502, detail="Chat API returned empty title payload") @@ -1239,6 +1254,8 @@ def mount_topic_cluster_routes( lang: Optional[str] = None # If True, recursively scan subfolders; default True for Topic Search (backward compatible) recursive: Optional[bool] = True + # Existing folder names in dest directory (for file organize: AI will prefer reusing these) + existing_folder_names: Optional[List[str]] = None def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict: """ @@ -1441,6 +1458,10 @@ def mount_topic_cluster_routes( recursive = bool(req.recursive) if req.recursive is not None else False logger.info("[cluster_after] recursive=%s", recursive) + # Extract existing folder names for AI to consider reusing + existing_folder_names = req.existing_folder_names or [] + logger.info("[cluster_after] existing_folder_names count=%d", len(existing_folder_names)) + if progress_cb: logger.info("[cluster_after] Calling progress callback with clustering stage") progress_cb({"stage": "clustering", "folder": folder, "folders": folders}) @@ -1707,6 +1728,7 @@ def mount_topic_cluster_routes( prompt_samples=[rep] + texts[:5], output_lang=output_lang, existing_keywords=top_keywords, + existing_folder_names=existing_folder_names, ) title = (llm or {}).get("title") keywords = (llm or {}).get("keywords", []) @@ -1945,10 +1967,15 @@ def mount_topic_cluster_routes( min_cluster_size: int = 2, lang: str = "en", recursive: bool = False, + existing_folder_names: Optional[List[str]] = None, ) -> str: """ Start a cluster job and return job_id. This is a wrapper for organize_files to use. + + Args: + existing_folder_names: List of folder names already in dest directory. + AI will prefer reusing these names if theme matches. """ _ensure_perf_deps() req = ClusterIibOutputReq( @@ -1957,6 +1984,7 @@ def mount_topic_cluster_routes( min_cluster_size=min_cluster_size, lang=lang, recursive=recursive, + existing_folder_names=existing_folder_names, ) job_id = uuid.uuid4().hex _job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()})