diff --git a/README-zh.md b/README-zh.md index 05cd77d..083e271 100644 --- a/README-zh.md +++ b/README-zh.md @@ -78,14 +78,6 @@ AI 驱动的自动文件整理 - **后台处理**:大文件夹在后台异步处理,不影响继续使用 - **灵活配置**:支持移动/复制、设置最小聚类大小、递归处理子文件夹 -**使用方法:** -1. 进入需要整理的文件夹 -2. 点击地址栏中的「智能整理」按钮 -3. 配置选项(目标文件夹、最小聚类大小等) -4. 等待 AI 分析完成 -5. 预览整理方案 -6. 确认执行 - > **前置条件**:与自然语言搜索相同 - 需要配置 `OPENAI_BASE_URL`、`OPENAI_API_KEY`,以及 Python 依赖 `numpy`、`hnswlib` > > 📸 查看下方[智能整理预览](#智能整理-1)获取截图和视频演示 diff --git a/README.md b/README.md index ff75073..1221852 100644 --- a/README.md +++ b/README.md @@ -93,14 +93,6 @@ AI-powered automatic file organization - **Background Processing**: Large folders are processed in the background, you can continue working - **Flexible Options**: Choose between move or copy, set minimum cluster size, include subfolders recursively -**How to use:** -1. Navigate to the folder you want to organize -2. Click the "Smart Organize" button in the address bar -3. Configure options (target folder, min cluster size, etc.) -4. Wait for AI analysis to complete -5. Preview the proposed organization -6. Confirm to execute - > **Requirements**: Same as Topic Search - requires `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and Python dependencies `numpy`, `hnswlib` > > 📸 See [Smart Organize Preview](#smart-organize) below for screenshots and video demo. diff --git a/scripts/iib/organize_files.py b/scripts/iib/organize_files.py index 4b410f4..5c2f79e 100644 --- a/scripts/iib/organize_files.py +++ b/scripts/iib/organize_files.py @@ -349,13 +349,23 @@ def mount_organize_routes( }) # 1. Start cluster job using topic_cluster API - logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive})") + # Get existing folder names in dest directory for AI to consider reusing + dest_folder = req.dest_folder or req.folder_paths[0] + dest_folder = os.path.abspath(dest_folder) + existing_folder_names = [] + if os.path.isdir(dest_folder): + for item in os.listdir(dest_folder): + item_path = os.path.join(dest_folder, item) + if os.path.isdir(item_path): + existing_folder_names.append(item) + logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive}, existing_folders={len(existing_folder_names)})") cluster_job_id = await start_cluster_job_func( folder_paths=req.folder_paths, threshold=req.threshold, min_cluster_size=req.min_cluster_size, lang=req.lang, recursive=req.recursive, + existing_folder_names=existing_folder_names, ) # 2. Poll cluster job status until done diff --git a/scripts/iib/topic_cluster.py b/scripts/iib/topic_cluster.py index 4f5eb75..bcf90ac 100644 --- a/scripts/iib/topic_cluster.py +++ b/scripts/iib/topic_cluster.py @@ -520,9 +520,12 @@ def _call_chat_title_sync( prompt_samples: List[str], output_lang: str, existing_keywords: Optional[List[str]] = None, + existing_folder_names: Optional[List[str]] = None, ) -> Optional[Dict]: """ Ask LLM to generate a short topic title and a few keywords. Returns dict or None. + If existing_folder_names is provided, AI will prefer reusing an existing folder name + if the theme matches, instead of generating a new title. """ logger.info("[chat_title] === _call_chat_title_sync START ===") logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang) @@ -566,6 +569,16 @@ def _call_chat_title_sync( "- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n" "\n" ) + # Add existing folder names hint for file organization scenarios + if existing_folder_names: + folder_list = existing_folder_names[:100] # Limit to 100 folders + sys += ( + "IMPORTANT - Title Reuse:\n" + "The following titles have been used before. " + "If the theme matches one of them, you MUST reuse that exact title. " + "Only create a new title if the theme is clearly different from ALL existing ones.\n" + f"Existing titles: {', '.join(folder_list)}\n\n" + ) if existing_keywords: # Dynamic keyword selection based on total unique count unique_count = len(existing_keywords) @@ -744,6 +757,7 @@ async def _call_chat_title( prompt_samples: List[str], output_lang: str, existing_keywords: Optional[List[str]] = None, + existing_folder_names: Optional[List[str]] = None, ) -> Dict: """ Same rationale as embeddings: @@ -758,6 +772,7 @@ async def _call_chat_title( prompt_samples=prompt_samples, output_lang=output_lang, existing_keywords=existing_keywords, + existing_folder_names=existing_folder_names, ) if not isinstance(ret, dict): raise HTTPException(status_code=502, detail="Chat API returned empty title payload") @@ -1239,6 +1254,8 @@ def mount_topic_cluster_routes( lang: Optional[str] = None # If True, recursively scan subfolders; default True for Topic Search (backward compatible) recursive: Optional[bool] = True + # Existing folder names in dest directory (for file organize: AI will prefer reusing these) + existing_folder_names: Optional[List[str]] = None def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict: """ @@ -1441,6 +1458,10 @@ def mount_topic_cluster_routes( recursive = bool(req.recursive) if req.recursive is not None else False logger.info("[cluster_after] recursive=%s", recursive) + # Extract existing folder names for AI to consider reusing + existing_folder_names = req.existing_folder_names or [] + logger.info("[cluster_after] existing_folder_names count=%d", len(existing_folder_names)) + if progress_cb: logger.info("[cluster_after] Calling progress callback with clustering stage") progress_cb({"stage": "clustering", "folder": folder, "folders": folders}) @@ -1707,6 +1728,7 @@ def mount_topic_cluster_routes( prompt_samples=[rep] + texts[:5], output_lang=output_lang, existing_keywords=top_keywords, + existing_folder_names=existing_folder_names, ) title = (llm or {}).get("title") keywords = (llm or {}).get("keywords", []) @@ -1945,10 +1967,15 @@ def mount_topic_cluster_routes( min_cluster_size: int = 2, lang: str = "en", recursive: bool = False, + existing_folder_names: Optional[List[str]] = None, ) -> str: """ Start a cluster job and return job_id. This is a wrapper for organize_files to use. + + Args: + existing_folder_names: List of folder names already in dest directory. + AI will prefer reusing these names if theme matches. """ _ensure_perf_deps() req = ClusterIibOutputReq( @@ -1957,6 +1984,7 @@ def mount_topic_cluster_routes( min_cluster_size=min_cluster_size, lang=lang, recursive=recursive, + existing_folder_names=existing_folder_names, ) job_id = uuid.uuid4().hex _job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()})