Merge pull request #919 from zanllp/feat/reuse-existing-folders

feat: AI reuses existing folder names when organizing files
pull/920/head
zanllp 2026-02-17 20:13:04 +08:00 committed by GitHub
commit 1289db58a9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 39 additions and 17 deletions

View File

@ -78,14 +78,6 @@ AI 驱动的自动文件整理
- **后台处理**:大文件夹在后台异步处理,不影响继续使用
- **灵活配置**:支持移动/复制、设置最小聚类大小、递归处理子文件夹
**使用方法:**
1. 进入需要整理的文件夹
2. 点击地址栏中的「智能整理」按钮
3. 配置选项(目标文件夹、最小聚类大小等)
4. 等待 AI 分析完成
5. 预览整理方案
6. 确认执行
> **前置条件**:与自然语言搜索相同 - 需要配置 `OPENAI_BASE_URL`、`OPENAI_API_KEY`,以及 Python 依赖 `numpy`、`hnswlib`
>
> 📸 查看下方[智能整理预览](#智能整理-1)获取截图和视频演示

View File

@ -93,14 +93,6 @@ AI-powered automatic file organization
- **Background Processing**: Large folders are processed in the background, you can continue working
- **Flexible Options**: Choose between move or copy, set minimum cluster size, include subfolders recursively
**How to use:**
1. Navigate to the folder you want to organize
2. Click the "Smart Organize" button in the address bar
3. Configure options (target folder, min cluster size, etc.)
4. Wait for AI analysis to complete
5. Preview the proposed organization
6. Confirm to execute
> **Requirements**: Same as Topic Search - requires `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and Python dependencies `numpy`, `hnswlib`
>
> 📸 See [Smart Organize Preview](#smart-organize) below for screenshots and video demo.

View File

@ -349,13 +349,23 @@ def mount_organize_routes(
}) })
# 1. Start cluster job using topic_cluster API # 1. Start cluster job using topic_cluster API
logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive})") # Get existing folder names in dest directory for AI to consider reusing
dest_folder = req.dest_folder or req.folder_paths[0]
dest_folder = os.path.abspath(dest_folder)
existing_folder_names = []
if os.path.isdir(dest_folder):
for item in os.listdir(dest_folder):
item_path = os.path.join(dest_folder, item)
if os.path.isdir(item_path):
existing_folder_names.append(item)
logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive}, existing_folders={len(existing_folder_names)})")
cluster_job_id = await start_cluster_job_func( cluster_job_id = await start_cluster_job_func(
folder_paths=req.folder_paths, folder_paths=req.folder_paths,
threshold=req.threshold, threshold=req.threshold,
min_cluster_size=req.min_cluster_size, min_cluster_size=req.min_cluster_size,
lang=req.lang, lang=req.lang,
recursive=req.recursive, recursive=req.recursive,
existing_folder_names=existing_folder_names,
) )
# 2. Poll cluster job status until done # 2. Poll cluster job status until done

View File

@ -520,9 +520,12 @@ def _call_chat_title_sync(
prompt_samples: List[str], prompt_samples: List[str],
output_lang: str, output_lang: str,
existing_keywords: Optional[List[str]] = None, existing_keywords: Optional[List[str]] = None,
existing_folder_names: Optional[List[str]] = None,
) -> Optional[Dict]: ) -> Optional[Dict]:
""" """
Ask LLM to generate a short topic title and a few keywords. Returns dict or None. Ask LLM to generate a short topic title and a few keywords. Returns dict or None.
If existing_folder_names is provided, AI will prefer reusing an existing folder name
if the theme matches, instead of generating a new title.
""" """
logger.info("[chat_title] === _call_chat_title_sync START ===") logger.info("[chat_title] === _call_chat_title_sync START ===")
logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang) logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang)
@ -566,6 +569,16 @@ def _call_chat_title_sync(
"- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n" "- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n"
"\n" "\n"
) )
# Add existing folder names hint for file organization scenarios
if existing_folder_names:
folder_list = existing_folder_names[:100] # Limit to 100 folders
sys += (
"IMPORTANT - Title Reuse:\n"
"The following titles have been used before. "
"If the theme matches one of them, you MUST reuse that exact title. "
"Only create a new title if the theme is clearly different from ALL existing ones.\n"
f"Existing titles: {', '.join(folder_list)}\n\n"
)
if existing_keywords: if existing_keywords:
# Dynamic keyword selection based on total unique count # Dynamic keyword selection based on total unique count
unique_count = len(existing_keywords) unique_count = len(existing_keywords)
@ -744,6 +757,7 @@ async def _call_chat_title(
prompt_samples: List[str], prompt_samples: List[str],
output_lang: str, output_lang: str,
existing_keywords: Optional[List[str]] = None, existing_keywords: Optional[List[str]] = None,
existing_folder_names: Optional[List[str]] = None,
) -> Dict: ) -> Dict:
""" """
Same rationale as embeddings: Same rationale as embeddings:
@ -758,6 +772,7 @@ async def _call_chat_title(
prompt_samples=prompt_samples, prompt_samples=prompt_samples,
output_lang=output_lang, output_lang=output_lang,
existing_keywords=existing_keywords, existing_keywords=existing_keywords,
existing_folder_names=existing_folder_names,
) )
if not isinstance(ret, dict): if not isinstance(ret, dict):
raise HTTPException(status_code=502, detail="Chat API returned empty title payload") raise HTTPException(status_code=502, detail="Chat API returned empty title payload")
@ -1239,6 +1254,8 @@ def mount_topic_cluster_routes(
lang: Optional[str] = None lang: Optional[str] = None
# If True, recursively scan subfolders; default True for Topic Search (backward compatible) # If True, recursively scan subfolders; default True for Topic Search (backward compatible)
recursive: Optional[bool] = True recursive: Optional[bool] = True
# Existing folder names in dest directory (for file organize: AI will prefer reusing these)
existing_folder_names: Optional[List[str]] = None
def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict: def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict:
""" """
@ -1441,6 +1458,10 @@ def mount_topic_cluster_routes(
recursive = bool(req.recursive) if req.recursive is not None else False recursive = bool(req.recursive) if req.recursive is not None else False
logger.info("[cluster_after] recursive=%s", recursive) logger.info("[cluster_after] recursive=%s", recursive)
# Extract existing folder names for AI to consider reusing
existing_folder_names = req.existing_folder_names or []
logger.info("[cluster_after] existing_folder_names count=%d", len(existing_folder_names))
if progress_cb: if progress_cb:
logger.info("[cluster_after] Calling progress callback with clustering stage") logger.info("[cluster_after] Calling progress callback with clustering stage")
progress_cb({"stage": "clustering", "folder": folder, "folders": folders}) progress_cb({"stage": "clustering", "folder": folder, "folders": folders})
@ -1707,6 +1728,7 @@ def mount_topic_cluster_routes(
prompt_samples=[rep] + texts[:5], prompt_samples=[rep] + texts[:5],
output_lang=output_lang, output_lang=output_lang,
existing_keywords=top_keywords, existing_keywords=top_keywords,
existing_folder_names=existing_folder_names,
) )
title = (llm or {}).get("title") title = (llm or {}).get("title")
keywords = (llm or {}).get("keywords", []) keywords = (llm or {}).get("keywords", [])
@ -1945,10 +1967,15 @@ def mount_topic_cluster_routes(
min_cluster_size: int = 2, min_cluster_size: int = 2,
lang: str = "en", lang: str = "en",
recursive: bool = False, recursive: bool = False,
existing_folder_names: Optional[List[str]] = None,
) -> str: ) -> str:
""" """
Start a cluster job and return job_id. Start a cluster job and return job_id.
This is a wrapper for organize_files to use. This is a wrapper for organize_files to use.
Args:
existing_folder_names: List of folder names already in dest directory.
AI will prefer reusing these names if theme matches.
""" """
_ensure_perf_deps() _ensure_perf_deps()
req = ClusterIibOutputReq( req = ClusterIibOutputReq(
@ -1957,6 +1984,7 @@ def mount_topic_cluster_routes(
min_cluster_size=min_cluster_size, min_cluster_size=min_cluster_size,
lang=lang, lang=lang,
recursive=recursive, recursive=recursive,
existing_folder_names=existing_folder_names,
) )
job_id = uuid.uuid4().hex job_id = uuid.uuid4().hex
_job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()}) _job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()})