feat: AI reuses existing folder names when organizing files
When running AI organize on a folder that already contains subfolders, the AI will now consider reusing existing folder names if the theme matches, instead of always generating new titles. This prevents duplicate folders from being created when re-organizing the same directory multiple times.

- Add existing_folder_names parameter to ClusterIibOutputReq
- Pass existing folder names from dest directory to AI title generation
- Update AI prompt to prioritize reusing matching folder names

Co-Authored-By: Claude <noreply@anthropic.com>

feat/reuse-existing-folders
parent
d5167e293e
commit
c062b3bed5
|
|
@ -78,14 +78,6 @@ AI 驱动的自动文件整理
|
||||||
- **后台处理**:大文件夹在后台异步处理,不影响继续使用
|
- **后台处理**:大文件夹在后台异步处理,不影响继续使用
|
||||||
- **灵活配置**:支持移动/复制、设置最小聚类大小、递归处理子文件夹
|
- **灵活配置**:支持移动/复制、设置最小聚类大小、递归处理子文件夹
|
||||||
|
|
||||||
**使用方法:**
|
|
||||||
1. 进入需要整理的文件夹
|
|
||||||
2. 点击地址栏中的「智能整理」按钮
|
|
||||||
3. 配置选项(目标文件夹、最小聚类大小等)
|
|
||||||
4. 等待 AI 分析完成
|
|
||||||
5. 预览整理方案
|
|
||||||
6. 确认执行
|
|
||||||
|
|
||||||
> **前置条件**:与自然语言搜索相同 - 需要配置 `OPENAI_BASE_URL`、`OPENAI_API_KEY`,以及 Python 依赖 `numpy`、`hnswlib`
|
> **前置条件**:与自然语言搜索相同 - 需要配置 `OPENAI_BASE_URL`、`OPENAI_API_KEY`,以及 Python 依赖 `numpy`、`hnswlib`
|
||||||
>
|
>
|
||||||
> 📸 查看下方[智能整理预览](#智能整理-1)获取截图和视频演示
|
> 📸 查看下方[智能整理预览](#智能整理-1)获取截图和视频演示
|
||||||
|
|
|
||||||
|
|
@ -93,14 +93,6 @@ AI-powered automatic file organization
|
||||||
- **Background Processing**: Large folders are processed in the background, you can continue working
|
- **Background Processing**: Large folders are processed in the background, you can continue working
|
||||||
- **Flexible Options**: Choose between move or copy, set minimum cluster size, include subfolders recursively
|
- **Flexible Options**: Choose between move or copy, set minimum cluster size, include subfolders recursively
|
||||||
|
|
||||||
**How to use:**
|
|
||||||
1. Navigate to the folder you want to organize
|
|
||||||
2. Click the "Smart Organize" button in the address bar
|
|
||||||
3. Configure options (target folder, min cluster size, etc.)
|
|
||||||
4. Wait for AI analysis to complete
|
|
||||||
5. Preview the proposed organization
|
|
||||||
6. Confirm to execute
|
|
||||||
|
|
||||||
> **Requirements**: Same as Topic Search - requires `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and Python dependencies `numpy`, `hnswlib`
|
> **Requirements**: Same as Topic Search - requires `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and Python dependencies `numpy`, `hnswlib`
|
||||||
>
|
>
|
||||||
> 📸 See [Smart Organize Preview](#smart-organize) below for screenshots and video demo.
|
> 📸 See [Smart Organize Preview](#smart-organize) below for screenshots and video demo.
|
||||||
|
|
|
||||||
|
|
@ -349,13 +349,23 @@ def mount_organize_routes(
|
||||||
})
|
})
|
||||||
|
|
||||||
# 1. Start cluster job using topic_cluster API
|
# 1. Start cluster job using topic_cluster API
|
||||||
logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive})")
|
# Get existing folder names in dest directory for AI to consider reusing
|
||||||
|
dest_folder = req.dest_folder or req.folder_paths[0]
|
||||||
|
dest_folder = os.path.abspath(dest_folder)
|
||||||
|
existing_folder_names = []
|
||||||
|
if os.path.isdir(dest_folder):
|
||||||
|
for item in os.listdir(dest_folder):
|
||||||
|
item_path = os.path.join(dest_folder, item)
|
||||||
|
if os.path.isdir(item_path):
|
||||||
|
existing_folder_names.append(item)
|
||||||
|
logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive}, existing_folders={len(existing_folder_names)})")
|
||||||
cluster_job_id = await start_cluster_job_func(
|
cluster_job_id = await start_cluster_job_func(
|
||||||
folder_paths=req.folder_paths,
|
folder_paths=req.folder_paths,
|
||||||
threshold=req.threshold,
|
threshold=req.threshold,
|
||||||
min_cluster_size=req.min_cluster_size,
|
min_cluster_size=req.min_cluster_size,
|
||||||
lang=req.lang,
|
lang=req.lang,
|
||||||
recursive=req.recursive,
|
recursive=req.recursive,
|
||||||
|
existing_folder_names=existing_folder_names,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 2. Poll cluster job status until done
|
# 2. Poll cluster job status until done
|
||||||
|
|
|
||||||
|
|
@ -520,9 +520,12 @@ def _call_chat_title_sync(
|
||||||
prompt_samples: List[str],
|
prompt_samples: List[str],
|
||||||
output_lang: str,
|
output_lang: str,
|
||||||
existing_keywords: Optional[List[str]] = None,
|
existing_keywords: Optional[List[str]] = None,
|
||||||
|
existing_folder_names: Optional[List[str]] = None,
|
||||||
) -> Optional[Dict]:
|
) -> Optional[Dict]:
|
||||||
"""
|
"""
|
||||||
Ask LLM to generate a short topic title and a few keywords. Returns dict or None.
|
Ask LLM to generate a short topic title and a few keywords. Returns dict or None.
|
||||||
|
If existing_folder_names is provided, AI will prefer reusing an existing folder name
|
||||||
|
if the theme matches, instead of generating a new title.
|
||||||
"""
|
"""
|
||||||
logger.info("[chat_title] === _call_chat_title_sync START ===")
|
logger.info("[chat_title] === _call_chat_title_sync START ===")
|
||||||
logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang)
|
logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang)
|
||||||
|
|
@ -566,6 +569,16 @@ def _call_chat_title_sync(
|
||||||
"- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n"
|
"- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n"
|
||||||
"\n"
|
"\n"
|
||||||
)
|
)
|
||||||
|
# Add existing folder names hint for file organization scenarios
|
||||||
|
if existing_folder_names:
|
||||||
|
folder_list = existing_folder_names[:100] # Limit to 100 folders
|
||||||
|
sys += (
|
||||||
|
"IMPORTANT - Title Reuse:\n"
|
||||||
|
"The following titles have been used before. "
|
||||||
|
"If the theme matches one of them, you MUST reuse that exact title. "
|
||||||
|
"Only create a new title if the theme is clearly different from ALL existing ones.\n"
|
||||||
|
f"Existing titles: {', '.join(folder_list)}\n\n"
|
||||||
|
)
|
||||||
if existing_keywords:
|
if existing_keywords:
|
||||||
# Dynamic keyword selection based on total unique count
|
# Dynamic keyword selection based on total unique count
|
||||||
unique_count = len(existing_keywords)
|
unique_count = len(existing_keywords)
|
||||||
|
|
@ -744,6 +757,7 @@ async def _call_chat_title(
|
||||||
prompt_samples: List[str],
|
prompt_samples: List[str],
|
||||||
output_lang: str,
|
output_lang: str,
|
||||||
existing_keywords: Optional[List[str]] = None,
|
existing_keywords: Optional[List[str]] = None,
|
||||||
|
existing_folder_names: Optional[List[str]] = None,
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
Same rationale as embeddings:
|
Same rationale as embeddings:
|
||||||
|
|
@ -758,6 +772,7 @@ async def _call_chat_title(
|
||||||
prompt_samples=prompt_samples,
|
prompt_samples=prompt_samples,
|
||||||
output_lang=output_lang,
|
output_lang=output_lang,
|
||||||
existing_keywords=existing_keywords,
|
existing_keywords=existing_keywords,
|
||||||
|
existing_folder_names=existing_folder_names,
|
||||||
)
|
)
|
||||||
if not isinstance(ret, dict):
|
if not isinstance(ret, dict):
|
||||||
raise HTTPException(status_code=502, detail="Chat API returned empty title payload")
|
raise HTTPException(status_code=502, detail="Chat API returned empty title payload")
|
||||||
|
|
@ -1239,6 +1254,8 @@ def mount_topic_cluster_routes(
|
||||||
lang: Optional[str] = None
|
lang: Optional[str] = None
|
||||||
# If True, recursively scan subfolders; default True for Topic Search (backward compatible)
|
# If True, recursively scan subfolders; default True for Topic Search (backward compatible)
|
||||||
recursive: Optional[bool] = True
|
recursive: Optional[bool] = True
|
||||||
|
# Existing folder names in dest directory (for file organize: AI will prefer reusing these)
|
||||||
|
existing_folder_names: Optional[List[str]] = None
|
||||||
|
|
||||||
def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict:
|
def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict:
|
||||||
"""
|
"""
|
||||||
|
|
@ -1441,6 +1458,10 @@ def mount_topic_cluster_routes(
|
||||||
recursive = bool(req.recursive) if req.recursive is not None else False
|
recursive = bool(req.recursive) if req.recursive is not None else False
|
||||||
logger.info("[cluster_after] recursive=%s", recursive)
|
logger.info("[cluster_after] recursive=%s", recursive)
|
||||||
|
|
||||||
|
# Extract existing folder names for AI to consider reusing
|
||||||
|
existing_folder_names = req.existing_folder_names or []
|
||||||
|
logger.info("[cluster_after] existing_folder_names count=%d", len(existing_folder_names))
|
||||||
|
|
||||||
if progress_cb:
|
if progress_cb:
|
||||||
logger.info("[cluster_after] Calling progress callback with clustering stage")
|
logger.info("[cluster_after] Calling progress callback with clustering stage")
|
||||||
progress_cb({"stage": "clustering", "folder": folder, "folders": folders})
|
progress_cb({"stage": "clustering", "folder": folder, "folders": folders})
|
||||||
|
|
@ -1707,6 +1728,7 @@ def mount_topic_cluster_routes(
|
||||||
prompt_samples=[rep] + texts[:5],
|
prompt_samples=[rep] + texts[:5],
|
||||||
output_lang=output_lang,
|
output_lang=output_lang,
|
||||||
existing_keywords=top_keywords,
|
existing_keywords=top_keywords,
|
||||||
|
existing_folder_names=existing_folder_names,
|
||||||
)
|
)
|
||||||
title = (llm or {}).get("title")
|
title = (llm or {}).get("title")
|
||||||
keywords = (llm or {}).get("keywords", [])
|
keywords = (llm or {}).get("keywords", [])
|
||||||
|
|
@ -1945,10 +1967,15 @@ def mount_topic_cluster_routes(
|
||||||
min_cluster_size: int = 2,
|
min_cluster_size: int = 2,
|
||||||
lang: str = "en",
|
lang: str = "en",
|
||||||
recursive: bool = False,
|
recursive: bool = False,
|
||||||
|
existing_folder_names: Optional[List[str]] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Start a cluster job and return job_id.
|
Start a cluster job and return job_id.
|
||||||
This is a wrapper for organize_files to use.
|
This is a wrapper for organize_files to use.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
existing_folder_names: List of folder names already in dest directory.
|
||||||
|
AI will prefer reusing these names if theme matches.
|
||||||
"""
|
"""
|
||||||
_ensure_perf_deps()
|
_ensure_perf_deps()
|
||||||
req = ClusterIibOutputReq(
|
req = ClusterIibOutputReq(
|
||||||
|
|
@ -1957,6 +1984,7 @@ def mount_topic_cluster_routes(
|
||||||
min_cluster_size=min_cluster_size,
|
min_cluster_size=min_cluster_size,
|
||||||
lang=lang,
|
lang=lang,
|
||||||
recursive=recursive,
|
recursive=recursive,
|
||||||
|
existing_folder_names=existing_folder_names,
|
||||||
)
|
)
|
||||||
job_id = uuid.uuid4().hex
|
job_id = uuid.uuid4().hex
|
||||||
_job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()})
|
_job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()})
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue