Merge pull request #919 from zanllp/feat/reuse-existing-folders
feat: AI reuses existing folder names when organizing files
pull/920/head
commit
1289db58a9
|
|
@ -78,14 +78,6 @@ AI 驱动的自动文件整理
|
|||
- **后台处理**:大文件夹在后台异步处理,不影响继续使用
|
||||
- **灵活配置**:支持移动/复制、设置最小聚类大小、递归处理子文件夹
|
||||
|
||||
**使用方法:**
|
||||
1. 进入需要整理的文件夹
|
||||
2. 点击地址栏中的「智能整理」按钮
|
||||
3. 配置选项(目标文件夹、最小聚类大小等)
|
||||
4. 等待 AI 分析完成
|
||||
5. 预览整理方案
|
||||
6. 确认执行
|
||||
|
||||
> **前置条件**:与自然语言搜索相同 - 需要配置 `OPENAI_BASE_URL`、`OPENAI_API_KEY`,以及 Python 依赖 `numpy`、`hnswlib`
|
||||
>
|
||||
> 📸 查看下方[智能整理预览](#智能整理-1)获取截图和视频演示
|
||||
|
|
|
|||
|
|
@ -93,14 +93,6 @@ AI-powered automatic file organization
|
|||
- **Background Processing**: Large folders are processed in the background, so you can continue working
|
||||
- **Flexible Options**: Choose between move or copy, set minimum cluster size, include subfolders recursively
|
||||
|
||||
**How to use:**
|
||||
1. Navigate to the folder you want to organize
|
||||
2. Click the "Smart Organize" button in the address bar
|
||||
3. Configure options (target folder, min cluster size, etc.)
|
||||
4. Wait for AI analysis to complete
|
||||
5. Preview the proposed organization
|
||||
6. Confirm to execute
|
||||
|
||||
> **Requirements**: Same as Topic Search - requires `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and Python dependencies `numpy`, `hnswlib`
|
||||
>
|
||||
> 📸 See [Smart Organize Preview](#smart-organize) below for screenshots and a video demo.
|
||||
|
|
|
|||
|
|
@ -349,13 +349,23 @@ def mount_organize_routes(
|
|||
})
|
||||
|
||||
# 1. Start cluster job using topic_cluster API
|
||||
logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive})")
|
||||
# Get existing folder names in dest directory for AI to consider reusing
|
||||
dest_folder = req.dest_folder or req.folder_paths[0]
|
||||
dest_folder = os.path.abspath(dest_folder)
|
||||
existing_folder_names = []
|
||||
if os.path.isdir(dest_folder):
|
||||
for item in os.listdir(dest_folder):
|
||||
item_path = os.path.join(dest_folder, item)
|
||||
if os.path.isdir(item_path):
|
||||
existing_folder_names.append(item)
|
||||
logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive}, existing_folders={len(existing_folder_names)})")
|
||||
cluster_job_id = await start_cluster_job_func(
|
||||
folder_paths=req.folder_paths,
|
||||
threshold=req.threshold,
|
||||
min_cluster_size=req.min_cluster_size,
|
||||
lang=req.lang,
|
||||
recursive=req.recursive,
|
||||
existing_folder_names=existing_folder_names,
|
||||
)
|
||||
|
||||
# 2. Poll cluster job status until done
|
||||
|
|
|
|||
|
|
@ -520,9 +520,12 @@ def _call_chat_title_sync(
|
|||
prompt_samples: List[str],
|
||||
output_lang: str,
|
||||
existing_keywords: Optional[List[str]] = None,
|
||||
existing_folder_names: Optional[List[str]] = None,
|
||||
) -> Optional[Dict]:
|
||||
"""
|
||||
Ask LLM to generate a short topic title and a few keywords. Returns dict or None.
|
||||
If existing_folder_names is provided, AI will prefer reusing an existing folder name
|
||||
if the theme matches, instead of generating a new title.
|
||||
"""
|
||||
logger.info("[chat_title] === _call_chat_title_sync START ===")
|
||||
logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang)
|
||||
|
|
@ -566,6 +569,16 @@ def _call_chat_title_sync(
|
|||
"- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n"
|
||||
"\n"
|
||||
)
|
||||
# Add existing folder names hint for file organization scenarios
|
||||
if existing_folder_names:
|
||||
folder_list = existing_folder_names[:100] # Limit to 100 folders
|
||||
sys += (
|
||||
"IMPORTANT - Title Reuse:\n"
|
||||
"The following titles have been used before. "
|
||||
"If the theme matches one of them, you MUST reuse that exact title. "
|
||||
"Only create a new title if the theme is clearly different from ALL existing ones.\n"
|
||||
f"Existing titles: {', '.join(folder_list)}\n\n"
|
||||
)
|
||||
if existing_keywords:
|
||||
# Dynamic keyword selection based on total unique count
|
||||
unique_count = len(existing_keywords)
|
||||
|
|
@ -744,6 +757,7 @@ async def _call_chat_title(
|
|||
prompt_samples: List[str],
|
||||
output_lang: str,
|
||||
existing_keywords: Optional[List[str]] = None,
|
||||
existing_folder_names: Optional[List[str]] = None,
|
||||
) -> Dict:
|
||||
"""
|
||||
Same rationale as embeddings:
|
||||
|
|
@ -758,6 +772,7 @@ async def _call_chat_title(
|
|||
prompt_samples=prompt_samples,
|
||||
output_lang=output_lang,
|
||||
existing_keywords=existing_keywords,
|
||||
existing_folder_names=existing_folder_names,
|
||||
)
|
||||
if not isinstance(ret, dict):
|
||||
raise HTTPException(status_code=502, detail="Chat API returned empty title payload")
|
||||
|
|
@ -1239,6 +1254,8 @@ def mount_topic_cluster_routes(
|
|||
lang: Optional[str] = None
|
||||
# If True, recursively scan subfolders; default True for Topic Search (backward compatible)
|
||||
recursive: Optional[bool] = True
|
||||
# Existing folder names in dest directory (for file organize: AI will prefer reusing these)
|
||||
existing_folder_names: Optional[List[str]] = None
|
||||
|
||||
def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict:
|
||||
"""
|
||||
|
|
@ -1441,6 +1458,10 @@ def mount_topic_cluster_routes(
|
|||
recursive = bool(req.recursive) if req.recursive is not None else False
|
||||
logger.info("[cluster_after] recursive=%s", recursive)
|
||||
|
||||
# Extract existing folder names for AI to consider reusing
|
||||
existing_folder_names = req.existing_folder_names or []
|
||||
logger.info("[cluster_after] existing_folder_names count=%d", len(existing_folder_names))
|
||||
|
||||
if progress_cb:
|
||||
logger.info("[cluster_after] Calling progress callback with clustering stage")
|
||||
progress_cb({"stage": "clustering", "folder": folder, "folders": folders})
|
||||
|
|
@ -1707,6 +1728,7 @@ def mount_topic_cluster_routes(
|
|||
prompt_samples=[rep] + texts[:5],
|
||||
output_lang=output_lang,
|
||||
existing_keywords=top_keywords,
|
||||
existing_folder_names=existing_folder_names,
|
||||
)
|
||||
title = (llm or {}).get("title")
|
||||
keywords = (llm or {}).get("keywords", [])
|
||||
|
|
@ -1945,10 +1967,15 @@ def mount_topic_cluster_routes(
|
|||
min_cluster_size: int = 2,
|
||||
lang: str = "en",
|
||||
recursive: bool = False,
|
||||
existing_folder_names: Optional[List[str]] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Start a cluster job and return job_id.
|
||||
This is a wrapper for organize_files to use.
|
||||
|
||||
Args:
|
||||
existing_folder_names: List of folder names already in dest directory.
|
||||
AI will prefer reusing these names if theme matches.
|
||||
"""
|
||||
_ensure_perf_deps()
|
||||
req = ClusterIibOutputReq(
|
||||
|
|
@ -1957,6 +1984,7 @@ def mount_topic_cluster_routes(
|
|||
min_cluster_size=min_cluster_size,
|
||||
lang=lang,
|
||||
recursive=recursive,
|
||||
existing_folder_names=existing_folder_names,
|
||||
)
|
||||
job_id = uuid.uuid4().hex
|
||||
_job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()})
|
||||
|
|
|
|||
Loading…
Reference in New Issue