Merge pull request #919 from zanllp/feat/reuse-existing-folders
feat: AI reuses existing folder names when organizing files
pull/920/head
commit
1289db58a9
|
|
@ -78,14 +78,6 @@ AI 驱动的自动文件整理
|
||||||
- **后台处理**:大文件夹在后台异步处理,不影响继续使用
|
- **后台处理**:大文件夹在后台异步处理,不影响继续使用
|
||||||
- **灵活配置**:支持移动/复制、设置最小聚类大小、递归处理子文件夹
|
- **灵活配置**:支持移动/复制、设置最小聚类大小、递归处理子文件夹
|
||||||
|
|
||||||
**使用方法:**
|
|
||||||
1. 进入需要整理的文件夹
|
|
||||||
2. 点击地址栏中的「智能整理」按钮
|
|
||||||
3. 配置选项(目标文件夹、最小聚类大小等)
|
|
||||||
4. 等待 AI 分析完成
|
|
||||||
5. 预览整理方案
|
|
||||||
6. 确认执行
|
|
||||||
|
|
||||||
> **前置条件**:与自然语言搜索相同 - 需要配置 `OPENAI_BASE_URL`、`OPENAI_API_KEY`,以及 Python 依赖 `numpy`、`hnswlib`
|
> **前置条件**:与自然语言搜索相同 - 需要配置 `OPENAI_BASE_URL`、`OPENAI_API_KEY`,以及 Python 依赖 `numpy`、`hnswlib`
|
||||||
>
|
>
|
||||||
> 📸 查看下方[智能整理预览](#智能整理-1)获取截图和视频演示
|
> 📸 查看下方[智能整理预览](#智能整理-1)获取截图和视频演示
|
||||||
|
|
|
||||||
|
|
@ -93,14 +93,6 @@ AI-powered automatic file organization
|
||||||
- **Background Processing**: Large folders are processed in the background, you can continue working
|
- **Background Processing**: Large folders are processed in the background, you can continue working
|
||||||
- **Flexible Options**: Choose between move or copy, set minimum cluster size, include subfolders recursively
|
- **Flexible Options**: Choose between move or copy, set minimum cluster size, include subfolders recursively
|
||||||
|
|
||||||
**How to use:**
|
|
||||||
1. Navigate to the folder you want to organize
|
|
||||||
2. Click the "Smart Organize" button in the address bar
|
|
||||||
3. Configure options (target folder, min cluster size, etc.)
|
|
||||||
4. Wait for AI analysis to complete
|
|
||||||
5. Preview the proposed organization
|
|
||||||
6. Confirm to execute
|
|
||||||
|
|
||||||
> **Requirements**: Same as Topic Search - requires `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and Python dependencies `numpy`, `hnswlib`
|
> **Requirements**: Same as Topic Search - requires `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and Python dependencies `numpy`, `hnswlib`
|
||||||
>
|
>
|
||||||
> 📸 See [Smart Organize Preview](#smart-organize) below for screenshots and video demo.
|
> 📸 See [Smart Organize Preview](#smart-organize) below for screenshots and video demo.
|
||||||
|
|
|
||||||
|
|
@ -349,13 +349,23 @@ def mount_organize_routes(
|
||||||
})
|
})
|
||||||
|
|
||||||
# 1. Start cluster job using topic_cluster API
|
# 1. Start cluster job using topic_cluster API
|
||||||
logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive})")
|
# Get existing folder names in dest directory for AI to consider reusing
|
||||||
|
dest_folder = req.dest_folder or req.folder_paths[0]
|
||||||
|
dest_folder = os.path.abspath(dest_folder)
|
||||||
|
existing_folder_names = []
|
||||||
|
if os.path.isdir(dest_folder):
|
||||||
|
for item in os.listdir(dest_folder):
|
||||||
|
item_path = os.path.join(dest_folder, item)
|
||||||
|
if os.path.isdir(item_path):
|
||||||
|
existing_folder_names.append(item)
|
||||||
|
logger.info(f"[organize_files][{job_id}] Starting cluster job (recursive={req.recursive}, existing_folders={len(existing_folder_names)})")
|
||||||
cluster_job_id = await start_cluster_job_func(
|
cluster_job_id = await start_cluster_job_func(
|
||||||
folder_paths=req.folder_paths,
|
folder_paths=req.folder_paths,
|
||||||
threshold=req.threshold,
|
threshold=req.threshold,
|
||||||
min_cluster_size=req.min_cluster_size,
|
min_cluster_size=req.min_cluster_size,
|
||||||
lang=req.lang,
|
lang=req.lang,
|
||||||
recursive=req.recursive,
|
recursive=req.recursive,
|
||||||
|
existing_folder_names=existing_folder_names,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 2. Poll cluster job status until done
|
# 2. Poll cluster job status until done
|
||||||
|
|
|
||||||
|
|
@ -520,9 +520,12 @@ def _call_chat_title_sync(
|
||||||
prompt_samples: List[str],
|
prompt_samples: List[str],
|
||||||
output_lang: str,
|
output_lang: str,
|
||||||
existing_keywords: Optional[List[str]] = None,
|
existing_keywords: Optional[List[str]] = None,
|
||||||
|
existing_folder_names: Optional[List[str]] = None,
|
||||||
) -> Optional[Dict]:
|
) -> Optional[Dict]:
|
||||||
"""
|
"""
|
||||||
Ask LLM to generate a short topic title and a few keywords. Returns dict or None.
|
Ask LLM to generate a short topic title and a few keywords. Returns dict or None.
|
||||||
|
If existing_folder_names is provided, AI will prefer reusing an existing folder name
|
||||||
|
if the theme matches, instead of generating a new title.
|
||||||
"""
|
"""
|
||||||
logger.info("[chat_title] === _call_chat_title_sync START ===")
|
logger.info("[chat_title] === _call_chat_title_sync START ===")
|
||||||
logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang)
|
logger.info("[chat_title] base_url=%s model=%s lang=%s", base_url, model, output_lang)
|
||||||
|
|
@ -566,6 +569,16 @@ def _call_chat_title_sync(
|
||||||
"- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n"
|
"- The output MUST start with '{' and end with '}' (no leading/trailing characters).\n"
|
||||||
"\n"
|
"\n"
|
||||||
)
|
)
|
||||||
|
# Add existing folder names hint for file organization scenarios
|
||||||
|
if existing_folder_names:
|
||||||
|
folder_list = existing_folder_names[:100] # Limit to 100 folders
|
||||||
|
sys += (
|
||||||
|
"IMPORTANT - Title Reuse:\n"
|
||||||
|
"The following titles have been used before. "
|
||||||
|
"If the theme matches one of them, you MUST reuse that exact title. "
|
||||||
|
"Only create a new title if the theme is clearly different from ALL existing ones.\n"
|
||||||
|
f"Existing titles: {', '.join(folder_list)}\n\n"
|
||||||
|
)
|
||||||
if existing_keywords:
|
if existing_keywords:
|
||||||
# Dynamic keyword selection based on total unique count
|
# Dynamic keyword selection based on total unique count
|
||||||
unique_count = len(existing_keywords)
|
unique_count = len(existing_keywords)
|
||||||
|
|
@ -744,6 +757,7 @@ async def _call_chat_title(
|
||||||
prompt_samples: List[str],
|
prompt_samples: List[str],
|
||||||
output_lang: str,
|
output_lang: str,
|
||||||
existing_keywords: Optional[List[str]] = None,
|
existing_keywords: Optional[List[str]] = None,
|
||||||
|
existing_folder_names: Optional[List[str]] = None,
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
Same rationale as embeddings:
|
Same rationale as embeddings:
|
||||||
|
|
@ -758,6 +772,7 @@ async def _call_chat_title(
|
||||||
prompt_samples=prompt_samples,
|
prompt_samples=prompt_samples,
|
||||||
output_lang=output_lang,
|
output_lang=output_lang,
|
||||||
existing_keywords=existing_keywords,
|
existing_keywords=existing_keywords,
|
||||||
|
existing_folder_names=existing_folder_names,
|
||||||
)
|
)
|
||||||
if not isinstance(ret, dict):
|
if not isinstance(ret, dict):
|
||||||
raise HTTPException(status_code=502, detail="Chat API returned empty title payload")
|
raise HTTPException(status_code=502, detail="Chat API returned empty title payload")
|
||||||
|
|
@ -1239,6 +1254,8 @@ def mount_topic_cluster_routes(
|
||||||
lang: Optional[str] = None
|
lang: Optional[str] = None
|
||||||
# If True, recursively scan subfolders; default True for Topic Search (backward compatible)
|
# If True, recursively scan subfolders; default True for Topic Search (backward compatible)
|
||||||
recursive: Optional[bool] = True
|
recursive: Optional[bool] = True
|
||||||
|
# Existing folder names in dest directory (for file organize: AI will prefer reusing these)
|
||||||
|
existing_folder_names: Optional[List[str]] = None
|
||||||
|
|
||||||
def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict:
|
def _scope_cache_stale_by_folders(conn: Connection, folders: List[str]) -> Dict:
|
||||||
"""
|
"""
|
||||||
|
|
@ -1441,6 +1458,10 @@ def mount_topic_cluster_routes(
|
||||||
recursive = bool(req.recursive) if req.recursive is not None else False
|
recursive = bool(req.recursive) if req.recursive is not None else False
|
||||||
logger.info("[cluster_after] recursive=%s", recursive)
|
logger.info("[cluster_after] recursive=%s", recursive)
|
||||||
|
|
||||||
|
# Extract existing folder names for AI to consider reusing
|
||||||
|
existing_folder_names = req.existing_folder_names or []
|
||||||
|
logger.info("[cluster_after] existing_folder_names count=%d", len(existing_folder_names))
|
||||||
|
|
||||||
if progress_cb:
|
if progress_cb:
|
||||||
logger.info("[cluster_after] Calling progress callback with clustering stage")
|
logger.info("[cluster_after] Calling progress callback with clustering stage")
|
||||||
progress_cb({"stage": "clustering", "folder": folder, "folders": folders})
|
progress_cb({"stage": "clustering", "folder": folder, "folders": folders})
|
||||||
|
|
@ -1707,6 +1728,7 @@ def mount_topic_cluster_routes(
|
||||||
prompt_samples=[rep] + texts[:5],
|
prompt_samples=[rep] + texts[:5],
|
||||||
output_lang=output_lang,
|
output_lang=output_lang,
|
||||||
existing_keywords=top_keywords,
|
existing_keywords=top_keywords,
|
||||||
|
existing_folder_names=existing_folder_names,
|
||||||
)
|
)
|
||||||
title = (llm or {}).get("title")
|
title = (llm or {}).get("title")
|
||||||
keywords = (llm or {}).get("keywords", [])
|
keywords = (llm or {}).get("keywords", [])
|
||||||
|
|
@ -1945,10 +1967,15 @@ def mount_topic_cluster_routes(
|
||||||
min_cluster_size: int = 2,
|
min_cluster_size: int = 2,
|
||||||
lang: str = "en",
|
lang: str = "en",
|
||||||
recursive: bool = False,
|
recursive: bool = False,
|
||||||
|
existing_folder_names: Optional[List[str]] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Start a cluster job and return job_id.
|
Start a cluster job and return job_id.
|
||||||
This is a wrapper for organize_files to use.
|
This is a wrapper for organize_files to use.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
existing_folder_names: List of folder names already in dest directory.
|
||||||
|
AI will prefer reusing these names if theme matches.
|
||||||
"""
|
"""
|
||||||
_ensure_perf_deps()
|
_ensure_perf_deps()
|
||||||
req = ClusterIibOutputReq(
|
req = ClusterIibOutputReq(
|
||||||
|
|
@ -1957,6 +1984,7 @@ def mount_topic_cluster_routes(
|
||||||
min_cluster_size=min_cluster_size,
|
min_cluster_size=min_cluster_size,
|
||||||
lang=lang,
|
lang=lang,
|
||||||
recursive=recursive,
|
recursive=recursive,
|
||||||
|
existing_folder_names=existing_folder_names,
|
||||||
)
|
)
|
||||||
job_id = uuid.uuid4().hex
|
job_id = uuid.uuid4().hex
|
||||||
_job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()})
|
_job_upsert(job_id, {"status": "queued", "stage": "queued", "created_at": _job_now()})
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue