|
|
@@ -12,12 +12,16 @@ if torch_path not in sys.path:
|
|
|
sys.path.insert(0, torch_path)
|
|
|
|
|
|
import asyncio
|
|
|
+import base64
|
|
|
import json
|
|
|
import os
|
|
|
+import tempfile
|
|
|
import uuid
|
|
|
from pathlib import Path
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
+import requests
|
|
|
+
|
|
|
from fastmcp import FastMCP
|
|
|
|
|
|
from ..translator.engine import TranslationEngine
|
|
|
@@ -73,7 +77,7 @@ def _initialize_components() -> None:
|
|
|
)
|
|
|
|
|
|
# Initialize repository and fingerprint service
|
|
|
- _repository = Repository()
|
|
|
+ _repository = Repository(Path("/mnt/code/223-236-template-6/data"))
|
|
|
_fingerprint_service = FingerprintService(_repository)
|
|
|
|
|
|
|
|
|
@@ -252,9 +256,97 @@ def _add_suffix(path: str, suffix: str) -> str:
|
|
|
return str(p.with_stem(p.stem + suffix))
|
|
|
|
|
|
|
|
|
+async def _resolve_file_input(
|
|
|
+ file_path: Optional[str] = None,
|
|
|
+ file_content: Optional[str] = None,
|
|
|
+ filename: Optional[str] = None,
|
|
|
+ file_url: Optional[str] = None
|
|
|
+) -> tuple[Path, bool]:
|
|
|
+ """
|
|
|
+ 解析三种文件输入方式,返回文件路径和是否需要清理临时文件。
|
|
|
+
|
|
|
+ 优先级:file_content > file_url > file_path
|
|
|
+
|
|
|
+ Args:
|
|
|
+ file_path: 容器内文件路径
|
|
|
+ file_content: base64 编码的文件内容
|
|
|
+ filename: 配合 file_content 使用的文件名
|
|
|
+ file_url: HTTP URL
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ (文件路径, 是否为临时文件)
|
|
|
+
|
|
|
+ Raises:
|
|
|
+ ValueError: 如果没有提供任何有效的输入
|
|
|
+ IOError: 如果文件读取失败
|
|
|
+ """
|
|
|
+ temp_file = None
|
|
|
+
|
|
|
+ # 1. file_content 优先级最高
|
|
|
+ if file_content:
|
|
|
+ try:
|
|
|
+ decoded = base64.b64decode(file_content)
|
|
|
+ except Exception as e:
|
|
|
+ raise ValueError(f"Invalid base64 content: {e}")
|
|
|
+
|
|
|
+ # 使用文件名或默认名称
|
|
|
+ name = filename or "uploaded_file.txt"
|
|
|
+ temp_file = Path(tempfile.gettempdir()) / f"mcp_upload_{uuid.uuid4().hex}_{name}"
|
|
|
+
|
|
|
+ temp_file.parent.mkdir(parents=True, exist_ok=True)
|
|
|
+ with open(temp_file, "wb") as f:
|
|
|
+ f.write(decoded)
|
|
|
+
|
|
|
+ return temp_file, True
|
|
|
+
|
|
|
+ # 2. file_url 其次
|
|
|
+ if file_url:
|
|
|
+ try:
|
|
|
+ response = requests.get(file_url, timeout=30)
|
|
|
+ response.raise_for_status()
|
|
|
+ content = response.content
|
|
|
+ except requests.RequestException as e:
|
|
|
+ raise IOError(f"Failed to download file from URL: {e}")
|
|
|
+
|
|
|
+ # 从 URL 提取文件名或使用默认名称
|
|
|
+ url_path = Path(file_url.split("?")[0])
|
|
|
+ name = url_path.name or filename or "downloaded_file.txt"
|
|
|
+ temp_file = Path(tempfile.gettempdir()) / f"mcp_download_{uuid.uuid4().hex}_{name}"
|
|
|
+
|
|
|
+ temp_file.parent.mkdir(parents=True, exist_ok=True)
|
|
|
+ with open(temp_file, "wb") as f:
|
|
|
+ f.write(content)
|
|
|
+
|
|
|
+ return temp_file, True
|
|
|
+
|
|
|
+ # 3. file_path 最后
|
|
|
+ if file_path:
|
|
|
+ path_obj = Path(file_path)
|
|
|
+ if not path_obj.exists():
|
|
|
+ raise FileNotFoundError(f"File not found: {file_path}")
|
|
|
+ return path_obj, False
|
|
|
+
|
|
|
+ # 没有提供任何有效输入
|
|
|
+ raise ValueError(
|
|
|
+ "At least one of file_path, file_content, or file_url must be provided"
|
|
|
+ )
|
|
|
+
|
|
|
+
|
|
|
+async def _cleanup_temp_file(path: Path) -> None:
|
|
|
+ """清理临时文件。"""
|
|
|
+ try:
|
|
|
+ if path.exists():
|
|
|
+ path.unlink()
|
|
|
+ except Exception:
|
|
|
+ pass # 忽略清理错误
|
|
|
+
|
|
|
+
|
|
|
@mcp.tool()
|
|
|
async def translate_file(
|
|
|
- file_path: str,
|
|
|
+ file_path: Optional[str] = None,
|
|
|
+ file_content: Optional[str] = None,
|
|
|
+ filename: Optional[str] = None,
|
|
|
+ file_url: Optional[str] = None,
|
|
|
src_lang: str = "zh",
|
|
|
tgt_lang: str = "en",
|
|
|
output_path: Optional[str] = None,
|
|
|
@@ -270,8 +362,16 @@ async def translate_file(
|
|
|
3. 逐章翻译(使用术语表)
|
|
|
4. 保存结果(保留章节结构)
|
|
|
|
|
|
+ 支持三种文件输入方式(按优先级排序):
|
|
|
+ 1. file_content: base64 编码的文件内容(优先级最高)
|
|
|
+ 2. file_url: HTTP URL(服务器自动下载)
|
|
|
+ 3. file_path: 容器内文件路径(优先级最低)
|
|
|
+
|
|
|
Args:
|
|
|
- file_path: 要翻译的文件路径
|
|
|
+ file_path: 容器内文件路径
|
|
|
+ file_content: base64 编码的文件内容
|
|
|
+ filename: 配合 file_content 使用,指定原始文件名
|
|
|
+ file_url: HTTP URL,服务器自动下载文件
|
|
|
src_lang: 源语言代码
|
|
|
tgt_lang: 目标语言代码
|
|
|
output_path: 输出文件路径(默认添加 _en 后缀)
|
|
|
@@ -283,15 +383,25 @@ async def translate_file(
|
|
|
"""
|
|
|
from ..cleaning.models import Chapter
|
|
|
|
|
|
- file_path_obj = Path(file_path)
|
|
|
- if not file_path_obj.exists():
|
|
|
- return {"success": False, "error": f"File not found: {file_path}"}
|
|
|
+ # 解析文件输入
|
|
|
+ try:
|
|
|
+ file_path_obj, is_temp = await _resolve_file_input(
|
|
|
+ file_path=file_path,
|
|
|
+ file_content=file_content,
|
|
|
+ filename=filename,
|
|
|
+ file_url=file_url
|
|
|
+ )
|
|
|
+ except (ValueError, FileNotFoundError, IOError) as e:
|
|
|
+ return {"success": False, "error": str(e)}
|
|
|
|
|
|
task_id = create_task(
|
|
|
task_type="file_translation",
|
|
|
- metadata={"file_path": file_path}
|
|
|
+ metadata={"file_path": str(file_path_obj)}
|
|
|
)
|
|
|
|
|
|
+ # 用于清理临时文件的标记
|
|
|
+ _temp_file_to_cleanup = file_path_obj if is_temp else None
|
|
|
+
|
|
|
try:
|
|
|
pipeline = get_pipeline()
|
|
|
cleaning_pipeline = get_cleaning_pipeline()
|
|
|
@@ -341,7 +451,7 @@ async def translate_file(
|
|
|
"message": f"Translated chapter {i + 1}/{total_chapters}: {chapter.title}"
|
|
|
})
|
|
|
|
|
|
- output = output_path or _add_suffix(file_path, "_en")
|
|
|
+ output = output_path or _add_suffix(str(file_path_obj), "_en")
|
|
|
output_file = Path(output)
|
|
|
|
|
|
await update_progress(task_id, {
|
|
|
@@ -358,6 +468,20 @@ async def translate_file(
|
|
|
f.write(chapter["content"])
|
|
|
f.write("\n\n")
|
|
|
|
|
|
+ # Generate preview (500-1000 characters)
|
|
|
+ preview_length_limit = 750
|
|
|
+ with open(output_file, "r", encoding="utf-8") as f:
|
|
|
+ content = f.read()
|
|
|
+ if len(content) <= preview_length_limit:
|
|
|
+ preview = content
|
|
|
+ else:
|
|
|
+ # Try to break at a newline for cleaner preview
|
|
|
+ preview = content[:preview_length_limit]
|
|
|
+ last_newline = preview.rfind('\n')
|
|
|
+ if last_newline > preview_length_limit // 2:
|
|
|
+ preview = content[:last_newline]
|
|
|
+ preview += "\n\n... (truncated)"
|
|
|
+
|
|
|
await complete_task(task_id, success=True)
|
|
|
|
|
|
await update_progress(task_id, {
|
|
|
@@ -370,11 +494,16 @@ async def translate_file(
|
|
|
"output_path": output,
|
|
|
"task_id": task_id,
|
|
|
"terms_used": list(all_terms_used),
|
|
|
- "chapters_translated": total_chapters
|
|
|
+ "chapters_translated": total_chapters,
|
|
|
+ "total_chapters": total_chapters,
|
|
|
+ "preview": preview,
|
|
|
+ "preview_length": len(preview)
|
|
|
}
|
|
|
|
|
|
except Exception as e:
|
|
|
await complete_task(task_id, success=False)
|
|
|
+ if _temp_file_to_cleanup:
|
|
|
+ await _cleanup_temp_file(_temp_file_to_cleanup)
|
|
|
|
|
|
await update_progress(task_id, {
|
|
|
"status": "failed",
|
|
|
@@ -390,7 +519,10 @@ async def translate_file(
|
|
|
|
|
|
@mcp.tool()
|
|
|
async def clean_file(
|
|
|
- file_path: str,
|
|
|
+ file_path: Optional[str] = None,
|
|
|
+ file_content: Optional[str] = None,
|
|
|
+ filename: Optional[str] = None,
|
|
|
+ file_url: Optional[str] = None,
|
|
|
output_path: Optional[str] = None,
|
|
|
enable_cleaning: bool = True,
|
|
|
enable_splitting: bool = True
|
|
|
@@ -404,8 +536,16 @@ async def clean_file(
|
|
|
- 标准化空白字符和标点
|
|
|
- 可选章节分割
|
|
|
|
|
|
+ 支持三种文件输入方式(按优先级排序):
|
|
|
+ 1. file_content: base64 编码的文件内容(优先级最高)
|
|
|
+ 2. file_url: HTTP URL(服务器自动下载)
|
|
|
+ 3. file_path: 容器内文件路径(优先级最低)
|
|
|
+
|
|
|
Args:
|
|
|
- file_path: 要清洗的文件路径
|
|
|
+ file_path: 容器内文件路径
|
|
|
+ file_content: base64 编码的文件内容
|
|
|
+ filename: 配合 file_content 使用,指定原始文件名
|
|
|
+ file_url: HTTP URL,服务器自动下载文件
|
|
|
output_path: 输出路径(可选)
|
|
|
enable_cleaning: 是否启用清洗
|
|
|
enable_splitting: 是否启用章节分割
|
|
|
@@ -413,9 +553,18 @@ async def clean_file(
|
|
|
Returns:
|
|
|
包含章节信息和清洗结果的字典
|
|
|
"""
|
|
|
- file_path_obj = Path(file_path)
|
|
|
- if not file_path_obj.exists():
|
|
|
- return {"success": False, "error": f"File not found: {file_path}"}
|
|
|
+ # 解析文件输入
|
|
|
+ try:
|
|
|
+ file_path_obj, is_temp = await _resolve_file_input(
|
|
|
+ file_path=file_path,
|
|
|
+ file_content=file_content,
|
|
|
+ filename=filename,
|
|
|
+ file_url=file_url
|
|
|
+ )
|
|
|
+ except (ValueError, FileNotFoundError, IOError) as e:
|
|
|
+ return {"success": False, "error": str(e)}
|
|
|
+
|
|
|
+ _temp_file_to_cleanup = file_path_obj if is_temp else None
|
|
|
|
|
|
try:
|
|
|
pipeline = get_cleaning_pipeline()
|
|
|
@@ -447,6 +596,23 @@ async def clean_file(
|
|
|
|
|
|
saved_path = str(output_file)
|
|
|
|
|
|
+ # Generate preview (500-1000 characters)
|
|
|
+ preview_length_limit = 750
|
|
|
+ with open(output_file, "r", encoding="utf-8") as f:
|
|
|
+ content = f.read()
|
|
|
+ if len(content) <= preview_length_limit:
|
|
|
+ preview = content
|
|
|
+ else:
|
|
|
+ # Try to break at a newline for cleaner preview
|
|
|
+ preview = content[:preview_length_limit]
|
|
|
+ last_newline = preview.rfind('\n')
|
|
|
+ if last_newline > preview_length_limit / 2:
|
|
|
+ preview = content[:last_newline]
|
|
|
+ preview += "\n\n... (truncated)"
|
|
|
+ else:
|
|
|
+ preview = None
|
|
|
+ preview_length = None
|
|
|
+
|
|
|
total_chars = sum(c.char_count for c in chapters)
|
|
|
|
|
|
return {
|
|
|
@@ -454,11 +620,19 @@ async def clean_file(
|
|
|
"chapters": chapter_list,
|
|
|
"chapter_count": len(chapters),
|
|
|
"total_chars": total_chars,
|
|
|
- "output_path": saved_path
|
|
|
+ "output_path": saved_path,
|
|
|
+ "preview": preview,
|
|
|
+ "preview_length": len(preview) if preview else None
|
|
|
}
|
|
|
|
|
|
except Exception as e:
|
|
|
+ if _temp_file_to_cleanup:
|
|
|
+ await _cleanup_temp_file(_temp_file_to_cleanup)
|
|
|
return {"success": False, "error": f"Cleaning failed: {str(e)}"}
|
|
|
+ finally:
|
|
|
+ # 清理临时文件
|
|
|
+ if _temp_file_to_cleanup:
|
|
|
+ await _cleanup_temp_file(_temp_file_to_cleanup)
|
|
|
|
|
|
|
|
|
@mcp.tool()
|
|
|
@@ -587,12 +761,23 @@ async def glossary_add(
|
|
|
|
|
|
|
|
|
@mcp.tool()
|
|
|
-async def glossary_list() -> Dict[str, Any]:
|
|
|
+async def glossary_list(
|
|
|
+ export_format: Optional[str] = None,
|
|
|
+ output_path: Optional[str] = None
|
|
|
+) -> Dict[str, Any]:
|
|
|
"""
|
|
|
- 列出术语表所有条目。
|
|
|
+ 列出术语表所有条目,支持导出为 JSON 文件。
|
|
|
+
|
|
|
+ Args:
|
|
|
+ export_format: 导出格式,目前仅支持 "json"
|
|
|
+ output_path: 导出文件保存路径(当 export_format 为 "json" 时必需)
|
|
|
|
|
|
Returns:
|
|
|
- 包含所有术语条目的字典
|
|
|
+ 包含所有术语条目的字典,或导出结果
|
|
|
+
|
|
|
+ Examples:
|
|
|
+ 列出术语: glossary_list()
|
|
|
+ 导出 JSON: glossary_list(export_format="json", output_path="/path/to/glossary.json")
|
|
|
"""
|
|
|
try:
|
|
|
glossary = get_glossary()
|
|
|
@@ -607,6 +792,44 @@ async def glossary_list() -> Dict[str, Any]:
|
|
|
for e in glossary.get_all()
|
|
|
]
|
|
|
|
|
|
+ # 如果指定了导出格式
|
|
|
+ if export_format:
|
|
|
+ if export_format.lower() != "json":
|
|
|
+ return {
|
|
|
+ "success": False,
|
|
|
+ "error": f"Unsupported export format: {export_format}. Currently only 'json' is supported."
|
|
|
+ }
|
|
|
+
|
|
|
+ if not output_path:
|
|
|
+ return {
|
|
|
+ "success": False,
|
|
|
+ "error": "output_path is required when export_format is specified"
|
|
|
+ }
|
|
|
+
|
|
|
+ # 准备导出数据
|
|
|
+ export_data = {
|
|
|
+ "glossary": entries,
|
|
|
+ "count": len(entries),
|
|
|
+ "exported_at": __import__("datetime").datetime.utcnow().isoformat() + "Z"
|
|
|
+ }
|
|
|
+
|
|
|
+ # 写入文件
|
|
|
+ output_file = Path(output_path)
|
|
|
+ output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
|
+
|
|
|
+ with open(output_file, "w", encoding="utf-8") as f:
|
|
|
+ json.dump(export_data, f, ensure_ascii=False, indent=2)
|
|
|
+
|
|
|
+ return {
|
|
|
+ "success": True,
|
|
|
+ "exported": True,
|
|
|
+ "export_format": "json",
|
|
|
+ "output_path": str(output_file),
|
|
|
+ "count": len(entries),
|
|
|
+ "message": f"Successfully exported {len(entries)} glossary entries to {output_path}"
|
|
|
+ }
|
|
|
+
|
|
|
+ # 默认返回条目列表
|
|
|
return {
|
|
|
"success": True,
|
|
|
"entries": entries,
|
|
|
@@ -617,6 +840,54 @@ async def glossary_list() -> Dict[str, Any]:
|
|
|
return {"success": False, "error": str(e)}
|
|
|
|
|
|
|
|
|
@mcp.tool()
async def glossary_import(
    file_path: Optional[str] = None,
    file_content: Optional[str] = None,
    filename: Optional[str] = None,
    file_url: Optional[str] = None,
    merge_mode: str = "merge"
) -> Dict[str, Any]:
    """
    Bulk-import glossary entries from a JSON file.

    Three input styles are accepted, in priority order:
    1. file_content: base64-encoded file content (highest priority)
    2. file_url: HTTP URL the server downloads from
    3. file_path: path inside the container (lowest priority)

    Expected glossary JSON layout:
    {
        "glossary": [
            {"source": "林风", "target": "Lin Feng", "category": "character", "context": "主角"},
            {"source": "青云宗", "target": "Qingyun Sect", "category": "organization"}
        ]
    }

    Args:
        file_path: Path inside the container.
        file_content: Base64-encoded file content.
        filename: Original file name, used together with file_content.
        file_url: HTTP URL the server downloads the file from.
        merge_mode: "merge" folds entries into the existing glossary,
            "replace" swaps the existing glossary out entirely.

    Returns:
        Import summary, including counts of imported/skipped terms.
    """
    # Imported lazily so the tool module loads even if the helper changes.
    from .tools.glossary_import import glossary_import as _import

    # Forward the caller-facing arguments as-is, then inject the module-level
    # collaborators the helper needs to do its work.
    forwarded = dict(
        file_path=file_path,
        file_content=file_content,
        filename=filename,
        file_url=file_url,
        merge_mode=merge_mode,
    )
    return await _import(
        **forwarded,
        resolve_file_fn=_resolve_file_input,
        get_glossary_fn=get_glossary,
        notify_updated_fn=notify_glossary_updated,
    )
|
|
|
+
|
|
|
+
|
|
|
@mcp.tool()
|
|
|
async def glossary_clear() -> Dict[str, Any]:
|
|
|
"""
|
|
|
@@ -752,15 +1023,17 @@ async def main():
|
|
|
port = int(os.getenv("MCP_PORT", "8080"))
|
|
|
|
|
|
print(f"Starting Novel Translator MCP Server on http://{host}:{port}")
|
|
|
- print(f"Transport: SSE (Server-Sent Events)")
|
|
|
- print(f"Endpoint: http://{host}:{port}/sse")
|
|
|
+ print(f"Transport: HTTP")
|
|
|
+ print(f"Endpoint: http://{host}:{port}/mcp")
|
|
|
|
|
|
# Run HTTP server with SSE transport
|
|
|
+ # stateless=True disables session requirement for Claude Desktop compatibility
|
|
|
await mcp.run_http_async(
|
|
|
- transport="sse",
|
|
|
+ transport="http",
|
|
|
host=host,
|
|
|
port=port,
|
|
|
- log_level="info"
|
|
|
+ log_level="info",
|
|
|
+ stateless=True
|
|
|
)
|
|
|
|
|
|
|