
feat(translator): Implement Translation module (Epic 5)

Implements the complete translation module with m2m100 model integration,
glossary preprocessing, and post-processing capabilities.

## Story 5.1: TranslationEngine (8SP)
- TranslationEngine class with m2m100 model wrapper
- Support for single and batch translation
- CPU/GPU auto-detection
- Language code validation

## Story 5.2: TranslationPipeline (6SP)
- TranslationPipeline integrating glossary preprocessing
- Integration with GlossaryPostprocessor
- TranslationResult dataclass for detailed results
- Batch translation support

## Story 5.3: ChapterTranslator (6SP)
- ChapterTranslator for novel chapter translation
- Long text paragraph splitting
- Resume/continuation support for crash recovery
- Repository integration for persistence

## Story 5.4: ProgressReporter (3SP)
- ProgressReporter with callback system
- ProgressStatus enum for event types
- ConsoleProgressReporter for simple console output
- Progress summary and statistics

## Story 5.5: Unit Tests (3SP)
- Comprehensive unit tests for all components
- Mocked transformers dependencies for CI
- Integration tests with mocked model

## Files Added
- src/translator/__init__.py
- src/translator/engine.py
- src/translator/pipeline.py
- src/translator/chapter_translator.py
- src/translator/progress.py
- tests/test_translator.py

Epic 5 completed ✅ (26 SP)

Part of Phase 2: Core Feature Development

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
d8dfun committed 2 days ago
Parent
Current commit
3104fe9c72
7 files changed, 1641 insertions(+), 0 deletions(-)
  1. src/__init__.py (+15 −0)
  2. src/translator/__init__.py (+19 −0)
  3. src/translator/chapter_translator.py (+326 −0)
  4. src/translator/engine.py (+214 −0)
  5. src/translator/pipeline.py (+222 −0)
  6. src/translator/progress.py (+277 −0)
  7. tests/test_translator.py (+568 −0)

+ 15 - 0
src/__init__.py

@@ -5,3 +5,18 @@ A novel translation tool with glossary support and crash-safe state management.
 """
 """
 
 
 __version__ = "0.1.0"
 __version__ = "0.1.0"
+
+# Translator module exports
+from .translator import (
+    TranslationEngine,
+    TranslationPipeline,
+    ChapterTranslator,
+    ProgressReporter,
+)
+
+__all__ = [
+    "TranslationEngine",
+    "TranslationPipeline",
+    "ChapterTranslator",
+    "ProgressReporter",
+]

+ 19 - 0
src/translator/__init__.py

@@ -0,0 +1,19 @@
+"""
+Translation module for m2m100-based translation.
+
+This module provides translation capabilities integrated with
+glossary preprocessing and postprocessing.
+"""
+
+from .engine import TranslationEngine
+from .pipeline import TranslationPipeline
+from .chapter_translator import ChapterTranslator
+from .progress import ProgressReporter, ProgressCallback
+
+__all__ = [
+    "TranslationEngine",
+    "TranslationPipeline",
+    "ChapterTranslator",
+    "ProgressReporter",
+    "ProgressCallback",
+]

+ 326 - 0
src/translator/chapter_translator.py

@@ -0,0 +1,326 @@
+"""
+Chapter-level translator for novel translation.
+
+This module provides high-level translation functionality for chapters,
+integrating with the repository for persistence.
+"""
+
+import logging
+import traceback
+from typing import List, Optional, Callable
+
+from ..repository.models import ChapterItem, ChapterStatus
+from ..repository.repository import Repository
+from .pipeline import TranslationPipeline
+from .progress import ProgressReporter, ProgressCallback, ProgressStatus
+
+logger = logging.getLogger(__name__)
+
+
+class ChapterTranslator:
+    """
+    Chapter-level translator with repository integration.
+
+    This class handles translation of individual chapters and entire works,
+    with support for progress tracking and crash recovery.
+
+    Example:
+        >>> engine = TranslationEngine()
+        >>> glossary = Glossary()
+        >>> pipeline = TranslationPipeline(engine, glossary)
+        >>> repo = Repository(Path("/data"))
+        >>> translator = ChapterTranslator(pipeline, repo)
+        >>> translator.translate_work("work_id_123")
+    """
+
+    # Maximum text length per translation request
+    MAX_SEGMENT_LENGTH = 2000
+
+    def __init__(
+        self,
+        pipeline: TranslationPipeline,
+        repository: Repository,
+        progress_callback: Optional[ProgressCallback] = None
+    ):
+        """
+        Initialize the chapter translator.
+
+        Args:
+            pipeline: The translation pipeline to use
+            repository: The repository for data persistence
+            progress_callback: Optional callback for progress updates
+        """
+        self.pipeline = pipeline
+        self.repository = repository
+        self.progress_reporter = ProgressReporter(callback=progress_callback)
+
+    def _split_paragraphs(self, content: str) -> List[str]:
+        """
+        Split content into paragraphs for translation.
+
+        Handles long paragraphs by splitting them into smaller segments.
+
+        Args:
+            content: The content to split
+
+        Returns:
+            List of paragraph/segment strings
+        """
+        # Split by double newlines first (paragraph breaks)
+        paragraphs = content.split('\n\n')
+
+        segments = []
+        for para in paragraphs:
+            para = para.strip()
+            if not para:
+                continue
+
+            # If paragraph is too long, split by sentences
+            if len(para) > self.MAX_SEGMENT_LENGTH:
+                segments.extend(self._split_long_paragraph(para))
+            else:
+                segments.append(para)
+
+        return segments
+
+    def _split_long_paragraph(self, paragraph: str) -> List[str]:
+        """
+        Split a long paragraph into smaller segments.
+
+        Args:
+            paragraph: The long paragraph to split
+
+        Returns:
+            List of segments
+        """
+        # Simple sentence splitting (naive but functional)
+        # In production, consider using a proper sentence tokenizer
+        segments = []
+        current = ""
+
+        # Split by common sentence delimiters
+        delimiters = ['。', '!', '?', '.', '!', '?']
+        i = 0
+
+        while i < len(paragraph):
+            char = paragraph[i]
+            current += char
+
+            # Check if we hit a sentence delimiter
+            if char in delimiters:
+                # Check if adding next char would exceed limit
+                if len(current) >= self.MAX_SEGMENT_LENGTH * 0.8:
+                    segments.append(current.strip())
+                    current = ""
+
+            i += 1
+
+        # Add remaining text
+        if current.strip():
+            segments.append(current.strip())
+
+        return segments
+
+    def translate_chapter(
+        self,
+        work_id: str,
+        chapter: ChapterItem
+    ) -> ChapterItem:
+        """
+        Translate a single chapter.
+
+        Args:
+            work_id: The work item ID
+            chapter: The chapter to translate
+
+        Returns:
+            The updated chapter with translation
+
+        Raises:
+            Exception: If translation fails
+        """
+        if chapter.status == ChapterStatus.COMPLETED and chapter.translation:
+            # Skip already translated chapters
+            logger.info(f"Chapter {chapter.chapter_index} already translated, skipping")
+            return chapter
+
+        # Update status to processing
+        chapter.status = ChapterStatus.PROCESSING
+        chapter.error_message = None
+        self.repository.save_chapter(work_id, chapter)
+
+        # Notify progress
+        self.progress_reporter.on_progress(
+            chapter.chapter_index,
+            chapter.title
+        )
+
+        try:
+            # Split into paragraphs/segments
+            segments = self._split_paragraphs(chapter.content)
+
+            # Translate each segment
+            translated_segments = []
+            for i, segment in enumerate(segments):
+                if not segment.strip():
+                    continue
+                translated = self.pipeline.translate(segment)
+                translated_segments.append(translated)
+
+                # Log progress for long chapters
+                if len(segments) > 10 and (i + 1) % 10 == 0:
+                    logger.debug(
+                        f"Translated {i + 1}/{len(segments)} segments "
+                        f"of chapter {chapter.chapter_index}"
+                    )
+
+            # Join translated segments
+            chapter.translation = "\n\n".join(translated_segments)
+            chapter.status = ChapterStatus.COMPLETED
+            chapter.error_message = None
+
+            # Save to repository
+            self.repository.save_chapter(work_id, chapter)
+
+            # Notify completion
+            self.progress_reporter.on_chapter_complete(
+                chapter.chapter_index,
+                chapter.title,
+                chapter.word_count
+            )
+
+            logger.info(f"Chapter {chapter.chapter_index} translation complete")
+
+        except Exception as e:
+            chapter.status = ChapterStatus.FAILED
+            chapter.error_message = str(e)
+            chapter.retry_count += 1
+            self.repository.save_chapter(work_id, chapter)
+
+            # Record failure
+            self.repository.record_failure(
+                work_id, chapter.chapter_index, e, traceback.format_exc()
+            )
+
+            # Notify failure
+            self.progress_reporter.on_chapter_failed(
+                chapter.chapter_index, e, chapter.title
+            )
+
+            logger.error(f"Chapter {chapter.chapter_index} failed: {e}")
+            raise
+
+        return chapter
+
+    def translate_work(
+        self,
+        work_id: str,
+        resume: bool = True,
+        start_index: Optional[int] = None,
+        end_index: Optional[int] = None
+    ) -> None:
+        """
+        Translate an entire work.
+
+        Args:
+            work_id: The work item ID
+            resume: If True, resume from pending chapters; if False, start fresh
+            start_index: Optional starting chapter index (inclusive)
+            end_index: Optional ending chapter index (exclusive)
+        """
+        # Get chapters to translate
+        if start_index is not None or end_index is not None:
+            # Range-based translation
+            all_chapters = self.repository.get_chapters(work_id)
+            chapters = [
+                c for c in all_chapters
+                if (start_index is None or c.chapter_index >= start_index)
+                and (end_index is None or c.chapter_index < end_index)
+            ]
+        elif resume:
+            chapters = self.repository.get_pending_chapters(work_id)
+        else:
+            all_chapters = self.repository.get_chapters(work_id)
+            chapters = [c for c in all_chapters if c.status != ChapterStatus.COMPLETED]
+
+        if not chapters:
+            logger.info(f"No chapters to translate for work {work_id}")
+            return
+
+        # Sort by chapter index
+        chapters.sort(key=lambda c: c.chapter_index)
+
+        # Notify start
+        self.progress_reporter.on_start(len(chapters))
+
+        # Update work status
+        try:
+            self.repository.update_work_status(work_id, "translating")
+        except Exception as e:
+            logger.warning(f"Could not update work status: {e}")
+
+        # Translate each chapter
+        for chapter in chapters:
+            try:
+                self.translate_chapter(work_id, chapter)
+            except Exception:
+                # Continue with next chapter even if one fails
+                logger.error(f"Failed to translate chapter {chapter.chapter_index}, continuing")
+                continue
+
+        # Notify complete
+        self.progress_reporter.on_complete()
+
+        # Update work status based on results
+        summary = self.progress_reporter.get_summary()
+        if summary["failed"] == 0:
+            self.repository.update_work_status(work_id, "completed")
+        elif summary["completed"] > 0:
+            self.repository.update_work_status(work_id, "paused")
+
+    def retry_failed_chapters(self, work_id: str, max_retries: int = 3) -> None:
+        """
+        Retry translating failed chapters.
+
+        Args:
+            work_id: The work item ID
+            max_retries: Maximum number of retry attempts per chapter
+        """
+        failed_chapters = self.repository.get_failed_chapters(work_id)
+
+        if not failed_chapters:
+            logger.info(f"No failed chapters to retry for work {work_id}")
+            return
+
+        # Filter by retry count
+        to_retry = [
+            c for c in failed_chapters
+            if c.retry_count < max_retries
+        ]
+
+        if not to_retry:
+            logger.info("All failed chapters have exceeded max retries")
+            return
+
+        logger.info(f"Retrying {len(to_retry)} failed chapters")
+
+        # Notify start
+        self.progress_reporter.on_start(len(to_retry))
+
+        for chapter in to_retry:
+            try:
+                self.translate_chapter(work_id, chapter)
+            except Exception:
+                logger.error(f"Retry failed for chapter {chapter.chapter_index}")
+                continue
+
+        self.progress_reporter.on_complete()
+
+    def set_progress_callback(self, callback: ProgressCallback) -> None:
+        """
+        Update the progress callback.
+
+        Args:
+            callback: New callback function
+        """
+        self.progress_reporter = ProgressReporter(callback=callback)

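The splitting strategy used by `_split_paragraphs` and `_split_long_paragraph` above can be sketched standalone: break on blank lines, then flush over-long paragraphs at sentence delimiters once a buffer nears the cap. Names and the default cap here are illustrative, not the module's API.

```python
from typing import List

MAX_SEGMENT_LENGTH = 2000  # illustrative cap, mirroring ChapterTranslator.MAX_SEGMENT_LENGTH


def split_paragraphs(content: str, max_len: int = MAX_SEGMENT_LENGTH) -> List[str]:
    """Split on blank lines; break over-long paragraphs at sentence ends."""
    segments: List[str] = []
    for para in content.split("\n\n"):
        para = para.strip()
        if not para:
            continue
        if len(para) <= max_len:
            segments.append(para)
            continue
        # Over-long paragraph: flush at a sentence delimiter once the
        # buffer reaches 80% of the cap, as the chapter translator does.
        current = ""
        for ch in para:
            current += ch
            if ch in "。!?.!?" and len(current) >= max_len * 0.8:
                segments.append(current.strip())
                current = ""
        if current.strip():
            segments.append(current.strip())
    return segments
```

Note that, like the original, this only flushes at delimiters, so a single delimiter-free run longer than the cap still comes back as one segment.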
+ 214 - 0
src/translator/engine.py

@@ -0,0 +1,214 @@
+"""
+Translation engine using m2m100 model.
+
+This module provides the core translation engine using Facebook's m2m100
+model for multilingual translation.
+"""
+
+import torch
+from pathlib import Path
+from typing import List, Optional
+
+try:
+    from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+except ImportError:
+    M2M100ForConditionalGeneration = None
+    M2M100Tokenizer = None
+
+
+class TranslationEngine:
+    """
+    m2m100 translation engine.
+
+    This class wraps the m2m100 model for translation tasks, providing
+    both single-text and batch translation capabilities.
+
+    Example:
+        >>> engine = TranslationEngine()
+        >>> result = engine.translate("你好世界", src_lang="zh", tgt_lang="en")
+        >>> print(result)  # "Hello world"
+    """
+
+    # Default model path from Phase 0 verification
+    DEFAULT_MODEL_PATH = "/mnt/code/223-236-template-6/phase0-test/models/m2m100_418M"
+
+    def __init__(self, model_path: Optional[str] = None, device: Optional[str] = None):
+        """
+        Initialize the translation engine.
+
+        Args:
+            model_path: Path to the m2m100 model. Defaults to DEFAULT_MODEL_PATH.
+            device: Device to use ("cuda", "cpu", or None for auto-detect)
+
+        Raises:
+            ImportError: If transformers library is not installed
+            FileNotFoundError: If model path does not exist
+        """
+        if M2M100ForConditionalGeneration is None:
+            raise ImportError(
+                "transformers library is required. "
+                "Install it with: pip install transformers torch"
+            )
+
+        self.model_path = model_path or self.DEFAULT_MODEL_PATH
+        self.device = device or self._get_device()
+        self._load_model()
+
+    def _get_device(self) -> str:
+        """
+        Detect the best available device.
+
+        Returns:
+            "cuda" if available, otherwise "cpu"
+        """
+        if torch.cuda.is_available():
+            return "cuda"
+        return "cpu"
+
+    def _load_model(self) -> None:
+        """
+        Load the model and tokenizer from disk.
+
+        Raises:
+            FileNotFoundError: If model directory doesn't exist
+        """
+        model_path = Path(self.model_path)
+        if not model_path.exists():
+            raise FileNotFoundError(
+                f"Model not found at {self.model_path}. "
+                f"Please download the m2m100 model first."
+            )
+
+        self.tokenizer = M2M100Tokenizer.from_pretrained(str(model_path))
+        self.model = M2M100ForConditionalGeneration.from_pretrained(str(model_path))
+        self.model.to(self.device)
+        self.model.eval()  # Set to evaluation mode
+
+    @property
+    def is_gpu_enabled(self) -> bool:
+        """Check if GPU is being used."""
+        return self.device == "cuda"
+
+    def translate(
+        self,
+        text: str,
+        src_lang: str = "zh",
+        tgt_lang: str = "en",
+        max_length: Optional[int] = None
+    ) -> str:
+        """
+        Translate a single text string.
+
+        Args:
+            text: The text to translate
+            src_lang: Source language code (default: "zh" for Chinese)
+            tgt_lang: Target language code (default: "en" for English)
+            max_length: Maximum generation length (default: 200 for m2m100)
+
+        Returns:
+            The translated text
+
+        Raises:
+            ValueError: If text is empty
+        """
+        if not text or not text.strip():
+            raise ValueError("Text to translate cannot be empty")
+
+        # Set source language
+        self.tokenizer.src_lang = src_lang
+
+        # Tokenize input
+        encoded = self.tokenizer(text, return_tensors="pt").to(self.device)
+
+        # Generate translation
+        if max_length is None:
+            max_length = 200
+
+        tgt_lang_id = self.tokenizer.lang_code_to_id[tgt_lang]
+        with torch.no_grad():
+            tokens = self.model.generate(
+                **encoded,
+                forced_bos_token_id=tgt_lang_id,
+                max_length=max_length
+            )
+
+        # Decode result
+        result = self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
+        return result
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        src_lang: str = "zh",
+        tgt_lang: str = "en",
+        batch_size: int = 4,
+        max_length: Optional[int] = None
+    ) -> List[str]:
+        """
+        Translate multiple texts in batches for improved efficiency.
+
+        Args:
+            texts: List of texts to translate
+            src_lang: Source language code
+            tgt_lang: Target language code
+            batch_size: Number of texts to process at once
+            max_length: Maximum generation length per text
+
+        Returns:
+            List of translated texts in the same order as input
+
+        Raises:
+            ValueError: If texts list is empty
+        """
+        if not texts:
+            raise ValueError("Texts list cannot be empty")
+
+        results = []
+        tgt_lang_id = self.tokenizer.lang_code_to_id[tgt_lang]
+
+        for i in range(0, len(texts), batch_size):
+            batch = texts[i:i + batch_size]
+
+            # Set source language for this batch
+            self.tokenizer.src_lang = src_lang
+
+            # Tokenize batch
+            encoded = self.tokenizer(batch, return_tensors="pt", padding=True).to(self.device)
+
+            # Generate translations
+            if max_length is None:
+                max_length = 200
+
+            with torch.no_grad():
+                tokens = self.model.generate(
+                    **encoded,
+                    forced_bos_token_id=tgt_lang_id,
+                    max_length=max_length
+                )
+
+            # Decode batch
+            batch_results = self.tokenizer.batch_decode(tokens, skip_special_tokens=True)
+            results.extend(batch_results)
+
+        return results
+
+    def get_supported_languages(self) -> List[str]:
+        """
+        Get list of supported language codes.
+
+        Returns:
+            List of language codes supported by the model
+        """
+        return list(self.tokenizer.lang_code_to_id.keys())
+
+    def is_language_supported(self, lang_code: str) -> bool:
+        """
+        Check if a language code is supported.
+
+        Args:
+            lang_code: Language code to check
+
+        Returns:
+            True if the language is supported
+        """
+        return lang_code in self.tokenizer.lang_code_to_id

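The batch loop in `translate_batch` chunks the input with `range(0, len(texts), batch_size)` and extends one results list, so output order matches input order regardless of batch size. The chunking itself can be shown with a stand-in for the model call (`translate_in_batches` and `translate_fn` are illustrative names, not the engine's API):

```python
from typing import Callable, List


def translate_in_batches(
    texts: List[str],
    translate_fn: Callable[[List[str]], List[str]],
    batch_size: int = 4,
) -> List[str]:
    """Run translate_fn over fixed-size chunks, preserving input order."""
    if not texts:
        raise ValueError("Texts list cannot be empty")
    results: List[str] = []
    for i in range(0, len(texts), batch_size):
        # Each chunk is translated as one unit, like one model.generate call.
        results.extend(translate_fn(texts[i:i + batch_size]))
    return results


# Stand-in for the model: uppercase each "translation".
out = translate_in_batches(
    [f"t{i}" for i in range(10)],
    lambda batch: [s.upper() for s in batch],
    batch_size=4,
)
```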
+ 222 - 0
src/translator/pipeline.py

@@ -0,0 +1,222 @@
+"""
+Translation pipeline integrating glossary and post-processing.
+
+This module provides a complete translation pipeline that combines
+the translation engine with glossary preprocessing and post-processing.
+"""
+
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
+
+from ..glossary.models import Glossary, GlossaryEntry
+from ..glossary.pipeline import GlossaryPipeline
+from ..glossary.postprocessor import GlossaryPostprocessor
+from .engine import TranslationEngine
+
+
+@dataclass
+class TranslationResult:
+    """
+    Result of a translation operation.
+
+    Attributes:
+        original: Original text before translation
+        translated: Final translated text after post-processing
+        raw_translation: Raw translation before post-processing
+        terms_used: List of glossary terms used in preprocessing
+        placeholder_map: Mapping of placeholders to translations
+    """
+
+    original: str
+    translated: str
+    raw_translation: str
+    terms_used: List[str]
+    placeholder_map: Dict[str, str]
+
+
+class TranslationPipeline:
+    """
+    Complete translation pipeline with glossary and post-processing.
+
+    This pipeline integrates:
+    1. Glossary preprocessing (term replacement with placeholders)
+    2. Translation via m2m100
+    3. Post-processing (placeholder restoration and cleanup)
+
+    Example:
+        >>> glossary = Glossary()
+        >>> glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+        >>> engine = TranslationEngine()
+        >>> pipeline = TranslationPipeline(engine, glossary)
+        >>> result = pipeline.translate("林风是青云宗的弟子")
+        >>> print(result.translated)  # "Lin Feng is a disciple of Qingyun Sect"
+    """
+
+    def __init__(
+        self,
+        engine: TranslationEngine,
+        glossary: Optional[Glossary] = None,
+        src_lang: str = "zh",
+        tgt_lang: str = "en"
+    ):
+        """
+        Initialize the translation pipeline.
+
+        Args:
+            engine: The translation engine to use
+            glossary: Optional glossary for terminology management
+            src_lang: Source language code
+            tgt_lang: Target language code
+        """
+        self.engine = engine
+        self.glossary = glossary or Glossary()
+        self.glossary_pipeline = GlossaryPipeline(self.glossary)
+        self.postprocessor = GlossaryPostprocessor()
+        self.src_lang = src_lang
+        self.tgt_lang = tgt_lang
+
+    @property
+    def has_glossary(self) -> bool:
+        """Check if a glossary is configured."""
+        return len(self.glossary) > 0
+
+    def translate(self, text: str, return_details: bool = False) -> str | TranslationResult:
+        """
+        Translate text with full glossary and post-processing pipeline.
+
+        Args:
+            text: The text to translate
+            return_details: If True, return TranslationResult with details
+
+        Returns:
+            Translated text, or TranslationResult if return_details=True
+        """
+        # Step 1: Glossary preprocessing
+        preprocessed, terms_used = self.glossary_pipeline.preprocess(text)
+
+        # Build placeholder map from matches
+        _, placeholder_map = self.glossary_pipeline.matcher.replace_with_placeholder(text)
+
+        # Step 2: Translation
+        raw_translation = self.engine.translate(
+            preprocessed,
+            src_lang=self.src_lang,
+            tgt_lang=self.tgt_lang
+        )
+
+        # Step 3: Post-processing
+        final_translation = self.postprocessor.process(raw_translation, placeholder_map)
+
+        if return_details:
+            return TranslationResult(
+                original=text,
+                translated=final_translation,
+                raw_translation=raw_translation,
+                terms_used=terms_used,
+                placeholder_map=placeholder_map
+            )
+
+        return final_translation
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        return_details: bool = False
+    ) -> List[str] | List[TranslationResult]:
+        """
+        Translate multiple texts with the full pipeline.
+
+        Args:
+            texts: List of texts to translate
+            return_details: If True, return TranslationResult for each text
+
+        Returns:
+            List of translated texts or TranslationResults
+        """
+        results = []
+
+        # Preprocess all texts
+        preprocessed_texts = []
+        all_placeholder_maps = []
+        all_terms_used = []
+
+        for text in texts:
+            preprocessed, terms_used = self.glossary_pipeline.preprocess(text)
+            _, placeholder_map = self.glossary_pipeline.matcher.replace_with_placeholder(text)
+            preprocessed_texts.append(preprocessed)
+            all_placeholder_maps.append(placeholder_map)
+            all_terms_used.append(terms_used)
+
+        # Translate in batch
+        raw_translations = self.engine.translate_batch(
+            preprocessed_texts,
+            src_lang=self.src_lang,
+            tgt_lang=self.tgt_lang
+        )
+
+        # Post-process each result
+        for i, raw_translation in enumerate(raw_translations):
+            final_translation = self.postprocessor.process(
+                raw_translation,
+                all_placeholder_maps[i]
+            )
+
+            if return_details:
+                results.append(TranslationResult(
+                    original=texts[i],
+                    translated=final_translation,
+                    raw_translation=raw_translation,
+                    terms_used=all_terms_used[i],
+                    placeholder_map=all_placeholder_maps[i]
+                ))
+            else:
+                results.append(final_translation)
+
+        return results
+
+    def update_glossary(self, glossary: Glossary) -> None:
+        """
+        Update the glossary used by this pipeline.
+
+        Args:
+            glossary: New glossary to use
+        """
+        self.glossary = glossary
+        self.glossary_pipeline = GlossaryPipeline(glossary)
+
+    def add_term(self, entry: GlossaryEntry) -> None:
+        """
+        Add a single term to the glossary.
+
+        Args:
+            entry: GlossaryEntry to add
+        """
+        self.glossary.add(entry)
+
+    def set_languages(self, src_lang: str, tgt_lang: str) -> None:
+        """
+        Update source and target languages.
+
+        Args:
+            src_lang: Source language code
+            tgt_lang: Target language code
+        """
+        if not self.engine.is_language_supported(src_lang):
+            raise ValueError(f"Source language '{src_lang}' is not supported")
+        if not self.engine.is_language_supported(tgt_lang):
+            raise ValueError(f"Target language '{tgt_lang}' is not supported")
+
+        self.src_lang = src_lang
+        self.tgt_lang = tgt_lang
+
+    def get_statistics(self, text: str) -> Dict[str, int]:
+        """
+        Get terminology statistics for the given text.
+
+        Args:
+            text: Text to analyze
+
+        Returns:
+            Dictionary mapping term names to occurrence counts
+        """
+        return self.glossary_pipeline.get_statistics(text)

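The preprocess → translate → postprocess flow above depends on placeholders surviving machine translation so glossary terms come back verbatim. A minimal round-trip sketch (the placeholder format, `GLOSSARY`, and the fake MT step are all illustrative, not the project's actual `GlossaryPipeline`/`GlossaryPostprocessor` behavior):

```python
from typing import Dict, Tuple

GLOSSARY: Dict[str, str] = {"林风": "Lin Feng"}  # source term -> fixed translation


def preprocess(text: str, glossary: Dict[str, str]) -> Tuple[str, Dict[str, str]]:
    """Replace glossary terms with stable placeholders before translation."""
    placeholder_map: Dict[str, str] = {}
    for i, (term, translation) in enumerate(glossary.items()):
        if term in text:
            placeholder = f"TERM{i}"
            text = text.replace(term, placeholder)
            placeholder_map[placeholder] = translation
    return text, placeholder_map


def postprocess(raw: str, placeholder_map: Dict[str, str]) -> str:
    """Restore each placeholder with the glossary's fixed translation."""
    for placeholder, translation in placeholder_map.items():
        raw = raw.replace(placeholder, translation)
    return raw


pre, mapping = preprocess("林风走了", GLOSSARY)   # "TERM0走了"
raw = pre.replace("走了", " left")                # stand-in for the MT step
final = postprocess(raw, mapping)                 # "Lin Feng left"
```

The design point is that the glossary term is decided once, before translation, so the model cannot render a character name inconsistently across chapters.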
+ 277 - 0
src/translator/progress.py

@@ -0,0 +1,277 @@
+"""
+Progress reporting for translation operations.
+
+This module provides progress tracking and callback mechanisms
+for long-running translation tasks.
+"""
+
+from dataclasses import dataclass
+from typing import Callable, Optional, Any, Dict
+from datetime import datetime
+from enum import Enum
+
+
+class ProgressStatus(Enum):
+    """Status types for progress updates."""
+
+    START = "start"
+    PROGRESS = "progress"
+    CHAPTER_COMPLETE = "chapter_complete"
+    CHAPTER_FAILED = "chapter_failed"
+    COMPLETE = "complete"
+    ERROR = "error"
+
+
+ProgressCallback = Callable[[ProgressStatus, Dict[str, Any]], None]
+
+
+@dataclass
+class ProgressUpdate:
+    """
+    A progress update event.
+
+    Attributes:
+        status: The type of status update
+        timestamp: When the update occurred
+        data: Additional data about the update
+    """
+
+    status: ProgressStatus
+    timestamp: datetime
+    data: Dict[str, Any]
+
+
+class ProgressReporter:
+    """
+    Progress reporter for translation operations.
+
+    This class tracks translation progress and invokes callbacks
+    at key points during the translation process.
+
+    Example:
+        >>> def my_callback(status, data):
+        ...     print(f"Status: {status}, Data: {data}")
+        >>> reporter = ProgressReporter(callback=my_callback)
+        >>> reporter.on_start(total=10)
+        >>> reporter.on_progress(chapter_index=0)
+    """
+
+    def __init__(self, callback: Optional[ProgressCallback] = None):
+        """
+        Initialize the progress reporter.
+
+        Args:
+            callback: Optional callback function for progress updates
+        """
+        self.callback = callback
+        self._reset()
+
+    def _reset(self) -> None:
+        """Reset all counters."""
+        self.total = 0
+        self.completed = 0
+        self.failed = 0
+        self.current_chapter: Optional[int] = None
+        self.start_time: Optional[datetime] = None
+        self.end_time: Optional[datetime] = None
+
+    def on_start(self, total: int) -> None:
+        """
+        Called when translation starts.
+
+        Args:
+            total: Total number of chapters to translate
+        """
+        self._reset()
+        self.total = total
+        self.start_time = datetime.now()
+        self._report(ProgressStatus.START, {"total": total})
+
+    def on_progress(self, chapter_index: int, chapter_title: str = "") -> None:
+        """
+        Called when a chapter translation is in progress.
+
+        Args:
+            chapter_index: Index of the chapter being translated
+            chapter_title: Optional chapter title
+        """
+        self.current_chapter = chapter_index
+        data = {"chapter_index": chapter_index, "chapter_title": chapter_title}
+        self._report(ProgressStatus.PROGRESS, data)
+
+    def on_chapter_complete(
+        self,
+        chapter_index: int,
+        chapter_title: str = "",
+        word_count: int = 0
+    ) -> None:
+        """
+        Called when a chapter translation completes successfully.
+
+        Args:
+            chapter_index: Index of the completed chapter
+            chapter_title: Optional chapter title
+            word_count: Word count of the chapter
+        """
+        self.completed += 1
+        self.current_chapter = None
+        data = {
+            "chapter_index": chapter_index,
+            "chapter_title": chapter_title,
+            "word_count": word_count,
+            "completed": self.completed,
+            "total": self.total,
+            "progress_percent": self.progress_percent,
+        }
+        self._report(ProgressStatus.CHAPTER_COMPLETE, data)
+
+    def on_chapter_failed(
+        self,
+        chapter_index: int,
+        error: Exception,
+        chapter_title: str = ""
+    ) -> None:
+        """
+        Called when a chapter translation fails.
+
+        Args:
+            chapter_index: Index of the failed chapter
+            error: The exception that occurred
+            chapter_title: Optional chapter title
+        """
+        self.failed += 1
+        self.current_chapter = None
+        data = {
+            "chapter_index": chapter_index,
+            "chapter_title": chapter_title,
+            "error": str(error),
+            "error_type": type(error).__name__,
+            "failed": self.failed,
+            "total": self.total,
+        }
+        self._report(ProgressStatus.CHAPTER_FAILED, data)
+
+    def on_complete(self) -> None:
+        """Called when all translation is complete."""
+        self.end_time = datetime.now()
+        data = {
+            "completed": self.completed,
+            "failed": self.failed,
+            "total": self.total,
+            "progress_percent": self.progress_percent,
+            "duration_seconds": self.duration_seconds,
+        }
+        self._report(ProgressStatus.COMPLETE, data)
+
+    def on_error(self, error: Exception) -> None:
+        """
+        Called when a general error occurs (not chapter-specific).
+
+        Args:
+            error: The exception that occurred
+        """
+        data = {
+            "error": str(error),
+            "error_type": type(error).__name__,
+        }
+        self._report(ProgressStatus.ERROR, data)
+
+    def _report(self, status: ProgressStatus, data: Dict[str, Any]) -> None:
+        """
+        Trigger the callback with a progress update.
+
+        Args:
+            status: The status type
+            data: Additional data about the update
+        """
+        if self.callback:
+            self.callback(status, data)
+
+    @property
+    def progress_percent(self) -> float:
+        """Get progress as a percentage (0-100)."""
+        if self.total == 0:
+            return 0.0
+        return (self.completed / self.total) * 100
+
+    @property
+    def duration_seconds(self) -> Optional[float]:
+        """Elapsed seconds since start (total duration once complete); None if not started."""
+        if self.start_time is None:
+            return None
+        end = self.end_time or datetime.now()
+        return (end - self.start_time).total_seconds()
+
+    @property
+    def is_complete(self) -> bool:
+        """Check if all chapters are processed (completed or failed)."""
+        return (self.completed + self.failed) >= self.total and self.total > 0
+
+    def get_summary(self) -> Dict[str, Any]:
+        """
+        Get a summary of the translation progress.
+
+        Returns:
+            Dictionary with progress summary
+        """
+        return {
+            "total": self.total,
+            "completed": self.completed,
+            "failed": self.failed,
+            "remaining": self.total - self.completed - self.failed,
+            "progress_percent": self.progress_percent,
+            "duration_seconds": self.duration_seconds,
+            "is_complete": self.is_complete,
+        }
+
+
+class ConsoleProgressReporter:
+    """
+    Simple console-based progress reporter.
+
+    This class prints progress updates to the console.
+    """
+
+    def __init__(self, show_details: bool = True):
+        """
+        Initialize the console reporter.
+
+        Args:
+            show_details: Whether to show detailed progress
+        """
+        self.show_details = show_details
+        self.reporter = ProgressReporter(callback=self._print_update)
+
+    def _print_update(self, status: ProgressStatus, data: Dict[str, Any]) -> None:
+        """Print progress update to console."""
+        if status == ProgressStatus.START:
+            print("\n=== Starting Translation ===")
+            print(f"Total chapters: {data.get('total', 0)}")
+
+        elif status == ProgressStatus.PROGRESS:
+            if self.show_details:
+                # 'chapter_title' is always present (possibly ""), so use `or`
+                # rather than a dict.get() default to fall back to the index.
+                title = data.get('chapter_title') or f"Chapter {data['chapter_index']}"
+                print(f"Translating: {title}...")
+
+        elif status == ProgressStatus.CHAPTER_COMPLETE:
+            title = data.get('chapter_title') or f"Chapter {data['chapter_index']}"
+            pct = data.get('progress_percent', 0)
+            print(f"✓ {title} - [{data['completed']}/{data['total']}] ({pct:.1f}%)")
+
+        elif status == ProgressStatus.CHAPTER_FAILED:
+            title = data.get('chapter_title') or f"Chapter {data['chapter_index']}"
+            print(f"✗ {title} - Failed: {data.get('error', 'Unknown error')}")
+
+        elif status == ProgressStatus.COMPLETE:
+            print("\n=== Translation Complete ===")
+            print(f"Completed: {data['completed']}/{data['total']}")
+            print(f"Failed: {data['failed']}")
+            if data.get('duration_seconds'):
+                print(f"Duration: {data['duration_seconds']:.1f} seconds")
+
+        elif status == ProgressStatus.ERROR:
+            print(f"\n!!! Error: {data.get('error', 'Unknown error')} !!!")
+
+    def get_reporter(self) -> ProgressReporter:
+        """Get the underlying ProgressReporter instance."""
+        return self.reporter
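The observer-style flow above (reporter mutates counters, then forwards a status plus a data dict to an injected callback) can be sketched in isolation. This is a minimal stand-in, not the real `ProgressReporter`; the `MiniReporter` name and the trimmed event set are invented for illustration.

```python
from typing import Any, Callable, Dict, List, Optional, Tuple


class MiniReporter:
    """Simplified sketch of the callback-based progress pattern."""

    def __init__(self, callback: Optional[Callable[[str, Dict[str, Any]], None]] = None):
        self.callback = callback
        self.total = 0
        self.completed = 0

    def on_start(self, total: int) -> None:
        self.total = total
        self._report("start", {"total": total})

    def on_chapter_complete(self, chapter_index: int) -> None:
        self.completed += 1
        self._report("chapter_complete", {
            "chapter_index": chapter_index,
            "progress_percent": self.progress_percent,
        })

    @property
    def progress_percent(self) -> float:
        # Guard against division by zero before any on_start() call.
        return 0.0 if self.total == 0 else (self.completed / self.total) * 100

    def _report(self, status: str, data: Dict[str, Any]) -> None:
        if self.callback:
            self.callback(status, data)


# A recording callback, as the unit tests below do with Mock():
events: List[Tuple[str, Dict[str, Any]]] = []
reporter = MiniReporter(callback=lambda s, d: events.append((s, d)))
reporter.on_start(total=4)
reporter.on_chapter_complete(0)
reporter.on_chapter_complete(1)
print(reporter.progress_percent)  # 50.0
```

Because the reporter only calls `self.callback`, the same engine code can drive a console printer, a progress bar, or a test spy without modification.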

+ 568 - 0
tests/test_translator.py

@@ -0,0 +1,568 @@
+"""
+Unit tests for the translator module.
+
+Tests cover:
+- TranslationEngine initialization and basic translation
+- TranslationPipeline with glossary integration
+- ProgressReporter callbacks
+- ChapterTranslator (mocked)
+"""
+
+import pytest
+from unittest.mock import Mock, MagicMock, patch
+
+from src.translator.engine import TranslationEngine
+from src.translator.pipeline import TranslationPipeline, TranslationResult
+from src.translator.progress import ProgressReporter, ProgressStatus, ConsoleProgressReporter
+from src.translator.chapter_translator import ChapterTranslator
+
+from src.glossary.models import Glossary, GlossaryEntry, TermCategory
+
+
+# ============================================================================
+# Test TranslationEngine (Story 5.1)
+# ============================================================================
+
+class TestTranslationEngine:
+    """Tests for TranslationEngine class."""
+
+    @pytest.fixture
+    def mock_transformers(self):
+        """Mock the transformers library."""
+        with patch('src.translator.engine.M2M100ForConditionalGeneration') as mock_model, \
+             patch('src.translator.engine.M2M100Tokenizer') as mock_tokenizer:
+            # Setup mock tokenizer
+            mock_tok_instance = MagicMock()
+            mock_tok_instance.src_lang = "zh"
+            mock_tok_instance.lang_code_to_id = {"zh": 1, "en": 2, "fr": 3}
+            mock_tok_instance.return_tensors = "pt"
+            mock_tokenizer.from_pretrained.return_value = mock_tok_instance
+
+            # Setup mock model
+            mock_model_instance = MagicMock()
+            mock_model.from_pretrained.return_value = mock_model_instance
+            mock_model_instance.eval.return_value = None
+
+            yield {
+                "model": mock_model,
+                "tokenizer": mock_tokenizer,
+                "model_instance": mock_model_instance,
+                "tokenizer_instance": mock_tok_instance
+            }
+
+    @pytest.fixture
+    def mock_model_path(self, tmp_path):
+        """Create a temporary mock model directory."""
+        model_dir = tmp_path / "m2m100_418M"
+        model_dir.mkdir()
+        (model_dir / "config.json").write_text("{}")
+        return str(model_dir)
+
+    def test_engine_init_with_mock_path(self, mock_transformers, mock_model_path):
+        """Test engine initialization with a mock model path."""
+        mock_transformers["tokenizer_instance"].batch_decode.return_value = ["Hello world"]
+
+        engine = TranslationEngine(model_path=mock_model_path)
+
+        assert engine.model_path == mock_model_path
+        assert engine.device in ("cpu", "cuda")
+
+    def test_engine_init_import_error(self):
+        """Test that ImportError is raised when transformers is not available."""
+        with patch('src.translator.engine.M2M100ForConditionalGeneration', None):
+            with pytest.raises(ImportError, match="transformers library"):
+                TranslationEngine(model_path="/fake/path")
+
+    def test_translate_single_text(self, mock_transformers, mock_model_path):
+        """Test basic single-text translation."""
+        mock_tok = mock_transformers["tokenizer_instance"]
+        mock_tok.batch_decode.return_value = ["Hello world"]
+
+        engine = TranslationEngine(model_path=mock_model_path)
+        result = engine.translate("你好世界", src_lang="zh", tgt_lang="en")
+
+        assert result == "Hello world"
+        mock_tok.batch_decode.assert_called_once()
+
+    def test_translate_empty_text_raises_error(self, mock_transformers, mock_model_path):
+        """Test that translating empty text raises ValueError."""
+        mock_tok = mock_transformers["tokenizer_instance"]
+        mock_tok.batch_decode.return_value = ["Hello"]
+
+        engine = TranslationEngine(model_path=mock_model_path)
+
+        with pytest.raises(ValueError, match="cannot be empty"):
+            engine.translate("", src_lang="zh", tgt_lang="en")
+
+    def test_translate_batch(self, mock_transformers, mock_model_path):
+        """Test batch translation."""
+        mock_tok = mock_transformers["tokenizer_instance"]
+        mock_tok.batch_decode.return_value = ["Hello", "World", "Test"]
+
+        engine = TranslationEngine(model_path=mock_model_path)
+        results = engine.translate_batch(
+            ["你好", "世界", "测试"],
+            src_lang="zh",
+            tgt_lang="en",
+            batch_size=3
+        )
+
+        assert len(results) == 3
+        assert results == ["Hello", "World", "Test"]
+
+    def test_translate_batch_empty_raises_error(self, mock_transformers, mock_model_path):
+        """Test that empty batch list raises ValueError."""
+        mock_tok = mock_transformers["tokenizer_instance"]
+        mock_tok.batch_decode.return_value = []
+
+        engine = TranslationEngine(model_path=mock_model_path)
+
+        with pytest.raises(ValueError, match="cannot be empty"):
+            engine.translate_batch([], src_lang="zh", tgt_lang="en")
+
+    def test_is_language_supported(self, mock_transformers, mock_model_path):
+        """Test language support checking."""
+        mock_tok = mock_transformers["tokenizer_instance"]
+        mock_tok.batch_decode.return_value = ["Hello"]
+        mock_tok.lang_code_to_id = {"zh": 1, "en": 2, "fr": 3}
+
+        engine = TranslationEngine(model_path=mock_model_path)
+
+        assert engine.is_language_supported("zh") is True
+        assert engine.is_language_supported("en") is True
+        assert engine.is_language_supported("de") is False
+
+
+# ============================================================================
+# Test TranslationPipeline (Story 5.2)
+# ============================================================================
+
+class TestTranslationPipeline:
+    """Tests for TranslationPipeline class."""
+
+    @pytest.fixture
+    def mock_engine(self):
+        """Create a mock translation engine."""
+        engine = MagicMock(spec=TranslationEngine)
+        engine.translate.return_value = "Lin Feng is a disciple"
+        engine.translate_batch.return_value = ["Hello", "World"]
+        engine.is_language_supported.return_value = True
+        return engine
+
+    @pytest.fixture
+    def sample_glossary(self):
+        """Create a sample glossary."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+        glossary.add(GlossaryEntry("青云宗", "Qingyun Sect", TermCategory.LOCATION))
+        return glossary
+
+    def test_pipeline_init(self, mock_engine):
+        """Test pipeline initialization."""
+        pipeline = TranslationPipeline(mock_engine)
+
+        assert pipeline.engine is mock_engine
+        assert pipeline.src_lang == "zh"
+        assert pipeline.tgt_lang == "en"
+
+    def test_pipeline_with_glossary(self, mock_engine, sample_glossary):
+        """Test pipeline with glossary."""
+        pipeline = TranslationPipeline(mock_engine, sample_glossary)
+
+        assert pipeline.has_glossary is True
+        assert len(pipeline.glossary) == 2
+
+    def test_translate_simple(self, mock_engine):
+        """Test simple translation without glossary."""
+        pipeline = TranslationPipeline(mock_engine)
+        result = pipeline.translate("Hello world")
+
+        assert result == "Lin Feng is a disciple"
+        mock_engine.translate.assert_called_once()
+
+    def test_translate_with_glossary(self, mock_engine, sample_glossary):
+        """Test translation with glossary preprocessing."""
+        mock_engine.translate.return_value = "__en__Lin Feng is a disciple of __en__Qingyun Sect"
+        pipeline = TranslationPipeline(mock_engine, sample_glossary)
+
+        result = pipeline.translate("林风是青云宗的弟子")
+
+        # After post-processing, placeholders should be replaced
+        assert "Lin Feng" in result
+        assert "Qingyun Sect" in result
+
+    def test_translate_return_details(self, mock_engine, sample_glossary):
+        """Test translation with detailed result."""
+        mock_engine.translate.return_value = "__en__Lin Feng is here"
+        pipeline = TranslationPipeline(mock_engine, sample_glossary)
+
+        result = pipeline.translate("林风在这里", return_details=True)
+
+        assert isinstance(result, TranslationResult)
+        assert result.original == "林风在这里"
+        assert "Lin Feng" in result.translated
+        assert len(result.terms_used) > 0
+        assert isinstance(result.placeholder_map, dict)
+
+    def test_translate_batch(self, mock_engine):
+        """Test batch translation."""
+        mock_engine.translate_batch.return_value = ["Result 1", "Result 2"]
+        pipeline = TranslationPipeline(mock_engine)
+
+        results = pipeline.translate_batch(["Text 1", "Text 2"])
+
+        assert len(results) == 2
+        mock_engine.translate_batch.assert_called_once()
+
+    def test_add_term(self, mock_engine):
+        """Test adding a term to the pipeline glossary."""
+        pipeline = TranslationPipeline(mock_engine)
+        entry = GlossaryEntry("test", "TEST", TermCategory.OTHER)
+
+        pipeline.add_term(entry)
+
+        assert pipeline.has_glossary is True
+        assert "test" in pipeline.glossary
+
+    def test_set_languages_valid(self, mock_engine):
+        """Test setting valid languages."""
+        pipeline = TranslationPipeline(mock_engine)
+        mock_engine.is_language_supported.return_value = True
+
+        pipeline.set_languages("en", "fr")
+
+        assert pipeline.src_lang == "en"
+        assert pipeline.tgt_lang == "fr"
+
+    def test_set_languages_invalid(self, mock_engine):
+        """Test setting invalid language raises error."""
+        pipeline = TranslationPipeline(mock_engine)
+        mock_engine.is_language_supported.side_effect = lambda x: x in ["zh", "en"]
+
+        with pytest.raises(ValueError, match="not supported"):
+            pipeline.set_languages("xx", "yy")
+
+
+# ============================================================================
+# Test ProgressReporter (Story 5.4)
+# ============================================================================
+
+class TestProgressReporter:
+    """Tests for ProgressReporter class."""
+
+    def test_reporter_init(self):
+        """Test reporter initialization."""
+        callback = Mock()
+        reporter = ProgressReporter(callback)
+
+        assert reporter.callback is callback
+        assert reporter.total == 0
+        assert reporter.completed == 0
+
+    def test_on_start(self):
+        """Test start event."""
+        callback = Mock()
+        reporter = ProgressReporter(callback)
+
+        reporter.on_start(total=10)
+
+        assert reporter.total == 10
+        callback.assert_called_once()
+        status, data = callback.call_args[0]
+        assert status == ProgressStatus.START
+        assert data["total"] == 10
+
+    def test_on_chapter_complete(self):
+        """Test chapter complete event."""
+        callback = Mock()
+        reporter = ProgressReporter(callback)
+        reporter.on_start(total=5)
+
+        reporter.on_chapter_complete(chapter_index=0, chapter_title="Chapter 1")
+
+        assert reporter.completed == 1
+        assert reporter.progress_percent == 20.0
+
+    def test_on_chapter_failed(self):
+        """Test chapter failed event."""
+        callback = Mock()
+        reporter = ProgressReporter(callback)
+        reporter.on_start(total=5)
+
+        error = Exception("Test error")
+        reporter.on_chapter_failed(chapter_index=0, error=error)
+
+        assert reporter.failed == 1
+
+    def test_on_complete(self):
+        """Test complete event."""
+        callback = Mock()
+        reporter = ProgressReporter(callback)
+        reporter.on_start(total=3)
+        reporter.on_chapter_complete(chapter_index=0)
+        reporter.on_chapter_complete(chapter_index=1)
+
+        reporter.on_complete()
+
+        assert reporter.is_complete is True
+        assert reporter.duration_seconds is not None
+
+    def test_progress_percent(self):
+        """Test progress percentage calculation."""
+        reporter = ProgressReporter()
+        reporter.on_start(total=10)
+
+        assert reporter.progress_percent == 0.0
+
+        for i in range(5):
+            reporter.on_chapter_complete(chapter_index=i)
+
+        assert reporter.progress_percent == 50.0
+
+    def test_get_summary(self):
+        """Test getting progress summary."""
+        reporter = ProgressReporter()
+        reporter.on_start(total=10)
+        reporter.on_chapter_complete(chapter_index=0)
+        reporter.on_chapter_complete(chapter_index=1)
+        reporter.on_chapter_failed(chapter_index=2, error=Exception("test"))
+
+        summary = reporter.get_summary()
+
+        assert summary["total"] == 10
+        assert summary["completed"] == 2
+        assert summary["failed"] == 1
+        assert summary["remaining"] == 7
+        assert summary["progress_percent"] == 20.0
+
+
+class TestConsoleProgressReporter:
+    """Tests for ConsoleProgressReporter class."""
+
+    def test_console_reporter_init(self):
+        """Test console reporter initialization."""
+        reporter = ConsoleProgressReporter(show_details=True)
+
+        assert reporter.show_details is True
+        assert reporter.reporter is not None
+
+    def test_get_reporter(self):
+        """Test getting underlying reporter."""
+        console = ConsoleProgressReporter()
+        reporter = console.get_reporter()
+
+        assert isinstance(reporter, ProgressReporter)
+
+
+# ============================================================================
+# Test ChapterTranslator (Story 5.3)
+# ============================================================================
+
+class TestChapterTranslator:
+    """Tests for ChapterTranslator class."""
+
+    @pytest.fixture
+    def mock_pipeline(self):
+        """Create a mock translation pipeline."""
+        pipeline = MagicMock(spec=TranslationPipeline)
+        pipeline.translate.return_value = "Translated text"
+        return pipeline
+
+    @pytest.fixture
+    def mock_repository(self):
+        """Create a mock repository."""
+        repo = MagicMock()
+        repo.save_chapter = MagicMock()
+        repo.get_pending_chapters.return_value = []
+        repo.get_chapters.return_value = []
+        repo.get_failed_chapters.return_value = []
+        repo.record_failure = MagicMock()
+        repo.update_work_status = MagicMock()
+        return repo
+
+    @pytest.fixture
+    def sample_chapter(self):
+        """Create a sample chapter."""
+        from src.repository.models import ChapterItem, ChapterStatus
+
+        return ChapterItem(
+            work_id="test_work",
+            chapter_index=0,
+            title="Test Chapter",
+            content="Test content for translation.",
+            status=ChapterStatus.PENDING
+        )
+
+    def test_translator_init(self, mock_pipeline, mock_repository):
+        """Test translator initialization."""
+        translator = ChapterTranslator(mock_pipeline, mock_repository)
+
+        assert translator.pipeline is mock_pipeline
+        assert translator.repository is mock_repository
+
+    def test_split_paragraphs_simple(self, mock_pipeline, mock_repository):
+        """Test splitting simple paragraphs."""
+        translator = ChapterTranslator(mock_pipeline, mock_repository)
+
+        content = "Para 1\n\nPara 2\n\nPara 3"
+        segments = translator._split_paragraphs(content)
+
+        assert len(segments) == 3
+        assert segments[0] == "Para 1"
+        assert segments[1] == "Para 2"
+        assert segments[2] == "Para 3"
+
+    def test_split_long_paragraph(self, mock_pipeline, mock_repository):
+        """Test splitting a long paragraph."""
+        translator = ChapterTranslator(mock_pipeline, mock_repository)
+
+        # Create a long paragraph
+        long_text = "。".join(["Sentence " + str(i) for i in range(100)])
+        segments = translator._split_long_paragraph(long_text)
+
+        assert len(segments) > 1
+        # Each segment should be under the max length
+        for seg in segments:
+            assert len(seg) <= translator.MAX_SEGMENT_LENGTH + 100  # Allow some buffer
+
+    def test_translate_chapter_success(
+        self, mock_pipeline, mock_repository, sample_chapter
+    ):
+        """Test successful chapter translation."""
+        translator = ChapterTranslator(mock_pipeline, mock_repository)
+
+        result = translator.translate_chapter("test_work", sample_chapter)
+
+        assert result.status == "completed"
+        assert result.translation is not None
+        mock_repository.save_chapter.assert_called()
+
+    def test_translate_chapter_already_completed(
+        self, mock_pipeline, mock_repository
+    ):
+        """Test skipping already translated chapter."""
+        from src.repository.models import ChapterItem, ChapterStatus
+
+        chapter = ChapterItem(
+            work_id="test_work",
+            chapter_index=0,
+            title="Test",
+            content="Content",
+            status=ChapterStatus.COMPLETED,
+            translation="Already translated"
+        )
+
+        translator = ChapterTranslator(mock_pipeline, mock_repository)
+        result = translator.translate_chapter("test_work", chapter)
+
+        assert result.translation == "Already translated"
+        # translate should not be called
+        mock_pipeline.translate.assert_not_called()
+
+    def test_translate_work_empty(self, mock_pipeline, mock_repository):
+        """Test translating work with no pending chapters."""
+        mock_repository.get_pending_chapters.return_value = []
+        mock_repository.get_chapters.return_value = []
+
+        translator = ChapterTranslator(mock_pipeline, mock_repository)
+        translator.translate_work("test_work")
+
+        # Should not crash, should just return
+        mock_pipeline.translate.assert_not_called()
+
+    def test_retry_failed_chapters(self, mock_pipeline, mock_repository):
+        """Test retrying failed chapters."""
+        from src.repository.models import ChapterItem, ChapterStatus
+
+        failed_chapter = ChapterItem(
+            work_id="test_work",
+            chapter_index=0,
+            title="Failed",
+            content="Content",
+            status=ChapterStatus.FAILED,
+            retry_count=0
+        )
+
+        mock_repository.get_failed_chapters.return_value = [failed_chapter]
+
+        translator = ChapterTranslator(mock_pipeline, mock_repository)
+        translator.retry_failed_chapters("test_work")
+
+        assert mock_pipeline.translate.called
+
+    def test_set_progress_callback(self, mock_pipeline, mock_repository):
+        """Test setting a new progress callback."""
+        translator = ChapterTranslator(mock_pipeline, mock_repository)
+
+        new_callback = Mock()
+        translator.set_progress_callback(new_callback)
+
+        assert translator.progress_reporter.callback is new_callback
+
+
+# ============================================================================
+# Integration Tests (with mocked external dependencies)
+# ============================================================================
+
+class TestIntegration:
+    """Integration tests for the translator module."""
+
+    @pytest.fixture
+    def full_pipeline(self, tmp_path):
+        """Create a full pipeline with mocked model but real other components."""
+        with patch('src.translator.engine.M2M100ForConditionalGeneration') as mock_model, \
+             patch('src.translator.engine.M2M100Tokenizer') as mock_tokenizer:
+
+            # Setup mocks
+            mock_tok_instance = MagicMock()
+            mock_tok_instance.src_lang = "zh"
+            mock_tok_instance.lang_code_to_id = {"zh": 1, "en": 2}
+            mock_tokenizer.from_pretrained.return_value = mock_tok_instance
+
+            mock_model_instance = MagicMock()
+            mock_model.from_pretrained.return_value = mock_model_instance
+
+            # Create mock model directory
+            model_dir = tmp_path / "model"
+            model_dir.mkdir()
+            (model_dir / "config.json").write_text("{}")
+
+            # Return configured components
+            mock_tok_instance.batch_decode.return_value = ["Translated text"]
+
+            engine = TranslationEngine(model_path=str(model_dir))
+
+            glossary = Glossary()
+            glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+
+            pipeline = TranslationPipeline(engine, glossary)
+
+            return {
+                "engine": engine,
+                "pipeline": pipeline,
+                "glossary": glossary,
+                "mock_tok": mock_tok_instance
+            }
+
+    def test_full_pipeline_translate(self, full_pipeline):
+        """Test full pipeline from text to translation."""
+        pipeline = full_pipeline["pipeline"]
+        mock_tok = full_pipeline["mock_tok"]
+
+        # Setup mock to return text with placeholder
+        mock_tok.batch_decode.return_value = ["__en__Lin Feng is here"]
+
+        result = pipeline.translate("林风在这里")
+
+        assert "Lin Feng" in result
+
+    def test_full_pipeline_statistics(self, full_pipeline):
+        """Test getting statistics from pipeline."""
+        pipeline = full_pipeline["pipeline"]
+
+        stats = pipeline.get_statistics("林风是林风的剑")
+
+        assert "林风" in stats
+        assert stats["林风"] == 2
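The integration tests assert that glossary terms such as `林风` survive machine translation as `Lin Feng`. A sketch of the underlying pre/post-processing round trip is below; the actual placeholder scheme lives inside `TranslationPipeline` and `GlossaryPostprocessor`, so the `TERM{i}` token format and both function names here are hypothetical.

```python
from typing import Dict, Tuple

GLOSSARY = {"林风": "Lin Feng", "青云宗": "Qingyun Sect"}


def preprocess(text: str, glossary: Dict[str, str]) -> Tuple[str, Dict[str, str]]:
    """Replace glossary source terms with stable placeholder tokens before MT."""
    placeholder_map: Dict[str, str] = {}
    for i, (src, tgt) in enumerate(glossary.items()):
        token = f"TERM{i}"
        if src in text:
            text = text.replace(src, token)
            placeholder_map[token] = tgt
    return text, placeholder_map


def postprocess(translated: str, placeholder_map: Dict[str, str]) -> str:
    """Swap the placeholders back to the curated target-language terms."""
    for token, tgt in placeholder_map.items():
        translated = translated.replace(token, tgt)
    return translated


pre, mapping = preprocess("林风是青云宗的弟子", GLOSSARY)
# Stand-in for the m2m100 call; a real model would translate the Chinese
# around the placeholders while (ideally) leaving the tokens intact.
fake_mt_output = pre.replace("是", " is a disciple of ").replace("的弟子", "")
print(postprocess(fake_mt_output, mapping))  # → Lin Feng is a disciple of Qingyun Sect
```

Protecting terms this way is what makes `terms_used` and `placeholder_map` on `TranslationResult` meaningful: the pipeline can report exactly which glossary entries were applied to a given segment.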