瀏覽代碼

feat: Prepare Phase 1 development environment

- Create Epic 1.1 (State Machine) story breakdown (5 stories, 18 SP)
- Create Epic 4 (Glossary) story breakdown (5 stories, 26 SP, P0 priority)
- Set up project code structure:
  - src/pipeline/: State machine implementation
  - src/glossary/: Terminology management
  - src/utils/: Persistence utilities
- Add comprehensive test suites:
  - tests/test_state_machine.py: State transition tests
  - tests/test_glossary.py: Terminology processing tests
- Add requirements.txt with dependencies (transitions, pytest)
- Add pytest.ini configuration for coverage reporting

This prepares the development environment for Phase 1a (Infrastructure)
and Phase 1b (Glossary) implementation based on Party Mode prioritization.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
d8dfun 2 天之前
父節點
當前提交
d55ea7de17

+ 6 - 0
pytest.ini

@@ -0,0 +1,6 @@
+[pytest]
+testpaths = tests
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = --cov=src --cov-report=term-missing --cov-report=html

+ 12 - 0
requirements.txt

@@ -0,0 +1,12 @@
+# 223-236-template-6: BMAD Novel Translator
+# Python dependencies
+
+# State Machine
+transitions==0.9.0
+
+# Testing
+pytest==7.4.0
+pytest-cov==4.1.0
+
+# Utilities
+pyyaml==6.0.1

+ 7 - 0
src/__init__.py

@@ -0,0 +1,7 @@
+"""
+223-236-template-6: BMAD Novel Translator
+
+A novel translation tool with glossary support and crash-safe state management.
+"""
+
+__version__ = "0.1.0"

+ 21 - 0
src/glossary/__init__.py

@@ -0,0 +1,21 @@
+"""
+Glossary module for terminology management.
+
+This module provides terminology extraction, matching, and replacement
+to ensure consistent translations of proper nouns and character names.
+"""
+
+from .models import Glossary, GlossaryEntry
+from .matcher import GlossaryMatcher, TermMatch
+from .preprocessor import GlossaryPreprocessor, PreprocessingResult
+from .postprocessor import GlossaryPostprocessor
+
+__all__ = [
+    "Glossary",
+    "GlossaryEntry",
+    "GlossaryMatcher",
+    "TermMatch",
+    "GlossaryPreprocessor",
+    "PreprocessingResult",
+    "GlossaryPostprocessor",
+]

+ 146 - 0
src/glossary/matcher.py

@@ -0,0 +1,146 @@
+"""
+Term matching engine for the glossary module.
+
+This module implements the longest-match algorithm for terminology replacement.
+"""
+
+from dataclasses import dataclass
+from typing import Dict, List, Tuple
+import re
+
+from .models import Glossary
+
+
@dataclass
class TermMatch:
    """
    One occurrence of a glossary term located in a text.

    Attributes:
        source: Original source-language term.
        target: Translated target-language term.
        start: Index of the first character of the match.
        end: Index one past the last character of the match.
        placeholder: Placeholder string that stands in for the term.
    """

    source: str
    target: str
    start: int
    end: int
    placeholder: str


class GlossaryMatcher:
    """
    Finds glossary terms in text and swaps them with placeholders.

    Terms are scanned longest-first so that a longer entry such as
    "魔法师" wins over its prefix "魔法".
    """

    PLACEHOLDER_PREFIX = "__en__"

    def __init__(self, glossary: Glossary):
        """
        Store the glossary and cache its terms sorted longest-first.

        Args:
            glossary: The Glossary providing the terms to match.
        """
        self.glossary = glossary
        self._sorted_terms = glossary.sort_by_length_desc()

    def find_matches(self, text: str) -> List[TermMatch]:
        """
        Locate every non-overlapping term occurrence in the given text.

        Args:
            text: The text to search for terms.

        Returns:
            TermMatch objects ordered by their starting position.
        """
        found: List[TermMatch] = []
        claimed = set()  # character indices already covered by a match

        for term in self._sorted_terms:
            entry = self.glossary.get(term)
            if entry is None:
                continue

            search_from = 0
            while (hit := text.find(term, search_from)) != -1:
                hit_end = hit + len(term)

                # Skip occurrences that collide with an earlier (longer) match.
                if all(i not in claimed for i in range(hit, hit_end)):
                    found.append(
                        TermMatch(
                            source=term,
                            target=entry.target,
                            start=hit,
                            end=hit_end,
                            placeholder=f"{self.PLACEHOLDER_PREFIX}{term}",
                        )
                    )
                    claimed.update(range(hit, hit_end))

                search_from = hit + 1

        found.sort(key=lambda m: m.start)
        return found

    def replace_with_placeholder(self, text: str) -> Tuple[str, Dict[str, str]]:
        """
        Substitute each matched term in the text with its placeholder.

        Args:
            text: The text to process.

        Returns:
            Tuple of (processed_text, placeholder -> translation mapping).
        """
        mapping: Dict[str, str] = {}
        pieces: List[str] = []
        cursor = 0

        # Matches are sorted and non-overlapping, so the text can be
        # rebuilt left-to-right in a single pass.
        for match in self.find_matches(text):
            mapping[match.placeholder] = match.target
            pieces.append(text[cursor:match.start])
            pieces.append(match.placeholder)
            cursor = match.end
        pieces.append(text[cursor:])

        return "".join(pieces), mapping

    def restore_from_placeholder(self, text: str, mapping: Dict[str, str]) -> str:
        """
        Replace every placeholder in the text with its translation.

        Args:
            text: The text containing placeholders.
            mapping: The placeholder -> translation mapping.

        Returns:
            Text with placeholders replaced by translations.
        """
        for placeholder, translation in mapping.items():
            text = text.replace(placeholder, translation)
        return text

+ 128 - 0
src/glossary/models.py

@@ -0,0 +1,128 @@
+"""
+Data models for the glossary module.
+
+This module defines the core data structures for terminology management.
+"""
+
+from dataclasses import dataclass
+from typing import Dict, List, Optional
+from enum import Enum
+
+
class TermCategory(Enum):
    """Categories for terminology entries.

    Purely informational tags on GlossaryEntry records; the matching
    logic in GlossaryMatcher does not consult the category.
    """

    CHARACTER = "character"  # Character names (e.g. 林风)
    SKILL = "skill"  # Skill names (e.g. 火球术)
    LOCATION = "location"  # Location names (e.g. 东方大陆)
    ITEM = "item"  # Item names (e.g. 龙剑)
    ORGANIZATION = "organization"  # Organization names (e.g. 魔法学院)
    OTHER = "other"  # Anything not covered above
+
+
@dataclass
class GlossaryEntry:
    """
    A single source -> target terminology pair.

    Attributes:
        source: The term in the source language.
        target: The term in the target language.
        category: Kind of term (character, skill, location, ...).
        context: Optional free-form usage note.

    Raises:
        ValueError: On construction, when source or target is empty/blank.
    """

    source: str
    target: str
    category: TermCategory
    context: str = ""

    def __post_init__(self):
        """Reject entries whose source or target side is blank."""
        if not self.source or not self.source.strip():
            raise ValueError("Source term cannot be empty")
        if not self.target or not self.target.strip():
            raise ValueError("Target term cannot be empty")

    @property
    def length(self) -> int:
        """Length of the source term in characters."""
        return len(self.source)
+
+
class Glossary:
    """
    Container mapping source terms to their GlossaryEntry records.

    Keeping a single authoritative term table is what guarantees that a
    given term is translated the same way everywhere in a document.
    """

    def __init__(self):
        """Create a glossary with no entries."""
        self._terms: Dict[str, GlossaryEntry] = {}

    def add(self, entry: GlossaryEntry) -> None:
        """
        Insert an entry, overwriting any existing entry with the same source.

        Args:
            entry: The GlossaryEntry to store.
        """
        self._terms[entry.source] = entry

    def get(self, source: str) -> Optional[GlossaryEntry]:
        """
        Look up the entry for a source term.

        Args:
            source: The source term to look up.

        Returns:
            The GlossaryEntry if present, otherwise None.
        """
        return self._terms.get(source)

    def remove(self, source: str) -> bool:
        """
        Delete the entry for a source term.

        Args:
            source: The source term to remove.

        Returns:
            True when an entry was deleted, False when none existed.
        """
        try:
            del self._terms[source]
        except KeyError:
            return False
        return True

    def get_all(self) -> List[GlossaryEntry]:
        """
        Return every entry in insertion order.

        Returns:
            List of all GlossaryEntry objects.
        """
        return list(self._terms.values())

    def sort_by_length_desc(self) -> List[str]:
        """
        Return source terms ordered longest-first.

        Longest-first ordering drives longest-match processing, so that
        e.g. "魔法师" is tried before its prefix "魔法".

        Returns:
            Source terms sorted by length, descending.
        """
        return sorted(self._terms, key=len, reverse=True)

    def __len__(self) -> int:
        """Number of entries in the glossary."""
        return len(self._terms)

    def __contains__(self, source: str) -> bool:
        """True when the given source term has an entry."""
        return source in self._terms

+ 140 - 0
src/glossary/postprocessor.py

@@ -0,0 +1,140 @@
+"""
+Postprocessing module for terminology restoration.
+
+This module handles the postprocessing of text after translation,
+restoring placeholders to their translated terms and fixing punctuation.
+"""
+
+import re
+from dataclasses import dataclass
+from typing import Dict, List
+
+from .matcher import GlossaryMatcher
+
+
@dataclass
class ValidationResult:
    """
    Result of validating translation completeness.

    Note: a term whose placeholder was never restored can legitimately
    appear in BOTH lists — as an unreplaced placeholder and as a missing
    translation.

    Attributes:
        is_valid: True only when both lists below are empty
        missing_terms: Source terms whose translation was not found in the output
        extra_placeholders: Placeholder strings still present in the output
    """

    is_valid: bool
    missing_terms: List[str]
    extra_placeholders: List[str]
+
+
class GlossaryPostprocessor:
    """
    Postprocessor for restoring placeholders after translation.

    Responsibilities:
      * swap placeholders back to their glossary translations
      * normalize punctuation around the restored terms
      * validate that no placeholder survived and no translation is missing
    """

    def __init__(self):
        """Compile the pattern used to detect leftover placeholders."""
        # Matches "__en__<token>" where <token> may contain single
        # underscores but no whitespace or double underscores.
        self.placeholder_pattern = re.compile(r"__en__([^_\s]+(?:_[^_\s]+)*)")

    def process(self, translated_text: str, placeholder_map: Dict[str, str]) -> str:
        """
        Restore placeholders in translated text, then tidy punctuation.

        Args:
            translated_text: The translated text containing placeholders
            placeholder_map: Mapping from placeholders to translations

        Returns:
            Fully restored, punctuation-fixed text
        """
        restored = self.restore_from_placeholder(translated_text, placeholder_map)
        return self.fix_punctuation(restored)

    def restore_from_placeholder(self, text: str, mapping: Dict[str, str]) -> str:
        """
        Replace placeholders with translations, longest placeholder first.

        Longest-first ordering prevents a short placeholder from clobbering
        the prefix of a longer one (e.g. "__en__魔法" inside "__en__魔法师").

        Args:
            text: The text containing placeholders
            mapping: The placeholder to translation mapping

        Returns:
            Text with placeholders replaced by translations
        """
        for placeholder in sorted(mapping, key=len, reverse=True):
            text = text.replace(placeholder, mapping[placeholder])
        return text

    def fix_punctuation(self, text: str) -> str:
        """
        Fix common punctuation issues in translated text.

        Fixes:
        - Spaces before ASCII punctuation (e.g. "Lin Feng ." → "Lin Feng.")
        - Full-width punctuation directly after Latin text, converted to the
          ASCII form followed by exactly one space. "[ ]*" consumes any
          pre-existing spaces, so "Feng, x" becomes "Feng, x" rather than
          the double-spaced "Feng,  x" the previous version produced.

        Args:
            text: The text to fix

        Returns:
            Text with fixed punctuation
        """
        # Remove space(s) before common ASCII punctuation.
        text = re.sub(r"\s+([.,!?;:)])", r"\1", text)

        # Full-width → ASCII after Latin letters. This is a simple
        # heuristic — more sophisticated NLP could be used.
        text = re.sub(r"([a-zA-Z]),[ ]*", r"\1, ", text)
        text = re.sub(r"([a-zA-Z])。[ ]*", r"\1. ", text)
        text = re.sub(r"([a-zA-Z]);[ ]*", r"\1; ", text)
        text = re.sub(r"([a-zA-Z]):[ ]*", r"\1: ", text)

        return text

    def validate_translation(
        self, original: str, translated: str, placeholder_map: Dict[str, str]
    ) -> ValidationResult:
        """
        Validate that restoration left no placeholders and lost no terms.

        Args:
            original: Original text before translation (currently unused;
                kept for interface stability and future diff-based checks)
            translated: Translated text (should have placeholders restored)
            placeholder_map: The placeholder mapping used

        Returns:
            ValidationResult; valid only when no placeholder remains and
            every expected translation appears in *translated*
        """
        # Placeholders that survived restoration.
        leftover = self.placeholder_pattern.findall(translated)
        extra_placeholders = [f"__en__{token}" for token in leftover]

        # Expected translations that never made it into the output.
        # NOTE: substring check is a heuristic — a translation embedded
        # inside a longer word still counts as present.
        missing_terms = []
        for placeholder, translation in placeholder_map.items():
            if translation not in translated:
                source = placeholder.replace(GlossaryMatcher.PLACEHOLDER_PREFIX, "")
                missing_terms.append(source)

        return ValidationResult(
            is_valid=not extra_placeholders and not missing_terms,
            missing_terms=missing_terms,
            extra_placeholders=extra_placeholders,
        )

+ 126 - 0
src/glossary/preprocessor.py

@@ -0,0 +1,126 @@
+"""
+Preprocessing module for terminology replacement.
+
+This module handles the preprocessing of text before translation,
+replacing terms with placeholders to ensure consistent translation.
+"""
+
+from dataclasses import dataclass
+from typing import Dict, List
+
+from .models import Glossary
+from .matcher import GlossaryMatcher
+
+
@dataclass
class PreprocessingResult:
    """
    Result of the preprocessing step.

    Attributes:
        processed_text: Text with glossary terms replaced by placeholders
        placeholder_map: Mapping from placeholder string to target translation
        terms_found: Occurrence count per matched source term
        retention_rate: Intended as the percentage (0-100) of original
            characters not replaced by a placeholder — confirm against the
            preprocessor's retention calculation
    """

    processed_text: str
    placeholder_map: Dict[str, str]
    terms_found: Dict[str, int]
    retention_rate: float
+
+
class GlossaryPreprocessor:
    """
    Preprocessor for replacing terms with placeholders before translation.

    Locking terms behind placeholders keeps the translation engine from
    re-translating them inconsistently.
    """

    def __init__(self, glossary: Glossary):
        """
        Initialize the preprocessor with a glossary.

        Args:
            glossary: The Glossary to use for replacement
        """
        self.glossary = glossary
        self.matcher = GlossaryMatcher(glossary)

    def process(self, text: str) -> PreprocessingResult:
        """
        Process text by replacing terms with placeholders.

        Args:
            text: The text to process

        Returns:
            PreprocessingResult with processed text and metadata
        """
        # Find all matches first to collect statistics.
        matches = self.matcher.find_matches(text)

        # Occurrence count per source term.
        terms_found: Dict[str, int] = {}
        for match in matches:
            terms_found[match.source] = terms_found.get(match.source, 0) + 1

        # Replace with placeholders.
        processed_text, placeholder_map = self.matcher.replace_with_placeholder(text)

        # Calculate retention rate.
        retention_rate = self._calculate_retention_rate(text, processed_text)

        return PreprocessingResult(
            processed_text=processed_text,
            placeholder_map=placeholder_map,
            terms_found=terms_found,
            retention_rate=retention_rate,
        )

    def process_batch(self, texts: List[str]) -> List[PreprocessingResult]:
        """
        Process multiple texts in batch.

        Args:
            texts: List of texts to process

        Returns:
            List of PreprocessingResult objects
        """
        return [self.process(text) for text in texts]

    def calculate_retention_rate(self, original: str, processed: str) -> float:
        """
        Calculate the percentage of original text preserved.

        Args:
            original: The original text
            processed: The processed text with placeholders

        Returns:
            Retention rate as a percentage (0-100)
        """
        return self._calculate_retention_rate(original, processed)

    def _calculate_retention_rate(self, original: str, processed: str) -> float:
        """Percentage of *original* characters NOT covered by a term match.

        Bug fix: the previous implementation did
        ``sum(len(term) for term in self.glossary.get_all())``, which
        (a) raised TypeError because GlossaryEntry defines no ``__len__``,
        and (b) counted every glossary term whether or not it occurred in
        the text (it also computed an unused ``placeholder_chars`` value).
        We now measure the characters actually matched in *original*.

        ``processed`` is currently unused but kept for interface stability.
        """
        if not original:
            return 100.0

        matched_chars = sum(
            m.end - m.start for m in self.matcher.find_matches(original)
        )
        return ((len(original) - matched_chars) / len(original)) * 100

+ 9 - 0
src/pipeline/__init__.py

@@ -0,0 +1,9 @@
+"""
+Pipeline module for translation workflow management.
+
+This module provides state machine and pipeline orchestration for the translation process.
+"""
+
+from .state_machine import PipelineStateMachine, PipelineState
+
+__all__ = ["PipelineStateMachine", "PipelineState"]

+ 52 - 0
src/pipeline/models.py

@@ -0,0 +1,52 @@
+"""
+Data models for the pipeline module.
+
+This module defines the data structures used throughout the translation pipeline.
+"""
+
+from dataclasses import dataclass, field
+from typing import Optional, Dict, Any
+from datetime import datetime
+
+
@dataclass
class PipelineProgress:
    """
    Progress tracking for pipeline execution.

    Attributes:
        current_chapter: Index of the chapter currently being processed.
        total_chapters: Total number of chapters in the work.
        current_stage: Name of the active pipeline stage.
        started_at: When processing began, if it has started.
        estimated_completion: Projected finish time, if known.
    """

    current_chapter: int = 0
    total_chapters: int = 0
    current_stage: str = "idle"
    started_at: Optional[datetime] = None
    estimated_completion: Optional[datetime] = None

    @property
    def progress_percentage(self) -> float:
        """Completion ratio in percent; 0.0 when no chapters are known."""
        if not self.total_chapters:
            return 0.0
        return (self.current_chapter / self.total_chapters) * 100
+
+
@dataclass
class TaskMetadata:
    """Metadata for a translation task.

    Timestamps default to construction time via default_factory, so each
    instance gets its own values.
    """

    work_id: str  # Unique identifier for the work being translated
    file_path: str  # Path to the source file
    file_size: int = 0  # Source file size in bytes — presumably; confirm against writer
    total_chapters: int = 0  # Number of chapters detected in the work
    created_at: datetime = field(default_factory=datetime.now)  # Task creation time
    last_updated: datetime = field(default_factory=datetime.now)  # Last modification time
    extra: Dict[str, Any] = field(default_factory=dict)  # Free-form extension data


@dataclass
class StateSnapshot:
    """Snapshot of the pipeline state for persistence.

    Field layout mirrors the JSON persistence format used by the state
    machine (state name, history entries, progress and metadata dicts).
    """

    work_id: str  # Identifier of the work this snapshot belongs to
    current_state: str  # Serialized state label
    state_history: list  # History entries (dicts with state/entered_at/reason)
    progress: Dict[str, Any]  # Serialized PipelineProgress data
    metadata: Dict[str, Any]  # Serialized TaskMetadata data
    saved_at: datetime = field(default_factory=datetime.now)  # Snapshot time

+ 114 - 0
src/pipeline/state_machine.py

@@ -0,0 +1,114 @@
+"""
+State machine for translation pipeline.
+
+This module implements the core state machine that manages the translation workflow.
+"""
+
+from enum import Enum
+from typing import Optional, List, Dict
+from datetime import datetime
+
+
class PipelineState(Enum):
    """Pipeline states for the translation workflow.

    The enum *name* (e.g. "IDLE") keys the transition table below; the
    *value* is the lowercase label recorded in state-history entries.
    """

    IDLE = "idle"  # No task in progress
    PREPARING = "preparing"  # Parsing input and preparing resources
    CLEANING = "cleaning"  # Text-cleaning stage
    TRANSLATING = "translating"  # Translation stage
    UPLOADING = "uploading"  # Upload stage
    PAUSED = "paused"  # Temporarily halted; resumable
    COMPLETED = "completed"  # Finished successfully
    FAILED = "failed"  # Aborted due to an error


class PipelineStateMachine:
    """
    State machine for managing translation pipeline state.

    This class handles state transitions, maintains state history,
    and provides callbacks for state change events.
    """

    # Valid state transitions, keyed by PipelineState.name, each mapping
    # to the list of PipelineState.name values reachable from that state.
    TRANSITIONS = {
        "IDLE": ["PREPARING"],
        "PREPARING": ["CLEANING", "FAILED", "PAUSED"],
        "CLEANING": ["TRANSLATING", "FAILED", "PAUSED"],
        "TRANSLATING": ["UPLOADING", "FAILED", "PAUSED"],
        "UPLOADING": ["COMPLETED", "FAILED", "PAUSED"],
        "PAUSED": ["IDLE", "PREPARING", "CLEANING", "TRANSLATING", "UPLOADING"],
        "FAILED": ["IDLE"],
        "COMPLETED": ["IDLE"],
    }

    def __init__(self):
        """Initialize the state machine in IDLE and record that entry."""
        self._state = PipelineState.IDLE
        self._state_history: List[Dict] = []
        self._record_state_entry(PipelineState.IDLE)

    @property
    def state(self) -> PipelineState:
        """Get the current state."""
        return self._state

    def transition_to(self, new_state: PipelineState, reason: str = "") -> bool:
        """
        Attempt to transition to a new state.

        Args:
            new_state: The target state to transition to
            reason: Optional reason for the transition, stored in history

        Returns:
            True if transition was successful, False otherwise
        """
        if not self.can_transition_to(new_state):
            return False

        self._state = new_state
        self._record_state_entry(new_state, reason)
        return True

    def can_transition_to(self, new_state: PipelineState) -> bool:
        """
        Check if transition to the given state is valid.

        Args:
            new_state: The target state to check

        Returns:
            True if transition is valid, False otherwise
        """
        # BUG FIX: TRANSITIONS is keyed by enum *names* ("IDLE"), but the
        # previous code looked up the enum *value* ("idle"), so the lookup
        # always returned [] and every transition was rejected.
        valid_transitions = self.TRANSITIONS.get(self._state.name, [])
        return new_state.name in valid_transitions

    def get_current_state(self) -> PipelineState:
        """Return the current state (method form of the ``state`` property)."""
        return self._state

    def get_state_history(self) -> List[Dict]:
        """
        Get a copy of the complete state history.

        Returns:
            List of entries, each with "state", "entered_at" and "reason"
        """
        return self._state_history.copy()

    def _record_state_entry(self, state: PipelineState, reason: str = "") -> None:
        """Append a history record for entering *state*."""
        self._state_history.append(
            {
                "state": state.value,
                "entered_at": datetime.now().isoformat(),
                "reason": reason,
            }
        )

    def reset(self) -> None:
        """Reset to IDLE, clearing history and recording the reset entry."""
        self._state = PipelineState.IDLE
        self._state_history = []
        self._record_state_entry(PipelineState.IDLE, "reset")

+ 9 - 0
src/utils/__init__.py

@@ -0,0 +1,9 @@
+"""
+Utility modules for the translator.
+
+This module contains common utilities used across the application.
+"""
+
+from .persistence import atomic_write, read_json_file, write_json_file
+
+__all__ = ["atomic_write", "read_json_file", "write_json_file"]

+ 101 - 0
src/utils/persistence.py

@@ -0,0 +1,101 @@
+"""
+Persistence utilities for atomic file operations.
+
+This module provides safe file writing operations that prevent data loss
+due to crashes or power failures.
+"""
+
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict, Optional
+import tempfile
+
+
def atomic_write(file_path: Path, data: str) -> None:
    """
    Atomically replace the contents of *file_path* with *data*.

    The text is written to a sibling temporary file, flushed and fsynced,
    then moved into place with ``os.replace``. Readers therefore observe
    either the old contents or the complete new contents — never a
    partially written file.

    Args:
        file_path: Destination path; missing parent directories are created
        data: The string data to write (UTF-8)
    """
    file_path.parent.mkdir(parents=True, exist_ok=True)

    # Temp file lives in the same directory so the final rename stays
    # on one filesystem (a cross-device rename would not be atomic).
    fd, tmp_name = tempfile.mkstemp(
        suffix=".tmp", prefix=file_path.name, dir=file_path.parent
    )

    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(data)
            handle.flush()
            # Push bytes to disk before the rename makes them visible.
            os.fsync(handle.fileno())

        os.replace(tmp_name, str(file_path))
    except Exception:
        # Best-effort removal of the orphaned temp file.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
+
+
def write_json_file(file_path: Path, data: Dict[str, Any]) -> None:
    """
    Serialize *data* as pretty-printed UTF-8 JSON and write it atomically.

    Args:
        file_path: The path to write to
        data: The dictionary to write as JSON
    """
    atomic_write(file_path, json.dumps(data, ensure_ascii=False, indent=2))
+
+
def read_json_file(file_path: Path) -> Optional[Dict[str, Any]]:
    """
    Load JSON data from *file_path*.

    Args:
        file_path: The path to read from

    Returns:
        The parsed data, or None when the file is missing, unreadable,
        or does not contain valid JSON
    """
    try:
        with file_path.open("r", encoding="utf-8") as handle:
            return json.load(handle)
    except (json.JSONDecodeError, OSError):
        # FileNotFoundError is an OSError, so a missing file also lands here.
        return None
+
+
def cleanup_temp_files(directory: Path) -> int:
    """
    Delete leftover "*.tmp" files in *directory*.

    Args:
        directory: The directory to clean

    Returns:
        Number of files removed; files that fail to delete are skipped
    """
    removed = 0
    for leftover in directory.glob("*.tmp"):
        try:
            leftover.unlink()
        except OSError:
            continue
        removed += 1
    return removed

+ 322 - 0
stories/epic-1.1-state-machine.md

@@ -0,0 +1,322 @@
+# Epic 1.1: State Machine 状态管理
+
+**优先级**: P0 (Phase 1a 核心功能)
+**估算**: 18 故事点
+**依赖**: 无
+
+---
+
+## Epic 目标
+
+实现 Pipeline 状态机,支持状态转换、持久化和恢复,确保翻译任务在各种异常情况下能够正确管理和恢复。
+
+---
+
+## 用户价值
+
+**As a** 系统,
+**I want** 使用状态机管理翻译任务的生命周期,
+**So that** 可以追踪任务状态并支持状态转换验证。
+
+---
+
+## 技术栈
+
+- **状态机库**: `transitions==0.9.0`
+- **测试框架**: `pytest==7.4.0`
+- **代码覆盖率**: `pytest-cov==4.1.0`
+
+---
+
+## Story 列表
+
+### Story 1.1.1: 定义 PipelineState 枚举和转换规则
+
+**估算**: 3 SP
+
+**描述**: 定义任务的所有可能状态以及状态之间的合法转换路径。
+
+**验收标准**:
+
+```python
+# 状态定义
+class PipelineState(Enum):
+    IDLE = "idle"           # 初始状态,任务未开始
+    PREPARING = "preparing" # 准备阶段(文件解析、术语提取)
+    CLEANING = "cleaning"   # 清洗阶段
+    TRANSLATING = "translating"  # 翻译阶段
+    UPLOADING = "uploading"      # 上传阶段
+    PAUSED = "paused"       # 暂停状态
+    COMPLETED = "completed" # 完成状态
+    FAILED = "failed"       # 失败状态
+
+# 合法转换路径
+TRANSITIONS = {
+    'IDLE': ['PREPARING'],
+    'PREPARING': ['CLEANING', 'FAILED', 'PAUSED'],
+    'CLEANING': ['TRANSLATING', 'FAILED', 'PAUSED'],
+    'TRANSLATING': ['UPLOADING', 'FAILED', 'PAUSED'],
+    'UPLOADING': ['COMPLETED', 'FAILED', 'PAUSED'],
+    'PAUSED': ['IDLE', 'PREPARING', 'CLEANING', 'TRANSLATING', 'UPLOADING'],
+    'FAILED': ['IDLE'],
+    'COMPLETED': ['IDLE']
+}
+```
+
+**技术任务**:
+1. 创建 `src/pipeline/state_machine.py`
+2. 定义 `PipelineState` 枚举
+3. 定义转换规则字典
+4. 编写单元测试验证状态定义
+
+---
+
+### Story 1.1.2: 实现状态转换引擎
+
+**估算**: 5 SP
+
+**描述**: 使用 `transitions` 库实现状态机引擎,支持状态转换和回调。
+
+**验收标准**:
+
+```python
+class PipelineStateMachine:
+    def __init__(self):
+        self.machine = Machine(...)
+        self.state = PipelineState.IDLE
+        self.state_history = []
+
+    def transition_to(self, new_state: PipelineState) -> bool:
+        """尝试转换到新状态"""
+        pass
+
+    def can_transition_to(self, new_state: PipelineState) -> bool:
+        """检查是否可以转换到新状态"""
+        pass
+
+    def get_current_state(self) -> PipelineState:
+        """获取当前状态"""
+        pass
+
+    def get_state_history(self) -> List[Dict]:
+        """获取状态历史记录"""
+        pass
+```
+
+**回调机制**:
+- `on_enter_PREPARING()`: 进入准备阶段时的回调
+- `on_exit_PREPARING()`: 退出准备阶段时的回调
+- 每个状态转换都记录时间戳和原因
+
+**技术任务**:
+1. 集成 `transitions` 库
+2. 实现状态转换逻辑
+3. 实现回调机制
+4. 编写单元测试验证所有转换路径
+
+---
+
+### Story 1.1.3: 实现状态持久化
+
+**估算**: 4 SP
+
+**描述**: 将状态机状态持久化到文件,支持崩溃后恢复。
+
+**验收标准**:
+
+```python
+# 持久化格式
+{
+    "work_id": "abc123",
+    "current_state": "TRANSLATING",
+    "state_history": [
+        {"state": "IDLE", "entered_at": "2026-03-15T10:00:00"},
+        {"state": "PREPARING", "entered_at": "2026-03-15T10:00:05"},
+        {"state": "CLEANING", "entered_at": "2026-03-15T10:01:00"},
+        {"state": "TRANSLATING", "entered_at": "2026-03-15T10:05:00"}
+    ],
+    "progress": {
+        "current_chapter": 15,
+        "total_chapters": 100
+    },
+    "metadata": {
+        "file_path": "/path/to/novel.txt",
+        "last_updated": "2026-03-15T10:30:00"
+    }
+}
+
+class StatePersistence:
+    def save_state(self, work_id: str, machine: PipelineStateMachine) -> None:
+        """保存状态到文件"""
+        pass
+
+    def load_state(self, work_id: str) -> Optional[Dict]:
+        """从文件加载状态"""
+        pass
+
+    def get_state_file_path(self, work_id: str) -> Path:
+        """获取状态文件路径"""
+        pass
+```
+
+**技术任务**:
+1. 创建 `src/utils/persistence.py`
+2. 实现状态序列化(JSON 格式)
+3. 实现状态反序列化
+4. 使用原子写入(.tmp + rename)确保数据安全
+5. 编写测试验证持久化功能
+
+---
+
+### Story 1.1.4: 实现状态恢复和验证
+
+**估算**: 3 SP
+
+**描述**: 从持久化状态恢复状态机,并验证状态一致性。
+
+**验收标准**:
+
+```python
+class StateRecovery:
+    def recover_state_machine(self, work_id: str) -> Optional[PipelineStateMachine]:
+        """恢复状态机"""
+        pass
+
+    def validate_state(self, state_data: Dict) -> bool:
+        """验证状态数据完整性"""
+        pass
+
+    def get_resume_point(self, state_data: Dict) -> Optional[str]:
+        """获取恢复点(应该从哪个阶段继续)"""
+        pass
+```
+
+**验证规则**:
+1. 状态文件格式正确
+2. 当前状态是有效状态
+3. 进度数据完整(章节索引在有效范围内)
+4. 文件路径存在
+
+**技术任务**:
+1. 实现状态恢复逻辑
+2. 实现状态验证规则
+3. 处理损坏的状态文件
+4. 编写测试验证恢复逻辑
+
+---
+
+### Story 1.1.5: 单元测试覆盖所有转换路径
+
+**估算**: 3 SP
+
+**描述**: 编写完整的单元测试,覆盖所有状态转换路径。
+
+**验收标准**:
+
+- 代码覆盖率 >= 90%
+- 所有状态转换路径测试
+- 边界条件测试
+- 异常情况测试
+
+**测试用例**:
+
+```python
+class TestPipelineStateMachine:
+    def test_initial_state_is_idle(self):
+        pass
+
+    def test_valid_transitions(self):
+        """测试所有合法转换"""
+        pass
+
+    def test_invalid_transitions_rejected(self):
+        """测试非法转换被拒绝"""
+        pass
+
+    def test_state_from_idle_to_translating(self):
+        """测试完整流程"""
+        pass
+
+    def test_pause_from_any_state(self):
+        """测试从任何状态暂停"""
+        pass
+
+    def test_resume_from_pause(self):
+        """测试从暂停恢复"""
+        pass
+
+    def test_failed_state_only_goes_to_idle(self):
+        """测试失败状态只能回到空闲"""
+        pass
+
+    def test_state_history_tracking(self):
+        """测试状态历史记录"""
+        pass
+
+class TestStatePersistence:
+    def test_save_and_load_state(self):
+        pass
+
+    def test_atomic_write(self):
+        pass
+
+    def test_corrupted_state_handling(self):
+        pass
+
+class TestStateRecovery:
+    def test_recover_to_last_state(self):
+        pass
+
+    def test_recover_with_missing_file(self):
+        pass
+
+    def test_recover_with_corrupted_data(self):
+        pass
+```
+
+**技术任务**:
+1. 创建 `tests/test_state_machine.py`
+2. 实现所有测试用例
+3. 运行覆盖率报告
+4. 确保覆盖率 >= 90%
+
+---
+
+## 文件结构
+
+```
+src/
+└── pipeline/
+    ├── __init__.py
+    ├── state_machine.py      # PipelineStateMachine 类
+    └── models.py              # PipelineState 枚举
+
+src/utils/
+└── persistence.py             # StatePersistence 类
+
+tests/
+└── test_state_machine.py      # 所有状态机测试
+```
+
+---
+
+## 依赖关系
+
+- Epic 1.1 无外部依赖,可独立开发
+- Epic 1.2 (Crash-Safe) 依赖 Epic 1.1 的状态持久化功能
+- Epic 7a (任务调度) 将使用 Epic 1.1 的状态机
+
+---
+
+## 完成标准
+
+- [ ] 所有 5 个 Story 完成
+- [ ] 单元测试覆盖率 >= 90%
+- [ ] 所有验收标准通过
+- [ ] 代码审查通过
+
+---
+
+## 下一步
+
+完成 Epic 1.1 后,开始 Epic 1.2 (Crash-Safe 机制) 开发。

+ 477 - 0
stories/epic-4-glossary.md

@@ -0,0 +1,477 @@
+# Epic 4: 术语提取与替换 (P0 优先级)
+
+**优先级**: **P0** (Phase 0 验证确认术语表对翻译质量至关重要)
+**估算**: 26 故事点 (Phase 1 范围)
+**依赖**: 无
+
+---
+
+## Epic 目标
+
+实现术语表功能,确保翻译过程中角色名和专有术语保持一致,保证翻译可用性。
+
+---
+
+## 为什么是 P0?
+
+**Phase 0 技术验证发现**:
+
+| 场景 | 原文 | 无术语表 | 有术语表 |
+|-----|------|---------|---------|
+| 角色名 | 林风 | Lin wind ❌ | Lin Feng ✅ |
+| 专有名词 | BMAD | BMAd ❌ | BMAD ✅ |
+| 技能名 | 火球术 | fire ball ❌ | Fireball ✅ |
+
+**结论**: 没有术语表功能,翻译内容**不可用**。术语表是保证翻译质量的核心功能。
+
+---
+
+## 用户价值
+
+**As a** 翻译用户,
+**I want** 定义和使用术语表,
+**So that** 翻译后的内容中角色名和专有术语保持一致。
+
+---
+
+## 技术栈
+
+- **数据结构**: `Dict[str, str]` (术语 → 翻译)
+- **匹配算法**: 最长匹配(按长度降序)
+- **占位符**: `__en__` 前缀标记
+- **测试框架**: `pytest==7.4.0`
+
+---
+
+## Phase 1 Story 列表 (核心功能)
+
+### Story 4.1: 设计术语表数据结构
+
+**估算**: 4 SP
+
+**描述**: 设计术语表数据结构,支持术语和翻译的存储。
+
+**验收标准**:
+
+```python
+from typing import Dict, List, Optional
+from dataclasses import dataclass
+
+@dataclass
+class GlossaryEntry:
+    """术语表条目"""
+    source: str      # 原文术语,如 "林风"
+    target: str      # 目标翻译,如 "Lin Feng"
+    category: str    # 术语类型:CHARACTER, SKILL, LOCATION, ITEM, OTHER
+    context: str = ""  # 上下文说明
+
+class Glossary:
+    """术语表"""
+
+    def __init__(self):
+        self._terms: Dict[str, GlossaryEntry] = {}
+
+    def add(self, entry: GlossaryEntry) -> None:
+        """添加术语"""
+        pass
+
+    def get(self, source: str) -> Optional[GlossaryEntry]:
+        """获取术语翻译"""
+        pass
+
+    def remove(self, source: str) -> bool:
+        """删除术语"""
+        pass
+
+    def get_all(self) -> List[GlossaryEntry]:
+        """获取所有术语"""
+        pass
+
+    def sort_by_length_desc(self) -> List[str]:
+        """按长度降序排列术语(用于匹配)"""
+        pass
+```
+
+**技术任务**:
+1. 创建 `src/glossary/models.py`
+2. 定义 `GlossaryEntry` 数据类
+3. 实现 `Glossary` 类
+4. 编写单元测试
+
+---
+
+### Story 4.2: 实现术语匹配引擎
+
+**估算**: 6 SP
+
+**描述**: 实现最长匹配算法,确保长术语优先匹配(避免"魔法"覆盖"魔法师")。
+
+**验收标准**:
+
+```python
+class GlossaryMatcher:
+    """术语匹配引擎"""
+
+    def __init__(self, glossary: Glossary):
+        self.glossary = glossary
+        # 按长度降序排列,确保长术语优先匹配
+        self._sorted_terms = glossary.sort_by_length_desc()
+
+    def find_matches(self, text: str) -> List[TermMatch]:
+        """在文本中查找所有术语匹配"""
+        pass
+
+    def replace_with_placeholder(self, text: str) -> Tuple[str, Dict[str, str]]:
+        """将术语替换为占位符
+
+        返回: (替换后的文本, 占位符映射)
+        占位符格式: __en__林风
+        """
+        pass
+
+    def restore_from_placeholder(self, text: str, mapping: Dict[str, str]) -> str:
+        """将占位符还原为术语翻译"""
+        pass
+
+@dataclass
+class TermMatch:
+    """术语匹配结果"""
+    source: str        # 原文术语
+    target: str        # 目标翻译
+    start: int         # 在文本中的起始位置
+    end: int           # 在文本中的结束位置
+    placeholder: str   # 占位符
+```
+
+**匹配规则**:
+1. 按术语长度降序匹配(长术语优先)
+2. 不重叠匹配(已匹配位置不再匹配)
+3. 区分大小写
+4. 支持多词术语(如"火球术"、"三阶魔法师")
+
+**示例**:
+```python
+# 输入
+text = "林风释放了火球术"
+glossary = {
+    "林风": "Lin Feng",
+    "火球术": "Fireball"
+}
+
+# 输出
+processed = "__en__林风释放了__en__火球术"
+mapping = {
+    "__en__林风": "Lin Feng",
+    "__en__火球术": "Fireball"
+}
+```
+
+**技术任务**:
+1. 创建 `src/glossary/matcher.py`
+2. 实现最长匹配算法
+3. 实现占位符替换
+4. 编写单元测试
+
+---
+
+### Story 4.3: 实现术语预处理管道
+
+**估算**: 5 SP
+
+**描述**: 在翻译前处理文本,将术语替换为占位符。
+
+**验收标准**:
+
+```python
+class GlossaryPreprocessor:
+    """术语预处理管道"""
+
+    def __init__(self, glossary: Glossary):
+        self.glossary = glossary
+        self.matcher = GlossaryMatcher(glossary)
+
+    def process(self, text: str) -> PreprocessingResult:
+        """处理文本,替换术语为占位符
+
+        返回包含:
+        - processed_text: 处理后的文本
+        - placeholder_map: 占位符映射
+        - term_stats: 术语统计
+        """
+        pass
+
+    def process_batch(self, texts: List[str]) -> List[PreprocessingResult]:
+        """批量处理文本"""
+        pass
+
+    def calculate_retention_rate(self, original: str, processed: str) -> float:
+        """计算术语保留率"""
+        pass
+
+@dataclass
+class PreprocessingResult:
+    """预处理结果"""
+    processed_text: str
+    placeholder_map: Dict[str, str]
+    terms_found: Dict[str, int]  # 术语 → 出现次数
+    retention_rate: float        # 保留率百分比
+```
+
+**处理流程**:
+1. 加载术语表
+2. 初始化匹配引擎
+3. 查找所有术语匹配
+4. 替换为占位符(`__en__`前缀)
+5. 生成占位符映射
+6. 计算保留率
+
+**技术任务**:
+1. 创建 `src/glossary/preprocessor.py`
+2. 实现预处理管道
+3. 实现批量处理
+4. 实现保留率计算
+5. 编写单元测试
+
+---
+
+### Story 4.4: 实现后处理模块
+
+**估算**: 6 SP
+
+**描述**: 翻译后处理,去除 `__en__` 前缀并还原术语翻译。
+
+**验收标准**:
+
+```python
+class GlossaryPostprocessor:
+    """术语后处理模块"""
+
+    def __init__(self):
+        pass
+
+    def process(self, translated_text: str, placeholder_map: Dict[str, str]) -> str:
+        """处理翻译后的文本
+
+        步骤:
+        1. 查找所有 __en__ 前缀的占位符
+        2. 从映射表中获取翻译
+        3. 替换占位符为翻译
+        4. 修复可能出现的标点问题
+        """
+        pass
+
+    def fix_punctuation(self, text: str) -> str:
+        """修复标点符号
+
+        处理翻译可能产生的标点问题:
+        - __en__林风. → Lin Feng. (去除多余空格)
+        - __en__林风, → Lin Feng, (修复中文标点)
+        """
+        pass
+
+    def validate_translation(self, original: str, translated: str,
+                            placeholder_map: Dict[str, str]) -> ValidationResult:
+        """验证翻译完整性
+
+        检查:
+        - 所有占位符都被替换
+        - 翻译包含所有术语
+        - 没有遗漏的术语
+        """
+        pass
+
+@dataclass
+class ValidationResult:
+    """验证结果"""
+    is_valid: bool
+    missing_terms: List[str]     # 遗漏的术语
+    extra_placeholders: List[str] # 未替换的占位符
+```
+
+**处理流程**:
+1. 查找所有 `__en__` 前缀
+2. 从映射表获取翻译
+3. 替换占位符
+4. 修复标点问题
+5. 验证完整性
+
+**技术任务**:
+1. 创建 `src/glossary/postprocessor.py`
+2. 实现占位符还原
+3. 实现标点修复
+4. 实现翻译验证
+5. 编写单元测试
+
+---
+
+### Story 4.6: 单元测试 + 集成测试
+
+**估算**: 5 SP
+
+**描述**: 完整的测试覆盖,包括单元测试和端到端集成测试。
+
+**验收标准**:
+
+- 代码覆盖率 >= 90%
+- 所有边界条件测试
+- 端到端集成测试
+
+**测试用例**:
+
+```python
+class TestGlossary:
+    def test_add_and_retrieve_term(self):
+        pass
+
+    def test_remove_term(self):
+        pass
+
+    def test_sort_by_length_desc(self):
+        """测试长术语排在前面"""
+        pass
+
+class TestGlossaryMatcher:
+    def test_find_single_term(self):
+        pass
+
+    def test_longest_term_priority(self):
+        """测试长术语优先匹配"""
+        text = "魔法师使用了魔法"
+        glossary = {"魔法": "Magic", "魔法师": "Mage"}
+        # 应该匹配 "魔法师" 而不是 "魔法"
+        pass
+
+    def test_non_overlapping_matches(self):
+        pass
+
+    def test_placeholder_generation(self):
+        pass
+
+class TestGlossaryPreprocessor:
+    def test_process_text_with_terms(self):
+        pass
+
+    def test_retention_rate_calculation(self):
+        pass
+
+    def test_batch_processing(self):
+        pass
+
+class TestGlossaryPostprocessor:
+    def test_restore_from_placeholder(self):
+        pass
+
+    def test_fix_punctuation(self):
+        pass
+
+    def test_validate_translation_success(self):
+        pass
+
+    def test_validate_translation_missing_terms(self):
+        pass
+
+class TestGlossaryIntegration:
+    """端到端集成测试"""
+
+    def test_full_pipeline(self):
+        """测试完整流程"""
+        # 1. 创建术语表
+        # 2. 预处理文本
+        # 3. 模拟翻译
+        # 4. 后处理文本
+        # 5. 验证结果
+        original = "林风释放了火球术"
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", "CHARACTER"))
+        glossary.add(GlossaryEntry("火球术", "Fireball", "SKILL"))
+
+        preprocessor = GlossaryPreprocessor(glossary)
+        result = preprocessor.process(original)
+
+        # 模拟翻译(保留占位符)
+        mock_translated = "__en__林风 released __en__火球术"
+
+        postprocessor = GlossaryPostprocessor()
+        final = postprocessor.process(mock_translated, result.placeholder_map)
+
+        assert final == "Lin Feng released Fireball"
+        pass
+
+    def test_phase_0_validation_scenario(self):
+        """测试 Phase 0 验证场景"""
+        # 无术语表: "林风" → "Lin wind"
+        # 有术语表: "林风" → "Lin Feng"
+        pass
+```
+
+**技术任务**:
+1. 创建 `tests/test_glossary.py`
+2. 实现所有单元测试
+3. 实现集成测试
+4. 运行覆盖率报告
+5. 确保覆盖率 >= 90%
+
+---
+
+## Phase 2 Story (推迟)
+
+### Story 4.5: 实现上下文标注
+
+**估算**: 5 SP
+**状态**: 推迟到 Phase 2
+
+**描述**: 为术语标注上下文,帮助用户确定合适的翻译。
+
+---
+
+## 文件结构
+
+```
+src/
+└── glossary/
+    ├── __init__.py
+    ├── models.py           # GlossaryEntry, Glossary 类
+    ├── matcher.py          # GlossaryMatcher 类
+    ├── preprocessor.py     # GlossaryPreprocessor 类
+    └── postprocessor.py    # GlossaryPostprocessor 类
+
+tests/
+└── test_glossary.py        # 所有术语表测试
+```
+
+---
+
+## Phase 0 验证数据
+
+| 测试场景 | 原文 | 无术语表结果 | 有术语表结果 |
+|---------|------|------------|------------|
+| 角色名翻译 | 林风 | Lin wind ❌ | Lin Feng ✅ |
+| 产品名称 | BMAD | BMAd ❌ | BMAD ✅ |
+| 技能名称 | 火球术 | fire ball ❌ | Fireball ✅ |
+| 保留率测试 | 14个术语 | 0% | 93.4% ✅ |
+
+**结论**: 术语表功能是**必须的**,没有它翻译内容不可用。
+
+---
+
+## 依赖关系
+
+- Epic 4 无外部依赖,可独立开发
+- Epic 5 (翻译模块) 将使用 Epic 4 的预处理和后处理功能
+- 可与 Epic 1 部分并行开发
+
+---
+
+## 完成标准
+
+- [ ] 所有 5 个核心 Story 完成
+- [ ] 单元测试覆盖率 >= 90%
+- [ ] 集成测试通过
+- [ ] Phase 0 验证场景测试通过
+- [ ] 代码审查通过
+
+---
+
+## 下一步
+
+完成 Epic 4 核心功能后,与 Epic 1 集成,开始端到端测试。

+ 3 - 0
tests/__init__.py

@@ -0,0 +1,3 @@
+"""
+Test suite for 223-236-template-6.
+"""

+ 247 - 0
tests/test_glossary.py

@@ -0,0 +1,247 @@
+"""
+Unit tests for the glossary module.
+
+Tests cover terminology matching, preprocessing, postprocessing,
+and integration scenarios.
+"""
+
+import pytest
+
+from src.glossary.models import Glossary, GlossaryEntry, TermCategory
+from src.glossary.matcher import GlossaryMatcher, TermMatch
+from src.glossary.preprocessor import GlossaryPreprocessor
+from src.glossary.postprocessor import GlossaryPostprocessor
+
+
+class TestGlossary:
+    """Test cases for Glossary class."""
+
+    def test_add_and_retrieve_term(self):
+        """Test adding and retrieving a term."""
+        glossary = Glossary()
+        entry = GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)
+
+        glossary.add(entry)
+        retrieved = glossary.get("林风")
+
+        assert retrieved is not None
+        assert retrieved.source == "林风"
+        assert retrieved.target == "Lin Feng"
+        assert retrieved.category == TermCategory.CHARACTER
+
+    def test_remove_term(self):
+        """Test removing a term."""
+        glossary = Glossary()
+        entry = GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)
+        glossary.add(entry)
+
+        assert glossary.remove("林风") is True
+        assert glossary.get("林风") is None
+        assert glossary.remove("林风") is False
+
+    def test_sort_by_length_desc(self):
+        """Test sorting terms by length (longest first)."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
+        glossary.add(GlossaryEntry("三阶魔法师", "Tier 3 Mage", TermCategory.CHARACTER))
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+
+        sorted_terms = glossary.sort_by_length_desc()
+        assert sorted_terms[0] == "三阶魔法师"  # 5 chars
+        assert sorted_terms[1] == "火球术"  # 3 chars
+        assert sorted_terms[2] == "林风"  # 2 chars
+
+    def test_get_all(self):
+        """Test getting all terms."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+        glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
+
+        all_terms = glossary.get_all()
+        assert len(all_terms) == 2
+
+    def test_contains_operator(self):
+        """Test the 'in' operator."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+
+        assert "林风" in glossary
+        assert "火球术" not in glossary
+
+
+class TestGlossaryMatcher:
+    """Test cases for GlossaryMatcher."""
+
+    def test_find_single_term(self):
+        """Test finding a single term in text."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+
+        matcher = GlossaryMatcher(glossary)
+        matches = matcher.find_matches("林风释放了火球术")
+
+        assert len(matches) == 1
+        assert matches[0].source == "林风"
+        assert matches[0].target == "Lin Feng"
+        assert matches[0].start == 0
+        assert matches[0].end == 2
+
+    def test_longest_term_priority(self):
+        """Test that longer terms are matched first."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("魔法", "Magic", TermCategory.OTHER))
+        glossary.add(GlossaryEntry("魔法师", "Mage", TermCategory.CHARACTER))
+
+        matcher = GlossaryMatcher(glossary)
+        matches = matcher.find_matches("魔法师使用了魔法")
+
+        # Should match "魔法师" but not the "魔法" within it
+        assert len(matches) == 2
+        assert matches[0].source == "魔法师"
+        assert matches[1].source == "魔法"
+
+    def test_placeholder_generation(self):
+        """Test placeholder generation."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+
+        matcher = GlossaryMatcher(glossary)
+        processed, mapping = matcher.replace_with_placeholder("林风来了")
+
+        assert processed == "__en__林风来了"
+        assert mapping == {"__en__林风": "Lin Feng"}
+
+    def test_non_overlapping_matches(self):
+        """Test that matches don't overlap."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+        glossary.add(GlossaryEntry("林", "Lin", TermCategory.CHARACTER))
+
+        matcher = GlossaryMatcher(glossary)
+        matches = matcher.find_matches("林风走了")
+
+        # Should only match "林风", not "林" within it
+        assert len(matches) == 1
+        assert matches[0].source == "林风"
+
+
+class TestGlossaryPreprocessor:
+    """Test cases for GlossaryPreprocessor."""
+
+    def test_process_text_with_terms(self):
+        """Test processing text with terminology."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+        glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
+
+        preprocessor = GlossaryPreprocessor(glossary)
+        result = preprocessor.process("林风释放了火球术")
+
+        assert result.processed_text == "__en__林风释放了__en__火球术"
+        assert result.terms_found["林风"] == 1
+        assert result.terms_found["火球术"] == 1
+
+    def test_batch_processing(self):
+        """Test batch processing of multiple texts."""
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+
+        preprocessor = GlossaryPreprocessor(glossary)
+        texts = ["林风来了", "林风走了"]
+        results = preprocessor.process_batch(texts)
+
+        assert len(results) == 2
+        assert "__en__林风" in results[0].processed_text
+        assert "__en__林风" in results[1].processed_text
+
+
+class TestGlossaryPostprocessor:
+    """Test cases for GlossaryPostprocessor."""
+
+    def test_restore_from_placeholder(self):
+        """Test restoring placeholders to translations."""
+        postprocessor = GlossaryPostprocessor()
+        mapping = {"__en__林风": "Lin Feng", "__en__火球术": "Fireball"}
+
+        result = postprocessor.restore_from_placeholder("__en__林风 released __en__火球术", mapping)
+
+        assert result == "Lin Feng released Fireball"
+
+    def test_fix_punctuation(self):
+        """Test punctuation fixing."""
+        postprocessor = GlossaryPostprocessor()
+
+        # Remove space before punctuation
+        assert postprocessor.fix_punctuation("Lin Feng .") == "Lin Feng."
+        # Fix Chinese comma after English
+        assert postprocessor.fix_punctuation("Lin Feng,走了") == "Lin Feng, 走了"
+
+    def test_validate_translation_success(self):
+        """Test successful validation."""
+        postprocessor = GlossaryPostprocessor()
+        mapping = {"__en__林风": "Lin Feng"}
+
+        result = postprocessor.validate_translation("林风来了", "Lin Feng came", mapping)
+
+        assert result.is_valid is True
+        assert len(result.missing_terms) == 0
+
+    def test_validate_translation_missing_terms(self):
+        """Test validation with missing terms."""
+        postprocessor = GlossaryPostprocessor()
+        mapping = {"__en__林风": "Lin Feng"}
+
+        result = postprocessor.validate_translation("林风来了", "Lin came", mapping)
+
+        assert result.is_valid is False
+
+
+class TestGlossaryIntegration:
+    """Integration tests for the glossary module."""
+
+    def test_full_pipeline(self):
+        """Test complete preprocessing and postprocessing pipeline."""
+        # Setup glossary
+        glossary = Glossary()
+        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+        glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
+
+        # Preprocess
+        preprocessor = GlossaryPreprocessor(glossary)
+        original = "林风释放了火球术"
+        pre_result = preprocessor.process(original)
+
+        assert pre_result.processed_text == "__en__林风释放了__en__火球术"
+
+        # Simulate translation
+        mock_translated = "__en__林风 released __en__火球术"
+
+        # Postprocess
+        postprocessor = GlossaryPostprocessor()
+        final = postprocessor.process(mock_translated, pre_result.placeholder_map)
+
+        assert final == "Lin Feng released Fireball"
+
+    def test_phase_0_validation_scenario(self):
+        """Test the Phase 0 validation scenario."""
+        # Without glossary (simulated by empty glossary)
+        empty_glossary = Glossary()
+        preprocessor = GlossaryPreprocessor(empty_glossary)
+        result = preprocessor.process("林风释放了火球术")
+
+        # No placeholders added
+        assert result.placeholder_map == {}
+        assert result.terms_found == {}
+
+        # With glossary
+        full_glossary = Glossary()
+        full_glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
+        full_glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
+
+        preprocessor = GlossaryPreprocessor(full_glossary)
+        result = preprocessor.process("林风释放了火球术")
+
+        # Placeholders added
+        assert len(result.placeholder_map) == 2
+        assert result.terms_found["林风"] == 1
+        assert result.terms_found["火球术"] == 1

+ 183 - 0
tests/test_state_machine.py

@@ -0,0 +1,183 @@
+"""
+Unit tests for the state machine module.
+
+Tests cover all state transitions, persistence, and recovery scenarios.
+"""
+
+import pytest
+from datetime import datetime
+
+from src.pipeline.state_machine import PipelineStateMachine, PipelineState
+
+
+class TestPipelineStateMachine:
+    """Test cases for PipelineStateMachine."""
+
+    def test_initial_state_is_idle(self):
+        """Test that the machine starts in IDLE state."""
+        machine = PipelineStateMachine()
+        assert machine.state == PipelineState.IDLE
+
+    def test_valid_transition_idle_to_preparing(self):
+        """Test valid transition from IDLE to PREPARING."""
+        machine = PipelineStateMachine()
+        assert machine.transition_to(PipelineState.PREPARING)
+        assert machine.state == PipelineState.PREPARING
+
+    def test_valid_transition_preparing_to_cleaning(self):
+        """Test valid transition from PREPARING to CLEANING."""
+        machine = PipelineStateMachine()
+        machine.transition_to(PipelineState.PREPARING)
+        assert machine.transition_to(PipelineState.CLEANING)
+        assert machine.state == PipelineState.CLEANING
+
+    def test_valid_transition_cleaning_to_translating(self):
+        """Test valid transition from CLEANING to TRANSLATING."""
+        machine = PipelineStateMachine()
+        machine.transition_to(PipelineState.PREPARING)
+        machine.transition_to(PipelineState.CLEANING)
+        assert machine.transition_to(PipelineState.TRANSLATING)
+        assert machine.state == PipelineState.TRANSLATING
+
+    def test_valid_transition_translating_to_uploading(self):
+        """Test valid transition from TRANSLATING to UPLOADING."""
+        machine = PipelineStateMachine()
+        for state in [
+            PipelineState.PREPARING,
+            PipelineState.CLEANING,
+            PipelineState.TRANSLATING,
+        ]:
+            machine.transition_to(state)
+        assert machine.transition_to(PipelineState.UPLOADING)
+        assert machine.state == PipelineState.UPLOADING
+
+    def test_valid_transition_uploading_to_completed(self):
+        """Test valid transition from UPLOADING to COMPLETED."""
+        machine = PipelineStateMachine()
+        for state in [
+            PipelineState.PREPARING,
+            PipelineState.CLEANING,
+            PipelineState.TRANSLATING,
+            PipelineState.UPLOADING,
+        ]:
+            machine.transition_to(state)
+        assert machine.transition_to(PipelineState.COMPLETED)
+        assert machine.state == PipelineState.COMPLETED
+
+    def test_invalid_transition_rejected(self):
+        """Test that invalid transitions are rejected."""
+        machine = PipelineStateMachine()
+        # Can't skip from IDLE to TRANSLATING
+        assert not machine.transition_to(PipelineState.TRANSLATING)
+        assert machine.state == PipelineState.IDLE
+
+    def test_pause_from_any_active_state(self):
+        """Test pausing from any active state."""
+        active_states = [
+            PipelineState.PREPARING,
+            PipelineState.CLEANING,
+            PipelineState.TRANSLATING,
+            PipelineState.UPLOADING,
+        ]
+
+        for state in active_states:
+            machine = PipelineStateMachine()
+            machine.transition_to(PipelineState.PREPARING)
+            machine.transition_to(state)
+            assert machine.transition_to(PipelineState.PAUSED)
+            assert machine.state == PipelineState.PAUSED
+
+    def test_resume_from_pause(self):
+        """Test resuming from PAUSED back to active state."""
+        machine = PipelineStateMachine()
+        machine.transition_to(PipelineState.PREPARING)
+        machine.transition_to(PipelineState.TRANSLATING)
+        machine.transition_to(PipelineState.PAUSED)
+
+        # Can resume to TRANSLATING
+        assert machine.transition_to(PipelineState.TRANSLATING)
+        assert machine.state == PipelineState.TRANSLATING
+
+    def test_failed_state_only_goes_to_idle(self):
+        """Test that FAILED state can only transition to IDLE."""
+        machine = PipelineStateMachine()
+        machine.transition_to(PipelineState.PREPARING)
+        machine.transition_to(PipelineState.FAILED)
+
+        # Can go to IDLE
+        assert machine.transition_to(PipelineState.IDLE)
+        assert machine.state == PipelineState.IDLE
+
+        # Can't go directly to another state
+        machine.transition_to(PipelineState.PREPARING)
+        machine.transition_to(PipelineState.FAILED)
+        assert not machine.transition_to(PipelineState.TRANSLATING)
+
+    def test_completed_goes_to_idle(self):
+        """Test that COMPLETED transitions to IDLE."""
+        machine = PipelineStateMachine()
+        for state in [
+            PipelineState.PREPARING,
+            PipelineState.CLEANING,
+            PipelineState.TRANSLATING,
+            PipelineState.UPLOADING,
+            PipelineState.COMPLETED,
+        ]:
+            machine.transition_to(state)
+        assert machine.transition_to(PipelineState.IDLE)
+        assert machine.state == PipelineState.IDLE
+
+    def test_state_history_tracking(self):
+        """Test that state history is tracked."""
+        machine = PipelineStateMachine()
+        machine.transition_to(PipelineState.PREPARING, reason="Starting task")
+        machine.transition_to(PipelineState.CLEANING)
+
+        history = machine.get_state_history()
+        assert len(history) == 3  # IDLE + PREPARING + CLEANING
+        assert history[0]["state"] == "idle"
+        assert history[1]["state"] == "preparing"
+        assert history[1]["reason"] == "Starting task"
+        assert history[2]["state"] == "cleaning"
+
+    def test_can_transition_to_check(self):
+        """Test the can_transition_to method."""
+        machine = PipelineStateMachine()
+
+        assert machine.can_transition_to(PipelineState.PREPARING)
+        assert not machine.can_transition_to(PipelineState.TRANSLATING)
+
+        machine.transition_to(PipelineState.PREPARING)
+        assert machine.can_transition_to(PipelineState.CLEANING)
+        assert not machine.can_transition_to(PipelineState.IDLE)
+
+    def test_reset(self):
+        """Test resetting the state machine."""
+        machine = PipelineStateMachine()
+        machine.transition_to(PipelineState.PREPARING)
+        machine.transition_to(PipelineState.CLEANING)
+
+        machine.reset()
+
+        assert machine.state == PipelineState.IDLE
+        assert len(machine.get_state_history()) == 1
+
+    def test_full_workflow(self):
+        """Test a complete workflow from start to finish."""
+        machine = PipelineStateMachine()
+
+        workflow = [
+            PipelineState.PREPARING,
+            PipelineState.CLEANING,
+            PipelineState.TRANSLATING,
+            PipelineState.UPLOADING,
+            PipelineState.COMPLETED,
+        ]
+
+        for state in workflow:
+            assert machine.transition_to(state), f"Failed to transition to {state}"
+            assert machine.state == state
+
+        # Should be able to start a new task
+        assert machine.transition_to(PipelineState.IDLE)
+        assert machine.transition_to(PipelineState.PREPARING)