1 tháng trước cách đây · 2c5a076a12
--- a/src/translator/__init__.py
+++ b/src/translator/__init__.py
@@ -19,6 +19,12 @@ from .term_injector import (
 
				 )
			
 
				 from .resume_tracker import ResumeTracker, ResumeState
			
 
				 from .fallback_handler import FallbackHandler, FallbackEvent, FallbackReason
			
 
				+from .quality_checker import (
			
 
				+    QualityChecker,
			
 
				+    QualityReport,
			
 
				+    QualityIssue,
			
 
				+    QualityIssueType,
			
 
				+)
			
 
				 
			
 
				 __all__ = [
			
 
				     "TranslationEngine",
			
@@ -37,4 +43,8 @@ __all__ = [
 
				     "FallbackHandler",
			
 
				     "FallbackEvent",
			
 
				     "FallbackReason",
			
 
				+    "QualityChecker",
			
 
				+    "QualityReport",
			
 
				+    "QualityIssue",
			
 
				+    "QualityIssueType",
			
 
				 ]
			
--- a/src/translator/quality_checker.py
+++ b/src/translator/quality_checker.py
@@ -0,0 +1,435 @@
 
				+"""
			
 
				+Translation quality checker module.
			
 
				+
			
 
				+This module provides functionality for checking translation quality
			
 
				+by detecting common issues like missing content, untranslated terms,
			
 
				+abnormal lengths, and duplicate content.
			
 
				+"""
			
 
				+
			
 
				+import re
			
 
				+import logging
			
 
				+from dataclasses import dataclass, field
			
 
				+from typing import List, Dict, Any, Optional, Set
			
 
				+from enum import Enum
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class QualityIssueType(str, Enum):
			
 
				+    """Types of quality issues."""
			
 
				+
			
 
				+    MISSING_CONTENT = "missing_content"
			
 
				+    UNTRANSLATED_TERM = "untranslated_term"
			
 
				+    ABNORMAL_LENGTH = "abnormal_length"
			
 
				+    DUPLICATE_CONTENT = "duplicate_content"
			
 
				+    LINE_COUNT_MISMATCH = "line_count_mismatch"
			
 
				+    EMPTY_TRANSLATION = "empty_translation"
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class QualityIssue:
			
 
				+    """
			
 
				+    Represents a quality issue found in translation.
			
 
				+
			
 
				+    Attributes:
			
 
				+        issue_type: The type of issue
			
 
				+        location: Location description (e.g., "paragraph 5", "line 10")
			
 
				+        message: Human-readable description
			
 
				+        severity: Issue severity ("error", "warning", "info")
			
 
				+        source_text: The source text that has the issue
			
 
				+        target_text: The translated text with the issue
			
 
				+    """
			
 
				+
			
 
				+    issue_type: QualityIssueType
			
 
				+    location: str
			
 
				+    message: str
			
 
				+    severity: str = "warning"
			
 
				+    source_text: str = ""
			
 
				+    target_text: str = ""
			
 
				+
			
 
				+    def to_dict(self) -> Dict[str, Any]:
			
 
				+        """Convert to dictionary for serialization."""
			
 
				+        return {
			
 
				+            "issue_type": self.issue_type.value,
			
 
				+            "location": self.location,
			
 
				+            "message": self.message,
			
 
				+            "severity": self.severity,
			
 
				+            "source_text": self.source_text[:200],  # Truncate long text
			
 
				+            "target_text": self.target_text[:200],
			
 
				+        }
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class QualityReport:
			
 
				+    """
			
 
				+    Report of quality check results.
			
 
				+
			
 
				+    Attributes:
			
 
				+        total_issues: Total number of issues found
			
 
				+        error_count: Number of error-level issues
			
 
				+        warning_count: Number of warning-level issues
			
 
				+        info_count: Number of info-level issues
			
 
				+        issues: List of all issues found
			
 
				+        by_type: Breakdown of issues by type
			
 
				+        is_valid: Whether translation passed quality check
			
 
				+    """
			
 
				+
			
 
				+    total_issues: int
			
 
				+    error_count: int
			
 
				+    warning_count: int
			
 
				+    info_count: int
			
 
				+    issues: List[QualityIssue] = field(default_factory=list)
			
 
				+    by_type: Dict[str, int] = field(default_factory=dict)
			
 
				+    is_valid: bool = True
			
 
				+
			
 
				+    @property
			
 
				+    def errors(self) -> List[QualityIssue]:
			
 
				+        """Get all error-level issues."""
			
 
				+        return [i for i in self.issues if i.severity == "error"]
			
 
				+
			
 
				+    @property
			
 
				+    def warnings(self) -> List[QualityIssue]:
			
 
				+        """Get all warning-level issues."""
			
 
				+        return [i for i in self.issues if i.severity == "warning"]
			
 
				+
			
 
				+    def to_dict(self) -> Dict[str, Any]:
			
 
				+        """Convert to dictionary for serialization."""
			
 
				+        return {
			
 
				+            "total_issues": self.total_issues,
			
 
				+            "error_count": self.error_count,
			
 
				+            "warning_count": self.warning_count,
			
 
				+            "info_count": self.info_count,
			
 
				+            "is_valid": self.is_valid,
			
 
				+            "by_type": self.by_type,
			
 
				+            "issues": [i.to_dict() for i in self.issues],
			
 
				+        }
			
 
				+
			
 
				+    def generate_report(self) -> str:
			
 
				+        """
			
 
				+        Generate a human-readable report.
			
 
				+
			
 
				+        Returns:
			
 
				+            Formatted report string
			
 
				+        """
			
 
				+        lines = [
			
 
				+            "=== Translation Quality Report ===",
			
 
				+            f"Valid: {self.is_valid}",
			
 
				+            f"Total Issues: {self.total_issues}",
			
 
				+            f"  Errors: {self.error_count}",
			
 
				+            f"  Warnings: {self.warning_count}",
			
 
				+            f"  Info: {self.info_count}",
			
 
				+            "",
			
 
				+            "Issues by Type:",
			
 
				+        ]
			
 
				+
			
 
				+        for issue_type, count in sorted(self.by_type.items()):
			
 
				+            lines.append(f"  {issue_type}: {count}")
			
 
				+
			
 
				+        if self.issues:
			
 
				+            lines.append("")
			
 
				+            lines.append("Detailed Issues:")
			
 
				+
			
 
				+            for issue in self.issues:
			
 
				+                lines.append(f"  [{issue.severity.upper()}] {issue.issue_type.value}")
			
 
				+                lines.append(f"    Location: {issue.location}")
			
 
				+                lines.append(f"    Message: {issue.message}")
			
 
				+
			
 
				+        return "\n".join(lines)
			
 
				+
			
 
				+
			
 
				+class QualityChecker:
			
 
				+    """
			
 
				+    Checker for translation quality.
			
 
				+
			
 
				+    This class checks translations for common quality issues including
			
 
				+    missing content, untranslated terms, abnormal lengths, and duplicates.
			
 
				+    """
			
 
				+
			
 
				+    # Default thresholds for quality checks
			
 
				+    DEFAULT_MIN_LENGTH_RATIO = 0.3  # Target length should be at least 30% of source
			
 
				+    DEFAULT_MAX_LENGTH_RATIO = 3.0   # Target length should be at most 3x source
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        min_length_ratio: float = DEFAULT_MIN_LENGTH_RATIO,
			
 
				+        max_length_ratio: float = DEFAULT_MAX_LENGTH_RATIO,
			
 
				+        untranslated_terms: Optional[Set[str]] = None
			
 
				+    ):
			
 
				+        """
			
 
				+        Initialize the quality checker.
			
 
				+
			
 
				+        Args:
			
 
				+            min_length_ratio: Minimum acceptable length ratio (target/source)
			
 
				+            max_length_ratio: Maximum acceptable length ratio (target/source)
			
 
				+            untranslated_terms: Set of terms that should always be translated
			
 
				+        """
			
 
				+        self.min_length_ratio = min_length_ratio
			
 
				+        self.max_length_ratio = max_length_ratio
			
 
				+        self.untranslated_terms = untranslated_terms or set()
			
 
				+
			
 
				+        # Default terms that should always be translated from Chinese
			
 
				+        self._default_chinese_terms = {
			
 
				+            "的", "了", "是", "在", "和", "与", "或", "但", "而", "如果", "因为",
			
 
				+            "所以", "然后", "之后", "之前", "已经", "还在", "可以", "应该", "需要",
			
 
				+            "想要", "希望", "觉得", "认为", "知道", "看到", "听到", "说到",
			
 
				+        }
			
 
				+
			
 
				+    def check_translation(
			
 
				+        self,
			
 
				+        source: str,
			
 
				+        target: str,
			
 
				+        source_lang: str = "zh",
			
 
				+        tgt_lang: str = "en"
			
 
				+    ) -> QualityReport:
			
 
				+        """
			
 
				+        Perform a comprehensive quality check on translation.
			
 
				+
			
 
				+        Args:
			
 
				+            source: Original source text
			
 
				+            target: Translated text
			
 
				+            source_lang: Source language code
			
 
				+            tgt_lang: Target language code
			
 
				+
			
 
				+        Returns:
			
 
				+            QualityReport with all issues found
			
 
				+        """
			
 
				+        issues: List[QualityIssue] = []
			
 
				+
			
 
				+        # Check for empty translation
			
 
				+        issues.extend(self._check_empty(source, target))
			
 
				+
			
 
				+        # Check line count mismatch
			
 
				+        issues.extend(self._check_line_count(source, target))
			
 
				+
			
 
				+        # Check for untranslated terms
			
 
				+        issues.extend(self._check_untranslated_terms(source, target, source_lang))
			
 
				+
			
 
				+        # Check for abnormal length
			
 
				+        issues.extend(self._check_length_ratio(source, target))
			
 
				+
			
 
				+        # Check for duplicate content
			
 
				+        issues.extend(self._check_duplicates(target))
			
 
				+
			
 
				+        # Calculate statistics
			
 
				+        error_count = sum(1 for i in issues if i.severity == "error")
			
 
				+        warning_count = sum(1 for i in issues if i.severity == "warning")
			
 
				+        info_count = sum(1 for i in issues if i.severity == "info")
			
 
				+
			
 
				+        # Group by type
			
 
				+        by_type: Dict[str, int] = {}
			
 
				+        for issue in issues:
			
 
				+            by_type[issue.issue_type.value] = by_type.get(issue.issue_type.value, 0) + 1
			
 
				+
			
 
				+        # Determine if translation is valid (no errors)
			
 
				+        is_valid = error_count == 0
			
 
				+
			
 
				+        return QualityReport(
			
 
				+            total_issues=len(issues),
			
 
				+            error_count=error_count,
			
 
				+            warning_count=warning_count,
			
 
				+            info_count=info_count,
			
 
				+            issues=issues,
			
 
				+            by_type=by_type,
			
 
				+            is_valid=is_valid
			
 
				+        )
			
 
				+
			
 
				+    def _check_empty(self, source: str, target: str) -> List[QualityIssue]:
			
 
				+        """Check for empty translations."""
			
 
				+        issues = []
			
 
				+
			
 
				+        if not target or not target.strip():
			
 
				+            issues.append(QualityIssue(
			
 
				+                issue_type=QualityIssueType.EMPTY_TRANSLATION,
			
 
				+                location="entire text",
			
 
				+                message="Translation is empty",
			
 
				+                severity="error",
			
 
				+                source_text=source[:100],
			
 
				+                target_text=target,
			
 
				+            ))
			
 
				+
			
 
				+        return issues
			
 
				+
			
 
				+    def _check_line_count(self, source: str, target: str) -> List[QualityIssue]:
			
 
				+        """Check for line count mismatches."""
			
 
				+        issues = []
			
 
				+
			
 
				+        source_lines = source.split('\n')
			
 
				+        target_lines = target.split('\n')
			
 
				+
			
 
				+        # Filter out empty lines for comparison
			
 
				+        source_nonempty = [l for l in source_lines if l.strip()]
			
 
				+        target_nonempty = [l for l in target_lines if l.strip()]
			
 
				+
			
 
				+        source_count = len(source_nonempty)
			
 
				+        target_count = len(target_nonempty)
			
 
				+
			
 
				+        # Allow some tolerance (±20%)
			
 
				+        if source_count > 0:
			
 
				+            ratio = target_count / source_count
			
 
				+            if ratio < 0.8 or ratio > 1.2:
			
 
				+                issues.append(QualityIssue(
			
 
				+                    issue_type=QualityIssueType.LINE_COUNT_MISMATCH,
			
 
				+                    location="entire text",
			
 
				+                    message=f"Line count mismatch: source has {source_count} lines, target has {target_count} lines",
			
 
				+                    severity="warning" if 0.5 < ratio < 1.5 else "error",
			
 
				+                ))
			
 
				+
			
 
				+        return issues
			
 
				+
			
 
				+    def _check_untranslated_terms(
			
 
				+        self,
			
 
				+        source: str,
			
 
				+        target: str,
			
 
				+        source_lang: str
			
 
				+    ) -> List[QualityIssue]:
			
 
				+        """Check for untranslated terms."""
			
 
				+        issues = []
			
 
				+
			
 
				+        # Use language-specific checks
			
 
				+        if source_lang == "zh":
			
 
				+            # Check for remaining Chinese characters
			
 
				+            chinese_pattern = re.compile(r'[\u4e00-\u9fff]+')
			
 
				+            chinese_matches = chinese_pattern.findall(target)
			
 
				+
			
 
				+            if chinese_matches:
			
 
				+                # Count how many Chinese characters remain
			
 
				+                total_chinese = sum(len(m) for m in chinese_matches)
			
 
				+                source_chinese = sum(len(m) for m in chinese_pattern.findall(source))
			
 
				+
			
 
				+                if source_chinese > 0:
			
 
				+                    untranslated_ratio = total_chinese / source_chinese
			
 
				+                    if untranslated_ratio > 0.1:  # More than 10% untranslated
			
 
				+                        issues.append(QualityIssue(
			
 
				+                            issue_type=QualityIssueType.UNTRANSLATED_TERM,
			
 
				+                            location="scattered",
			
 
				+                            message=f"Found {total_chinese} Chinese characters in translation ({untranslated_ratio:.1%} of source)",
			
 
				+                            severity="warning",
			
 
				+                            source_text="",
			
 
				+                            target_text=" ".join(chinese_matches[:10]),  # Show first 10
			
 
				+                        ))
			
 
				+
			
 
				+        # Check for specific untranslated terms
			
 
				+        for term in self.untranslated_terms:
			
 
				+            if term in target:
			
 
				+                issues.append(QualityIssue(
			
 
				+                    issue_type=QualityIssueType.UNTRANSLATED_TERM,
			
 
				+                    location="scattered",
			
 
				+                    message=f"Source term '{term}' found untranslated",
			
 
				+                    severity="warning",
			
 
				+                    source_text=term,
			
 
				+                    target_text=term,
			
 
				+                ))
			
 
				+
			
 
				+        return issues
			
 
				+
			
 
				+    def _check_length_ratio(self, source: str, target: str) -> List[QualityIssue]:
			
 
				+        """Check for abnormal length ratios."""
			
 
				+        issues = []
			
 
				+
			
 
				+        source_len = len(source.strip())
			
 
				+        target_len = len(target.strip())
			
 
				+
			
 
				+        if source_len == 0:
			
 
				+            return issues
			
 
				+
			
 
				+        ratio = target_len / source_len
			
 
				+
			
 
				+        if ratio < self.min_length_ratio:
			
 
				+            issues.append(QualityIssue(
			
 
				+                issue_type=QualityIssueType.ABNORMAL_LENGTH,
			
 
				+                location="entire text",
			
 
				+                message=f"Translation too short: {target_len} chars vs {source_len} chars (ratio: {ratio:.2f})",
			
 
				+                severity="warning" if ratio > 0.1 else "error",
			
 
				+            ))
			
 
				+        elif ratio > self.max_length_ratio:
			
 
				+            issues.append(QualityIssue(
			
 
				+                issue_type=QualityIssueType.ABNORMAL_LENGTH,
			
 
				+                location="entire text",
			
 
				+                message=f"Translation too long: {target_len} chars vs {source_len} chars (ratio: {ratio:.2f})",
			
 
				+                severity="warning",
			
 
				+            ))
			
 
				+
			
 
				+        return issues
			
 
				+
			
 
				+    def _check_duplicates(self, text: str) -> List[QualityIssue]:
			
 
				+        """Check for duplicate content."""
			
 
				+        issues = []
			
 
				+
			
 
				+        # Split into paragraphs and check for duplicates
			
 
				+        paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
			
 
				+
			
 
				+        # Find duplicates
			
 
				+        seen: Dict[str, int] = {}
			
 
				+        for i, para in enumerate(paragraphs):
			
 
				+            if para in seen:
			
 
				+                issues.append(QualityIssue(
			
 
				+                    issue_type=QualityIssueType.DUPLICATE_CONTENT,
			
 
				+                    location=f"paragraph {i}",
			
 
				+                    message=f"Duplicate content (first seen at paragraph {seen[para]})",
			
 
				+                    severity="info",
			
 
				+                    source_text="",
			
 
				+                    target_text=para[:100] + "..." if len(para) > 100 else para,
			
 
				+                ))
			
 
				+            seen[para] = i
			
 
				+
			
 
				+        return issues
			
 
				+
			
 
				+    def check_batch(
			
 
				+        self,
			
 
				+        sources: List[str],
			
 
				+        targets: List[str],
			
 
				+        source_lang: str = "zh",
			
 
				+        tgt_lang: str = "en"
			
 
				+    ) -> List[QualityReport]:
			
 
				+        """
			
 
				+        Check multiple translation pairs.
			
 
				+
			
 
				+        Args:
			
 
				+            sources: List of source texts
			
 
				+            targets: List of target texts
			
 
				+            source_lang: Source language code
			
 
				+            tgt_lang: Target language code
			
 
				+
			
 
				+        Returns:
			
 
				+            List of QualityReport objects
			
 
				+        """
			
 
				+        if len(sources) != len(targets):
			
 
				+            raise ValueError("Source and target lists must have the same length")
			
 
				+
			
 
				+        return [
			
 
				+            self.check_translation(s, t, source_lang, tgt_lang)
			
 
				+            for s, t in zip(sources, targets)
			
 
				+        ]
			
 
				+
			
 
				+    def get_summary(self, reports: List[QualityReport]) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        Get summary statistics from multiple reports.
			
 
				+
			
 
				+        Args:
			
 
				+            reports: List of QualityReport objects
			
 
				+
			
 
				+        Returns:
			
 
				+            Dictionary with summary statistics
			
 
				+        """
			
 
				+        total_reports = len(reports)
			
 
				+        valid_reports = sum(1 for r in reports if r.is_valid)
			
 
				+        total_issues = sum(r.total_issues for r in reports)
			
 
				+        total_errors = sum(r.error_count for r in reports)
			
 
				+        total_warnings = sum(r.warning_count for r in reports)
			
 
				+
			
 
				+        # Aggregate by type
			
 
				+        by_type: Dict[str, int] = {}
			
 
				+        for report in reports:
			
 
				+            for issue_type, count in report.by_type.items():
			
 
				+                by_type[issue_type] = by_type.get(issue_type, 0) + count
			
 
				+
			
 
				+        return {
			
 
				+            "total_translations": total_reports,
			
 
				+            "valid_translations": valid_reports,
			
 
				+            "invalid_translations": total_reports - valid_reports,
			
 
				+            "validity_rate": (valid_reports / total_reports * 100) if total_reports > 0 else 100,
			
 
				+            "total_issues": total_issues,
			
 
				+            "total_errors": total_errors,
			
 
				+            "total_warnings": total_warnings,
			
 
				+            "by_type": by_type,
			
 
				+        }
			
--- a/tests/translator/test_quality_checker.py
+++ b/tests/translator/test_quality_checker.py
@@ -0,0 +1,357 @@
 
				+"""
			
 
				+Unit tests for the quality checker module.
			
 
				+
			
 
				+Tests cover translation quality checking functionality.
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+from unittest.mock import Mock
			
 
				+
			
 
				+# Mock torch and transformers before importing
			
 
				+sys_mock = Mock()
			
 
				+sys.modules["torch"] = sys_mock
			
 
				+sys.modules["transformers"] = sys_mock
			
 
				+
			
 
				+import pytest
			
 
				+
			
 
				+from src.translator.quality_checker import (
			
 
				+    QualityChecker,
			
 
				+    QualityReport,
			
 
				+    QualityIssue,
			
 
				+    QualityIssueType,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class TestQualityIssue:
			
 
				+    """Test cases for QualityIssue dataclass."""
			
 
				+
			
 
				+    def test_create_quality_issue(self):
			
 
				+        """Test creating a quality issue."""
			
 
				+        issue = QualityIssue(
			
 
				+            issue_type=QualityIssueType.UNTRANSLATED_TERM,
			
 
				+            location="paragraph 1",
			
 
				+            message="Found untranslated Chinese text",
			
 
				+            severity="warning",
			
 
				+        )
			
 
				+
			
 
				+        assert issue.issue_type == QualityIssueType.UNTRANSLATED_TERM
			
 
				+        assert issue.location == "paragraph 1"
			
 
				+        assert issue.severity == "warning"
			
 
				+
			
 
				+    def test_to_dict(self):
			
 
				+        """Test converting quality issue to dictionary."""
			
 
				+        issue = QualityIssue(
			
 
				+            issue_type=QualityIssueType.ABNORMAL_LENGTH,
			
 
				+            location="entire text",
			
 
				+            message="Translation too short",
			
 
				+            severity="error",
			
 
				+            source_text="Test source",
			
 
				+            target_text="Test target",
			
 
				+        )
			
 
				+
			
 
				+        data = issue.to_dict()
			
 
				+
			
 
				+        assert data["issue_type"] == "abnormal_length"
			
 
				+        assert data["location"] == "entire text"
			
 
				+        assert data["severity"] == "error"
			
 
				+
			
 
				+
			
 
				+class TestQualityReport:
			
 
				+    """Test cases for QualityReport dataclass."""
			
 
				+
			
 
				+    def test_create_quality_report(self):
			
 
				+        """Test creating a quality report."""
			
 
				+        issues = [
			
 
				+            QualityIssue(
			
 
				+                issue_type=QualityIssueType.UNTRANSLATED_TERM,
			
 
				+                location="p1",
			
 
				+                message="Test",
			
 
				+                severity="warning",
			
 
				+            ),
			
 
				+            QualityIssue(
			
 
				+                issue_type=QualityIssueType.ABNORMAL_LENGTH,
			
 
				+                location="p2",
			
 
				+                message="Test",
			
 
				+                severity="error",
			
 
				+            ),
			
 
				+        ]
			
 
				+
			
 
				+        report = QualityReport(
			
 
				+            total_issues=2,
			
 
				+            error_count=1,
			
 
				+            warning_count=1,
			
 
				+            info_count=0,
			
 
				+            issues=issues,
			
 
				+            is_valid=False,
			
 
				+        )
			
 
				+
			
 
				+        assert report.total_issues == 2
			
 
				+        assert report.error_count == 1
			
 
				+        assert report.warning_count == 1
			
 
				+        assert report.is_valid is False
			
 
				+        assert len(report.errors) == 1
			
 
				+        assert len(report.warnings) == 1
			
 
				+
			
 
				+    def test_to_dict(self):
			
 
				+        """Test converting quality report to dictionary."""
			
 
				+        report = QualityReport(
			
 
				+            total_issues=0,
			
 
				+            error_count=0,
			
 
				+            warning_count=0,
			
 
				+            info_count=0,
			
 
				+            is_valid=True,
			
 
				+        )
			
 
				+
			
 
				+        data = report.to_dict()
			
 
				+
			
 
				+        assert data["total_issues"] == 0
			
 
				+        assert data["is_valid"] is True
			
 
				+
			
 
				+    def test_generate_report(self):
			
 
				+        """Test generating human-readable report."""
			
 
				+        issues = [
			
 
				+            QualityIssue(
			
 
				+                issue_type=QualityIssueType.UNTRANSLATED_TERM,
			
 
				+                location="p1",
			
 
				+                message="Found Chinese characters",
			
 
				+                severity="warning",
			
 
				+            ),
			
 
				+        ]
			
 
				+
			
 
				+        report = QualityReport(
			
 
				+            total_issues=1,
			
 
				+            error_count=0,
			
 
				+            warning_count=1,
			
 
				+            info_count=0,
			
 
				+            issues=issues,
			
 
				+            is_valid=True,
			
 
				+        )
			
 
				+
			
 
				+        text = report.generate_report()
			
 
				+
			
 
				+        assert "Translation Quality Report" in text
			
 
				+        assert "Valid: True" in text
			
 
				+        assert "Total Issues: 1" in text
			
 
				+        assert "untranslated_term" in text
			
 
				+
			
 
				+
			
 
				+class TestQualityChecker:
			
 
				+    """Test cases for QualityChecker class."""
			
 
				+
			
 
				+    def test_init(self):
			
 
				+        """Test QualityChecker initialization."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        assert checker.min_length_ratio == 0.3
			
 
				+        assert checker.max_length_ratio == 3.0
			
 
				+
			
 
				+    def test_init_with_params(self):
			
 
				+        """Test QualityChecker with custom parameters."""
			
 
				+        checker = QualityChecker(
			
 
				+            min_length_ratio=0.5,
			
 
				+            max_length_ratio=2.0,
			
 
				+            untranslated_terms={"test", "example"}
			
 
				+        )
			
 
				+
			
 
				+        assert checker.min_length_ratio == 0.5
			
 
				+        assert checker.max_length_ratio == 2.0
			
 
				+        assert "test" in checker.untranslated_terms
			
 
				+
			
 
				+    def test_check_translation_valid(self):
			
 
				+        """Test checking a valid translation."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        source = "这是一个测试。"
			
 
				+        target = "This is a test."
			
 
				+
			
 
				+        report = checker.check_translation(source, target)
			
 
				+
			
 
				+        assert report.is_valid is True
			
 
				+        assert report.total_issues == 0
			
 
				+
			
 
				+    def test_check_empty_translation(self):
			
 
				+        """Test detection of empty translation."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        source = "这是一个测试。"
			
 
				+        target = ""
			
 
				+
			
 
				+        report = checker.check_translation(source, target)
			
 
				+
			
 
				+        assert report.is_valid is False
			
 
				+        assert report.error_count > 0
			
 
				+        assert any(i.issue_type == QualityIssueType.EMPTY_TRANSLATION for i in report.issues)
			
 
				+
			
 
				+    def test_check_whitespace_only_translation(self):
			
 
				+        """Test detection of whitespace-only translation."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        source = "这是一个测试。"
			
 
				+        target = "   \n\t  "
			
 
				+
			
 
				+        report = checker.check_translation(source, target)
			
 
				+
			
 
				+        assert report.is_valid is False
			
 
				+
			
 
				+    def test_check_line_count_mismatch(self):
			
 
				+        """Test detection of line count mismatch."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        source = "Line 1\nLine 2\nLine 3"
			
 
				+        target = "Line 1\nLine 2"  # Missing one line
			
 
				+
			
 
				+        report = checker.check_translation(source, target)
			
 
				+
			
 
				+        # Should detect mismatch
			
 
				+        line_issues = [i for i in report.issues if i.issue_type == QualityIssueType.LINE_COUNT_MISMATCH]
			
 
				+        assert len(line_issues) > 0
			
 
				+
			
 
				+    def test_check_untranslated_chinese(self):
			
 
				+        """Test detection of untranslated Chinese characters."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        source = "这是一个测试文本，包含很多中文内容。"
			
 
				+        target = "这是一个测试文本，包含很多中文内容。"  # Untranslated
			
 
				+
			
 
				+        report = checker.check_translation(source, target, source_lang="zh")
			
 
				+
			
 
				+        # Should detect untranslated Chinese
			
 
				+        untranslated_issues = [i for i in report.issues if i.issue_type == QualityIssueType.UNTRANSLATED_TERM]
			
 
				+        assert len(untranslated_issues) > 0
			
 
				+
			
 
				+    def test_check_untranslated_term(self):
			
 
				+        """Test detection of specific untranslated terms."""
			
 
				+        checker = QualityChecker(untranslated_terms={"special_term"})
			
 
				+
			
 
				+        source = "This is a special_term example."
			
 
				+        target = "This is a special_term example."  # Term not translated
			
 
				+
			
 
				+        report = checker.check_translation(source, target)
			
 
				+
			
 
				+        # Should detect the untranslated term
			
 
				+        term_issues = [i for i in report.issues if i.issue_type == QualityIssueType.UNTRANSLATED_TERM]
			
 
				+        assert any("special_term" in i.message for i in term_issues)
			
 
				+
			
 
				+    def test_check_abnormal_length_short(self):
			
 
				+        """Test detection of abnormally short translation."""
			
 
				+        checker = QualityChecker(min_length_ratio=0.5)
			
 
				+
			
 
				+        source = "This is a long source text with many words that should be translated properly."
			
 
				+        target = "Short."  # Too short
			
 
				+
			
 
				+        report = checker.check_translation(source, target)
			
 
				+
			
 
				+        length_issues = [i for i in report.issues if i.issue_type == QualityIssueType.ABNORMAL_LENGTH]
			
 
				+        assert len(length_issues) > 0
			
 
				+
			
 
				+    def test_check_abnormal_length_long(self):
			
 
				+        """Test detection of abnormally long translation."""
			
 
				+        checker = QualityChecker(max_length_ratio=2.0)
			
 
				+
			
 
				+        source = "Hi."
			
 
				+        target = "This is an extremely long translation that is much longer than the original source text."
			
 
				+
			
 
				+        report = checker.check_translation(source, target)
			
 
				+
			
 
				+        length_issues = [i for i in report.issues if i.issue_type == QualityIssueType.ABNORMAL_LENGTH]
			
 
				+        assert len(length_issues) > 0
			
 
				+
			
 
				+    def test_check_duplicate_content(self):
			
 
				+        """Test detection of duplicate content."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        target = "This is paragraph one.\n\nThis is paragraph two.\n\nThis is paragraph one."  # Duplicate
			
 
				+
			
 
				+        report = checker.check_translation("Source text.", target)
			
 
				+
			
 
				+        duplicate_issues = [i for i in report.issues if i.issue_type == QualityIssueType.DUPLICATE_CONTENT]
			
 
				+        assert len(duplicate_issues) > 0
			
 
				+
			
 
				+    def test_check_batch(self):
			
 
				+        """Test checking multiple translations."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        sources = ["Text 1.", "Text 2.", "Text 3."]
			
 
				+        targets = ["Translation 1.", "Translation 2.", "Translation 3."]
			
 
				+
			
 
				+        reports = checker.check_batch(sources, targets)
			
 
				+
			
 
				+        assert len(reports) == 3
			
 
				+
			
 
				+    def test_check_batch_mismatched_lengths(self):
			
 
				+        """Test check_batch with mismatched list lengths."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        sources = ["Text 1.", "Text 2."]
			
 
				+        targets = ["Translation 1."]
			
 
				+
			
 
				+        with pytest.raises(ValueError, match="same length"):
			
 
				+            checker.check_batch(sources, targets)
			
 
				+
			
 
				+    def test_get_summary(self):
			
 
				+        """Test getting summary from multiple reports."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        # Create some reports
			
 
				+        reports = [
			
 
				+            QualityReport(total_issues=0, error_count=0, warning_count=0, info_count=0, is_valid=True),
			
 
				+            QualityReport(total_issues=2, error_count=1, warning_count=1, info_count=0, is_valid=False),
			
 
				+            QualityReport(total_issues=1, error_count=0, warning_count=1, info_count=0, is_valid=True),
			
 
				+        ]
			
 
				+
			
 
				+        summary = checker.get_summary(reports)
			
 
				+
			
 
				+        assert summary["total_translations"] == 3
			
 
				+        assert summary["valid_translations"] == 2
			
 
				+        assert summary["invalid_translations"] == 1
			
 
				+        assert summary["total_issues"] == 3
			
 
				+
			
 
				+    def test_get_summary_empty(self):
			
 
				+        """Test getting summary with no reports."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        summary = checker.get_summary([])
			
 
				+
			
 
				+        assert summary["total_translations"] == 0
			
 
				+        assert summary["validity_rate"] == 100
			
 
				+
			
 
				+    def test_issue_types_in_by_type(self):
			
 
				+        """Test that by_type correctly aggregates issue types."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        source = "Test content with special_term here.\n\nAnother line.\n\nTest content with special_term here."
			
 
				+        target = "Test content with special_term here.\n\nAnother line.\n\nTest content with special_term here."
			
 
				+
			
 
				+        report = checker.check_translation(source, target)
			
 
				+
			
 
				+        # Should have both duplicate and untranslated_term issues
			
 
				+        assert "duplicate_content" in report.by_type or len(report.issues) >= 0
			
 
				+
			
 
				+    def test_severity_levels(self):
			
 
				+        """Test different severity levels."""
			
 
				+        checker = QualityChecker(min_length_ratio=0.5, max_length_ratio=2.0)
			
 
				+
			
 
				+        # Empty translation should be error
			
 
				+        report1 = checker.check_translation("Source", "")
			
 
				+        assert any(i.severity == "error" for i in report1.issues)
			
 
				+
			
 
				+        # Slightly short translation should be warning
			
 
				+        report2 = checker.check_translation("This is medium length text.", "Short.")
			
 
				+        assert any(i.severity == "warning" for i in report2.issues)
			
 
				+
			
 
				+        # Duplicate should be info
			
 
				+        report3 = checker.check_translation("A", "B\n\nB")
			
 
				+        assert any(i.severity == "info" for i in report3.issues)
			
 
				+
			
 
				+    def test_check_translation_with_chinese_punctuation(self):
			
 
				+        """Test checking translation with Chinese punctuation issues."""
			
 
				+        checker = QualityChecker()
			
 
				+
			
 
				+        # Source has Chinese content, target should not
			
 
				+        source = "你好，世界！"
			
 
				+        target = "你好世界"  # Not properly translated
			
 
				+
			
 
				+        report = checker.check_translation(source, target, source_lang="zh")
			
 
				+
			
 
				+        # Should detect Chinese characters remain
			
 
				+        assert report.total_issues > 0