# 223-template-236 / blank (forked from 137-template-113/blank)
"""
Unit tests for the quality checker module.

Tests cover translation quality checking functionality.
"""

import sys
from unittest.mock import Mock

# Register a single shared Mock under the names "torch" and "transformers" in
# sys.modules *before* the quality_checker import further down, so any
# `import torch` / `import transformers` executed after this point resolves to
# the mock instead of the real package.
# NOTE(review): presumably this lets the tests run without the heavy ML
# dependencies installed — confirm. The entries are never restored, so the mock
# stays visible to every module imported later in the same process.
sys_mock = Mock()
sys.modules["torch"] = sys_mock
sys.modules["transformers"] = sys_mock

import pytest

from src.translator.quality_checker import (
    QualityChecker,
    QualityReport,
    QualityIssue,
    QualityIssueType,
)


class TestQualityIssue:
    """Tests for building and serialising ``QualityIssue`` objects."""

    def test_create_quality_issue(self):
        """Check that a freshly built issue exposes the values it was given."""
        fields = {
            "issue_type": QualityIssueType.UNTRANSLATED_TERM,
            "location": "paragraph 1",
            "message": "Found untranslated Chinese text",
            "severity": "warning",
        }

        created = QualityIssue(**fields)

        assert created.issue_type == QualityIssueType.UNTRANSLATED_TERM
        assert created.location == "paragraph 1"
        assert created.severity == "warning"

    def test_to_dict(self):
        """Check selected keys of the dictionary form of an issue."""
        serialized = QualityIssue(
            issue_type=QualityIssueType.ABNORMAL_LENGTH,
            location="entire text",
            message="Translation too short",
            severity="error",
            source_text="Test source",
            target_text="Test target",
        ).to_dict()

        # Only these three keys are pinned; other keys are not inspected.
        expected = {
            "issue_type": "abnormal_length",
            "location": "entire text",
            "severity": "error",
        }
        for key, value in expected.items():
            assert serialized[key] == value


class TestQualityReport:
    """Tests for building, serialising and rendering ``QualityReport``."""

    @staticmethod
    def _issue(issue_type, location, message, severity):
        """Build a ``QualityIssue`` from the four fields these tests vary."""
        return QualityIssue(
            issue_type=issue_type,
            location=location,
            message=message,
            severity=severity,
        )

    def test_create_quality_report(self):
        """Check counters and the error/warning views of a two-issue report."""
        warning_issue = self._issue(
            QualityIssueType.UNTRANSLATED_TERM, "p1", "Test", "warning"
        )
        error_issue = self._issue(
            QualityIssueType.ABNORMAL_LENGTH, "p2", "Test", "error"
        )

        built = QualityReport(
            total_issues=2,
            error_count=1,
            warning_count=1,
            info_count=0,
            issues=[warning_issue, error_issue],
            is_valid=False,
        )

        assert built.total_issues == 2
        assert built.error_count == 1
        assert built.warning_count == 1
        assert built.is_valid is False
        # One issue of each severity was supplied above.
        assert len(built.errors) == 1
        assert len(built.warnings) == 1

    def test_to_dict(self):
        """Check the dictionary form of an empty, valid report."""
        empty_report = QualityReport(
            total_issues=0,
            error_count=0,
            warning_count=0,
            info_count=0,
            is_valid=True,
        )

        serialized = empty_report.to_dict()

        assert serialized["total_issues"] == 0
        assert serialized["is_valid"] is True

    def test_generate_report(self):
        """Check that the rendered report text contains the expected fragments."""
        single_warning = self._issue(
            QualityIssueType.UNTRANSLATED_TERM,
            "p1",
            "Found Chinese characters",
            "warning",
        )
        built = QualityReport(
            total_issues=1,
            error_count=0,
            warning_count=1,
            info_count=0,
            issues=[single_warning],
            is_valid=True,
        )

        rendered = built.generate_report()

        expected_fragments = (
            "Translation Quality Report",
            "Valid: True",
            "Total Issues: 1",
            "untranslated_term",
        )
        for fragment in expected_fragments:
            assert fragment in rendered


class TestQualityChecker:
    """Test cases for QualityChecker class.

    Covers construction, the individual checks run by ``check_translation``
    (empty output, line count, untranslated text, length ratio, duplicates),
    batch checking and summary aggregation.
    """

    @staticmethod
    def _issues_of_type(report, issue_type):
        """Return the issues in ``report`` whose ``issue_type`` equals ``issue_type``."""
        return [i for i in report.issues if i.issue_type == issue_type]

    def test_init(self):
        """Test QualityChecker initialization with default length ratios."""
        checker = QualityChecker()

        assert checker.min_length_ratio == 0.3
        assert checker.max_length_ratio == 3.0

    def test_init_with_params(self):
        """Test QualityChecker with custom parameters."""
        checker = QualityChecker(
            min_length_ratio=0.5,
            max_length_ratio=2.0,
            untranslated_terms={"test", "example"},
        )

        assert checker.min_length_ratio == 0.5
        assert checker.max_length_ratio == 2.0
        assert "test" in checker.untranslated_terms

    def test_check_translation_valid(self):
        """Test checking a valid translation."""
        checker = QualityChecker()

        source = "这是一个测试。"
        target = "This is a test."

        report = checker.check_translation(source, target)

        assert report.is_valid is True
        assert report.total_issues == 0

    def test_check_empty_translation(self):
        """Test detection of empty translation."""
        checker = QualityChecker()

        source = "这是一个测试。"
        target = ""

        report = checker.check_translation(source, target)

        assert report.is_valid is False
        assert report.error_count > 0
        assert any(
            i.issue_type == QualityIssueType.EMPTY_TRANSLATION
            for i in report.issues
        )

    def test_check_whitespace_only_translation(self):
        """Test detection of whitespace-only translation."""
        checker = QualityChecker()

        source = "这是一个测试。"
        target = "   \n\t  "

        report = checker.check_translation(source, target)

        assert report.is_valid is False

    def test_check_line_count_mismatch(self):
        """Test detection of line count mismatch."""
        checker = QualityChecker()

        source = "Line 1\nLine 2\nLine 3"
        target = "Line 1\nLine 2"  # Missing one line

        report = checker.check_translation(source, target)

        # Should detect mismatch
        line_issues = self._issues_of_type(
            report, QualityIssueType.LINE_COUNT_MISMATCH
        )
        assert len(line_issues) > 0

    def test_check_untranslated_chinese(self):
        """Test detection of untranslated Chinese characters."""
        checker = QualityChecker()

        source = "这是一个测试文本，包含很多中文内容。"
        target = "这是一个测试文本，包含很多中文内容。"  # Untranslated

        report = checker.check_translation(source, target, source_lang="zh")

        # Should detect untranslated Chinese
        untranslated_issues = self._issues_of_type(
            report, QualityIssueType.UNTRANSLATED_TERM
        )
        assert len(untranslated_issues) > 0

    def test_check_untranslated_term(self):
        """Test detection of specific untranslated terms."""
        checker = QualityChecker(untranslated_terms={"special_term"})

        source = "This is a special_term example."
        target = "This is a special_term example."  # Term not translated

        report = checker.check_translation(source, target)

        # Should detect the untranslated term and name it in the message
        term_issues = self._issues_of_type(
            report, QualityIssueType.UNTRANSLATED_TERM
        )
        assert any("special_term" in i.message for i in term_issues)

    def test_check_abnormal_length_short(self):
        """Test detection of abnormally short translation."""
        checker = QualityChecker(min_length_ratio=0.5)

        source = "This is a long source text with many words that should be translated properly."
        target = "Short."  # Too short

        report = checker.check_translation(source, target)

        length_issues = self._issues_of_type(
            report, QualityIssueType.ABNORMAL_LENGTH
        )
        assert len(length_issues) > 0

    def test_check_abnormal_length_long(self):
        """Test detection of abnormally long translation."""
        checker = QualityChecker(max_length_ratio=2.0)

        source = "Hi."
        target = "This is an extremely long translation that is much longer than the original source text."

        report = checker.check_translation(source, target)

        length_issues = self._issues_of_type(
            report, QualityIssueType.ABNORMAL_LENGTH
        )
        assert len(length_issues) > 0

    def test_check_duplicate_content(self):
        """Test detection of duplicate content."""
        checker = QualityChecker()

        # First and third paragraphs are identical.
        target = "This is paragraph one.\n\nThis is paragraph two.\n\nThis is paragraph one."

        report = checker.check_translation("Source text.", target)

        duplicate_issues = self._issues_of_type(
            report, QualityIssueType.DUPLICATE_CONTENT
        )
        assert len(duplicate_issues) > 0

    def test_check_batch(self):
        """Test checking multiple translations."""
        checker = QualityChecker()

        sources = ["Text 1.", "Text 2.", "Text 3."]
        targets = ["Translation 1.", "Translation 2.", "Translation 3."]

        reports = checker.check_batch(sources, targets)

        assert len(reports) == 3

    def test_check_batch_mismatched_lengths(self):
        """Test check_batch with mismatched list lengths."""
        checker = QualityChecker()

        sources = ["Text 1.", "Text 2."]
        targets = ["Translation 1."]

        with pytest.raises(ValueError, match="same length"):
            checker.check_batch(sources, targets)

    def test_get_summary(self):
        """Test getting summary from multiple reports."""
        checker = QualityChecker()

        # Two valid reports and one invalid one, three issues in total.
        reports = [
            QualityReport(total_issues=0, error_count=0, warning_count=0, info_count=0, is_valid=True),
            QualityReport(total_issues=2, error_count=1, warning_count=1, info_count=0, is_valid=False),
            QualityReport(total_issues=1, error_count=0, warning_count=1, info_count=0, is_valid=True),
        ]

        summary = checker.get_summary(reports)

        assert summary["total_translations"] == 3
        assert summary["valid_translations"] == 2
        assert summary["invalid_translations"] == 1
        assert summary["total_issues"] == 3

    def test_get_summary_empty(self):
        """Test getting summary with no reports."""
        checker = QualityChecker()

        summary = checker.get_summary([])

        assert summary["total_translations"] == 0
        assert summary["validity_rate"] == 100

    def test_issue_types_in_by_type(self):
        """Test that by_type lists the duplicate-content issue type.

        The target repeats its first paragraph as its third one, so a
        duplicate-content issue is expected to show up in ``report.by_type``.
        """
        checker = QualityChecker()

        source = "Test content with special_term here.\n\nAnother line.\n\nTest content with special_term here."
        target = "Test content with special_term here.\n\nAnother line.\n\nTest content with special_term here."

        report = checker.check_translation(source, target)

        # Only the duplicate is asserted: the checker above is built without
        # untranslated_terms, so this test does not rely on a special_term issue.
        # Assert on the key alone - an extra `or len(report.issues) >= 0` clause
        # is always true and would make this test pass unconditionally.
        assert "duplicate_content" in report.by_type

    def test_severity_levels(self):
        """Test different severity levels."""
        checker = QualityChecker(min_length_ratio=0.5, max_length_ratio=2.0)

        # Empty translation should be error
        report1 = checker.check_translation("Source", "")
        assert any(i.severity == "error" for i in report1.issues)

        # Slightly short translation should be warning
        report2 = checker.check_translation("This is medium length text.", "Short.")
        assert any(i.severity == "warning" for i in report2.issues)

        # Duplicate should be info
        report3 = checker.check_translation("A", "B\n\nB")
        assert any(i.severity == "info" for i in report3.issues)

    def test_check_translation_with_chinese_punctuation(self):
        """Test checking translation with Chinese punctuation issues."""
        checker = QualityChecker()

        # Source has Chinese content, target should not
        source = "你好，世界！"
        target = "你好世界"  # Not properly translated

        report = checker.check_translation(source, target, source_lang="zh")

        # Should detect Chinese characters remain
        assert report.total_issues > 0