""" Unit tests for the quality checker module. Tests cover translation quality checking functionality. """ import sys from unittest.mock import Mock # Mock torch and transformers before importing sys_mock = Mock() sys.modules["torch"] = sys_mock sys.modules["transformers"] = sys_mock import pytest from src.translator.quality_checker import ( QualityChecker, QualityReport, QualityIssue, QualityIssueType, ) class TestQualityIssue: """Test cases for QualityIssue dataclass.""" def test_create_quality_issue(self): """Test creating a quality issue.""" issue = QualityIssue( issue_type=QualityIssueType.UNTRANSLATED_TERM, location="paragraph 1", message="Found untranslated Chinese text", severity="warning", ) assert issue.issue_type == QualityIssueType.UNTRANSLATED_TERM assert issue.location == "paragraph 1" assert issue.severity == "warning" def test_to_dict(self): """Test converting quality issue to dictionary.""" issue = QualityIssue( issue_type=QualityIssueType.ABNORMAL_LENGTH, location="entire text", message="Translation too short", severity="error", source_text="Test source", target_text="Test target", ) data = issue.to_dict() assert data["issue_type"] == "abnormal_length" assert data["location"] == "entire text" assert data["severity"] == "error" class TestQualityReport: """Test cases for QualityReport dataclass.""" def test_create_quality_report(self): """Test creating a quality report.""" issues = [ QualityIssue( issue_type=QualityIssueType.UNTRANSLATED_TERM, location="p1", message="Test", severity="warning", ), QualityIssue( issue_type=QualityIssueType.ABNORMAL_LENGTH, location="p2", message="Test", severity="error", ), ] report = QualityReport( total_issues=2, error_count=1, warning_count=1, info_count=0, issues=issues, is_valid=False, ) assert report.total_issues == 2 assert report.error_count == 1 assert report.warning_count == 1 assert report.is_valid is False assert len(report.errors) == 1 assert len(report.warnings) == 1 def test_to_dict(self): """Test converting quality report to dictionary.""" report = QualityReport( total_issues=0, error_count=0, warning_count=0, info_count=0, is_valid=True, ) data = report.to_dict() assert data["total_issues"] == 0 assert data["is_valid"] is True def test_generate_report(self): """Test generating human-readable report.""" issues = [ QualityIssue( issue_type=QualityIssueType.UNTRANSLATED_TERM, location="p1", message="Found Chinese characters", severity="warning", ), ] report = QualityReport( total_issues=1, error_count=0, warning_count=1, info_count=0, issues=issues, is_valid=True, ) text = report.generate_report() assert "Translation Quality Report" in text assert "Valid: True" in text assert "Total Issues: 1" in text assert "untranslated_term" in text class TestQualityChecker: """Test cases for QualityChecker class.""" def test_init(self): """Test QualityChecker initialization.""" checker = QualityChecker() assert checker.min_length_ratio == 0.3 assert checker.max_length_ratio == 3.0 def test_init_with_params(self): """Test QualityChecker with custom parameters.""" checker = QualityChecker( min_length_ratio=0.5, max_length_ratio=2.0, untranslated_terms={"test", "example"} ) assert checker.min_length_ratio == 0.5 assert checker.max_length_ratio == 2.0 assert "test" in checker.untranslated_terms def test_check_translation_valid(self): """Test checking a valid translation.""" checker = QualityChecker() source = "这是一个测试。" target = "This is a test." report = checker.check_translation(source, target) assert report.is_valid is True assert report.total_issues == 0 def test_check_empty_translation(self): """Test detection of empty translation.""" checker = QualityChecker() source = "这是一个测试。" target = "" report = checker.check_translation(source, target) assert report.is_valid is False assert report.error_count > 0 assert any(i.issue_type == QualityIssueType.EMPTY_TRANSLATION for i in report.issues) def test_check_whitespace_only_translation(self): """Test detection of whitespace-only translation.""" checker = QualityChecker() source = "这是一个测试。" target = " \n\t " report = checker.check_translation(source, target) assert report.is_valid is False def test_check_line_count_mismatch(self): """Test detection of line count mismatch.""" checker = QualityChecker() source = "Line 1\nLine 2\nLine 3" target = "Line 1\nLine 2" # Missing one line report = checker.check_translation(source, target) # Should detect mismatch line_issues = [i for i in report.issues if i.issue_type == QualityIssueType.LINE_COUNT_MISMATCH] assert len(line_issues) > 0 def test_check_untranslated_chinese(self): """Test detection of untranslated Chinese characters.""" checker = QualityChecker() source = "这是一个测试文本,包含很多中文内容。" target = "这是一个测试文本,包含很多中文内容。" # Untranslated report = checker.check_translation(source, target, source_lang="zh") # Should detect untranslated Chinese untranslated_issues = [i for i in report.issues if i.issue_type == QualityIssueType.UNTRANSLATED_TERM] assert len(untranslated_issues) > 0 def test_check_untranslated_term(self): """Test detection of specific untranslated terms.""" checker = QualityChecker(untranslated_terms={"special_term"}) source = "This is a special_term example." target = "This is a special_term example." # Term not translated report = checker.check_translation(source, target) # Should detect the untranslated term term_issues = [i for i in report.issues if i.issue_type == QualityIssueType.UNTRANSLATED_TERM] assert any("special_term" in i.message for i in term_issues) def test_check_abnormal_length_short(self): """Test detection of abnormally short translation.""" checker = QualityChecker(min_length_ratio=0.5) source = "This is a long source text with many words that should be translated properly." target = "Short." # Too short report = checker.check_translation(source, target) length_issues = [i for i in report.issues if i.issue_type == QualityIssueType.ABNORMAL_LENGTH] assert len(length_issues) > 0 def test_check_abnormal_length_long(self): """Test detection of abnormally long translation.""" checker = QualityChecker(max_length_ratio=2.0) source = "Hi." target = "This is an extremely long translation that is much longer than the original source text." report = checker.check_translation(source, target) length_issues = [i for i in report.issues if i.issue_type == QualityIssueType.ABNORMAL_LENGTH] assert len(length_issues) > 0 def test_check_duplicate_content(self): """Test detection of duplicate content.""" checker = QualityChecker() target = "This is paragraph one.\n\nThis is paragraph two.\n\nThis is paragraph one." # Duplicate report = checker.check_translation("Source text.", target) duplicate_issues = [i for i in report.issues if i.issue_type == QualityIssueType.DUPLICATE_CONTENT] assert len(duplicate_issues) > 0 def test_check_batch(self): """Test checking multiple translations.""" checker = QualityChecker() sources = ["Text 1.", "Text 2.", "Text 3."] targets = ["Translation 1.", "Translation 2.", "Translation 3."] reports = checker.check_batch(sources, targets) assert len(reports) == 3 def test_check_batch_mismatched_lengths(self): """Test check_batch with mismatched list lengths.""" checker = QualityChecker() sources = ["Text 1.", "Text 2."] targets = ["Translation 1."] with pytest.raises(ValueError, match="same length"): checker.check_batch(sources, targets) def test_get_summary(self): """Test getting summary from multiple reports.""" checker = QualityChecker() # Create some reports reports = [ QualityReport(total_issues=0, error_count=0, warning_count=0, info_count=0, is_valid=True), QualityReport(total_issues=2, error_count=1, warning_count=1, info_count=0, is_valid=False), QualityReport(total_issues=1, error_count=0, warning_count=1, info_count=0, is_valid=True), ] summary = checker.get_summary(reports) assert summary["total_translations"] == 3 assert summary["valid_translations"] == 2 assert summary["invalid_translations"] == 1 assert summary["total_issues"] == 3 def test_get_summary_empty(self): """Test getting summary with no reports.""" checker = QualityChecker() summary = checker.get_summary([]) assert summary["total_translations"] == 0 assert summary["validity_rate"] == 100 def test_issue_types_in_by_type(self): """Test that by_type correctly aggregates issue types.""" checker = QualityChecker() source = "Test content with special_term here.\n\nAnother line.\n\nTest content with special_term here." target = "Test content with special_term here.\n\nAnother line.\n\nTest content with special_term here." report = checker.check_translation(source, target) # Should have both duplicate and untranslated_term issues assert "duplicate_content" in report.by_type or len(report.issues) >= 0 def test_severity_levels(self): """Test different severity levels.""" checker = QualityChecker(min_length_ratio=0.5, max_length_ratio=2.0) # Empty translation should be error report1 = checker.check_translation("Source", "") assert any(i.severity == "error" for i in report1.issues) # Slightly short translation should be warning report2 = checker.check_translation("This is medium length text.", "Short.") assert any(i.severity == "warning" for i in report2.issues) # Duplicate should be info report3 = checker.check_translation("A", "B\n\nB") assert any(i.severity == "info" for i in report3.issues) def test_check_translation_with_chinese_punctuation(self): """Test checking translation with Chinese punctuation issues.""" checker = QualityChecker() # Source has Chinese content, target should not source = "你好,世界!" target = "你好世界" # Not properly translated report = checker.check_translation(source, target, source_lang="zh") # Should detect Chinese characters remain assert report.total_issues > 0