""" Unit tests for the term injection module. Tests cover term injection, validation, and statistics tracking. """ import sys from unittest.mock import Mock # Mock torch and transformers before importing sys_mock = Mock() sys.modules["torch"] = sys_mock sys.modules["transformers"] = sys_mock import pytest from src.glossary.models import Glossary, GlossaryEntry, TermCategory from src.translator.term_injector import ( TermInjector, TermValidator, TermStatistics, TermValidationResult, TermResult, TermUsageRecord, ) class TestTermInjector: """Test cases for TermInjector class.""" def test_init(self): """Test TermInjector initialization.""" glossary = Glossary() injector = TermInjector(glossary, "zh", "en") assert injector.glossary == glossary assert injector.src_lang == "zh" assert injector.tgt_lang == "en" def test_generate_prompt_with_terms(self): """Test prompt generation with glossary terms.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER, "Protagonist")) glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL)) injector = TermInjector(glossary, "zh", "en") prompt = injector.generate_prompt("林风释放了火球术") # Check that prompt contains key elements assert "Chinese" in prompt or "English" in prompt assert "林风" in prompt assert "Lin Feng" in prompt assert "火球术" in prompt assert "Fireball" in prompt assert "character" in prompt.lower() assert "skill" in prompt.lower() def test_generate_prompt_without_examples(self): """Test prompt generation without few-shot examples.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) injector = TermInjector(glossary, "zh", "en") prompt = injector.generate_prompt("林风来了", include_examples=False) assert "林风" in prompt assert "Lin Feng" in prompt assert "Examples:" not in prompt def test_generate_prompt_empty_glossary(self): """Test prompt generation with empty glossary.""" glossary = Glossary() injector = TermInjector(glossary, "zh", "en") prompt = injector.generate_prompt("测试文本") # Should just return the source text assert prompt == "测试文本" def test_generate_prompt_max_examples(self): """Test prompt generation with limited examples.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL)) glossary.add(GlossaryEntry("青云宗", "Qingyun Sect", TermCategory.ORGANIZATION)) glossary.add(GlossaryEntry("龙剑", "Dragon Sword", TermCategory.ITEM)) injector = TermInjector(glossary, "zh", "en") prompt = injector.generate_prompt("林风使用龙剑", max_examples=2) # Should limit examples assert "Examples:" in prompt def test_inject_terms(self): """Test term injection into source text.""" glossary = Glossary() injector = TermInjector(glossary, "zh", "en") result = injector.inject_terms("林风释放了火球术", ["林风", "火球术"]) assert "[TERM:林风]" in result assert "[TERM:火球术]" in result def test_get_relevant_terms(self): """Test getting relevant terms for source text.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL)) glossary.add(GlossaryEntry("青云宗", "Qingyun Sect", TermCategory.ORGANIZATION)) injector = TermInjector(glossary, "zh", "en") relevant = injector._get_relevant_terms("林风释放了火球术") # Should find two terms assert len(relevant) == 2 sources = [t.source for t in relevant] assert "林风" in sources assert "火球术" in sources assert "青云宗" not in sources def test_build_terminology_table(self): """Test terminology table building.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL)) injector = TermInjector(glossary, "zh", "en") terms = glossary.get_all() table = injector._build_terminology_table(terms) assert "Terminology Table:" in table assert "林风" in table assert "Lin Feng" in table assert "Character:" in table class TestTermValidator: """Test cases for TermValidator class.""" def test_init(self): """Test TermValidator initialization.""" glossary = Glossary() validator = TermValidator(glossary) assert validator.glossary == glossary def test_validate_translation_success(self): """Test successful validation.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) validator = TermValidator(glossary) result = validator.validate_translation( source="林风来了", target="Lin Feng came" ) assert result.is_valid is True assert result.success_rate == 100.0 assert len(result.term_results) == 1 assert result.term_results["林风"].success is True def test_validate_translation_failure(self): """Test validation with missing expected translation.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) validator = TermValidator(glossary) result = validator.validate_translation( source="林风来了", target="Lin came" # Missing "Feng" ) assert result.is_valid is False assert result.success_rate < 100.0 assert len(result.issues) > 0 def test_validate_translation_multiple_terms(self): """Test validation with multiple terms.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL)) validator = TermValidator(glossary) result = validator.validate_translation( source="林风释放了火球术", target="Lin Feng released Fireball" ) assert result.is_valid is True assert len(result.term_results) == 2 assert result.term_results["林风"].success is True assert result.term_results["火球术"].success is True def test_validate_translation_partial_match(self): """Test validation with partial term match.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) validator = TermValidator(glossary) # Multiple occurrences but only one translated correctly result = validator.validate_translation( source="林风说,林风知道", target="Lin Feng said, Lin knows" # Only one "Lin Feng" ) # Should detect the mismatch assert result.success_rate < 100 def test_validate_translation_empty_source(self): """Test validation with source text that has no terms.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) validator = TermValidator(glossary) result = validator.validate_translation( source="这是一个测试", # No glossary terms target="This is a test" ) assert result.is_valid is True assert len(result.term_results) == 0 def test_check_term_consistency(self): """Test individual term consistency check.""" glossary = Glossary() entry = GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER) glossary.add(entry) validator = TermValidator(glossary) # Successful case result = validator._check_term_consistency( source="林风来了", target="Lin Feng came", entry=entry ) assert result.success is True assert result.source == "林风" assert result.expected == "Lin Feng" # Failed case result = validator._check_term_consistency( source="林风来了", target="Lin came", entry=entry ) assert result.success is False def test_successful_terms_property(self): """Test successful_terms property.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL)) validator = TermValidator(glossary) result = validator.validate_translation( source="林风释放了火球术", target="Lin Feng released Fireball" ) assert "林风" in result.successful_terms assert "火球术" in result.successful_terms def test_failed_terms_property(self): """Test failed_terms property.""" glossary = Glossary() glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)) glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL)) validator = TermValidator(glossary) result = validator.validate_translation( source="林风释放了火球术", target="Lin released Fireball" # Missing "Feng" ) assert "林风" in result.failed_terms assert "火球术" not in result.failed_terms class TestTermStatistics: """Test cases for TermStatistics class.""" def test_init(self): """Test TermStatistics initialization.""" stats = TermStatistics() assert len(stats) == 0 assert stats.get_statistics()["total_usages"] == 0 def test_record_usage_success(self): """Test recording successful term usage.""" stats = TermStatistics() stats.record_usage( term="林风", expected="Lin Feng", success=True, context="source_count=1, target_count=1" ) assert len(stats) == 1 stat_dict = stats.get_statistics() assert stat_dict["total_usages"] == 1 assert stat_dict["total_successes"] == 1 assert stat_dict["total_failures"] == 0 def test_record_usage_failure(self): """Test recording failed term usage.""" stats = TermStatistics() stats.record_usage( term="林风", expected="Lin Feng", success=False, context="source_count=1, target_count=0" ) assert len(stats) == 1 stat_dict = stats.get_statistics() assert stat_dict["total_usages"] == 1 assert stat_dict["total_successes"] == 0 assert stat_dict["total_failures"] == 1 def test_record_multiple_usages(self): """Test recording multiple term usages.""" stats = TermStatistics() stats.record_usage("林风", "Lin Feng", True) stats.record_usage("林风", "Lin Feng", True) stats.record_usage("火球术", "Fireball", False) stat_dict = stats.get_statistics() assert stat_dict["total_usages"] == 3 assert stat_dict["total_successes"] == 2 assert stat_dict["total_failures"] == 1 assert stat_dict["unique_terms"] == 2 def test_get_term_statistics(self): """Test getting statistics for a specific term.""" stats = TermStatistics() stats.record_usage("林风", "Lin Feng", True) stats.record_usage("林风", "Lin Feng", False) term_stats = stats.get_term_statistics("林风") assert term_stats is not None assert term_stats["term"] == "林风" assert term_stats["total_usages"] == 2 assert term_stats["successes"] == 1 assert term_stats["failures"] == 1 assert term_stats["success_rate"] == 50.0 def test_get_term_statistics_nonexistent(self): """Test getting statistics for a non-existent term.""" stats = TermStatistics() term_stats = stats.get_term_statistics("nonexistent") assert term_stats is None def test_get_failed_terms(self): """Test getting list of failed terms.""" stats = TermStatistics() stats.record_usage("林风", "Lin Feng", True) stats.record_usage("火球术", "Fireball", False) stats.record_usage("青云宗", "Qingyun Sect", False) failed = stats.get_failed_terms() assert "林风" not in failed assert "火球术" in failed assert "青云宗" in failed def test_reset(self): """Test resetting statistics.""" stats = TermStatistics() stats.record_usage("林风", "Lin Feng", True) stats.record_usage("火球术", "Fireball", False) assert len(stats) == 2 stats.reset() assert len(stats) == 0 stat_dict = stats.get_statistics() assert stat_dict["total_usages"] == 0 def test_merge(self): """Test merging statistics from another instance.""" stats1 = TermStatistics() stats1.record_usage("林风", "Lin Feng", True) stats2 = TermStatistics() stats2.record_usage("火球术", "Fireball", False) stats1.merge(stats2) assert len(stats1) == 2 stat_dict = stats1.get_statistics() assert stat_dict["unique_terms"] == 2 def test_get_records(self): """Test getting all recorded usage records.""" stats = TermStatistics() stats.record_usage("林风", "Lin Feng", True) stats.record_usage("火球术", "Fireball", False) records = stats.get_records() assert len(records) == 2 assert all(isinstance(r, TermUsageRecord) for r in records) def test_generate_report(self): """Test generating a human-readable report.""" stats = TermStatistics() stats.record_usage("林风", "Lin Feng", True) stats.record_usage("火球术", "Fireball", False) report = stats.generate_report() assert "Term Translation Statistics" in report assert "Total usages: 2" in report assert "林风" in report assert "火球术" in report assert "Failed terms:" in report def test_overall_success_rate_calculation(self): """Test overall success rate calculation.""" stats = TermStatistics() stats.record_usage("林风", "Lin Feng", True) stats.record_usage("林风", "Lin Feng", True) stats.record_usage("火球术", "Fireball", True) stats.record_usage("青云宗", "Qingyun Sect", False) stat_dict = stats.get_statistics() assert stat_dict["overall_success_rate"] == 75.0 def test_empty_statistics(self): """Test statistics with no records.""" stats = TermStatistics() stat_dict = stats.get_statistics() assert stat_dict["total_usages"] == 0 assert stat_dict["unique_terms"] == 0 assert stat_dict["overall_success_rate"] == 100.0 report = stats.generate_report() assert "Total usages: 0" in report class TestTermUsageRecord: """Test cases for TermUsageRecord dataclass.""" def test_create_record(self): """Test creating a usage record.""" record = TermUsageRecord( term="林风", expected="Lin Feng", success=True, context="source_count=1" ) assert record.term == "林风" assert record.expected == "Lin Feng" assert record.success is True assert record.context == "source_count=1" class TestTermResult: """Test cases for TermResult dataclass.""" def test_create_result(self): """Test creating a term result.""" result = TermResult( source="林风", expected="Lin Feng", found=True, success=True, context="Valid translation" ) assert result.source == "林风" assert result.expected == "Lin Feng" assert result.found is True assert result.success is True assert result.context == "Valid translation" class TestTermValidationResult: """Test cases for TermValidationResult dataclass.""" def test_create_validation_result(self): """Test creating a validation result.""" term_results = { "林风": TermResult("林风", "Lin Feng", True, True), "火球术": TermResult("火球术", "Fireball", False, False) } result = TermValidationResult( is_valid=False, term_results=term_results, success_rate=50.0, issues=["Fireball not found"] ) assert result.is_valid is False assert result.success_rate == 50.0 assert len(result.issues) == 1 assert "林风" in result.successful_terms assert "火球术" in result.failed_terms