| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510 |
- """
- Unit tests for the term injection module.
- Tests cover term injection, validation, and statistics tracking.
- """
- import sys
- from unittest.mock import Mock
- # Mock torch and transformers before importing
- sys_mock = Mock()
- sys.modules["torch"] = sys_mock
- sys.modules["transformers"] = sys_mock
- import pytest
- from src.glossary.models import Glossary, GlossaryEntry, TermCategory
- from src.translator.term_injector import (
- TermInjector,
- TermValidator,
- TermStatistics,
- TermValidationResult,
- TermResult,
- TermUsageRecord,
- )
class TestTermInjector:
    """Tests for TermInjector configured with "zh" source and "en" target."""

    @staticmethod
    def _glossary_of(*specs):
        """Return a Glossary filled from GlossaryEntry argument tuples.

        Each tuple in *specs* is unpacked into GlossaryEntry and added before
        the next entry is built, so insertion order follows argument order.
        Calling it with no arguments yields an empty Glossary.
        """
        store = Glossary()
        for spec in specs:
            store.add(GlossaryEntry(*spec))
        return store

    def test_init(self):
        """The constructor exposes the glossary and both language codes."""
        store = self._glossary_of()
        injector = TermInjector(store, "zh", "en")

        assert injector.glossary == store
        assert injector.src_lang == "zh"
        assert injector.tgt_lang == "en"

    def test_generate_prompt_with_terms(self):
        """The prompt mentions each matching term, its translation and category.

        NOTE(review): the language-name check passes if either "Chinese" or
        "English" appears; it does not require both.
        """
        store = self._glossary_of(
            ("林风", "Lin Feng", TermCategory.CHARACTER, "Protagonist"),
            ("火球术", "Fireball", TermCategory.SKILL),
        )
        injector = TermInjector(store, "zh", "en")

        prompt = injector.generate_prompt("林风释放了火球术")

        # Check that prompt contains key elements
        assert any(language in prompt for language in ("Chinese", "English"))
        for fragment in ("林风", "Lin Feng", "火球术", "Fireball"):
            assert fragment in prompt
        for category in ("character", "skill"):
            assert category in prompt.lower()

    def test_generate_prompt_without_examples(self):
        """With include_examples=False the prompt has no "Examples:" section."""
        store = self._glossary_of(("林风", "Lin Feng", TermCategory.CHARACTER))
        injector = TermInjector(store, "zh", "en")

        prompt = injector.generate_prompt("林风来了", include_examples=False)

        for fragment in ("林风", "Lin Feng"):
            assert fragment in prompt
        assert "Examples:" not in prompt

    def test_generate_prompt_empty_glossary(self):
        """An empty glossary makes generate_prompt return the text unchanged."""
        injector = TermInjector(self._glossary_of(), "zh", "en")

        prompt = injector.generate_prompt("测试文本")

        # Should just return the source text
        assert prompt == "测试文本"

    def test_generate_prompt_max_examples(self):
        """Passing max_examples=2 still yields an "Examples:" section.

        NOTE(review): only the presence of the section is asserted; the cap
        itself is not verified by this test.
        """
        store = self._glossary_of(
            ("林风", "Lin Feng", TermCategory.CHARACTER),
            ("火球术", "Fireball", TermCategory.SKILL),
            ("青云宗", "Qingyun Sect", TermCategory.ORGANIZATION),
            ("龙剑", "Dragon Sword", TermCategory.ITEM),
        )
        injector = TermInjector(store, "zh", "en")

        prompt = injector.generate_prompt("林风使用龙剑", max_examples=2)

        assert "Examples:" in prompt

    def test_inject_terms(self):
        """inject_terms emits a [TERM:...] marker for every requested term.

        The injector here is built on an empty glossary; the terms to mark
        are passed explicitly.
        """
        injector = TermInjector(self._glossary_of(), "zh", "en")

        marked = injector.inject_terms("林风释放了火球术", ["林风", "火球术"])

        for name in ("林风", "火球术"):
            assert f"[TERM:{name}]" in marked

    def test_get_relevant_terms(self):
        """_get_relevant_terms returns only entries occurring in the text."""
        store = self._glossary_of(
            ("林风", "Lin Feng", TermCategory.CHARACTER),
            ("火球术", "Fireball", TermCategory.SKILL),
            ("青云宗", "Qingyun Sect", TermCategory.ORGANIZATION),
        )
        injector = TermInjector(store, "zh", "en")

        matches = injector._get_relevant_terms("林风释放了火球术")

        # Should find two terms
        assert len(matches) == 2
        matched_sources = [match.source for match in matches]
        for present in ("林风", "火球术"):
            assert present in matched_sources
        assert "青云宗" not in matched_sources

    def test_build_terminology_table(self):
        """The table has a header, a category label and the term pair."""
        store = self._glossary_of(
            ("林风", "Lin Feng", TermCategory.CHARACTER),
            ("火球术", "Fireball", TermCategory.SKILL),
        )
        injector = TermInjector(store, "zh", "en")

        table = injector._build_terminology_table(store.get_all())

        assert "Terminology Table:" in table
        assert "林风" in table
        assert "Lin Feng" in table
        assert "Character:" in table
class TestTermValidator:
    """Tests for TermValidator.validate_translation and its helpers."""

    @staticmethod
    def _validator_for(*specs):
        """Return a TermValidator over a Glossary built from *specs*.

        Each tuple in *specs* is unpacked into GlossaryEntry and added in
        argument order before the validator is constructed.
        """
        store = Glossary()
        for spec in specs:
            store.add(GlossaryEntry(*spec))
        return TermValidator(store)

    def test_init(self):
        """The constructor exposes the glossary it was given."""
        store = Glossary()
        checker = TermValidator(store)

        assert checker.glossary == store

    def test_validate_translation_success(self):
        """A target containing the expected translation validates fully."""
        checker = self._validator_for(("林风", "Lin Feng", TermCategory.CHARACTER))

        outcome = checker.validate_translation(
            source="林风来了",
            target="Lin Feng came",
        )

        assert outcome.is_valid is True
        assert outcome.success_rate == 100.0
        assert len(outcome.term_results) == 1
        assert outcome.term_results["林风"].success is True

    def test_validate_translation_failure(self):
        """A target lacking the expected translation is reported as invalid."""
        checker = self._validator_for(("林风", "Lin Feng", TermCategory.CHARACTER))

        outcome = checker.validate_translation(
            source="林风来了",
            target="Lin came",  # Missing "Feng"
        )

        assert outcome.is_valid is False
        assert outcome.success_rate < 100.0
        assert len(outcome.issues) > 0

    def test_validate_translation_multiple_terms(self):
        """Two terms that are both translated as expected both succeed."""
        checker = self._validator_for(
            ("林风", "Lin Feng", TermCategory.CHARACTER),
            ("火球术", "Fireball", TermCategory.SKILL),
        )

        outcome = checker.validate_translation(
            source="林风释放了火球术",
            target="Lin Feng released Fireball",
        )

        assert outcome.is_valid is True
        assert len(outcome.term_results) == 2
        for name in ("林风", "火球术"):
            assert outcome.term_results[name].success is True

    def test_validate_translation_partial_match(self):
        """A term used twice but translated once lowers the success rate."""
        checker = self._validator_for(("林风", "Lin Feng", TermCategory.CHARACTER))

        # Multiple occurrences but only one translated correctly
        outcome = checker.validate_translation(
            source="林风说,林风知道",
            target="Lin Feng said, Lin knows",  # Only one "Lin Feng"
        )

        # Should detect the mismatch
        assert outcome.success_rate < 100

    def test_validate_translation_empty_source(self):
        """A source with no glossary terms is valid with no term results."""
        checker = self._validator_for(("林风", "Lin Feng", TermCategory.CHARACTER))

        outcome = checker.validate_translation(
            source="这是一个测试",  # No glossary terms
            target="This is a test",
        )

        assert outcome.is_valid is True
        assert len(outcome.term_results) == 0

    def test_check_term_consistency(self):
        """_check_term_consistency succeeds or fails per single entry."""
        store = Glossary()
        lin_feng = GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)
        store.add(lin_feng)
        checker = TermValidator(store)

        # Successful case
        matched = checker._check_term_consistency(
            source="林风来了",
            target="Lin Feng came",
            entry=lin_feng,
        )
        assert matched.success is True
        assert matched.source == "林风"
        assert matched.expected == "Lin Feng"

        # Failed case
        mismatched = checker._check_term_consistency(
            source="林风来了",
            target="Lin came",
            entry=lin_feng,
        )
        assert mismatched.success is False

    def test_successful_terms_property(self):
        """successful_terms lists every term translated as expected."""
        checker = self._validator_for(
            ("林风", "Lin Feng", TermCategory.CHARACTER),
            ("火球术", "Fireball", TermCategory.SKILL),
        )

        outcome = checker.validate_translation(
            source="林风释放了火球术",
            target="Lin Feng released Fireball",
        )

        # The property is read afresh for each check, as separate accesses.
        for name in ("林风", "火球术"):
            assert name in outcome.successful_terms

    def test_failed_terms_property(self):
        """failed_terms lists only the term whose translation is missing."""
        checker = self._validator_for(
            ("林风", "Lin Feng", TermCategory.CHARACTER),
            ("火球术", "Fireball", TermCategory.SKILL),
        )

        outcome = checker.validate_translation(
            source="林风释放了火球术",
            target="Lin released Fireball",  # Missing "Feng"
        )

        assert "林风" in outcome.failed_terms
        assert "火球术" not in outcome.failed_terms
class TestTermStatistics:
    """Tests for TermStatistics: recording, aggregation, merge and reporting."""

    @staticmethod
    def _stats_with(*usages):
        """Return a TermStatistics after replaying *usages* in order.

        Each item is a tuple of positional arguments for record_usage.
        """
        tracker = TermStatistics()
        for usage in usages:
            tracker.record_usage(*usage)
        return tracker

    def test_init(self):
        """A fresh instance has length zero and reports zero usages."""
        tracker = TermStatistics()

        assert len(tracker) == 0
        assert tracker.get_statistics()["total_usages"] == 0

    def test_record_usage_success(self):
        """One successful usage counts as one usage and one success."""
        tracker = TermStatistics()
        usage = {
            "term": "林风",
            "expected": "Lin Feng",
            "success": True,
            "context": "source_count=1, target_count=1",
        }
        tracker.record_usage(**usage)

        assert len(tracker) == 1
        summary = tracker.get_statistics()
        assert summary["total_usages"] == 1
        assert summary["total_successes"] == 1
        assert summary["total_failures"] == 0

    def test_record_usage_failure(self):
        """One failed usage counts as one usage and one failure."""
        tracker = TermStatistics()
        usage = {
            "term": "林风",
            "expected": "Lin Feng",
            "success": False,
            "context": "source_count=1, target_count=0",
        }
        tracker.record_usage(**usage)

        assert len(tracker) == 1
        summary = tracker.get_statistics()
        assert summary["total_usages"] == 1
        assert summary["total_successes"] == 0
        assert summary["total_failures"] == 1

    def test_record_multiple_usages(self):
        """Totals and the unique-term count aggregate across several usages."""
        tracker = self._stats_with(
            ("林风", "Lin Feng", True),
            ("林风", "Lin Feng", True),
            ("火球术", "Fireball", False),
        )

        summary = tracker.get_statistics()
        expected_counts = {
            "total_usages": 3,
            "total_successes": 2,
            "total_failures": 1,
            "unique_terms": 2,
        }
        for key, count in expected_counts.items():
            assert summary[key] == count

    def test_get_term_statistics(self):
        """Per-term statistics report counts and a percentage success rate."""
        tracker = self._stats_with(
            ("林风", "Lin Feng", True),
            ("林风", "Lin Feng", False),
        )

        per_term = tracker.get_term_statistics("林风")

        assert per_term is not None
        expected_fields = {
            "term": "林风",
            "total_usages": 2,
            "successes": 1,
            "failures": 1,
            "success_rate": 50.0,
        }
        for key, value in expected_fields.items():
            assert per_term[key] == value

    def test_get_term_statistics_nonexistent(self):
        """Asking about a term that was never recorded returns None."""
        tracker = TermStatistics()

        assert tracker.get_term_statistics("nonexistent") is None

    def test_get_failed_terms(self):
        """get_failed_terms includes the failed terms, not the successful one."""
        tracker = self._stats_with(
            ("林风", "Lin Feng", True),
            ("火球术", "Fireball", False),
            ("青云宗", "Qingyun Sect", False),
        )

        failures = tracker.get_failed_terms()

        assert "林风" not in failures
        for name in ("火球术", "青云宗"):
            assert name in failures

    def test_reset(self):
        """reset() drops every record and zeroes the usage total."""
        tracker = self._stats_with(
            ("林风", "Lin Feng", True),
            ("火球术", "Fireball", False),
        )
        assert len(tracker) == 2

        tracker.reset()

        assert len(tracker) == 0
        summary = tracker.get_statistics()
        assert summary["total_usages"] == 0

    def test_merge(self):
        """merge() folds another instance's records into the receiver."""
        primary = self._stats_with(("林风", "Lin Feng", True))
        secondary = self._stats_with(("火球术", "Fireball", False))

        primary.merge(secondary)

        assert len(primary) == 2
        summary = primary.get_statistics()
        assert summary["unique_terms"] == 2

    def test_get_records(self):
        """get_records() returns one TermUsageRecord per recorded usage."""
        tracker = self._stats_with(
            ("林风", "Lin Feng", True),
            ("火球术", "Fireball", False),
        )

        history = tracker.get_records()

        assert len(history) == 2
        assert all(isinstance(item, TermUsageRecord) for item in history)

    def test_generate_report(self):
        """The report carries its title, the total, both terms and failures."""
        tracker = self._stats_with(
            ("林风", "Lin Feng", True),
            ("火球术", "Fireball", False),
        )

        report = tracker.generate_report()

        expected_fragments = (
            "Term Translation Statistics",
            "Total usages: 2",
            "林风",
            "火球术",
            "Failed terms:",
        )
        for fragment in expected_fragments:
            assert fragment in report

    def test_overall_success_rate_calculation(self):
        """Three successes out of four usages give a 75.0 overall rate."""
        tracker = self._stats_with(
            ("林风", "Lin Feng", True),
            ("林风", "Lin Feng", True),
            ("火球术", "Fireball", True),
            ("青云宗", "Qingyun Sect", False),
        )

        summary = tracker.get_statistics()

        assert summary["overall_success_rate"] == 75.0

    def test_empty_statistics(self):
        """With no records the totals are zero and the rate is 100.0."""
        tracker = TermStatistics()

        summary = tracker.get_statistics()
        assert summary["total_usages"] == 0
        assert summary["unique_terms"] == 0
        assert summary["overall_success_rate"] == 100.0

        report = tracker.generate_report()
        assert "Total usages: 0" in report
class TestTermUsageRecord:
    """Tests for the TermUsageRecord data container."""

    def test_create_record(self):
        """Keyword arguments given at construction are readable as attributes."""
        fields = {
            "term": "林风",
            "expected": "Lin Feng",
            "success": True,
            "context": "source_count=1",
        }

        entry = TermUsageRecord(**fields)

        assert entry.term == "林风"
        assert entry.expected == "Lin Feng"
        # Identity check: the flag must be the bool True, not merely truthy.
        assert entry.success is True
        assert entry.context == "source_count=1"
class TestTermResult:
    """Tests for the TermResult data container."""

    def test_create_result(self):
        """Keyword arguments given at construction are readable as attributes."""
        fields = {
            "source": "林风",
            "expected": "Lin Feng",
            "found": True,
            "success": True,
            "context": "Valid translation",
        }

        outcome = TermResult(**fields)

        assert outcome.source == "林风"
        assert outcome.expected == "Lin Feng"
        # Identity checks: both flags must be the bool True, not merely truthy.
        assert outcome.found is True
        assert outcome.success is True
        assert outcome.context == "Valid translation"
class TestTermValidationResult:
    """Tests for the TermValidationResult data container."""

    def test_create_validation_result(self):
        """Fields are stored as given and terms split by their success flag."""
        passed = TermResult("林风", "Lin Feng", True, True)
        missed = TermResult("火球术", "Fireball", False, False)
        per_term = {"林风": passed, "火球术": missed}

        summary = TermValidationResult(
            is_valid=False,
            term_results=per_term,
            success_rate=50.0,
            issues=["Fireball not found"],
        )

        assert summary.is_valid is False
        assert summary.success_rate == 50.0
        assert len(summary.issues) == 1
        assert "林风" in summary.successful_terms
        assert "火球术" in summary.failed_terms
|