| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247 |
- """
- Unit tests for the glossary module.
- Tests cover terminology matching, preprocessing, postprocessing,
- and integration scenarios.
- """
- import pytest
- from src.glossary.models import Glossary, GlossaryEntry, TermCategory
- from src.glossary.matcher import GlossaryMatcher, TermMatch
- from src.glossary.preprocessor import GlossaryPreprocessor
- from src.glossary.postprocessor import GlossaryPostprocessor
class TestGlossary:
    """Checks for the basic container operations exposed by ``Glossary``."""

    def test_add_and_retrieve_term(self):
        """An entry stored via ``add`` is returned unchanged by ``get``."""
        store = Glossary()
        store.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

        found = store.get("林风")

        assert found is not None
        assert found.source == "林风"
        assert found.target == "Lin Feng"
        assert found.category == TermCategory.CHARACTER

    def test_remove_term(self):
        """``remove`` reports True once, after which the term is gone."""
        store = Glossary()
        store.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

        first_removal = store.remove("林风")
        assert first_removal is True
        assert store.get("林风") is None

        # Removing a term that is no longer present must report False.
        second_removal = store.remove("林风")
        assert second_removal is False

    def test_sort_by_length_desc(self):
        """``sort_by_length_desc`` lists the longest source term first."""
        store = Glossary()
        # Inserted deliberately out of length order.
        for entry in (
            GlossaryEntry("火球术", "Fireball", TermCategory.SKILL),
            GlossaryEntry("三阶魔法师", "Tier 3 Mage", TermCategory.CHARACTER),
            GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER),
        ):
            store.add(entry)

        ordered = store.sort_by_length_desc()

        # Expected order: 5 chars, then 3 chars, then 2 chars.
        for position, expected in enumerate(("三阶魔法师", "火球术", "林风")):
            assert ordered[position] == expected

    def test_get_all(self):
        """``get_all`` returns one item per stored entry."""
        store = Glossary()
        store.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        store.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))

        everything = store.get_all()

        assert len(everything) == 2

    def test_contains_operator(self):
        """The ``in`` operator reflects whether a source term was added."""
        store = Glossary()
        store.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

        assert "林风" in store
        assert "火球术" not in store
class TestGlossaryMatcher:
    """Checks for term lookup and placeholder substitution in ``GlossaryMatcher``."""

    def test_find_single_term(self):
        """A single known term is reported with its translation and span."""
        terms = Glossary()
        terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

        hits = GlossaryMatcher(terms).find_matches("林风释放了火球术")

        assert len(hits) == 1
        only_hit = hits[0]
        assert only_hit.source == "林风"
        assert only_hit.target == "Lin Feng"
        assert only_hit.start == 0
        assert only_hit.end == 2

    def test_longest_term_priority(self):
        """When one term is a prefix of another, the longer term is matched."""
        terms = Glossary()
        terms.add(GlossaryEntry("魔法", "Magic", TermCategory.OTHER))
        terms.add(GlossaryEntry("魔法师", "Mage", TermCategory.CHARACTER))

        hits = GlossaryMatcher(terms).find_matches("魔法师使用了魔法")

        # The leading "魔法师" is expected as one match (not as "魔法" plus a
        # leftover character); the trailing standalone "魔法" is the second.
        assert len(hits) == 2
        assert [hit.source for hit in hits] == ["魔法师", "魔法"]

    def test_placeholder_generation(self):
        """``replace_with_placeholder`` returns the tagged text and its mapping."""
        terms = Glossary()
        terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        matcher = GlossaryMatcher(terms)

        tagged_text, placeholder_map = matcher.replace_with_placeholder("林风来了")

        assert tagged_text == "__en__林风来了"
        assert placeholder_map == {"__en__林风": "Lin Feng"}

    def test_non_overlapping_matches(self):
        """A shorter term nested inside a matched longer term is not reported."""
        terms = Glossary()
        terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        terms.add(GlossaryEntry("林", "Lin", TermCategory.CHARACTER))

        hits = GlossaryMatcher(terms).find_matches("林风走了")

        # Only "林风" is expected; the "林" it contains must not match again.
        assert len(hits) == 1
        assert hits[0].source == "林风"
class TestGlossaryPreprocessor:
    """Checks for single-text and batch processing in ``GlossaryPreprocessor``."""

    def test_process_text_with_terms(self):
        """Each known term is tagged in the output and counted once."""
        terms = Glossary()
        for entry in (
            GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER),
            GlossaryEntry("火球术", "Fireball", TermCategory.SKILL),
        ):
            terms.add(entry)

        outcome = GlossaryPreprocessor(terms).process("林风释放了火球术")

        assert outcome.processed_text == "__en__林风释放了__en__火球术"
        assert outcome.terms_found["林风"] == 1
        assert outcome.terms_found["火球术"] == 1

    def test_batch_processing(self):
        """``process_batch`` yields one tagged result per input text."""
        terms = Glossary()
        terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        batch = ["林风来了", "林风走了"]

        outcomes = GlossaryPreprocessor(terms).process_batch(batch)

        assert len(outcomes) == 2
        # Both inputs contain the term, so both outputs must carry the tag.
        for outcome in outcomes:
            assert "__en__林风" in outcome.processed_text
class TestGlossaryPostprocessor:
    """Checks for restoration, punctuation cleanup and validation."""

    def test_restore_from_placeholder(self):
        """Every placeholder in the text is swapped for its mapped translation."""
        restorer = GlossaryPostprocessor()
        placeholder_map = {"__en__林风": "Lin Feng", "__en__火球术": "Fireball"}
        translated = "__en__林风 released __en__火球术"

        restored = restorer.restore_from_placeholder(translated, placeholder_map)

        assert restored == "Lin Feng released Fireball"

    def test_fix_punctuation(self):
        """``fix_punctuation`` normalises spacing around punctuation marks."""
        fixer = GlossaryPostprocessor()
        cases = (
            # A space before a full stop is removed.
            ("Lin Feng .", "Lin Feng."),
            # A comma directly followed by Chinese text gains a trailing space.
            ("Lin Feng,走了", "Lin Feng, 走了"),
        )

        for raw, expected in cases:
            assert fixer.fix_punctuation(raw) == expected

    def test_validate_translation_success(self):
        """Validation passes when the mapped translation appears in the output."""
        validator = GlossaryPostprocessor()
        placeholder_map = {"__en__林风": "Lin Feng"}

        verdict = validator.validate_translation(
            "林风来了", "Lin Feng came", placeholder_map
        )

        assert verdict.is_valid is True
        assert len(verdict.missing_terms) == 0

    def test_validate_translation_missing_terms(self):
        """Validation fails when the mapped translation is absent from the output."""
        validator = GlossaryPostprocessor()
        placeholder_map = {"__en__林风": "Lin Feng"}

        # "Lin came" does not contain the full expected term "Lin Feng".
        verdict = validator.validate_translation(
            "林风来了", "Lin came", placeholder_map
        )

        assert verdict.is_valid is False
class TestGlossaryIntegration:
    """End-to-end scenarios combining the preprocessor and postprocessor."""

    def test_full_pipeline(self):
        """Tag terms, simulate a translation step, then restore translations."""
        # Glossary shared by the whole pipeline.
        terms = Glossary()
        terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        terms.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))

        # Stage 1: tag the known terms in the source sentence.
        source_sentence = "林风释放了火球术"
        tagged = GlossaryPreprocessor(terms).process(source_sentence)
        assert tagged.processed_text == "__en__林风释放了__en__火球术"

        # Stage 2: stand-in for a translation that left placeholders untouched.
        simulated_translation = "__en__林风 released __en__火球术"

        # Stage 3: swap placeholders for their glossary translations.
        restorer = GlossaryPostprocessor()
        output = restorer.process(simulated_translation, tagged.placeholder_map)
        assert output == "Lin Feng released Fireball"

    def test_phase_0_validation_scenario(self):
        """Compare preprocessing with an empty glossary and a populated one."""
        sentence = "林风释放了火球术"

        # Baseline: an empty glossary stands in for "no glossary at all".
        baseline = GlossaryPreprocessor(Glossary()).process(sentence)
        # Nothing to match, so nothing is mapped or counted.
        assert baseline.placeholder_map == {}
        assert baseline.terms_found == {}

        # Populated glossary covering both terms in the sentence.
        populated = Glossary()
        populated.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        populated.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))

        with_terms = GlossaryPreprocessor(populated).process(sentence)
        # One placeholder per term, each term counted once.
        assert len(with_terms.placeholder_map) == 2
        assert with_terms.terms_found["林风"] == 1
        assert with_terms.terms_found["火球术"] == 1
|