|
@@ -0,0 +1,247 @@
|
|
|
|
|
+"""
|
|
|
|
|
+Unit tests for the glossary module.
|
|
|
|
|
+
|
|
|
|
|
+Tests cover terminology matching, preprocessing, postprocessing,
|
|
|
|
|
+and integration scenarios.
|
|
|
|
|
+"""
|
|
|
|
|
+
|
|
|
|
|
+import pytest
|
|
|
|
|
+
|
|
|
|
|
+from src.glossary.models import Glossary, GlossaryEntry, TermCategory
|
|
|
|
|
+from src.glossary.matcher import GlossaryMatcher, TermMatch
|
|
|
|
|
+from src.glossary.preprocessor import GlossaryPreprocessor
|
|
|
|
|
+from src.glossary.postprocessor import GlossaryPostprocessor
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class TestGlossary:
    """Exercise the container-style API of ``Glossary``."""

    @staticmethod
    def _character(source, target):
        """Build a CHARACTER-category entry for the given term pair."""
        return GlossaryEntry(source, target, TermCategory.CHARACTER)

    def test_add_and_retrieve_term(self):
        """An added entry can be fetched back with all fields intact."""
        store = Glossary()
        store.add(self._character("林风", "Lin Feng"))

        found = store.get("林风")

        assert found is not None
        assert found.source == "林风"
        assert found.target == "Lin Feng"
        assert found.category == TermCategory.CHARACTER

    def test_remove_term(self):
        """Removal reports True once, then False when the term is gone."""
        store = Glossary()
        store.add(self._character("林风", "Lin Feng"))

        first_removal = store.remove("林风")
        assert first_removal is True
        assert store.get("林风") is None

        # A second removal of the same term must report that nothing happened.
        second_removal = store.remove("林风")
        assert second_removal is False

    def test_sort_by_length_desc(self):
        """Source terms come back ordered from longest to shortest."""
        store = Glossary()
        for entry in (
            GlossaryEntry("火球术", "Fireball", TermCategory.SKILL),
            GlossaryEntry("三阶魔法师", "Tier 3 Mage", TermCategory.CHARACTER),
            GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER),
        ):
            store.add(entry)

        ordered = store.sort_by_length_desc()

        # Expected order: 5 characters, then 3, then 2.
        expected = ("三阶魔法师", "火球术", "林风")
        for position, term in enumerate(expected):
            assert ordered[position] == term

    def test_get_all(self):
        """``get_all`` reports one item per added entry."""
        store = Glossary()
        store.add(self._character("林风", "Lin Feng"))
        store.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))

        everything = store.get_all()

        assert len(everything) == 2

    def test_contains_operator(self):
        """Membership tests with ``in`` reflect which terms were added."""
        store = Glossary()
        store.add(self._character("林风", "Lin Feng"))

        assert "林风" in store
        assert "火球术" not in store
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class TestGlossaryMatcher:
    """Test cases for GlossaryMatcher.

    Offsets asserted below are character indices into the input text, with
    ``end`` exclusive, as pinned by ``test_find_single_term`` (a two-character
    term at the start of the text has ``start == 0`` and ``end == 2``).
    """

    def test_find_single_term(self):
        """Test finding a single term in text."""
        glossary = Glossary()
        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

        matcher = GlossaryMatcher(glossary)
        matches = matcher.find_matches("林风释放了火球术")

        assert len(matches) == 1
        assert matches[0].source == "林风"
        assert matches[0].target == "Lin Feng"
        assert matches[0].start == 0
        assert matches[0].end == 2

    def test_longest_term_priority(self):
        """Test that longer terms are matched first.

        The text contains "魔法师" at offsets 0-3 and a standalone "魔法" at
        offsets 6-8. The shorter term must only be reported for the standalone
        occurrence, never for the prefix of the longer term.
        """
        glossary = Glossary()
        glossary.add(GlossaryEntry("魔法", "Magic", TermCategory.OTHER))
        glossary.add(GlossaryEntry("魔法师", "Mage", TermCategory.CHARACTER))

        matcher = GlossaryMatcher(glossary)
        matches = matcher.find_matches("魔法师使用了魔法")

        # Should match "魔法师" but not the "魔法" within it
        assert len(matches) == 2
        assert matches[0].source == "魔法师"
        assert matches[1].source == "魔法"

        # Pin the spans as well: checking only the sources would still pass if
        # the short term were reported at offset 0, overlapping the long one.
        assert (matches[0].start, matches[0].end) == (0, 3)
        assert (matches[1].start, matches[1].end) == (6, 8)

    def test_placeholder_generation(self):
        """Test placeholder generation."""
        glossary = Glossary()
        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

        matcher = GlossaryMatcher(glossary)
        processed, mapping = matcher.replace_with_placeholder("林风来了")

        assert processed == "__en__林风来了"
        assert mapping == {"__en__林风": "Lin Feng"}

    def test_non_overlapping_matches(self):
        """Test that matches don't overlap."""
        glossary = Glossary()
        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        glossary.add(GlossaryEntry("林", "Lin", TermCategory.CHARACTER))

        matcher = GlossaryMatcher(glossary)
        matches = matcher.find_matches("林风走了")

        # Should only match "林风", not "林" within it
        assert len(matches) == 1
        assert matches[0].source == "林风"
        # The single match must cover the whole two-character term.
        assert (matches[0].start, matches[0].end) == (0, 2)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class TestGlossaryPreprocessor:
    """Checks for ``GlossaryPreprocessor`` on single texts and on batches."""

    def test_process_text_with_terms(self):
        """Both glossary terms get the ``__en__`` prefix and are counted once."""
        terms = Glossary()
        terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        terms.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))

        outcome = GlossaryPreprocessor(terms).process("林风释放了火球术")

        assert outcome.processed_text == "__en__林风释放了__en__火球术"
        for term in ("林风", "火球术"):
            assert outcome.terms_found[term] == 1

    def test_batch_processing(self):
        """``process_batch`` yields one result per input, each with the marker."""
        terms = Glossary()
        terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

        sentences = ["林风来了", "林风走了"]
        outcomes = GlossaryPreprocessor(terms).process_batch(sentences)

        assert len(outcomes) == 2
        first, second = outcomes[0], outcomes[1]
        assert "__en__林风" in first.processed_text
        assert "__en__林风" in second.processed_text
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class TestGlossaryPostprocessor:
    """Checks for ``GlossaryPostprocessor``: restore, punctuation, validation."""

    def test_restore_from_placeholder(self):
        """Every placeholder in the text is swapped for its mapped translation."""
        placeholder_to_target = {
            "__en__林风": "Lin Feng",
            "__en__火球术": "Fireball",
        }
        worker = GlossaryPostprocessor()

        restored = worker.restore_from_placeholder(
            "__en__林风 released __en__火球术", placeholder_to_target
        )

        assert restored == "Lin Feng released Fireball"

    def test_fix_punctuation(self):
        """Punctuation spacing is normalised for the two sample inputs."""
        worker = GlossaryPostprocessor()

        cases = (
            # A space before the full stop is dropped.
            ("Lin Feng .", "Lin Feng."),
            # A comma directly followed by Chinese text gains a trailing space.
            ("Lin Feng,走了", "Lin Feng, 走了"),
        )
        for raw, expected in cases:
            assert worker.fix_punctuation(raw) == expected

    def test_validate_translation_success(self):
        """A translation containing the mapped term validates with none missing."""
        worker = GlossaryPostprocessor()
        placeholder_to_target = {"__en__林风": "Lin Feng"}

        verdict = worker.validate_translation(
            "林风来了", "Lin Feng came", placeholder_to_target
        )

        assert verdict.is_valid is True
        assert len(verdict.missing_terms) == 0

    def test_validate_translation_missing_terms(self):
        """A translation lacking the mapped term is reported as invalid."""
        worker = GlossaryPostprocessor()
        placeholder_to_target = {"__en__林风": "Lin Feng"}

        verdict = worker.validate_translation(
            "林风来了", "Lin came", placeholder_to_target
        )

        assert verdict.is_valid is False
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class TestGlossaryIntegration:
    """End-to-end checks combining the preprocessor and the postprocessor."""

    def test_full_pipeline(self):
        """Preprocess, stand in for the translator, then restore the terms."""
        # Glossary with one character name and one skill name.
        terms = Glossary()
        terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        terms.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))

        # Stage 1: tag glossary terms in the source sentence.
        source_sentence = "林风释放了火球术"
        tagged = GlossaryPreprocessor(terms).process(source_sentence)

        assert tagged.processed_text == "__en__林风释放了__en__火球术"

        # Stage 2: hand-written stand-in for the translation step, with the
        # placeholders kept in place.
        translated_stub = "__en__林风 released __en__火球术"

        # Stage 3: replace placeholders using the map from stage 1.
        restored = GlossaryPostprocessor().process(
            translated_stub, tagged.placeholder_map
        )

        assert restored == "Lin Feng released Fireball"

    def test_phase_0_validation_scenario(self):
        """Compare an empty glossary with a populated one ("Phase 0" scenario)."""
        sentence = "林风释放了火球术"

        # Baseline: an empty glossary stands in for "no glossary".
        baseline = GlossaryPreprocessor(Glossary()).process(sentence)

        # Nothing is recorded when there are no terms to match.
        assert baseline.placeholder_map == {}
        assert baseline.terms_found == {}

        # Populated glossary covering both terms in the sentence.
        populated = Glossary()
        populated.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        populated.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))

        enriched = GlossaryPreprocessor(populated).process(sentence)

        # One placeholder per term, each term seen exactly once.
        assert len(enriched.placeholder_map) == 2
        for term in ("林风", "火球术"):
            assert enriched.terms_found[term] == 1
|