test_glossary.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. """
  2. Unit tests for the glossary module.
  3. Tests cover terminology matching, preprocessing, postprocessing,
  4. and integration scenarios.
  5. """
  6. import pytest
  7. from src.glossary.models import Glossary, GlossaryEntry, TermCategory
  8. from src.glossary.matcher import GlossaryMatcher, TermMatch
  9. from src.glossary.preprocessor import GlossaryPreprocessor
  10. from src.glossary.postprocessor import GlossaryPostprocessor
  11. class TestGlossary:
  12. """Test cases for Glossary class."""
  13. def test_add_and_retrieve_term(self):
  14. """Test adding and retrieving a term."""
  15. glossary = Glossary()
  16. entry = GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)
  17. glossary.add(entry)
  18. retrieved = glossary.get("林风")
  19. assert retrieved is not None
  20. assert retrieved.source == "林风"
  21. assert retrieved.target == "Lin Feng"
  22. assert retrieved.category == TermCategory.CHARACTER
  23. def test_remove_term(self):
  24. """Test removing a term."""
  25. glossary = Glossary()
  26. entry = GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)
  27. glossary.add(entry)
  28. assert glossary.remove("林风") is True
  29. assert glossary.get("林风") is None
  30. assert glossary.remove("林风") is False
  31. def test_sort_by_length_desc(self):
  32. """Test sorting terms by length (longest first)."""
  33. glossary = Glossary()
  34. glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
  35. glossary.add(GlossaryEntry("三阶魔法师", "Tier 3 Mage", TermCategory.CHARACTER))
  36. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  37. sorted_terms = glossary.sort_by_length_desc()
  38. assert sorted_terms[0] == "三阶魔法师" # 5 chars
  39. assert sorted_terms[1] == "火球术" # 3 chars
  40. assert sorted_terms[2] == "林风" # 2 chars
  41. def test_get_all(self):
  42. """Test getting all terms."""
  43. glossary = Glossary()
  44. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  45. glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
  46. all_terms = glossary.get_all()
  47. assert len(all_terms) == 2
  48. def test_contains_operator(self):
  49. """Test the 'in' operator."""
  50. glossary = Glossary()
  51. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  52. assert "林风" in glossary
  53. assert "火球术" not in glossary
  54. class TestGlossaryMatcher:
  55. """Test cases for GlossaryMatcher."""
  56. def test_find_single_term(self):
  57. """Test finding a single term in text."""
  58. glossary = Glossary()
  59. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  60. matcher = GlossaryMatcher(glossary)
  61. matches = matcher.find_matches("林风释放了火球术")
  62. assert len(matches) == 1
  63. assert matches[0].source == "林风"
  64. assert matches[0].target == "Lin Feng"
  65. assert matches[0].start == 0
  66. assert matches[0].end == 2
  67. def test_longest_term_priority(self):
  68. """Test that longer terms are matched first."""
  69. glossary = Glossary()
  70. glossary.add(GlossaryEntry("魔法", "Magic", TermCategory.OTHER))
  71. glossary.add(GlossaryEntry("魔法师", "Mage", TermCategory.CHARACTER))
  72. matcher = GlossaryMatcher(glossary)
  73. matches = matcher.find_matches("魔法师使用了魔法")
  74. # Should match "魔法师" but not the "魔法" within it
  75. assert len(matches) == 2
  76. assert matches[0].source == "魔法师"
  77. assert matches[1].source == "魔法"
  78. def test_placeholder_generation(self):
  79. """Test placeholder generation."""
  80. glossary = Glossary()
  81. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  82. matcher = GlossaryMatcher(glossary)
  83. processed, mapping = matcher.replace_with_placeholder("林风来了")
  84. assert processed == "__en__林风来了"
  85. assert mapping == {"__en__林风": "Lin Feng"}
  86. def test_non_overlapping_matches(self):
  87. """Test that matches don't overlap."""
  88. glossary = Glossary()
  89. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  90. glossary.add(GlossaryEntry("林", "Lin", TermCategory.CHARACTER))
  91. matcher = GlossaryMatcher(glossary)
  92. matches = matcher.find_matches("林风走了")
  93. # Should only match "林风", not "林" within it
  94. assert len(matches) == 1
  95. assert matches[0].source == "林风"
  96. class TestGlossaryPreprocessor:
  97. """Test cases for GlossaryPreprocessor."""
  98. def test_process_text_with_terms(self):
  99. """Test processing text with terminology."""
  100. glossary = Glossary()
  101. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  102. glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
  103. preprocessor = GlossaryPreprocessor(glossary)
  104. result = preprocessor.process("林风释放了火球术")
  105. assert result.processed_text == "__en__林风释放了__en__火球术"
  106. assert result.terms_found["林风"] == 1
  107. assert result.terms_found["火球术"] == 1
  108. def test_batch_processing(self):
  109. """Test batch processing of multiple texts."""
  110. glossary = Glossary()
  111. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  112. preprocessor = GlossaryPreprocessor(glossary)
  113. texts = ["林风来了", "林风走了"]
  114. results = preprocessor.process_batch(texts)
  115. assert len(results) == 2
  116. assert "__en__林风" in results[0].processed_text
  117. assert "__en__林风" in results[1].processed_text
  118. class TestGlossaryPostprocessor:
  119. """Test cases for GlossaryPostprocessor."""
  120. def test_restore_from_placeholder(self):
  121. """Test restoring placeholders to translations."""
  122. postprocessor = GlossaryPostprocessor()
  123. mapping = {"__en__林风": "Lin Feng", "__en__火球术": "Fireball"}
  124. result = postprocessor.restore_from_placeholder("__en__林风 released __en__火球术", mapping)
  125. assert result == "Lin Feng released Fireball"
  126. def test_fix_punctuation(self):
  127. """Test punctuation fixing."""
  128. postprocessor = GlossaryPostprocessor()
  129. # Remove space before punctuation
  130. assert postprocessor.fix_punctuation("Lin Feng .") == "Lin Feng."
  131. # Fix Chinese comma after English
  132. assert postprocessor.fix_punctuation("Lin Feng,走了") == "Lin Feng, 走了"
  133. def test_validate_translation_success(self):
  134. """Test successful validation."""
  135. postprocessor = GlossaryPostprocessor()
  136. mapping = {"__en__林风": "Lin Feng"}
  137. result = postprocessor.validate_translation("林风来了", "Lin Feng came", mapping)
  138. assert result.is_valid is True
  139. assert len(result.missing_terms) == 0
  140. def test_validate_translation_missing_terms(self):
  141. """Test validation with missing terms."""
  142. postprocessor = GlossaryPostprocessor()
  143. mapping = {"__en__林风": "Lin Feng"}
  144. result = postprocessor.validate_translation("林风来了", "Lin came", mapping)
  145. assert result.is_valid is False
  146. class TestGlossaryIntegration:
  147. """Integration tests for the glossary module."""
  148. def test_full_pipeline(self):
  149. """Test complete preprocessing and postprocessing pipeline."""
  150. # Setup glossary
  151. glossary = Glossary()
  152. glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  153. glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
  154. # Preprocess
  155. preprocessor = GlossaryPreprocessor(glossary)
  156. original = "林风释放了火球术"
  157. pre_result = preprocessor.process(original)
  158. assert pre_result.processed_text == "__en__林风释放了__en__火球术"
  159. # Simulate translation
  160. mock_translated = "__en__林风 released __en__火球术"
  161. # Postprocess
  162. postprocessor = GlossaryPostprocessor()
  163. final = postprocessor.process(mock_translated, pre_result.placeholder_map)
  164. assert final == "Lin Feng released Fireball"
  165. def test_phase_0_validation_scenario(self):
  166. """Test the Phase 0 validation scenario."""
  167. # Without glossary (simulated by empty glossary)
  168. empty_glossary = Glossary()
  169. preprocessor = GlossaryPreprocessor(empty_glossary)
  170. result = preprocessor.process("林风释放了火球术")
  171. # No placeholders added
  172. assert result.placeholder_map == {}
  173. assert result.terms_found == {}
  174. # With glossary
  175. full_glossary = Glossary()
  176. full_glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
  177. full_glossary.add(GlossaryEntry("火球术", "Fireball", TermCategory.SKILL))
  178. preprocessor = GlossaryPreprocessor(full_glossary)
  179. result = preprocessor.process("林风释放了火球术")
  180. # Placeholders added
  181. assert len(result.placeholder_map) == 2
  182. assert result.terms_found["林风"] == 1
  183. assert result.terms_found["火球术"] == 1