|
|
@@ -440,3 +440,76 @@ class TestGlossaryIntegration:
|
|
|
assert len(result.placeholder_map) == 2
|
|
|
assert result.terms_found["林风"] == 1
|
|
|
assert result.terms_found["火球术"] == 1
|
|
|
+
|
|
|
def test_retention_rate_calculation(self):
    """Check that processing text yields a retention rate between 0 and 100."""
    terms = Glossary()
    hero = GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER)
    terms.add(hero)

    processed = GlossaryPreprocessor(terms).process("林风释放了火球术")

    # Only the bounds are checked here, not an exact value.
    rate = processed.retention_rate
    assert 0 <= rate <= 100
|
|
|
+
|
|
|
def test_empty_string_retention_rate(self):
    """Check that the retention rate for two empty strings is 100.0."""
    empty_glossary = Glossary()
    subject = GlossaryPreprocessor(empty_glossary)

    # Both arguments are empty; the expected result is full (100%) retention.
    assert subject.calculate_retention_rate("", "") == 100.0
|
|
|
+
|
|
|
def test_matcher_restore_from_placeholder(self):
    """Check GlossaryMatcher.restore_from_placeholder replaces a mapped placeholder."""
    terms = Glossary()
    terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
    matcher = GlossaryMatcher(terms)

    # Placeholder text -> replacement text expected in the output.
    placeholder_to_text = {"__en__林风": "Lin Feng"}
    restored = matcher.restore_from_placeholder(
        "__en__林风 came here",
        placeholder_to_text,
    )

    assert restored == "Lin Feng came here"
|
|
|
+
|
|
|
def test_glossary_entry_validation(self):
    """Check GlossaryEntry raises ValueError when either of its first two arguments is empty."""
    invalid_pairs = (
        ("", "Lin Feng"),  # empty first argument
        ("林风", ""),  # empty second argument
    )
    for first, second in invalid_pairs:
        with pytest.raises(ValueError):
            GlossaryEntry(first, second, TermCategory.CHARACTER)
|
|
|
+
|
|
|
def test_multiple_occurrences_same_term(self):
    """Check that a term appearing twice in the text produces two matches."""
    terms = Glossary()
    terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

    found = GlossaryMatcher(terms).find_matches("林风说,林风知道")

    # The input contains the term twice, so exactly two matches are expected.
    assert len(found) == 2
    for index in (0, 1):
        assert found[index].source == "林风"
|
|
|
+
|
|
|
def test_postprocessor_clean_language_tags(self):
    """Check clean_language_tags removes a stray ``__en__`` prefix but keeps the text."""
    cleaner = GlossaryPostprocessor()

    # The tag here is followed by a space, not by a term.
    cleaned = cleaner.clean_language_tags("__en__ some text here")

    assert "__en__" not in cleaned
    assert "some text here" in cleaned
|
|
|
+
|
|
|
def test_glossary_len_and_contains(self):
    """Check that Glossary supports ``len()`` and the ``in`` operator."""
    terms = Glossary()
    size_when_new = len(terms)
    assert size_when_new == 0

    # After adding one entry, both the length and membership should reflect it.
    terms.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
    size_after_add = len(terms)
    assert size_after_add == 1
    assert "林风" in terms
    assert "不存在" not in terms
|