# models.py
"""
Data models for the glossary module.

This module defines the core data structures for terminology management.
"""
  5. from dataclasses import dataclass
  6. from typing import Dict, List, Optional
  7. from enum import Enum
  8. class TermCategory(Enum):
  9. """Categories for terminology entries."""
  10. CHARACTER = "character" # Character names (e.g., 林风)
  11. SKILL = "skill" # Skill names (e.g., 火球术)
  12. LOCATION = "location" # Location names (e.g., 东方大陆)
  13. ITEM = "item" # Item names (e.g., 龙剑)
  14. ORGANIZATION = "organization" # Organization names (e.g., 魔法学院)
  15. OTHER = "other" # Other terms
  16. @dataclass
  17. class GlossaryEntry:
  18. """
  19. A single entry in the glossary.
  20. Attributes:
  21. source: The original term in the source language
  22. target: The translated term in the target language
  23. category: The category of the term
  24. context: Optional context information for the term
  25. """
  26. source: str
  27. target: str
  28. category: TermCategory
  29. context: str = ""
  30. def __post_init__(self):
  31. """Validate the glossary entry."""
  32. if not self.source or not self.source.strip():
  33. raise ValueError("Source term cannot be empty")
  34. if not self.target or not self.target.strip():
  35. raise ValueError("Target term cannot be empty")
  36. @property
  37. def length(self) -> int:
  38. """Return the length of the source term."""
  39. return len(self.source)
  40. class Glossary:
  41. """
  42. Glossary for managing terminology translations.
  43. The glossary stores terms and their translations, ensuring consistent
  44. translation across the entire document.
  45. """
  46. def __init__(self):
  47. """Initialize an empty glossary."""
  48. self._terms: Dict[str, GlossaryEntry] = {}
  49. def add(self, entry: GlossaryEntry) -> None:
  50. """
  51. Add a term to the glossary.
  52. Args:
  53. entry: The GlossaryEntry to add
  54. """
  55. self._terms[entry.source] = entry
  56. def get(self, source: str) -> Optional[GlossaryEntry]:
  57. """
  58. Retrieve a term from the glossary.
  59. Args:
  60. source: The source term to look up
  61. Returns:
  62. The GlossaryEntry if found, None otherwise
  63. """
  64. return self._terms.get(source)
  65. def remove(self, source: str) -> bool:
  66. """
  67. Remove a term from the glossary.
  68. Args:
  69. source: The source term to remove
  70. Returns:
  71. True if the term was removed, False if it wasn't found
  72. """
  73. if source in self._terms:
  74. del self._terms[source]
  75. return True
  76. return False
  77. def get_all(self) -> List[GlossaryEntry]:
  78. """
  79. Get all terms in the glossary.
  80. Returns:
  81. List of all GlossaryEntry objects
  82. """
  83. return list(self._terms.values())
  84. def sort_by_length_desc(self) -> List[str]:
  85. """
  86. Get term sources sorted by length in descending order.
  87. This is used for longest-match processing, where longer terms
  88. should be matched first to avoid partial matches.
  89. Returns:
  90. List of source terms sorted by length (longest first)
  91. """
  92. return sorted(self._terms.keys(), key=lambda x: len(x), reverse=True)
  93. def __len__(self) -> int:
  94. """Return the number of terms in the glossary."""
  95. return len(self._terms)
  96. def __contains__(self, source: str) -> bool:
  97. """Check if a term is in the glossary."""
  98. return source in self._terms