""" Data models for the glossary module. This module defines the core data structures for terminology management. """ from dataclasses import dataclass from typing import Dict, List, Optional from enum import Enum class TermCategory(Enum): """Categories for terminology entries.""" CHARACTER = "character" # Character names (e.g., 林风) SKILL = "skill" # Skill names (e.g., 火球术) LOCATION = "location" # Location names (e.g., 东方大陆) ITEM = "item" # Item names (e.g., 龙剑) ORGANIZATION = "organization" # Organization names (e.g., 魔法学院) OTHER = "other" # Other terms @dataclass class GlossaryEntry: """ A single entry in the glossary. Attributes: source: The original term in the source language target: The translated term in the target language category: The category of the term context: Optional context information for the term """ source: str target: str category: TermCategory context: str = "" def __post_init__(self): """Validate the glossary entry.""" if not self.source or not self.source.strip(): raise ValueError("Source term cannot be empty") if not self.target or not self.target.strip(): raise ValueError("Target term cannot be empty") @property def length(self) -> int: """Return the length of the source term.""" return len(self.source) class Glossary: """ Glossary for managing terminology translations. The glossary stores terms and their translations, ensuring consistent translation across the entire document. """ def __init__(self): """Initialize an empty glossary.""" self._terms: Dict[str, GlossaryEntry] = {} def add(self, entry: GlossaryEntry) -> None: """ Add a term to the glossary. Args: entry: The GlossaryEntry to add """ self._terms[entry.source] = entry def get(self, source: str) -> Optional[GlossaryEntry]: """ Retrieve a term from the glossary. Args: source: The source term to look up Returns: The GlossaryEntry if found, None otherwise """ return self._terms.get(source) def remove(self, source: str) -> bool: """ Remove a term from the glossary. Args: source: The source term to remove Returns: True if the term was removed, False if it wasn't found """ if source in self._terms: del self._terms[source] return True return False def get_all(self) -> List[GlossaryEntry]: """ Get all terms in the glossary. Returns: List of all GlossaryEntry objects """ return list(self._terms.values()) def sort_by_length_desc(self) -> List[str]: """ Get term sources sorted by length in descending order. This is used for longest-match processing, where longer terms should be matched first to avoid partial matches. Returns: List of source terms sorted by length (longest first) """ return sorted(self._terms.keys(), key=lambda x: len(x), reverse=True) def __len__(self) -> int: """Return the number of terms in the glossary.""" return len(self._terms) def __contains__(self, source: str) -> bool: """Check if a term is in the glossary.""" return source in self._terms