| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568 |
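"""
Unit tests for the translator module.

Tests cover:
- TranslationEngine initialization and basic translation
- TranslationPipeline with glossary integration
- ProgressReporter callbacks and ConsoleProgressReporter construction
- ChapterTranslator (with a mocked pipeline and repository)
- Integration of the engine, pipeline and glossary on top of a mocked model
"""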
- import pytest
- from pathlib import Path
- from unittest.mock import Mock, MagicMock, patch
- from datetime import datetime
- from src.translator.engine import TranslationEngine
- from src.translator.pipeline import TranslationPipeline, TranslationResult
- from src.translator.progress import ProgressReporter, ProgressStatus, ConsoleProgressReporter
- from src.translator.chapter_translator import ChapterTranslator
- from src.glossary.models import Glossary, GlossaryEntry, TermCategory
- # ============================================================================
- # Test TranslationEngine (Story 5.1)
- # ============================================================================
class TestTranslationEngine:
    """Exercise TranslationEngine with the transformers classes patched out."""

    @pytest.fixture
    def mock_transformers(self):
        """Patch the M2M100 model and tokenizer classes in the engine module.

        Yields:
            A dict holding the patched classes ("model", "tokenizer") and the
            objects their ``from_pretrained`` calls return ("model_instance",
            "tokenizer_instance"). The patches stay active until the
            requesting test has finished.
        """
        model_patch = patch('src.translator.engine.M2M100ForConditionalGeneration')
        tokenizer_patch = patch('src.translator.engine.M2M100Tokenizer')
        with model_patch as model_cls, tokenizer_patch as tokenizer_cls:
            # Tokenizer stand-in carrying the attributes configured by tests.
            tokenizer = MagicMock()
            tokenizer.src_lang = "zh"
            tokenizer.lang_code_to_id = {"zh": 1, "en": 2, "fr": 3}
            tokenizer.return_tensors = "pt"
            tokenizer_cls.from_pretrained.return_value = tokenizer

            # Model stand-in; eval() is pinned to return None.
            model = MagicMock()
            model.eval.return_value = None
            model_cls.from_pretrained.return_value = model

            yield {
                "model": model_cls,
                "tokenizer": tokenizer_cls,
                "model_instance": model,
                "tokenizer_instance": tokenizer,
            }

    @pytest.fixture
    def mock_model_path(self, tmp_path):
        """Return the path (as str) of a temp model dir with an empty config."""
        model_dir = tmp_path / "m2m100_418M"
        model_dir.mkdir()
        config_file = model_dir / "config.json"
        config_file.write_text("{}")
        return str(model_dir)

    def test_engine_init_with_mock_path(self, mock_transformers, mock_model_path):
        """The engine keeps the given model path and reports a known device."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello world"]

        engine = TranslationEngine(model_path=mock_model_path)

        assert engine.model_path == mock_model_path
        assert engine.device in ("cpu", "cuda")

    def test_engine_init_import_error(self):
        """With the model class patched to None, construction raises ImportError."""
        with patch('src.translator.engine.M2M100ForConditionalGeneration', None), \
                pytest.raises(ImportError, match="transformers library"):
            TranslationEngine(model_path="/fake/path")

    def test_translate_single_text(self, mock_transformers, mock_model_path):
        """translate() returns the single string decoded by the tokenizer."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello world"]
        engine = TranslationEngine(model_path=mock_model_path)

        translated = engine.translate("你好世界", src_lang="zh", tgt_lang="en")

        assert translated == "Hello world"
        tokenizer.batch_decode.assert_called_once()

    def test_translate_empty_text_raises_error(self, mock_transformers, mock_model_path):
        """translate() rejects an empty string with ValueError."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello"]
        engine = TranslationEngine(model_path=mock_model_path)

        with pytest.raises(ValueError, match="cannot be empty"):
            engine.translate("", src_lang="zh", tgt_lang="en")

    def test_translate_batch(self, mock_transformers, mock_model_path):
        """translate_batch() returns one decoded string per input text."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello", "World", "Test"]
        engine = TranslationEngine(model_path=mock_model_path)

        sources = ["你好", "世界", "测试"]
        results = engine.translate_batch(
            sources,
            src_lang="zh",
            tgt_lang="en",
            batch_size=3,
        )

        assert len(results) == 3
        assert results == ["Hello", "World", "Test"]

    def test_translate_batch_empty_raises_error(self, mock_transformers, mock_model_path):
        """translate_batch() rejects an empty list with ValueError."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = []
        engine = TranslationEngine(model_path=mock_model_path)

        with pytest.raises(ValueError, match="cannot be empty"):
            engine.translate_batch([], src_lang="zh", tgt_lang="en")

    def test_is_language_supported(self, mock_transformers, mock_model_path):
        """is_language_supported() reflects the tokenizer's language table."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello"]
        tokenizer.lang_code_to_id = {"zh": 1, "en": 2, "fr": 3}
        engine = TranslationEngine(model_path=mock_model_path)

        # "de" is absent from the table configured above.
        expectations = (("zh", True), ("en", True), ("de", False))
        for lang_code, expected in expectations:
            assert engine.is_language_supported(lang_code) is expected
- # ============================================================================
- # Test TranslationPipeline (Story 5.2)
- # ============================================================================
class TestTranslationPipeline:
    """Exercise TranslationPipeline on top of a mocked TranslationEngine."""

    @pytest.fixture
    def mock_engine(self):
        """Return a TranslationEngine-spec'd MagicMock with canned results."""
        fake_engine = MagicMock(spec=TranslationEngine)
        fake_engine.is_language_supported.return_value = True
        fake_engine.translate_batch.return_value = ["Hello", "World"]
        fake_engine.translate.return_value = "Lin Feng is a disciple"
        return fake_engine

    @pytest.fixture
    def sample_glossary(self):
        """Return a Glossary holding one character and one location entry."""
        terms = (
            ("林风", "Lin Feng", TermCategory.CHARACTER),
            ("青云宗", "Qingyun Sect", TermCategory.LOCATION),
        )
        glossary = Glossary()
        for source, target, category in terms:
            glossary.add(GlossaryEntry(source, target, category))
        return glossary

    def test_pipeline_init(self, mock_engine):
        """A new pipeline keeps its engine and defaults to zh -> en."""
        pipeline = TranslationPipeline(mock_engine)

        assert pipeline.engine is mock_engine
        assert (pipeline.src_lang, pipeline.tgt_lang) == ("zh", "en")

    def test_pipeline_with_glossary(self, mock_engine, sample_glossary):
        """A pipeline built with a glossary reports it and its size."""
        pipeline = TranslationPipeline(mock_engine, sample_glossary)

        assert pipeline.has_glossary is True
        assert len(pipeline.glossary) == 2

    def test_translate_simple(self, mock_engine):
        """Without a glossary, translate() returns the engine's output."""
        pipeline = TranslationPipeline(mock_engine)

        translated = pipeline.translate("Hello world")

        assert translated == "Lin Feng is a disciple"
        mock_engine.translate.assert_called_once()

    def test_translate_with_glossary(self, mock_engine, sample_glossary):
        """Glossary target terms appear in the final translated text."""
        mock_engine.translate.return_value = "__en__Lin Feng is a disciple of __en__Qingyun Sect"
        pipeline = TranslationPipeline(mock_engine, sample_glossary)

        translated = pipeline.translate("林风是青云宗的弟子")

        # Both glossary targets must survive post-processing.
        assert "Lin Feng" in translated
        assert "Qingyun Sect" in translated

    def test_translate_return_details(self, mock_engine, sample_glossary):
        """return_details=True yields a populated TranslationResult."""
        mock_engine.translate.return_value = "__en__Lin Feng is here"
        pipeline = TranslationPipeline(mock_engine, sample_glossary)

        details = pipeline.translate("林风在这里", return_details=True)

        assert isinstance(details, TranslationResult)
        assert details.original == "林风在这里"
        assert "Lin Feng" in details.translated
        assert len(details.terms_used) > 0
        assert isinstance(details.placeholder_map, dict)

    def test_translate_batch(self, mock_engine):
        """translate_batch() delegates once to the engine's batch call."""
        mock_engine.translate_batch.return_value = ["Result 1", "Result 2"]
        pipeline = TranslationPipeline(mock_engine)

        translations = pipeline.translate_batch(["Text 1", "Text 2"])

        assert len(translations) == 2
        mock_engine.translate_batch.assert_called_once()

    def test_add_term(self, mock_engine):
        """add_term() makes the term visible in the pipeline's glossary."""
        pipeline = TranslationPipeline(mock_engine)
        new_entry = GlossaryEntry("test", "TEST", TermCategory.OTHER)

        pipeline.add_term(new_entry)

        assert pipeline.has_glossary is True
        assert "test" in pipeline.glossary

    def test_set_languages_valid(self, mock_engine):
        """set_languages() stores both codes when the engine supports them."""
        pipeline = TranslationPipeline(mock_engine)
        mock_engine.is_language_supported.return_value = True

        pipeline.set_languages("en", "fr")

        assert (pipeline.src_lang, pipeline.tgt_lang) == ("en", "fr")

    def test_set_languages_invalid(self, mock_engine):
        """set_languages() raises ValueError for unsupported codes."""
        pipeline = TranslationPipeline(mock_engine)

        def only_zh_and_en(lang_code):
            # Stand-in support check: accept just the two listed codes.
            return lang_code in ["zh", "en"]

        mock_engine.is_language_supported.side_effect = only_zh_and_en

        with pytest.raises(ValueError, match="not supported"):
            pipeline.set_languages("xx", "yy")
- # ============================================================================
- # Test ProgressReporter (Story 5.4)
- # ============================================================================
class TestProgressReporter:
    """Exercise ProgressReporter counters, events and summary output."""

    def test_reporter_init(self):
        """A fresh reporter stores its callback and starts with zero counts."""
        on_event = Mock()

        reporter = ProgressReporter(on_event)

        assert reporter.callback is on_event
        assert reporter.total == 0
        assert reporter.completed == 0

    def test_on_start(self):
        """on_start() records the total and fires one START callback."""
        on_event = Mock()
        reporter = ProgressReporter(on_event)

        reporter.on_start(total=10)

        assert reporter.total == 10
        on_event.assert_called_once()
        # The callback receives (status, data) as positional arguments.
        positional = on_event.call_args[0]
        status, payload = positional
        assert status == ProgressStatus.START
        assert payload["total"] == 10

    def test_on_chapter_complete(self):
        """One completed chapter out of five gives 20 percent progress."""
        on_event = Mock()
        reporter = ProgressReporter(on_event)
        reporter.on_start(total=5)

        reporter.on_chapter_complete(chapter_index=0, chapter_title="Chapter 1")

        assert reporter.completed == 1
        assert reporter.progress_percent == 20.0

    def test_on_chapter_failed(self):
        """A failed chapter increments the failed counter."""
        on_event = Mock()
        reporter = ProgressReporter(on_event)
        reporter.on_start(total=5)
        failure = Exception("Test error")

        reporter.on_chapter_failed(chapter_index=0, error=failure)

        assert reporter.failed == 1

    def test_on_complete(self):
        """on_complete() marks the run complete and exposes a duration."""
        on_event = Mock()
        reporter = ProgressReporter(on_event)
        reporter.on_start(total=3)
        for index in (0, 1):
            reporter.on_chapter_complete(chapter_index=index)

        reporter.on_complete()

        assert reporter.is_complete is True
        assert reporter.duration_seconds is not None

    def test_progress_percent(self):
        """progress_percent goes from 0.0 to 50.0 after 5 of 10 chapters."""
        reporter = ProgressReporter()
        reporter.on_start(total=10)
        assert reporter.progress_percent == 0.0

        for index in range(5):
            reporter.on_chapter_complete(chapter_index=index)

        assert reporter.progress_percent == 50.0

    def test_get_summary(self):
        """get_summary() reports totals, failures, remainder and percent."""
        reporter = ProgressReporter()
        reporter.on_start(total=10)
        reporter.on_chapter_complete(chapter_index=0)
        reporter.on_chapter_complete(chapter_index=1)
        reporter.on_chapter_failed(chapter_index=2, error=Exception("test"))

        summary = reporter.get_summary()

        expected_fields = {
            "total": 10,
            "completed": 2,
            "failed": 1,
            "remaining": 7,
            "progress_percent": 20.0,
        }
        for field, expected in expected_fields.items():
            assert summary[field] == expected
class TestConsoleProgressReporter:
    """Exercise construction of ConsoleProgressReporter."""

    def test_console_reporter_init(self):
        """show_details is stored and an inner reporter is created."""
        console = ConsoleProgressReporter(show_details=True)

        assert console.show_details is True
        assert console.reporter is not None

    def test_get_reporter(self):
        """get_reporter() hands back a ProgressReporter instance."""
        inner = ConsoleProgressReporter().get_reporter()

        assert isinstance(inner, ProgressReporter)
- # ============================================================================
- # Test ChapterTranslator (Story 5.3)
- # ============================================================================
class TestChapterTranslator:
    """Exercise ChapterTranslator with a mocked pipeline and repository."""

    @pytest.fixture
    def mock_pipeline(self):
        """Return a TranslationPipeline-spec'd mock with a canned translation."""
        fake_pipeline = MagicMock(spec=TranslationPipeline)
        fake_pipeline.translate.return_value = "Translated text"
        return fake_pipeline

    @pytest.fixture
    def mock_repository(self):
        """Return a repository mock whose chapter queries all yield empty lists."""
        fake_repo = MagicMock()
        # Write-side methods are explicit mocks.
        fake_repo.save_chapter = MagicMock()
        fake_repo.record_failure = MagicMock()
        fake_repo.update_work_status = MagicMock()
        # Read-side queries default to "nothing to do".
        fake_repo.get_pending_chapters.return_value = []
        fake_repo.get_chapters.return_value = []
        fake_repo.get_failed_chapters.return_value = []
        return fake_repo

    @pytest.fixture
    def sample_chapter(self):
        """Return a pending ChapterItem with a short content string."""
        from src.repository.models import ChapterItem, ChapterStatus

        fields = {
            "work_id": "test_work",
            "chapter_index": 0,
            "title": "Test Chapter",
            "content": "Test content for translation.",
            "status": ChapterStatus.PENDING,
        }
        return ChapterItem(**fields)

    def test_translator_init(self, mock_pipeline, mock_repository):
        """The translator keeps references to its pipeline and repository."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        assert translator.pipeline is mock_pipeline
        assert translator.repository is mock_repository

    def test_split_paragraphs_simple(self, mock_pipeline, mock_repository):
        """Blank-line separated text splits into one segment per paragraph."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        segments = translator._split_paragraphs("Para 1\n\nPara 2\n\nPara 3")

        assert len(segments) == 3
        for position, expected in enumerate(("Para 1", "Para 2", "Para 3")):
            assert segments[position] == expected

    def test_split_long_paragraph(self, mock_pipeline, mock_repository):
        """A long paragraph is cut into several bounded-length segments."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)
        # One hundred short sentences joined by the full-width period.
        long_text = "。".join(f"Sentence {i}" for i in range(100))

        segments = translator._split_long_paragraph(long_text)

        assert len(segments) > 1
        # Segments may exceed the nominal maximum by a small buffer.
        length_limit = translator.MAX_SEGMENT_LENGTH + 100
        for segment in segments:
            assert len(segment) <= length_limit

    def test_translate_chapter_success(
        self, mock_pipeline, mock_repository, sample_chapter
    ):
        """A pending chapter comes back completed, translated and saved."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        outcome = translator.translate_chapter("test_work", sample_chapter)

        assert outcome.status == "completed"
        assert outcome.translation is not None
        mock_repository.save_chapter.assert_called()

    def test_translate_chapter_already_completed(
        self, mock_pipeline, mock_repository
    ):
        """A completed chapter is returned as-is without calling the pipeline."""
        from src.repository.models import ChapterItem, ChapterStatus

        finished = ChapterItem(
            work_id="test_work",
            chapter_index=0,
            title="Test",
            content="Content",
            status=ChapterStatus.COMPLETED,
            translation="Already translated",
        )
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        outcome = translator.translate_chapter("test_work", finished)

        assert outcome.translation == "Already translated"
        # The existing translation must be reused, not regenerated.
        mock_pipeline.translate.assert_not_called()

    def test_translate_work_empty(self, mock_pipeline, mock_repository):
        """translate_work() with no chapters returns without translating."""
        mock_repository.get_pending_chapters.return_value = []
        mock_repository.get_chapters.return_value = []
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        translator.translate_work("test_work")

        # Reaching this point means no exception was raised.
        mock_pipeline.translate.assert_not_called()

    def test_retry_failed_chapters(self, mock_pipeline, mock_repository):
        """retry_failed_chapters() sends failed chapters through the pipeline."""
        from src.repository.models import ChapterItem, ChapterStatus

        failed = ChapterItem(
            work_id="test_work",
            chapter_index=0,
            title="Failed",
            content="Content",
            status=ChapterStatus.FAILED,
            retry_count=0,
        )
        mock_repository.get_failed_chapters.return_value = [failed]
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        translator.retry_failed_chapters("test_work")

        assert mock_pipeline.translate.called

    def test_set_progress_callback(self, mock_pipeline, mock_repository):
        """set_progress_callback() swaps the reporter's callback."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)
        replacement = Mock()

        translator.set_progress_callback(replacement)

        assert translator.progress_reporter.callback is replacement
- # ============================================================================
- # Integration Tests (with mocked external dependencies)
- # ============================================================================
class TestIntegration:
    """Integration tests: real engine, pipeline and glossary on a mocked model."""

    @pytest.fixture
    def full_pipeline(self, tmp_path):
        """Build a real engine/pipeline/glossary over patched M2M100 classes.

        The fixture yields instead of returning so that both ``patch``
        context managers stay active while the requesting test runs.
        Returning from inside the ``with`` block would undo the patches
        before the test body executed.

        Yields:
            A dict with keys "engine", "pipeline", "glossary" and "mock_tok"
            (the tokenizer mock returned by ``from_pretrained``).
        """
        with patch('src.translator.engine.M2M100ForConditionalGeneration') as mock_model, \
                patch('src.translator.engine.M2M100Tokenizer') as mock_tokenizer:
            # Tokenizer mock handed out by the patched from_pretrained().
            mock_tok_instance = MagicMock()
            mock_tok_instance.src_lang = "zh"
            mock_tok_instance.lang_code_to_id = {"zh": 1, "en": 2}
            mock_tokenizer.from_pretrained.return_value = mock_tok_instance

            mock_model_instance = MagicMock()
            mock_model.from_pretrained.return_value = mock_model_instance

            # Model directory on disk containing only an empty config file.
            model_dir = tmp_path / "model"
            model_dir.mkdir()
            (model_dir / "config.json").write_text("{}")

            # Default decode result; individual tests may override it.
            mock_tok_instance.batch_decode.return_value = ["Translated text"]
            engine = TranslationEngine(model_path=str(model_dir))

            glossary = Glossary()
            glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
            pipeline = TranslationPipeline(engine, glossary)

            # Yield so teardown (un-patching) happens after the test.
            yield {
                "engine": engine,
                "pipeline": pipeline,
                "glossary": glossary,
                "mock_tok": mock_tok_instance,
            }

    def test_full_pipeline_translate(self, full_pipeline):
        """The glossary target term appears in the pipeline's output."""
        pipeline = full_pipeline["pipeline"]
        mock_tok = full_pipeline["mock_tok"]
        # Make the mocked tokenizer decode to text with a prefixed term.
        mock_tok.batch_decode.return_value = ["__en__Lin Feng is here"]

        result = pipeline.translate("林风在这里")

        assert "Lin Feng" in result

    def test_full_pipeline_statistics(self, full_pipeline):
        """get_statistics() counts each occurrence of a glossary term."""
        pipeline = full_pipeline["pipeline"]

        stats = pipeline.get_statistics("林风是林风的剑")

        assert "林风" in stats
        assert stats["林风"] == 2
|