"""
Unit tests for the translator module.

Tests cover:
- TranslationEngine initialization and basic translation
- TranslationPipeline with glossary integration
- ProgressReporter callbacks
- ChapterTranslator (mocked)
"""

import pytest
from pathlib import Path
from unittest.mock import Mock, MagicMock, patch
from datetime import datetime

from src.translator.engine import TranslationEngine
from src.translator.pipeline import TranslationPipeline, TranslationResult
from src.translator.progress import ProgressReporter, ProgressStatus, ConsoleProgressReporter
from src.translator.chapter_translator import ChapterTranslator
from src.glossary.models import Glossary, GlossaryEntry, TermCategory


# ============================================================================
# Test TranslationEngine (Story 5.1)
# ============================================================================

class TestTranslationEngine:
    """Tests for TranslationEngine class."""

    @pytest.fixture
    def mock_transformers(self):
        """Patch the M2M100 model/tokenizer names in the engine module.

        Yields a dict with the patched classes ("model", "tokenizer") and the
        instances their ``from_pretrained`` calls return ("model_instance",
        "tokenizer_instance"). The patches stay active until the test ends.
        """
        with patch('src.translator.engine.M2M100ForConditionalGeneration') as mock_model, \
             patch('src.translator.engine.M2M100Tokenizer') as mock_tokenizer:

            # Setup mock tokenizer
            mock_tok_instance = MagicMock()
            mock_tok_instance.src_lang = "zh"
            mock_tok_instance.lang_code_to_id = {"zh": 1, "en": 2, "fr": 3}
            mock_tok_instance.return_tensors = "pt"
            mock_tokenizer.from_pretrained.return_value = mock_tok_instance

            # Setup mock model
            mock_model_instance = MagicMock()
            mock_model.from_pretrained.return_value = mock_model_instance
            mock_model_instance.eval.return_value = None

            yield {
                "model": mock_model,
                "tokenizer": mock_tokenizer,
                "model_instance": mock_model_instance,
                "tokenizer_instance": mock_tok_instance
            }

    @pytest.fixture
    def mock_model_path(self, tmp_path):
        """Create a temporary mock model directory and return its path as str."""
        model_dir = tmp_path / "m2m100_418M"
        model_dir.mkdir()
        (model_dir / "config.json").write_text("{}")
        return str(model_dir)

    def test_engine_init_with_mock_path(self, mock_transformers, mock_model_path):
        """Test engine initialization with a mock model path."""
        mock_transformers["tokenizer_instance"].batch_decode.return_value = ["Hello world"]

        engine = TranslationEngine(model_path=mock_model_path)

        assert engine.model_path == mock_model_path
        assert engine.device in ("cpu", "cuda")

    def test_engine_init_import_error(self):
        """Test that ImportError is raised when transformers is not available."""
        # Replacing the model class with None simulates a failed import.
        with patch('src.translator.engine.M2M100ForConditionalGeneration', None):
            with pytest.raises(ImportError, match="transformers library"):
                TranslationEngine(model_path="/fake/path")

    def test_translate_single_text(self, mock_transformers, mock_model_path):
        """Test basic single-text translation."""
        mock_tok = mock_transformers["tokenizer_instance"]
        mock_tok.batch_decode.return_value = ["Hello world"]

        engine = TranslationEngine(model_path=mock_model_path)
        result = engine.translate("你好世界", src_lang="zh", tgt_lang="en")

        assert result == "Hello world"
        mock_tok.batch_decode.assert_called_once()

    def test_translate_empty_text_raises_error(self, mock_transformers, mock_model_path):
        """Test that translating empty text raises ValueError."""
        mock_tok = mock_transformers["tokenizer_instance"]
        mock_tok.batch_decode.return_value = ["Hello"]

        engine = TranslationEngine(model_path=mock_model_path)

        with pytest.raises(ValueError, match="cannot be empty"):
            engine.translate("", src_lang="zh", tgt_lang="en")

    def test_translate_batch(self, mock_transformers, mock_model_path):
        """Test batch translation."""
        mock_tok = mock_transformers["tokenizer_instance"]
        mock_tok.batch_decode.return_value = ["Hello", "World", "Test"]

        engine = TranslationEngine(model_path=mock_model_path)
        results = engine.translate_batch(
            ["你好", "世界", "测试"],
            src_lang="zh",
            tgt_lang="en",
            batch_size=3
        )

        assert len(results) == 3
        assert results == ["Hello", "World", "Test"]

    def test_translate_batch_empty_raises_error(self, mock_transformers, mock_model_path):
        """Test that empty batch list raises ValueError."""
        mock_tok = mock_transformers["tokenizer_instance"]
        mock_tok.batch_decode.return_value = []

        engine = TranslationEngine(model_path=mock_model_path)

        with pytest.raises(ValueError, match="cannot be empty"):
            engine.translate_batch([], src_lang="zh", tgt_lang="en")

    def test_is_language_supported(self, mock_transformers, mock_model_path):
        """Test language support checking."""
        mock_tok = mock_transformers["tokenizer_instance"]
        mock_tok.batch_decode.return_value = ["Hello"]
        mock_tok.lang_code_to_id = {"zh": 1, "en": 2, "fr": 3}

        engine = TranslationEngine(model_path=mock_model_path)

        assert engine.is_language_supported("zh") is True
        assert engine.is_language_supported("en") is True
        assert engine.is_language_supported("de") is False


# ============================================================================
# Test TranslationPipeline (Story 5.2)
# ============================================================================

class TestTranslationPipeline:
    """Tests for TranslationPipeline class."""

    @pytest.fixture
    def mock_engine(self):
        """Create a mock translation engine with canned return values."""
        engine = MagicMock(spec=TranslationEngine)
        engine.translate.return_value = "Lin Feng is a disciple"
        engine.translate_batch.return_value = ["Hello", "World"]
        engine.is_language_supported.return_value = True
        return engine

    @pytest.fixture
    def sample_glossary(self):
        """Create a sample glossary with two entries."""
        glossary = Glossary()
        glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))
        glossary.add(GlossaryEntry("青云宗", "Qingyun Sect", TermCategory.LOCATION))
        return glossary

    def test_pipeline_init(self, mock_engine):
        """Test pipeline initialization."""
        pipeline = TranslationPipeline(mock_engine)

        assert pipeline.engine is mock_engine
        assert pipeline.src_lang == "zh"
        assert pipeline.tgt_lang == "en"

    def test_pipeline_with_glossary(self, mock_engine, sample_glossary):
        """Test pipeline with glossary."""
        pipeline = TranslationPipeline(mock_engine, sample_glossary)

        assert pipeline.has_glossary is True
        assert len(pipeline.glossary) == 2

    def test_translate_simple(self, mock_engine):
        """Test simple translation without glossary."""
        pipeline = TranslationPipeline(mock_engine)
        result = pipeline.translate("Hello world")

        assert result == "Lin Feng is a disciple"
        mock_engine.translate.assert_called_once()

    def test_translate_with_glossary(self, mock_engine, sample_glossary):
        """Test translation with glossary preprocessing."""
        mock_engine.translate.return_value = "__en__Lin Feng is a disciple of __en__Qingyun Sect"

        pipeline = TranslationPipeline(mock_engine, sample_glossary)
        result = pipeline.translate("林风是青云宗的弟子")

        # After post-processing, placeholders should be replaced
        assert "Lin Feng" in result
        assert "Qingyun Sect" in result

    def test_translate_return_details(self, mock_engine, sample_glossary):
        """Test translation with detailed result."""
        mock_engine.translate.return_value = "__en__Lin Feng is here"

        pipeline = TranslationPipeline(mock_engine, sample_glossary)
        result = pipeline.translate("林风在这里", return_details=True)

        assert isinstance(result, TranslationResult)
        assert result.original == "林风在这里"
        assert "Lin Feng" in result.translated
        assert len(result.terms_used) > 0
        assert isinstance(result.placeholder_map, dict)

    def test_translate_batch(self, mock_engine):
        """Test batch translation."""
        mock_engine.translate_batch.return_value = ["Result 1", "Result 2"]

        pipeline = TranslationPipeline(mock_engine)
        results = pipeline.translate_batch(["Text 1", "Text 2"])

        assert len(results) == 2
        mock_engine.translate_batch.assert_called_once()

    def test_add_term(self, mock_engine):
        """Test adding a term to the pipeline glossary."""
        pipeline = TranslationPipeline(mock_engine)
        entry = GlossaryEntry("test", "TEST", TermCategory.OTHER)
        pipeline.add_term(entry)

        assert pipeline.has_glossary is True
        assert "test" in pipeline.glossary

    def test_set_languages_valid(self, mock_engine):
        """Test setting valid languages."""
        pipeline = TranslationPipeline(mock_engine)
        mock_engine.is_language_supported.return_value = True

        pipeline.set_languages("en", "fr")

        assert pipeline.src_lang == "en"
        assert pipeline.tgt_lang == "fr"

    def test_set_languages_invalid(self, mock_engine):
        """Test setting invalid language raises error."""
        pipeline = TranslationPipeline(mock_engine)
        # Only "zh" and "en" are reported as supported by the mock engine.
        mock_engine.is_language_supported.side_effect = lambda x: x in ["zh", "en"]

        with pytest.raises(ValueError, match="not supported"):
            pipeline.set_languages("xx", "yy")


# ============================================================================
# Test ProgressReporter (Story 5.4)
# ============================================================================

class TestProgressReporter:
    """Tests for ProgressReporter class."""

    def test_reporter_init(self):
        """Test reporter initialization."""
        callback = Mock()
        reporter = ProgressReporter(callback)

        assert reporter.callback is callback
        assert reporter.total == 0
        assert reporter.completed == 0

    def test_on_start(self):
        """Test start event."""
        callback = Mock()
        reporter = ProgressReporter(callback)

        reporter.on_start(total=10)

        assert reporter.total == 10
        callback.assert_called_once()
        # The callback receives (status, data) as positional arguments.
        status, data = callback.call_args[0]
        assert status == ProgressStatus.START
        assert data["total"] == 10

    def test_on_chapter_complete(self):
        """Test chapter complete event."""
        callback = Mock()
        reporter = ProgressReporter(callback)
        reporter.on_start(total=5)

        reporter.on_chapter_complete(chapter_index=0, chapter_title="Chapter 1")

        assert reporter.completed == 1
        assert reporter.progress_percent == 20.0

    def test_on_chapter_failed(self):
        """Test chapter failed event."""
        callback = Mock()
        reporter = ProgressReporter(callback)
        reporter.on_start(total=5)

        error = Exception("Test error")
        reporter.on_chapter_failed(chapter_index=0, error=error)

        assert reporter.failed == 1

    def test_on_complete(self):
        """Test complete event."""
        callback = Mock()
        reporter = ProgressReporter(callback)
        reporter.on_start(total=3)
        reporter.on_chapter_complete(chapter_index=0)
        reporter.on_chapter_complete(chapter_index=1)

        reporter.on_complete()

        assert reporter.is_complete is True
        assert reporter.duration_seconds is not None

    def test_progress_percent(self):
        """Test progress percentage calculation."""
        reporter = ProgressReporter()
        reporter.on_start(total=10)

        assert reporter.progress_percent == 0.0

        for i in range(5):
            reporter.on_chapter_complete(chapter_index=i)

        assert reporter.progress_percent == 50.0

    def test_get_summary(self):
        """Test getting progress summary."""
        reporter = ProgressReporter()
        reporter.on_start(total=10)
        reporter.on_chapter_complete(chapter_index=0)
        reporter.on_chapter_complete(chapter_index=1)
        reporter.on_chapter_failed(chapter_index=2, error=Exception("test"))

        summary = reporter.get_summary()

        assert summary["total"] == 10
        assert summary["completed"] == 2
        assert summary["failed"] == 1
        assert summary["remaining"] == 7
        assert summary["progress_percent"] == 20.0


class TestConsoleProgressReporter:
    """Tests for ConsoleProgressReporter class."""

    def test_console_reporter_init(self):
        """Test console reporter initialization."""
        reporter = ConsoleProgressReporter(show_details=True)

        assert reporter.show_details is True
        assert reporter.reporter is not None

    def test_get_reporter(self):
        """Test getting underlying reporter."""
        console = ConsoleProgressReporter()
        reporter = console.get_reporter()

        assert isinstance(reporter, ProgressReporter)


# ============================================================================
# Test ChapterTranslator (Story 5.3)
# ============================================================================

class TestChapterTranslator:
    """Tests for ChapterTranslator class."""

    @pytest.fixture
    def mock_pipeline(self):
        """Create a mock translation pipeline."""
        pipeline = MagicMock(spec=TranslationPipeline)
        pipeline.translate.return_value = "Translated text"
        return pipeline

    @pytest.fixture
    def mock_repository(self):
        """Create a mock repository whose chapter queries return empty lists."""
        repo = MagicMock()
        repo.save_chapter = MagicMock()
        repo.get_pending_chapters.return_value = []
        repo.get_chapters.return_value = []
        repo.get_failed_chapters.return_value = []
        repo.record_failure = MagicMock()
        repo.update_work_status = MagicMock()
        return repo

    @pytest.fixture
    def sample_chapter(self):
        """Create a sample pending chapter."""
        from src.repository.models import ChapterItem, ChapterStatus
        return ChapterItem(
            work_id="test_work",
            chapter_index=0,
            title="Test Chapter",
            content="Test content for translation.",
            status=ChapterStatus.PENDING
        )

    def test_translator_init(self, mock_pipeline, mock_repository):
        """Test translator initialization."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        assert translator.pipeline is mock_pipeline
        assert translator.repository is mock_repository

    def test_split_paragraphs_simple(self, mock_pipeline, mock_repository):
        """Test splitting simple paragraphs."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        content = "Para 1\n\nPara 2\n\nPara 3"
        segments = translator._split_paragraphs(content)

        assert len(segments) == 3
        assert segments[0] == "Para 1"
        assert segments[1] == "Para 2"
        assert segments[2] == "Para 3"

    def test_split_long_paragraph(self, mock_pipeline, mock_repository):
        """Test splitting a long paragraph."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        # Create a long paragraph
        long_text = "。".join(["Sentence " + str(i) for i in range(100)])
        segments = translator._split_long_paragraph(long_text)

        assert len(segments) > 1
        # Each segment should be under the max length
        for seg in segments:
            assert len(seg) <= translator.MAX_SEGMENT_LENGTH + 100  # Allow some buffer

    def test_translate_chapter_success(
        self, mock_pipeline, mock_repository, sample_chapter
    ):
        """Test successful chapter translation."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        result = translator.translate_chapter("test_work", sample_chapter)

        assert result.status == "completed"
        assert result.translation is not None
        mock_repository.save_chapter.assert_called()

    def test_translate_chapter_already_completed(
        self, mock_pipeline, mock_repository
    ):
        """Test skipping already translated chapter."""
        from src.repository.models import ChapterItem, ChapterStatus

        chapter = ChapterItem(
            work_id="test_work",
            chapter_index=0,
            title="Test",
            content="Content",
            status=ChapterStatus.COMPLETED,
            translation="Already translated"
        )

        translator = ChapterTranslator(mock_pipeline, mock_repository)
        result = translator.translate_chapter("test_work", chapter)

        assert result.translation == "Already translated"
        # translate should not be called
        mock_pipeline.translate.assert_not_called()

    def test_translate_work_empty(self, mock_pipeline, mock_repository):
        """Test translating work with no pending chapters."""
        mock_repository.get_pending_chapters.return_value = []
        mock_repository.get_chapters.return_value = []

        translator = ChapterTranslator(mock_pipeline, mock_repository)
        translator.translate_work("test_work")

        # Should not crash, should just return
        mock_pipeline.translate.assert_not_called()

    def test_retry_failed_chapters(self, mock_pipeline, mock_repository):
        """Test retrying failed chapters."""
        from src.repository.models import ChapterItem, ChapterStatus

        failed_chapter = ChapterItem(
            work_id="test_work",
            chapter_index=0,
            title="Failed",
            content="Content",
            status=ChapterStatus.FAILED,
            retry_count=0
        )
        mock_repository.get_failed_chapters.return_value = [failed_chapter]

        translator = ChapterTranslator(mock_pipeline, mock_repository)
        translator.retry_failed_chapters("test_work")

        assert mock_pipeline.translate.called

    def test_set_progress_callback(self, mock_pipeline, mock_repository):
        """Test setting a new progress callback."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        new_callback = Mock()
        translator.set_progress_callback(new_callback)

        assert translator.progress_reporter.callback is new_callback


# ============================================================================
# Integration Tests (with mocked external dependencies)
# ============================================================================

class TestIntegration:
    """Integration tests for the translator module."""

    @pytest.fixture
    def full_pipeline(self, tmp_path):
        """Create a full pipeline with mocked model but real other components.

        Yields a dict with keys "engine", "pipeline", "glossary" and
        "mock_tok" (the tokenizer instance whose ``batch_decode`` output the
        tests control).
        """
        with patch('src.translator.engine.M2M100ForConditionalGeneration') as mock_model, \
             patch('src.translator.engine.M2M100Tokenizer') as mock_tokenizer:

            # Setup mocks
            mock_tok_instance = MagicMock()
            mock_tok_instance.src_lang = "zh"
            mock_tok_instance.lang_code_to_id = {"zh": 1, "en": 2}
            mock_tokenizer.from_pretrained.return_value = mock_tok_instance

            mock_model_instance = MagicMock()
            mock_model.from_pretrained.return_value = mock_model_instance

            # Create mock model directory
            model_dir = tmp_path / "model"
            model_dir.mkdir()
            (model_dir / "config.json").write_text("{}")

            # Return configured components
            mock_tok_instance.batch_decode.return_value = ["Translated text"]

            engine = TranslationEngine(model_path=str(model_dir))
            glossary = Glossary()
            glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

            pipeline = TranslationPipeline(engine, glossary)

            # Yield (not return) so both patches remain active while the test
            # body runs; a plain return would leave the `with` block, and undo
            # the patches, before the test ever used the pipeline.
            yield {
                "engine": engine,
                "pipeline": pipeline,
                "glossary": glossary,
                "mock_tok": mock_tok_instance
            }

    def test_full_pipeline_translate(self, full_pipeline):
        """Test full pipeline from text to translation."""
        pipeline = full_pipeline["pipeline"]
        mock_tok = full_pipeline["mock_tok"]

        # Setup mock to return text with placeholder
        mock_tok.batch_decode.return_value = ["__en__Lin Feng is here"]

        result = pipeline.translate("林风在这里")

        assert "Lin Feng" in result

    def test_full_pipeline_statistics(self, full_pipeline):
        """Test getting statistics from pipeline."""
        pipeline = full_pipeline["pipeline"]

        stats = pipeline.get_statistics("林风是林风的剑")

        assert "林风" in stats
        assert stats["林风"] == 2