|
|
@@ -0,0 +1,568 @@
|
|
|
+"""
|
|
|
+Unit tests for the translator module.
|
|
|
+
|
|
|
+Tests cover:
|
|
|
+- TranslationEngine initialization and basic translation
|
|
|
+- TranslationPipeline with glossary integration
|
|
|
+- ProgressReporter callbacks
|
|
|
+- ChapterTranslator (mocked)
|
|
|
+"""
|
|
|
+
|
|
|
+import pytest
|
|
|
+from pathlib import Path
|
|
|
+from unittest.mock import Mock, MagicMock, patch
|
|
|
+from datetime import datetime
|
|
|
+
|
|
|
+from src.translator.engine import TranslationEngine
|
|
|
+from src.translator.pipeline import TranslationPipeline, TranslationResult
|
|
|
+from src.translator.progress import ProgressReporter, ProgressStatus, ConsoleProgressReporter
|
|
|
+from src.translator.chapter_translator import ChapterTranslator
|
|
|
+
|
|
|
+from src.glossary.models import Glossary, GlossaryEntry, TermCategory
|
|
|
+
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# Test TranslationEngine (Story 5.1)
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
class TestTranslationEngine:
    """Exercise TranslationEngine against a mocked M2M100 model and tokenizer."""

    @pytest.fixture
    def mock_transformers(self):
        """Patch the M2M100 model and tokenizer classes in the engine module.

        Yields a dict with the patched classes ("model", "tokenizer") and the
        instances returned by their ``from_pretrained`` calls
        ("model_instance", "tokenizer_instance").  Both patches stay active
        until the requesting test has finished.
        """
        with patch('src.translator.engine.M2M100ForConditionalGeneration') as model_cls:
            with patch('src.translator.engine.M2M100Tokenizer') as tokenizer_cls:
                # Tokenizer stand-in; keyword arguments become plain attributes.
                tokenizer = MagicMock(
                    src_lang="zh",
                    lang_code_to_id={"zh": 1, "en": 2, "fr": 3},
                    return_tensors="pt",
                )
                tokenizer_cls.from_pretrained.return_value = tokenizer

                # Model stand-in; eval() is pinned to return None.
                model = MagicMock()
                model.eval.return_value = None
                model_cls.from_pretrained.return_value = model

                yield dict(
                    model=model_cls,
                    tokenizer=tokenizer_cls,
                    model_instance=model,
                    tokenizer_instance=tokenizer,
                )

    @pytest.fixture
    def mock_model_path(self, tmp_path):
        """Return the path (as str) of a temp directory holding an empty config.json."""
        directory = tmp_path / "m2m100_418M"
        directory.mkdir()
        config_file = directory / "config.json"
        config_file.write_text("{}")
        return str(directory)

    def test_engine_init_with_mock_path(self, mock_transformers, mock_model_path):
        """The engine keeps the given model path and reports a cpu or cuda device."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello world"]

        engine = TranslationEngine(model_path=mock_model_path)

        assert engine.model_path == mock_model_path
        assert engine.device in ("cpu", "cuda")

    def test_engine_init_import_error(self):
        """With the model class patched to None, construction raises ImportError."""
        missing_model = patch('src.translator.engine.M2M100ForConditionalGeneration', None)
        with missing_model, pytest.raises(ImportError, match="transformers library"):
            TranslationEngine(model_path="/fake/path")

    def test_translate_single_text(self, mock_transformers, mock_model_path):
        """translate() returns the first decoded string and decodes exactly once."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello world"]
        engine = TranslationEngine(model_path=mock_model_path)

        translated = engine.translate("你好世界", src_lang="zh", tgt_lang="en")

        assert translated == "Hello world"
        tokenizer.batch_decode.assert_called_once()

    def test_translate_empty_text_raises_error(self, mock_transformers, mock_model_path):
        """translate() rejects an empty string with ValueError."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello"]
        engine = TranslationEngine(model_path=mock_model_path)

        with pytest.raises(ValueError, match="cannot be empty"):
            engine.translate("", src_lang="zh", tgt_lang="en")

    def test_translate_batch(self, mock_transformers, mock_model_path):
        """translate_batch() returns one decoded string per input text."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello", "World", "Test"]
        engine = TranslationEngine(model_path=mock_model_path)

        sources = ["你好", "世界", "测试"]
        results = engine.translate_batch(
            sources, src_lang="zh", tgt_lang="en", batch_size=3
        )

        assert len(results) == 3
        assert results == ["Hello", "World", "Test"]

    def test_translate_batch_empty_raises_error(self, mock_transformers, mock_model_path):
        """translate_batch() rejects an empty list with ValueError."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = []
        engine = TranslationEngine(model_path=mock_model_path)

        with pytest.raises(ValueError, match="cannot be empty"):
            engine.translate_batch([], src_lang="zh", tgt_lang="en")

    def test_is_language_supported(self, mock_transformers, mock_model_path):
        """Only codes present in the tokenizer's lang_code_to_id are reported as supported."""
        tokenizer = mock_transformers["tokenizer_instance"]
        tokenizer.batch_decode.return_value = ["Hello"]
        tokenizer.lang_code_to_id = {"zh": 1, "en": 2, "fr": 3}
        engine = TranslationEngine(model_path=mock_model_path)

        # "de" is deliberately absent from the mapping above.
        for code, expected in (("zh", True), ("en", True), ("de", False)):
            assert engine.is_language_supported(code) is expected
|
|
|
+
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# Test TranslationPipeline (Story 5.2)
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
class TestTranslationPipeline:
    """Exercise TranslationPipeline on top of a mocked TranslationEngine."""

    @pytest.fixture
    def mock_engine(self):
        """Return a MagicMock spec'd on TranslationEngine with canned results."""
        fake_engine = MagicMock(spec=TranslationEngine)
        fake_engine.is_language_supported.return_value = True
        fake_engine.translate_batch.return_value = ["Hello", "World"]
        fake_engine.translate.return_value = "Lin Feng is a disciple"
        return fake_engine

    @pytest.fixture
    def sample_glossary(self):
        """Return a Glossary holding one character entry and one location entry."""
        terms = Glossary()
        for source, target, category in (
            ("林风", "Lin Feng", TermCategory.CHARACTER),
            ("青云宗", "Qingyun Sect", TermCategory.LOCATION),
        ):
            terms.add(GlossaryEntry(source, target, category))
        return terms

    def test_pipeline_init(self, mock_engine):
        """A fresh pipeline keeps its engine and defaults to zh -> en."""
        pipeline = TranslationPipeline(mock_engine)

        assert pipeline.engine is mock_engine
        assert (pipeline.src_lang, pipeline.tgt_lang) == ("zh", "en")

    def test_pipeline_with_glossary(self, mock_engine, sample_glossary):
        """A pipeline built with a glossary reports it and exposes both entries."""
        pipeline = TranslationPipeline(mock_engine, sample_glossary)

        assert pipeline.has_glossary is True
        assert len(pipeline.glossary) == 2

    def test_translate_simple(self, mock_engine):
        """Without a glossary, translate() returns the engine output after one engine call."""
        pipeline = TranslationPipeline(mock_engine)

        output = pipeline.translate("Hello world")

        assert output == "Lin Feng is a disciple"
        mock_engine.translate.assert_called_once()

    def test_translate_with_glossary(self, mock_engine, sample_glossary):
        """Glossary target terms appear in the final translated text."""
        mock_engine.translate.return_value = "__en__Lin Feng is a disciple of __en__Qingyun Sect"
        pipeline = TranslationPipeline(mock_engine, sample_glossary)

        output = pipeline.translate("林风是青云宗的弟子")

        # Both glossary targets must be present once post-processing is done.
        for term in ("Lin Feng", "Qingyun Sect"):
            assert term in output

    def test_translate_return_details(self, mock_engine, sample_glossary):
        """return_details=True yields a populated TranslationResult."""
        mock_engine.translate.return_value = "__en__Lin Feng is here"
        pipeline = TranslationPipeline(mock_engine, sample_glossary)

        details = pipeline.translate("林风在这里", return_details=True)

        assert isinstance(details, TranslationResult)
        assert details.original == "林风在这里"
        assert "Lin Feng" in details.translated
        assert len(details.terms_used) > 0
        assert isinstance(details.placeholder_map, dict)

    def test_translate_batch(self, mock_engine):
        """translate_batch() returns one result per input via a single engine batch call."""
        mock_engine.translate_batch.return_value = ["Result 1", "Result 2"]
        pipeline = TranslationPipeline(mock_engine)

        outputs = pipeline.translate_batch(["Text 1", "Text 2"])

        assert len(outputs) == 2
        mock_engine.translate_batch.assert_called_once()

    def test_add_term(self, mock_engine):
        """add_term() makes the term a member of the pipeline glossary."""
        pipeline = TranslationPipeline(mock_engine)

        pipeline.add_term(GlossaryEntry("test", "TEST", TermCategory.OTHER))

        assert pipeline.has_glossary is True
        assert "test" in pipeline.glossary

    def test_set_languages_valid(self, mock_engine):
        """set_languages() stores both codes when the engine supports them."""
        pipeline = TranslationPipeline(mock_engine)
        mock_engine.is_language_supported.return_value = True

        pipeline.set_languages("en", "fr")

        assert (pipeline.src_lang, pipeline.tgt_lang) == ("en", "fr")

    def test_set_languages_invalid(self, mock_engine):
        """set_languages() raises ValueError for codes the engine rejects."""
        pipeline = TranslationPipeline(mock_engine)
        # The engine stub accepts only "zh" and "en".
        mock_engine.is_language_supported.side_effect = lambda x: x in ("zh", "en")

        with pytest.raises(ValueError, match="not supported"):
            pipeline.set_languages("xx", "yy")
|
|
|
+
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# Test ProgressReporter (Story 5.4)
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
class TestProgressReporter:
    """Exercise ProgressReporter counters, derived values and callback dispatch."""

    def test_reporter_init(self):
        """A new reporter stores its callback and starts with zeroed counters."""
        on_event = Mock()

        reporter = ProgressReporter(on_event)

        assert reporter.callback is on_event
        assert reporter.total == 0
        assert reporter.completed == 0

    def test_on_start(self):
        """on_start() records the total and fires one START callback carrying it."""
        on_event = Mock()
        reporter = ProgressReporter(on_event)

        reporter.on_start(total=10)

        assert reporter.total == 10
        on_event.assert_called_once()
        # The callback receives (status, data) as positional arguments.
        positional = on_event.call_args[0]
        status, data = positional
        assert status == ProgressStatus.START
        assert data["total"] == 10

    def test_on_chapter_complete(self):
        """One completed chapter out of five gives 20 percent progress."""
        reporter = ProgressReporter(Mock())
        reporter.on_start(total=5)

        reporter.on_chapter_complete(chapter_index=0, chapter_title="Chapter 1")

        assert reporter.completed == 1
        assert reporter.progress_percent == 20.0

    def test_on_chapter_failed(self):
        """A failed chapter increments the failed counter."""
        reporter = ProgressReporter(Mock())
        reporter.on_start(total=5)

        reporter.on_chapter_failed(chapter_index=0, error=Exception("Test error"))

        assert reporter.failed == 1

    def test_on_complete(self):
        """on_complete() marks the run complete and makes a duration available."""
        reporter = ProgressReporter(Mock())
        reporter.on_start(total=3)
        for index in (0, 1):
            reporter.on_chapter_complete(chapter_index=index)

        reporter.on_complete()

        assert reporter.is_complete is True
        assert reporter.duration_seconds is not None

    def test_progress_percent(self):
        """progress_percent goes from 0 to 50 after five of ten chapters complete."""
        reporter = ProgressReporter()
        reporter.on_start(total=10)
        assert reporter.progress_percent == 0.0

        for index in range(5):
            reporter.on_chapter_complete(chapter_index=index)

        assert reporter.progress_percent == 50.0

    def test_get_summary(self):
        """get_summary() reports totals after two completions and one failure."""
        reporter = ProgressReporter()
        reporter.on_start(total=10)
        reporter.on_chapter_complete(chapter_index=0)
        reporter.on_chapter_complete(chapter_index=1)
        reporter.on_chapter_failed(chapter_index=2, error=Exception("test"))

        summary = reporter.get_summary()

        expected = {
            "total": 10,
            "completed": 2,
            "failed": 1,
            "remaining": 7,
            "progress_percent": 20.0,
        }
        for key, value in expected.items():
            assert summary[key] == value
|
|
|
+
|
|
|
+
|
|
|
class TestConsoleProgressReporter:
    """Exercise construction of ConsoleProgressReporter and access to its reporter."""

    def test_console_reporter_init(self):
        """show_details is stored and an underlying reporter is present."""
        console = ConsoleProgressReporter(show_details=True)

        assert console.show_details is True
        assert console.reporter is not None

    def test_get_reporter(self):
        """get_reporter() returns a ProgressReporter instance."""
        underlying = ConsoleProgressReporter().get_reporter()

        assert isinstance(underlying, ProgressReporter)
|
|
|
+
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# Test ChapterTranslator (Story 5.3)
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
class TestChapterTranslator:
    """Exercise ChapterTranslator with a mocked pipeline and a mocked repository."""

    @pytest.fixture
    def mock_pipeline(self):
        """Return a MagicMock spec'd on TranslationPipeline with a canned translation."""
        fake_pipeline = MagicMock(spec=TranslationPipeline)
        fake_pipeline.translate.return_value = "Translated text"
        return fake_pipeline

    @pytest.fixture
    def mock_repository(self):
        """Return a MagicMock repository whose chapter queries all return empty lists."""
        repo = MagicMock()
        # Explicit child mocks for the mutating methods.
        for method in ("save_chapter", "record_failure", "update_work_status"):
            setattr(repo, method, MagicMock())
        # Each query gets its own fresh empty list.
        for query in ("get_pending_chapters", "get_chapters", "get_failed_chapters"):
            getattr(repo, query).return_value = []
        return repo

    @pytest.fixture
    def sample_chapter(self):
        """Return a PENDING ChapterItem with a short content string."""
        from src.repository.models import ChapterItem, ChapterStatus

        fields = dict(
            work_id="test_work",
            chapter_index=0,
            title="Test Chapter",
            content="Test content for translation.",
            status=ChapterStatus.PENDING,
        )
        return ChapterItem(**fields)

    def test_translator_init(self, mock_pipeline, mock_repository):
        """The translator keeps references to the pipeline and repository it was given."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        assert translator.pipeline is mock_pipeline
        assert translator.repository is mock_repository

    def test_split_paragraphs_simple(self, mock_pipeline, mock_repository):
        """Blank-line separated text splits into one segment per paragraph."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        segments = translator._split_paragraphs("Para 1\n\nPara 2\n\nPara 3")

        assert len(segments) == 3
        for position, expected in enumerate(("Para 1", "Para 2", "Para 3")):
            assert segments[position] == expected

    def test_split_long_paragraph(self, mock_pipeline, mock_repository):
        """A long paragraph is split into several segments of bounded length."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)
        # 100 short sentences joined by the Chinese full stop.
        long_text = "。".join(f"Sentence {i}" for i in range(100))

        segments = translator._split_long_paragraph(long_text)

        assert len(segments) > 1
        # Allow a 100-character buffer over the configured maximum.
        limit = translator.MAX_SEGMENT_LENGTH + 100
        for segment in segments:
            assert len(segment) <= limit

    def test_translate_chapter_success(
        self, mock_pipeline, mock_repository, sample_chapter
    ):
        """A pending chapter ends up completed, translated and saved."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        outcome = translator.translate_chapter("test_work", sample_chapter)

        assert outcome.status == "completed"
        assert outcome.translation is not None
        mock_repository.save_chapter.assert_called()

    def test_translate_chapter_already_completed(
        self, mock_pipeline, mock_repository
    ):
        """A COMPLETED chapter keeps its translation and the pipeline is not called."""
        from src.repository.models import ChapterItem, ChapterStatus

        done_chapter = ChapterItem(
            work_id="test_work",
            chapter_index=0,
            title="Test",
            content="Content",
            status=ChapterStatus.COMPLETED,
            translation="Already translated",
        )
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        outcome = translator.translate_chapter("test_work", done_chapter)

        assert outcome.translation == "Already translated"
        mock_pipeline.translate.assert_not_called()

    def test_translate_work_empty(self, mock_pipeline, mock_repository):
        """translate_work() with no chapters returns without calling the pipeline."""
        mock_repository.get_pending_chapters.return_value = []
        mock_repository.get_chapters.return_value = []
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        translator.translate_work("test_work")

        mock_pipeline.translate.assert_not_called()

    def test_retry_failed_chapters(self, mock_pipeline, mock_repository):
        """retry_failed_chapters() sends a FAILED chapter back through the pipeline."""
        from src.repository.models import ChapterItem, ChapterStatus

        mock_repository.get_failed_chapters.return_value = [
            ChapterItem(
                work_id="test_work",
                chapter_index=0,
                title="Failed",
                content="Content",
                status=ChapterStatus.FAILED,
                retry_count=0,
            )
        ]
        translator = ChapterTranslator(mock_pipeline, mock_repository)

        translator.retry_failed_chapters("test_work")

        assert mock_pipeline.translate.called

    def test_set_progress_callback(self, mock_pipeline, mock_repository):
        """set_progress_callback() installs the callback on the progress reporter."""
        translator = ChapterTranslator(mock_pipeline, mock_repository)
        replacement = Mock()

        translator.set_progress_callback(replacement)

        assert translator.progress_reporter.callback is replacement
|
|
|
+
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# Integration Tests (with mocked external dependencies)
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
class TestIntegration:
    """Integration tests: real engine, pipeline and glossary over a mocked model."""

    @pytest.fixture
    def full_pipeline(self, tmp_path):
        """Build a TranslationPipeline with a real engine over mocked M2M100 classes.

        Yields a dict with keys "engine", "pipeline", "glossary" and
        "mock_tok" (the tokenizer instance returned by ``from_pretrained``).

        The fixture yields instead of returning so that both patches stay
        active while the test body runs; with ``return`` the ``with`` block
        would exit, and the patches be undone, before the test started.
        """
        with patch('src.translator.engine.M2M100ForConditionalGeneration') as mock_model, \
                patch('src.translator.engine.M2M100Tokenizer') as mock_tokenizer:

            # Tokenizer stand-in returned by from_pretrained.
            mock_tok_instance = MagicMock()
            mock_tok_instance.src_lang = "zh"
            mock_tok_instance.lang_code_to_id = {"zh": 1, "en": 2}
            mock_tokenizer.from_pretrained.return_value = mock_tok_instance

            # Model stand-in returned by from_pretrained.
            mock_model_instance = MagicMock()
            mock_model.from_pretrained.return_value = mock_model_instance

            # Temp directory containing an empty config.json, used as the model path.
            model_dir = tmp_path / "model"
            model_dir.mkdir()
            (model_dir / "config.json").write_text("{}")

            # Default decoded output; individual tests may override it.
            mock_tok_instance.batch_decode.return_value = ["Translated text"]

            engine = TranslationEngine(model_path=str(model_dir))

            glossary = Glossary()
            glossary.add(GlossaryEntry("林风", "Lin Feng", TermCategory.CHARACTER))

            pipeline = TranslationPipeline(engine, glossary)

            yield {
                "engine": engine,
                "pipeline": pipeline,
                "glossary": glossary,
                "mock_tok": mock_tok_instance,
            }

    def test_full_pipeline_translate(self, full_pipeline):
        """The glossary target term appears in the pipeline's translated output."""
        pipeline = full_pipeline["pipeline"]
        mock_tok = full_pipeline["mock_tok"]

        # Decoded model output for this test.
        mock_tok.batch_decode.return_value = ["__en__Lin Feng is here"]

        result = pipeline.translate("林风在这里")

        assert "Lin Feng" in result

    def test_full_pipeline_statistics(self, full_pipeline):
        """get_statistics() counts both occurrences of the glossary term in the text."""
        pipeline = full_pipeline["pipeline"]

        stats = pipeline.get_statistics("林风是林风的剑")

        assert "林风" in stats
        assert stats["林风"] == 2
|