Przeglądaj źródła

feat(translator): Implement Story 5.5 - Breakpoint Resume Translation (3 SP)

Implements breakpoint resume mechanism for translation recovery.

### Core Features
- ResumeTracker: Manages translation resume state
  - Saves last completed/failed chapter index to disk
  - Generates human-readable resume messages
  - Loads resume state to continue from checkpoint
- ResumeState: Data class for resume information
  - Tracks last completed and failed chapters
  - Calculates resume index automatically
  - Provides resumable status check

### Integration
- Updated ChapterTranslator to use ResumeTracker
  - Updates state on chapter completion/failure
  - Displays resume message when continuing
  - Clears resume state on successful completion
- Enhanced translate_work() with resume logic
  - Checks for existing resume state
  - Starts from the saved resume index (the failed chapter, else the one after the last completed) unless start_index is given
  - Logs "Resume from chapter X" message

### Testing
- 23 unit tests covering all resume tracker functionality
- Tests for state persistence and loading

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
d8dfun 2 dni temu
rodzic
commit
8da3d0ba40

+ 3 - 0
src/translator/__init__.py

@@ -17,6 +17,7 @@ from .term_injector import (
     TermResult,
     TermUsageRecord,
 )
+from .resume_tracker import ResumeTracker, ResumeState
 
 __all__ = [
     "TranslationEngine",
@@ -30,4 +31,6 @@ __all__ = [
     "TermValidationResult",
     "TermResult",
     "TermUsageRecord",
+    "ResumeTracker",
+    "ResumeState",
 ]

+ 34 - 0
src/translator/chapter_translator.py

@@ -8,11 +8,13 @@ integrating with the repository for persistence.
 import logging
 import traceback
 from typing import List, Optional, Callable
+from pathlib import Path
 
 from ..repository.models import ChapterItem, ChapterStatus
 from ..repository.repository import Repository
 from .pipeline import TranslationPipeline
 from .progress import ProgressReporter, ProgressCallback, ProgressStatus
+from .resume_tracker import ResumeTracker
 
 logger = logging.getLogger(__name__)
 
@@ -53,6 +55,7 @@ class ChapterTranslator:
         self.pipeline = pipeline
         self.repository = repository
         self.progress_reporter = ProgressReporter(callback=progress_callback)
+        self.resume_tracker = ResumeTracker(repository.storage_dir)
 
     def _split_paragraphs(self, content: str) -> List[str]:
         """
@@ -182,6 +185,12 @@ class ChapterTranslator:
             # Save to repository
             self.repository.save_chapter(work_id, chapter)
 
+            # Update resume tracker
+            all_chapters = self.repository.get_chapters(work_id)
+            self.resume_tracker.update_on_chapter_complete(
+                work_id, chapter.chapter_index, len(all_chapters)
+            )
+
             # Notify completion
             self.progress_reporter.on_chapter_complete(
                 chapter.chapter_index,
@@ -197,6 +206,12 @@ class ChapterTranslator:
             chapter.retry_count += 1
             self.repository.save_chapter(work_id, chapter)
 
+            # Update resume tracker on failure
+            all_chapters = self.repository.get_chapters(work_id)
+            self.resume_tracker.update_on_chapter_failed(
+                work_id, chapter.chapter_index, len(all_chapters)
+            )
+
             # Record failure
             self.repository.record_failure(
                 work_id, chapter.chapter_index, e, traceback.format_exc()
@@ -228,6 +243,16 @@ class ChapterTranslator:
             start_index: Optional starting chapter index (inclusive)
             end_index: Optional ending chapter index (exclusive)
         """
+        # Check for existing resume state
+        resume_state = None
+        if resume:
+            resume_state = self.resume_tracker.load_resume_state(work_id)
+            if resume_state:
+                logger.info(f"Resuming translation: {resume_state.resume_message}")
+                # Use resume index if start_index not specified
+                if start_index is None:
+                    start_index = resume_state.get_resume_index()
+
         # Get chapters to translate
         if start_index is not None or end_index is not None:
             # Range-based translation
@@ -250,6 +275,13 @@ class ChapterTranslator:
         # Sort by chapter index
         chapters.sort(key=lambda c: c.chapter_index)
 
+        # Display resume info if available
+        if resume_state and chapters:
+            logger.info(
+                f"Starting translation from chapter {chapters[0].chapter_index} "
+                f"({len(chapters)} chapters remaining)"
+            )
+
         # Notify start
         self.progress_reporter.on_start(len(chapters))
 
@@ -275,6 +307,8 @@ class ChapterTranslator:
         summary = self.progress_reporter.get_summary()
         if summary["failed"] == 0:
             self.repository.update_work_status(work_id, "completed")
+            # Clear resume state on successful completion
+            self.resume_tracker.delete_resume_state(work_id)
         elif summary["completed"] > 0:
             self.repository.update_work_status(work_id, "paused")
 

+ 331 - 0
src/translator/resume_tracker.py

@@ -0,0 +1,331 @@
+"""
+Resume tracking module for translation breakpoint recovery.
+
+This module provides functionality for tracking translation progress
+and enabling resumption from the last translated chapter.
+"""
+
+import json
+import logging
+from dataclasses import dataclass, field, asdict
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ResumeState:
+    """
+    Checkpoint describing how far the translation of one work has progressed.
+
+    Attributes:
+        work_id: Identifier of the work this checkpoint belongs to
+        last_completed_index: Index of the most recently completed chapter
+        last_failed_index: Index of the most recently failed chapter, or
+            None when no failure is recorded
+        total_chapters: Number of chapters in the work
+        timestamp: When the checkpoint was taken (defaults to datetime.now())
+        resume_message: Human-readable description of the resume point
+    """
+
+    work_id: str
+    last_completed_index: int
+    last_failed_index: Optional[int]
+    total_chapters: int
+    timestamp: datetime = field(default_factory=datetime.now)
+    resume_message: str = ""
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-serializable mapping of this checkpoint."""
+        # Plain fields are copied as-is; the timestamp is stored as ISO text.
+        payload: Dict[str, Any] = {
+            name: getattr(self, name)
+            for name in (
+                "work_id",
+                "last_completed_index",
+                "last_failed_index",
+                "total_chapters",
+            )
+        }
+        payload["timestamp"] = self.timestamp.isoformat()
+        payload["resume_message"] = self.resume_message
+        return payload
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "ResumeState":
+        """
+        Rebuild a checkpoint from the mapping produced by ``to_dict``.
+
+        ``last_failed_index`` and ``resume_message`` are optional and fall
+        back to None and "" respectively; every other key is required.
+        """
+        work_id = data["work_id"]
+        completed = data["last_completed_index"]
+        failed = data.get("last_failed_index")
+        total = data["total_chapters"]
+        recorded_at = datetime.fromisoformat(data["timestamp"])
+        message = data.get("resume_message", "")
+        return cls(work_id, completed, failed, total, recorded_at, message)
+
+    def get_resume_index(self) -> int:
+        """
+        Get the chapter index to resume from.
+
+        Returns:
+            The recorded failed chapter when there is one, otherwise the
+            chapter right after the last completed one
+        """
+        failed = self.last_failed_index
+        return self.last_completed_index + 1 if failed is None else failed
+
+    def is_resumable(self) -> bool:
+        """
+        Check if translation can be resumed.
+
+        Returns:
+            True if the resume index is still below ``total_chapters``
+        """
+        return self.get_resume_index() < self.total_chapters
+
+
+class ResumeTracker:
+    """
+    Tracker for translation resume state.
+
+    Stores one JSON checkpoint per work under ``<storage_dir>/resume`` and
+    reads it back so a translation can continue from the last checkpoint.
+    """
+
+    def __init__(self, storage_dir: Path):
+        """
+        Initialize the resume tracker.
+
+        Creates the ``resume`` subdirectory (and any missing parents) if it
+        does not exist yet.
+
+        Args:
+            storage_dir: Directory for storing resume state files
+        """
+        self.storage_dir = Path(storage_dir)
+        self.resume_dir = self.storage_dir / "resume"
+        self.resume_dir.mkdir(parents=True, exist_ok=True)
+
+    def _get_resume_file_path(self, work_id: str) -> Path:
+        """
+        Get the file path for a work's resume state.
+
+        Args:
+            work_id: The work item ID
+
+        Returns:
+            Path to the resume state file
+        """
+        return self.resume_dir / f"{work_id}_resume.json"
+
+    def _write_state(self, work_id: str, state: ResumeState) -> None:
+        """
+        Persist ``state`` as the checkpoint file of ``work_id``.
+
+        The JSON is written to a sibling ``.tmp`` file and then renamed over
+        the target, so an interruption mid-write cannot leave a truncated
+        checkpoint behind (the rename is atomic on POSIX filesystems).
+
+        Args:
+            work_id: The work item ID (determines the target file)
+            state: The state to serialize
+        """
+        resume_file = self._get_resume_file_path(work_id)
+        # The ".tmp" suffix keeps the scratch file out of "*_resume.json" globs.
+        tmp_file = resume_file.with_name(resume_file.name + ".tmp")
+        with open(tmp_file, "w", encoding="utf-8") as f:
+            json.dump(state.to_dict(), f, indent=2, ensure_ascii=False)
+        tmp_file.replace(resume_file)
+
+    def save_resume_state(
+        self,
+        work_id: str,
+        last_completed_index: int,
+        last_failed_index: Optional[int],
+        total_chapters: int,
+        resume_message: str = ""
+    ) -> ResumeState:
+        """
+        Save the current resume state, replacing any previous checkpoint.
+
+        Args:
+            work_id: The work item ID
+            last_completed_index: Last successfully completed chapter index
+            last_failed_index: Last failed chapter index (if any)
+            total_chapters: Total number of chapters
+            resume_message: Optional custom resume message; when empty a
+                default message is generated
+
+        Returns:
+            The saved ResumeState
+        """
+        state = ResumeState(
+            work_id=work_id,
+            last_completed_index=last_completed_index,
+            last_failed_index=last_failed_index,
+            total_chapters=total_chapters,
+            resume_message=resume_message,
+        )
+
+        # If no custom message, generate default
+        if not resume_message:
+            state.resume_message = self._generate_resume_message(state)
+
+        self._write_state(work_id, state)
+
+        logger.debug(f"Saved resume state for work {work_id}: {state.resume_message}")
+        return state
+
+    def load_resume_state(self, work_id: str) -> Optional[ResumeState]:
+        """
+        Load the resume state for a work item.
+
+        A missing file yields None silently. A file that exists but cannot be
+        parsed into a ResumeState (invalid JSON, missing keys, wrong value
+        types, non-object top level) is logged as a warning and also yields
+        None, so a damaged checkpoint never aborts the caller.
+
+        Args:
+            work_id: The work item ID
+
+        Returns:
+            ResumeState if found and readable, None otherwise
+        """
+        resume_file = self._get_resume_file_path(work_id)
+
+        try:
+            with open(resume_file, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            return ResumeState.from_dict(data)
+        except FileNotFoundError:
+            return None
+        except (json.JSONDecodeError, KeyError, ValueError, TypeError) as e:
+            # TypeError covers e.g. a JSON list at top level or a null timestamp.
+            logger.warning(f"Failed to load resume state for {work_id}: {e}")
+            return None
+
+    def delete_resume_state(self, work_id: str) -> None:
+        """
+        Delete the resume state for a work item.
+
+        Does nothing when no checkpoint file exists.
+
+        Args:
+            work_id: The work item ID
+        """
+        resume_file = self._get_resume_file_path(work_id)
+        try:
+            resume_file.unlink()
+        except FileNotFoundError:
+            # Already gone (never saved, or removed concurrently).
+            return
+        logger.debug(f"Deleted resume state for work {work_id}")
+
+    def get_resume_index(self, work_id: str, total_chapters: int) -> Optional[int]:
+        """
+        Get the chapter index to resume translation from.
+
+        Args:
+            work_id: The work item ID
+            total_chapters: Total number of chapters. Accepted for interface
+                compatibility; the returned index is taken from the stored
+                state and is not checked against this value.
+
+        Returns:
+            Chapter index to resume from, or None if no resume state exists
+        """
+        state = self.load_resume_state(work_id)
+        if state is None:
+            return None
+
+        return state.get_resume_index()
+
+    def update_on_chapter_complete(
+        self,
+        work_id: str,
+        chapter_index: int,
+        total_chapters: int
+    ) -> None:
+        """
+        Update resume state after a chapter completes.
+
+        Args:
+            work_id: The work item ID
+            chapter_index: The chapter that just completed
+            total_chapters: Total number of chapters
+        """
+        state = self.load_resume_state(work_id)
+
+        if state is None:
+            state = ResumeState(
+                work_id=work_id,
+                last_completed_index=chapter_index,
+                last_failed_index=None,
+                total_chapters=total_chapters,
+            )
+        else:
+            state.last_completed_index = chapter_index
+            # NOTE(review): any recorded failure is cleared on success, even
+            # when the failed chapter is a different (earlier) one than the
+            # chapter that just completed - confirm the caller retries such
+            # chapters by other means, otherwise resume will skip them.
+            state.last_failed_index = None
+            # Keep the stored total in step with the caller's current count.
+            state.total_chapters = total_chapters
+            state.timestamp = datetime.now()
+
+        state.resume_message = self._generate_resume_message(state)
+        self._write_state(work_id, state)
+
+    def update_on_chapter_failed(
+        self,
+        work_id: str,
+        chapter_index: int,
+        total_chapters: int
+    ) -> None:
+        """
+        Update resume state after a chapter fails.
+
+        Args:
+            work_id: The work item ID
+            chapter_index: The chapter that failed
+            total_chapters: Total number of chapters
+        """
+        state = self.load_resume_state(work_id)
+
+        if state is None:
+            # No checkpoint yet: -1 marks "nothing completed so far".
+            state = ResumeState(
+                work_id=work_id,
+                last_completed_index=-1,
+                last_failed_index=chapter_index,
+                total_chapters=total_chapters,
+            )
+        else:
+            state.last_failed_index = chapter_index
+            # Keep the stored total in step with the caller's current count.
+            state.total_chapters = total_chapters
+            state.timestamp = datetime.now()
+
+        state.resume_message = self._generate_resume_message(state)
+        self._write_state(work_id, state)
+
+    def _generate_resume_message(self, state: ResumeState) -> str:
+        """
+        Generate a human-readable resume message.
+
+        Args:
+            state: The resume state
+
+        Returns:
+            Human-readable resume message
+        """
+        resume_index = state.get_resume_index()
+        # Never report a negative count if the stored total is too small.
+        remaining = max(0, state.total_chapters - resume_index)
+
+        if state.last_failed_index is not None:
+            return (
+                f"Resume from failed chapter {state.last_failed_index} "
+                f"({remaining} chapters remaining)"
+            )
+
+        if state.last_completed_index >= 0:
+            return (
+                f"Resume from chapter {resume_index} after completing "
+                f"chapter {state.last_completed_index} "
+                f"({remaining} chapters remaining)"
+            )
+
+        return (
+            f"Start translation from chapter {resume_index} "
+            f"({state.total_chapters} chapters total)"
+        )
+
+    def get_all_resume_states(self) -> List[ResumeState]:
+        """
+        Get all resume states.
+
+        Files that cannot be parsed are logged as warnings and skipped.
+
+        Returns:
+            List of all readable ResumeState objects, in directory-listing order
+        """
+        states = []
+
+        for resume_file in self.resume_dir.glob("*_resume.json"):
+            try:
+                with open(resume_file, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+                states.append(ResumeState.from_dict(data))
+            except (json.JSONDecodeError, KeyError, ValueError, TypeError) as e:
+                logger.warning(f"Failed to load resume state from {resume_file}: {e}")
+
+        return states
+
+    def clear_all_resume_states(self) -> None:
+        """Clear all resume state files."""
+        for resume_file in self.resume_dir.glob("*_resume.json"):
+            try:
+                resume_file.unlink()
+            except FileNotFoundError:
+                # Removed by someone else between listing and unlinking.
+                pass
+        logger.debug("Cleared all resume states")

+ 336 - 0
tests/translator/test_resume_tracker.py

@@ -0,0 +1,336 @@
+"""
+Unit tests for the resume tracker module.
+
+Tests cover resume state tracking and loading.
+"""
+
+import sys
+from unittest.mock import Mock
+from pathlib import Path
+import json
+import tempfile
+
+# Mock torch and transformers before importing
+sys_mock = Mock()
+sys.modules["torch"] = sys_mock
+sys.modules["transformers"] = sys_mock
+
+import pytest
+
+from src.translator.resume_tracker import ResumeTracker, ResumeState
+
+
+class TestResumeState:
+    """Test cases for ResumeState dataclass."""
+
+    def test_create_resume_state(self):
+        """Test creating a resume state."""
+        state = ResumeState(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        assert state.work_id == "test_work"
+        assert state.last_completed_index == 10
+        assert state.last_failed_index is None
+        assert state.total_chapters == 100
+
+    def test_to_dict(self):
+        """Test converting resume state to dictionary."""
+        state = ResumeState(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+            resume_message="Test message",
+        )
+
+        data = state.to_dict()
+
+        assert data["work_id"] == "test_work"
+        assert data["last_completed_index"] == 10
+        assert data["last_failed_index"] is None
+        assert data["total_chapters"] == 100
+        assert data["resume_message"] == "Test message"
+        assert "timestamp" in data
+
+    def test_from_dict(self):
+        """Test creating resume state from dictionary."""
+        data = {
+            "work_id": "test_work",
+            "last_completed_index": 10,
+            "last_failed_index": None,
+            "total_chapters": 100,
+            "timestamp": "2024-01-01T00:00:00",
+            "resume_message": "Test message",
+        }
+
+        state = ResumeState.from_dict(data)
+
+        # Every field must survive deserialization, not just the required ones.
+        assert state.work_id == "test_work"
+        assert state.last_completed_index == 10
+        assert state.last_failed_index is None
+        assert state.total_chapters == 100
+        assert state.timestamp.isoformat() == "2024-01-01T00:00:00"
+        assert state.resume_message == "Test message"
+
+    def test_get_resume_index_with_failure(self):
+        """Test getting resume index when there's a failed chapter."""
+        state = ResumeState(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=15,
+            total_chapters=100,
+        )
+
+        assert state.get_resume_index() == 15
+
+    def test_get_resume_index_without_failure(self):
+        """Test getting resume index when there's no failed chapter."""
+        state = ResumeState(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        assert state.get_resume_index() == 11
+
+    def test_is_resumable_with_remaining(self):
+        """Test is_resumable when there are remaining chapters."""
+        state = ResumeState(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        assert state.is_resumable() is True
+
+    def test_is_resumable_complete(self):
+        """Test is_resumable when all chapters are complete."""
+        state = ResumeState(
+            work_id="test_work",
+            last_completed_index=99,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        assert state.is_resumable() is False
+
+
+class TestResumeTracker:
+    """Test cases for ResumeTracker class."""
+
+    @pytest.fixture
+    def temp_dir(self):
+        """Create a temporary directory for testing."""
+        with tempfile.TemporaryDirectory() as tmp:
+            yield Path(tmp)
+
+    @pytest.fixture
+    def tracker(self, temp_dir):
+        """Create a resume tracker rooted in the temporary directory."""
+        return ResumeTracker(temp_dir)
+
+    def test_init(self, temp_dir):
+        """Test ResumeTracker initialization."""
+        tracker = ResumeTracker(temp_dir)
+
+        assert tracker.storage_dir == temp_dir
+        assert tracker.resume_dir == temp_dir / "resume"
+        assert tracker.resume_dir.exists()
+
+    def test_save_resume_state(self, tracker):
+        """Test saving resume state."""
+        state = tracker.save_resume_state(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        assert state.work_id == "test_work"
+        assert state.last_completed_index == 10
+        assert "Resume from chapter" in state.resume_message
+
+        # Check file was created
+        resume_file = tracker._get_resume_file_path("test_work")
+        assert resume_file.exists()
+
+    def test_load_resume_state(self, tracker):
+        """Test loading resume state."""
+        # Save state first
+        tracker.save_resume_state(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        # Load it back
+        loaded = tracker.load_resume_state("test_work")
+
+        assert loaded is not None
+        assert loaded.work_id == "test_work"
+        assert loaded.last_completed_index == 10
+        assert loaded.last_failed_index is None
+        assert loaded.total_chapters == 100
+
+    def test_load_nonexistent_resume_state(self, tracker):
+        """Test loading a non-existent resume state."""
+        loaded = tracker.load_resume_state("nonexistent")
+        assert loaded is None
+
+    def test_delete_resume_state(self, tracker):
+        """Test deleting resume state."""
+        # Save state first
+        tracker.save_resume_state(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        # Delete it
+        tracker.delete_resume_state("test_work")
+
+        # Check it's gone
+        resume_file = tracker._get_resume_file_path("test_work")
+        assert not resume_file.exists()
+
+    def test_get_resume_index(self, tracker):
+        """Test getting resume index."""
+        # Save state first
+        tracker.save_resume_state(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        # Get resume index
+        index = tracker.get_resume_index("test_work", 100)
+
+        assert index == 11
+
+    def test_get_resume_index_nonexistent(self, tracker):
+        """Test getting resume index for non-existent state."""
+        index = tracker.get_resume_index("nonexistent", 100)
+        assert index is None
+
+    def test_update_on_chapter_complete(self, tracker):
+        """Test updating state on chapter completion."""
+        tracker.update_on_chapter_complete(
+            work_id="test_work",
+            chapter_index=5,
+            total_chapters=100,
+        )
+
+        loaded = tracker.load_resume_state("test_work")
+        assert loaded is not None
+        assert loaded.last_completed_index == 5
+        assert loaded.last_failed_index is None
+
+    def test_update_on_chapter_failed(self, tracker):
+        """Test updating state on chapter failure."""
+        tracker.update_on_chapter_failed(
+            work_id="test_work",
+            chapter_index=5,
+            total_chapters=100,
+        )
+
+        loaded = tracker.load_resume_state("test_work")
+        assert loaded is not None
+        assert loaded.last_failed_index == 5
+        # With no earlier checkpoint, nothing counts as completed yet.
+        assert loaded.last_completed_index == -1
+
+    def test_generate_resume_message_with_failure(self, tracker):
+        """Test generating resume message with failed chapter."""
+        state = ResumeState(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=15,
+            total_chapters=100,
+        )
+
+        # Use the fixture so nothing is created under the real /tmp.
+        message = tracker._generate_resume_message(state)
+
+        assert "failed chapter 15" in message
+        assert "85" in message  # remaining chapters
+
+    def test_generate_resume_message_without_failure(self, tracker):
+        """Test generating resume message without failed chapter."""
+        state = ResumeState(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+        )
+
+        # Use the fixture so nothing is created under the real /tmp.
+        message = tracker._generate_resume_message(state)
+
+        assert "Resume from chapter 11" in message
+        assert "89" in message  # remaining chapters
+
+    def test_get_all_resume_states(self, tracker):
+        """Test getting all resume states."""
+        # Save multiple states
+        tracker.save_resume_state("work1", 10, None, 100)
+        tracker.save_resume_state("work2", 20, None, 200)
+
+        states = tracker.get_all_resume_states()
+
+        assert len(states) == 2
+        work_ids = {s.work_id for s in states}
+        assert work_ids == {"work1", "work2"}
+
+    def test_clear_all_resume_states(self, tracker):
+        """Test clearing all resume states."""
+        # Save multiple states
+        tracker.save_resume_state("work1", 10, None, 100)
+        tracker.save_resume_state("work2", 20, None, 200)
+
+        tracker.clear_all_resume_states()
+
+        states = tracker.get_all_resume_states()
+        assert len(states) == 0
+
+    def test_update_existing_state_on_complete(self, tracker):
+        """Test that completing a chapter updates an existing state."""
+        # Save initial state
+        tracker.save_resume_state(
+            work_id="test_work",
+            last_completed_index=5,
+            last_failed_index=7,
+            total_chapters=100,
+        )
+
+        # Update on completion
+        tracker.update_on_chapter_complete(
+            work_id="test_work",
+            chapter_index=10,
+            total_chapters=100,
+        )
+
+        loaded = tracker.load_resume_state("test_work")
+        assert loaded is not None
+        assert loaded.last_completed_index == 10
+        assert loaded.last_failed_index is None  # Should be cleared
+
+    def test_custom_resume_message(self, tracker):
+        """Test saving state with custom resume message."""
+        state = tracker.save_resume_state(
+            work_id="test_work",
+            last_completed_index=10,
+            last_failed_index=None,
+            total_chapters=100,
+            resume_message="Custom message here",
+        )
+
+        assert state.resume_message == "Custom message here"
+
+    def test_get_resume_file_path(self, tracker):
+        """Test getting resume file path."""
+        path = tracker._get_resume_file_path("test_work")
+
+        assert path.name == "test_work_resume.json"
+        assert path.parent == tracker.resume_dir