Преглед изворни кода

feat(repository): Implement Story 5.8 - Failure List Recording and Retry (3 SP)

Implements global failure list management for tracking and retrying failures.

### Core Features
- FailureListManager: Global failure list manager
  - Records all failures to translate_failed.jsonl
  - Tracks retry count and resolved status
  - Supports filtering by work_id and resolved status
- FailedTranslation: Data class for failure records
  - Stores work_id, chapter_index, error info
  - Optional source_text for debugging
  - Timestamp and retry tracking

### New Repository Methods
- get_failure_list(): Get all failures from global list
- get_failures_for_work(): Get failures for specific work
- export_failure_list(): Export to JSONL/JSON/CSV
- get_failure_summary(): Get failure statistics
- get_retry_list(): Get failures available for retry
- mark_failure_resolved(): Mark failure as resolved
- clear_resolved_failures(): Remove resolved failures

### New ChapterTranslator Methods
- retry_from_failure_list(): Batch retry from global failure list
  - Supports filtering by work_id
  - Marks failures as resolved on success
  - Increments retry count on failure

### Integration
- Updated Repository.record_failure() to also write to global list
- Updated ChapterTranslator to pass source_text when recording failures

### Testing
- 22 unit tests covering all failure list functionality
- Tests for recording, loading, export, and retry operations

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
d8dfun пре 2 дана
родитељ
комит
17bc90d656

+ 3 - 0
src/repository/__init__.py

@@ -8,6 +8,7 @@ atomic write guarantees to ensure no data loss on crashes.
 from .models import WorkItem, ChapterItem, FailureRecord, WorkStatus, ChapterStatus
 from .jsonl_store import JSONLStore
 from .repository import Repository
+from .failure_list import FailureListManager, FailedTranslation
 
 __all__ = [
     "WorkItem",
@@ -17,4 +18,6 @@ __all__ = [
     "ChapterStatus",
     "JSONLStore",
     "Repository",
+    "FailureListManager",
+    "FailedTranslation",
 ]

+ 347 - 0
src/repository/failure_list.py

@@ -0,0 +1,347 @@
+"""
+Failure list manager for tracking and retrying failed translations.
+
+This module provides functionality for:
+- Writing failures to translate_failed.jsonl
+- Exporting failure lists
+- Batch retry from failure lists
+"""
+
import csv
import json
import logging
import os
from dataclasses import dataclass, field, fields
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional
+
logger = logging.getLogger(__name__)


@dataclass
class FailedTranslation:
    """
    A single failed-translation record in the global failure list.

    Attributes:
        work_id: The work item ID
        chapter_index: The chapter that failed
        error_type: Exception class name (e.g., "ValueError", "RuntimeError")
        error_message: Human-readable error message
        timestamp: When the failure occurred
        retry_count: Number of retry attempts made so far
        source_text: The original source text that failed (optional)
        resolved: Whether the failure has been resolved
    """

    work_id: str
    chapter_index: int
    error_type: str
    error_message: str
    timestamp: datetime = field(default_factory=datetime.now)
    retry_count: int = 0
    source_text: Optional[str] = None
    resolved: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dictionary."""
        return {
            "work_id": self.work_id,
            "chapter_index": self.chapter_index,
            "error_type": self.error_type,
            "error_message": self.error_message,
            # ISO 8601 keeps the timestamp round-trippable via fromisoformat().
            "timestamp": self.timestamp.isoformat(),
            "retry_count": self.retry_count,
            "source_text": self.source_text,
            "resolved": self.resolved,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "FailedTranslation":
        """
        Create an instance from a dictionary produced by to_dict().

        Raises:
            KeyError: If a required field is missing.
            ValueError: If the timestamp is not valid ISO 8601.
        """
        return cls(
            work_id=data["work_id"],
            chapter_index=data["chapter_index"],
            error_type=data["error_type"],
            error_message=data["error_message"],
            timestamp=datetime.fromisoformat(data["timestamp"]),
            retry_count=data.get("retry_count", 0),
            source_text=data.get("source_text"),
            resolved=data.get("resolved", False),
        )


class FailureListManager:
    """
    Manager for the global translation failure list.

    Maintains a translate_failed.jsonl file with all failed translations
    across all works, enabling export and batch retry operations.

    Appends are flushed and fsync'd; full rewrites go through a temporary
    file followed by os.replace, so a crash mid-write can never corrupt
    the existing list (matching the repository's atomic-write guarantees).
    """

    # Default cap on retry attempts before a failure drops out of retry lists.
    DEFAULT_MAX_RETRIES = 3

    def __init__(self, storage_dir: Path):
        """
        Initialize the failure list manager.

        Args:
            storage_dir: Base directory for storage (created if missing)
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.failure_list_path = self.storage_dir / "translate_failed.jsonl"

    def record_failure(
        self,
        work_id: str,
        chapter_index: int,
        error: Exception,
        source_text: Optional[str] = None
    ) -> FailedTranslation:
        """
        Record a failed translation to the global failure list.

        Args:
            work_id: The work item ID
            chapter_index: The chapter that failed
            error: The exception that occurred
            source_text: Optional source text that failed

        Returns:
            The created FailedTranslation record
        """
        failure = FailedTranslation(
            work_id=work_id,
            chapter_index=chapter_index,
            error_type=type(error).__name__,
            error_message=str(error),
            source_text=source_text,
        )

        self._append_failure(failure)
        logger.debug(
            "Recorded failure for work %s chapter %s: %s",
            work_id, chapter_index, error,
        )
        return failure

    def _append_failure(self, failure: FailedTranslation) -> None:
        """
        Append a failure record to the failure list file.

        The write is flushed and fsync'd so the record survives a crash
        immediately after recording.

        Args:
            failure: The failure record to append
        """
        data = json.dumps(failure.to_dict(), ensure_ascii=False)

        with open(self.failure_list_path, "a", encoding="utf-8") as f:
            f.write(data + "\n")
            f.flush()
            os.fsync(f.fileno())

    def _rewrite_all(self, failures: List[FailedTranslation]) -> None:
        """
        Atomically replace the failure list file with the given records.

        Writes to a sibling temp file, fsyncs it, then renames it over the
        original so readers never observe a partially written list.

        Args:
            failures: The complete set of records to persist
        """
        tmp_path = self.failure_list_path.with_name(
            self.failure_list_path.name + ".tmp"
        )
        with open(tmp_path, "w", encoding="utf-8") as f:
            for failure in failures:
                f.write(json.dumps(failure.to_dict(), ensure_ascii=False) + "\n")
            f.flush()
            os.fsync(f.fileno())
        # Atomic on POSIX and Windows alike.
        os.replace(tmp_path, self.failure_list_path)

    def load_failures(self, include_resolved: bool = False) -> Iterator[FailedTranslation]:
        """
        Load failure records from the failure list.

        Corrupted lines (invalid JSON, missing fields, bad timestamps) are
        skipped with a warning rather than aborting the whole load.

        Args:
            include_resolved: Whether to include resolved failures

        Yields:
            FailedTranslation instances
        """
        if not self.failure_list_path.exists():
            return

        with open(self.failure_list_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    failure = FailedTranslation.from_dict(json.loads(line))
                except (json.JSONDecodeError, KeyError, TypeError, ValueError) as e:
                    # Any malformed record counts as corruption; keep the rest.
                    logger.warning("Skipping corrupted line in failure list: %s", e)
                    continue
                if include_resolved or not failure.resolved:
                    yield failure

    def get_failures(self, include_resolved: bool = False) -> List[FailedTranslation]:
        """
        Get all failure records as a list.

        Args:
            include_resolved: Whether to include resolved failures

        Returns:
            List of FailedTranslation instances
        """
        return list(self.load_failures(include_resolved))

    def get_failures_for_work(self, work_id: str) -> List[FailedTranslation]:
        """
        Get unresolved failure records for a specific work.

        Args:
            work_id: The work item ID

        Returns:
            List of FailedTranslation instances for the work
        """
        return [f for f in self.load_failures() if f.work_id == work_id]

    def mark_resolved(self, work_id: str, chapter_index: int) -> None:
        """
        Mark a failure as resolved.

        Every record matching (work_id, chapter_index) is flagged and the
        list file is rewritten atomically.

        Args:
            work_id: The work item ID
            chapter_index: The chapter index
        """
        failures = self.get_failures(include_resolved=True)
        for failure in failures:
            if failure.work_id == work_id and failure.chapter_index == chapter_index:
                failure.resolved = True
        self._rewrite_all(failures)

        logger.debug(
            "Marked failure for work %s chapter %s as resolved",
            work_id, chapter_index,
        )

    def export_failure_list(
        self,
        output_path: Optional[Path] = None,
        format: str = "jsonl"
    ) -> Path:
        """
        Export the unresolved failures to a file.

        Args:
            output_path: Optional output file path (defaults to a
                ".export.jsonl" sibling of the failure list)
            format: Export format ("jsonl", "json", "csv")

        Returns:
            Path to the exported file

        Raises:
            ValueError: If format is not a supported value
        """
        if output_path is None:
            output_path = self.failure_list_path.with_suffix(".export.jsonl")

        failures = self.get_failures(include_resolved=False)

        if format == "jsonl":
            with open(output_path, "w", encoding="utf-8") as f:
                for failure in failures:
                    f.write(json.dumps(failure.to_dict(), ensure_ascii=False) + "\n")

        elif format == "json":
            data = [failure.to_dict() for failure in failures]
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, ensure_ascii=False)

        elif format == "csv":
            # Header columns come from the dataclass definition, so even an
            # empty export still yields a valid CSV with a header row.
            fieldnames = [fld.name for fld in fields(FailedTranslation)]
            with open(output_path, "w", encoding="utf-8", newline="") as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                for failure in failures:
                    writer.writerow(failure.to_dict())

        else:
            raise ValueError(f"Unsupported format: {format}")

        logger.info("Exported %d failures to %s", len(failures), output_path)
        return output_path

    def get_failure_summary(self) -> Dict[str, Any]:
        """
        Get a summary of all failures (resolved and unresolved).

        Returns:
            Dictionary with total_failures, resolved_count,
            unresolved_count, by_error_type and by_work counters
        """
        failures = self.get_failures(include_resolved=True)

        # Single pass builds both breakdowns.
        error_counts: Dict[str, int] = {}
        work_counts: Dict[str, int] = {}
        for failure in failures:
            error_counts[failure.error_type] = error_counts.get(failure.error_type, 0) + 1
            work_counts[failure.work_id] = work_counts.get(failure.work_id, 0) + 1

        resolved_count = sum(1 for f in failures if f.resolved)

        return {
            "total_failures": len(failures),
            "resolved_count": resolved_count,
            "unresolved_count": len(failures) - resolved_count,
            "by_error_type": error_counts,
            "by_work": work_counts,
        }

    def get_retry_list(
        self,
        work_id: Optional[str] = None,
        max_retries: int = DEFAULT_MAX_RETRIES,
    ) -> List[FailedTranslation]:
        """
        Get unresolved failures that are still eligible for retry.

        Args:
            work_id: Optional work ID to filter by
            max_retries: Retry-count cap; failures at or above it are excluded

        Returns:
            List of failures available for retry
        """
        failures = self.get_failures(include_resolved=False)

        if work_id:
            failures = [f for f in failures if f.work_id == work_id]

        return [f for f in failures if f.retry_count < max_retries]

    def increment_retry_count(self, work_id: str, chapter_index: int) -> None:
        """
        Increment the retry count for a failure.

        Every record matching (work_id, chapter_index) is bumped and the
        list file is rewritten atomically.

        Args:
            work_id: The work item ID
            chapter_index: The chapter index
        """
        failures = self.get_failures(include_resolved=True)
        for failure in failures:
            if failure.work_id == work_id and failure.chapter_index == chapter_index:
                failure.retry_count += 1
        self._rewrite_all(failures)

    def clear_resolved(self) -> int:
        """
        Remove all resolved failures from the failure list.

        Returns:
            Number of failures removed
        """
        failures = self.get_failures(include_resolved=True)
        unresolved = [f for f in failures if not f.resolved]
        self._rewrite_all(unresolved)

        removed_count = len(failures) - len(unresolved)
        logger.info("Cleared %d resolved failures from failure list", removed_count)
        return removed_count

    def clear_all(self) -> None:
        """Delete the failure list file, discarding every record."""
        if self.failure_list_path.exists():
            self.failure_list_path.unlink()
        logger.info("Cleared all failures from failure list")

+ 100 - 1
src/repository/repository.py

@@ -10,6 +10,7 @@ from typing import List, Optional, Iterator, Dict, Any
 
 from .models import WorkItem, ChapterItem, FailureRecord, WorkStatus, ChapterStatus
 from .jsonl_store import JSONLStore, JSONLError
+from .failure_list import FailureListManager, FailedTranslation
 
 
 class RepositoryError(Exception):
@@ -54,6 +55,7 @@ class Repository:
         """
         self.storage_dir = Path(storage_dir)
         self.store = JSONLStore(self.storage_dir)
+        self.failure_list_manager = FailureListManager(self.storage_dir)
 
     # ========== WorkItem Operations ==========
 
@@ -327,7 +329,8 @@ class Repository:
 
     def record_failure(
         self, work_id: str, chapter_index: int, error: Exception,
-        traceback_str: Optional[str] = None
+        traceback_str: Optional[str] = None,
+        source_text: Optional[str] = None
     ) -> FailureRecord:
         """
         Record a failure.
@@ -337,6 +340,7 @@ class Repository:
             chapter_index: Chapter index (-1 for work-level failures)
             error: The exception that occurred
             traceback_str: Optional traceback string
+            source_text: Optional source text that failed
 
         Returns:
             Created FailureRecord
@@ -345,6 +349,12 @@ class Repository:
             work_id, error, chapter_index, traceback_str
         )
         self.store.save_failure(failure)
+
+        # Also record to global failure list
+        self.failure_list_manager.record_failure(
+            work_id, chapter_index, error, source_text
+        )
+
         return failure
 
     def get_failures(
@@ -437,3 +447,92 @@ class Repository:
             List of work IDs
         """
         return [w.work_id for w in self.list_works()]
+
+    # ========== Failure List Management ==========
+
+    def get_failure_list(
+        self, include_resolved: bool = False
+    ) -> List[FailedTranslation]:
+        """
+        Get the global failure list.
+
+        Args:
+            include_resolved: Whether to include resolved failures
+
+        Returns:
+            List of FailedTranslation instances
+        """
+        return self.failure_list_manager.get_failures(include_resolved)
+
+    def get_failures_for_work(self, work_id: str) -> List[FailedTranslation]:
+        """
+        Get failure records for a specific work from the global list.
+
+        Args:
+            work_id: The work item ID
+
+        Returns:
+            List of FailedTranslation instances for the work
+        """
+        return self.failure_list_manager.get_failures_for_work(work_id)
+
+    def export_failure_list(
+        self,
+        output_path: Optional[Path] = None,
+        format: str = "jsonl"
+    ) -> Path:
+        """
+        Export the failure list to a file.
+
+        Args:
+            output_path: Optional output file path
+            format: Export format ("jsonl", "json", "csv")
+
+        Returns:
+            Path to the exported file
+        """
+        return self.failure_list_manager.export_failure_list(output_path, format)
+
+    def get_failure_summary(self) -> Dict[str, Any]:
+        """
+        Get a summary of all failures.
+
+        Returns:
+            Dictionary with failure statistics
+        """
+        return self.failure_list_manager.get_failure_summary()
+
+    def get_retry_list(self, work_id: Optional[str] = None) -> List[FailedTranslation]:
+        """
+        Get list of failures available for retry.
+
+        Args:
+            work_id: Optional work ID to filter by
+
+        Returns:
+            List of failures available for retry
+        """
+        return self.failure_list_manager.get_retry_list(work_id)
+
+    def mark_failure_resolved(self, work_id: str, chapter_index: int) -> None:
+        """
+        Mark a failure as resolved in the global list.
+
+        Args:
+            work_id: The work item ID
+            chapter_index: The chapter index
+        """
+        self.failure_list_manager.mark_resolved(work_id, chapter_index)
+
+    def clear_resolved_failures(self) -> int:
+        """
+        Remove all resolved failures from the failure list.
+
+        Returns:
+            Number of failures removed
+        """
+        return self.failure_list_manager.clear_resolved()
+
+    def clear_all_failures(self) -> None:
+        """Clear all failures from the failure list."""
+        self.failure_list_manager.clear_all()

+ 72 - 3
src/translator/chapter_translator.py

@@ -7,7 +7,7 @@ integrating with the repository for persistence.
 
 import logging
 import traceback
-from typing import List, Optional, Callable
+from typing import List, Optional, Callable, Dict
 from pathlib import Path
 
 from ..repository.models import ChapterItem, ChapterStatus
@@ -212,9 +212,10 @@ class ChapterTranslator:
                 work_id, chapter.chapter_index, len(all_chapters)
             )
 
-            # Record failure
+            # Record failure with source text
             self.repository.record_failure(
-                work_id, chapter.chapter_index, e, traceback.format_exc()
+                work_id, chapter.chapter_index, e, traceback.format_exc(),
+                source_text=chapter.content
             )
 
             # Notify failure
@@ -350,6 +351,74 @@ class ChapterTranslator:
 
         self.progress_reporter.on_complete()
 
+    def retry_from_failure_list(
+        self,
+        work_id: Optional[str] = None,
+        max_retries: int = 3
+    ) -> None:
+        """
+        Retry failed translations from the global failure list.
+
+        Args:
+            work_id: Optional work ID to filter by (None for all works)
+            max_retries: Maximum number of retry attempts per chapter
+        """
+        from ..repository.failure_list import FailedTranslation
+
+        # Get retry list from failure manager
+        retry_list = self.repository.get_retry_list(work_id)
+
+        if not retry_list:
+            logger.info("No failures to retry from failure list")
+            return
+
+        # Group by work_id
+        by_work: Dict[str, List[FailedTranslation]] = {}
+        for failure in retry_list:
+            if failure.retry_count < max_retries:
+                by_work.setdefault(failure.work_id, []).append(failure)
+
+        if not by_work:
+            logger.info("All failures have exceeded max retries")
+            return
+
+        logger.info(f"Retrying {len(retry_list)} failures from {len(by_work)} works")
+
+        # Process each work
+        for work_id, failures in by_work.items():
+            # Notify start for this work
+            self.progress_reporter.on_start(len(failures))
+
+            for failure in failures:
+                try:
+                    # Get the chapter
+                    chapter = self.repository.get_chapter(work_id, failure.chapter_index)
+                    if chapter is None:
+                        logger.warning(
+                            f"Chapter {failure.chapter_index} not found for work {work_id}"
+                        )
+                        continue
+
+                    # Translate the chapter
+                    self.translate_chapter(work_id, chapter)
+
+                    # Mark as resolved if successful
+                    self.repository.mark_failure_resolved(work_id, failure.chapter_index)
+
+                except Exception:
+                    # Increment retry count on failure
+                    self.repository.failure_list_manager.increment_retry_count(
+                        work_id, failure.chapter_index
+                    )
+                    logger.error(
+                        f"Retry failed for work {work_id} chapter {failure.chapter_index}"
+                    )
+                    continue
+
+            self.progress_reporter.on_complete()
+
+        logger.info(f"Completed retry from failure list")
+
     def set_progress_callback(self, callback: ProgressCallback) -> None:
         """
         Update the progress callback.

+ 349 - 0
tests/repository/test_failure_list.py

@@ -0,0 +1,349 @@
+"""
+Unit tests for the failure list manager module.
+
+Tests cover failure list recording, export, and retry functionality.
+"""
+
+import sys
+from unittest.mock import Mock
+from pathlib import Path
+import tempfile
+import json
+
# Install a shared Mock for the heavyweight ML dependencies before the package
# under test is imported, so collecting these tests never loads real
# torch/transformers.
sys_mock = Mock()
sys.modules["torch"] = sys_mock
sys.modules["transformers"] = sys_mock
+
+import pytest
+
+from src.repository.failure_list import (
+    FailureListManager,
+    FailedTranslation,
+)
+
+
class TestFailedTranslation:
    """Tests for the FailedTranslation dataclass."""

    def test_create_failed_translation(self):
        """A freshly created record carries its fields and safe defaults."""
        record = FailedTranslation(
            work_id="test_work",
            chapter_index=5,
            error_type="ValueError",
            error_message="Test error",
        )

        assert record.work_id == "test_work"
        assert record.chapter_index == 5
        assert record.error_type == "ValueError"
        assert record.error_message == "Test error"
        assert record.resolved is False
        assert record.retry_count == 0

    def test_to_dict(self):
        """to_dict() exposes every field plus a serialized timestamp."""
        record = FailedTranslation(
            work_id="test_work",
            chapter_index=5,
            error_type="ValueError",
            error_message="Test error",
            source_text="Test content",
        )

        payload = record.to_dict()

        assert payload["work_id"] == "test_work"
        assert payload["chapter_index"] == 5
        assert payload["error_type"] == "ValueError"
        assert payload["source_text"] == "Test content"
        assert "timestamp" in payload

    def test_from_dict(self):
        """from_dict() rebuilds a record from its serialized form."""
        payload = {
            "work_id": "test_work",
            "chapter_index": 5,
            "error_type": "ValueError",
            "error_message": "Test error",
            "timestamp": "2024-01-01T00:00:00",
            "retry_count": 2,
            "source_text": "Test content",
            "resolved": False,
        }

        record = FailedTranslation.from_dict(payload)

        assert record.work_id == "test_work"
        assert record.chapter_index == 5
        assert record.retry_count == 2
+
+
class TestFailureListManager:
    """Test cases for FailureListManager class."""

    @pytest.fixture
    def temp_dir(self):
        """Create a temporary directory for testing."""
        # TemporaryDirectory cleans itself up when the generator resumes.
        with tempfile.TemporaryDirectory() as tmp:
            yield Path(tmp)

    @pytest.fixture
    def manager(self, temp_dir):
        """Create a failure list manager for testing."""
        return FailureListManager(temp_dir)

    def test_init(self, temp_dir):
        """Test FailureListManager initialization."""
        manager = FailureListManager(temp_dir)

        assert manager.storage_dir == temp_dir
        assert manager.failure_list_path == temp_dir / "translate_failed.jsonl"

    def test_record_failure(self, manager):
        """Test recording a failure."""
        error = ValueError("Test error")
        failure = manager.record_failure(
            work_id="test_work",
            chapter_index=5,
            error=error,
            source_text="Test content"
        )

        # The returned record mirrors the arguments and the exception type.
        assert failure.work_id == "test_work"
        assert failure.chapter_index == 5
        assert failure.error_type == "ValueError"
        assert failure.source_text == "Test content"

        # Check file was created
        assert manager.failure_list_path.exists()

    def test_load_failures(self, manager):
        """Test loading failures from file."""
        # Record some failures
        error1 = ValueError("Error 1")
        error2 = RuntimeError("Error 2")

        manager.record_failure("work1", 1, error1)
        manager.record_failure("work2", 2, error2)

        # Load them back
        failures = list(manager.load_failures())

        assert len(failures) == 2
        assert any(f.work_id == "work1" for f in failures)
        assert any(f.work_id == "work2" for f in failures)

    def test_load_failures_exclude_resolved(self, manager):
        """Test loading failures excluding resolved ones."""
        # Record failures
        error1 = ValueError("Error 1")
        error2 = RuntimeError("Error 2")

        manager.record_failure("work1", 1, error1)
        manager.record_failure("work2", 2, error2)

        # Mark one as resolved
        manager.mark_resolved("work1", 1)

        # Load excluding resolved
        failures = list(manager.load_failures(include_resolved=False))

        # Only the unresolved work2 record should remain.
        assert len(failures) == 1
        assert failures[0].work_id == "work2"

    def test_get_failures(self, manager):
        """Test getting failures as a list."""
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        failures = manager.get_failures()

        assert len(failures) == 1
        assert failures[0].work_id == "test_work"

    def test_get_failures_for_work(self, manager):
        """Test getting failures for a specific work."""
        error = ValueError("Test error")
        manager.record_failure("work1", 1, error)
        manager.record_failure("work2", 2, error)

        work1_failures = manager.get_failures_for_work("work1")

        assert len(work1_failures) == 1
        assert work1_failures[0].work_id == "work1"

    def test_mark_resolved(self, manager):
        """Test marking a failure as resolved."""
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        manager.mark_resolved("test_work", 1)

        # Load failures
        failures = manager.get_failures(include_resolved=True)

        assert len(failures) == 1
        assert failures[0].resolved is True

    def test_export_failure_list_jsonl(self, manager, temp_dir):
        """Test exporting failure list in JSONL format."""
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        output_path = temp_dir / "export.jsonl"
        result = manager.export_failure_list(output_path, format="jsonl")

        assert result == output_path
        assert result.exists()

        # Verify content
        with open(result, "r") as f:
            lines = f.readlines()
        assert len(lines) == 1

    def test_export_failure_list_json(self, manager, temp_dir):
        """Test exporting failure list in JSON format."""
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        output_path = temp_dir / "export.json"
        result = manager.export_failure_list(output_path, format="json")

        assert result == output_path
        assert result.exists()

        # Verify content
        with open(result, "r") as f:
            data = json.load(f)
        # JSON export is a single array of record dicts.
        assert isinstance(data, list)
        assert len(data) == 1

    def test_export_failure_list_csv(self, manager, temp_dir):
        """Test exporting failure list in CSV format."""
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        output_path = temp_dir / "export.csv"
        result = manager.export_failure_list(output_path, format="csv")

        assert result == output_path
        assert result.exists()

    def test_export_failure_list_unsupported_format(self, manager, temp_dir):
        """Test exporting with unsupported format."""
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        with pytest.raises(ValueError, match="Unsupported format"):
            manager.export_failure_list(format="xml")

    def test_get_failure_summary(self, manager):
        """Test getting failure summary."""
        error1 = ValueError("Error 1")
        error2 = RuntimeError("Error 2")
        error3 = ValueError("Error 3")

        manager.record_failure("work1", 1, error1)
        manager.record_failure("work1", 2, error2)
        manager.record_failure("work2", 1, error3)

        summary = manager.get_failure_summary()

        # Totals and both breakdowns must reflect all three records.
        assert summary["total_failures"] == 3
        assert summary["resolved_count"] == 0
        assert summary["unresolved_count"] == 3
        assert summary["by_error_type"]["ValueError"] == 2
        assert summary["by_error_type"]["RuntimeError"] == 1
        assert summary["by_work"]["work1"] == 2
        assert summary["by_work"]["work2"] == 1

    def test_get_retry_list(self, manager):
        """Test getting retry list."""
        error = ValueError("Test error")
        manager.record_failure("work1", 1, error)
        manager.record_failure("work2", 2, error)

        retry_list = manager.get_retry_list()

        assert len(retry_list) == 2

    def test_get_retry_list_for_work(self, manager):
        """Test getting retry list for specific work."""
        error = ValueError("Test error")
        manager.record_failure("work1", 1, error)
        manager.record_failure("work2", 2, error)

        retry_list = manager.get_retry_list(work_id="work1")

        assert len(retry_list) == 1
        assert retry_list[0].work_id == "work1"

    def test_increment_retry_count(self, manager):
        """Test incrementing retry count."""
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        manager.increment_retry_count("test_work", 1)

        failures = manager.get_failures()
        assert failures[0].retry_count == 1

    def test_clear_resolved(self, manager):
        """Test clearing resolved failures."""
        error1 = ValueError("Error 1")
        error2 = RuntimeError("Error 2")

        manager.record_failure("work1", 1, error1)
        manager.record_failure("work2", 2, error2)

        # Mark one as resolved
        manager.mark_resolved("work1", 1)

        # Clear resolved
        removed = manager.clear_resolved()

        assert removed == 1

        # Only unresolved should remain
        failures = manager.get_failures(include_resolved=False)
        assert len(failures) == 1
        assert failures[0].work_id == "work2"

    def test_clear_all(self, manager):
        """Test clearing all failures."""
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        assert manager.failure_list_path.exists()

        manager.clear_all()

        # clear_all removes the backing file entirely.
        assert not manager.failure_list_path.exists()

    def test_empty_failure_list(self, manager):
        """Test operations on empty failure list."""
        # All read operations must degrade gracefully before any record exists.
        failures = manager.get_failures()
        assert len(failures) == 0

        summary = manager.get_failure_summary()
        assert summary["total_failures"] == 0

        retry_list = manager.get_retry_list()
        assert len(retry_list) == 0

    def test_corrupted_line_handling(self, manager, temp_dir):
        """Test handling of corrupted lines in failure list."""
        # Record a valid failure
        error = ValueError("Test error")
        manager.record_failure("test_work", 1, error)

        # Add a corrupted line
        with open(manager.failure_list_path, "a") as f:
            f.write("this is not valid json\n")

        # Should skip corrupted line
        failures = manager.get_failures()
        assert len(failures) == 1
        assert failures[0].work_id == "test_work"