Przeglądaj źródła

feat(fingerprint): Implement fingerprint mechanism (Epic 2)

- Story 2.1: FileFingerprint - MD5 calculation with quick hash support
- Story 2.2: FingerprintStore - duplicate detection with JSON persistence
- Story 2.3: FingerprintService - import integration with workflow checks
- Story 2.4: BatchFingerprintChecker - batch processing utilities

Features:
- MD5-based file fingerprinting for duplicate detection
- Quick hash sampling for large files (preliminary check)
- Persistent fingerprint index with atomic writes
- Integration with Repository for work completion checking
- Batch file checking with categorization and filtering
- Full test coverage (36 tests)

Prevents duplicate translation, saves user costs.

Epic 2 completed ✅

Part of Phase 2b: Core Feature Development

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
d8dfun 2 dni temu
rodzic
commit
9dd9c3e9d0

+ 21 - 7
src/__init__.py

@@ -6,13 +6,27 @@ A novel translation tool with glossary support and crash-safe state management.
 
 
__version__ = "0.1.0"
 
 
-# Translator module exports
-from src.translator import (
-    TranslationEngine,
-    TranslationPipeline,
-    ChapterTranslator,
-    ProgressReporter,
-)
# Optional translator module exports (requires torch)
try:
    from src.translator import (
        TranslationEngine,
        TranslationPipeline,
        ChapterTranslator,
        ProgressReporter,
    )
    _translator_available = True
except ImportError:
    _translator_available = False

    # Fallback stubs keep the names importable (for type checking and
    # for __all__) but fail loudly if actually used.  The previous
    # silent `pass` stubs could be instantiated and passed around,
    # deferring the failure far away from its real cause (missing torch).
    class TranslationEngine:
        """Placeholder: optional translator dependencies are not installed."""
        def __init__(self, *args, **kwargs):
            raise ImportError(
                "TranslationEngine is unavailable: install the optional "
                "torch-based translator dependencies"
            )

    class TranslationPipeline:
        """Placeholder: optional translator dependencies are not installed."""
        def __init__(self, *args, **kwargs):
            raise ImportError(
                "TranslationPipeline is unavailable: install the optional "
                "torch-based translator dependencies"
            )

    class ChapterTranslator:
        """Placeholder: optional translator dependencies are not installed."""
        def __init__(self, *args, **kwargs):
            raise ImportError(
                "ChapterTranslator is unavailable: install the optional "
                "torch-based translator dependencies"
            )

    class ProgressReporter:
        """Placeholder: optional translator dependencies are not installed."""
        def __init__(self, *args, **kwargs):
            raise ImportError(
                "ProgressReporter is unavailable: install the optional "
                "torch-based translator dependencies"
            )
 
 
__all__ = [
    "TranslationEngine",

+ 24 - 0
src/fingerprint/__init__.py

@@ -0,0 +1,24 @@
+"""
+Fingerprint module for duplicate file detection.
+
+This module provides MD5-based file fingerprinting to prevent
+duplicate translations and save user costs.
+
+Classes:
+    FileFingerprint: Calculate file MD5 hashes
+    FingerprintStore: Store and query fingerprints
+    FingerprintService: Integration service for import flow
+    BatchFingerprintChecker: Batch file checking
+"""
+
+from .calculator import FileFingerprint
+from .store import FingerprintStore
+from .service import FingerprintService
+from .batch import BatchFingerprintChecker
+
+__all__ = [
+    "FileFingerprint",
+    "FingerprintStore",
+    "FingerprintService",
+    "BatchFingerprintChecker",
+]

+ 177 - 0
src/fingerprint/batch.py

@@ -0,0 +1,177 @@
+"""
+Batch fingerprint checking utilities.
+
+This module provides efficient batch processing for checking
+multiple files for duplicates.
+"""
+
+from typing import List, Dict, Tuple, Optional
+
+from .service import FingerprintService
+
+
class BatchFingerprintChecker:
    """
    Batch fingerprint checker for processing multiple files.

    This class provides utilities for checking many files at once,
    useful for batch import workflows.

    Attributes:
        service: The underlying fingerprint service

    Example:
        >>> checker = BatchFingerprintChecker(service)
        >>> results = checker.check_files(["file1.txt", "file2.txt"])
        >>> new_files = checker.filter_new_files(["file1.txt", "file2.txt"])
    """

    def __init__(self, service: FingerprintService):
        """
        Initialize the batch checker.

        Args:
            service: FingerprintService instance
        """
        self.service = service

    def check_files(
        self, file_paths: List[str]
    ) -> Dict[str, Tuple[bool, Optional[str]]]:
        """
        Check multiple files for duplicates.

        Args:
            file_paths: List of file paths to check

        Returns:
            Dictionary mapping each file path to a tuple:
                (is_duplicate, work_id)

        Note:
            Files that don't exist or can't be read are reported as
            (False, None) rather than raising, so one bad path does
            not abort the whole batch.
        """
        results = {}

        for file_path in file_paths:
            try:
                results[file_path] = self.service.check_before_import(file_path)
            except (FileNotFoundError, IOError):
                # Unreadable files count as "not a duplicate" here; callers
                # needing to distinguish errors should use categorize_files().
                results[file_path] = (False, None)

        return results

    def filter_new_files(self, file_paths: List[str]) -> List[str]:
        """
        Filter a list of files, returning only new (non-duplicate) files.

        Args:
            file_paths: List of file paths to filter

        Returns:
            List of file paths that have not been translated

        Note:
            Files that can't be read are included in the result
            to allow the import pipeline to handle errors appropriately
            (check_files maps them to (False, None), i.e. not duplicates).
        """
        # Delegate to check_files so the per-file error handling and
        # hashing live in exactly one place.
        results = self.check_files(file_paths)
        return [path for path, (is_dup, _) in results.items() if not is_dup]

    def filter_duplicate_files(
        self, file_paths: List[str]
    ) -> Dict[str, Optional[str]]:
        """
        Filter a list of files, returning only duplicates with their work IDs.

        Args:
            file_paths: List of file paths to filter

        Returns:
            Dictionary mapping duplicate file paths to their work IDs

        Note:
            Files that can't be read are not included in the result
            (check_files maps them to (False, None), which is filtered out).
        """
        results = self.check_files(file_paths)
        return {
            path: work_id
            for path, (is_dup, work_id) in results.items()
            if is_dup and work_id
        }

    def categorize_files(
        self, file_paths: List[str]
    ) -> Dict[str, List[str]]:
        """
        Categorize files into new, duplicate, and unreadable.

        Args:
            file_paths: List of file paths to categorize

        Returns:
            Dictionary with three keys:
                - "new": List of new file paths
                - "duplicate": List of duplicate file paths
                - "error": List of files that couldn't be read
        """
        # Not delegated to check_files: the error category must be kept
        # distinct from "new", which check_files collapses away.
        result: Dict[str, List[str]] = {
            "new": [],
            "duplicate": [],
            "error": [],
        }

        for file_path in file_paths:
            try:
                is_dup, _ = self.service.check_before_import(file_path)
            except (FileNotFoundError, IOError):
                result["error"].append(file_path)
            else:
                result["duplicate" if is_dup else "new"].append(file_path)

        return result

    def get_summary(
        self, file_paths: List[str]
    ) -> Dict[str, int]:
        """
        Get a summary of duplicate detection results.

        Args:
            file_paths: List of file paths to analyze

        Returns:
            Dictionary with counts:
                - total: Total number of files
                - new: Number of new files
                - duplicate: Number of duplicate files
                - error: Number of files that couldn't be read
        """
        categorized = self.categorize_files(file_paths)
        return {
            "total": len(file_paths),
            "new": len(categorized["new"]),
            "duplicate": len(categorized["duplicate"]),
            "error": len(categorized["error"]),
        }

+ 134 - 0
src/fingerprint/calculator.py

@@ -0,0 +1,134 @@
+"""
+File fingerprint calculator.
+
+This module provides MD5-based hash calculation for files,
+supporting both full file hashing and quick sampling.
+"""
+
+import hashlib
+from pathlib import Path
+from typing import Dict, Any
+
+
class FileFingerprint:
    """
    File fingerprint calculator using MD5 hashing.

    MD5 is used for duplicate detection only, not for security, so its
    cryptographic weaknesses are not a concern here.

    Attributes:
        chunk_size: Size of chunks to read when calculating hashes

    Example:
        >>> calc = FileFingerprint()
        >>> md5 = calc.calculate_md5("novel.txt")
        >>> meta = calc.get_file_meta("novel.txt")
    """

    def __init__(self, chunk_size: int = 8192):
        """
        Initialize the fingerprint calculator.

        Args:
            chunk_size: Size of chunks to read (default: 8192 bytes)
        """
        self.chunk_size = chunk_size

    @staticmethod
    def _checked_path(file_path: str) -> Path:
        """
        Convert file_path to a Path, raising if it does not exist.

        Centralizes the existence check (and its error message) that was
        previously duplicated in every public method.

        Args:
            file_path: Path to the file

        Returns:
            The path as a pathlib.Path

        Raises:
            FileNotFoundError: If the file does not exist
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")
        return path

    def calculate_md5(self, file_path: str) -> str:
        """
        Calculate the MD5 hash of a file.

        Reads the entire file in chunks and computes the MD5 hash.
        Suitable for files of any size.

        Args:
            file_path: Path to the file

        Returns:
            32-character hexadecimal MD5 hash string

        Raises:
            FileNotFoundError: If the file does not exist
            IOError: If the file cannot be read
        """
        path = self._checked_path(file_path)

        md5 = hashlib.md5()
        with open(path, 'rb') as f:
            # Chunked reads keep memory flat for arbitrarily large files.
            while chunk := f.read(self.chunk_size):
                md5.update(chunk)
        return md5.hexdigest()

    def calculate_quick_hash(
        self, file_path: str, sample_size: int = 1024
    ) -> str:
        """
        Calculate a quick hash using only the beginning of the file.

        This is useful for large files as a preliminary check before
        computing the full hash. Not suitable for final duplicate
        detection. For files no larger than sample_size the result
        equals the full MD5 hash.

        Args:
            file_path: Path to the file
            sample_size: Number of bytes to read from the start

        Returns:
            32-character hexadecimal MD5 hash of the sample

        Raises:
            FileNotFoundError: If the file does not exist
        """
        path = self._checked_path(file_path)

        md5 = hashlib.md5()
        with open(path, 'rb') as f:
            md5.update(f.read(sample_size))
        return md5.hexdigest()

    def get_file_size(self, file_path: str) -> int:
        """
        Get the size of a file in bytes.

        Args:
            file_path: Path to the file

        Returns:
            File size in bytes

        Raises:
            FileNotFoundError: If the file does not exist
        """
        return self._checked_path(file_path).stat().st_size

    def get_file_meta(self, file_path: str) -> Dict[str, Any]:
        """
        Get file metadata.

        Args:
            file_path: Path to the file

        Returns:
            Dictionary with file metadata:
                - name: File name
                - size: File size in bytes
                - modified_time: Last modification timestamp

        Raises:
            FileNotFoundError: If the file does not exist
        """
        path = self._checked_path(file_path)

        stat = path.stat()
        return {
            "name": path.name,
            "size": stat.st_size,
            "modified_time": stat.st_mtime,
        }

+ 149 - 0
src/fingerprint/service.py

@@ -0,0 +1,149 @@
+"""
+Fingerprint service for import integration.
+
+This module provides high-level services for checking file
+fingerprints during the import workflow.
+"""
+
+from typing import Optional, Tuple
+
+from .calculator import FileFingerprint
+from .store import FingerprintStore
+from ..repository import Repository, WorkItem
+
+
class FingerprintService:
    """
    High-level duplicate-check service for the import workflow.

    Combines fingerprint calculation with persistent lookup behind a
    small, import-oriented API.

    Attributes:
        repository: The repository instance
        store: The fingerprint store
        calculator: The fingerprint calculator

    Example:
        >>> service = FingerprintService(repository)
        >>> is_dup, work_id = service.check_before_import("novel.txt")
        >>> if is_dup:
        ...     print(f"Already translated: {work_id}")
        >>> else:
        ...     work = repo.create_work("novel.txt")
        ...     service.register_import(work.work_id, "novel.txt")
    """

    def __init__(self, repository: Repository):
        """
        Initialize the fingerprint service.

        Args:
            repository: Repository instance
        """
        self.repository = repository
        self.store = FingerprintStore(repository)
        self.calculator = FileFingerprint()

    def check_before_import(self, file_path: str) -> Tuple[bool, Optional[str]]:
        """
        Decide whether a file was already translated, prior to import.

        A fingerprint hit only counts as a duplicate when the recorded
        work item still exists and has reached "completed" status.

        Args:
            file_path: Path to the file to check

        Returns:
            Tuple of (is_duplicate, work_id):
                - is_duplicate: True if file was already translated
                - work_id: The work ID if duplicate, None otherwise

        Raises:
            FileNotFoundError: If the file does not exist
        """
        recorded_id = self.store.check_duplicate(file_path)
        if not recorded_id:
            return (False, None)

        # Stale index entries (deleted or unfinished works) are not duplicates.
        work = self.repository.get_work(recorded_id)
        if work and work.status == "completed":
            return (True, recorded_id)
        return (False, None)

    def register_import(self, work_id: str, file_path: str) -> None:
        """
        Record an imported file so later imports detect it as a duplicate.

        Call this after the work item has been created.

        Args:
            work_id: Work item ID
            file_path: Path to the imported file
        """
        file_meta = self.calculator.get_file_meta(file_path)
        self.store.add_fingerprint(work_id, file_path, file_meta)

    def register_batch_import(
        self, work_id: str, file_paths: list[str]
    ) -> None:
        """
        Record several files under a single work item.

        Args:
            work_id: Work item ID
            file_paths: List of file paths
        """
        for path in file_paths:
            try:
                self.register_import(work_id, path)
            except (FileNotFoundError, IOError):
                continue  # best effort: unreadable files are skipped

    def get_fingerprint(self, file_path: str) -> str:
        """
        Return the full MD5 fingerprint of a file.

        Args:
            file_path: Path to the file

        Returns:
            32-character hexadecimal MD5 hash

        Raises:
            FileNotFoundError: If the file does not exist
        """
        return self.calculator.calculate_md5(file_path)

    def get_file_info(self, file_path: str) -> dict:
        """
        Collect fingerprint, metadata, and duplicate status in one call.

        Args:
            file_path: Path to the file

        Returns:
            Dictionary with:
                - fingerprint: MD5 hash
                - metadata: File metadata
                - is_duplicate: Whether file was already translated
                - existing_work_id: Work ID if duplicate

        Raises:
            FileNotFoundError: If the file does not exist
        """
        duplicate, existing_id = self.check_before_import(file_path)
        return {
            "fingerprint": self.get_fingerprint(file_path),
            "metadata": self.calculator.get_file_meta(file_path),
            "is_duplicate": duplicate,
            "existing_work_id": existing_id,
        }

+ 196 - 0
src/fingerprint/store.py

@@ -0,0 +1,196 @@
+"""
+Fingerprint storage and query interface.
+
+This module provides persistent storage for file fingerprints,
+enabling duplicate detection for previously translated files.
+"""
+
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+
+from ..repository import Repository
+
+
class FingerprintStore:
    """
    Fingerprint storage for detecting duplicate translations.

    Maintains a JSON index mapping MD5 hashes to work items,
    allowing efficient duplicate detection before translation.

    Attributes:
        repository: The repository instance
        index_file: Path to the fingerprint index JSON file
        index: In-memory mapping of MD5 hash -> fingerprint record

    Example:
        >>> store = FingerprintStore(repository)
        >>> store.add_fingerprint("work123", "novel.txt", {...})
        >>> work_id = store.check_duplicate("novel_copy.txt")
        >>> if work_id:
        ...     print(f"Already translated in {work_id}")
    """

    def __init__(self, repository: Repository):
        """
        Initialize the fingerprint store.

        Args:
            repository: Repository instance for storage directory
        """
        # Deferred import kept (it was previously repeated inside each
        # method), but done once here instead of per call.
        from .calculator import FileFingerprint

        self.repository = repository
        self.index_file = repository.storage_dir / "fingerprints.json"
        self._calculator = FileFingerprint()
        self._load_index()

    def _load_index(self) -> None:
        """Load the fingerprint index from disk, starting fresh if corrupt."""
        if self.index_file.exists():
            try:
                with open(self.index_file, 'r', encoding='utf-8') as f:
                    self.index: Dict[str, Dict[str, Any]] = json.load(f)
            except (json.JSONDecodeError, IOError):
                # Corrupted index, start fresh
                self.index = {}
        else:
            self.index = {}

    def _save_index(self) -> None:
        """
        Save the fingerprint index to disk.

        Creates parent directories if needed.
        Uses atomic write (temp file + rename) to prevent corruption.
        """
        self.index_file.parent.mkdir(parents=True, exist_ok=True)

        # Atomic write: write to temp file, then rename.
        # Path.replace is atomic on POSIX and overwrites on Windows.
        temp_file = self.index_file.with_suffix('.tmp')
        try:
            with open(temp_file, 'w', encoding='utf-8') as f:
                json.dump(self.index, f, indent=2, ensure_ascii=False)
            temp_file.replace(self.index_file)
        except OSError:
            # Clean up temp file on error so a failed save leaves no litter
            if temp_file.exists():
                temp_file.unlink()
            raise

    def add_fingerprint(
        self, work_id: str, file_path: str, metadata: Dict[str, Any]
    ) -> None:
        """
        Add a file fingerprint to the index.

        Silently does nothing if the file cannot be read (e.g. it was
        moved or deleted between import and registration).

        Args:
            work_id: Work item ID
            file_path: Path to the source file
            metadata: File metadata (name, size, etc.)
        """
        try:
            fingerprint = self._calculator.calculate_md5(file_path)
        except (FileNotFoundError, IOError):
            # File may have been moved/deleted, skip
            return

        self.index[fingerprint] = {
            "work_id": work_id,
            "file_path": file_path,
            "metadata": metadata,
            "created_at": datetime.now().isoformat(),
        }

        self._save_index()

    def check_duplicate(self, file_path: str) -> Optional[str]:
        """
        Check if a file has already been translated.

        Args:
            file_path: Path to the file to check

        Returns:
            Work ID if the file was already translated, None otherwise

        Raises:
            FileNotFoundError: If the file does not exist
        """
        fingerprint = self._calculator.calculate_md5(file_path)

        record = self.index.get(fingerprint)
        if record is not None:
            return record["work_id"]
        return None

    def get_work_history(self, work_id: str) -> List[Dict[str, Any]]:
        """
        Get all fingerprint records for a work item.

        Args:
            work_id: Work item ID

        Returns:
            List of fingerprint records with keys:
                - fingerprint: MD5 hash
                - file_path: Original file path
                - metadata: File metadata
                - created_at: ISO timestamp (may be None for old records)
        """
        history = []
        for fp, record in self.index.items():
            if record["work_id"] == work_id:
                history.append({
                    "fingerprint": fp,
                    "file_path": record["file_path"],
                    "metadata": record["metadata"],
                    # .get(): records written before created_at existed lack it
                    "created_at": record.get("created_at"),
                })
        return history

    def remove_fingerprint(self, file_path: str) -> bool:
        """
        Remove a fingerprint from the index.

        Args:
            file_path: Path to the file

        Returns:
            True if removed, False if not found (or file unreadable)
        """
        try:
            fingerprint = self._calculator.calculate_md5(file_path)
        except (FileNotFoundError, IOError):
            return False

        if fingerprint in self.index:
            del self.index[fingerprint]
            self._save_index()
            return True
        return False

    def clear(self) -> None:
        """Clear all fingerprints from the index and persist the change."""
        self.index = {}
        self._save_index()

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about the fingerprint store.

        Returns:
            Dictionary with statistics:
                - total_fingerprints: Total number of fingerprints
                - unique_works: Number of unique work items
        """
        work_ids = {record["work_id"] for record in self.index.values()}
        return {
            "total_fingerprints": len(self.index),
            "unique_works": len(work_ids),
        }

+ 724 - 0
tests/test_fingerprint.py

@@ -0,0 +1,724 @@
+"""
+Unit tests for the fingerprint module.
+
+Tests cover FileFingerprint, FingerprintStore, FingerprintService,
+and BatchFingerprintChecker functionality.
+"""
+
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from src.fingerprint.calculator import FileFingerprint
+from src.fingerprint.store import FingerprintStore
+from src.fingerprint.service import FingerprintService
+from src.fingerprint.batch import BatchFingerprintChecker
+from src.repository import Repository
+from src.repository.models import WorkItem, WorkStatus
+
+
class TestFileFingerprint:
    """Unit tests for the FileFingerprint calculator."""

    def test_calculate_md5(self):
        """A known payload hashes to its published MD5 digest."""
        with tempfile.TemporaryDirectory() as tmpdir:
            sample = Path(tmpdir) / "hello.bin"
            sample.write_bytes(b"Hello, World!")

            digest = FileFingerprint().calculate_md5(str(sample))

            # Known MD5 for "Hello, World!"
            assert digest == "65a8e27d8879283831b664bd8b7f0ad4"
            assert len(digest) == 32

    def test_calculate_md5_different_content(self):
        """Different file contents yield different digests."""
        with tempfile.TemporaryDirectory() as tmpdir:
            first = Path(tmpdir) / "file1.txt"
            second = Path(tmpdir) / "file2.txt"
            first.write_text("content one")
            second.write_text("content two")

            calc = FileFingerprint()
            assert calc.calculate_md5(str(first)) != calc.calculate_md5(str(second))

    def test_calculate_md5_same_content(self):
        """Identical contents yield identical digests regardless of path."""
        with tempfile.TemporaryDirectory() as tmpdir:
            first = Path(tmpdir) / "file1.txt"
            second = Path(tmpdir) / "file2.txt"
            for target in (first, second):
                target.write_text("identical content")

            calc = FileFingerprint()
            assert calc.calculate_md5(str(first)) == calc.calculate_md5(str(second))

    def test_calculate_md5_large_file(self):
        """Chunked hashing handles files larger than one read chunk."""
        with tempfile.TemporaryDirectory() as tmpdir:
            big = Path(tmpdir) / "big.bin"
            big.write_bytes(b"x" * 100_000)  # 100KB spans many 8KB chunks

            assert len(FileFingerprint().calculate_md5(str(big))) == 32

    def test_calculate_quick_hash(self):
        """Quick hash covers only the sampled prefix of the file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            sample = Path(tmpdir) / "hello.bin"
            sample.write_bytes(b"Hello, World!")

            calc = FileFingerprint()
            quick = calc.calculate_quick_hash(str(sample), sample_size=5)

            # MD5 of the first 5 bytes, "Hello"
            assert quick == "8b1a9953c4611296a827abf8c47804d7"
            assert len(quick) == 32
            # Sampling must not accidentally equal the full-file hash
            assert quick != calc.calculate_md5(str(sample))

    def test_get_file_size(self):
        """Reported size matches the number of bytes written."""
        with tempfile.TemporaryDirectory() as tmpdir:
            payload = b"Test content for size"
            target = Path(tmpdir) / "sized.bin"
            target.write_bytes(payload)

            assert FileFingerprint().get_file_size(str(target)) == len(payload)

    def test_get_file_meta(self):
        """Metadata exposes name, size, and modification time."""
        with tempfile.TemporaryDirectory() as tmpdir:
            target = Path(tmpdir) / "meta.txt"
            target.write_bytes(b"content")

            meta = FileFingerprint().get_file_meta(str(target))

            assert "name" in meta
            assert "size" in meta
            assert "modified_time" in meta
            assert meta["size"] == 7
            assert meta["name"].endswith(".txt")

    def test_file_not_found(self):
        """Every accessor raises FileNotFoundError for missing paths."""
        calc = FileFingerprint()
        missing = "/nonexistent/file.txt"

        for accessor in (
            calc.calculate_md5,
            calc.calculate_quick_hash,
            calc.get_file_size,
            calc.get_file_meta,
        ):
            with pytest.raises(FileNotFoundError):
                accessor(missing)
+
+
class TestFingerprintStore:
    """Test FingerprintStore."""

    def test_init_creates_index(self):
        """Test that initialization creates an empty index."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            store = FingerprintStore(repo)

            # A fresh store (no fingerprints.json on disk) starts empty.
            assert isinstance(store.index, dict)
            assert len(store.index) == 0

    def test_load_existing_index(self):
        """Test loading an existing index."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            storage_dir = Path(tmpdir)

            # Create a pre-existing index
            # NOTE(review): filename must match FingerprintStore.index_file;
            # this also assumes Repository.storage_dir == tmpdir — confirm
            # against the Repository constructor.
            index_file = storage_dir / "fingerprints.json"
            index_file.write_text('{"abc123": {"work_id": "work1"}}')

            store = FingerprintStore(repo)
            assert "abc123" in store.index

    def test_add_fingerprint(self):
        """Test adding a fingerprint."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # Create test file
            test_file = Path(tmpdir) / "test.txt"
            test_file.write_text("test content")

            repo = Repository(Path(tmpdir))
            store = FingerprintStore(repo)

            store.add_fingerprint("work123", str(test_file), {"name": "test.txt"})

            assert len(store.index) == 1

            # Get the fingerprint
            # The index is keyed by MD5 of the file content, so recomputing
            # the hash must find the entry just added.
            from src.fingerprint.calculator import FileFingerprint
            calc = FileFingerprint()
            fp = calc.calculate_md5(str(test_file))
            assert fp in store.index

    def test_check_duplicate(self):
        """Test checking for duplicates."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # Create test file
            test_file = Path(tmpdir) / "test.txt"
            test_file.write_text("test content")

            repo = Repository(Path(tmpdir))
            store = FingerprintStore(repo)

            # Not duplicate initially
            work_id = store.check_duplicate(str(test_file))
            assert work_id is None

            # Add fingerprint
            store.add_fingerprint("work123", str(test_file), {})

            # Now it's a duplicate
            work_id = store.check_duplicate(str(test_file))
            assert work_id == "work123"

    def test_check_duplicate_copy(self):
        """Test that file copies are detected as duplicates."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # Create original and copy
            file1 = Path(tmpdir) / "original.txt"
            file2 = Path(tmpdir) / "copy.txt"
            content = "same content"
            file1.write_text(content)
            file2.write_text(content)

            repo = Repository(Path(tmpdir))
            store = FingerprintStore(repo)

            # Register first file
            store.add_fingerprint("work123", str(file1), {})

            # Check second file
            # Detection is content-based (MD5), so a byte-identical copy
            # under a different name still matches.
            work_id = store.check_duplicate(str(file2))
            assert work_id == "work123"

    def test_get_work_history(self):
        """Test getting fingerprint history for a work."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            store = FingerprintStore(repo)

            # Add multiple files for same work
            # Distinct contents produce distinct hashes, so three entries
            # accumulate under the same work_id.
            for i in range(3):
                test_file = Path(tmpdir) / f"file{i}.txt"
                test_file.write_text(f"content {i}")
                store.add_fingerprint("work123", str(test_file), {"index": i})

            history = store.get_work_history("work123")
            assert len(history) == 3

    def test_remove_fingerprint(self):
        """Test removing a fingerprint."""
        with tempfile.TemporaryDirectory() as tmpdir:
            test_file = Path(tmpdir) / "test.txt"
            test_file.write_text("content")

            repo = Repository(Path(tmpdir))
            store = FingerprintStore(repo)

            store.add_fingerprint("work123", str(test_file), {})
            assert len(store.index) == 1

            # Removal requires the file to still be readable (it is re-hashed).
            removed = store.remove_fingerprint(str(test_file))
            assert removed is True
            assert len(store.index) == 0

    def test_remove_nonexistent_fingerprint(self):
        """Test removing a non-existent fingerprint."""
        with tempfile.TemporaryDirectory() as tmpdir:
            test_file = Path(tmpdir) / "test.txt"
            test_file.write_text("content")

            repo = Repository(Path(tmpdir))
            store = FingerprintStore(repo)

            # File exists but was never registered: removal reports False.
            removed = store.remove_fingerprint(str(test_file))
            assert removed is False

    def test_clear(self):
        """Test clearing all fingerprints."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            store = FingerprintStore(repo)

            # Add some fingerprints
            for i in range(3):
                test_file = Path(tmpdir) / f"file{i}.txt"
                test_file.write_text(f"content {i}")
                store.add_fingerprint(f"work{i}", str(test_file), {})

            assert len(store.index) == 3

            store.clear()
            assert len(store.index) == 0
+
+    def test_get_stats(self):
+        """Test getting store statistics."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo = Repository(Path(tmpdir))
+            store = FingerprintStore(repo)
+
+            # Add fingerprints
+            for i in range(5):
+                test_file = Path(tmpdir) / f"file{i}.txt"
+                test_file.write_text(f"content {i}")
+                work_id = "work1" if i < 3 else "work2"
+                store.add_fingerprint(work_id, str(test_file), {})
+
+            stats = store.get_stats()
+            assert stats["total_fingerprints"] == 5
+            assert stats["unique_works"] == 2
+
+    def test_persistence(self):
+        """Test that index persists across store instances."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            test_file = Path(tmpdir) / "test.txt"
+            test_file.write_text("content")
+
+            repo = Repository(Path(tmpdir))
+
+            # Create store and add fingerprint
+            store1 = FingerprintStore(repo)
+            store1.add_fingerprint("work123", str(test_file), {})
+
+            # Create new store instance
+            store2 = FingerprintStore(repo)
+
+            # Should have the fingerprint
+            work_id = store2.check_duplicate(str(test_file))
+            assert work_id == "work123"
+
+
class TestFingerprintService:
    """Test FingerprintService."""

    def test_check_before_import_new_file(self):
        """A never-seen file is not flagged as a duplicate."""
        with tempfile.TemporaryDirectory() as tmpdir:
            candidate = Path(tmpdir) / "new.txt"
            candidate.write_text("new content")

            service = FingerprintService(Repository(Path(tmpdir)))

            is_dup, work_id = service.check_before_import(str(candidate))
            assert is_dup is False
            assert work_id is None

    def test_check_before_import_duplicate(self):
        """A file whose fingerprint matches a completed work is a duplicate."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # Sibling file with the same bytes, mirroring the original fixture.
            sibling = Path(tmpdir) / "test.txt"
            sibling.write_text("content")

            repo = Repository(Path(tmpdir))

            # Import a source file and mark its work as finished.
            source = Path(tmpdir) / "source.txt"
            source.write_text("content")
            work = repo.create_work(str(source), title="Test")
            work.status = WorkStatus.COMPLETED
            repo.update_work(work)

            service = FingerprintService(repo)
            service.register_import(work.work_id, str(source))

            # Re-importing the same file must be rejected as a duplicate.
            is_dup, work_id = service.check_before_import(str(source))
            assert is_dup is True
            assert work_id == work.work_id

    def test_check_duplicate_incomplete_work(self):
        """Files tied to unfinished works are not treated as duplicates."""
        with tempfile.TemporaryDirectory() as tmpdir:
            source = Path(tmpdir) / "test.txt"
            source.write_text("content")

            repo = Repository(Path(tmpdir))

            # The work keeps its initial (non-completed) status on purpose.
            work = repo.create_work(str(source))

            service = FingerprintService(repo)
            service.register_import(work.work_id, str(source))

            is_dup, work_id = service.check_before_import(str(source))
            assert is_dup is False

    def test_register_import(self):
        """register_import stores the fingerprint without blocking re-import."""
        with tempfile.TemporaryDirectory() as tmpdir:
            source = Path(tmpdir) / "test.txt"
            source.write_text("content")

            service = FingerprintService(Repository(Path(tmpdir)))
            service.register_import("work123", str(source))

            # The work has not been completed, so import is still allowed...
            is_dup, work_id = service.check_before_import(str(source))
            assert is_dup is False

            # ...but the fingerprint itself already sits in the store.
            assert service.store.check_duplicate(str(source)) == "work123"

    def test_register_batch_import(self):
        """register_batch_import fingerprints every file in the list."""
        with tempfile.TemporaryDirectory() as tmpdir:
            paths = []
            for idx in range(3):
                path = Path(tmpdir) / f"file{idx}.txt"
                path.write_text(f"content {idx}")
                paths.append(str(path))

            service = FingerprintService(Repository(Path(tmpdir)))
            service.register_batch_import("work123", paths)

            assert service.store.get_stats()["total_fingerprints"] == 3

    def test_get_fingerprint(self):
        """get_fingerprint yields a 32-character hex digest string."""
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            handle.write(b"known content")
            temp_path = handle.name

        try:
            service = FingerprintService(Repository(Path(temp_path).parent))

            digest = service.get_fingerprint(temp_path)
            assert isinstance(digest, str)
            assert len(digest) == 32
        finally:
            # NamedTemporaryFile(delete=False) leaves cleanup to us.
            Path(temp_path).unlink()

    def test_get_file_info(self):
        """get_file_info bundles fingerprint, metadata and duplicate status."""
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as handle:
            handle.write(b"test content")
            temp_path = handle.name

        try:
            service = FingerprintService(Repository(Path(temp_path).parent))

            info = service.get_file_info(temp_path)

            for key in ("fingerprint", "metadata", "is_duplicate", "existing_work_id"):
                assert key in info
            assert len(info["fingerprint"]) == 32
            assert info["metadata"]["size"] == 12  # len(b"test content")
        finally:
            Path(temp_path).unlink()
+
+
class TestBatchFingerprintChecker:
    """Test BatchFingerprintChecker."""

    def test_check_files(self):
        """check_files returns a (is_duplicate, work_id) entry per path."""
        with tempfile.TemporaryDirectory() as tmpdir:
            first = Path(tmpdir) / "file1.txt"
            second = Path(tmpdir) / "file2.txt"
            first.write_text("content 1")
            second.write_text("content 2")

            checker = BatchFingerprintChecker(
                FingerprintService(Repository(Path(tmpdir)))
            )

            results = checker.check_files([str(first), str(second)])

            assert len(results) == 2
            # Nothing registered yet, so every entry is (False, None).
            for path in (first, second):
                assert str(path) in results
                assert results[str(path)] == (False, None)

    def test_check_files_with_duplicate(self):
        """Identical content makes both paths resolve as duplicates."""
        with tempfile.TemporaryDirectory() as tmpdir:
            first = Path(tmpdir) / "file1.txt"
            second = Path(tmpdir) / "file2.txt"
            first.write_text("same")
            second.write_text("same")

            repo = Repository(Path(tmpdir))
            service = FingerprintService(repo)

            # Complete a work for the first file and register it.
            work = repo.create_work(str(first))
            work.status = WorkStatus.COMPLETED
            repo.update_work(work)
            service.register_import(work.work_id, str(first))

            results = BatchFingerprintChecker(service).check_files(
                [str(first), str(second)]
            )

            # The files share the same bytes, so both match the fingerprint.
            assert results[str(first)][0] is True
            assert results[str(second)][0] is True

    def test_filter_new_files(self):
        """With an empty store every file counts as new."""
        with tempfile.TemporaryDirectory() as tmpdir:
            paths = []
            for idx in range(3):
                path = Path(tmpdir) / f"file{idx}.txt"
                path.write_text(f"content {idx}")
                paths.append(str(path))

            checker = BatchFingerprintChecker(
                FingerprintService(Repository(Path(tmpdir)))
            )

            assert len(checker.filter_new_files(paths)) == 3

    def test_filter_new_files_with_duplicate(self):
        """Known duplicates are dropped from the new-file list."""
        with tempfile.TemporaryDirectory() as tmpdir:
            first = Path(tmpdir) / "file1.txt"
            second = Path(tmpdir) / "file2.txt"
            first.write_text("same")
            second.write_text("different")

            repo = Repository(Path(tmpdir))
            service = FingerprintService(repo)

            # file1 already belongs to a completed work.
            work = repo.create_work(str(first))
            work.status = WorkStatus.COMPLETED
            repo.update_work(work)
            service.register_import(work.work_id, str(first))

            fresh = BatchFingerprintChecker(service).filter_new_files(
                [str(first), str(second)]
            )

            # Only file2 survives the filter.
            assert len(fresh) == 1
            assert str(second) in fresh

    def test_filter_duplicate_files(self):
        """filter_duplicate_files keeps only paths already on record."""
        with tempfile.TemporaryDirectory() as tmpdir:
            first = Path(tmpdir) / "file1.txt"
            second = Path(tmpdir) / "file2.txt"
            first.write_text("same content")
            second.write_text("different")

            repo = Repository(Path(tmpdir))
            service = FingerprintService(repo)

            # file1 is registered against a completed work.
            work = repo.create_work(str(first))
            work.status = WorkStatus.COMPLETED
            repo.update_work(work)
            service.register_import(work.work_id, str(first))

            dupes = BatchFingerprintChecker(service).filter_duplicate_files(
                [str(first), str(second)]
            )

            assert len(dupes) == 1
            assert str(first) in dupes

    def test_categorize_files(self):
        """categorize_files buckets paths into duplicate / new / error."""
        with tempfile.TemporaryDirectory() as tmpdir:
            known = Path(tmpdir) / "file1.txt"
            fresh = Path(tmpdir) / "file2.txt"
            missing = Path(tmpdir) / "nonexistent.txt"  # deliberately never created
            known.write_text("same")
            fresh.write_text("different")

            repo = Repository(Path(tmpdir))
            service = FingerprintService(repo)

            # file1 is registered against a completed work.
            work = repo.create_work(str(known))
            work.status = WorkStatus.COMPLETED
            repo.update_work(work)
            service.register_import(work.work_id, str(known))

            buckets = BatchFingerprintChecker(service).categorize_files(
                [str(known), str(fresh), str(missing)]
            )

            assert len(buckets["duplicate"]) == 1
            assert len(buckets["new"]) == 1
            assert len(buckets["error"]) == 1
            assert str(known) in buckets["duplicate"]
            assert str(fresh) in buckets["new"]
            assert str(missing) in buckets["error"]

    def test_get_summary(self):
        """get_summary counts total, new, duplicate and error files."""
        with tempfile.TemporaryDirectory() as tmpdir:
            paths = []
            for idx in range(5):
                path = Path(tmpdir) / f"file{idx}.txt"
                path.write_text(f"content {idx}")
                paths.append(str(path))

            # One path that does not exist on disk at all.
            paths.append("/nonexistent/file.txt")

            checker = BatchFingerprintChecker(
                FingerprintService(Repository(Path(tmpdir)))
            )

            summary = checker.get_summary(paths)

            assert summary["total"] == 6
            assert summary["new"] == 5  # every real file is unseen
            assert summary["duplicate"] == 0
            assert summary["error"] == 1  # the missing path
+
+
class TestIntegration:
    """Integration tests for fingerprint module."""

    def test_full_duplicate_detection_workflow(self):
        """End-to-end: import a file, complete it, then reject an identical copy."""
        with tempfile.TemporaryDirectory() as tmpdir:
            original = Path(tmpdir) / "novel.txt"
            original.write_text("This is a novel content.")

            repo = Repository(Path(tmpdir))
            service = FingerprintService(repo)

            # Import and fingerprint the original file.
            work = repo.create_work(str(original), title="My Novel")
            service.register_import(work.work_id, str(original))

            # Finish the work so the fingerprint becomes binding.
            work.status = WorkStatus.COMPLETED
            repo.update_work(work)

            # A content-identical copy must be recognized as a duplicate.
            copy = Path(tmpdir) / "novel_copy.txt"
            copy.write_text("This is a novel content.")

            is_dup, existing_work_id = service.check_before_import(str(copy))

            assert is_dup is True
            assert existing_work_id == work.work_id

    def test_batch_import_with_duplicates(self):
        """Batch summary flags content duplicated across differently named files."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # copy3.txt intentionally repeats the bytes of unique3.txt.
            fixtures = [
                ("unique1.txt", "content 1"),
                ("unique2.txt", "content 2"),
                ("unique3.txt", "content 3"),
                ("copy3.txt", "content 3"),
                ("unique4.txt", "content 4"),
            ]

            paths = []
            for name, body in fixtures:
                target = Path(tmpdir) / name
                target.write_text(body)
                paths.append(str(target))

            repo = Repository(Path(tmpdir))
            service = FingerprintService(repo)
            checker = BatchFingerprintChecker(service)

            # Complete and register the first three files (unique1-3).
            for file_path in paths[:3]:
                work = repo.create_work(file_path)
                work.status = WorkStatus.COMPLETED
                repo.update_work(work)
                service.register_import(work.work_id, file_path)

            summary = checker.get_summary(paths)

            assert summary["total"] == 5
            # copy3.txt matches the completed unique3.txt, so at least one dup.
            assert summary["duplicate"] >= 1

    def test_fingerprint_survives_repository_restart(self):
        """Fingerprint data stays readable after rebuilding all service objects."""
        with tempfile.TemporaryDirectory() as tmpdir:
            source = Path(tmpdir) / "test.txt"
            source.write_text("persistent content")

            storage_dir = Path(tmpdir) / "storage"

            # Session one: import, complete and fingerprint the file.
            repo_a = Repository(storage_dir)
            service_a = FingerprintService(repo_a)

            work = repo_a.create_work(str(source))
            work.status = WorkStatus.COMPLETED
            repo_a.update_work(work)
            service_a.register_import(work.work_id, str(source))

            # Session two: brand-new objects over the same storage directory.
            service_b = FingerprintService(Repository(storage_dir))

            is_dup, work_id = service_b.check_before_import(str(source))
            assert is_dup is True
            assert work_id == work.work_id