""" Unit tests for the repository module. Tests cover models, JSONLStore, and Repository functionality. """ import tempfile import shutil from pathlib import Path from datetime import datetime import time import pytest from src.repository.models import ( WorkItem, ChapterItem, FailureRecord, WorkStatus, ChapterStatus, ) from src.repository.jsonl_store import JSONLStore, JSONLError from src.repository.repository import ( Repository, RepositoryError, WorkNotFoundError, ChapterNotFoundError, ) class TestWorkItem: """Test WorkItem model.""" def test_create_work_item(self): """Test creating a work item.""" work = WorkItem( work_id="test123", file_path="/test/file.txt", file_size=1000, chapter_count=10, ) assert work.work_id == "test123" assert work.status == WorkStatus.PENDING def test_work_item_auto_generate_id(self): """Test that work_id is auto-generated from file path.""" work = WorkItem( work_id="", # Empty to trigger auto-generation file_path="/test/file.txt", file_size=1000, chapter_count=10, ) assert work.work_id != "" assert len(work.work_id) == 32 # MD5 hash length def test_work_item_from_file(self): """Test creating WorkItem from file path.""" with tempfile.NamedTemporaryFile(delete=False) as f: f.write(b"test content") temp_path = f.name try: work = WorkItem.from_file(temp_path, title="Test Novel") assert work.file_size == len(b"test content") assert work.metadata["title"] == "Test Novel" finally: Path(temp_path).unlink() def test_work_item_to_dict(self): """Test converting WorkItem to dictionary.""" work = WorkItem( work_id="test123", file_path="/test/file.txt", file_size=1000, chapter_count=10, status=WorkStatus.TRANSLATING, metadata={"title": "Test"}, ) data = work.to_dict() assert data["work_id"] == "test123" assert data["status"] == "translating" assert data["metadata"]["title"] == "Test" def test_work_item_from_dict(self): """Test creating WorkItem from dictionary.""" data = { "work_id": "test123", "file_path": "/test/file.txt", "file_size": 1000, "chapter_count": 10, "status": "completed", "created_at": datetime.utcnow().isoformat(), "updated_at": datetime.utcnow().isoformat(), "metadata": {}, "progress": 50, } work = WorkItem.from_dict(data) assert work.work_id == "test123" assert work.status == WorkStatus.COMPLETED assert work.progress == 50 def test_touch_updates_timestamp(self): """Test that touch() updates updated_at.""" work = WorkItem( work_id="test", file_path="/test.txt", file_size=100, chapter_count=1, ) old_time = work.updated_at time.sleep(0.01) # Small delay work.touch() assert work.updated_at > old_time class TestChapterItem: """Test ChapterItem model.""" def test_create_chapter(self): """Test creating a chapter.""" chapter = ChapterItem( work_id="work123", chapter_index=0, title="Chapter 1", content="This is the content", ) assert chapter.work_id == "work123" assert chapter.word_count == 4 # Auto-calculated def test_chapter_word_count(self): """Test word count calculation.""" chapter = ChapterItem( work_id="work123", chapter_index=0, title="Chapter 1", content="One two three four five", ) assert chapter.word_count == 5 def test_chapter_to_dict(self): """Test converting chapter to dictionary.""" chapter = ChapterItem( work_id="work123", chapter_index=0, title="Chapter 1", content="Content", translation="Translated", ) data = chapter.to_dict() assert data["work_id"] == "work123" assert data["translation"] == "Translated" def test_chapter_from_dict(self): """Test creating chapter from dictionary.""" data = { "work_id": "work123", "chapter_index": 0, "title": "Chapter 1", "content": "Content", "word_count": 1, "status": "completed", "created_at": datetime.utcnow().isoformat(), "updated_at": datetime.utcnow().isoformat(), "retry_count": 0, } chapter = ChapterItem.from_dict(data) assert chapter.work_id == "work123" assert chapter.status == ChapterStatus.COMPLETED class TestFailureRecord: """Test FailureRecord model.""" def test_create_failure(self): """Test creating a failure record.""" failure = FailureRecord( work_id="work123", chapter_index=0, error_type="ValueError", error_message="Invalid value", ) assert failure.work_id == "work123" assert failure.resolved is False def test_failure_from_exception(self): """Test creating failure from exception.""" try: raise ValueError("Test error") except Exception as e: failure = FailureRecord.from_exception("work123", e, chapter_index=5) assert failure.error_type == "ValueError" assert failure.error_message == "Test error" assert failure.chapter_index == 5 def test_failure_to_dict(self): """Test converting failure to dictionary.""" failure = FailureRecord( work_id="work123", chapter_index=0, error_type="ValueError", error_message="Test", ) data = failure.to_dict() assert data["work_id"] == "work123" assert data["error_type"] == "ValueError" class TestJSONLStore: """Test JSONLStore functionality.""" def test_init_creates_directory(self): """Test that initialization creates base directory.""" with tempfile.TemporaryDirectory() as tmpdir: store_dir = Path(tmpdir) / "store" store = JSONLStore(store_dir) assert store_dir.exists() def test_save_and_load_work_item(self): """Test saving and loading work items.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) work = WorkItem( work_id="test123", file_path="/test.txt", file_size=100, chapter_count=1, ) store.save_work_item(work) loaded = list(store.load_work_items()) assert len(loaded) == 1 assert loaded[0].work_id == "test123" def test_save_work_item_overwrite_mode(self): """Test overwrite mode removes duplicates.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) work = WorkItem( work_id="test123", file_path="/test.txt", file_size=100, chapter_count=1, ) store.save_work_item(work, mode="append") store.save_work_item(work, mode="append") # Duplicate loaded = list(store.load_work_items()) assert len(loaded) == 2 # JSONL has duplicates # Now use overwrite work.chapter_count = 5 # Modified store.save_work_item(work, mode="overwrite") loaded = list(store.load_work_items()) assert len(loaded) == 1 # Only one after overwrite assert loaded[0].chapter_count == 5 def test_get_work_item(self): """Test getting specific work item.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) work1 = WorkItem( work_id="work1", file_path="/test1.txt", file_size=100, chapter_count=1, ) work2 = WorkItem( work_id="work2", file_path="/test2.txt", file_size=200, chapter_count=2, ) store.save_work_item(work1) store.save_work_item(work2) found = store.get_work_item("work1") assert found is not None assert found.file_size == 100 not_found = store.get_work_item("work999") assert not_found is None def test_save_and_load_chapters(self): """Test saving and loading chapters.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) chapter1 = ChapterItem( work_id="work123", chapter_index=0, title="Chapter 1", content="Content 1", ) chapter2 = ChapterItem( work_id="work123", chapter_index=1, title="Chapter 2", content="Content 2", ) store.save_chapter(chapter1) store.save_chapter(chapter2) chapters = list(store.load_chapters("work123")) assert len(chapters) == 2 def test_get_chapter(self): """Test getting specific chapter.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) chapter = ChapterItem( work_id="work123", chapter_index=5, title="Chapter 5", content="Content", ) store.save_chapter(chapter) found = store.get_chapter("work123", 5) assert found is not None assert found.title == "Chapter 5" not_found = store.get_chapter("work123", 99) assert not_found is None def test_save_and_load_failures(self): """Test saving and loading failures.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) failure = FailureRecord( work_id="work123", chapter_index=0, error_type="ValueError", error_message="Test error", ) store.save_failure(failure) failures = list(store.load_failures("work123")) assert len(failures) == 1 assert failures[0].error_message == "Test error" def test_delete_work(self): """Test deleting a work item.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) work = WorkItem( work_id="test123", file_path="/test.txt", file_size=100, chapter_count=1, ) store.save_work_item(work) # Create chapter directory chapter = ChapterItem( work_id="test123", chapter_index=0, title="Chapter 1", content="Content", ) store.save_chapter(chapter) # Delete store.delete_work("test123") # Verify deletion assert not store.work_exists("test123") assert not (Path(tmpdir) / "test123").exists() def test_atomic_write_handles_empty_data(self): """Test atomic write with empty data.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) path = Path(tmpdir) / "test.jsonl" store._atomic_write(path, "") assert path.exists() assert path.read_text() == "" def test_corrupted_line_skipped(self): """Test that corrupted lines are skipped during read.""" with tempfile.TemporaryDirectory() as tmpdir: store_dir = Path(tmpdir) store = JSONLStore(store_dir) work = WorkItem( work_id="test123", file_path="/test.txt", file_size=100, chapter_count=1, ) store.save_work_item(work) # Corrupt the file by appending invalid JSON path = store_dir / "work_items.jsonl" with open(path, "a") as f: f.write("\n{invalid json}\n") # Should still load valid entries loaded = list(store.load_work_items()) assert len(loaded) == 1 assert loaded[0].work_id == "test123" class TestRepository: """Test Repository interface.""" def test_init(self): """Test repository initialization.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) assert repo.storage_dir == Path(tmpdir) def test_create_and_get_work(self): """Test creating and getting a work item.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) # Create a test file test_file = Path(tmpdir) / "test.txt" test_file.write_text("test content") work = repo.create_work(str(test_file), title="Test Novel") retrieved = repo.get_work(work.work_id) assert retrieved is not None assert retrieved.metadata["title"] == "Test Novel" def test_get_work_or_raise(self): """Test get_work_or_raise raises on not found.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) with pytest.raises(WorkNotFoundError): repo.get_work_or_raise("nonexistent") def test_list_works(self): """Test listing all work items.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) # Create test files for i in range(3): test_file = Path(tmpdir) / f"test{i}.txt" test_file.write_text(f"content {i}") repo.create_work(str(test_file)) works = repo.list_works() assert len(works) == 3 def test_list_works_by_status(self): """Test filtering works by status.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) test_file = Path(tmpdir) / "test.txt" test_file.write_text("content") work1 = repo.create_work(str(test_file)) work2 = repo.create_work(str(test_file)) repo.update_work_status(work1.work_id, WorkStatus.TRANSLATING) translating = repo.list_works(status=WorkStatus.TRANSLATING) assert len(translating) == 1 assert translating[0].work_id == work1.work_id def test_update_work_status(self): """Test updating work status.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) test_file = Path(tmpdir) / "test.txt" test_file.write_text("content") work = repo.create_work(str(test_file)) repo.update_work_status(work.work_id, WorkStatus.TRANSLATING) retrieved = repo.get_work(work.work_id) assert retrieved.status == WorkStatus.TRANSLATING def test_save_and_get_chapter(self): """Test saving and getting chapters.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) test_file = Path(tmpdir) / "test.txt" test_file.write_text("content") work = repo.create_work(str(test_file)) chapter = ChapterItem( work_id=work.work_id, chapter_index=0, title="Chapter 1", content="Content", ) repo.save_chapter(work.work_id, chapter) retrieved = repo.get_chapter(work.work_id, 0) assert retrieved is not None assert retrieved.title == "Chapter 1" def test_get_pending_chapters(self): """Test getting pending chapters.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) test_file = Path(tmpdir) / "test.txt" test_file.write_text("content") work = repo.create_work(str(test_file)) # Add chapters with different statuses for i in range(3): chapter = ChapterItem( work_id=work.work_id, chapter_index=i, title=f"Chapter {i}", content="Content", status=ChapterStatus.COMPLETED if i == 0 else ChapterStatus.PENDING, ) repo.save_chapter(work.work_id, chapter) pending = repo.get_pending_chapters(work.work_id) assert len(pending) == 2 def test_record_failure(self): """Test recording a failure.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) test_file = Path(tmpdir) / "test.txt" test_file.write_text("content") work = repo.create_work(str(test_file)) try: raise ValueError("Test error") except Exception as e: failure = repo.record_failure(work.work_id, 0, e) assert failure.error_type == "ValueError" failures = repo.get_failures(work.work_id) assert len(failures) == 1 def test_get_work_stats(self): """Test getting work statistics.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) test_file = Path(tmpdir) / "test.txt" test_file.write_text("content") work = repo.create_work(str(test_file)) # Add chapters for i in range(5): chapter = ChapterItem( work_id=work.work_id, chapter_index=i, title=f"Chapter {i}", content="Content", status=ChapterStatus.COMPLETED if i < 3 else ChapterStatus.PENDING, ) repo.save_chapter(work.work_id, chapter) stats = repo.get_work_stats(work.work_id) assert stats["total_chapters"] == 5 assert stats["completed_chapters"] == 3 assert stats["pending_chapters"] == 2 def test_save_chapter_translation(self): """Test saving chapter translation.""" with tempfile.TemporaryDirectory() as tmpdir: repo = Repository(Path(tmpdir)) test_file = Path(tmpdir) / "test.txt" test_file.write_text("content") work = repo.create_work(str(test_file)) chapter = ChapterItem( work_id=work.work_id, chapter_index=0, title="Chapter 1", content="Original", ) repo.save_chapter(work.work_id, chapter) repo.save_chapter_translation(work.work_id, 0, "Translated") retrieved = repo.get_chapter(work.work_id, 0) assert retrieved.translation == "Translated" assert retrieved.status == ChapterStatus.COMPLETED class TestCrashSafety: """Test crash-safety features.""" def test_atomic_write_preserves_original_on_error(self): """Test that atomic write preserves original file on error.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) work = WorkItem( work_id="test", file_path="/test.txt", file_size=100, chapter_count=1, ) store.save_work_item(work) # Get original content path = Path(tmpdir) / "work_items.jsonl" original_content = path.read_text() # Simulate crash by creating a temp file and stopping temp_path = path.with_suffix(".tmp") temp_path.write_text("corrupted data") # Original should be unchanged assert path.read_text() == original_content # Clean up temp file temp_path.unlink() # Verify we can still load loaded = list(store.load_work_items()) assert len(loaded) == 1 def test_append_mode_is_crash_safe(self): """Test that append mode is crash-safe at line level.""" with tempfile.TemporaryDirectory() as tmpdir: store = JSONLStore(Path(tmpdir)) # Add initial work work = WorkItem( work_id="work1", file_path="/test1.txt", file_size=100, chapter_count=1, ) store.save_work_item(work) # Add another work work2 = WorkItem( work_id="work2", file_path="/test2.txt", file_size=200, chapter_count=2, ) store.save_work_item(work2) # Both should be readable works = list(store.load_work_items()) assert len(works) == 2