"""
Unit tests for the repository module.

Tests cover models, JSONLStore, and Repository functionality.
"""
- import tempfile
- import shutil
- from pathlib import Path
- from datetime import datetime
- import time
- import pytest
- from src.repository.models import (
- WorkItem,
- ChapterItem,
- FailureRecord,
- WorkStatus,
- ChapterStatus,
- )
- from src.repository.jsonl_store import JSONLStore, JSONLError
- from src.repository.repository import (
- Repository,
- RepositoryError,
- WorkNotFoundError,
- ChapterNotFoundError,
- )
class TestWorkItem:
    """Exercise the WorkItem model: construction, dict conversion, timestamps."""

    def test_create_work_item(self):
        """An explicit work_id is kept and a new item reports PENDING status."""
        item = WorkItem(
            work_id="test123",
            file_path="/test/file.txt",
            file_size=1000,
            chapter_count=10,
        )

        assert item.work_id == "test123"
        assert item.status == WorkStatus.PENDING

    def test_work_item_auto_generate_id(self):
        """An empty work_id is replaced by a generated 32-character identifier."""
        # The empty string is what triggers auto-generation from the file path.
        item = WorkItem(
            work_id="",
            file_path="/test/file.txt",
            file_size=1000,
            chapter_count=10,
        )

        assert item.work_id != ""
        assert len(item.work_id) == 32  # MD5 hash length

    def test_work_item_from_file(self):
        """from_file() records the on-disk size and stores the title in metadata."""
        payload = b"test content"
        # delete=False keeps the file around after the handle is closed so
        # from_file() can inspect it; the finally block removes it again.
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            handle.write(payload)
            source_name = handle.name

        try:
            item = WorkItem.from_file(source_name, title="Test Novel")
            assert item.file_size == len(payload)
            assert item.metadata["title"] == "Test Novel"
        finally:
            Path(source_name).unlink()

    def test_work_item_to_dict(self):
        """to_dict() exposes the id, the status as a lowercase string, and metadata."""
        item = WorkItem(
            work_id="test123",
            file_path="/test/file.txt",
            file_size=1000,
            chapter_count=10,
            status=WorkStatus.TRANSLATING,
            metadata={"title": "Test"},
        )

        serialized = item.to_dict()

        assert serialized["work_id"] == "test123"
        assert serialized["status"] == "translating"
        assert serialized["metadata"]["title"] == "Test"

    def test_work_item_from_dict(self):
        """from_dict() rebuilds the id, the status enum and the progress value."""
        created = datetime.utcnow().isoformat()
        updated = datetime.utcnow().isoformat()
        payload = {
            "work_id": "test123",
            "file_path": "/test/file.txt",
            "file_size": 1000,
            "chapter_count": 10,
            "status": "completed",
            "created_at": created,
            "updated_at": updated,
            "metadata": {},
            "progress": 50,
        }

        item = WorkItem.from_dict(payload)

        assert item.work_id == "test123"
        assert item.status == WorkStatus.COMPLETED
        assert item.progress == 50

    def test_touch_updates_timestamp(self):
        """touch() moves updated_at forward."""
        item = WorkItem(
            work_id="test",
            file_path="/test.txt",
            file_size=100,
            chapter_count=1,
        )
        before = item.updated_at

        # Short pause so the second timestamp is strictly later than the first.
        time.sleep(0.01)
        item.touch()

        assert item.updated_at > before
class TestChapterItem:
    """Exercise the ChapterItem model: construction, word count, dict conversion."""

    def test_create_chapter(self):
        """A new chapter keeps its work_id and gets a word count without passing one."""
        item = ChapterItem(
            work_id="work123",
            chapter_index=0,
            title="Chapter 1",
            content="This is the content",
        )

        assert item.work_id == "work123"
        assert item.word_count == 4  # Auto-calculated

    def test_chapter_word_count(self):
        """Five-word content yields a word count of five."""
        item = ChapterItem(
            work_id="work123",
            chapter_index=0,
            title="Chapter 1",
            content="One two three four five",
        )

        assert item.word_count == 5

    def test_chapter_to_dict(self):
        """to_dict() carries the work_id and the translation text."""
        serialized = ChapterItem(
            work_id="work123",
            chapter_index=0,
            title="Chapter 1",
            content="Content",
            translation="Translated",
        ).to_dict()

        assert serialized["work_id"] == "work123"
        assert serialized["translation"] == "Translated"

    def test_chapter_from_dict(self):
        """from_dict() rebuilds the work_id and maps the status string to the enum."""
        created = datetime.utcnow().isoformat()
        updated = datetime.utcnow().isoformat()
        payload = {
            "work_id": "work123",
            "chapter_index": 0,
            "title": "Chapter 1",
            "content": "Content",
            "word_count": 1,
            "status": "completed",
            "created_at": created,
            "updated_at": updated,
            "retry_count": 0,
        }

        item = ChapterItem.from_dict(payload)

        assert item.work_id == "work123"
        assert item.status == ChapterStatus.COMPLETED
class TestFailureRecord:
    """Exercise the FailureRecord model: construction, from_exception, to_dict."""

    def test_create_failure(self):
        """A directly constructed record keeps its work_id and starts unresolved."""
        record = FailureRecord(
            work_id="work123",
            chapter_index=0,
            error_type="ValueError",
            error_message="Invalid value",
        )

        assert record.work_id == "work123"
        assert record.resolved is False

    def test_failure_from_exception(self):
        """from_exception() copies the exception class name, message and chapter index."""
        # Raise for real so the exception object is one that was actually
        # thrown, exactly as it would be at a production call site.
        try:
            raise ValueError("Test error")
        except Exception as exc:
            record = FailureRecord.from_exception("work123", exc, chapter_index=5)

        assert record.error_type == "ValueError"
        assert record.error_message == "Test error"
        assert record.chapter_index == 5

    def test_failure_to_dict(self):
        """to_dict() exposes the work_id and error_type fields."""
        serialized = FailureRecord(
            work_id="work123",
            chapter_index=0,
            error_type="ValueError",
            error_message="Test",
        ).to_dict()

        assert serialized["work_id"] == "work123"
        assert serialized["error_type"] == "ValueError"
class TestJSONLStore:
    """Exercise JSONLStore persistence for works, chapters and failures."""

    def test_init_creates_directory(self):
        """Constructing a store on a missing directory creates that directory."""
        with tempfile.TemporaryDirectory() as tmpdir:
            target = Path(tmpdir) / "store"

            _ = JSONLStore(target)

            assert target.exists()

    def test_save_and_load_work_item(self):
        """A saved work item is returned by load_work_items()."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))
            store.save_work_item(
                WorkItem(
                    work_id="test123",
                    file_path="/test.txt",
                    file_size=100,
                    chapter_count=1,
                )
            )

            items = list(store.load_work_items())

            assert len(items) == 1
            assert items[0].work_id == "test123"

    def test_save_work_item_overwrite_mode(self):
        """Append mode keeps duplicates; overwrite mode collapses them to one record."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))
            item = WorkItem(
                work_id="test123",
                file_path="/test.txt",
                file_size=100,
                chapter_count=1,
            )

            # Two appends of the same item leave two lines in the JSONL file.
            for _ in range(2):
                store.save_work_item(item, mode="append")
            after_append = list(store.load_work_items())
            assert len(after_append) == 2

            # Overwriting with a modified item leaves only the latest version.
            item.chapter_count = 5
            store.save_work_item(item, mode="overwrite")
            after_overwrite = list(store.load_work_items())
            assert len(after_overwrite) == 1
            assert after_overwrite[0].chapter_count == 5

    def test_get_work_item(self):
        """get_work_item() returns the matching item, or None for an unknown id."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))
            first = WorkItem(
                work_id="work1",
                file_path="/test1.txt",
                file_size=100,
                chapter_count=1,
            )
            second = WorkItem(
                work_id="work2",
                file_path="/test2.txt",
                file_size=200,
                chapter_count=2,
            )
            for item in (first, second):
                store.save_work_item(item)

            hit = store.get_work_item("work1")
            assert hit is not None
            assert hit.file_size == 100

            miss = store.get_work_item("work999")
            assert miss is None

    def test_save_and_load_chapters(self):
        """Two chapters saved for one work are both returned by load_chapters()."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))
            specs = [("Chapter 1", "Content 1"), ("Chapter 2", "Content 2")]
            chapters = [
                ChapterItem(
                    work_id="work123",
                    chapter_index=index,
                    title=title,
                    content=content,
                )
                for index, (title, content) in enumerate(specs)
            ]
            for chapter in chapters:
                store.save_chapter(chapter)

            loaded = list(store.load_chapters("work123"))

            assert len(loaded) == 2

    def test_get_chapter(self):
        """get_chapter() finds a chapter by index and returns None for a missing one."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))
            store.save_chapter(
                ChapterItem(
                    work_id="work123",
                    chapter_index=5,
                    title="Chapter 5",
                    content="Content",
                )
            )

            hit = store.get_chapter("work123", 5)
            assert hit is not None
            assert hit.title == "Chapter 5"

            miss = store.get_chapter("work123", 99)
            assert miss is None

    def test_save_and_load_failures(self):
        """A saved failure record is returned by load_failures() for its work."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))
            store.save_failure(
                FailureRecord(
                    work_id="work123",
                    chapter_index=0,
                    error_type="ValueError",
                    error_message="Test error",
                )
            )

            records = list(store.load_failures("work123"))

            assert len(records) == 1
            assert records[0].error_message == "Test error"

    def test_delete_work(self):
        """delete_work() removes the work record and its per-work directory."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            store = JSONLStore(root)
            store.save_work_item(
                WorkItem(
                    work_id="test123",
                    file_path="/test.txt",
                    file_size=100,
                    chapter_count=1,
                )
            )
            # Saving a chapter is what creates the per-work chapter directory.
            store.save_chapter(
                ChapterItem(
                    work_id="test123",
                    chapter_index=0,
                    title="Chapter 1",
                    content="Content",
                )
            )

            store.delete_work("test123")

            assert not store.work_exists("test123")
            assert not (root / "test123").exists()

    def test_atomic_write_handles_empty_data(self):
        """_atomic_write() with an empty string produces an empty file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))
            target = Path(tmpdir) / "test.jsonl"

            store._atomic_write(target, "")

            assert target.exists()
            assert target.read_text() == ""

    def test_corrupted_line_skipped(self):
        """An invalid JSON line in the file is skipped; valid entries still load."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            store = JSONLStore(root)
            store.save_work_item(
                WorkItem(
                    work_id="test123",
                    file_path="/test.txt",
                    file_size=100,
                    chapter_count=1,
                )
            )

            # Corrupt the file by appending a line that is not valid JSON.
            with (root / "work_items.jsonl").open("a") as handle:
                handle.write("\n{invalid json}\n")

            items = list(store.load_work_items())

            assert len(items) == 1
            assert items[0].work_id == "test123"
class TestRepository:
    """Exercise the Repository interface against a temporary storage directory."""

    @staticmethod
    def _write_source(directory, name="test.txt", text="content"):
        """Write a small text file under *directory* and return its path as a str.

        The tests need a real file on disk because ``create_work`` is given a
        file path to register.
        """
        source = Path(directory) / name
        source.write_text(text)
        return str(source)

    def test_init(self):
        """The repository exposes the directory it was constructed with."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))

            assert repo.storage_dir == Path(tmpdir)

    def test_create_and_get_work(self):
        """A created work can be fetched by id with its title metadata intact."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            source = self._write_source(tmpdir, text="test content")

            work = repo.create_work(source, title="Test Novel")
            retrieved = repo.get_work(work.work_id)

            assert retrieved is not None
            assert retrieved.metadata["title"] == "Test Novel"

    def test_get_work_or_raise(self):
        """get_work_or_raise() raises WorkNotFoundError for an unknown id."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))

            with pytest.raises(WorkNotFoundError):
                repo.get_work_or_raise("nonexistent")

    def test_list_works(self):
        """list_works() returns every work that was created."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            for i in range(3):
                repo.create_work(
                    self._write_source(tmpdir, f"test{i}.txt", f"content {i}")
                )

            works = repo.list_works()

            assert len(works) == 3

    def test_list_works_by_status(self):
        """list_works(status=...) returns only the works in that status."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            # Two *different* source files: registering the same path twice
            # would give both works the same identity, and the filter would
            # never be exercised against a non-matching work.
            work1 = repo.create_work(
                self._write_source(tmpdir, "first.txt", "first content")
            )
            work2 = repo.create_work(
                self._write_source(tmpdir, "second.txt", "second content")
            )
            assert work1.work_id != work2.work_id

            repo.update_work_status(work1.work_id, WorkStatus.TRANSLATING)
            translating = repo.list_works(status=WorkStatus.TRANSLATING)

            # Only the updated work may come back; work2 must be filtered out.
            assert [w.work_id for w in translating] == [work1.work_id]

    def test_update_work_status(self):
        """A status update is visible when the work is fetched again."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            work = repo.create_work(self._write_source(tmpdir))

            repo.update_work_status(work.work_id, WorkStatus.TRANSLATING)
            retrieved = repo.get_work(work.work_id)

            assert retrieved.status == WorkStatus.TRANSLATING

    def test_save_and_get_chapter(self):
        """A saved chapter can be fetched by work id and chapter index."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            work = repo.create_work(self._write_source(tmpdir))
            chapter = ChapterItem(
                work_id=work.work_id,
                chapter_index=0,
                title="Chapter 1",
                content="Content",
            )

            repo.save_chapter(work.work_id, chapter)
            retrieved = repo.get_chapter(work.work_id, 0)

            assert retrieved is not None
            assert retrieved.title == "Chapter 1"

    def test_get_pending_chapters(self):
        """get_pending_chapters() leaves out chapters that are already completed."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            work = repo.create_work(self._write_source(tmpdir))
            # Chapter 0 is completed; chapters 1 and 2 are still pending.
            for i in range(3):
                status = ChapterStatus.COMPLETED if i == 0 else ChapterStatus.PENDING
                chapter = ChapterItem(
                    work_id=work.work_id,
                    chapter_index=i,
                    title=f"Chapter {i}",
                    content="Content",
                    status=status,
                )
                repo.save_chapter(work.work_id, chapter)

            pending = repo.get_pending_chapters(work.work_id)

            assert len(pending) == 2

    def test_record_failure(self):
        """record_failure() returns a record and makes it visible via get_failures()."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            work = repo.create_work(self._write_source(tmpdir))

            # Raise for real so the repository receives an exception that was
            # actually thrown.
            try:
                raise ValueError("Test error")
            except Exception as exc:
                failure = repo.record_failure(work.work_id, 0, exc)

            assert failure.error_type == "ValueError"
            failures = repo.get_failures(work.work_id)
            assert len(failures) == 1

    def test_get_work_stats(self):
        """get_work_stats() reports total, completed and pending chapter counts."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            work = repo.create_work(self._write_source(tmpdir))
            # Chapters 0-2 are completed; chapters 3-4 are pending.
            for i in range(5):
                status = ChapterStatus.COMPLETED if i < 3 else ChapterStatus.PENDING
                chapter = ChapterItem(
                    work_id=work.work_id,
                    chapter_index=i,
                    title=f"Chapter {i}",
                    content="Content",
                    status=status,
                )
                repo.save_chapter(work.work_id, chapter)

            stats = repo.get_work_stats(work.work_id)

            assert stats["total_chapters"] == 5
            assert stats["completed_chapters"] == 3
            assert stats["pending_chapters"] == 2

    def test_save_chapter_translation(self):
        """Saving a translation stores the text and marks the chapter COMPLETED."""
        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Repository(Path(tmpdir))
            work = repo.create_work(self._write_source(tmpdir))
            chapter = ChapterItem(
                work_id=work.work_id,
                chapter_index=0,
                title="Chapter 1",
                content="Original",
            )
            repo.save_chapter(work.work_id, chapter)

            repo.save_chapter_translation(work.work_id, 0, "Translated")
            retrieved = repo.get_chapter(work.work_id, 0)

            assert retrieved.translation == "Translated"
            assert retrieved.status == ChapterStatus.COMPLETED
class TestCrashSafety:
    """Exercise the crash-safety expectations placed on JSONLStore files."""

    def test_atomic_write_preserves_original_on_error(self):
        """A stray temp file next to the data file leaves the original readable."""
        # NOTE(review): this writes the ".tmp" sibling by hand and never drives
        # the store's own write path into a failure - confirm that is the
        # intended coverage.
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))
            store.save_work_item(
                WorkItem(
                    work_id="test",
                    file_path="/test.txt",
                    file_size=100,
                    chapter_count=1,
                )
            )

            data_file = Path(tmpdir) / "work_items.jsonl"
            snapshot = data_file.read_text()

            # Simulate a crash: a half-written temp file is left behind.
            leftover = data_file.with_suffix(".tmp")
            leftover.write_text("corrupted data")

            # The real data file must be untouched by the leftover.
            assert data_file.read_text() == snapshot

            leftover.unlink()

            # And the store can still load what was saved before.
            items = list(store.load_work_items())
            assert len(items) == 1

    def test_append_mode_is_crash_safe(self):
        """Two consecutive saves both remain readable afterwards."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = JSONLStore(Path(tmpdir))

            first = WorkItem(
                work_id="work1",
                file_path="/test1.txt",
                file_size=100,
                chapter_count=1,
            )
            store.save_work_item(first)

            second = WorkItem(
                work_id="work2",
                file_path="/test2.txt",
                file_size=200,
                chapter_count=2,
            )
            store.save_work_item(second)

            items = list(store.load_work_items())
            assert len(items) == 2
|