2
0

test_repository.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  1. """
  2. Unit tests for the repository module.
  3. Tests cover models, JSONLStore, and Repository functionality.
  4. """
  5. import tempfile
  6. import shutil
  7. from pathlib import Path
  8. from datetime import datetime
  9. import time
  10. import pytest
  11. from src.repository.models import (
  12. WorkItem,
  13. ChapterItem,
  14. FailureRecord,
  15. WorkStatus,
  16. ChapterStatus,
  17. )
  18. from src.repository.jsonl_store import JSONLStore, JSONLError
  19. from src.repository.repository import (
  20. Repository,
  21. RepositoryError,
  22. WorkNotFoundError,
  23. ChapterNotFoundError,
  24. )
  25. class TestWorkItem:
  26. """Test WorkItem model."""
  27. def test_create_work_item(self):
  28. """Test creating a work item."""
  29. work = WorkItem(
  30. work_id="test123",
  31. file_path="/test/file.txt",
  32. file_size=1000,
  33. chapter_count=10,
  34. )
  35. assert work.work_id == "test123"
  36. assert work.status == WorkStatus.PENDING
  37. def test_work_item_auto_generate_id(self):
  38. """Test that work_id is auto-generated from file path."""
  39. work = WorkItem(
  40. work_id="", # Empty to trigger auto-generation
  41. file_path="/test/file.txt",
  42. file_size=1000,
  43. chapter_count=10,
  44. )
  45. assert work.work_id != ""
  46. assert len(work.work_id) == 32 # MD5 hash length
  47. def test_work_item_from_file(self):
  48. """Test creating WorkItem from file path."""
  49. with tempfile.NamedTemporaryFile(delete=False) as f:
  50. f.write(b"test content")
  51. temp_path = f.name
  52. try:
  53. work = WorkItem.from_file(temp_path, title="Test Novel")
  54. assert work.file_size == len(b"test content")
  55. assert work.metadata["title"] == "Test Novel"
  56. finally:
  57. Path(temp_path).unlink()
  58. def test_work_item_to_dict(self):
  59. """Test converting WorkItem to dictionary."""
  60. work = WorkItem(
  61. work_id="test123",
  62. file_path="/test/file.txt",
  63. file_size=1000,
  64. chapter_count=10,
  65. status=WorkStatus.TRANSLATING,
  66. metadata={"title": "Test"},
  67. )
  68. data = work.to_dict()
  69. assert data["work_id"] == "test123"
  70. assert data["status"] == "translating"
  71. assert data["metadata"]["title"] == "Test"
  72. def test_work_item_from_dict(self):
  73. """Test creating WorkItem from dictionary."""
  74. data = {
  75. "work_id": "test123",
  76. "file_path": "/test/file.txt",
  77. "file_size": 1000,
  78. "chapter_count": 10,
  79. "status": "completed",
  80. "created_at": datetime.utcnow().isoformat(),
  81. "updated_at": datetime.utcnow().isoformat(),
  82. "metadata": {},
  83. "progress": 50,
  84. }
  85. work = WorkItem.from_dict(data)
  86. assert work.work_id == "test123"
  87. assert work.status == WorkStatus.COMPLETED
  88. assert work.progress == 50
  89. def test_touch_updates_timestamp(self):
  90. """Test that touch() updates updated_at."""
  91. work = WorkItem(
  92. work_id="test",
  93. file_path="/test.txt",
  94. file_size=100,
  95. chapter_count=1,
  96. )
  97. old_time = work.updated_at
  98. time.sleep(0.01) # Small delay
  99. work.touch()
  100. assert work.updated_at > old_time
  101. class TestChapterItem:
  102. """Test ChapterItem model."""
  103. def test_create_chapter(self):
  104. """Test creating a chapter."""
  105. chapter = ChapterItem(
  106. work_id="work123",
  107. chapter_index=0,
  108. title="Chapter 1",
  109. content="This is the content",
  110. )
  111. assert chapter.work_id == "work123"
  112. assert chapter.word_count == 4 # Auto-calculated
  113. def test_chapter_word_count(self):
  114. """Test word count calculation."""
  115. chapter = ChapterItem(
  116. work_id="work123",
  117. chapter_index=0,
  118. title="Chapter 1",
  119. content="One two three four five",
  120. )
  121. assert chapter.word_count == 5
  122. def test_chapter_to_dict(self):
  123. """Test converting chapter to dictionary."""
  124. chapter = ChapterItem(
  125. work_id="work123",
  126. chapter_index=0,
  127. title="Chapter 1",
  128. content="Content",
  129. translation="Translated",
  130. )
  131. data = chapter.to_dict()
  132. assert data["work_id"] == "work123"
  133. assert data["translation"] == "Translated"
  134. def test_chapter_from_dict(self):
  135. """Test creating chapter from dictionary."""
  136. data = {
  137. "work_id": "work123",
  138. "chapter_index": 0,
  139. "title": "Chapter 1",
  140. "content": "Content",
  141. "word_count": 1,
  142. "status": "completed",
  143. "created_at": datetime.utcnow().isoformat(),
  144. "updated_at": datetime.utcnow().isoformat(),
  145. "retry_count": 0,
  146. }
  147. chapter = ChapterItem.from_dict(data)
  148. assert chapter.work_id == "work123"
  149. assert chapter.status == ChapterStatus.COMPLETED
  150. class TestFailureRecord:
  151. """Test FailureRecord model."""
  152. def test_create_failure(self):
  153. """Test creating a failure record."""
  154. failure = FailureRecord(
  155. work_id="work123",
  156. chapter_index=0,
  157. error_type="ValueError",
  158. error_message="Invalid value",
  159. )
  160. assert failure.work_id == "work123"
  161. assert failure.resolved is False
  162. def test_failure_from_exception(self):
  163. """Test creating failure from exception."""
  164. try:
  165. raise ValueError("Test error")
  166. except Exception as e:
  167. failure = FailureRecord.from_exception("work123", e, chapter_index=5)
  168. assert failure.error_type == "ValueError"
  169. assert failure.error_message == "Test error"
  170. assert failure.chapter_index == 5
  171. def test_failure_to_dict(self):
  172. """Test converting failure to dictionary."""
  173. failure = FailureRecord(
  174. work_id="work123",
  175. chapter_index=0,
  176. error_type="ValueError",
  177. error_message="Test",
  178. )
  179. data = failure.to_dict()
  180. assert data["work_id"] == "work123"
  181. assert data["error_type"] == "ValueError"
  182. class TestJSONLStore:
  183. """Test JSONLStore functionality."""
  184. def test_init_creates_directory(self):
  185. """Test that initialization creates base directory."""
  186. with tempfile.TemporaryDirectory() as tmpdir:
  187. store_dir = Path(tmpdir) / "store"
  188. store = JSONLStore(store_dir)
  189. assert store_dir.exists()
  190. def test_save_and_load_work_item(self):
  191. """Test saving and loading work items."""
  192. with tempfile.TemporaryDirectory() as tmpdir:
  193. store = JSONLStore(Path(tmpdir))
  194. work = WorkItem(
  195. work_id="test123",
  196. file_path="/test.txt",
  197. file_size=100,
  198. chapter_count=1,
  199. )
  200. store.save_work_item(work)
  201. loaded = list(store.load_work_items())
  202. assert len(loaded) == 1
  203. assert loaded[0].work_id == "test123"
  204. def test_save_work_item_overwrite_mode(self):
  205. """Test overwrite mode removes duplicates."""
  206. with tempfile.TemporaryDirectory() as tmpdir:
  207. store = JSONLStore(Path(tmpdir))
  208. work = WorkItem(
  209. work_id="test123",
  210. file_path="/test.txt",
  211. file_size=100,
  212. chapter_count=1,
  213. )
  214. store.save_work_item(work, mode="append")
  215. store.save_work_item(work, mode="append") # Duplicate
  216. loaded = list(store.load_work_items())
  217. assert len(loaded) == 2 # JSONL has duplicates
  218. # Now use overwrite
  219. work.chapter_count = 5 # Modified
  220. store.save_work_item(work, mode="overwrite")
  221. loaded = list(store.load_work_items())
  222. assert len(loaded) == 1 # Only one after overwrite
  223. assert loaded[0].chapter_count == 5
  224. def test_get_work_item(self):
  225. """Test getting specific work item."""
  226. with tempfile.TemporaryDirectory() as tmpdir:
  227. store = JSONLStore(Path(tmpdir))
  228. work1 = WorkItem(
  229. work_id="work1",
  230. file_path="/test1.txt",
  231. file_size=100,
  232. chapter_count=1,
  233. )
  234. work2 = WorkItem(
  235. work_id="work2",
  236. file_path="/test2.txt",
  237. file_size=200,
  238. chapter_count=2,
  239. )
  240. store.save_work_item(work1)
  241. store.save_work_item(work2)
  242. found = store.get_work_item("work1")
  243. assert found is not None
  244. assert found.file_size == 100
  245. not_found = store.get_work_item("work999")
  246. assert not_found is None
  247. def test_save_and_load_chapters(self):
  248. """Test saving and loading chapters."""
  249. with tempfile.TemporaryDirectory() as tmpdir:
  250. store = JSONLStore(Path(tmpdir))
  251. chapter1 = ChapterItem(
  252. work_id="work123",
  253. chapter_index=0,
  254. title="Chapter 1",
  255. content="Content 1",
  256. )
  257. chapter2 = ChapterItem(
  258. work_id="work123",
  259. chapter_index=1,
  260. title="Chapter 2",
  261. content="Content 2",
  262. )
  263. store.save_chapter(chapter1)
  264. store.save_chapter(chapter2)
  265. chapters = list(store.load_chapters("work123"))
  266. assert len(chapters) == 2
  267. def test_get_chapter(self):
  268. """Test getting specific chapter."""
  269. with tempfile.TemporaryDirectory() as tmpdir:
  270. store = JSONLStore(Path(tmpdir))
  271. chapter = ChapterItem(
  272. work_id="work123",
  273. chapter_index=5,
  274. title="Chapter 5",
  275. content="Content",
  276. )
  277. store.save_chapter(chapter)
  278. found = store.get_chapter("work123", 5)
  279. assert found is not None
  280. assert found.title == "Chapter 5"
  281. not_found = store.get_chapter("work123", 99)
  282. assert not_found is None
  283. def test_save_and_load_failures(self):
  284. """Test saving and loading failures."""
  285. with tempfile.TemporaryDirectory() as tmpdir:
  286. store = JSONLStore(Path(tmpdir))
  287. failure = FailureRecord(
  288. work_id="work123",
  289. chapter_index=0,
  290. error_type="ValueError",
  291. error_message="Test error",
  292. )
  293. store.save_failure(failure)
  294. failures = list(store.load_failures("work123"))
  295. assert len(failures) == 1
  296. assert failures[0].error_message == "Test error"
  297. def test_delete_work(self):
  298. """Test deleting a work item."""
  299. with tempfile.TemporaryDirectory() as tmpdir:
  300. store = JSONLStore(Path(tmpdir))
  301. work = WorkItem(
  302. work_id="test123",
  303. file_path="/test.txt",
  304. file_size=100,
  305. chapter_count=1,
  306. )
  307. store.save_work_item(work)
  308. # Create chapter directory
  309. chapter = ChapterItem(
  310. work_id="test123",
  311. chapter_index=0,
  312. title="Chapter 1",
  313. content="Content",
  314. )
  315. store.save_chapter(chapter)
  316. # Delete
  317. store.delete_work("test123")
  318. # Verify deletion
  319. assert not store.work_exists("test123")
  320. assert not (Path(tmpdir) / "test123").exists()
  321. def test_atomic_write_handles_empty_data(self):
  322. """Test atomic write with empty data."""
  323. with tempfile.TemporaryDirectory() as tmpdir:
  324. store = JSONLStore(Path(tmpdir))
  325. path = Path(tmpdir) / "test.jsonl"
  326. store._atomic_write(path, "")
  327. assert path.exists()
  328. assert path.read_text() == ""
  329. def test_corrupted_line_skipped(self):
  330. """Test that corrupted lines are skipped during read."""
  331. with tempfile.TemporaryDirectory() as tmpdir:
  332. store_dir = Path(tmpdir)
  333. store = JSONLStore(store_dir)
  334. work = WorkItem(
  335. work_id="test123",
  336. file_path="/test.txt",
  337. file_size=100,
  338. chapter_count=1,
  339. )
  340. store.save_work_item(work)
  341. # Corrupt the file by appending invalid JSON
  342. path = store_dir / "work_items.jsonl"
  343. with open(path, "a") as f:
  344. f.write("\n{invalid json}\n")
  345. # Should still load valid entries
  346. loaded = list(store.load_work_items())
  347. assert len(loaded) == 1
  348. assert loaded[0].work_id == "test123"
  349. class TestRepository:
  350. """Test Repository interface."""
  351. def test_init(self):
  352. """Test repository initialization."""
  353. with tempfile.TemporaryDirectory() as tmpdir:
  354. repo = Repository(Path(tmpdir))
  355. assert repo.storage_dir == Path(tmpdir)
  356. def test_create_and_get_work(self):
  357. """Test creating and getting a work item."""
  358. with tempfile.TemporaryDirectory() as tmpdir:
  359. repo = Repository(Path(tmpdir))
  360. # Create a test file
  361. test_file = Path(tmpdir) / "test.txt"
  362. test_file.write_text("test content")
  363. work = repo.create_work(str(test_file), title="Test Novel")
  364. retrieved = repo.get_work(work.work_id)
  365. assert retrieved is not None
  366. assert retrieved.metadata["title"] == "Test Novel"
  367. def test_get_work_or_raise(self):
  368. """Test get_work_or_raise raises on not found."""
  369. with tempfile.TemporaryDirectory() as tmpdir:
  370. repo = Repository(Path(tmpdir))
  371. with pytest.raises(WorkNotFoundError):
  372. repo.get_work_or_raise("nonexistent")
  373. def test_list_works(self):
  374. """Test listing all work items."""
  375. with tempfile.TemporaryDirectory() as tmpdir:
  376. repo = Repository(Path(tmpdir))
  377. # Create test files
  378. for i in range(3):
  379. test_file = Path(tmpdir) / f"test{i}.txt"
  380. test_file.write_text(f"content {i}")
  381. repo.create_work(str(test_file))
  382. works = repo.list_works()
  383. assert len(works) == 3
  384. def test_list_works_by_status(self):
  385. """Test filtering works by status."""
  386. with tempfile.TemporaryDirectory() as tmpdir:
  387. repo = Repository(Path(tmpdir))
  388. test_file = Path(tmpdir) / "test.txt"
  389. test_file.write_text("content")
  390. work1 = repo.create_work(str(test_file))
  391. work2 = repo.create_work(str(test_file))
  392. repo.update_work_status(work1.work_id, WorkStatus.TRANSLATING)
  393. translating = repo.list_works(status=WorkStatus.TRANSLATING)
  394. assert len(translating) == 1
  395. assert translating[0].work_id == work1.work_id
  396. def test_update_work_status(self):
  397. """Test updating work status."""
  398. with tempfile.TemporaryDirectory() as tmpdir:
  399. repo = Repository(Path(tmpdir))
  400. test_file = Path(tmpdir) / "test.txt"
  401. test_file.write_text("content")
  402. work = repo.create_work(str(test_file))
  403. repo.update_work_status(work.work_id, WorkStatus.TRANSLATING)
  404. retrieved = repo.get_work(work.work_id)
  405. assert retrieved.status == WorkStatus.TRANSLATING
  406. def test_save_and_get_chapter(self):
  407. """Test saving and getting chapters."""
  408. with tempfile.TemporaryDirectory() as tmpdir:
  409. repo = Repository(Path(tmpdir))
  410. test_file = Path(tmpdir) / "test.txt"
  411. test_file.write_text("content")
  412. work = repo.create_work(str(test_file))
  413. chapter = ChapterItem(
  414. work_id=work.work_id,
  415. chapter_index=0,
  416. title="Chapter 1",
  417. content="Content",
  418. )
  419. repo.save_chapter(work.work_id, chapter)
  420. retrieved = repo.get_chapter(work.work_id, 0)
  421. assert retrieved is not None
  422. assert retrieved.title == "Chapter 1"
  423. def test_get_pending_chapters(self):
  424. """Test getting pending chapters."""
  425. with tempfile.TemporaryDirectory() as tmpdir:
  426. repo = Repository(Path(tmpdir))
  427. test_file = Path(tmpdir) / "test.txt"
  428. test_file.write_text("content")
  429. work = repo.create_work(str(test_file))
  430. # Add chapters with different statuses
  431. for i in range(3):
  432. chapter = ChapterItem(
  433. work_id=work.work_id,
  434. chapter_index=i,
  435. title=f"Chapter {i}",
  436. content="Content",
  437. status=ChapterStatus.COMPLETED if i == 0 else ChapterStatus.PENDING,
  438. )
  439. repo.save_chapter(work.work_id, chapter)
  440. pending = repo.get_pending_chapters(work.work_id)
  441. assert len(pending) == 2
  442. def test_record_failure(self):
  443. """Test recording a failure."""
  444. with tempfile.TemporaryDirectory() as tmpdir:
  445. repo = Repository(Path(tmpdir))
  446. test_file = Path(tmpdir) / "test.txt"
  447. test_file.write_text("content")
  448. work = repo.create_work(str(test_file))
  449. try:
  450. raise ValueError("Test error")
  451. except Exception as e:
  452. failure = repo.record_failure(work.work_id, 0, e)
  453. assert failure.error_type == "ValueError"
  454. failures = repo.get_failures(work.work_id)
  455. assert len(failures) == 1
  456. def test_get_work_stats(self):
  457. """Test getting work statistics."""
  458. with tempfile.TemporaryDirectory() as tmpdir:
  459. repo = Repository(Path(tmpdir))
  460. test_file = Path(tmpdir) / "test.txt"
  461. test_file.write_text("content")
  462. work = repo.create_work(str(test_file))
  463. # Add chapters
  464. for i in range(5):
  465. chapter = ChapterItem(
  466. work_id=work.work_id,
  467. chapter_index=i,
  468. title=f"Chapter {i}",
  469. content="Content",
  470. status=ChapterStatus.COMPLETED if i < 3 else ChapterStatus.PENDING,
  471. )
  472. repo.save_chapter(work.work_id, chapter)
  473. stats = repo.get_work_stats(work.work_id)
  474. assert stats["total_chapters"] == 5
  475. assert stats["completed_chapters"] == 3
  476. assert stats["pending_chapters"] == 2
  477. def test_save_chapter_translation(self):
  478. """Test saving chapter translation."""
  479. with tempfile.TemporaryDirectory() as tmpdir:
  480. repo = Repository(Path(tmpdir))
  481. test_file = Path(tmpdir) / "test.txt"
  482. test_file.write_text("content")
  483. work = repo.create_work(str(test_file))
  484. chapter = ChapterItem(
  485. work_id=work.work_id,
  486. chapter_index=0,
  487. title="Chapter 1",
  488. content="Original",
  489. )
  490. repo.save_chapter(work.work_id, chapter)
  491. repo.save_chapter_translation(work.work_id, 0, "Translated")
  492. retrieved = repo.get_chapter(work.work_id, 0)
  493. assert retrieved.translation == "Translated"
  494. assert retrieved.status == ChapterStatus.COMPLETED
  495. class TestCrashSafety:
  496. """Test crash-safety features."""
  497. def test_atomic_write_preserves_original_on_error(self):
  498. """Test that atomic write preserves original file on error."""
  499. with tempfile.TemporaryDirectory() as tmpdir:
  500. store = JSONLStore(Path(tmpdir))
  501. work = WorkItem(
  502. work_id="test",
  503. file_path="/test.txt",
  504. file_size=100,
  505. chapter_count=1,
  506. )
  507. store.save_work_item(work)
  508. # Get original content
  509. path = Path(tmpdir) / "work_items.jsonl"
  510. original_content = path.read_text()
  511. # Simulate crash by creating a temp file and stopping
  512. temp_path = path.with_suffix(".tmp")
  513. temp_path.write_text("corrupted data")
  514. # Original should be unchanged
  515. assert path.read_text() == original_content
  516. # Clean up temp file
  517. temp_path.unlink()
  518. # Verify we can still load
  519. loaded = list(store.load_work_items())
  520. assert len(loaded) == 1
  521. def test_append_mode_is_crash_safe(self):
  522. """Test that append mode is crash-safe at line level."""
  523. with tempfile.TemporaryDirectory() as tmpdir:
  524. store = JSONLStore(Path(tmpdir))
  525. # Add initial work
  526. work = WorkItem(
  527. work_id="work1",
  528. file_path="/test1.txt",
  529. file_size=100,
  530. chapter_count=1,
  531. )
  532. store.save_work_item(work)
  533. # Add another work
  534. work2 = WorkItem(
  535. work_id="work2",
  536. file_path="/test2.txt",
  537. file_size=200,
  538. chapter_count=2,
  539. )
  540. store.save_work_item(work2)
  541. # Both should be readable
  542. works = list(store.load_work_items())
  543. assert len(works) == 2