//! End-to-end test: import a Claude fixture twice, expect no duplication; //! then append a new message and re-import, expect exactly 1 new message. use camino::Utf8PathBuf; use dirigent_archivist::{ backends::JsonlBackend, import::{claude::import_claude_sessions, ImportProgressSink}, Archivist, SessionListQuery, }; use std::sync::Arc; use uuid::Uuid; fn fixture_root() -> Utf8PathBuf { Utf8PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()) .join("tests/fixtures/claude_minimal") } /// Build a self-contained coordinator for a given archive root. /// /// Uses `from_single_backend` so that parallel-test runs do not race on a /// shared `.archives.json` in the tempdir's parent (which is what /// `new_with_single_archive` would create). async fn mk_archivist(root: std::path::PathBuf) -> dirigent_archivist::Result { let backend = Arc::new(JsonlBackend::new(root).await?); Archivist::from_single_backend("main".into(), backend).await } #[tokio::test] async fn claude_import_twice_is_idempotent() -> dirigent_archivist::Result<()> { let tmp = std::env::temp_dir().join(format!("claude_idem_{}", Uuid::now_v7())); let archivist = mk_archivist(tmp.clone()).await?; let fixture = fixture_root(); // First run — should import everything. let stats1 = import_claude_sessions(&archivist, &fixture, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?; assert!( stats1.sessions_imported >= 1, "expected at least one imported session, got stats {:?}", stats1 ); assert!( stats1.messages_written >= 2, "expected >=2 messages written, got {:?}", stats1 ); // Second run — should write nothing (fingerprint gate skips unchanged sessions). let stats2 = import_claude_sessions(&archivist, &fixture, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?; assert_eq!( stats2.messages_written, 0, "expected no re-write on second import, got {:?}", stats2 ); assert_eq!(stats2.sessions_imported, 0); assert!( stats2.sessions_skipped >= 1, "expected at least one skipped session, got {:?}", stats2 ); // Verify on disk: no duplicate message_ids within any session. let page = archivist .list_sessions_paged(SessionListQuery::default().with_limit(200)) .await?; for session in &page.items { let messages = archivist.get_messages(session.scroll_id, None).await?; let mut seen = std::collections::HashSet::new(); for m in &messages { assert!( seen.insert(m.message_id), "duplicate message_id {} in session {}", m.message_id, session.scroll_id ); } } let _ = tokio::fs::remove_dir_all(tmp).await; Ok(()) } #[tokio::test] async fn claude_import_picks_up_additive_growth() -> dirigent_archivist::Result<()> { // Copy the fixture to a mutable temp dir so we can append a message. let tmp_src = std::env::temp_dir().join(format!("claude_grow_src_{}", Uuid::now_v7())); let fixture = fixture_root(); copy_dir_recursive(&fixture.as_std_path().to_path_buf(), &tmp_src).await; let tmp_arch = std::env::temp_dir().join(format!("claude_grow_arch_{}", Uuid::now_v7())); let archivist = mk_archivist(tmp_arch.clone()).await?; let src = Utf8PathBuf::from_path_buf(tmp_src.clone()).unwrap(); let _ = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?; // Append a new message to the existing JSONL. let jsonl = find_jsonl(&tmp_src).expect("fixture jsonl not found"); let extra = r#"{"type":"user","uuid":"33333333-3333-7333-8333-333333333333","parentUuid":"22222222-2222-7222-8222-222222222222","timestamp":"2024-01-01T00:00:02Z","sessionId":"abc12345-1234-1234-1234-abcdef123456","cwd":"/home/user/myproj","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"follow up"}}"#; use tokio::io::AsyncWriteExt; let mut f = tokio::fs::OpenOptions::new() .append(true) .open(&jsonl) .await .unwrap(); f.write_all(extra.as_bytes()).await.unwrap(); f.write_all(b"\n").await.unwrap(); drop(f); let stats = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?; assert_eq!( stats.messages_written, 1, "expected 1 new message to be imported, got {:?}", stats ); assert_eq!( stats.sessions_updated, 1, "expected 1 session updated, got {:?}", stats ); let _ = tokio::fs::remove_dir_all(tmp_src).await; let _ = tokio::fs::remove_dir_all(tmp_arch).await; Ok(()) } async fn copy_dir_recursive(src: &std::path::Path, dst: &std::path::Path) { tokio::fs::create_dir_all(dst).await.unwrap(); let mut stack = vec![(src.to_path_buf(), dst.to_path_buf())]; while let Some((s, d)) = stack.pop() { let mut entries = tokio::fs::read_dir(&s).await.unwrap(); while let Some(entry) = entries.next_entry().await.unwrap() { let from = entry.path(); let to = d.join(entry.file_name()); if entry.file_type().await.unwrap().is_dir() { tokio::fs::create_dir_all(&to).await.unwrap(); stack.push((from, to)); } else { tokio::fs::copy(&from, &to).await.unwrap(); } } } } fn find_jsonl(dir: &std::path::Path) -> Option { for entry in walkdir::WalkDir::new(dir).into_iter().flatten() { if entry.file_type().is_file() && entry.path().extension().and_then(|s| s.to_str()) == Some("jsonl") { return Some(entry.path().to_path_buf()); } } None }