Files
dirigent/crates/dirigent_archivist/tests/import_claude_idempotency_test.rs
T
2026-05-08 01:59:04 +02:00

154 lines
5.9 KiB
Rust

//! End-to-end test: import a Claude fixture twice, expect no duplication;
//! then append a new message and re-import, expect exactly 1 new message.
use camino::Utf8PathBuf;
use dirigent_archivist::{
backends::JsonlBackend,
import::{claude::import_claude_sessions, ImportProgressSink},
Archivist, SessionListQuery,
};
use std::sync::Arc;
use uuid::Uuid;
/// Path of the `claude_minimal` fixture tree, resolved against the directory
/// named by the `CARGO_MANIFEST_DIR` environment variable at runtime.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is unset or not valid Unicode.
fn fixture_root() -> Utf8PathBuf {
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
    let mut root = Utf8PathBuf::from(manifest_dir);
    root.push("tests/fixtures/claude_minimal");
    root
}
/// Creates an [`Archivist`] backed by one JSONL backend rooted at `root`,
/// registered under the name `"main"`.
///
/// `from_single_backend` is used on purpose: `new_with_single_archive` would
/// create a shared `.archives.json` in the tempdir's parent, and tests running
/// in parallel would race on that file.
async fn mk_archivist(root: std::path::PathBuf) -> dirigent_archivist::Result<Archivist> {
    let jsonl = JsonlBackend::new(root).await?;
    let backend = Arc::new(jsonl);
    Archivist::from_single_backend("main".into(), backend).await
}
/// Imports the unmodified fixture twice into a fresh archive and checks that
/// the second pass writes nothing and leaves no duplicated message ids behind.
#[tokio::test]
async fn claude_import_twice_is_idempotent() -> dirigent_archivist::Result<()> {
    use std::collections::{HashMap, HashSet};

    let archive_dir = std::env::temp_dir().join(format!("claude_idem_{}", Uuid::now_v7()));
    let archivist = mk_archivist(archive_dir.clone()).await?;
    let source = fixture_root();
    // The same empty map is handed to both import passes.
    let empty_map = HashMap::new();

    // Pass 1: the archive is empty, so everything in the fixture is new.
    let stats1 =
        import_claude_sessions(&archivist, &source, None, &ImportProgressSink::noop(), &empty_map)
            .await?;
    assert!(
        stats1.sessions_imported >= 1,
        "expected at least one imported session, got stats {:?}",
        stats1
    );
    assert!(
        stats1.messages_written >= 2,
        "expected >=2 messages written, got {:?}",
        stats1
    );

    // Pass 2: nothing changed on disk, so the fingerprint gate should skip
    // every session and write no messages.
    let stats2 =
        import_claude_sessions(&archivist, &source, None, &ImportProgressSink::noop(), &empty_map)
            .await?;
    assert_eq!(
        stats2.messages_written, 0,
        "expected no re-write on second import, got {:?}",
        stats2
    );
    assert_eq!(stats2.sessions_imported, 0);
    assert!(
        stats2.sessions_skipped >= 1,
        "expected at least one skipped session, got {:?}",
        stats2
    );

    // Cross-check what was actually stored: within each session every
    // message_id may occur only once.
    let query = SessionListQuery::default().with_limit(200);
    let page = archivist.list_sessions_paged(query).await?;
    for session in &page.items {
        let messages = archivist.get_messages(session.scroll_id, None).await?;
        let mut seen_ids = HashSet::new();
        for msg in &messages {
            let first_time = seen_ids.insert(msg.message_id);
            assert!(
                first_time,
                "duplicate message_id {} in session {}",
                msg.message_id,
                session.scroll_id
            );
        }
    }

    // Best-effort cleanup; a failure to delete must not fail the test.
    let _ = tokio::fs::remove_dir_all(archive_dir).await;
    Ok(())
}
/// Imports a private copy of the fixture, appends one more message line to its
/// JSONL file, re-imports, and expects exactly one new message and one updated
/// session to be reported.
#[tokio::test]
async fn claude_import_picks_up_additive_growth() -> dirigent_archivist::Result<()> {
    use tokio::io::AsyncWriteExt;

    // Copy the fixture to a mutable temp dir so we can append a message
    // without touching the checked-in fixture.
    let tmp_src = std::env::temp_dir().join(format!("claude_grow_src_{}", Uuid::now_v7()));
    let fixture = fixture_root();
    copy_dir_recursive(fixture.as_std_path(), &tmp_src).await;

    let tmp_arch = std::env::temp_dir().join(format!("claude_grow_arch_{}", Uuid::now_v7()));
    let archivist = mk_archivist(tmp_arch.clone()).await?;
    let src = Utf8PathBuf::from_path_buf(tmp_src.clone()).unwrap();

    // Baseline import; its stats are irrelevant here.
    let _ = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;

    // Append a new message to the existing JSONL.
    let jsonl = find_jsonl(&tmp_src).expect("fixture jsonl not found");
    let extra = r#"{"type":"user","uuid":"33333333-3333-7333-8333-333333333333","parentUuid":"22222222-2222-7222-8222-222222222222","timestamp":"2024-01-01T00:00:02Z","sessionId":"abc12345-1234-1234-1234-abcdef123456","cwd":"/home/user/myproj","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"follow up"}}"#;
    let mut f = tokio::fs::OpenOptions::new()
        .append(true)
        .open(&jsonl)
        .await
        .unwrap();
    f.write_all(extra.as_bytes()).await.unwrap();
    f.write_all(b"\n").await.unwrap();
    // `tokio::fs::File` performs writes on a background blocking task, so
    // `write_all` can return before the bytes reach the file. Flush before
    // dropping so the re-import below is guaranteed to see the full line.
    f.flush().await.unwrap();
    drop(f);

    // Re-import: only the appended message should be written.
    let stats = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
    assert_eq!(
        stats.messages_written, 1,
        "expected 1 new message to be imported, got {:?}",
        stats
    );
    assert_eq!(
        stats.sessions_updated, 1,
        "expected 1 session updated, got {:?}",
        stats
    );

    // Best-effort cleanup of both temp trees.
    let _ = tokio::fs::remove_dir_all(tmp_src).await;
    let _ = tokio::fs::remove_dir_all(tmp_arch).await;
    Ok(())
}
/// Copies the directory tree under `src` into `dst`, creating `dst` first.
///
/// Walks the tree iteratively with an explicit work stack of
/// `(source dir, destination dir)` pairs instead of recursing.
///
/// # Panics
///
/// Panics on any I/O error (every filesystem call is unwrapped).
async fn copy_dir_recursive(src: &std::path::Path, dst: &std::path::Path) {
    tokio::fs::create_dir_all(dst).await.unwrap();

    let mut pending = Vec::new();
    pending.push((src.to_path_buf(), dst.to_path_buf()));

    while let Some((src_dir, dst_dir)) = pending.pop() {
        let mut reader = tokio::fs::read_dir(&src_dir).await.unwrap();
        while let Some(item) = reader.next_entry().await.unwrap() {
            let source_path = item.path();
            let target_path = dst_dir.join(item.file_name());
            let kind = item.file_type().await.unwrap();

            // Anything that is not a directory is copied as a file.
            if !kind.is_dir() {
                tokio::fs::copy(&source_path, &target_path).await.unwrap();
                continue;
            }

            // Directories are created right away and queued for a later visit.
            tokio::fs::create_dir_all(&target_path).await.unwrap();
            pending.push((source_path, target_path));
        }
    }
}
/// Returns the first regular file with a `jsonl` extension found while walking
/// `dir`, or `None` if the walk yields no such file.
///
/// Entries that the walker reports as errors are silently skipped.
fn find_jsonl(dir: &std::path::Path) -> Option<std::path::PathBuf> {
    walkdir::WalkDir::new(dir)
        .into_iter()
        .filter_map(Result::ok)
        .find(|candidate| {
            let has_jsonl_ext =
                candidate.path().extension().and_then(|ext| ext.to_str()) == Some("jsonl");
            candidate.file_type().is_file() && has_jsonl_ext
        })
        .map(|hit| hit.path().to_path_buf())
}