154 lines
5.9 KiB
Rust
154 lines
5.9 KiB
Rust
//! End-to-end test: import a Claude fixture twice and expect no duplication;
//! then append a new message, re-import, and expect exactly one new message.
|
|
|
|
use camino::Utf8PathBuf;
|
|
use dirigent_archivist::{
|
|
backends::JsonlBackend,
|
|
import::{claude::import_claude_sessions, ImportProgressSink},
|
|
Archivist, SessionListQuery,
|
|
};
|
|
use std::sync::Arc;
|
|
use uuid::Uuid;
|
|
|
|
fn fixture_root() -> Utf8PathBuf {
|
|
Utf8PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap())
|
|
.join("tests/fixtures/claude_minimal")
|
|
}
|
|
|
|
/// Build a self-contained coordinator for a given archive root.
|
|
///
|
|
/// Uses `from_single_backend` so that parallel-test runs do not race on a
|
|
/// shared `.archives.json` in the tempdir's parent (which is what
|
|
/// `new_with_single_archive` would create).
|
|
async fn mk_archivist(root: std::path::PathBuf) -> dirigent_archivist::Result<Archivist> {
|
|
let backend = Arc::new(JsonlBackend::new(root).await?);
|
|
Archivist::from_single_backend("main".into(), backend).await
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn claude_import_twice_is_idempotent() -> dirigent_archivist::Result<()> {
|
|
let tmp = std::env::temp_dir().join(format!("claude_idem_{}", Uuid::now_v7()));
|
|
let archivist = mk_archivist(tmp.clone()).await?;
|
|
|
|
let fixture = fixture_root();
|
|
|
|
// First run — should import everything.
|
|
let stats1 = import_claude_sessions(&archivist, &fixture, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
|
|
assert!(
|
|
stats1.sessions_imported >= 1,
|
|
"expected at least one imported session, got stats {:?}",
|
|
stats1
|
|
);
|
|
assert!(
|
|
stats1.messages_written >= 2,
|
|
"expected >=2 messages written, got {:?}",
|
|
stats1
|
|
);
|
|
|
|
// Second run — should write nothing (fingerprint gate skips unchanged sessions).
|
|
let stats2 = import_claude_sessions(&archivist, &fixture, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
|
|
assert_eq!(
|
|
stats2.messages_written, 0,
|
|
"expected no re-write on second import, got {:?}",
|
|
stats2
|
|
);
|
|
assert_eq!(stats2.sessions_imported, 0);
|
|
assert!(
|
|
stats2.sessions_skipped >= 1,
|
|
"expected at least one skipped session, got {:?}",
|
|
stats2
|
|
);
|
|
|
|
// Verify on disk: no duplicate message_ids within any session.
|
|
let page = archivist
|
|
.list_sessions_paged(SessionListQuery::default().with_limit(200))
|
|
.await?;
|
|
for session in &page.items {
|
|
let messages = archivist.get_messages(session.scroll_id, None).await?;
|
|
let mut seen = std::collections::HashSet::new();
|
|
for m in &messages {
|
|
assert!(
|
|
seen.insert(m.message_id),
|
|
"duplicate message_id {} in session {}",
|
|
m.message_id,
|
|
session.scroll_id
|
|
);
|
|
}
|
|
}
|
|
|
|
let _ = tokio::fs::remove_dir_all(tmp).await;
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn claude_import_picks_up_additive_growth() -> dirigent_archivist::Result<()> {
|
|
// Copy the fixture to a mutable temp dir so we can append a message.
|
|
let tmp_src = std::env::temp_dir().join(format!("claude_grow_src_{}", Uuid::now_v7()));
|
|
let fixture = fixture_root();
|
|
copy_dir_recursive(&fixture.as_std_path().to_path_buf(), &tmp_src).await;
|
|
|
|
let tmp_arch = std::env::temp_dir().join(format!("claude_grow_arch_{}", Uuid::now_v7()));
|
|
let archivist = mk_archivist(tmp_arch.clone()).await?;
|
|
|
|
let src = Utf8PathBuf::from_path_buf(tmp_src.clone()).unwrap();
|
|
let _ = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
|
|
|
|
// Append a new message to the existing JSONL.
|
|
let jsonl = find_jsonl(&tmp_src).expect("fixture jsonl not found");
|
|
let extra = r#"{"type":"user","uuid":"33333333-3333-7333-8333-333333333333","parentUuid":"22222222-2222-7222-8222-222222222222","timestamp":"2024-01-01T00:00:02Z","sessionId":"abc12345-1234-1234-1234-abcdef123456","cwd":"/home/user/myproj","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"follow up"}}"#;
|
|
use tokio::io::AsyncWriteExt;
|
|
let mut f = tokio::fs::OpenOptions::new()
|
|
.append(true)
|
|
.open(&jsonl)
|
|
.await
|
|
.unwrap();
|
|
f.write_all(extra.as_bytes()).await.unwrap();
|
|
f.write_all(b"\n").await.unwrap();
|
|
drop(f);
|
|
|
|
let stats = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
|
|
assert_eq!(
|
|
stats.messages_written, 1,
|
|
"expected 1 new message to be imported, got {:?}",
|
|
stats
|
|
);
|
|
assert_eq!(
|
|
stats.sessions_updated, 1,
|
|
"expected 1 session updated, got {:?}",
|
|
stats
|
|
);
|
|
|
|
let _ = tokio::fs::remove_dir_all(tmp_src).await;
|
|
let _ = tokio::fs::remove_dir_all(tmp_arch).await;
|
|
Ok(())
|
|
}
|
|
|
|
async fn copy_dir_recursive(src: &std::path::Path, dst: &std::path::Path) {
|
|
tokio::fs::create_dir_all(dst).await.unwrap();
|
|
let mut stack = vec![(src.to_path_buf(), dst.to_path_buf())];
|
|
while let Some((s, d)) = stack.pop() {
|
|
let mut entries = tokio::fs::read_dir(&s).await.unwrap();
|
|
while let Some(entry) = entries.next_entry().await.unwrap() {
|
|
let from = entry.path();
|
|
let to = d.join(entry.file_name());
|
|
if entry.file_type().await.unwrap().is_dir() {
|
|
tokio::fs::create_dir_all(&to).await.unwrap();
|
|
stack.push((from, to));
|
|
} else {
|
|
tokio::fs::copy(&from, &to).await.unwrap();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn find_jsonl(dir: &std::path::Path) -> Option<std::path::PathBuf> {
|
|
for entry in walkdir::WalkDir::new(dir).into_iter().flatten() {
|
|
if entry.file_type().is_file()
|
|
&& entry.path().extension().and_then(|s| s.to_str()) == Some("jsonl")
|
|
{
|
|
return Some(entry.path().to_path_buf());
|
|
}
|
|
}
|
|
None
|
|
}
|