sync from monorepo @ 2452e92e
This commit is contained in:
@@ -0,0 +1,153 @@
|
||||
//! End-to-end test: import a Claude fixture twice, expect no duplication;
|
||||
//! then append a new message and re-import, expect exactly 1 new message.
|
||||
|
||||
use camino::Utf8PathBuf;
|
||||
use dirigent_archivist::{
|
||||
backends::JsonlBackend,
|
||||
import::{claude::import_claude_sessions, ImportProgressSink},
|
||||
Archivist, SessionListQuery,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
use uuid::Uuid;
|
||||
|
||||
fn fixture_root() -> Utf8PathBuf {
|
||||
Utf8PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap())
|
||||
.join("tests/fixtures/claude_minimal")
|
||||
}
|
||||
|
||||
/// Build a self-contained coordinator for a given archive root.
|
||||
///
|
||||
/// Uses `from_single_backend` so that parallel-test runs do not race on a
|
||||
/// shared `.archives.json` in the tempdir's parent (which is what
|
||||
/// `new_with_single_archive` would create).
|
||||
async fn mk_archivist(root: std::path::PathBuf) -> dirigent_archivist::Result<Archivist> {
|
||||
let backend = Arc::new(JsonlBackend::new(root).await?);
|
||||
Archivist::from_single_backend("main".into(), backend).await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn claude_import_twice_is_idempotent() -> dirigent_archivist::Result<()> {
|
||||
let tmp = std::env::temp_dir().join(format!("claude_idem_{}", Uuid::now_v7()));
|
||||
let archivist = mk_archivist(tmp.clone()).await?;
|
||||
|
||||
let fixture = fixture_root();
|
||||
|
||||
// First run — should import everything.
|
||||
let stats1 = import_claude_sessions(&archivist, &fixture, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
|
||||
assert!(
|
||||
stats1.sessions_imported >= 1,
|
||||
"expected at least one imported session, got stats {:?}",
|
||||
stats1
|
||||
);
|
||||
assert!(
|
||||
stats1.messages_written >= 2,
|
||||
"expected >=2 messages written, got {:?}",
|
||||
stats1
|
||||
);
|
||||
|
||||
// Second run — should write nothing (fingerprint gate skips unchanged sessions).
|
||||
let stats2 = import_claude_sessions(&archivist, &fixture, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
|
||||
assert_eq!(
|
||||
stats2.messages_written, 0,
|
||||
"expected no re-write on second import, got {:?}",
|
||||
stats2
|
||||
);
|
||||
assert_eq!(stats2.sessions_imported, 0);
|
||||
assert!(
|
||||
stats2.sessions_skipped >= 1,
|
||||
"expected at least one skipped session, got {:?}",
|
||||
stats2
|
||||
);
|
||||
|
||||
// Verify on disk: no duplicate message_ids within any session.
|
||||
let page = archivist
|
||||
.list_sessions_paged(SessionListQuery::default().with_limit(200))
|
||||
.await?;
|
||||
for session in &page.items {
|
||||
let messages = archivist.get_messages(session.scroll_id, None).await?;
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
for m in &messages {
|
||||
assert!(
|
||||
seen.insert(m.message_id),
|
||||
"duplicate message_id {} in session {}",
|
||||
m.message_id,
|
||||
session.scroll_id
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let _ = tokio::fs::remove_dir_all(tmp).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn claude_import_picks_up_additive_growth() -> dirigent_archivist::Result<()> {
|
||||
// Copy the fixture to a mutable temp dir so we can append a message.
|
||||
let tmp_src = std::env::temp_dir().join(format!("claude_grow_src_{}", Uuid::now_v7()));
|
||||
let fixture = fixture_root();
|
||||
copy_dir_recursive(&fixture.as_std_path().to_path_buf(), &tmp_src).await;
|
||||
|
||||
let tmp_arch = std::env::temp_dir().join(format!("claude_grow_arch_{}", Uuid::now_v7()));
|
||||
let archivist = mk_archivist(tmp_arch.clone()).await?;
|
||||
|
||||
let src = Utf8PathBuf::from_path_buf(tmp_src.clone()).unwrap();
|
||||
let _ = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
|
||||
|
||||
// Append a new message to the existing JSONL.
|
||||
let jsonl = find_jsonl(&tmp_src).expect("fixture jsonl not found");
|
||||
let extra = r#"{"type":"user","uuid":"33333333-3333-7333-8333-333333333333","parentUuid":"22222222-2222-7222-8222-222222222222","timestamp":"2024-01-01T00:00:02Z","sessionId":"abc12345-1234-1234-1234-abcdef123456","cwd":"/home/user/myproj","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"follow up"}}"#;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
let mut f = tokio::fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.open(&jsonl)
|
||||
.await
|
||||
.unwrap();
|
||||
f.write_all(extra.as_bytes()).await.unwrap();
|
||||
f.write_all(b"\n").await.unwrap();
|
||||
drop(f);
|
||||
|
||||
let stats = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?;
|
||||
assert_eq!(
|
||||
stats.messages_written, 1,
|
||||
"expected 1 new message to be imported, got {:?}",
|
||||
stats
|
||||
);
|
||||
assert_eq!(
|
||||
stats.sessions_updated, 1,
|
||||
"expected 1 session updated, got {:?}",
|
||||
stats
|
||||
);
|
||||
|
||||
let _ = tokio::fs::remove_dir_all(tmp_src).await;
|
||||
let _ = tokio::fs::remove_dir_all(tmp_arch).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn copy_dir_recursive(src: &std::path::Path, dst: &std::path::Path) {
|
||||
tokio::fs::create_dir_all(dst).await.unwrap();
|
||||
let mut stack = vec![(src.to_path_buf(), dst.to_path_buf())];
|
||||
while let Some((s, d)) = stack.pop() {
|
||||
let mut entries = tokio::fs::read_dir(&s).await.unwrap();
|
||||
while let Some(entry) = entries.next_entry().await.unwrap() {
|
||||
let from = entry.path();
|
||||
let to = d.join(entry.file_name());
|
||||
if entry.file_type().await.unwrap().is_dir() {
|
||||
tokio::fs::create_dir_all(&to).await.unwrap();
|
||||
stack.push((from, to));
|
||||
} else {
|
||||
tokio::fs::copy(&from, &to).await.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn find_jsonl(dir: &std::path::Path) -> Option<std::path::PathBuf> {
|
||||
for entry in walkdir::WalkDir::new(dir).into_iter().flatten() {
|
||||
if entry.file_type().is_file()
|
||||
&& entry.path().extension().and_then(|s| s.to_str()) == Some("jsonl")
|
||||
{
|
||||
return Some(entry.path().to_path_buf());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
Reference in New Issue
Block a user