Files
dirigent/crates/dirigent_archivist/tests/multi_backend_writer_test.rs
T
2026-05-08 01:59:04 +02:00

253 lines
8.1 KiB
Rust

#![cfg(feature = "test-utils")]
//! Integration tests for Task 17's per-backend queued writer task.
//!
//! These exercise the full enqueue → batch → coalesce → dispatch pipeline
//! end-to-end by constructing real writer tasks against `MockBackend`
//! instances and driving them through the `Archivist` coordinator.
//!
//! The tests are timing-sensitive: the batch window is 25ms (0ms in the
//! backpressure test) and the backpressure test artificially slows the
//! backend. Assertions use tolerant margins so they survive CI jitter.
use std::sync::Arc;
use std::time::Duration;
use dirigent_archivist::backend::mock::MockBackend;
use dirigent_archivist::backend::{ArchiveBackend, HealthStatus};
use dirigent_archivist::coordinator::Archivist;
use dirigent_archivist::registry::writer::spawn_writer;
use dirigent_archivist::registry::{
ArchiveRegistration, FailureMode, OverflowPolicy, WritePolicy,
};
use uuid::Uuid;
/// Builds a minimal `MessageRecord` whose `session` is `scroll`.
///
/// Each call mints a fresh v7 `message_id` and stamps the record with the
/// current UTC time. The record has role `"user"`, body `"hi"`, no parent,
/// no author, no content parts, no attachments and `Null` metadata.
fn sample_message(scroll: Uuid) -> dirigent_archivist::types::MessageRecord {
    use dirigent_archivist::types::MessageRecord;

    // Generated up front, id first and timestamp second.
    let message_id = Uuid::now_v7();
    let ts = chrono::Utc::now();

    MessageRecord {
        version: 1,
        message_id,
        session: scroll,
        parent_id: None,
        ts,
        role: "user".into(),
        author: None,
        content_md: "hi".into(),
        content_parts: None,
        attachments: Vec::new(),
        metadata: serde_json::Value::Null,
    }
}
/// Creates a queued-write registration named `name` backed by `backend`.
///
/// A writer task is spawned with capacity 8 and a 25ms batch window, and the
/// registration carries a matching `WritePolicy::Queued`. The health,
/// last-error and consecutive-failure cells are shared between the writer
/// and the registration. The registration uses `FailureMode::Required` and
/// the caller-supplied `priority` and `overflow` policy.
fn queued_reg(
    name: &str,
    backend: Arc<MockBackend>,
    priority: u32,
    overflow: OverflowPolicy,
) -> Arc<ArchiveRegistration> {
    // State cells handed to both the writer task and the registration.
    let health = Arc::new(tokio::sync::RwLock::new(HealthStatus::Healthy));
    let last_error = Arc::new(tokio::sync::RwLock::new(None));
    let consecutive = Arc::new(tokio::sync::RwLock::new(0u32));

    // Upcast once; the writer and the registration each get a handle.
    let dyn_backend: Arc<dyn ArchiveBackend> = backend;

    let writer_handle = spawn_writer(
        Arc::clone(&dyn_backend),
        name.into(),
        8,
        Duration::from_millis(25),
        overflow,
        Arc::clone(&health),
        Arc::clone(&last_error),
        Arc::clone(&consecutive),
    );

    let registration = ArchiveRegistration::new_with_shared_state(
        name.into(),
        "mock",
        dyn_backend,
        true,
        FailureMode::Required,
        priority,
        true,
        WritePolicy::Queued {
            batch_window_ms: 25,
            capacity: 8,
            overflow,
        },
        Some(writer_handle),
        health,
        last_error,
        consecutive,
    );
    Arc::new(registration)
}
/// Enqueues a single message through a queued registration, then polls the
/// mock backend until the message shows up (or the poll budget runs out).
#[tokio::test]
async fn queued_write_returns_immediately_then_eventually_lands() {
    let mock = Arc::new(MockBackend::new());
    let registration = queued_reg("queued", Arc::clone(&mock), 0, OverflowPolicy::Block);
    let archivist = Archivist::from_registrations(vec![registration]);

    let scroll = Uuid::new_v4();
    let batch = vec![sample_message(scroll)];
    archivist
        .append_messages(scroll, batch, None)
        .await
        .unwrap();

    // Poll up to 50 times, 10ms apart (500ms total), for the writer to drain.
    let poll_interval = Duration::from_millis(10);
    let mut landed = false;
    let mut polls = 0;
    while polls < 50 && !landed {
        landed = mock.appended_count(scroll) > 0;
        if !landed {
            tokio::time::sleep(poll_interval).await;
        }
        polls += 1;
    }

    assert!(landed, "writer task did not drain within 500ms");
    assert_eq!(mock.appended_count(scroll), 1);

    archivist.shutdown().await.unwrap();
}
/// Sends five single-message appends for one scroll and checks that all five
/// messages land using at most five backend `append_messages` calls.
#[tokio::test]
async fn coalescing_merges_consecutive_appends_for_same_scroll() {
    let mock = Arc::new(MockBackend::new());
    let registration = queued_reg("queued", Arc::clone(&mock), 0, OverflowPolicy::Block);
    let archivist = Archivist::from_registrations(vec![registration]);

    let scroll = Uuid::new_v4();
    let mut sent = 0;
    while sent < 5 {
        let batch = vec![sample_message(scroll)];
        archivist
            .append_messages(scroll, batch, None)
            .await
            .unwrap();
        sent += 1;
    }

    // Let the writer drain and coalesce, then shut down so that anything
    // still queued is flushed before the assertions run.
    tokio::time::sleep(Duration::from_millis(200)).await;
    archivist.shutdown().await.unwrap();

    // How many ops get merged depends on timing, so the only invariant that
    // can be asserted reliably is an upper bound: no more backend
    // `append_messages` invocations than enqueued ops.
    let backend_calls = mock.append_call_count(scroll);
    assert!(
        backend_calls <= 5,
        "expected <= 5 backend calls, got {}",
        backend_calls
    );

    let landed = mock.appended_count(scroll);
    assert_eq!(landed, 5, "all 5 messages should land");
}
/// Checks that `OverflowPolicy::Block` makes the sender wait when the queue
/// is full, by timing 24 sends against a deliberately slow backend.
///
/// Four ingredients make the stall observable:
///   1. A tight queue (capacity=2) so the channel actually fills up.
///   2. A slow backend (50ms per op) so the writer sits in dispatch long
///      enough for the channel to fill.
///   3. batch_window=0 so the writer dispatches each op immediately and
///      spends (almost) all its time in the 50ms per-op sleep rather than
///      draining quickly during batch collection.
///   4. Distinct scroll IDs so same-scroll coalescing cannot merge the whole
///      batch into a single dispatch call (and thus a single 50ms sleep).
///
/// With those in place the writer performs N serial 50ms calls; while it
/// sleeps, the sender cannot fit its next op into the full channel and has
/// to wait for a drain.
#[tokio::test]
async fn overflow_block_applies_backpressure() {
    let mock = Arc::new(MockBackend::new());
    mock.set_per_op_delay(Duration::from_millis(50));

    let capacity = 2usize;
    let overflow = OverflowPolicy::Block;

    // State cells shared by the writer task and the registration.
    let health = Arc::new(tokio::sync::RwLock::new(HealthStatus::Healthy));
    let last_error = Arc::new(tokio::sync::RwLock::new(None));
    let consecutive = Arc::new(tokio::sync::RwLock::new(0u32));

    let backend: Arc<dyn ArchiveBackend> = mock.clone();
    let writer_handle = spawn_writer(
        Arc::clone(&backend),
        "queued".into(),
        capacity,
        Duration::ZERO,
        overflow,
        Arc::clone(&health),
        Arc::clone(&last_error),
        Arc::clone(&consecutive),
    );
    let registration = ArchiveRegistration::new_with_shared_state(
        "queued".into(),
        "mock",
        backend,
        true,
        FailureMode::Required,
        0,
        true,
        WritePolicy::Queued {
            batch_window_ms: 0,
            capacity,
            overflow,
        },
        Some(writer_handle),
        health,
        last_error,
        consecutive,
    );
    let archivist = Archivist::from_registrations(vec![Arc::new(registration)]);

    // Prime the writer with a single op, then pause briefly so it has time
    // to enter its first 50ms dispatch sleep. From then on the writer is
    // not receiving, so the capacity=2 channel fills and later sends have
    // to wait for a drain.
    let primer = Uuid::new_v4();
    archivist
        .append_messages(primer, vec![sample_message(primer)], None)
        .await
        .unwrap();
    tokio::time::sleep(Duration::from_millis(10)).await;

    // Time 24 further sends, each on its own scroll so nothing coalesces.
    // Every dispatch costs 50ms and the queue holds only 2, so the sender
    // is repeatedly held up waiting for the writer.
    let started = std::time::Instant::now();
    for scroll in std::iter::repeat_with(Uuid::new_v4).take(24) {
        let batch = vec![sample_message(scroll)];
        archivist
            .append_messages(scroll, batch, None)
            .await
            .unwrap();
    }
    let elapsed = started.elapsed();

    // The sender cannot finish instantly under these conditions. A 100ms
    // floor keeps the check meaningful (a non-blocking run completes in
    // microseconds) while staying lenient towards CI jitter.
    let floor = Duration::from_millis(100);
    assert!(
        elapsed >= floor,
        "block policy did not apply backpressure (elapsed: {:?})",
        elapsed
    );

    archivist.shutdown().await.unwrap();
}