sync from monorepo @ 2452e92e

This commit is contained in:
2026-05-08 01:59:04 +02:00
commit b03dc15371
459 changed files with 129586 additions and 0 deletions
+477
View File
@@ -0,0 +1,477 @@
//! SharingBus: single-producer, many-subscriber event multiplexer with
//! subscriber-side filtering performed by a worker task. See
//! docs/plans/2026-04-21-archivist-phase4-design.md §1.
//!
//! Architecture:
//! - One internal `tokio::sync::broadcast::Sender<BusEvent>` feeds a single
//! worker task. The worker iterates `Vec<SubscriberSlot>` (behind `RwLock`),
//! filter-matches each slot, and `try_send`s the event onto each slot's
//! `mpsc::Sender<BusEvent>`.
//! - Slow subscribers drop their own events at their mpsc (counted in the
//! slot's `lagged` atomic). The bus-internal broadcast channel never drops
//! due to a slow subscriber — only due to the broadcast lag contract, which
//! we log and continue.
//! - `SessionRegistered` events late-bind `(connector_id, native_session_id) ->
//! scroll_id` via a small cache consulted on every publish.
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use tokio::sync::{broadcast, mpsc, RwLock};
use tokio::task::JoinHandle;
use tracing::{debug, warn};
use uuid::Uuid;
use dirigent_protocol::streaming::{BusEvent, EventFilter};
pub use dirigent_protocol::streaming::BusReceiver;
use dirigent_protocol::Event;
const BUS_INTERNAL_CAPACITY: usize = 1024;
const SUBSCRIBER_QUEUE_DEFAULT: usize = 256;
/// Single-producer, many-subscriber event multiplexer.
///
/// Subscribers see a `mpsc::Receiver<BusEvent>` that only yields events
/// matching their `EventFilter`. Filtering happens inside a single worker
/// task, so the cost per event is O(n_subscribers) regardless of publisher
/// count. Slow subscribers lose events at their own mpsc, not at the bus.
pub struct SharingBus {
publish_tx: broadcast::Sender<BusEvent>,
subscribers: Arc<RwLock<Vec<SubscriberSlot>>>,
scroll_id_cache: Arc<RwLock<HashMap<(String, String), Uuid>>>,
next_id: Arc<AtomicU64>,
_worker: JoinHandle<()>,
}
struct SubscriberSlot {
id: u64,
filter: EventFilter,
sender: mpsc::Sender<BusEvent>,
lagged: Arc<AtomicU64>,
}
impl SharingBus {
/// Construct a new bus and spawn its dispatch worker.
pub fn new() -> Arc<Self> {
let (publish_tx, publish_rx) = broadcast::channel(BUS_INTERNAL_CAPACITY);
let subscribers: Arc<RwLock<Vec<SubscriberSlot>>> = Arc::new(RwLock::new(Vec::new()));
let scroll_id_cache: Arc<RwLock<HashMap<(String, String), Uuid>>> =
Arc::new(RwLock::new(HashMap::new()));
let next_id = Arc::new(AtomicU64::new(0));
let worker = tokio::spawn(run_worker(publish_rx, Arc::clone(&subscribers)));
Arc::new(Self {
publish_tx,
subscribers,
scroll_id_cache,
next_id,
_worker: worker,
})
}
/// Publish a `BusEvent` to all matching subscribers.
///
/// This method also performs two side-effects on the scroll-id cache:
///
/// 1. If the wrapped event is `Event::SessionRegistered`, the binding
/// `(connector_id, session_id) -> scroll_id` is inserted into the
/// cache, and the current event's `routing.scroll_id` is set so the
/// binding event itself carries its own scroll_id downstream.
/// 2. If the event's `routing.scroll_id` is absent but it carries both a
/// `connector_id` and `native_session_id`, the cache is consulted to
/// late-bind `scroll_id` before broadcasting.
pub async fn publish(&self, mut bus_event: BusEvent) {
// (2) Late-bind scroll_id from cache if we can, BEFORE the possibly
// more specific (1) handling overrides it. This is a no-op for
// SessionRegistered (its scroll_id is always populated in (1)).
if bus_event.routing.scroll_id.is_none() {
if let (Some(cid), Some(nsid)) = (
bus_event.routing.connector_id.as_ref(),
bus_event.routing.native_session_id.as_ref(),
) {
let cache = self.scroll_id_cache.read().await;
if let Some(uuid) = cache.get(&(cid.clone(), nsid.clone())) {
bus_event.routing.scroll_id = Some(*uuid);
}
}
}
// (1) If the wrapped event is SessionRegistered, populate the cache
// and set scroll_id on the event itself.
if let Event::SessionRegistered {
connector_id,
session_id,
scroll_id,
} = bus_event.event.as_ref()
{
match Uuid::parse_str(scroll_id) {
Ok(uuid) => {
self.scroll_id_cache
.write()
.await
.insert((connector_id.clone(), session_id.clone()), uuid);
bus_event.routing.scroll_id = Some(uuid);
}
Err(e) => {
warn!(
connector_id = %connector_id,
session_id = %session_id,
scroll_id = %scroll_id,
error = %e,
"SessionRegistered carried an unparseable scroll_id; skipping late-bind cache insert",
);
}
}
}
// No subscribers is not an error — ignore the Result.
let _ = self.publish_tx.send(bus_event);
}
/// Subscribe to every event on the bus.
pub async fn subscribe_all(&self) -> BusReceiver {
self.subscribe_filtered(EventFilter::All, SUBSCRIBER_QUEUE_DEFAULT)
.await
}
/// Subscribe to events that match `filter`. `queue_capacity` caps the
/// buffered events between the worker and the caller's `recv()`.
pub async fn subscribe_filtered(
&self,
filter: EventFilter,
queue_capacity: usize,
) -> BusReceiver {
let (tx, rx) = mpsc::channel(queue_capacity);
let lagged = Arc::new(AtomicU64::new(0));
// Relaxed ordering is sufficient: subscriber IDs are only compared for
// equality with other IDs issued by this same bus; there is no
// cross-thread ordering dependency on this counter.
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
self.subscribers.write().await.push(SubscriberSlot {
id,
filter,
sender: tx,
lagged: Arc::clone(&lagged),
});
BusReceiver { id, rx, lagged }
}
/// Remove a subscriber by id. Idempotent.
pub async fn unsubscribe(&self, id: u64) {
self.subscribers.write().await.retain(|s| s.id != id);
}
}
async fn run_worker(
mut rx: broadcast::Receiver<BusEvent>,
subscribers: Arc<RwLock<Vec<SubscriberSlot>>>,
) {
loop {
match rx.recv().await {
Ok(evt) => {
let mut closed_ids: Vec<u64> = Vec::new();
{
let subs = subscribers.read().await;
for slot in subs.iter() {
if !slot.filter.matches(&evt) {
continue;
}
match slot.sender.try_send(evt.clone()) {
Ok(()) => {}
Err(mpsc::error::TrySendError::Full(_)) => {
slot.lagged.fetch_add(1, Ordering::Relaxed);
warn!(
subscriber_id = slot.id,
"bus subscriber queue full; dropping event"
);
}
Err(mpsc::error::TrySendError::Closed(_)) => {
closed_ids.push(slot.id);
}
}
}
}
if !closed_ids.is_empty() {
subscribers
.write()
.await
.retain(|s| !closed_ids.contains(&s.id));
debug!(removed = closed_ids.len(), "GC'd closed subscriber slots");
}
}
Err(broadcast::error::RecvError::Lagged(n)) => {
warn!(skipped = n, "SharingBus internal broadcast lagged");
}
Err(broadcast::error::RecvError::Closed) => {
debug!("SharingBus worker exiting (sender closed)");
return;
}
}
}
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use std::sync::Arc;
use std::sync::atomic::Ordering;
use std::time::Duration;
use tokio::time::timeout;
use uuid::Uuid;
use super::*;
use dirigent_protocol::streaming::{BusEvent, EventKind, EventOrigin, EventRouting};
use dirigent_protocol::Event;
/// Build a minimal `BusEvent` for tests. Uses `Event::Connected` as payload
/// unless a specific event is needed for late-bind checks.
fn make_event(
scroll_id: Option<Uuid>,
connector_uid: Option<Uuid>,
connector_id: Option<String>,
native_session_id: Option<String>,
kind: EventKind,
event: Event,
) -> BusEvent {
BusEvent {
routing: EventRouting {
scroll_id,
connector_uid,
connector_id,
native_session_id,
kind,
},
origin: EventOrigin::Runtime,
event: Arc::new(event),
}
}
// 1. subscribe_all + publish: one event round-trips to receiver.
#[tokio::test]
async fn subscribe_all_receives_published_event() {
let bus = SharingBus::new();
let mut recv = bus.subscribe_all().await;
let ev = make_event(
None,
None,
None,
None,
EventKind::System,
Event::Connected,
);
bus.publish(ev).await;
let got = timeout(Duration::from_millis(200), recv.rx.recv())
.await
.expect("timed out waiting for event")
.expect("channel closed unexpectedly");
match got.event.as_ref() {
Event::Connected => {}
other => panic!("expected Event::Connected, got {:?}", other),
}
}
// 2. ConnectorUid filter: matching UID passes, other UID skipped.
#[tokio::test]
async fn connector_uid_filter_only_forwards_matching_events() {
let bus = SharingBus::new();
let target = Uuid::new_v4();
let other = Uuid::new_v4();
let mut recv = bus
.subscribe_filtered(EventFilter::ConnectorUid(target), 16)
.await;
// Publish one matching and one non-matching event.
let ev_match = make_event(
None,
Some(target),
None,
None,
EventKind::System,
Event::Connected,
);
let ev_other = make_event(
None,
Some(other),
None,
None,
EventKind::System,
Event::Connected,
);
bus.publish(ev_match).await;
bus.publish(ev_other).await;
// First recv returns the matching event.
let got = timeout(Duration::from_millis(200), recv.rx.recv())
.await
.expect("timed out waiting for first event")
.expect("channel closed unexpectedly");
assert_eq!(got.routing.connector_uid, Some(target));
// Second recv must time out — no other matching event was published.
let result = timeout(Duration::from_millis(100), recv.rx.recv()).await;
assert!(
result.is_err(),
"expected no further events, got: {:?}",
result.ok().flatten().map(|e| e.routing.connector_uid)
);
}
// 3. Queue full = lagged counter increments, first event still delivered.
#[tokio::test]
async fn full_queue_increments_lagged_counter() {
let bus = SharingBus::new();
// Capacity 1 — only one event can be buffered before try_send fails.
let mut recv = bus.subscribe_filtered(EventFilter::All, 1).await;
// Publish 5 events without draining.
for _ in 0..5 {
let ev = make_event(
None,
None,
None,
None,
EventKind::System,
Event::Connected,
);
bus.publish(ev).await;
}
// Give the worker a chance to process all 5.
for _ in 0..10 {
tokio::task::yield_now().await;
}
tokio::time::sleep(Duration::from_millis(20)).await;
// First event is still in the queue.
let first = timeout(Duration::from_millis(200), recv.rx.recv())
.await
.expect("timed out waiting for first event")
.expect("channel closed unexpectedly");
match first.event.as_ref() {
Event::Connected => {}
other => panic!("expected Event::Connected, got {:?}", other),
}
// At minimum 4 events were dropped (5 published, 1 fit).
let lagged = recv.lagged.load(Ordering::Relaxed);
assert!(
lagged >= 4,
"expected lagged >= 4 after publishing 5 events to a capacity-1 queue, got {}",
lagged
);
}
// 4. scroll_id late-bind: SessionRegistered populates cache; subsequent
// events with matching (connector_id, native_session_id) get their
// scroll_id filled in before dispatch.
#[tokio::test]
async fn session_registered_populates_cache_and_late_binds_subsequent_events() {
let bus = SharingBus::new();
let scroll = Uuid::new_v4();
// Subscriber filters on ScrollId(scroll). It should see:
// - the SessionRegistered event (bus sets its own scroll_id at publish)
// - a follow-up event with (connector_id="c", native_session_id="s")
// that had no scroll_id on entry (late-bound from the cache).
let mut recv = bus
.subscribe_filtered(EventFilter::ScrollId(scroll), 16)
.await;
// --- publish SessionRegistered (binding event) ---
let reg_event = Event::SessionRegistered {
connector_id: "c".to_string(),
session_id: "s".to_string(),
scroll_id: scroll.to_string(),
};
// We pass through the routing fields the producer would populate.
// `scroll_id` starts as None; publish() sets it from the event payload.
let reg_bus = make_event(
None,
None,
Some("c".to_string()),
Some("s".to_string()),
EventKind::SessionLifecycle,
reg_event,
);
bus.publish(reg_bus).await;
let got1 = timeout(Duration::from_millis(200), recv.rx.recv())
.await
.expect("timed out waiting for SessionRegistered")
.expect("channel closed unexpectedly");
assert!(matches!(
got1.event.as_ref(),
Event::SessionRegistered { .. }
));
assert_eq!(got1.routing.scroll_id, Some(scroll));
// --- publish a follow-up event with no scroll_id but matching
// connector_id + native_session_id ---
let follow_up = make_event(
None,
None,
Some("c".to_string()),
Some("s".to_string()),
EventKind::System,
Event::Connected,
);
bus.publish(follow_up).await;
let got2 = timeout(Duration::from_millis(200), recv.rx.recv())
.await
.expect("timed out waiting for late-bound follow-up")
.expect("channel closed unexpectedly");
assert_eq!(
got2.routing.scroll_id,
Some(scroll),
"follow-up event should have had scroll_id late-bound from the cache"
);
assert!(matches!(got2.event.as_ref(), Event::Connected));
}
// 5. Dropped receiver is GC'd after the next publish.
#[tokio::test]
async fn closed_receiver_slot_is_reaped_on_next_publish() {
let bus = SharingBus::new();
// Subscribe, then immediately drop the receiver — simulates a caller
// that forgets (or skips) `unsubscribe()`.
let recv = bus.subscribe_all().await;
drop(recv);
// Sanity check: slot is present before GC.
assert_eq!(bus.subscribers.read().await.len(), 1);
// Publish one event; the worker encounters TrySendError::Closed and
// schedules the slot for removal.
let ev = make_event(
None,
None,
None,
None,
EventKind::System,
Event::Connected,
);
bus.publish(ev).await;
// Give the worker a moment to process and GC.
for _ in 0..10 {
tokio::task::yield_now().await;
}
tokio::time::sleep(Duration::from_millis(10)).await;
assert_eq!(
bus.subscribers.read().await.len(),
0,
"closed subscriber slot should have been GC'd after publish"
);
}
}
+101
View File
@@ -0,0 +1,101 @@
//! `[[streams]]` TOML config block parsed from `dirigent.toml`.
use serde::{Deserialize, Serialize};
use dirigent_protocol::streaming::StreamScope;
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct StreamsConfig {
#[serde(default, rename = "streams")]
pub entries: Vec<StreamConfig>,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct StreamConfig {
pub name: String,
#[serde(rename = "type")]
pub kind: String, // "matrix" | "langfuse" | ...
pub scope: StreamScope,
#[serde(default = "default_enabled")]
pub enabled: bool,
#[serde(default = "default_params")]
pub params: toml::Value, // type-specific
}
fn default_enabled() -> bool { true }
fn default_params() -> toml::Value { toml::Value::Table(toml::map::Map::new()) }
#[cfg(test)]
mod tests {
use super::*;
use uuid::Uuid;
const FULL_TOML: &str = r#"
[[streams]]
name = "matrix-main"
type = "matrix"
enabled = true
[streams.scope]
kind = "session"
scroll_id = "01985d00-0000-7000-8000-000000000000"
[streams.params]
homeserver_url = "https://matrix.org"
room_id = "!abc:matrix.org"
"#;
const MINIMAL_TOML: &str = r#"
[[streams]]
name = "minimal"
type = "langfuse"
[streams.scope]
kind = "archive_wide"
acknowledged = false
"#;
#[test]
fn round_trip_full() {
let cfg: StreamsConfig = toml::from_str(FULL_TOML).expect("parse failed");
assert_eq!(cfg.entries.len(), 1);
let entry = &cfg.entries[0];
assert_eq!(entry.name, "matrix-main");
assert_eq!(entry.kind, "matrix");
assert!(entry.enabled);
let expected_id = Uuid::parse_str("01985d00-0000-7000-8000-000000000000").unwrap();
match &entry.scope {
StreamScope::Session { scroll_id } => {
assert_eq!(*scroll_id, expected_id);
}
other => panic!("expected Session scope, got {:?}", other),
}
let params = entry.params.as_table().expect("params should be a table");
assert_eq!(
params.get("homeserver_url").and_then(|v| v.as_str()),
Some("https://matrix.org")
);
assert_eq!(
params.get("room_id").and_then(|v| v.as_str()),
Some("!abc:matrix.org")
);
}
#[test]
fn default_enabled_when_omitted() {
let cfg: StreamsConfig = toml::from_str(MINIMAL_TOML).expect("parse failed");
assert_eq!(cfg.entries.len(), 1);
let entry = &cfg.entries[0];
assert_eq!(entry.name, "minimal");
assert!(entry.enabled, "enabled should default to true");
}
#[test]
fn empty_config_is_valid() {
let cfg: StreamsConfig = toml::from_str("").expect("empty parse failed");
assert!(cfg.entries.is_empty());
}
}
+143
View File
@@ -0,0 +1,143 @@
//! Maps a `StreamConfig`'s `type` string to a concrete [`SessionStream`].
//!
//! Stream implementations register themselves with a
//! [`StreamFactoryRegistry`] at boot so the runtime can construct streams
//! from `[[streams]]` config blocks without knowing every concrete type up
//! front. This is the stream-side analogue of the archivist's
//! `BackendRegistry` / `BackendFactory` pattern (Phase 3).
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use thiserror::Error;
use dirigent_protocol::streaming::SessionStream;
use super::config::StreamConfig;
/// Builds a concrete [`SessionStream`] from a `StreamConfig`.
///
/// Each implementation advertises a single `kind` (matching the `type`
/// string in the TOML config). The registry routes config blocks to the
/// matching factory at boot.
#[async_trait]
pub trait StreamFactory: Send + Sync {
/// The `type` discriminator in `[[streams]]` — e.g. `"matrix"`,
/// `"langfuse"`. Must be unique across all registered factories.
fn kind(&self) -> &'static str;
/// Build a running stream from its config. Implementations are
/// expected to read `cfg.params` (type-specific TOML table), establish
/// any transport, and return an [`Arc<dyn SessionStream>`] ready to
/// receive events.
async fn build(&self, cfg: &StreamConfig) -> Result<Arc<dyn SessionStream>, StreamBuildError>;
}
/// Lookup table of `StreamFactory` implementations keyed by `kind()`.
///
/// Populate this at startup (typically once, on the main thread) before
/// handing it off to the runtime. The registry is cheap to clone via
/// `Arc` at the call site.
#[derive(Default)]
pub struct StreamFactoryRegistry {
factories: HashMap<&'static str, Arc<dyn StreamFactory>>,
}
impl std::fmt::Debug for StreamFactoryRegistry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("StreamFactoryRegistry")
.field("kinds", &self.factories.keys().collect::<Vec<_>>())
.finish()
}
}
impl StreamFactoryRegistry {
/// Construct an empty registry.
pub fn new() -> Self {
Self::default()
}
/// Register a factory. Consumes and returns `self` so callers can
/// chain `.register(...)` calls at startup.
pub fn register<F: StreamFactory + 'static>(mut self, f: F) -> Self {
self.factories.insert(f.kind(), Arc::new(f));
self
}
/// Look up a factory by its `kind` discriminator, or `None` if no
/// factory for that kind has been registered.
pub fn get(&self, kind: &str) -> Option<&Arc<dyn StreamFactory>> {
self.factories.get(kind)
}
}
/// Errors raised while building a `SessionStream` from its config.
#[derive(Debug, Error)]
pub enum StreamBuildError {
/// No factory is registered for `cfg.kind`.
#[error("unknown kind: {0}")]
UnknownKind(String),
/// Config was structurally valid TOML but semantically invalid
/// (missing required field, enum out of range, etc).
#[error("config: {0}")]
Config(String),
/// The factory accepted the config but the transport refused to come
/// up (network error, auth failure, …).
#[error("transport: {0}")]
Transport(String),
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use async_trait::async_trait;
use dirigent_protocol::streaming::{
BusEvent, SessionStream, StreamKind, StreamOutcome, StreamScope, StreamSummary,
};
struct DummyStream;
#[async_trait]
impl SessionStream for DummyStream {
fn summary(&self) -> StreamSummary {
StreamSummary {
name: "dummy".to_string(),
kind: StreamKind::Custom,
target: "dummy".to_string(),
active_since: chrono::Utc::now(),
}
}
fn scope(&self) -> StreamScope {
StreamScope::ArchiveWide { acknowledged: false }
}
async fn on_event(&self, _event: &BusEvent) -> StreamOutcome {
StreamOutcome::Ok
}
async fn shutdown(&self) {}
}
struct DummyFactory;
#[async_trait]
impl StreamFactory for DummyFactory {
fn kind(&self) -> &'static str {
"dummy"
}
async fn build(
&self,
_cfg: &StreamConfig,
) -> Result<Arc<dyn SessionStream>, StreamBuildError> {
Ok(Arc::new(DummyStream))
}
}
#[test]
fn register_and_lookup() {
let reg = StreamFactoryRegistry::new().register(DummyFactory);
assert!(reg.get("dummy").is_some());
assert!(reg.get("missing").is_none());
}
}
+118
View File
@@ -0,0 +1,118 @@
//! Consecutive-failure health drift for streams (K=5 threshold).
//!
//! Mirrors the archivist's drift logic but tracks a single stream's
//! outcomes. Re-exports the shared `HealthStatus` enum from the
//! archivist's backend module to avoid duplication.
pub use dirigent_archivist::backend::HealthStatus;
/// Number of consecutive failures before a stream drifts from `Degraded`
/// to `Unavailable`. Matches the archivist's backend drift threshold.
pub const FAILURE_THRESHOLD: u32 = 5;
/// Update health state after a successful event delivery.
/// Resets consecutive-failure counter; lifts Degraded → Healthy.
pub fn record_success(health: &mut HealthStatus, consecutive_failures: &mut u32) {
*consecutive_failures = 0;
if matches!(health, HealthStatus::Degraded { .. }) {
*health = HealthStatus::Healthy;
}
}
/// Update health state after a failed event delivery.
/// Increments counter; drifts Healthy → Degraded → Unavailable at K=5.
pub fn record_failure(
health: &mut HealthStatus,
consecutive_failures: &mut u32,
reason: String,
) {
*consecutive_failures += 1;
if *consecutive_failures >= FAILURE_THRESHOLD {
*health = HealthStatus::Unavailable {
reason: format!("{} failures: {}", *consecutive_failures, reason),
};
} else {
*health = HealthStatus::Degraded { reason };
}
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn record_success_promotes_degraded_to_healthy() {
let mut health = HealthStatus::Degraded {
reason: "earlier hiccup".to_string(),
};
let mut counter: u32 = 3;
record_success(&mut health, &mut counter);
assert_eq!(counter, 0);
assert_eq!(health, HealthStatus::Healthy);
}
#[test]
fn record_success_keeps_healthy_and_resets_counter() {
let mut health = HealthStatus::Healthy;
let mut counter: u32 = 2;
record_success(&mut health, &mut counter);
assert_eq!(counter, 0);
assert_eq!(health, HealthStatus::Healthy);
}
#[test]
fn record_success_does_not_rescue_unavailable() {
// The archivist rule is: once Unavailable, only operational events
// rescue. Success on a stream does clear the counter but we leave
// the final clearing decision to the caller; document the current
// behaviour: we only downgrade Degraded, not Unavailable.
let mut health = HealthStatus::Unavailable {
reason: "still broken".to_string(),
};
let mut counter: u32 = 7;
record_success(&mut health, &mut counter);
assert_eq!(counter, 0);
assert!(matches!(health, HealthStatus::Unavailable { .. }));
}
#[test]
fn record_failure_once_moves_healthy_to_degraded() {
let mut health = HealthStatus::Healthy;
let mut counter: u32 = 0;
record_failure(&mut health, &mut counter, "boom".to_string());
assert_eq!(counter, 1);
match health {
HealthStatus::Degraded { reason } => assert_eq!(reason, "boom"),
other => panic!("expected Degraded, got {:?}", other),
}
}
#[test]
fn record_failure_five_times_drifts_to_unavailable() {
let mut health = HealthStatus::Healthy;
let mut counter: u32 = 0;
for i in 0..5 {
record_failure(&mut health, &mut counter, format!("err-{i}"));
}
assert_eq!(counter, 5);
match health {
HealthStatus::Unavailable { reason } => {
assert!(reason.contains("5 failures"), "reason: {reason}");
}
other => panic!("expected Unavailable after 5 failures, got {:?}", other),
}
}
#[test]
fn record_failure_from_degraded_drifts_to_unavailable_at_threshold() {
let mut health = HealthStatus::Degraded {
reason: "early".to_string(),
};
let mut counter: u32 = 4;
record_failure(&mut health, &mut counter, "final".to_string());
assert_eq!(counter, 5);
assert!(matches!(health, HealthStatus::Unavailable { .. }));
}
}
+217
View File
@@ -0,0 +1,217 @@
//! `MatrixFactory`: build a Matrix [`SessionStream`] from a `[[streams]]`
//! config block.
//!
//! This is the first stream-side factory wired for the Phase 4 migration
//! (Task 18). The factory lives in `dirigent_core` rather than
//! `dirigent_matrix` because `StreamFactory` is defined here and
//! `dirigent_core` already depends on `dirigent_matrix` — putting it in
//! `dirigent_matrix` would create a cycle.
//!
//! ## Scope
//!
//! The factory's responsibility is narrow: parse `cfg.params`, resolve
//! the target Matrix room via a running `MatrixService`, and construct a
//! `MatrixSessionShare` configured for the stream path (no legacy
//! forwarder task). Command-proxy wiring (Matrix → Dirigent
//! `ConnectorCommand::SendMessage`) remains in
//! `CoreRuntime::create_matrix_share` for now; a follow-up will extend
//! the factory to cover that path.
//!
//! ## Config shape
//!
//! ```toml
//! [[streams]]
//! name = "matrix-main"
//! type = "matrix"
//!
//! [streams.scope]
//! kind = "session"
//! scroll_id = "01985d00-..."
//!
//! [streams.params]
//! connector_id = "opencode-1" # dirigent connector key
//! session_id = "native-abc123" # native connector session id
//! room_id = "!abc:matrix.org" # pre-existing room to attach to
//! homeserver_url = "https://matrix.org" # informational (service already knows)
//! ```
use std::sync::Arc;
use async_trait::async_trait;
use serde::Deserialize;
use dirigent_protocol::streaming::{SessionStream, StreamScope};
use super::config::StreamConfig;
use super::factory::{StreamBuildError, StreamFactory};
/// Stream-side factory for Matrix. See module docs for the expected
/// TOML shape.
pub struct MatrixFactory {
service: Arc<dirigent_matrix::MatrixService>,
}
impl MatrixFactory {
/// Build a factory bound to a running `MatrixService`. The service
/// is expected to be logged in and sync-started by the time
/// `build()` is called; if it isn't, `build()` returns
/// `StreamBuildError::Transport`.
pub fn new(service: Arc<dirigent_matrix::MatrixService>) -> Self {
Self { service }
}
}
#[derive(Debug, Deserialize)]
struct MatrixStreamParams {
/// Dirigent connector id that owns the session being bridged.
connector_id: String,
/// Native connector session id.
session_id: String,
/// Matrix room id — must be a pre-existing room the bot can access.
/// Room creation is still handled by
/// `CoreRuntime::create_matrix_share` until the factory path is
/// expanded to cover it.
room_id: String,
/// Informational; the logged-in `MatrixService` is the authority on
/// which homeserver to talk to. Accepted so configs can be
/// self-documenting and round-trip through TOML.
#[serde(default)]
#[allow(dead_code)]
homeserver_url: Option<String>,
}
#[async_trait]
impl StreamFactory for MatrixFactory {
fn kind(&self) -> &'static str {
"matrix"
}
async fn build(
&self,
cfg: &StreamConfig,
) -> Result<Arc<dyn SessionStream>, StreamBuildError> {
// Scope must be Session; Matrix shares are intrinsically
// per-session bi-directional bridges.
let scroll_id = match &cfg.scope {
StreamScope::Session { scroll_id } => *scroll_id,
other => {
return Err(StreamBuildError::Config(format!(
"matrix stream requires scope.kind = \"session\", got {:?}",
other
)));
}
};
// Parse type-specific params.
let params: MatrixStreamParams = cfg
.params
.clone()
.try_into()
.map_err(|e: toml::de::Error| {
StreamBuildError::Config(format!(
"matrix stream '{}': invalid params: {}",
cfg.name, e
))
})?;
// Look up the room via the service. We intentionally don't
// create or join rooms here — the room must already exist.
// Creation remains the responsibility of
// `CoreRuntime::create_matrix_share`.
let room = match self.service.room_by_id(&params.room_id).await {
Ok(Some(room)) => room,
Ok(None) => {
return Err(StreamBuildError::Transport(format!(
"matrix stream '{}': room '{}' not found on client \
— ensure the bot has joined it",
cfg.name, params.room_id
)));
}
Err(dirigent_matrix::MatrixError::NotLoggedIn) => {
return Err(StreamBuildError::Transport(
"matrix service is not logged in; cannot build stream"
.to_string(),
));
}
Err(dirigent_matrix::MatrixError::Config(msg)) => {
return Err(StreamBuildError::Config(format!(
"matrix stream '{}': {}",
cfg.name, msg
)));
}
Err(other) => {
return Err(StreamBuildError::Transport(format!(
"matrix stream '{}': {}",
cfg.name, other
)));
}
};
// Construct the share for the stream path (no legacy forwarder
// task). We drop the command receiver on the floor here — the
// Matrix → Dirigent direction is not covered by this factory
// yet; see the follow-up TODO in the module docs.
let (share, _command_rx) = dirigent_matrix::MatrixSessionShare::new_for_stream(
params.connector_id,
params.session_id,
scroll_id,
params.room_id,
room,
);
Ok(Arc::new(share) as Arc<dyn SessionStream>)
}
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn factory_kind_is_matrix() {
// The factory's `kind()` is static and doesn't require a running
// MatrixService to read — covered by a minimal construction
// check in the integration test suite.
fn assert_is_factory<F: StreamFactory>(_: &F) {}
// We can't easily build a MatrixService in a unit test (it needs
// an Account + data dir + SQLite store). The full smoke test
// lives in `packages/dirigent_matrix/tests/factory_test.rs` and
// the cross-crate registry test in
// `packages/dirigent_core/tests/matrix_migration_test.rs`.
//
// This module-local test exists only to assert that the impl
// block type-checks against the `StreamFactory` trait bound.
fn _compile_check(f: &MatrixFactory) {
assert_is_factory(f);
}
}
#[test]
fn matrix_stream_params_deserialise_ok() {
let toml_str = r#"
connector_id = "opencode-1"
session_id = "native-abc"
room_id = "!foo:example.com"
homeserver_url = "https://matrix.org"
"#;
let p: MatrixStreamParams = toml::from_str(toml_str).expect("parse");
assert_eq!(p.connector_id, "opencode-1");
assert_eq!(p.session_id, "native-abc");
assert_eq!(p.room_id, "!foo:example.com");
assert_eq!(p.homeserver_url.as_deref(), Some("https://matrix.org"));
}
#[test]
fn matrix_stream_params_reject_missing_required() {
// Missing room_id should fail.
let toml_str = r#"
connector_id = "opencode-1"
session_id = "native-abc"
"#;
let err: Result<MatrixStreamParams, _> = toml::from_str(toml_str);
assert!(err.is_err());
}
}
+80
View File
@@ -0,0 +1,80 @@
//! Test-only `SessionStream` implementation for integration tests.
//!
//! Records every received `BusEvent` into an in-memory buffer; can be
//! configured to fail the next N events to exercise health drift paths.
#![cfg(any(test, feature = "test-utils"))]
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::{AtomicU32, Ordering};
use async_trait::async_trait;
use chrono::Utc;
use dirigent_protocol::streaming::{
BusEvent, SessionStream, StreamError, StreamKind, StreamOutcome, StreamScope, StreamSummary,
};
/// In-memory `SessionStream` used in integration tests.
pub struct MockStream {
scope: StreamScope,
name: String,
pub received: Arc<Mutex<Vec<BusEvent>>>,
pub fail_remaining: Arc<AtomicU32>,
pub shutdown_called: Arc<Mutex<bool>>,
}
impl MockStream {
pub fn new(name: impl Into<String>, scope: StreamScope) -> Arc<Self> {
Arc::new(Self {
scope,
name: name.into(),
received: Arc::new(Mutex::new(Vec::new())),
fail_remaining: Arc::new(AtomicU32::new(0)),
shutdown_called: Arc::new(Mutex::new(false)),
})
}
/// Configure the next N `on_event` calls to return `StreamOutcome::Failed`.
pub fn fail_next(&self, n: u32) {
self.fail_remaining.store(n, Ordering::Relaxed);
}
pub fn received_count(&self) -> usize {
self.received.lock().unwrap().len()
}
pub fn was_shutdown(&self) -> bool {
*self.shutdown_called.lock().unwrap()
}
}
#[async_trait]
impl SessionStream for MockStream {
fn summary(&self) -> StreamSummary {
StreamSummary {
name: self.name.clone(),
kind: StreamKind::Custom,
target: "mock".into(),
active_since: Utc::now(),
}
}
fn scope(&self) -> StreamScope {
self.scope.clone()
}
async fn on_event(&self, event: &BusEvent) -> StreamOutcome {
if self.fail_remaining.load(Ordering::Relaxed) > 0 {
self.fail_remaining.fetch_sub(1, Ordering::Relaxed);
return StreamOutcome::Failed(StreamError::Rejected("mock fail".into()));
}
self.received.lock().unwrap().push(event.clone());
StreamOutcome::Ok
}
async fn shutdown(&self) {
*self.shutdown_called.lock().unwrap() = true;
}
}
+23
View File
@@ -0,0 +1,23 @@
//! SharingBus, StreamRegistry, and replay. See docs/plans/2026-04-21-archivist-phase4-design.md.
pub mod bus;
pub mod config;
pub mod factory;
pub mod health;
#[cfg(feature = "server")]
pub mod matrix;
#[cfg(any(test, feature = "test-utils"))]
pub mod mock;
pub mod registry;
pub mod replay;
pub use bus::{BusReceiver, SharingBus};
pub use config::{StreamConfig, StreamsConfig};
pub use factory::{StreamBuildError, StreamFactory, StreamFactoryRegistry};
pub use health::HealthStatus;
#[cfg(feature = "server")]
pub use matrix::MatrixFactory;
pub use registry::{StreamId, StreamInfo, StreamRegistration, StreamRegistry};
pub use replay::{ReplayError, ReplayOptions, ReplayReport, ReplaySpeed};
#[cfg(any(test, feature = "test-utils"))]
pub use mock::MockStream;
@@ -0,0 +1,236 @@
//! Owns all active streams.
//!
//! Populated at boot from `[[streams]]` config and at runtime via
//! [`StreamRegistry::attach`]. Each attached stream gets:
//!
//! - a bus subscription with an [`EventFilter`] derived from its scope,
//! - a dedicated worker task that drives `SessionStream::on_event`,
//! - a per-stream [`HealthStatus`] that drifts on consecutive failures
//! (see [`super::health`]).
//!
//! The worker is cancellable via a one-shot `mpsc::Sender<()>` on the
//! registration so [`detach`](StreamRegistry::detach) can stop delivery
//! deterministically before invoking `SessionStream::shutdown`.
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use tokio::sync::{RwLock, mpsc};
use tokio::task::JoinHandle;
use tracing::warn;
use uuid::Uuid;
use dirigent_protocol::streaming::{
BusReceiver, EventFilter, SessionStream, StreamOutcome, StreamScope, StreamSummary,
};
use super::bus::SharingBus;
use super::health::{HealthStatus, record_failure, record_success};
/// Per-subscriber queue capacity for a stream's bus subscription. Matches
/// the default used by `SharingBus::subscribe_all`.
const STREAM_QUEUE_CAPACITY: usize = 256;
/// Identifier of a registered stream. Opaque wrapper around a `Uuid` so
/// that callers can't confuse stream ids with scroll/connector ids.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StreamId(pub Uuid);
/// Full registration record for a live stream.
///
/// Held inside the registry behind an `Arc`; `detach` returns the Arc so
/// callers can inspect the final health state or await the worker handle
/// if they wish. Most fields are `Arc`s so the worker task and the
/// registry share state without serialising on a single lock.
pub struct StreamRegistration {
pub id: StreamId,
pub name: String,
pub stream: Arc<dyn SessionStream>,
pub scope: StreamScope,
pub enabled: bool,
pub health: Arc<RwLock<HealthStatus>>,
/// Number of consecutive delivery failures; drives the K=5 drift to
/// `Unavailable`. Stored atomic so the worker can update without
/// taking the health lock on every success.
pub consecutive_failures: Arc<AtomicU32>,
pub worker: JoinHandle<()>,
pub stop_tx: mpsc::Sender<()>,
}
/// Snapshot view of a registered stream. Returned by
/// [`StreamRegistry::list`] for telemetry / UI.
#[derive(Debug, Clone)]
pub struct StreamInfo {
pub id: StreamId,
pub name: String,
pub summary: StreamSummary,
pub scope: StreamScope,
pub enabled: bool,
pub health: HealthStatus,
/// Current consecutive-failure count (mirrors
/// `StreamRegistration::consecutive_failures` at read time).
pub lagged_count: u64,
}
/// The live registry of all streams wired to a [`SharingBus`].
pub struct StreamRegistry {
bus: Arc<SharingBus>,
regs: RwLock<Vec<Arc<StreamRegistration>>>,
}
impl StreamRegistry {
/// Build an empty registry bound to `bus`.
pub fn new(bus: Arc<SharingBus>) -> Self {
Self {
bus,
regs: RwLock::new(Vec::new()),
}
}
/// Attach a running stream.
///
/// Subscribes to the bus with a filter derived from `stream.scope()`,
/// spawns a worker task that ferries events into `on_event`, and
/// stores a registration with fresh health state.
pub async fn attach(&self, name: String, stream: Arc<dyn SessionStream>) -> StreamId {
let id = StreamId(Uuid::now_v7());
let scope = stream.scope();
let filter = scope_to_filter(&scope);
let bus_rx = self
.bus
.subscribe_filtered(filter, STREAM_QUEUE_CAPACITY)
.await;
let (stop_tx, stop_rx) = mpsc::channel(1);
let health = Arc::new(RwLock::new(HealthStatus::Healthy));
let failures = Arc::new(AtomicU32::new(0));
let stream_for_worker = Arc::clone(&stream);
let health_for_worker = Arc::clone(&health);
let failures_for_worker = Arc::clone(&failures);
let name_for_worker = name.clone();
let worker = tokio::spawn(run_stream_worker(
name_for_worker,
bus_rx,
stream_for_worker,
health_for_worker,
failures_for_worker,
stop_rx,
));
let reg = Arc::new(StreamRegistration {
id,
name,
stream,
scope,
enabled: true,
health,
consecutive_failures: failures,
worker,
stop_tx,
});
self.regs.write().await.push(reg);
id
}
/// Detach a stream. Signals the worker to exit, then invokes
/// `SessionStream::shutdown`. Returns the registration if the stream
/// was found, or `None` if the id was already detached.
pub async fn detach(&self, id: StreamId) -> Option<Arc<StreamRegistration>> {
let mut regs = self.regs.write().await;
let idx = regs.iter().position(|r| r.id == id)?;
let reg = regs.remove(idx);
drop(regs);
// Best-effort stop: if the channel is already closed (worker panicked)
// we still want to run shutdown.
let _ = reg.stop_tx.send(()).await;
reg.stream.shutdown().await;
Some(reg)
}
/// Look up a live stream by id.
pub async fn get_stream(&self, id: StreamId) -> Option<Arc<dyn SessionStream>> {
self.regs
.read()
.await
.iter()
.find(|r| r.id == id)
.map(|r| Arc::clone(&r.stream))
}
/// Snapshot every registered stream. Clones the underlying health
/// value so the returned `Vec` is safe to hand across async tasks
/// without holding any locks.
pub async fn list(&self) -> Vec<StreamInfo> {
let regs = self.regs.read().await;
let mut out = Vec::with_capacity(regs.len());
for r in regs.iter() {
let health = r.health.read().await.clone();
out.push(StreamInfo {
id: r.id,
name: r.name.clone(),
summary: r.stream.summary(),
scope: r.scope.clone(),
enabled: r.enabled,
health,
lagged_count: r.consecutive_failures.load(Ordering::Relaxed) as u64,
});
}
out
}
}
/// Translate a declarative [`StreamScope`] into the subscriber-side
/// [`EventFilter`] applied on the bus.
fn scope_to_filter(scope: &StreamScope) -> EventFilter {
match scope {
StreamScope::Session { scroll_id } => EventFilter::ScrollId(*scroll_id),
StreamScope::Connector { connector_uid } => EventFilter::ConnectorUid(*connector_uid),
StreamScope::ArchiveWide { .. } => EventFilter::All,
}
}
/// Worker loop: pulls events from the bus subscription, forwards them to
/// the stream, and updates health state on every outcome.
async fn run_stream_worker(
name: String,
mut rx: BusReceiver,
stream: Arc<dyn SessionStream>,
health: Arc<RwLock<HealthStatus>>,
failures: Arc<AtomicU32>,
mut stop_rx: mpsc::Receiver<()>,
) {
loop {
tokio::select! {
biased;
_ = stop_rx.recv() => {
return;
}
maybe_evt = rx.rx.recv() => {
let Some(evt) = maybe_evt else {
// Bus hung up — registry should detach but we can exit
// here regardless.
return;
};
match stream.on_event(&evt).await {
StreamOutcome::Ok | StreamOutcome::Skipped => {
let mut h = health.write().await;
let mut counter = failures.load(Ordering::Relaxed);
record_success(&mut h, &mut counter);
failures.store(counter, Ordering::Relaxed);
}
StreamOutcome::Failed(err) => {
let reason = err.to_string();
warn!(stream = %name, error = %reason, "stream rejected event");
let mut h = health.write().await;
let mut counter = failures.load(Ordering::Relaxed);
record_failure(&mut h, &mut counter, reason);
failures.store(counter, Ordering::Relaxed);
}
}
}
}
}
}
+226
View File
@@ -0,0 +1,226 @@
//! Replay: reads a session from the archive and dispatches synthetic
//! `BusEvent`s with `EventOrigin::Replay` directly to a target stream,
//! bypassing the `SharingBus`.
//!
//! Consumed by `CoreRuntime::replay_session_to_stream` (task 16). This
//! module intentionally exposes a free function that takes
//! `&Archivist`, `scroll_id`, `Arc<dyn SessionStream>`, and `ReplayOptions`
//! so it can be unit-tested without a full runtime.
use std::sync::Arc;
use std::time::Duration;
use uuid::Uuid;
use dirigent_archivist::coordinator::Archivist;
use dirigent_archivist::error::ArchivistError;
use dirigent_archivist::types::MessageRecord;
use dirigent_protocol::{
Event, Message, MessagePart, MessageRole, MessageStatus,
streaming::{BusEvent, EventOrigin, EventRouting, SessionStream, StreamOutcome},
};
/// Options controlling a replay pass.
#[derive(Debug, Clone)]
pub struct ReplayOptions {
/// When true and the session is an AcpConnection, meta-events are read from
/// the archive (currently only counted — rendering meta events as
/// `BusEvent`s is out of scope for Phase 4).
pub include_meta_events: bool,
/// Pace events in real time (sleep between consecutive timestamps) or emit
/// as fast as the target stream can consume.
pub speed: ReplaySpeed,
}
impl Default for ReplayOptions {
fn default() -> Self {
Self {
include_meta_events: false,
speed: ReplaySpeed::AsFastAsPossible,
}
}
}
/// Controls inter-event pacing during replay.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReplaySpeed {
/// Sleep the wall-clock delta between consecutive message timestamps.
Realtime,
/// Emit events as fast as the stream can consume.
AsFastAsPossible,
}
/// Outcome of a replay pass.
#[derive(Debug, Default, Clone)]
pub struct ReplayReport {
/// Total events dispatched to the stream (includes failed attempts).
pub events_sent: usize,
/// Events the stream rejected (`StreamOutcome::Failed`).
pub failures: usize,
/// Wall-clock duration of the replay in milliseconds.
pub duration_ms: u64,
}
/// Errors raised by `replay_session_to_stream` itself. Stream-side failures are
/// counted in `ReplayReport::failures` rather than propagated, so one bad event
/// doesn't abort the replay.
#[derive(Debug, thiserror::Error)]
pub enum ReplayError {
/// The archive has no session with the given scroll id.
#[error("session not found: {0}")]
SessionNotFound(Uuid),
/// Archivist returned a non-SessionUnknown error (I/O, decoding, etc).
#[error("archivist: {0}")]
Archivist(String),
}
/// Replay a session's archived messages to a single `SessionStream`.
///
/// Reads metadata + messages from `archivist`, synthesises a `BusEvent` per
/// message with `EventOrigin::Replay { replay_id }`, and dispatches directly
/// to the target stream. The `SharingBus` is not involved; live events remain
/// unaffected.
///
/// The function continues on stream failures and records the count in the
/// returned `ReplayReport`; only unrecoverable archive errors propagate.
pub async fn replay_session_to_stream(
archivist: &Archivist,
scroll_id: Uuid,
stream: Arc<dyn SessionStream>,
opts: ReplayOptions,
) -> Result<ReplayReport, ReplayError> {
let start = std::time::Instant::now();
let replay_id = Uuid::new_v4();
// Load metadata. Translate the archivist's typed `SessionUnknown` into
// the replay-level `SessionNotFound` variant; everything else becomes
// `Archivist(_)` so callers can distinguish "missing" from "broken".
let metadata = archivist
.get_session_metadata(scroll_id, None)
.await
.map_err(|e| match e {
ArchivistError::SessionUnknown(id) => ReplayError::SessionNotFound(id),
other => ReplayError::Archivist(other.to_string()),
})?;
let messages = archivist
.get_messages(scroll_id, None)
.await
.map_err(|e| ReplayError::Archivist(e.to_string()))?;
let connector_uid = Some(metadata.connector_uid);
let native_session_id = metadata.native_session_id.clone();
// We do not persist the orchestrator-side `connector_id` string in session
// metadata; the native session id is the best reversible handle we have.
let connector_id = native_session_id.clone().unwrap_or_default();
let mut events_sent = 0usize;
let mut failures = 0usize;
let mut prev_ts: Option<chrono::DateTime<chrono::Utc>> = None;
for record in messages {
if matches!(opts.speed, ReplaySpeed::Realtime) {
if let Some(prev) = prev_ts {
let delta = record.ts.signed_duration_since(prev);
if let Ok(d) = delta.to_std() {
// Cap per-step sleep at 1h to avoid pathological archives
// where a session sat idle for days.
if d > Duration::from_millis(0) && d < Duration::from_secs(3600) {
tokio::time::sleep(d).await;
}
}
}
prev_ts = Some(record.ts);
}
let message = message_from_record(&record, native_session_id.as_deref());
let event = Event::MessageCompleted {
connector_id: connector_id.clone(),
message,
};
let mut routing = EventRouting::derive(&event, connector_uid, &connector_id);
// `derive()` leaves scroll_id=None (the bus cache normally fills it in).
// During replay we have the authoritative scroll_id up front.
routing.scroll_id = Some(scroll_id);
let bus_event = BusEvent {
routing,
origin: EventOrigin::Replay { replay_id },
event: Arc::new(event),
};
match stream.on_event(&bus_event).await {
StreamOutcome::Ok | StreamOutcome::Skipped => {
events_sent += 1;
}
StreamOutcome::Failed(_err) => {
failures += 1;
events_sent += 1; // count attempted regardless
}
}
}
if opts.include_meta_events {
// Meta-events exist only on AcpConnection sessions; the read is
// cheap and idempotent, so we don't gate on `metadata.kind`. Render-
// as-BusEvent is out of scope for Phase 4 — we just probe the
// archive so missing meta-event storage surfaces as a log line
// here rather than later in the call chain.
let _ = archivist.get_meta_events(scroll_id, None).await;
}
Ok(ReplayReport {
events_sent,
failures,
duration_ms: start.elapsed().as_millis() as u64,
})
}
/// Synthesize a protocol `Message` from an archived `MessageRecord`.
///
/// The session_id we emit is the connector's native session id when known,
/// falling back to the stringified scroll_id so downstream routing at least
/// has a stable handle.
fn message_from_record(record: &MessageRecord, native_session_id: Option<&str>) -> Message {
Message {
id: record.message_id.to_string(),
session_id: native_session_id
.map(str::to_string)
.unwrap_or_else(|| record.session.to_string()),
role: parse_role(&record.role),
created_at: record.ts,
content: content_parts_from_record(record),
status: MessageStatus::Completed,
metadata: None,
}
}
/// Parse the archivist's stringly-typed role into the protocol enum.
///
/// `MessageRole` only has `User` and `Assistant` today; archived "system" /
/// "tool" rows (which the protocol layer does not support) fall back to
/// `User` rather than drop the message entirely. Lossy but preserves content.
fn parse_role(role: &str) -> MessageRole {
match role {
"assistant" => MessageRole::Assistant,
"user" => MessageRole::User,
// Protocol has no System/Tool variant; surface these as user messages
// so their content still reaches the stream.
_ => MessageRole::User,
}
}
/// Prefer the archived structured `content_parts` (round-trips tool calls,
/// code blocks, etc). Fall back to a single `Text` part built from the
/// markdown rendering when parts are missing or fail to parse.
fn content_parts_from_record(record: &MessageRecord) -> Vec<MessagePart> {
if let Some(parts) = &record.content_parts {
if let Ok(parsed) = serde_json::from_value::<Vec<MessagePart>>(parts.clone()) {
return parsed;
}
}
vec![MessagePart::Text {
text: record.content_md.clone(),
}]
}