//! SharingBus: single-producer, many-subscriber event multiplexer with //! subscriber-side filtering performed by a worker task. See //! docs/plans/2026-04-21-archivist-phase4-design.md §1. //! //! Architecture: //! - One internal `tokio::sync::broadcast::Sender` feeds a single //! worker task. The worker iterates `Vec` (behind `RwLock`), //! filter-matches each slot, and `try_send`s the event onto each slot's //! `mpsc::Sender`. //! - Slow subscribers drop their own events at their mpsc (counted in the //! slot's `lagged` atomic). The bus-internal broadcast channel never drops //! due to a slow subscriber — only due to the broadcast lag contract, which //! we log and continue. //! - `SessionRegistered` events late-bind `(connector_id, native_session_id) -> //! scroll_id` via a small cache consulted on every publish. use std::collections::HashMap; use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; use tokio::sync::{broadcast, mpsc, RwLock}; use tokio::task::JoinHandle; use tracing::{debug, warn}; use uuid::Uuid; use dirigent_protocol::streaming::{BusEvent, EventFilter}; pub use dirigent_protocol::streaming::BusReceiver; use dirigent_protocol::Event; const BUS_INTERNAL_CAPACITY: usize = 1024; const SUBSCRIBER_QUEUE_DEFAULT: usize = 256; /// Single-producer, many-subscriber event multiplexer. /// /// Subscribers see a `mpsc::Receiver` that only yields events /// matching their `EventFilter`. Filtering happens inside a single worker /// task, so the cost per event is O(n_subscribers) regardless of publisher /// count. Slow subscribers lose events at their own mpsc, not at the bus. pub struct SharingBus { publish_tx: broadcast::Sender, subscribers: Arc>>, scroll_id_cache: Arc>>, next_id: Arc, _worker: JoinHandle<()>, } struct SubscriberSlot { id: u64, filter: EventFilter, sender: mpsc::Sender, lagged: Arc, } impl SharingBus { /// Construct a new bus and spawn its dispatch worker. pub fn new() -> Arc { let (publish_tx, publish_rx) = broadcast::channel(BUS_INTERNAL_CAPACITY); let subscribers: Arc>> = Arc::new(RwLock::new(Vec::new())); let scroll_id_cache: Arc>> = Arc::new(RwLock::new(HashMap::new())); let next_id = Arc::new(AtomicU64::new(0)); let worker = tokio::spawn(run_worker(publish_rx, Arc::clone(&subscribers))); Arc::new(Self { publish_tx, subscribers, scroll_id_cache, next_id, _worker: worker, }) } /// Publish a `BusEvent` to all matching subscribers. /// /// This method also performs two side-effects on the scroll-id cache: /// /// 1. If the wrapped event is `Event::SessionRegistered`, the binding /// `(connector_id, session_id) -> scroll_id` is inserted into the /// cache, and the current event's `routing.scroll_id` is set so the /// binding event itself carries its own scroll_id downstream. /// 2. If the event's `routing.scroll_id` is absent but it carries both a /// `connector_id` and `native_session_id`, the cache is consulted to /// late-bind `scroll_id` before broadcasting. pub async fn publish(&self, mut bus_event: BusEvent) { // (2) Late-bind scroll_id from cache if we can, BEFORE the possibly // more specific (1) handling overrides it. This is a no-op for // SessionRegistered (its scroll_id is always populated in (1)). if bus_event.routing.scroll_id.is_none() { if let (Some(cid), Some(nsid)) = ( bus_event.routing.connector_id.as_ref(), bus_event.routing.native_session_id.as_ref(), ) { let cache = self.scroll_id_cache.read().await; if let Some(uuid) = cache.get(&(cid.clone(), nsid.clone())) { bus_event.routing.scroll_id = Some(*uuid); } } } // (1) If the wrapped event is SessionRegistered, populate the cache // and set scroll_id on the event itself. if let Event::SessionRegistered { connector_id, session_id, scroll_id, } = bus_event.event.as_ref() { match Uuid::parse_str(scroll_id) { Ok(uuid) => { self.scroll_id_cache .write() .await .insert((connector_id.clone(), session_id.clone()), uuid); bus_event.routing.scroll_id = Some(uuid); } Err(e) => { warn!( connector_id = %connector_id, session_id = %session_id, scroll_id = %scroll_id, error = %e, "SessionRegistered carried an unparseable scroll_id; skipping late-bind cache insert", ); } } } // No subscribers is not an error — ignore the Result. let _ = self.publish_tx.send(bus_event); } /// Subscribe to every event on the bus. pub async fn subscribe_all(&self) -> BusReceiver { self.subscribe_filtered(EventFilter::All, SUBSCRIBER_QUEUE_DEFAULT) .await } /// Subscribe to events that match `filter`. `queue_capacity` caps the /// buffered events between the worker and the caller's `recv()`. pub async fn subscribe_filtered( &self, filter: EventFilter, queue_capacity: usize, ) -> BusReceiver { let (tx, rx) = mpsc::channel(queue_capacity); let lagged = Arc::new(AtomicU64::new(0)); // Relaxed ordering is sufficient: subscriber IDs are only compared for // equality with other IDs issued by this same bus; there is no // cross-thread ordering dependency on this counter. let id = self.next_id.fetch_add(1, Ordering::Relaxed); self.subscribers.write().await.push(SubscriberSlot { id, filter, sender: tx, lagged: Arc::clone(&lagged), }); BusReceiver { id, rx, lagged } } /// Remove a subscriber by id. Idempotent. pub async fn unsubscribe(&self, id: u64) { self.subscribers.write().await.retain(|s| s.id != id); } } async fn run_worker( mut rx: broadcast::Receiver, subscribers: Arc>>, ) { loop { match rx.recv().await { Ok(evt) => { let mut closed_ids: Vec = Vec::new(); { let subs = subscribers.read().await; for slot in subs.iter() { if !slot.filter.matches(&evt) { continue; } match slot.sender.try_send(evt.clone()) { Ok(()) => {} Err(mpsc::error::TrySendError::Full(_)) => { slot.lagged.fetch_add(1, Ordering::Relaxed); warn!( subscriber_id = slot.id, "bus subscriber queue full; dropping event" ); } Err(mpsc::error::TrySendError::Closed(_)) => { closed_ids.push(slot.id); } } } } if !closed_ids.is_empty() { subscribers .write() .await .retain(|s| !closed_ids.contains(&s.id)); debug!(removed = closed_ids.len(), "GC'd closed subscriber slots"); } } Err(broadcast::error::RecvError::Lagged(n)) => { warn!(skipped = n, "SharingBus internal broadcast lagged"); } Err(broadcast::error::RecvError::Closed) => { debug!("SharingBus worker exiting (sender closed)"); return; } } } } // ─── Tests ─────────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use std::sync::Arc; use std::sync::atomic::Ordering; use std::time::Duration; use tokio::time::timeout; use uuid::Uuid; use super::*; use dirigent_protocol::streaming::{BusEvent, EventKind, EventOrigin, EventRouting}; use dirigent_protocol::Event; /// Build a minimal `BusEvent` for tests. Uses `Event::Connected` as payload /// unless a specific event is needed for late-bind checks. fn make_event( scroll_id: Option, connector_uid: Option, connector_id: Option, native_session_id: Option, kind: EventKind, event: Event, ) -> BusEvent { BusEvent { routing: EventRouting { scroll_id, connector_uid, connector_id, native_session_id, kind, }, origin: EventOrigin::Runtime, event: Arc::new(event), } } // 1. subscribe_all + publish: one event round-trips to receiver. #[tokio::test] async fn subscribe_all_receives_published_event() { let bus = SharingBus::new(); let mut recv = bus.subscribe_all().await; let ev = make_event( None, None, None, None, EventKind::System, Event::Connected, ); bus.publish(ev).await; let got = timeout(Duration::from_millis(200), recv.rx.recv()) .await .expect("timed out waiting for event") .expect("channel closed unexpectedly"); match got.event.as_ref() { Event::Connected => {} other => panic!("expected Event::Connected, got {:?}", other), } } // 2. ConnectorUid filter: matching UID passes, other UID skipped. #[tokio::test] async fn connector_uid_filter_only_forwards_matching_events() { let bus = SharingBus::new(); let target = Uuid::new_v4(); let other = Uuid::new_v4(); let mut recv = bus .subscribe_filtered(EventFilter::ConnectorUid(target), 16) .await; // Publish one matching and one non-matching event. let ev_match = make_event( None, Some(target), None, None, EventKind::System, Event::Connected, ); let ev_other = make_event( None, Some(other), None, None, EventKind::System, Event::Connected, ); bus.publish(ev_match).await; bus.publish(ev_other).await; // First recv returns the matching event. let got = timeout(Duration::from_millis(200), recv.rx.recv()) .await .expect("timed out waiting for first event") .expect("channel closed unexpectedly"); assert_eq!(got.routing.connector_uid, Some(target)); // Second recv must time out — no other matching event was published. let result = timeout(Duration::from_millis(100), recv.rx.recv()).await; assert!( result.is_err(), "expected no further events, got: {:?}", result.ok().flatten().map(|e| e.routing.connector_uid) ); } // 3. Queue full = lagged counter increments, first event still delivered. #[tokio::test] async fn full_queue_increments_lagged_counter() { let bus = SharingBus::new(); // Capacity 1 — only one event can be buffered before try_send fails. let mut recv = bus.subscribe_filtered(EventFilter::All, 1).await; // Publish 5 events without draining. for _ in 0..5 { let ev = make_event( None, None, None, None, EventKind::System, Event::Connected, ); bus.publish(ev).await; } // Give the worker a chance to process all 5. for _ in 0..10 { tokio::task::yield_now().await; } tokio::time::sleep(Duration::from_millis(20)).await; // First event is still in the queue. let first = timeout(Duration::from_millis(200), recv.rx.recv()) .await .expect("timed out waiting for first event") .expect("channel closed unexpectedly"); match first.event.as_ref() { Event::Connected => {} other => panic!("expected Event::Connected, got {:?}", other), } // At minimum 4 events were dropped (5 published, 1 fit). let lagged = recv.lagged.load(Ordering::Relaxed); assert!( lagged >= 4, "expected lagged >= 4 after publishing 5 events to a capacity-1 queue, got {}", lagged ); } // 4. scroll_id late-bind: SessionRegistered populates cache; subsequent // events with matching (connector_id, native_session_id) get their // scroll_id filled in before dispatch. #[tokio::test] async fn session_registered_populates_cache_and_late_binds_subsequent_events() { let bus = SharingBus::new(); let scroll = Uuid::new_v4(); // Subscriber filters on ScrollId(scroll). It should see: // - the SessionRegistered event (bus sets its own scroll_id at publish) // - a follow-up event with (connector_id="c", native_session_id="s") // that had no scroll_id on entry (late-bound from the cache). let mut recv = bus .subscribe_filtered(EventFilter::ScrollId(scroll), 16) .await; // --- publish SessionRegistered (binding event) --- let reg_event = Event::SessionRegistered { connector_id: "c".to_string(), session_id: "s".to_string(), scroll_id: scroll.to_string(), }; // We pass through the routing fields the producer would populate. // `scroll_id` starts as None; publish() sets it from the event payload. let reg_bus = make_event( None, None, Some("c".to_string()), Some("s".to_string()), EventKind::SessionLifecycle, reg_event, ); bus.publish(reg_bus).await; let got1 = timeout(Duration::from_millis(200), recv.rx.recv()) .await .expect("timed out waiting for SessionRegistered") .expect("channel closed unexpectedly"); assert!(matches!( got1.event.as_ref(), Event::SessionRegistered { .. } )); assert_eq!(got1.routing.scroll_id, Some(scroll)); // --- publish a follow-up event with no scroll_id but matching // connector_id + native_session_id --- let follow_up = make_event( None, None, Some("c".to_string()), Some("s".to_string()), EventKind::System, Event::Connected, ); bus.publish(follow_up).await; let got2 = timeout(Duration::from_millis(200), recv.rx.recv()) .await .expect("timed out waiting for late-bound follow-up") .expect("channel closed unexpectedly"); assert_eq!( got2.routing.scroll_id, Some(scroll), "follow-up event should have had scroll_id late-bound from the cache" ); assert!(matches!(got2.event.as_ref(), Event::Connected)); } // 5. Dropped receiver is GC'd after the next publish. #[tokio::test] async fn closed_receiver_slot_is_reaped_on_next_publish() { let bus = SharingBus::new(); // Subscribe, then immediately drop the receiver — simulates a caller // that forgets (or skips) `unsubscribe()`. let recv = bus.subscribe_all().await; drop(recv); // Sanity check: slot is present before GC. assert_eq!(bus.subscribers.read().await.len(), 1); // Publish one event; the worker encounters TrySendError::Closed and // schedules the slot for removal. let ev = make_event( None, None, None, None, EventKind::System, Event::Connected, ); bus.publish(ev).await; // Give the worker a moment to process and GC. for _ in 0..10 { tokio::task::yield_now().await; } tokio::time::sleep(Duration::from_millis(10)).await; assert_eq!( bus.subscribers.read().await.len(), 0, "closed subscriber slot should have been GC'd after publish" ); } }