sync from monorepo @ 2452e92e

This commit is contained in:
2026-05-08 01:59:04 +02:00
commit b03dc15371
459 changed files with 129586 additions and 0 deletions
+53
View File
@@ -0,0 +1,53 @@
[workspace]
resolver = "2"
members = [
"crates/dirigent_protocol",
"crates/dirigent_core",
"crates/dirigent_tools",
"crates/dirigent_fermata",
"crates/dirigent_auth",
"crates/dirigent_config",
"crates/dirigent_acp_api",
"crates/dirigent_archivist",
"crates/dirigent_process",
"crates/dirigent_taskrunner",
"crates/dirigent_anth",
"crates/dirigent_inspector",
"crates/dirigent_projects",
"crates/dirigent_matrix",
"crates/dirigent_zed",
"crates/dirigent_langfuse",
"crates/dirigent_chatgpt",
"crates/dirigent_codex",
"crates/dirigent_testing",
"crates/opencode_client",
]
[workspace.lints.rust]
dead_code = "allow"
unused_imports = "allow"
unused_variables = "allow"
unused_mut = "allow"
unused_assignments = "allow"
[workspace.dependencies]
dirigent_protocol = { path = "crates/dirigent_protocol" }
dirigent_core = { path = "crates/dirigent_core" }
dirigent_tools = { path = "crates/dirigent_tools" }
dirigent_fermata = { path = "crates/dirigent_fermata" }
dirigent_auth = { path = "crates/dirigent_auth" }
dirigent_config = { path = "crates/dirigent_config" }
dirigent_acp_api = { path = "crates/dirigent_acp_api" }
dirigent_archivist = { path = "crates/dirigent_archivist" }
dirigent_process = { path = "crates/dirigent_process" }
dirigent_taskrunner = { path = "crates/dirigent_taskrunner" }
dirigent_anth = { path = "crates/dirigent_anth" }
dirigent_inspector = { path = "crates/dirigent_inspector" }
dirigent_projects = { path = "crates/dirigent_projects" }
dirigent_matrix = { path = "crates/dirigent_matrix", default-features = true }
dirigent_zed = { path = "crates/dirigent_zed" }
dirigent_langfuse = { path = "crates/dirigent_langfuse" }
dirigent_chatgpt = { path = "crates/dirigent_chatgpt" }
dirigent_codex = { path = "crates/dirigent_codex" }
dirigent_testing = { path = "crates/dirigent_testing" }
opencode_client = { path = "crates/opencode_client" }
+81
View File
@@ -0,0 +1,81 @@
# Dirigent
<p align="center">
<img src="dirigent.svg" alt="Dirigent" width="64" height="64">
</p>
<p align="center">Core libraries for the Dirigent agent orchestration platform.</p>
---
Dirigent is a multi-agent orchestration platform built around the Agent-Client Protocol (ACP). This repository contains the foundational library crates — the building blocks used by downstream tools such as [dirigate](https://git.g4b.org/dirigence/dirigate) and [fermata](https://git.g4b.org/dirigence/fermata).
> **Downstream mirror.** Active development happens in an upstream monorepo. This repository is an export of the core library crates and is updated on each release. Issues and contributions should be directed to the upstream project.
---
## Crates
| Crate | Description |
|-------|-------------|
| `dirigent_protocol` | ACP protocol types — messages, events, and RPC definitions |
| `dirigent_core` | Multi-connector orchestration runtime |
| `dirigent_tools` | Tool sandbox and execution abstractions |
| `dirigent_fermata` | Policy gate for AI coding agents (`.botignore` / `botignore.toml`) |
| `dirigent_auth` | User authorization model |
| `dirigent_config` | Configuration management |
| `dirigent_acp_api` | ACP server for incoming agent connections |
| `dirigent_archivist` | Event-driven session archival |
| `dirigent_process` | Child process management |
| `dirigent_taskrunner` | Background task runner |
| `dirigent_anth` | Claude Code JSONL session parser |
| `dirigent_inspector` | Session inspection tools |
| `dirigent_projects` | Project management primitives |
| `dirigent_matrix` | Matrix integration for session sharing |
| `dirigent_zed` | Zed editor integration |
| `dirigent_langfuse` | Langfuse observability integration |
| `dirigent_chatgpt` | ChatGPT `conversations.json` parser |
| `dirigent_codex` | OpenAI Codex session parser |
| `dirigent_testing` | Test utilities |
| `opencode_client` | OpenCode.ai HTTP client |
---
## Usage
### Library crates (via git dependency)
Add a crate to your `Cargo.toml`:
```toml
[dependencies]
dirigent_protocol = { git = "https://git.g4b.org/dirigence/dirigent" }
dirigent_core = { git = "https://git.g4b.org/dirigence/dirigent" }
```
Replace `dirigent_protocol` / `dirigent_core` with the crate you need. All crates follow the same pattern.
### Binary crates (cargo install)
**fermata** — policy gate CLI and Claude hook adapter:
```bash
cargo install --git https://git.g4b.org/dirigence/dirigent --features cli
```
**anth** — Claude Code session inspector:
```bash
cargo install --git https://git.g4b.org/dirigence/dirigent --bin anth_bear --features dirigent-paths
```
---
## License
Licensed under either of
- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <https://www.apache.org/licenses/LICENSE-2.0>)
- MIT License ([LICENSE-MIT](LICENSE-MIT) or <https://opensource.org/licenses/MIT>)
at your option.
+124
View File
@@ -0,0 +1,124 @@
# Package: dirigent_acp_api
ACP Server implementation for accepting incoming ACP connections from external agents.
## Quick Facts
- **Type**: Library
- **Main Entry**: src/lib.rs
- **Dependencies**: axum, tokio, serde, tracing, uuid, async-trait, dirigent_protocol
- **Status**: Core structure complete, integration with CoreRuntime pending
## Overview
The `dirigent_acp_api` package implements an ACP (Agent-Client Protocol) server that allows Dirigent to accept incoming connections from external ACP clients like Claude Code or custom agents. This enables session sharing, remote orchestration, and multi-client collaboration.
## Architecture
### Core Components
- **config.rs** - Server configuration types (`AcpServerConfig`)
- **error.rs** - Error types (`AcpServerError`, `JsonRpcErrorObject`)
- **jsonrpc.rs** - JSON-RPC 2.0 types and parsing
- **rpc.rs** - RPC handler and method dispatch
- **session_manager.rs** - Session/client tracking (TODO)
- **sse.rs** - SSE notification system (TODO)
- **event_bridge.rs** - Event translation (TODO)
### Key Types
```rust
pub struct AcpServerConfig {
pub enabled: bool, // Enable/disable server
pub port: u16, // Listen port (default: 3001)
pub allowed_origins: Option<Vec<String>>, // CORS origins
pub max_connections: usize, // Connection limit (default: 100)
}
```
### ConnectorOperations Trait
The RPC handler uses a trait abstraction to avoid circular dependencies with dirigent_core:
```rust
#[async_trait]
pub trait ConnectorOperations: Send + Sync {
async fn create_session(&self, connector_id: &str) -> Result<String>;
async fn load_session(&self, connector_id: &str, session_id: &str) -> Result<Session>;
async fn send_prompt(&self, connector_id: &str, session_id: &str, prompt: &str) -> Result<String>;
// ... more methods
}
```
## API Endpoints
### POST `/rpc`
JSON-RPC 2.0 endpoint supporting:
- `initialize` - Client handshake
- `session/new` - Create session
- `session/load` - Load existing session
- `session/prompt` - Send prompt
- `session/cancel` - Cancel generation
- `session/close` - Close session
### GET `/events`
Server-Sent Events for streaming notifications:
- `acp/messageChunk` - Streaming content
- `acp/messageComplete` - Generation complete
- `acp/sessionIdle` - Ready for input
### GET `/health`
Health check endpoint.
## Configuration UI
The ACP Server is configured via the web UI at **Configuration > ACP Server**:
- Enable/disable toggle
- Port configuration
- Max connections limit
- Allowed origins (CORS)
- Default connector selection
- Connected clients management
Server functions in `crates/api/src/acp_server.rs` bridge the UI and this package.
## Implementation Status
**Completed:**
- Configuration types (`AcpServerConfig`)
- Error types (`AcpServerError`)
- JSON-RPC types and parsing
- RPC handler structure with ConnectorOperations trait
**Pending:**
- Session Manager implementation
- SSE Notifier implementation
- Event Bridge implementation
- Axum router integration
- Web server integration
## Key Files
| File | Description |
|------|-------------|
| `src/lib.rs` | Module exports and router creation |
| `src/config.rs` | AcpServerConfig with validation |
| `src/error.rs` | Error types and codes |
| `src/jsonrpc.rs` | JSON-RPC 2.0 implementation |
| `src/rpc.rs` | RPC handler and method dispatch |
## Related Packages
- **dirigent_core** - Provides CoreHandle implementation of ConnectorOperations
- **dirigent_protocol** - Shared event and message types
- **api** - Server functions for UI configuration
- **web** - Configuration UI components
## Documentation
- **Architecture**: `docs/architecture/acp_server.md`
- **Configuration**: `docs/configuration/acp-connectors.md`
- **Tasks**: `docs/building/07_acp_serve/02_acp_server_tasks.md`
+30
View File
@@ -0,0 +1,30 @@
[package]
name = "dirigent_acp_api"
version = "0.1.0"
edition = "2021"
[lib]
path = "src/lib.rs"
[dependencies]
anyhow = "1.0"
async-trait = "0.1"
# ACP (Agent-Client Protocol) dependencies
axum = "0.8"
# ACP Server dependencies (Phase 2)
chrono = { version = "0.4", features = ["serde"] }
# Workspace dependencies
# Note: dirigent_protocol is used for Event types
dirigent_protocol = { path = "../dirigent_protocol" }
# Note: dirigent_core is NOT a direct dependency to avoid circular dependency.
# CoreHandle is passed into the ACP server at runtime via generics or trait objects.
# The mode/model mapping logic is duplicated here for legacy mode support.
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1", features = ["full"] }
tokio-stream = { version = "0.1", features = ["sync"] }
tower = "0.5"
tower-http = { version = "0.6", features = ["cors"] }
# ACP Server dependencies (Phase 1)
tracing = "0.1"
uuid = { version = "1.0", features = ["serde", "v4", "v7"] }
+5
View File
@@ -0,0 +1,5 @@
This crate should expose an ACP API so that Dirigent can be used by another ACP client.
A fun integration test would be to drive Dirigent with Dirigent (though that would probably need a dummy ACP agent for the inner instance to use).
However, this will also require Dirigent to "pass through" functionality that it currently believes it is responsible for.
@@ -0,0 +1,438 @@
//! Agent Request Tracker
//!
//! This module provides infrastructure for tracking agent-initiated requests
//! that require responses from HTTP clients. It's used to implement bidirectional
//! JSON-RPC communication in the ACP Server.
//!
//! ## Use Case
//!
//! When an agent (like Claude) sends a request to the client (like a permission
//! prompt), the ACP Server needs to:
//! 1. Forward the request to the client via SSE
//! 2. Wait for the client's response via HTTP POST
//! 3. Deliver the response back to the agent
//!
//! The `AgentRequestTracker` manages the pending requests and provides a way
//! to correlate responses with their corresponding requests.
//!
//! ## Example Flow
//!
//! ```rust,ignore
//! use dirigent_acp_api::agent_requests::AgentRequestTracker;
//! use serde_json::json;
//!
//! // Create tracker
//! let tracker = AgentRequestTracker::new();
//!
//! // Agent sends request - register it and get receiver
//! let request_id = json!(0);
//! let client_id = "client-123";
//! let receiver = tracker.register(client_id, request_id.clone());
//!
//! // Forward request to client via SSE...
//!
//! // Client responds via HTTP POST - complete the request
//! let response = json!({"selectedOptionId": "allow"});
//! tracker.complete(client_id, request_id, response)?;
//!
//! // The receiver now gets the response
//! let response_value = receiver.await?;
//! ```
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use anyhow::{anyhow, Result};
use serde_json::Value;
use tokio::sync::oneshot;
use tracing::{debug, warn};
/// Registry of agent-initiated requests that are waiting for a client reply.
///
/// Each pending request is stored as a oneshot sender keyed by client ID and
/// request ID. Completing a request sends the reply through that sender to
/// whoever holds the matching receiver.
///
/// ## Thread Safety
///
/// The map lives behind `Arc<Mutex<...>>`, so cloning the tracker yields
/// another handle to the same underlying state, not an independent copy.
#[derive(Debug, Clone)]
pub struct AgentRequestTracker {
/// Pending requests: `(client_id, request_id)` -> sender for the reply.
///
/// The request-ID half of the key is the `to_string()` rendering of the
/// JSON-RPC `id` value. An entry is removed when the request is completed,
/// timed out, or cleared; dropping the stored sender closes the receiver.
pending: Arc<Mutex<HashMap<(String, String), oneshot::Sender<Value>>>>,
}
impl Default for AgentRequestTracker {
fn default() -> Self {
Self::new()
}
}
impl AgentRequestTracker {
    /// Create a new agent request tracker
    ///
    /// Returns an empty tracker ready to register pending requests.
    pub fn new() -> Self {
        Self {
            pending: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// Build the map key identifying one pending request.
    ///
    /// The request ID is keyed by its `to_string()` rendering so that any
    /// JSON-RPC `id` value can be used.
    fn key(client_id: &str, request_id: &Value) -> (String, String) {
        (client_id.to_string(), request_id.to_string())
    }

    /// Lock the pending-request map.
    ///
    /// # Panics
    ///
    /// Panics if another thread panicked while holding the lock.
    fn lock_pending(
        &self,
    ) -> std::sync::MutexGuard<'_, HashMap<(String, String), oneshot::Sender<Value>>> {
        self.pending.lock().expect("Lock poisoned")
    }

    /// Register a pending agent request and return a receiver for the response
    ///
    /// Creates a oneshot channel, stores the sender under
    /// `(client_id, request_id)`, and hands back the receiver. Await the
    /// receiver to obtain the client's response.
    ///
    /// If a request with the same client and request ID is already pending,
    /// it is replaced: the earlier receiver is closed (it yields `RecvError`)
    /// and a warning is logged.
    ///
    /// # Parameters
    ///
    /// - `client_id`: The ID of the client that should respond to this request
    /// - `request_id`: The request ID from the agent (JSON-RPC id field)
    ///
    /// # Returns
    ///
    /// A oneshot receiver that will receive the client's response when
    /// `complete()` is called with matching client_id and request_id.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let receiver = tracker.register("client-123", json!(0));
    ///
    /// // Later, when client responds...
    /// tracker.complete("client-123", json!(0), response)?;
    ///
    /// // The receiver gets the response
    /// let response = receiver.await?;
    /// ```
    pub fn register(&self, client_id: &str, request_id: Value) -> oneshot::Receiver<Value> {
        let (tx, rx) = oneshot::channel();

        // The guard is a temporary, so the lock is released before logging.
        let replaced = self
            .lock_pending()
            .insert(Self::key(client_id, &request_id), tx)
            .is_some();

        if replaced {
            warn!(
                client_id = %client_id,
                request_id = %request_id,
                "Replaced an already pending agent request with the same ID; its receiver is now closed"
            );
        }
        debug!(
            client_id = %client_id,
            request_id = %request_id,
            "Registered pending agent request"
        );
        rx
    }

    /// Complete a pending agent request with the client's response
    ///
    /// Removes the pending entry and sends `response` through its oneshot
    /// channel. The entry is removed even when delivery fails.
    ///
    /// # Parameters
    ///
    /// - `client_id`: The ID of the client sending the response
    /// - `request_id`: The request ID from the original agent request
    /// - `response`: The client's response (JSON-RPC response object)
    ///
    /// # Returns
    ///
    /// `Ok(())` if the request was found and the response was delivered.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - No request is pending for this client and request ID (it may have
    ///   timed out or been cleared)
    /// - The receiver has been dropped, so the response cannot be delivered
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// // Client POSTs response to /acp/agent_response
    /// let response = json!({
    ///     "jsonrpc": "2.0",
    ///     "id": 0,
    ///     "result": {"selectedOptionId": "allow"}
    /// });
    ///
    /// tracker.complete("client-123", json!(0), response)?;
    /// ```
    pub fn complete(&self, client_id: &str, request_id: Value, response: Value) -> Result<()> {
        // Take the sender out under the lock, then release the lock before
        // sending or logging.
        let sender = self
            .lock_pending()
            .remove(&Self::key(client_id, &request_id));

        match sender {
            Some(sender) => {
                debug!(
                    client_id = %client_id,
                    request_id = %request_id,
                    "Completing pending agent request"
                );
                // `send` hands the value back on failure; only the fact that
                // the receiver is gone matters here.
                sender.send(response).map_err(|_| {
                    anyhow!(
                        "Failed to send response for request {}: receiver dropped",
                        request_id
                    )
                })
            }
            None => {
                warn!(
                    client_id = %client_id,
                    request_id = %request_id,
                    "Attempted to complete non-existent agent request (may have timed out)"
                );
                Err(anyhow!(
                    "Request ID {} not found for client {}",
                    request_id,
                    client_id
                ))
            }
        }
    }

    /// Timeout a pending agent request
    ///
    /// Removes the pending request from the map and logs a warning. Call it
    /// once a request has been pending longer than the caller is willing to
    /// wait. The tracker does not measure time itself, so the log message
    /// does not name a duration. Calling it for a request that is no longer
    /// pending is a silent no-op.
    ///
    /// # Parameters
    ///
    /// - `client_id`: The ID of the client that was supposed to respond
    /// - `request_id`: The request ID that timed out
    ///
    /// # Note
    ///
    /// This method does not send any error response through the channel.
    /// The stored sender is dropped, so a receiver that is still being
    /// awaited gets a `RecvError`.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// use tokio::time::{timeout, Duration};
    ///
    /// let receiver = tracker.register("client-123", json!(0));
    ///
    /// // Wait up to 30 seconds for response
    /// match timeout(Duration::from_secs(30), receiver).await {
    ///     Ok(Ok(response)) => {
    ///         // Got response
    ///     }
    ///     Ok(Err(_)) => {
    ///         // Channel closed: the sender was already dropped
    ///         tracker.timeout("client-123", json!(0));
    ///     }
    ///     Err(_) => {
    ///         // Timeout elapsed
    ///         tracker.timeout("client-123", json!(0));
    ///     }
    /// }
    /// ```
    pub fn timeout(&self, client_id: &str, request_id: Value) {
        let removed = self
            .lock_pending()
            .remove(&Self::key(client_id, &request_id))
            .is_some();

        if removed {
            warn!(
                client_id = %client_id,
                request_id = %request_id,
                "Agent request timed out without client response"
            );
        }
    }

    /// Get the number of pending requests
    ///
    /// Returns the total number of requests currently awaiting client
    /// responses across all clients.
    pub fn pending_count(&self) -> usize {
        self.lock_pending().len()
    }

    /// Get the number of pending requests for a specific client
    ///
    /// # Parameters
    ///
    /// - `client_id`: The ID of the client to query
    pub fn client_pending_count(&self, client_id: &str) -> usize {
        self.lock_pending()
            .keys()
            .filter(|key| key.0.as_str() == client_id)
            .count()
    }

    /// Clear pending requests (used when shutting down or on client disconnect)
    ///
    /// The removed oneshot senders are dropped, which causes their receivers
    /// to get `RecvError`.
    ///
    /// # Parameters
    ///
    /// - `client_id`: Optional client ID to clear only that client's pending requests.
    ///   If None, clears all pending requests.
    pub fn clear(&self, client_id: Option<&str>) {
        let mut pending = self.lock_pending();
        match client_id {
            Some(id) => {
                // Drop this client's entries in one in-place pass.
                pending.retain(|(cid, _), _| cid.as_str() != id);
                debug!(
                    client_id = %id,
                    "Cleared all pending agent requests for client"
                );
            }
            None => {
                let count = pending.len();
                pending.clear();
                debug!(
                    count = count,
                    "Cleared all pending agent requests"
                );
            }
        }
    }
}
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A completed request leaves the map and its payload reaches the receiver.
    #[tokio::test]
    async fn test_register_and_complete() {
        let tracker = AgentRequestTracker::new();
        let payload = json!({"result": "success"});

        let rx = tracker.register("client-123", json!(0));
        assert_eq!(tracker.pending_count(), 1);

        let outcome = tracker.complete("client-123", json!(0), payload.clone());
        assert!(outcome.is_ok());
        assert_eq!(tracker.pending_count(), 0);

        assert_eq!(rx.await.unwrap(), payload);
    }

    /// Completing an ID that was never registered is reported as an error.
    #[tokio::test]
    async fn test_complete_non_existent_request() {
        let tracker = AgentRequestTracker::new();

        let outcome = tracker.complete("client-123", json!(999), json!({"result": "success"}));

        assert!(outcome.is_err());
    }

    /// Timing out a request removes it and closes the receiver.
    #[tokio::test]
    async fn test_timeout() {
        let tracker = AgentRequestTracker::new();

        let rx = tracker.register("client-123", json!(0));
        assert_eq!(tracker.pending_count(), 1);

        tracker.timeout("client-123", json!(0));
        assert_eq!(tracker.pending_count(), 0);

        // The sender was dropped, so awaiting yields an error.
        assert!(rx.await.is_err());
    }

    /// Counts are tracked globally and per client.
    #[tokio::test]
    async fn test_multiple_pending_requests() {
        let tracker = AgentRequestTracker::new();
        let (first, second) = ("client-1", "client-2");

        // Keep the receivers alive for the duration of the test.
        let _receivers = [
            tracker.register(first, json!(0)),
            tracker.register(first, json!(1)),
            tracker.register(second, json!(0)),
        ];

        assert_eq!(tracker.pending_count(), 3);
        assert_eq!(tracker.client_pending_count(first), 2);
        assert_eq!(tracker.client_pending_count(second), 1);
    }

    /// `clear(None)` drops every pending request.
    #[tokio::test]
    async fn test_clear_all() {
        let tracker = AgentRequestTracker::new();
        let _receivers = [
            tracker.register("client-1", json!(0)),
            tracker.register("client-2", json!(0)),
        ];
        assert_eq!(tracker.pending_count(), 2);

        tracker.clear(None);

        assert_eq!(tracker.pending_count(), 0);
    }

    /// `clear(Some(id))` only drops the requests of that client.
    #[tokio::test]
    async fn test_clear_client() {
        let tracker = AgentRequestTracker::new();
        let (first, second) = ("client-1", "client-2");
        let _receivers = [
            tracker.register(first, json!(0)),
            tracker.register(first, json!(1)),
            tracker.register(second, json!(0)),
        ];
        assert_eq!(tracker.pending_count(), 3);

        tracker.clear(Some(first));

        assert_eq!(tracker.pending_count(), 1);
        assert_eq!(tracker.client_pending_count(first), 0);
        assert_eq!(tracker.client_pending_count(second), 1);
    }

    /// A clone observes registrations made through the original handle.
    #[test]
    fn test_tracker_clone() {
        let original = AgentRequestTracker::new();
        let handle = original.clone();

        let _rx = original.register("client-1", json!(0));

        assert_eq!(handle.pending_count(), 1);
    }
}
+275
View File
@@ -0,0 +1,275 @@
//! Configuration types for the ACP Server
//!
//! This module defines configuration types for the ACP Server, including
//! server settings, connection limits, and CORS configuration.
use serde::{Deserialize, Serialize};
/// Default port for the ACP Server, used when a configuration does not
/// specify `port`.
pub const DEFAULT_PORT: u16 = 3001;
/// Default maximum number of concurrent connections, used when a
/// configuration does not specify `max_connections`.
pub const DEFAULT_MAX_CONNECTIONS: usize = 100;
/// Configuration for the ACP Server
///
/// Holds every configurable option of the ACP Server: whether it is enabled,
/// which port it listens on, which CORS origins it accepts, and how many
/// clients may be connected at once. Every field has a serde default, so
/// fields missing from serialized input fall back to their default values.
///
/// **Note**: This config describes a server that binds its own TCP port
/// (separate port mode), so `port` is always a concrete number. In
/// integrated mode (mounting at /acp) the field is still required; it then
/// records the port chosen for the separate server path. Higher-level
/// configs (dirigent_core, api) use `Option<u16>` to represent the
/// integrated vs separate distinction.
///
/// TODO: Consider moving the AcpPortConfig enum from web package to core
/// and using it here to make the integrated/separate distinction explicit.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct AcpServerConfig {
/// Whether the ACP Server is enabled
///
/// When disabled, the server will not accept incoming connections.
/// Default: false
#[serde(default)]
pub enabled: bool,
/// The port to listen on for incoming connections
///
/// This is always a concrete port number because this config is used
/// to start an actual TCP server. Use `Option<u16>` in higher-level configs
/// to represent integrated mode (`None`) vs separate mode (`Some(port)`).
/// `validate()` rejects a value of 0.
///
/// Default: 3001
#[serde(default = "default_port")]
pub port: u16,
/// List of allowed origins for CORS
///
/// When `Some`, only requests from these origins are allowed.
/// When `None`, all origins are allowed (use with caution).
///
/// Example: `["http://localhost:3000", "https://app.example.com"]`
#[serde(default)]
pub allowed_origins: Option<Vec<String>>,
/// Maximum number of concurrent client connections
///
/// New connections will be rejected when this limit is reached.
/// `validate()` rejects a value of 0.
/// Default: 100
#[serde(default = "default_max_connections")]
pub max_connections: usize,
}
/// Returns the default port value ([`DEFAULT_PORT`]).
///
/// Referenced by name from the `#[serde(default = "default_port")]`
/// attribute on `AcpServerConfig::port`, so it must stay a plain function.
fn default_port() -> u16 {
DEFAULT_PORT
}
/// Returns the default max connections value ([`DEFAULT_MAX_CONNECTIONS`]).
///
/// Referenced by name from the `#[serde(default = "default_max_connections")]`
/// attribute on `AcpServerConfig::max_connections`.
fn default_max_connections() -> usize {
DEFAULT_MAX_CONNECTIONS
}
impl Default for AcpServerConfig {
fn default() -> Self {
Self {
enabled: false,
port: DEFAULT_PORT,
allowed_origins: None,
max_connections: DEFAULT_MAX_CONNECTIONS,
}
}
}
impl AcpServerConfig {
    /// Build a configuration holding the default values.
    pub fn new() -> Self {
        Default::default()
    }

    /// Build a default configuration with the server switched on.
    pub fn enabled() -> Self {
        Self::new().set_enabled(true)
    }

    /// Build a default configuration listening on `port`.
    pub fn with_port(port: u16) -> Self {
        Self::new().set_port(port)
    }

    /// Builder step: choose whether the server is enabled.
    pub fn set_enabled(self, enabled: bool) -> Self {
        Self { enabled, ..self }
    }

    /// Builder step: choose the listen port.
    pub fn set_port(self, port: u16) -> Self {
        Self { port, ..self }
    }

    /// Builder step: choose the allowed CORS origins (`None` lifts the restriction).
    pub fn set_allowed_origins(self, origins: Option<Vec<String>>) -> Self {
        Self {
            allowed_origins: origins,
            ..self
        }
    }

    /// Builder step: choose the maximum number of concurrent connections.
    pub fn set_max_connections(self, max: usize) -> Self {
        Self {
            max_connections: max,
            ..self
        }
    }

    /// Check that the configuration is usable.
    ///
    /// # Errors
    ///
    /// Returns a message when `port` is 0 or when `max_connections` is 0.
    /// The port is checked first, so a config violating both reports the
    /// port problem.
    pub fn validate(&self) -> Result<(), String> {
        match (self.port, self.max_connections) {
            (0, _) => Err("Port cannot be 0".to_string()),
            (_, 0) => Err("max_connections must be at least 1".to_string()),
            _ => Ok(()),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-entry origin list shared by several tests.
    fn localhost_origins() -> Option<Vec<String>> {
        Some(vec!["http://localhost:3000".to_string()])
    }

    /// Asserts that every field other than `enabled` carries its default.
    fn assert_defaults_besides_enabled(config: &AcpServerConfig) {
        assert_eq!(config.port, DEFAULT_PORT);
        assert!(config.allowed_origins.is_none());
        assert_eq!(config.max_connections, DEFAULT_MAX_CONNECTIONS);
    }

    #[test]
    fn test_default_config() {
        let config = AcpServerConfig::default();

        assert!(!config.enabled);
        assert_defaults_besides_enabled(&config);
    }

    #[test]
    fn test_enabled_config() {
        let config = AcpServerConfig::enabled();

        assert!(config.enabled);
        assert_eq!(config.port, DEFAULT_PORT);
    }

    #[test]
    fn test_with_port() {
        let config = AcpServerConfig::with_port(8080);

        assert!(!config.enabled);
        assert_eq!(config.port, 8080);
    }

    #[test]
    fn test_builder_pattern() {
        let config = AcpServerConfig::new()
            .set_enabled(true)
            .set_port(4000)
            .set_allowed_origins(localhost_origins())
            .set_max_connections(50);

        assert!(config.enabled);
        assert_eq!(config.port, 4000);
        assert_eq!(config.allowed_origins, localhost_origins());
        assert_eq!(config.max_connections, 50);
    }

    #[test]
    fn test_validation_valid() {
        assert!(AcpServerConfig::default().validate().is_ok());
    }

    #[test]
    fn test_validation_invalid_port() {
        let mut config = AcpServerConfig::default();
        config.port = 0;

        assert!(config.validate().is_err());
    }

    #[test]
    fn test_validation_invalid_max_connections() {
        let mut config = AcpServerConfig::default();
        config.max_connections = 0;

        assert!(config.validate().is_err());
    }

    #[test]
    fn test_serialization() {
        let config = AcpServerConfig {
            enabled: true,
            port: 3001,
            allowed_origins: localhost_origins(),
            max_connections: 100,
        };

        let json = serde_json::to_string(&config).unwrap();
        for fragment in [
            "\"enabled\":true",
            "\"port\":3001",
            "\"allowed_origins\"",
            "\"max_connections\":100",
        ] {
            assert!(json.contains(fragment));
        }

        // The serialized form must round-trip to an equal value.
        let parsed: AcpServerConfig = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, config);
    }

    #[test]
    fn test_deserialization_with_defaults() {
        // Only `enabled` is given; the rest must come from the defaults.
        let config: AcpServerConfig = serde_json::from_str(r#"{"enabled":true}"#).unwrap();

        assert!(config.enabled);
        assert_defaults_besides_enabled(&config);
    }

    #[test]
    fn test_deserialization_empty() {
        // An empty object must yield the all-defaults configuration.
        let config: AcpServerConfig = serde_json::from_str("{}").unwrap();

        assert!(!config.enabled);
        assert_defaults_besides_enabled(&config);
    }

    #[test]
    fn test_equality() {
        let baseline = AcpServerConfig::default();

        assert_eq!(baseline, AcpServerConfig::default());
        assert_ne!(baseline, AcpServerConfig::enabled());
    }

    #[test]
    fn test_clone() {
        let config = AcpServerConfig::enabled()
            .set_port(8080)
            .set_allowed_origins(Some(vec!["origin".to_string()]));

        assert_eq!(config, config.clone());
    }
}
+362
View File
@@ -0,0 +1,362 @@
//! Error types for the ACP Server
//!
//! This module defines error types used throughout the ACP Server implementation,
//! including conversions to JSON-RPC error format for client responses.
use std::fmt;
use serde::{Deserialize, Serialize};
/// JSON-RPC error codes used by the ACP Server
///
/// The first group holds the codes predefined by the JSON-RPC 2.0
/// specification; the second group holds this server's own codes, taken from
/// the range the specification reserves for implementation-defined errors.
// NOTE(review): no dedicated code is defined here for the ConnectorNotReady
// or Timeout error variants — confirm how they are mapped where the
// error-to-code conversion is implemented.
pub mod error_codes {
/// Parse error - Invalid JSON was received by the server
pub const PARSE_ERROR: i32 = -32700;
/// Invalid Request - The JSON sent is not a valid Request object
pub const INVALID_REQUEST: i32 = -32600;
/// Method not found - The method does not exist / is not available
pub const METHOD_NOT_FOUND: i32 = -32601;
/// Invalid params - Invalid method parameter(s)
pub const INVALID_PARAMS: i32 = -32602;
/// Internal error - Internal JSON-RPC error
pub const INTERNAL_ERROR: i32 = -32603;
// Server errors reserved for implementation-defined errors (-32000 to -32099)
/// Session not found error
pub const SESSION_NOT_FOUND: i32 = -32001;
/// Connector not found error
pub const CONNECTOR_NOT_FOUND: i32 = -32002;
/// Invalid session error
pub const INVALID_SESSION: i32 = -32003;
/// Transport error
pub const TRANSPORT_ERROR: i32 = -32004;
/// Client not found error
pub const CLIENT_NOT_FOUND: i32 = -32005;
}
/// Every error condition the ACP Server can report.
///
/// Variants that carry a `String` hold either a human-readable description
/// or the ID of the entity the error refers to, as noted per variant.
#[derive(Debug, Clone, PartialEq)]
pub enum AcpServerError {
    /// The session ID provided is invalid or malformed.
    InvalidSession,

    /// An RPC-level failure, such as method not found, invalid params, or a
    /// parse error. Carries a description of the failure.
    RpcError(String),

    /// A transport-level failure, such as a connection failure, timeout, or
    /// network issue. Carries a description of the failure.
    TransportError(String),

    /// The session ID does not correspond to any known session in the
    /// session manager.
    SessionNotFound,

    /// No connector is registered in the runtime under the given ID.
    /// Carries the connector ID that was not found.
    ConnectorNotFound(String),

    /// The connector exists but cannot handle requests right now (e.g.
    /// still connecting, in an error state, or stopped). Carries the ID of
    /// the connector that is not ready.
    ConnectorNotReady(String),

    /// An operation timed out. Carries a description of what timed out.
    Timeout(String),

    /// An unexpected internal condition that fits no other variant.
    /// Carries a description of the error.
    Internal(String),

    /// No connected client in the session manager has the given ID.
    /// Carries the client ID that was not found.
    ClientNotFound(String),
}

impl fmt::Display for AcpServerError {
    /// Writes the user-facing message: a fixed sentence for the unit
    /// variants, and `"<label>: <payload>"` for the variants carrying a string.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Fixed messages.
            Self::InvalidSession => f.write_str("Invalid session ID provided"),
            Self::SessionNotFound => f.write_str("Session not found"),
            // Messages carrying a description.
            Self::RpcError(detail) => write!(f, "RPC error: {}", detail),
            Self::TransportError(detail) => write!(f, "Transport error: {}", detail),
            Self::Timeout(detail) => write!(f, "Timeout: {}", detail),
            Self::Internal(detail) => write!(f, "Internal error: {}", detail),
            // Messages carrying an identifier.
            Self::ConnectorNotFound(connector) => write!(f, "Connector not found: {}", connector),
            Self::ConnectorNotReady(connector) => write!(f, "Connector not ready: {}", connector),
            Self::ClientNotFound(client) => write!(f, "Client not found: {}", client),
        }
    }
}

// No variant wraps another error, so the trait's default `source()`
// (which returns `None`) is the right behavior.
impl std::error::Error for AcpServerError {}
/// Wire-format representation of a JSON-RPC 2.0 error object.
///
/// This struct is used in JSON-RPC responses when an error occurs.
/// It follows the JSON-RPC 2.0 specification for error objects and derives
/// `Serialize`/`Deserialize` so it can be written to and read from JSON.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct JsonRpcErrorObject {
/// A number that indicates the error type that occurred.
pub code: i32,
/// A string providing a short description of the error.
pub message: String,
/// Optional additional information about the error.
///
/// Left out of the serialized output entirely when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub data: Option<serde_json::Value>,
}
impl JsonRpcErrorObject {
    /// Build an error object from a code and a message, with no `data`.
    pub fn new(code: i32, message: impl Into<String>) -> Self {
        let message: String = message.into();
        Self {
            code,
            message,
            data: None,
        }
    }

    /// Build an error object from a code and a message, attaching `data`
    /// as additional information.
    pub fn with_data(code: i32, message: impl Into<String>, data: serde_json::Value) -> Self {
        Self {
            data: Some(data),
            ..Self::new(code, message)
        }
    }

    /// Shorthand for an error carrying `error_codes::PARSE_ERROR`.
    pub fn parse_error(message: impl Into<String>) -> Self {
        let code = error_codes::PARSE_ERROR;
        Self::new(code, message)
    }

    /// Shorthand for an error carrying `error_codes::INVALID_REQUEST`.
    pub fn invalid_request(message: impl Into<String>) -> Self {
        let code = error_codes::INVALID_REQUEST;
        Self::new(code, message)
    }

    /// Shorthand for an error carrying `error_codes::METHOD_NOT_FOUND`.
    ///
    /// The message is `"Method not found: <method>"`.
    pub fn method_not_found(method: impl Into<String>) -> Self {
        let method_name: String = method.into();
        let message = format!("Method not found: {}", method_name);
        Self::new(error_codes::METHOD_NOT_FOUND, message)
    }

    /// Shorthand for an error carrying `error_codes::INVALID_PARAMS`.
    pub fn invalid_params(message: impl Into<String>) -> Self {
        let code = error_codes::INVALID_PARAMS;
        Self::new(code, message)
    }

    /// Shorthand for an error carrying `error_codes::INTERNAL_ERROR`.
    pub fn internal_error(message: impl Into<String>) -> Self {
        let code = error_codes::INTERNAL_ERROR;
        Self::new(code, message)
    }
}
impl From<AcpServerError> for JsonRpcErrorObject {
fn from(error: AcpServerError) -> Self {
match error {
AcpServerError::InvalidSession => {
JsonRpcErrorObject::new(error_codes::INVALID_SESSION, "Invalid session ID provided")
}
AcpServerError::RpcError(msg) => {
JsonRpcErrorObject::new(error_codes::INTERNAL_ERROR, msg)
}
AcpServerError::TransportError(msg) => {
JsonRpcErrorObject::new(error_codes::TRANSPORT_ERROR, msg)
}
AcpServerError::SessionNotFound => {
JsonRpcErrorObject::new(error_codes::SESSION_NOT_FOUND, "Session not found")
}
AcpServerError::ConnectorNotFound(id) => {
JsonRpcErrorObject::new(error_codes::CONNECTOR_NOT_FOUND, format!("Connector not found: {}", id))
}
AcpServerError::ConnectorNotReady(id) => {
JsonRpcErrorObject::new(error_codes::CONNECTOR_NOT_FOUND, format!("Connector not ready: {}", id))
}
AcpServerError::Timeout(msg) => {
JsonRpcErrorObject::new(error_codes::TRANSPORT_ERROR, format!("Timeout: {}", msg))
}
AcpServerError::Internal(msg) => JsonRpcErrorObject::internal_error(msg),
AcpServerError::ClientNotFound(id) => {
JsonRpcErrorObject::new(error_codes::CLIENT_NOT_FOUND, format!("Client not found: {}", id))
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every variant renders the expected human-readable message.
    #[test]
    fn test_error_display() {
        let cases = vec![
            (AcpServerError::InvalidSession, "Invalid session ID provided"),
            (AcpServerError::RpcError("test".to_string()), "RPC error: test"),
            (
                AcpServerError::TransportError("timeout".to_string()),
                "Transport error: timeout",
            ),
            (AcpServerError::SessionNotFound, "Session not found"),
            (
                AcpServerError::ConnectorNotFound("test-conn".to_string()),
                "Connector not found: test-conn",
            ),
            (
                AcpServerError::ConnectorNotReady("test-conn".to_string()),
                "Connector not ready: test-conn",
            ),
            (
                AcpServerError::Timeout("request timed out".to_string()),
                "Timeout: request timed out",
            ),
            (
                AcpServerError::Internal("something went wrong".to_string()),
                "Internal error: something went wrong",
            ),
            (
                AcpServerError::ClientNotFound("client-123".to_string()),
                "Client not found: client-123",
            ),
        ];

        for (error, expected) in cases {
            assert_eq!(error.to_string(), expected);
        }
    }

    /// Conversion to the wire format picks the documented code per variant.
    #[test]
    fn test_error_to_jsonrpc() {
        let invalid_session = JsonRpcErrorObject::from(AcpServerError::InvalidSession);
        assert_eq!(invalid_session.code, error_codes::INVALID_SESSION);
        assert_eq!(invalid_session.message, "Invalid session ID provided");

        let session_missing = JsonRpcErrorObject::from(AcpServerError::SessionNotFound);
        assert_eq!(session_missing.code, error_codes::SESSION_NOT_FOUND);

        let connector_missing =
            JsonRpcErrorObject::from(AcpServerError::ConnectorNotFound("conn1".to_string()));
        assert_eq!(connector_missing.code, error_codes::CONNECTOR_NOT_FOUND);
        assert!(connector_missing.message.contains("conn1"));

        // "Not ready" intentionally reuses the "not found" code.
        let connector_not_ready =
            JsonRpcErrorObject::from(AcpServerError::ConnectorNotReady("conn2".to_string()));
        assert_eq!(connector_not_ready.code, error_codes::CONNECTOR_NOT_FOUND);
        assert!(connector_not_ready.message.contains("conn2"));

        // Timeouts intentionally reuse the transport code.
        let timed_out =
            JsonRpcErrorObject::from(AcpServerError::Timeout("session creation".to_string()));
        assert_eq!(timed_out.code, error_codes::TRANSPORT_ERROR);

        let transport =
            JsonRpcErrorObject::from(AcpServerError::TransportError("net error".to_string()));
        assert_eq!(transport.code, error_codes::TRANSPORT_ERROR);
        assert_eq!(transport.message, "net error");

        let internal = JsonRpcErrorObject::from(AcpServerError::Internal("internal".to_string()));
        assert_eq!(internal.code, error_codes::INTERNAL_ERROR);

        let client_missing =
            JsonRpcErrorObject::from(AcpServerError::ClientNotFound("client-456".to_string()));
        assert_eq!(client_missing.code, error_codes::CLIENT_NOT_FOUND);
        assert!(client_missing.message.contains("client-456"));
    }

    /// Serialized JSON contains the code, the message and any attached data.
    #[test]
    fn test_jsonrpc_error_serialization() {
        let plain = JsonRpcErrorObject::new(error_codes::PARSE_ERROR, "Invalid JSON");
        let plain_json = serde_json::to_string(&plain).unwrap();
        assert!(plain_json.contains("-32700"));
        assert!(plain_json.contains("Invalid JSON"));

        // With data
        let payload = serde_json::json!({"field": "session_id"});
        let detailed =
            JsonRpcErrorObject::with_data(error_codes::INVALID_PARAMS, "Missing field", payload);
        let detailed_json = serde_json::to_string(&detailed).unwrap();
        assert!(detailed_json.contains("session_id"));
    }

    /// Each factory stamps its own standard code.
    #[test]
    fn test_jsonrpc_error_factories() {
        let parse = JsonRpcErrorObject::parse_error("bad json");
        assert_eq!(parse.code, error_codes::PARSE_ERROR);

        let invalid_request = JsonRpcErrorObject::invalid_request("missing jsonrpc field");
        assert_eq!(invalid_request.code, error_codes::INVALID_REQUEST);

        let unknown_method = JsonRpcErrorObject::method_not_found("session.unknown");
        assert_eq!(unknown_method.code, error_codes::METHOD_NOT_FOUND);
        assert!(unknown_method.message.contains("session.unknown"));

        let invalid_params = JsonRpcErrorObject::invalid_params("session_id required");
        assert_eq!(invalid_params.code, error_codes::INVALID_PARAMS);

        let internal = JsonRpcErrorObject::internal_error("panic");
        assert_eq!(internal.code, error_codes::INTERNAL_ERROR);
    }

    /// Compile-time check that the enum implements `std::error::Error`.
    #[test]
    fn test_error_is_error_trait() {
        fn assert_is_error<T: std::error::Error>() {}
        assert_is_error::<AcpServerError>();
    }

    /// A clone compares equal to its source.
    #[test]
    fn test_error_clone() {
        let original = AcpServerError::Internal("test".to_string());
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }
}
File diff suppressed because it is too large Load Diff
+460
View File
@@ -0,0 +1,460 @@
//! JSON-RPC 2.0 types for the ACP Server
//!
//! This module implements JSON-RPC 2.0 request/response types according to
//! the specification at https://www.jsonrpc.org/specification.
//!
//! Key features:
//! - Support for both numeric and string IDs
//! - Batch request/response handling
//! - Proper serialization of null vs missing fields
use serde::{Deserialize, Serialize};
use crate::error::JsonRpcErrorObject;
/// JSON-RPC protocol version string.
///
/// Written into the `jsonrpc` member by the request/response constructors in
/// this module and compared against it by `JsonRpcRequest::validate`.
pub const JSONRPC_VERSION: &str = "2.0";
/// JSON-RPC request/response identifier
///
/// According to the JSON-RPC 2.0 spec, an id can be a String, Number,
/// or Null. This type uses an untagged enum, so each variant is written as
/// the bare JSON value (`42`, `"abc-123"`, `null`) with no wrapper object.
///
/// Note: The spec recommends not using Null as an id for requests.
///
/// NOTE(review): untagged deserialization presumably tries the variants in
/// declaration order, so reordering them could change which variant a value
/// lands in. Confirm against serde's untagged-enum documentation.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum JsonRpcId {
/// Numeric identifier (integer; stored as `i64`)
Number(i64),
/// String identifier
String(String),
/// Null identifier (typically used in error responses for invalid requests)
Null,
}
impl From<i64> for JsonRpcId {
fn from(n: i64) -> Self {
JsonRpcId::Number(n)
}
}
impl From<String> for JsonRpcId {
fn from(s: String) -> Self {
JsonRpcId::String(s)
}
}
impl From<&str> for JsonRpcId {
fn from(s: &str) -> Self {
JsonRpcId::String(s.to_string())
}
}
/// A JSON-RPC 2.0 request object
///
/// Represents a remote procedure call with optional parameters.
/// The `id` field determines whether this is a request (with id) or
/// notification (without id).
///
/// Deserialization alone does not check the protocol version or method name;
/// call `validate` for that.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcRequest {
/// JSON-RPC protocol version (must be "2.0")
pub jsonrpc: String,
/// A String containing the name of the method to be invoked
pub method: String,
/// Optional structured value that holds the parameter values
///
/// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub params: Option<serde_json::Value>,
/// Optional identifier established by the client
///
/// If absent, the request is a notification (no response expected).
/// Omitted from the serialized output when `None`.
///
/// NOTE(review): because this is an `Option`, an explicit `"id": null` in
/// the incoming JSON presumably deserializes to `None` (a notification)
/// rather than `Some(JsonRpcId::Null)`. Confirm against serde's `Option`
/// handling if that distinction matters to callers.
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<JsonRpcId>,
}
impl JsonRpcRequest {
    /// Build a request that expects a response (it carries `id`).
    ///
    /// The `jsonrpc` member is set to [`JSONRPC_VERSION`].
    pub fn new(method: impl Into<String>, params: Option<serde_json::Value>, id: JsonRpcId) -> Self {
        // A request is a notification plus an id.
        Self {
            id: Some(id),
            ..Self::notification(method, params)
        }
    }

    /// Build a notification: a request without an `id`.
    ///
    /// The `jsonrpc` member is set to [`JSONRPC_VERSION`].
    pub fn notification(method: impl Into<String>, params: Option<serde_json::Value>) -> Self {
        let jsonrpc = String::from(JSONRPC_VERSION);
        let method: String = method.into();
        Self {
            jsonrpc,
            method,
            params,
            id: None,
        }
    }

    /// Returns `true` when the request has no `id`, i.e. is a notification.
    pub fn is_notification(&self) -> bool {
        matches!(self.id, None)
    }

    /// Check the request against the format rules enforced by this module.
    ///
    /// Checks run in this order and the first failure wins:
    /// 1. `jsonrpc` must equal [`JSONRPC_VERSION`].
    /// 2. `method` must not be empty.
    /// 3. `method` must not start with the reserved prefix `rpc.`.
    ///
    /// # Errors
    ///
    /// Returns a human-readable description of the first rule violated.
    pub fn validate(&self) -> Result<(), String> {
        let version = self.jsonrpc.as_str();
        if version != JSONRPC_VERSION {
            return Err(format!(
                "Invalid JSON-RPC version: expected '{}', got '{}'",
                JSONRPC_VERSION, version
            ));
        }

        match self.method.as_str() {
            "" => Err("Method name cannot be empty".to_string()),
            // Methods starting with "rpc." are reserved for internal use
            reserved if reserved.starts_with("rpc.") => Err(format!(
                "Method name '{}' is reserved (starts with 'rpc.')",
                reserved
            )),
            _ => Ok(()),
        }
    }
}
/// A JSON-RPC 2.0 response object
///
/// Contains either a result (success) or an error (failure), never both.
/// The id must match the corresponding request id.
///
/// The "never both" rule is not enforced by the type itself (both members are
/// independent `Option`s); the `success` and `error` constructors uphold it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcResponse {
/// JSON-RPC protocol version (must be "2.0")
pub jsonrpc: String,
/// The result of the call (on success)
///
/// This member is REQUIRED on success and MUST NOT exist on error.
/// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub result: Option<serde_json::Value>,
/// The error object (on failure)
///
/// This member is REQUIRED on error and MUST NOT exist on success.
/// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<JsonRpcErrorObject>,
/// The identifier matching the request
///
/// If there was an error detecting the id in the Request object
/// (e.g. Parse error/Invalid Request), it MUST be Null.
/// Unlike `result` and `error`, this member is always serialized.
pub id: JsonRpcId,
}
impl JsonRpcResponse {
/// Create a successful response
pub fn success(result: serde_json::Value, id: JsonRpcId) -> Self {
Self {
jsonrpc: JSONRPC_VERSION.to_string(),
result: Some(result),
error: None,
id,
}
}
/// Create an error response
pub fn error(error: JsonRpcErrorObject, id: JsonRpcId) -> Self {
Self {
jsonrpc: JSONRPC_VERSION.to_string(),
result: None,
error: Some(error),
id,
}
}
/// Create an error response with a null id (for parse errors)
pub fn error_with_null_id(error: JsonRpcErrorObject) -> Self {
Self::error(error, JsonRpcId::Null)
}
/// Check if this response represents success
pub fn is_success(&self) -> bool {
self.result.is_some() && self.error.is_none()
}
/// Check if this response represents an error
pub fn is_error(&self) -> bool {
self.error.is_some()
}
}
/// Represents either a single request or a batch of requests
///
/// The JSON-RPC 2.0 spec allows sending multiple requests in a single
/// JSON array for batch processing. The enum is untagged: a JSON object is
/// read as `Single` and a JSON array of request objects as `Batch`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum JsonRpcRequestBatch {
/// A single request
Single(JsonRpcRequest),
/// A batch of requests (may be empty)
Batch(Vec<JsonRpcRequest>),
}
impl JsonRpcRequestBatch {
/// Check if this is an empty batch
pub fn is_empty(&self) -> bool {
match self {
JsonRpcRequestBatch::Single(_) => false,
JsonRpcRequestBatch::Batch(batch) => batch.is_empty(),
}
}
/// Get the number of requests
pub fn len(&self) -> usize {
match self {
JsonRpcRequestBatch::Single(_) => 1,
JsonRpcRequestBatch::Batch(batch) => batch.len(),
}
}
/// Convert to a vector of requests
pub fn into_vec(self) -> Vec<JsonRpcRequest> {
match self {
JsonRpcRequestBatch::Single(req) => vec![req],
JsonRpcRequestBatch::Batch(batch) => batch,
}
}
}
/// Represents either a single response or a batch of responses
///
/// The response format must match the request format: single request
/// gets single response, batch request gets batch response.
/// The enum is untagged, so `Single` serializes as a bare response object and
/// `Batch` as a JSON array of response objects.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum JsonRpcResponseBatch {
/// A single response
Single(JsonRpcResponse),
/// A batch of responses
Batch(Vec<JsonRpcResponse>),
}
impl JsonRpcResponseBatch {
/// Create a batch response from a vector
///
/// Returns Single if there's exactly one response, otherwise Batch.
pub fn from_vec(responses: Vec<JsonRpcResponse>) -> Self {
if responses.len() == 1 {
JsonRpcResponseBatch::Single(responses.into_iter().next().unwrap())
} else {
JsonRpcResponseBatch::Batch(responses)
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Numeric ids round-trip as bare JSON numbers.
    #[test]
    fn test_jsonrpc_id_number() {
        let id = JsonRpcId::Number(42);
        let json = serde_json::to_string(&id).unwrap();
        assert_eq!(json, "42");

        let parsed: JsonRpcId = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, id);
    }

    /// String ids round-trip as bare JSON strings.
    #[test]
    fn test_jsonrpc_id_string() {
        let id = JsonRpcId::String("abc-123".to_string());
        let json = serde_json::to_string(&id).unwrap();
        assert_eq!(json, "\"abc-123\"");

        let parsed: JsonRpcId = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, id);
    }

    /// The null id round-trips as JSON `null`.
    #[test]
    fn test_jsonrpc_id_null() {
        let id = JsonRpcId::Null;
        let json = serde_json::to_string(&id).unwrap();
        assert_eq!(json, "null");

        let parsed: JsonRpcId = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, id);
    }

    /// `From` conversions pick the matching variant.
    #[test]
    fn test_jsonrpc_id_from_conversions() {
        let id: JsonRpcId = 42i64.into();
        assert_eq!(id, JsonRpcId::Number(42));

        let id: JsonRpcId = "test".into();
        assert_eq!(id, JsonRpcId::String("test".to_string()));

        let id: JsonRpcId = String::from("owned").into();
        assert_eq!(id, JsonRpcId::String("owned".to_string()));
    }

    /// `new` fills in the version and keeps the id.
    #[test]
    fn test_request_creation() {
        let req = JsonRpcRequest::new("session.new", Some(json!({"title": "Test"})), 1.into());

        assert_eq!(req.jsonrpc, "2.0");
        assert_eq!(req.method, "session.new");
        assert!(req.params.is_some());
        assert_eq!(req.id, Some(JsonRpcId::Number(1)));
        assert!(!req.is_notification());
    }

    /// `notification` produces a request without an id.
    #[test]
    fn test_notification_creation() {
        let notif = JsonRpcRequest::notification("event.ping", None);

        assert!(notif.is_notification());
        assert_eq!(notif.id, None);
    }

    /// `validate` rejects a wrong version, an empty method and reserved names.
    #[test]
    fn test_request_validation() {
        let valid = JsonRpcRequest::new("test.method", None, 1.into());
        assert!(valid.validate().is_ok());

        // Invalid version
        let mut invalid_version = valid.clone();
        invalid_version.jsonrpc = "1.0".to_string();
        assert!(invalid_version.validate().is_err());

        // Empty method
        let mut empty_method = valid.clone();
        empty_method.method = String::new();
        assert!(empty_method.validate().is_err());

        // Reserved method
        let mut reserved = valid.clone();
        reserved.method = "rpc.internal".to_string();
        assert!(reserved.validate().is_err());
    }

    /// Requests serialize with the expected members and parse back.
    #[test]
    fn test_request_serialization() {
        let req = JsonRpcRequest::new(
            "session.prompt",
            Some(json!({"session_id": "abc", "content": "Hello"})),
            "req-123".into(),
        );

        let json = serde_json::to_string(&req).unwrap();
        assert!(json.contains("\"jsonrpc\":\"2.0\""));
        assert!(json.contains("\"method\":\"session.prompt\""));
        assert!(json.contains("\"id\":\"req-123\""));

        // Deserialize back
        let parsed: JsonRpcRequest = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.method, "session.prompt");
    }

    /// A success response carries a result and no error.
    #[test]
    fn test_response_success() {
        let resp = JsonRpcResponse::success(json!({"session_id": "new-123"}), 1.into());

        assert!(resp.is_success());
        assert!(!resp.is_error());
        assert_eq!(resp.result, Some(json!({"session_id": "new-123"})));
        assert!(resp.error.is_none());
    }

    /// An error response carries an error and no result.
    #[test]
    fn test_response_error() {
        let error = JsonRpcErrorObject::method_not_found("unknown.method");
        let resp = JsonRpcResponse::error(error, 1.into());

        assert!(!resp.is_success());
        assert!(resp.is_error());
        assert!(resp.result.is_none());
        assert!(resp.error.is_some());
    }

    /// Only the populated member of `result`/`error` is serialized.
    #[test]
    fn test_response_serialization() {
        // Success response
        let success = JsonRpcResponse::success(json!({"ok": true}), 42.into());
        let json = serde_json::to_string(&success).unwrap();
        assert!(json.contains("\"result\""));
        assert!(!json.contains("\"error\""));

        // Error response
        let error = JsonRpcResponse::error(
            JsonRpcErrorObject::internal_error("Something broke"),
            42.into(),
        );
        let json = serde_json::to_string(&error).unwrap();
        assert!(!json.contains("\"result\""));
        assert!(json.contains("\"error\""));
    }

    /// A `Single` counts as one request and is never empty.
    #[test]
    fn test_batch_request_single() {
        let req = JsonRpcRequest::new("test", None, 1.into());
        let batch = JsonRpcRequestBatch::Single(req);

        assert_eq!(batch.len(), 1);
        assert!(!batch.is_empty());

        let vec = batch.into_vec();
        assert_eq!(vec.len(), 1);
    }

    /// A `Batch` reports its vector length.
    #[test]
    fn test_batch_request_multiple() {
        let req1 = JsonRpcRequest::new("test1", None, 1.into());
        let req2 = JsonRpcRequest::new("test2", None, 2.into());
        let batch = JsonRpcRequestBatch::Batch(vec![req1, req2]);

        assert_eq!(batch.len(), 2);

        let vec = batch.into_vec();
        assert_eq!(vec.len(), 2);
    }

    /// An empty `Batch` is empty and has length zero.
    #[test]
    fn test_batch_request_empty() {
        let batch = JsonRpcRequestBatch::Batch(vec![]);

        assert!(batch.is_empty());
        assert_eq!(batch.len(), 0);
    }

    /// Objects deserialize as `Single`, arrays as `Batch`.
    #[test]
    fn test_batch_request_deserialization() {
        // Single request
        let single_json = r#"{"jsonrpc":"2.0","method":"test","id":1}"#;
        let single: JsonRpcRequestBatch = serde_json::from_str(single_json).unwrap();
        assert_eq!(single.len(), 1);

        // Batch request
        let batch_json = r#"[{"jsonrpc":"2.0","method":"test1","id":1},{"jsonrpc":"2.0","method":"test2","id":2}]"#;
        let batch: JsonRpcRequestBatch = serde_json::from_str(batch_json).unwrap();
        assert_eq!(batch.len(), 2);
    }

    /// `from_vec` picks `Single` for exactly one response and `Batch` otherwise.
    ///
    /// The `matches!` results are wrapped in `assert!`; a bare `matches!`
    /// only yields a `bool` that would be discarded, so the test could never
    /// fail.
    #[test]
    fn test_batch_response_from_vec() {
        // Single response becomes Single variant
        let responses = vec![JsonRpcResponse::success(json!(null), 1.into())];
        let batch = JsonRpcResponseBatch::from_vec(responses);
        assert!(matches!(batch, JsonRpcResponseBatch::Single(_)));

        // Multiple responses become Batch variant
        let responses = vec![
            JsonRpcResponse::success(json!(null), 1.into()),
            JsonRpcResponse::success(json!(null), 2.into()),
        ];
        let batch = JsonRpcResponseBatch::from_vec(responses);
        assert!(matches!(batch, JsonRpcResponseBatch::Batch(_)));

        // No responses stay a (empty) Batch
        let batch = JsonRpcResponseBatch::from_vec(Vec::new());
        assert!(matches!(batch, JsonRpcResponseBatch::Batch(_)));
    }
}
+116
View File
@@ -0,0 +1,116 @@
//! Dirigent ACP API
//!
//! This crate exposes an ACP (Agent-Client Protocol) API for Dirigent,
//! allowing other ACP clients to interact with Dirigent.
//!
//! ## Modules
//!
//! - [`agent_requests`] - Agent request tracking (`AgentRequestTracker`)
//! - [`config`] - Server configuration types
//! - [`error`] - Error types and JSON-RPC error conversion
//! - [`event_bridge`] - Event forwarding from source streams to SSE clients
//! - [`jsonrpc`] - JSON-RPC 2.0 request/response types
//! - [`router`] - Axum router and HTTP handlers
//! - [`rpc`] - JSON-RPC request handler and method dispatch
//! - [`session_manager`] - Session mapping and client connection tracking
//! - [`sse`] - SSE notifications for streaming events to clients
//!
//! ## Example
//!
//! ```rust,ignore
//! use dirigent_acp_api::{
//! AcpServerConfig, NoOpConnectorOperations,
//! router::{AcpServerState, create_acp_server_router},
//! };
//!
//! // Create server configuration
//! let config = AcpServerConfig::enabled()
//! .set_port(3001)
//! .set_max_connections(100);
//!
//! // Create server state
//! let state = AcpServerState::new(config);
//!
//! // Create the router with connector operations
//! let router = create_acp_server_router(state, NoOpConnectorOperations);
//!
//! // Run with axum
//! let listener = tokio::net::TcpListener::bind("0.0.0.0:3001").await?;
//! axum::serve(listener, router).await?;
//! ```
// Modules
pub mod agent_requests;
pub mod config;
pub mod error;
pub mod event_bridge;
pub mod jsonrpc;
pub mod router;
pub mod rpc;
pub mod session_manager;
pub mod sse;
// Re-exports for convenience
pub use agent_requests::AgentRequestTracker;
pub use config::AcpServerConfig;
pub use error::{AcpServerError, JsonRpcErrorObject};
pub use event_bridge::{EventBridge, EventBridgeConfig};
pub use jsonrpc::{
JsonRpcId, JsonRpcRequest, JsonRpcRequestBatch, JsonRpcResponse, JsonRpcResponseBatch,
JSONRPC_VERSION,
};
pub use router::{create_acp_server_router, AcpServerState, RouterState};
pub use rpc::{
ConnectorInfo, ConnectorOperations, NoOpConnectorOperations, RpcHandler, SessionInfo,
ACP_PROTOCOL_VERSION, SERVER_NAME,
};
pub use session_manager::{ClientConnection, ClientInfo, SessionManager, SessionMapping};
pub use sse::{AcpNotification, SseNotifier, translate_event};
use axum::{response::Json, routing::get, Router};
use serde::{Deserialize, Serialize};
/// Descriptive metadata about this API, returned as JSON by the `/` route
/// of the router built in `create_api_router`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ApiInfo {
/// Name of the API / crate.
pub name: String,
/// Version string of the API.
pub version: String,
/// Short human-readable description of the API.
pub description: String,
}
/// Build the basic ACP API router.
///
/// Routes:
/// - `GET /` - API information (`api_info`)
/// - `GET /health` - health check (`health_check`)
pub fn create_api_router() -> Router {
    let info_route = get(api_info);
    let health_route = get(health_check);

    Router::new()
        .route("/", info_route)
        .route("/health", health_route)
}
/// Handler for `GET /`: report the API name, crate version and description.
///
/// The version is the crate's `CARGO_PKG_VERSION`, captured at compile time.
async fn api_info() -> Json<ApiInfo> {
    let info = ApiInfo {
        name: String::from("dirigent_acp_api"),
        version: String::from(env!("CARGO_PKG_VERSION")),
        description: String::from("ACP API for Dirigent - orchestrates agentic clients"),
    };
    Json(info)
}
/// Handler for `GET /health`: always reports a fixed "healthy" JSON body.
async fn health_check() -> Json<serde_json::Value> {
    let body = serde_json::json!({
        "status": "healthy",
        "message": "Dirigent ACP API is running"
    });
    Json(body)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `ApiInfo` keeps the values it is constructed with.
    #[test]
    fn test_api_info() {
        let name = String::from("test");
        let version = String::from("0.1.0");
        let description = String::from("test description");

        let info = ApiInfo {
            name,
            version,
            description,
        };

        assert_eq!(info.name, "test");
    }
}
+950
View File
@@ -0,0 +1,950 @@
//! Axum Router Integration for ACP Server
//!
//! This module provides the Axum router and HTTP handlers for the ACP Server.
//! It exposes a JSON-RPC endpoint at `/rpc` and an SSE events endpoint at `/events`.
//!
//! ## Architecture
//!
//! The router is generic over `ConnectorOperations`, allowing different implementations
//! to be used (e.g., one backed by `CoreHandle` for production, or `NoOpConnectorOperations`
//! for testing).
//!
//! ## Endpoints
//!
//! - `POST /rpc` - JSON-RPC 2.0 endpoint for method calls
//! - `GET /events?client_id=...` - SSE stream for real-time notifications
//! - `GET /health` - Health check endpoint
//! - `POST /agent_response` - Endpoint served by `handle_agent_response`
//!
//! ## Example
//!
//! ```rust,ignore
//! use dirigent_acp_api::router::{AcpServerState, create_acp_server_router};
//! use dirigent_acp_api::{AcpServerConfig, NoOpConnectorOperations};
//!
//! // Create state
//! let state = AcpServerState::new(AcpServerConfig::enabled());
//!
//! // Create router with no-op operations for testing
//! let router = create_acp_server_router(state, NoOpConnectorOperations);
//! ```
use std::convert::Infallible;
use std::sync::Arc;
use std::time::Instant;
use axum::{
extract::{Query, State},
http::{HeaderMap, StatusCode},
response::{
sse::{Event, KeepAlive, Sse},
IntoResponse, Json,
},
routing::{get, post},
Router,
};
use serde::{Deserialize, Serialize};
use tokio_stream::StreamExt;
use tower_http::cors::{Any, CorsLayer};
use tracing::{debug, info, trace, warn};
use crate::config::AcpServerConfig;
use crate::rpc::{ConnectorOperations, RpcHandler};
use crate::session_manager::SessionManager;
use crate::sse::SseNotifier;
// ============================================================================
// AcpServerState (T031)
// ============================================================================
/// Internal state shared across handlers
///
/// Held behind the `Arc` inside `AcpServerState`; handlers reach the fields
/// through that type's accessor methods.
struct AcpServerStateInner {
/// Session manager for tracking client sessions
session_manager: SessionManager,
/// SSE notifier for broadcasting events to clients
sse_notifier: SseNotifier,
/// Agent request tracker for bidirectional request/response
///
/// Stored as an `Arc`, so it can also be shared with code outside this state.
agent_request_tracker: Arc<crate::agent_requests::AgentRequestTracker>,
/// Server configuration
config: AcpServerConfig,
}
/// Shared state for the ACP Server Axum handlers
///
/// This struct contains all the state needed by the HTTP handlers:
/// - Session manager for tracking client sessions and mappings
/// - SSE notifier for broadcasting events to connected clients
/// - Agent request tracker for bidirectional request/response
/// - Server configuration
///
/// The state is wrapped in `Arc` internally, making it cheap to clone and
/// share across async tasks and handlers: cloning copies the `Arc`, not the
/// components it points to.
///
/// Note: `RpcHandler` is created per-request with `ConnectorOperations` passed in,
/// as the connector operations implementation cannot be stored in the shared state
/// (it may contain non-Clone types like `CoreHandle`).
#[derive(Clone)]
pub struct AcpServerState {
inner: Arc<AcpServerStateInner>,
}
impl AcpServerState {
/// Create a new ACP server state with the given configuration
///
/// # Parameters
///
/// - `config`: The server configuration
///
/// # Example
///
/// ```rust
/// use dirigent_acp_api::router::AcpServerState;
/// use dirigent_acp_api::AcpServerConfig;
///
/// let state = AcpServerState::new(AcpServerConfig::enabled());
/// ```
pub fn new(config: AcpServerConfig) -> Self {
Self {
inner: Arc::new(AcpServerStateInner {
session_manager: SessionManager::new(),
sse_notifier: SseNotifier::new(),
agent_request_tracker: Arc::new(crate::agent_requests::AgentRequestTracker::new()),
config,
}),
}
}
/// Create a new ACP server state with custom components
///
/// This is useful for testing or when you need to share a session manager
/// or SSE notifier with other parts of the application.
///
/// # Parameters
///
/// - `session_manager`: The session manager instance
/// - `sse_notifier`: The SSE notifier instance
/// - `agent_request_tracker`: The agent request tracker instance
/// - `config`: The server configuration
pub fn with_components(
session_manager: SessionManager,
sse_notifier: SseNotifier,
agent_request_tracker: Arc<crate::agent_requests::AgentRequestTracker>,
config: AcpServerConfig,
) -> Self {
Self {
inner: Arc::new(AcpServerStateInner {
session_manager,
sse_notifier,
agent_request_tracker,
config,
}),
}
}
/// Get a reference to the session manager
pub fn session_manager(&self) -> &SessionManager {
&self.inner.session_manager
}
/// Get a reference to the SSE notifier
pub fn sse_notifier(&self) -> &SseNotifier {
&self.inner.sse_notifier
}
/// Get a reference to the agent request tracker
pub fn agent_request_tracker(&self) -> &Arc<crate::agent_requests::AgentRequestTracker> {
&self.inner.agent_request_tracker
}
/// Get a reference to the configuration
pub fn config(&self) -> &AcpServerConfig {
&self.inner.config
}
}
impl std::fmt::Debug for AcpServerState {
    /// Print a summary (session count, client count and configuration)
    /// instead of dumping the components themselves.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let inner = &self.inner;
        let session_count = inner.session_manager.mapping_count();
        let client_count = inner.sse_notifier.client_count();

        let mut summary = f.debug_struct("AcpServerState");
        summary.field("session_count", &session_count);
        summary.field("client_count", &client_count);
        summary.field("config", &inner.config);
        summary.finish()
    }
}
// ============================================================================
// Router State with ConnectorOperations (T028)
// ============================================================================
/// Combined state for Axum handlers that includes both server state and connector operations
///
/// This struct combines the shared `AcpServerState` with a `ConnectorOperations`
/// implementation. It's used as the Axum state to provide handlers with access
/// to both session management and connector functionality.
///
/// `C` must be `Clone + Send + Sync + 'static`; the bound is declared on the
/// struct itself, so every use of `RouterState<C>` has to satisfy it.
#[derive(Clone)]
pub struct RouterState<C: ConnectorOperations + Clone + Send + Sync + 'static> {
/// The ACP server state (session manager, SSE notifier, config)
pub state: AcpServerState,
/// The connector operations implementation
pub connector_ops: C,
}
impl<C: ConnectorOperations + Clone + Send + Sync + 'static> RouterState<C> {
    /// Pair the shared server state with a connector operations implementation.
    pub fn new(state: AcpServerState, connector_ops: C) -> Self {
        Self { connector_ops, state }
    }
}
// ============================================================================
// Router Factory (T028)
// ============================================================================
/// Create the ACP server router with all endpoints configured
///
/// The returned Axum router serves:
/// - `POST /rpc` - JSON-RPC 2.0 endpoint
/// - `GET /events` - SSE events stream
/// - `GET /health` - Health check
/// - `POST /agent_response` - served by `handle_agent_response`
///
/// A CORS layer derived from `state.config()` wraps all of these routes.
///
/// # Type Parameters
///
/// - `C`: The connector operations implementation
///
/// # Parameters
///
/// - `state`: The ACP server state
/// - `connector_ops`: The connector operations implementation
///
/// # Example
///
/// ```rust,ignore
/// use dirigent_acp_api::router::{AcpServerState, create_acp_server_router};
/// use dirigent_acp_api::{AcpServerConfig, NoOpConnectorOperations};
///
/// let state = AcpServerState::new(AcpServerConfig::enabled());
/// let router = create_acp_server_router(state, NoOpConnectorOperations);
///
/// // Use with axum server
/// let listener = tokio::net::TcpListener::bind("0.0.0.0:3001").await?;
/// axum::serve(listener, router).await?;
/// ```
pub fn create_acp_server_router<C>(state: AcpServerState, connector_ops: C) -> Router
where
    C: ConnectorOperations + Clone + Send + Sync + 'static,
{
    // The CORS layer must be derived before `state` is moved into the router state.
    let cors_layer = build_cors_layer(state.config());

    // Register every endpoint (T023: added /agent_response route)
    let routes = Router::new()
        .route("/rpc", post(handle_rpc::<C>))
        .route("/events", get(handle_sse::<C>))
        .route("/health", get(handle_health::<C>))
        .route("/agent_response", post(handle_agent_response::<C>));

    // Handlers receive the server state and the connector operations together.
    let shared = RouterState::new(state, connector_ops);
    routes.layer(cors_layer).with_state(shared)
}
/// Build CORS layer from configuration
fn build_cors_layer(config: &AcpServerConfig) -> CorsLayer {
let cors = CorsLayer::new()
.allow_methods([
axum::http::Method::GET,
axum::http::Method::POST,
axum::http::Method::OPTIONS,
])
.allow_headers(Any);
match &config.allowed_origins {
Some(origins) if !origins.is_empty() => {
// Parse origins and set specific allowed origins
let parsed_origins: Vec<_> = origins
.iter()
.filter_map(|o| o.parse().ok())
.collect();
if parsed_origins.is_empty() {
warn!("No valid origins in allowed_origins, allowing any origin");
cors.allow_origin(Any)
} else {
debug!("CORS configured with {} allowed origins", parsed_origins.len());
cors.allow_origin(parsed_origins)
}
}
_ => {
debug!("CORS configured to allow any origin");
cors.allow_origin(Any)
}
}
}
// ============================================================================
// Request/Response Types
// ============================================================================
/// Query parameters for the SSE events endpoint (`GET /events`)
///
/// `client_id` is declared optional so that a request without it still
/// deserializes; `handle_sse` then rejects a missing or empty value with a
/// `400 Bad Request` carrying the `MISSING_CLIENT_ID` error code.
#[derive(Debug, Deserialize)]
pub struct SseQuery {
/// The client ID (required for SSE subscription)
pub client_id: Option<String>,
}
/// Health check response, returned as JSON by `GET /health`
#[derive(Debug, Serialize, Deserialize)]
pub struct HealthResponse {
/// Health status (`handle_health` always reports `"healthy"`)
pub status: String,
/// Server message
pub message: String,
/// Number of connected clients (the SSE notifier's client count)
pub clients: usize,
/// Number of active sessions (the session manager's mapping count)
pub sessions: usize,
}
/// Error response for SSE endpoint
///
/// Returned as the JSON body when `handle_sse` rejects a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct SseErrorResponse {
/// Error message (human-readable)
pub error: String,
/// Error code (machine-readable, e.g. `MISSING_CLIENT_ID`)
pub code: String,
}
// ============================================================================
// Endpoint Handlers (T029, T030)
// ============================================================================
/// Handle POST /rpc requests (T029)
///
/// The raw request body is handed to a per-request `RpcHandler` together with
/// the optional `X-Client-ID` header value, and the handler's JSON-RPC response
/// is returned as JSON.
///
/// If the request also carries an `X-Select-Connector` header and the (single)
/// response has a `clientId` in its result, that connector is stored as the
/// client's preferred connector.
async fn handle_rpc<C>(
    State(router_state): State<RouterState<C>>,
    headers: HeaderMap,
    body: String,
) -> impl IntoResponse
where
    C: ConnectorOperations + Clone + Send + Sync + 'static,
{
    debug!("Received RPC request: {} bytes", body.len());

    // Header values that are not valid visible ASCII are treated as absent.
    let client_id = headers
        .get("X-Client-ID")
        .and_then(|value| value.to_str().ok());

    // Connector preference, sent by clients during initialize.
    let select_connector = headers
        .get("X-Select-Connector")
        .and_then(|value| value.to_str().ok())
        .map(|name| name.to_string());

    match client_id {
        Some(id) => {
            info!(client_id = %id, "Received X-Client-ID header from RPC request");
            debug!("RPC headers: {:?}", headers);
        }
        None => {
            warn!("No X-Client-ID header provided in RPC request");
            debug!("Available headers: {:?}", headers.keys().collect::<Vec<_>>());
        }
    }

    // Log the incoming request
    debug!("Received RPC request body: {}", serde_json::to_string(&body).unwrap_or_else(|_| "failed to serialize".to_string()));

    // A fresh handler per request, built from clones of the shared components.
    let handler = RpcHandler::new(
        router_state.state.session_manager().clone(),
        router_state.connector_ops.clone(),
        router_state.state.sse_notifier().clone(),
        router_state.state.agent_request_tracker().clone(),
    );

    // Process the request with the client_id from header
    let response = handler.handle_request(&body, client_id).await;

    // The preference can only be stored once the client is registered, i.e.
    // after the request has been processed. An initialize response is
    // recognised by the `clientId` string in its result.
    if let Some(connector) = select_connector.as_ref() {
        let registered_client_id = match &response {
            crate::jsonrpc::JsonRpcResponseBatch::Single(resp) => resp
                .result
                .as_ref()
                .and_then(|result| result.get("clientId"))
                .and_then(|value| value.as_str()),
            _ => None,
        };

        if let Some(new_client_id) = registered_client_id {
            router_state.state.session_manager().update_client_preferred_connector(
                new_client_id,
                Some(connector.clone()),
            );
            info!(
                client_id = %new_client_id,
                select_connector = %connector,
                "Stored preferred connector from X-Select-Connector header"
            );
        }
    }

    // Log the response being sent
    debug!("Sending RPC response: {}", serde_json::to_string(&response).unwrap_or_else(|_| "failed to serialize".to_string()));

    Json(response)
}
/// Handle GET /events SSE requests (T030)
///
/// Creates an SSE stream subscription for the client. The client_id must be
/// provided as a query parameter.
async fn handle_sse<C>(
State(router_state): State<RouterState<C>>,
Query(query): Query<SseQuery>,
) -> Result<Sse<impl tokio_stream::Stream<Item = Result<Event, Infallible>>>, (StatusCode, Json<SseErrorResponse>)>
where
C: ConnectorOperations + Clone + Send + Sync + 'static,
{
// Validate client_id
let client_id = match query.client_id {
Some(id) if !id.is_empty() => id,
_ => {
warn!("SSE request missing client_id");
return Err((
StatusCode::BAD_REQUEST,
Json(SseErrorResponse {
error: "Missing required parameter: client_id".to_string(),
code: "MISSING_CLIENT_ID".to_string(),
}),
));
}
};
info!("SSE subscription requested for client: {}", client_id);
debug!(
"SSE notifier state before subscribe: client_count={}, subscribed_clients={:?}",
router_state.state.sse_notifier().client_count(),
router_state.state.sse_notifier().subscribed_clients()
);
// Subscribe to notifications
let notifier = router_state.state.sse_notifier();
let notification_stream = notifier.subscribe(&client_id);
info!(
"SSE client subscribed: client_id={}, total_clients={}, is_subscribed={}",
client_id,
notifier.client_count(),
notifier.is_subscribed(&client_id)
);
// Check if this client has any sessions that need tool updates
// This handles the race condition where session/new happens before SSE subscription
let session_manager = router_state.state.session_manager();
let client_sessions = session_manager.list_client_sessions(&client_id);
if !client_sessions.is_empty() {
debug!(
"Client {} has {} existing sessions, sending tool updates",
client_id,
client_sessions.len()
);
// For each session, get the connector and send tool updates
for session_id in client_sessions {
if let Some(mapping) = session_manager.get_mapping(&session_id) {
debug!(
"Fetching tool updates for session {} on connector {}",
session_id,
mapping.connector_id
);
// Get available commands from the connector
match router_state.connector_ops.get_connector_commands(&mapping.connector_id).await {
Ok(commands) => {
let update_params = crate::sse::SessionUpdateParams {
session_id: session_id.clone(),
update: crate::sse::SessionUpdateVariant::AvailableCommandsUpdate {
available_commands: commands.clone(),
},
event_type_override: None,
};
// Broadcast the tool update now that client is subscribed
match notifier.broadcast(&client_id, update_params) {
Ok(n) => {
info!(
"Sent deferred available_commands_update for session {}: {} commands to {} receivers",
session_id,
commands.len(),
n
);
}
Err(_) => {
warn!(
"Failed to send deferred available_commands_update for session {}",
session_id
);
}
}
}
Err(e) => {
warn!(
"Failed to get connector commands for session {}: {}",
session_id,
e
);
}
}
}
}
}
// Map the notification stream to SSE events
let client_id_for_log = client_id.clone();
let sse_stream = notification_stream.map(move |result| {
match result {
Ok(notification) => {
// T013: Start timing for SSE event creation
let event_start = Instant::now();
// Convert notification to SSE event
// Use to_sse_json() which handles raw events correctly
let event_type = notification.event_type();
let data = notification.to_sse_json();
// T011: Log before SSE event is written (T023: includes session_id for correlation)
trace!(
client_id = %client_id_for_log,
event_type = %event_type,
session_id = %notification.session_id,
data_len = data.len(),
"Writing SSE event to client stream"
);
debug!(
"SSE: Sending event to client {}: type={}, session_id={}, data_len={}",
client_id_for_log,
event_type,
notification.session_id,
data.len()
);
trace!("SSE event data: {}", data);
let event = Event::default()
.event(event_type)
.data(data);
// T012: Log after Event::default() construction (T023: includes session_id for correlation)
trace!(
client_id = %client_id_for_log,
event_type = %event_type,
session_id = %notification.session_id,
"SSE event constructed, sending to client"
);
// T013: Log SSE event write completion with timing (T023: includes session_id for correlation)
let elapsed_ms = event_start.elapsed().as_millis();
trace!(
client_id = %client_id_for_log,
event_type = %event_type,
session_id = %notification.session_id,
elapsed_ms = elapsed_ms,
"SSE event write completed"
);
// T014: Warn for slow SSE event writes (T023: includes session_id for correlation)
if elapsed_ms > 100 {
warn!(
elapsed_ms = elapsed_ms,
client_id = %client_id_for_log,
event_type = %event_type,
session_id = %notification.session_id,
"Slow SSE event write detected"
);
}
Ok(event)
}
Err(e) => {
// Broadcast stream error (e.g., lagged receiver)
// We send an error event but keep the stream open
warn!("SSE stream error for client {}: {:?}", client_id_for_log, e);
Ok(Event::default()
.event("error")
.data(format!(r#"{{"error":"Stream error: {:?}"}}"#, e)))
}
}
});
// Return SSE response with keep-alive
Ok(Sse::new(sse_stream).keep_alive(KeepAlive::default()))
}
/// Handle GET /health requests
///
/// Reports a fixed "healthy" status together with the current number of SSE
/// clients and session mappings.
async fn handle_health<C>(State(router_state): State<RouterState<C>>) -> Json<HealthResponse>
where
    C: ConnectorOperations + Clone + Send + Sync + 'static,
{
    let server_state = &router_state.state;
    let clients = server_state.sse_notifier().client_count();
    let sessions = server_state.session_manager().mapping_count();

    let report = HealthResponse {
        status: String::from("healthy"),
        message: String::from("Dirigent ACP Server is running"),
        clients,
        sessions,
    };
    Json(report)
}
/// Response for agent response endpoint
///
/// JSON body returned by `handle_agent_response` on success.
#[derive(Debug, Serialize, Deserialize)]
pub struct AgentResponseResult {
/// Status of the response (`"ok"` when the pending request was completed)
pub status: String,
}
/// Handle POST /agent_response requests (T019)
///
/// Accepts JSON-RPC responses from clients for pending agent requests.
/// When a client approves/denies a permission request, they POST the
/// response to this endpoint, which completes the pending request.
/// (Note: When nested at /acp, the full path becomes /acp/agent_response)
///
/// Expected body format:
/// ```json
/// {
///   "jsonrpc": "2.0",
///   "id": 0,
///   "result": { "selectedOptionId": "allow" }
/// }
/// ```
///
/// The response delivery to the connector happens through the oneshot channel
/// registered in the event bridge (Phase 2.4). The tracker's `complete()` method
/// sends the response value through the oneshot channel, and the event bridge task
/// (which registered the request) will receive it and send the `ConnectorCommand::AgentResponse`.
///
/// # Errors
///
/// - `400 Bad Request` when the `X-Client-ID` header is missing, empty or not
///   a valid header string, or when the body has no `id` field.
/// - `404 Not Found` when the tracker cannot complete the request.
async fn handle_agent_response<C>(
    State(router_state): State<RouterState<C>>,
    headers: HeaderMap,
    Json(response): Json<serde_json::Value>,
) -> Result<Json<AgentResponseResult>, (StatusCode, String)>
where
    C: ConnectorOperations + Clone + Send + Sync + 'static,
{
    // Extract client_id from X-Client-ID header (T020)
    let client_id = match headers.get("X-Client-ID").and_then(|v| v.to_str().ok()) {
        Some(id) if !id.is_empty() => id,
        _ => {
            warn!("Agent response request missing X-Client-ID header");
            return Err((
                StatusCode::BAD_REQUEST,
                "Missing required header: X-Client-ID".to_string(),
            ));
        }
    };

    debug!(
        "Received agent response from client: {}",
        client_id
    );
    trace!("Agent response body: {}", response);

    // Extract request_id from JSON body (T020)
    let request_id = match response.get("id") {
        Some(id) => id.clone(),
        None => {
            warn!(
                client_id = %client_id,
                "Agent response missing 'id' field in body"
            );
            return Err((
                StatusCode::BAD_REQUEST,
                "Missing required field: id".to_string(),
            ));
        }
    };

    info!(
        client_id = %client_id,
        request_id = %request_id,
        "Processing agent response"
    );

    // Call AgentRequestTracker::complete() (T021)
    // The body is not needed afterwards, so it is moved into the tracker
    // rather than cloned; only `request_id` is cloned because it is still
    // used for logging and the error message below.
    let tracker = router_state.state.agent_request_tracker();
    match tracker.complete(client_id, request_id.clone(), response) {
        Ok(()) => {
            info!(
                client_id = %client_id,
                request_id = %request_id,
                "Agent response delivered successfully"
            );
            // Note (T022): The response delivery to the connector happens through
            // the oneshot channel in the event bridge. When the event bridge handles
            // an `Event::AgentRequest`, it registers the request with the tracker and
            // gets a receiver. After we complete the request here, the receiver in the
            // event bridge gets the response value and sends the `ConnectorCommand::AgentResponse`
            // to the connector. This flow is implemented in Phase 2.4 (T024-T030).
            Ok(Json(AgentResponseResult {
                status: "ok".to_string(),
            }))
        }
        Err(e) => {
            // Every tracker failure is reported to the client as 404.
            warn!(
                client_id = %client_id,
                request_id = %request_id,
                error = %e,
                "Agent response failed: request not found (may have timed out)"
            );
            Err((
                StatusCode::NOT_FOUND,
                format!("Request ID {} not found or already completed", request_id),
            ))
        }
    }
}
// ============================================================================
// Tests
// ============================================================================
/// Router-level tests: each endpoint test builds a router backed by
/// `NoOpConnectorOperations` and drives a single request through it with
/// `tower::ServiceExt::oneshot`; the remaining tests cover `AcpServerState`
/// construction and `build_cors_layer`.
#[cfg(test)]
mod tests {
use super::*;
use crate::jsonrpc::JsonRpcResponseBatch;
use crate::rpc::NoOpConnectorOperations;
use axum::{
body::Body,
http::{Request, StatusCode},
};
use tower::ServiceExt;
/// Builds a router with an enabled default configuration and the no-op connector stub.
fn create_test_router() -> Router {
let state = AcpServerState::new(AcpServerConfig::enabled());
create_acp_server_router(state, NoOpConnectorOperations)
}
/// `GET /health` on a fresh server reports "healthy" with zero clients and sessions.
#[tokio::test]
async fn test_health_endpoint() {
let router = create_test_router();
let request = Request::builder()
.uri("/health")
.body(Body::empty())
.unwrap();
let response = router.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
.await
.unwrap();
let health: HealthResponse = serde_json::from_slice(&body).unwrap();
assert_eq!(health.status, "healthy");
assert_eq!(health.clients, 0);
assert_eq!(health.sessions, 0);
}
/// An `initialize` request over `POST /rpc` succeeds and reports the agent name.
#[tokio::test]
async fn test_rpc_initialize() {
let router = create_test_router();
let request_body = r#"{"jsonrpc":"2.0","method":"initialize","id":1}"#;
let request = Request::builder()
.method("POST")
.uri("/rpc")
.header("content-type", "application/json")
.body(Body::from(request_body))
.unwrap();
let response = router.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
.await
.unwrap();
let resp: JsonRpcResponseBatch = serde_json::from_slice(&body).unwrap();
match resp {
JsonRpcResponseBatch::Single(r) => {
assert!(r.is_success());
let result = r.result.unwrap();
assert_eq!(result["agentInfo"]["name"], "dirigent-acp-server");
}
_ => panic!("Expected single response"),
}
}
/// A malformed body still yields HTTP 200; the failure is reported as a
/// JSON-RPC error with the `PARSE_ERROR` code.
#[tokio::test]
async fn test_rpc_invalid_json() {
let router = create_test_router();
let request = Request::builder()
.method("POST")
.uri("/rpc")
.header("content-type", "application/json")
.body(Body::from("not valid json"))
.unwrap();
let response = router.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
.await
.unwrap();
let resp: JsonRpcResponseBatch = serde_json::from_slice(&body).unwrap();
match resp {
JsonRpcResponseBatch::Single(r) => {
assert!(r.is_error());
let error = r.error.unwrap();
assert_eq!(error.code, crate::error::error_codes::PARSE_ERROR);
}
_ => panic!("Expected single response"),
}
}
/// `GET /events` without a `client_id` is rejected with 400 and `MISSING_CLIENT_ID`.
#[tokio::test]
async fn test_sse_missing_client_id() {
let router = create_test_router();
let request = Request::builder()
.uri("/events")
.body(Body::empty())
.unwrap();
let response = router.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
let body = axum::body::to_bytes(response.into_body(), usize::MAX)
.await
.unwrap();
let error: SseErrorResponse = serde_json::from_slice(&body).unwrap();
assert_eq!(error.code, "MISSING_CLIENT_ID");
}
/// `GET /events` with a `client_id` opens an event stream; only the status and
/// content type are checked, the stream body is not consumed.
#[tokio::test]
async fn test_sse_with_client_id() {
let router = create_test_router();
let request = Request::builder()
.uri("/events?client_id=test-client")
.body(Body::empty())
.unwrap();
let response = router.oneshot(request).await.unwrap();
// Should return 200 OK with SSE content type
assert_eq!(response.status(), StatusCode::OK);
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok());
assert!(content_type.is_some());
assert!(content_type.unwrap().contains("text/event-stream"));
}
/// A freshly created state is enabled and has no session mappings or SSE clients.
#[test]
fn test_acp_server_state_new() {
let state = AcpServerState::new(AcpServerConfig::enabled());
assert!(state.config().enabled);
assert_eq!(state.session_manager().mapping_count(), 0);
assert_eq!(state.sse_notifier().client_count(), 0);
}
/// `with_components` keeps the supplied components, including a client that was
/// registered before the state was assembled.
#[test]
fn test_acp_server_state_with_components() {
let session_manager = SessionManager::new();
let sse_notifier = SseNotifier::new();
let config = AcpServerConfig::enabled().set_port(4000);
// Pre-populate some state
session_manager.register_client(None);
let agent_request_tracker = Arc::new(crate::agent_requests::AgentRequestTracker::new());
let state = AcpServerState::with_components(
session_manager,
sse_notifier,
agent_request_tracker,
config,
);
assert!(state.config().enabled);
assert_eq!(state.config().port, 4000);
assert_eq!(state.session_manager().client_count(), 1);
}
/// Clones of the state share their session manager: a client registered through
/// one clone is visible through the other.
#[test]
fn test_acp_server_state_clone() {
let state1 = AcpServerState::new(AcpServerConfig::enabled());
let state2 = state1.clone();
// Both should share the same underlying state
let client_id = state1.session_manager().register_client(None);
assert_eq!(state2.session_manager().client_count(), 1);
assert!(state2.session_manager().get_client(&client_id).is_some());
}
/// The `Debug` output names the type and includes the session and client counts.
#[test]
fn test_acp_server_state_debug() {
let state = AcpServerState::new(AcpServerConfig::enabled());
let debug_str = format!("{:?}", state);
assert!(debug_str.contains("AcpServerState"));
assert!(debug_str.contains("session_count"));
assert!(debug_str.contains("client_count"));
}
/// Smoke test: building the CORS layer from the default configuration does not panic.
#[test]
fn test_build_cors_layer_any_origin() {
let config = AcpServerConfig::default();
let _cors = build_cors_layer(&config);
// No assertion needed - just verify it doesn't panic
}
/// Smoke test: building the CORS layer from a list of well-formed origins does not panic.
#[test]
fn test_build_cors_layer_specific_origins() {
let config = AcpServerConfig::default()
.set_allowed_origins(Some(vec![
"http://localhost:3000".to_string(),
"https://app.example.com".to_string(),
]));
let _cors = build_cors_layer(&config);
// No assertion needed - just verify it doesn't panic
}
/// Smoke test: an explicitly empty origin list is accepted (the resulting layer is
/// not inspected).
#[test]
fn test_build_cors_layer_empty_origins() {
let config = AcpServerConfig::default()
.set_allowed_origins(Some(vec![]));
let _cors = build_cors_layer(&config);
// Should fall back to any origin
}
}
File diff suppressed because it is too large Load Diff
+185
View File
@@ -0,0 +1,185 @@
//! NoOp Connector Operations for Testing
//!
//! This module provides a stub implementation of `ConnectorOperations` that
//! can be used for testing the RPC handler without actual connector access.
use async_trait::async_trait;
use tracing::debug;
use crate::error::AcpServerError;
use super::types::{ConnectorInfo, ConnectorOperations, SessionInfo};
/// A no-op implementation of ConnectorOperations for testing
///
/// This stub provides minimal implementations that return success without
/// actually doing anything. Useful for unit testing the RPC dispatch logic.
///
/// The type is a field-less unit struct, so it is `Copy` and can be passed by
/// value wherever a `ConnectorOperations` implementation is expected.
#[derive(Debug, Clone, Copy, Default)]
pub struct NoOpConnectorOperations;
#[async_trait]
impl ConnectorOperations for NoOpConnectorOperations {
    /// Returns a placeholder session with a freshly generated UUID as its ID.
    /// The working directory and ownership arguments are ignored.
    async fn create_session(
        &self,
        connector_id: &str,
        _cwd: Option<String>,
        _ownership: dirigent_protocol::SessionOwnership,
    ) -> Result<SessionInfo, AcpServerError> {
        let session_id = uuid::Uuid::new_v4().to_string();
        Ok(stub_session_info(session_id, "New Session", connector_id))
    }

    /// Returns a placeholder session that echoes the requested session ID.
    /// The working directory and MCP server arguments are ignored.
    async fn load_session(
        &self,
        connector_id: &str,
        session_id: &str,
        _cwd: Option<String>,
        _mcp_servers: Option<serde_json::Value>,
    ) -> Result<SessionInfo, AcpServerError> {
        Ok(stub_session_info(
            session_id.to_string(),
            "Loaded Session",
            connector_id,
        ))
    }

    /// Logs the message length and reports `"end_turn"` without sending anything.
    async fn send_message(
        &self,
        _connector_id: &str,
        _session_id: &str,
        text: String,
    ) -> Result<String, AcpServerError> {
        debug!("NoOp: send_message - {} chars", text.len());
        Ok(String::from("end_turn"))
    }

    /// Always succeeds; there is nothing to cancel.
    async fn cancel_generation(
        &self,
        _connector_id: &str,
        _session_id: &str,
    ) -> Result<(), AcpServerError> {
        debug!("NoOp: cancel_generation");
        Ok(())
    }

    /// Reports a single, always-available stub connector.
    async fn list_connectors(&self) -> Result<Vec<ConnectorInfo>, AcpServerError> {
        let stub = ConnectorInfo {
            id: String::from("stub-connector"),
            name: String::from("Stub Connector"),
            connector_type: String::from("stub"),
            available: true,
        };
        Ok(vec![stub])
    }

    /// The default connector is the stub connector reported by `list_connectors`.
    async fn default_connector_id(&self) -> Option<String> {
        Some(String::from("stub-connector"))
    }

    /// Returns a fixed list containing a single `echo` command.
    async fn get_connector_commands(
        &self,
        _connector_id: &str,
    ) -> Result<Vec<crate::sse::SlashCommand>, AcpServerError> {
        let echo = crate::sse::SlashCommand {
            name: String::from("echo"),
            description: String::from("Echo command (stub)"),
            input: None,
        };
        Ok(vec![echo])
    }

    /// Logs the connector and request ID; the response payload is discarded.
    async fn send_agent_response(
        &self,
        connector_id: &str,
        request_id: serde_json::Value,
        _response: serde_json::Value,
    ) -> Result<(), AcpServerError> {
        debug!(
            "NoOp: send_agent_response - connector: {}, request: {}",
            connector_id, request_id
        );
        Ok(())
    }

    /// The stub has no model or mode metadata, so both halves are `None`.
    async fn get_session_metadata(
        &self,
        _connector_id: &str,
        _session_id: &str,
    ) -> Result<
        (
            Option<dirigent_protocol::SessionModelState>,
            Option<dirigent_protocol::SessionModeState>,
        ),
        AcpServerError,
    > {
        debug!("NoOp: get_session_metadata");
        Ok((None, None))
    }

    /// Logs the requested mode and succeeds without changing anything.
    async fn set_session_mode(
        &self,
        _connector_id: &str,
        _session_id: &str,
        mode_id: &str,
    ) -> Result<(), AcpServerError> {
        debug!("NoOp: set_session_mode - mode_id: {}", mode_id);
        Ok(())
    }

    /// Logs the requested model and succeeds without changing anything.
    async fn set_session_model(
        &self,
        _connector_id: &str,
        _session_id: &str,
        model_id: &str,
    ) -> Result<(), AcpServerError> {
        debug!("NoOp: set_session_model - model_id: {}", model_id);
        Ok(())
    }

    /// The stub connector has no agent type.
    async fn get_connector_agent_type(
        &self,
        _connector_id: &str,
    ) -> Result<Option<String>, AcpServerError> {
        debug!("NoOp: get_connector_agent_type");
        Ok(None)
    }

    /// Returns one fixed placeholder session attributed to `connector_id`.
    async fn list_sessions(
        &self,
        connector_id: &str,
    ) -> Result<Vec<SessionInfo>, AcpServerError> {
        debug!("NoOp: list_sessions for connector: {}", connector_id);
        let session = stub_session_info(
            String::from("stub-session-1"),
            "Stub Session",
            connector_id,
        );
        Ok(vec![session])
    }

    /// The stub never resolves a session to a connector.
    async fn resolve_session_connector(&self, _session_id: &str) -> Option<String> {
        debug!("NoOp: resolve_session_connector");
        None
    }

    /// The cross-connector listing is always empty.
    async fn list_all_sessions(&self) -> Result<Vec<SessionInfo>, AcpServerError> {
        debug!("NoOp: list_all_sessions");
        Ok(Vec::new())
    }
}

/// Builds the placeholder `SessionInfo` shared by the stub session methods:
/// no working directory, no model/mode state, and the current UTC time as the
/// creation timestamp.
fn stub_session_info(session_id: String, title: &str, connector_id: &str) -> SessionInfo {
    SessionInfo {
        session_id,
        title: Some(title.to_string()),
        connector_id: connector_id.to_string(),
        cwd: None,
        created_at: chrono::Utc::now().to_rfc3339(),
        models: None,
        modes: None,
    }
}
+705
View File
@@ -0,0 +1,705 @@
//! JSON-RPC Request/Response Types for ACP Server
//!
//! This module contains all the parameter and result types used in
//! the ACP JSON-RPC protocol.
use serde::{Deserialize, Serialize};
use crate::error::AcpServerError;
use crate::sse::ConfigOption;
// ============================================================================
// Session and Connector Info Types
// ============================================================================
/// Information about a session
///
/// This is the value `ConnectorOperations` implementations return for session
/// operations. Fields are (de)serialized with camelCase keys (`sessionId`,
/// `connectorId`, `createdAt`). The optional fields carry no
/// `skip_serializing_if`, so a `None` is written out as `null`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionInfo {
/// The session ID
pub session_id: String,
/// Optional title for the session
pub title: Option<String>,
/// The connector handling this session
pub connector_id: String,
/// Working directory for this session
pub cwd: Option<String>,
/// When the session was created (ISO 8601 format)
pub created_at: String,
/// Available models and current model (optional, connector-dependent)
pub models: Option<dirigent_protocol::SessionModelState>,
/// Available modes and current mode (optional, connector-dependent)
pub modes: Option<dirigent_protocol::SessionModeState>,
}
/// Information about a connector
///
/// (De)serialized with camelCase keys, so `connector_type` appears as
/// `connectorType` on the wire.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ConnectorInfo {
/// Unique identifier for this connector
pub id: String,
/// Human-readable name
pub name: String,
/// Type of connector (e.g., "opencode", "gateway")
pub connector_type: String,
/// Whether the connector is currently available
pub available: bool,
}
// ============================================================================
// Initialize Types
// ============================================================================
/// Parameters for the initialize request
///
/// Every field is optional and defaults to `None` when absent from the
/// request. Keys are camelCase on the wire (`clientName`, `clientVersion`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct InitializeParams {
/// Client capabilities (kept as untyped JSON)
#[serde(default)]
pub capabilities: Option<serde_json::Value>,
/// Client name
#[serde(default)]
pub client_name: Option<String>,
/// Client version
#[serde(default)]
pub client_version: Option<String>,
}
/// Result of the initialize request (ACP spec compliant)
///
/// Serialized with camelCase keys (`protocolVersion`, `agentCapabilities`,
/// `agentInfo`, `authMethods`, `clientId`). The `clientId` key is what the
/// HTTP layer looks for in an RPC result to recognise an initialize response.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct InitializeResult {
/// Protocol version (integer)
pub protocol_version: u32,
/// Agent capabilities
pub agent_capabilities: AgentCapabilities,
/// Agent info (name, title, version)
pub agent_info: AgentInfo,
/// Authentication methods (empty for now)
pub auth_methods: Vec<AuthMethod>,
/// Client ID for HTTP-based transports (required for SSE routing)
/// Note: This is not in the official ACP spec but is required for stateless
/// HTTP connections where the client needs an ID to subscribe to SSE events.
pub client_id: String,
}
/// Agent information
///
/// Embedded in `InitializeResult` as `agentInfo`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AgentInfo {
/// Agent name (programmatic)
pub name: String,
/// Agent title (human-readable)
pub title: String,
/// Agent version
pub version: String,
}
/// Agent capabilities advertised to clients
///
/// Every capability is optional; a `None` field is omitted from the serialized
/// JSON entirely rather than written as `null`. Keys are camelCase on the wire
/// (`loadSession`, `listSessions`, `sessionCapabilities`, `promptCapabilities`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AgentCapabilities {
/// Whether session/load is supported
#[serde(skip_serializing_if = "Option::is_none")]
pub load_session: Option<bool>,
/// Whether session/list is supported
#[serde(skip_serializing_if = "Option::is_none")]
pub list_sessions: Option<bool>,
/// Session capabilities (resume, fork, etc.)
#[serde(skip_serializing_if = "Option::is_none")]
pub session_capabilities: Option<SessionCapabilities>,
/// Prompt capabilities (content types supported)
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_capabilities: Option<PromptCapabilities>,
/// MCP server support
#[serde(skip_serializing_if = "Option::is_none")]
pub mcp: Option<McpCapabilities>,
}
/// Extended session capabilities
///
/// Each capability is represented by an arbitrary JSON value instead of a
/// boolean: per the field docs, an empty object means "supported". A `None`
/// field is omitted from the serialized output.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionCapabilities {
/// Whether session/list is supported (empty object = supported)
#[serde(skip_serializing_if = "Option::is_none")]
pub list: Option<serde_json::Value>,
/// Whether session/resume is supported (empty object = supported)
#[serde(skip_serializing_if = "Option::is_none")]
pub resume: Option<serde_json::Value>,
}
/// Prompt capabilities (content types)
///
/// A `None` field is omitted from the serialized output; `embedded_context`
/// is written as `embeddedContext`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PromptCapabilities {
/// Image content support
#[serde(skip_serializing_if = "Option::is_none")]
pub image: Option<bool>,
/// Audio content support
#[serde(skip_serializing_if = "Option::is_none")]
pub audio: Option<bool>,
/// Embedded context support
#[serde(skip_serializing_if = "Option::is_none")]
pub embedded_context: Option<bool>,
}
/// MCP capabilities
///
/// Describes which MCP server transports are supported. A `None` field is
/// omitted from the serialized output.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct McpCapabilities {
/// HTTP transport support
#[serde(skip_serializing_if = "Option::is_none")]
pub http: Option<bool>,
/// SSE transport support (deprecated)
#[serde(skip_serializing_if = "Option::is_none")]
pub sse: Option<bool>,
}
/// Authentication method
///
/// Element type of `InitializeResult::auth_methods`; all three fields are
/// required.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AuthMethod {
/// Method ID
pub id: String,
/// Method name
pub name: String,
/// Method description
pub description: String,
}
impl Default for AgentCapabilities {
    /// The default capability set: session load/list, session list/resume,
    /// image and embedded-context prompts, and both MCP transports.
    /// Audio prompts are left unset.
    fn default() -> Self {
        // Session capabilities are signalled with an empty JSON object.
        let session_capabilities = SessionCapabilities {
            list: Some(serde_json::json!({})),
            resume: Some(serde_json::json!({})),
        };

        let prompt_capabilities = PromptCapabilities {
            image: Some(true),
            audio: None,
            embedded_context: Some(true),
        };

        let mcp = McpCapabilities {
            http: Some(true),
            sse: Some(true),
        };

        Self {
            load_session: Some(true),
            list_sessions: Some(true),
            session_capabilities: Some(session_capabilities),
            prompt_capabilities: Some(prompt_capabilities),
            mcp: Some(mcp),
        }
    }
}
// ============================================================================
// Session/New Types
// ============================================================================
/// Parameters for session/new request
///
/// All fields are optional and default to `None` when absent. Keys are
/// camelCase on the wire (`connectorId`, `sessionId`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionNewParams {
/// Optional connector ID to create the session on
#[serde(default)]
pub connector_id: Option<String>,
/// Optional working directory for the session
#[serde(default)]
pub cwd: Option<String>,
/// Optional client-provided session ID
#[serde(default)]
pub session_id: Option<String>,
}
/// Result of session/new request
///
/// Serialized with camelCase keys (`sessionId`, `connectorId`, `createdAt`,
/// `configOptions`). Optional fields that are `None` are omitted from the
/// output rather than written as `null`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionNewResult {
/// The client-facing session ID
pub session_id: String,
/// Optional title for the session
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
/// The connector handling this session
pub connector_id: String,
/// When the session was created (ISO 8601 format)
pub created_at: String,
/// Available models and current model (UNSTABLE in ACP spec)
#[serde(skip_serializing_if = "Option::is_none")]
pub models: Option<dirigent_protocol::SessionModelState>,
/// Available modes and current mode
#[serde(skip_serializing_if = "Option::is_none")]
pub modes: Option<dirigent_protocol::SessionModeState>,
/// Configuration options (modes, models as unified config)
/// This is the preferred way to expose configuration in ACP
#[serde(skip_serializing_if = "Option::is_none")]
pub config_options: Option<Vec<ConfigOption>>,
}
// ============================================================================
// Session/Load Types
// ============================================================================
/// Parameters for session/load request
///
/// Only `session_id` is required; the remaining fields default to `None` when
/// absent. Keys are camelCase on the wire (`sessionId`, `connectorId`,
/// `mcpServers`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionLoadParams {
/// The session ID to load
pub session_id: String,
/// The connector ID where the session exists (optional; resolved automatically if omitted)
#[serde(default)]
pub connector_id: Option<String>,
/// Optional working directory (standard ACP field sent by clients like Zed)
#[serde(default)]
pub cwd: Option<String>,
/// Optional MCP server configurations (standard ACP field)
/// Each entry is kept as untyped JSON.
#[serde(default)]
pub mcp_servers: Option<Vec<serde_json::Value>>,
}
/// Result of session/load request
///
/// Has the same fields as `SessionNewResult`. Serialized with camelCase keys;
/// optional fields that are `None` are omitted from the output rather than
/// written as `null`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionLoadResult {
/// The client-facing session ID
pub session_id: String,
/// Optional title for the session
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
/// The connector handling this session
pub connector_id: String,
/// When the session was created (ISO 8601 format)
pub created_at: String,
/// Available models and current model (UNSTABLE in ACP spec)
#[serde(skip_serializing_if = "Option::is_none")]
pub models: Option<dirigent_protocol::SessionModelState>,
/// Available modes and current mode
#[serde(skip_serializing_if = "Option::is_none")]
pub modes: Option<dirigent_protocol::SessionModeState>,
/// Configuration options (modes, models as unified config)
/// This is the preferred way to expose configuration in ACP
#[serde(skip_serializing_if = "Option::is_none")]
pub config_options: Option<Vec<ConfigOption>>,
}
// ============================================================================
// Session/List Types
// ============================================================================
/// Parameters for session/list request
///
/// All fields are optional and default to `None` when absent; `connector_id`
/// is written as `connectorId` on the wire.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionListParams {
/// Optional connector ID to list sessions from
#[serde(default)]
pub connector_id: Option<String>,
/// Optional working directory filter
#[serde(default)]
pub cwd: Option<String>,
/// Optional pagination cursor from previous response
#[serde(default)]
pub cursor: Option<String>,
}
/// A session entry in a session/list response (ACP spec)
///
/// Unlike `SessionInfo`, `cwd` is a required string here. Keys are camelCase
/// on the wire (`sessionId`, `updatedAt`), except `meta`, which is explicitly
/// renamed to `_meta`. Optional fields that are `None` are omitted from the
/// output.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionListEntry {
/// Unique session identifier
pub session_id: String,
/// Working directory for this session
pub cwd: String,
/// Human-readable title
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
/// Last activity timestamp (ISO 8601)
#[serde(skip_serializing_if = "Option::is_none")]
pub updated_at: Option<String>,
/// Agent-specific metadata (untyped JSON, serialized under the `_meta` key)
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(rename = "_meta")]
pub meta: Option<serde_json::Value>,
}
/// Result of a `session/list` request.
///
/// Serialized with camelCase keys (`nextCursor`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionListResult {
/// List of available sessions
pub sessions: Vec<SessionListEntry>,
/// Pagination cursor for next page (key is absent when no more results)
#[serde(skip_serializing_if = "Option::is_none")]
pub next_cursor: Option<String>,
}
// ============================================================================
// Session/Resume Types
// ============================================================================
/// Parameters for a `session/resume` request.
///
/// Keys are camelCase on the wire (`sessionId`, `connectorId`, `mcpServers`).
/// Only `session_id` is required; the remaining keys deserialize to `None`
/// when they are missing from the request.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionResumeParams {
/// The session ID to resume
pub session_id: String,
/// The connector ID where the session exists (optional; resolved automatically if omitted)
#[serde(default)]
pub connector_id: Option<String>,
/// Optional working directory (standard ACP field sent by clients like Zed)
#[serde(default)]
pub cwd: Option<String>,
/// Optional MCP server configurations (standard ACP field), kept as raw JSON values
#[serde(default)]
pub mcp_servers: Option<Vec<serde_json::Value>>,
}
/// Result of a `session/resume` request.
///
/// Serialized with camelCase keys (`sessionId`, `connectorId`, `createdAt`,
/// `configOptions`). Every `Option` field is left out of the JSON when it is
/// `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionResumeResult {
/// The client-facing session ID
pub session_id: String,
/// Optional title for the session
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
/// The connector handling this session
pub connector_id: String,
/// When the session was created (ISO 8601 format)
pub created_at: String,
/// Available models and current model
#[serde(skip_serializing_if = "Option::is_none")]
pub models: Option<dirigent_protocol::SessionModelState>,
/// Available modes and current mode
#[serde(skip_serializing_if = "Option::is_none")]
pub modes: Option<dirigent_protocol::SessionModeState>,
/// Configuration options.
// NOTE(review): spelled `crate::sse::ConfigOption` here but plain `ConfigOption`
// in `SessionLoadResult` — presumably the same type via an import; confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub config_options: Option<Vec<crate::sse::ConfigOption>>,
}
// ============================================================================
// Session/Prompt Types
// ============================================================================
/// Parameters for a `session/prompt` request.
///
/// Keys are camelCase on the wire (`sessionId`, `prompt`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionPromptParams {
/// The session ID to send the prompt to
pub session_id: String,
/// The prompt content: either a plain string or an array of content blocks
/// (see [`PromptContent`])
pub prompt: PromptContent,
}
/// Content for a prompt - either simple text or structured blocks.
///
/// The enum is `untagged`: no discriminator is written to the JSON. On input,
/// serde tries the variants in declaration order, so a JSON string becomes
/// [`PromptContent::Text`] and an array of content blocks becomes
/// [`PromptContent::Blocks`]. If any element of the array is not a valid
/// [`ContentBlock`], the whole value fails to deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum PromptContent {
/// Simple text content
Text(String),
/// Structured content blocks
Blocks(Vec<ContentBlock>),
}
impl PromptContent {
    /// Flatten the prompt into a single plain-text string.
    ///
    /// A `Text` prompt is returned as-is. For `Blocks`, the text of every
    /// `ContentBlock::Text` entry is joined with `"\n"` in order; non-text
    /// blocks (e.g. images) are skipped. A block list without any text
    /// yields an empty string.
    pub fn to_text(&self) -> String {
        let blocks = match self {
            Self::Text(plain) => return plain.clone(),
            Self::Blocks(blocks) => blocks,
        };

        let mut pieces: Vec<&str> = Vec::new();
        for block in blocks {
            if let ContentBlock::Text { text } = block {
                pieces.push(text.as_str());
            }
        }
        pieces.join("\n")
    }
}
/// A content block in a prompt.
///
/// Internally tagged: the JSON object carries a `type` key whose value
/// (`"text"` or `"image"`) selects the variant. Only these two block kinds are
/// modeled here.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentBlock {
    /// Text content
    #[serde(rename = "text")]
    Text { text: String },
    /// Image content (base64 encoded)
    #[serde(rename = "image")]
    Image {
        /// Base64-encoded image bytes
        data: String,
        /// MIME type of the image.
        ///
        /// Serialized as `media_type` (unchanged). On input the ACP spelling
        /// `mimeType` is accepted as well, so that a standard image block does
        /// not make the whole (untagged) prompt fail to deserialize.
        #[serde(alias = "mimeType")]
        media_type: String,
    },
}
/// Result of a `session/prompt` request.
///
/// Serialized with camelCase keys (`stopReason`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionPromptResult {
/// The reason the turn stopped (free-form string; not validated by this type)
pub stop_reason: String,
}
// ============================================================================
// Session/Cancel Types
// ============================================================================
/// Parameters for a `session/cancel` request.
///
/// Keys are camelCase on the wire (`sessionId`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionCancelParams {
/// The session ID to cancel
pub session_id: String,
}
/// Result of a `session/cancel` request.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionCancelResult {
/// Whether the cancellation was successful
pub cancelled: bool,
}
// ============================================================================
// Session/Close Types
// ============================================================================
/// Parameters for a `session/close` request.
///
/// Keys are camelCase on the wire (`sessionId`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionCloseParams {
/// The session ID to close
pub session_id: String,
}
/// Result of a `session/close` request.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionCloseResult {
/// Whether the session was successfully closed
pub closed: bool,
}
// ============================================================================
// Session/SetMode and Session/SetModel Types
// ============================================================================
/// Parameters for a `session/set_mode` request.
///
/// Keys are camelCase on the wire (`sessionId`, `modeId`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionSetModeParams {
/// The session ID to update
pub session_id: String,
/// The mode ID to switch to
pub mode_id: String,
}
/// Parameters for a `session/set_model` request.
///
/// Keys are camelCase on the wire (`sessionId`, `modelId`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionSetModelParams {
/// The session ID to update
pub session_id: String,
/// The model ID to switch to
pub model_id: String,
}
/// Parameters for a `session/set_config_option` request.
///
/// This is the unified way to set configuration options (modes, models, etc.)
/// for clients using the new config_options system.
///
/// Keys are camelCase on the wire (`sessionId`, `configId`, `value`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionSetConfigOptionParams {
/// The session ID to update
pub session_id: String,
/// The config option ID (e.g., "mode", "model")
pub config_id: String,
/// The value to set, always carried as a string
pub value: String,
}
// ============================================================================
// ConnectorOperations Trait
// ============================================================================
/// Trait for connector operations that will be implemented by the web server
///
/// This trait abstracts the operations that require access to `CoreHandle`,
/// allowing the RPC handler to remain decoupled from `dirigent_core`.
/// The web server implements this trait to bridge between the ACP server
/// and the actual connector implementations.
///
/// Implementors must be `Send + Sync`. All methods are async (via
/// `async_trait`). Every method except `default_connector_id` and
/// `resolve_session_connector` reports failure as [`AcpServerError`]; those
/// two return `Option`. `resolve_session_connector` and `list_all_sessions`
/// have default implementations and may be left unimplemented.
#[async_trait::async_trait]
pub trait ConnectorOperations: Send + Sync {
/// Create a new session on the specified connector
///
/// `cwd` is the optional working directory for the new session and
/// `ownership` is passed through as a `dirigent_protocol::SessionOwnership`.
async fn create_session(
&self,
connector_id: &str,
cwd: Option<String>,
ownership: dirigent_protocol::SessionOwnership,
) -> Result<SessionInfo, AcpServerError>;
/// Load an existing session from a connector
// NOTE(review): `mcp_servers` is a single JSON value here, while the request
// param structs carry `Option<Vec<serde_json::Value>>` — presumably callers wrap
// the list into one JSON array; confirm against the RPC handler.
async fn load_session(
&self,
connector_id: &str,
session_id: &str,
cwd: Option<String>,
mcp_servers: Option<serde_json::Value>,
) -> Result<SessionInfo, AcpServerError>;
/// Send a message to a session and wait for completion
// NOTE(review): the meaning of the returned `String` is not visible from this
// trait — presumably the stop reason of the turn; confirm with the implementor.
async fn send_message(
&self,
connector_id: &str,
session_id: &str,
text: String,
) -> Result<String, AcpServerError>;
/// Cancel active generation on a session
async fn cancel_generation(
&self,
connector_id: &str,
session_id: &str,
) -> Result<(), AcpServerError>;
/// List all available connectors
async fn list_connectors(&self) -> Result<Vec<ConnectorInfo>, AcpServerError>;
/// Get the default connector ID (if configured); `None` when there is none
async fn default_connector_id(&self) -> Option<String>;
/// Get available commands/tools from a connector
async fn get_connector_commands(
&self,
connector_id: &str,
) -> Result<Vec<crate::sse::SlashCommand>, AcpServerError>;
/// Send an agent response back to a connector
///
/// `request_id` and `response` are forwarded as raw JSON values.
async fn send_agent_response(
&self,
connector_id: &str,
request_id: serde_json::Value,
response: serde_json::Value,
) -> Result<(), AcpServerError>;
/// Get session metadata (models/modes) from a session
///
/// Returns a `(models, modes)` tuple; either element may be `None`.
async fn get_session_metadata(
&self,
connector_id: &str,
session_id: &str,
) -> Result<
(
Option<dirigent_protocol::SessionModelState>,
Option<dirigent_protocol::SessionModeState>,
),
AcpServerError,
>;
/// Set the session mode
async fn set_session_mode(
&self,
connector_id: &str,
session_id: &str,
mode_id: &str,
) -> Result<(), AcpServerError>;
/// Set the session model
async fn set_session_model(
&self,
connector_id: &str,
session_id: &str,
model_id: &str,
) -> Result<(), AcpServerError>;
/// Get the agent type for a connector (for mode/model mapping)
///
/// `Ok(None)` means the lookup succeeded but no agent type is available.
async fn get_connector_agent_type(
&self,
connector_id: &str,
) -> Result<Option<String>, AcpServerError>;
/// List all sessions on a connector
async fn list_sessions(
&self,
connector_id: &str,
) -> Result<Vec<SessionInfo>, AcpServerError>;
/// Look up which connector owns a session by its session ID.
/// Uses archivist to find the connector. Returns connector_id (not UID).
/// Default: returns None (no archivist available).
async fn resolve_session_connector(&self, _session_id: &str) -> Option<String> {
None
}
/// List sessions across all connectors (archivist-backed).
/// Used when the resolved connector is a gateway to provide cross-connector view.
/// Default: returns empty vec (no archivist available).
async fn list_all_sessions(&self) -> Result<Vec<SessionInfo>, AcpServerError> {
Ok(vec![])
}
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,435 @@
//! Content and Metadata Transformation Utilities
//!
//! This module provides helper functions for transforming content between
//! the internal Dirigent protocol and the ACP wire format.
use dirigent_protocol::ContentBlock;
use serde_json::json;
/// Read the ACP `kind` of a tool call from its metadata.
///
/// The kind is stored under the `acp_kind` key of `tool_call.metadata`.
/// Returns `None` when the tool call has no metadata, when the key is
/// missing, or when its value is not a JSON string.
pub fn extract_kind(tool_call: &dirigent_protocol::ToolCall) -> Option<String> {
    let metadata = tool_call.metadata.as_ref()?;
    let kind = metadata.get("acp_kind")?.as_str()?;
    Some(kind.to_owned())
}
/// Convert ToolCallStatus to ACP string format
pub fn tool_call_status_to_string(status: &dirigent_protocol::ToolCallStatus) -> String {
match status {
dirigent_protocol::ToolCallStatus::Pending => "pending".to_string(),
dirigent_protocol::ToolCallStatus::Running => "in_progress".to_string(),
dirigent_protocol::ToolCallStatus::Completed => "completed".to_string(),
dirigent_protocol::ToolCallStatus::Error => "failed".to_string(),
}
}
/// Flatten `ToolCallContent` wrappers into plain `ContentBlock`s for SSE output.
///
/// The internal protocol wraps tool output in `ToolCallContent`, whereas the
/// SSE/ACP wire format expects a flat `ContentBlock` array. Only the `Content`
/// variant carries a block; every other variant (diff, terminal) is not yet
/// supported in SSE output and is dropped. Input order is preserved.
pub fn unwrap_tool_call_content(
    content: &[dirigent_protocol::ToolCallContent],
) -> Vec<ContentBlock> {
    let mut blocks = Vec::new();
    for wrapper in content {
        if let dirigent_protocol::ToolCallContent::Content { content: inner } = wrapper {
            blocks.push(inner.clone());
        }
    }
    blocks
}
/// Rebuild `_meta` for ACP output, guaranteeing `claudeCode.toolName` is present.
///
/// The result is a JSON object built as follows:
///
/// - `claudeCode.toolName` is always set to `tool_call.tool_name`.
/// - If `meta` is present and serializes to a JSON object, every top-level
///   key other than `claudeCode` is copied over unchanged.
/// - If the existing `claudeCode` entry is an object, its keys are merged into
///   the new `claudeCode` object, except `toolName`, which is never
///   overridden. A non-object `claudeCode` entry is dropped.
/// - If `meta` cannot be serialized, it is ignored (best effort).
///
/// Always returns `Some`; the `Option` is kept so the value can be assigned
/// directly to optional `_meta` fields.
pub fn rebuild_meta(
    meta: &Option<dirigent_protocol::Meta>,
    tool_call: &dirigent_protocol::ToolCall,
) -> Option<serde_json::Value> {
    let mut meta_obj = serde_json::Map::new();
    let mut claude_code = serde_json::Map::new();
    claude_code.insert("toolName".to_string(), json!(tool_call.tool_name));

    // Serialize once and consume the resulting object: keys and values are
    // moved into the output maps instead of being deep-cloned one by one.
    let existing = meta.as_ref().and_then(|m| serde_json::to_value(m).ok());
    if let Some(serde_json::Value::Object(fields)) = existing {
        for (key, value) in fields {
            if key != "claudeCode" {
                meta_obj.insert(key, value);
            } else if let serde_json::Value::Object(nested) = value {
                // Keep everything the agent sent (toolResponse, ...), but the
                // tool name from the current tool call is authoritative.
                claude_code.extend(nested.into_iter().filter(|(k, _)| k != "toolName"));
            }
        }
    }

    meta_obj.insert(
        "claudeCode".to_string(),
        serde_json::Value::Object(claude_code),
    );
    Some(serde_json::Value::Object(meta_obj))
}
#[cfg(test)]
mod tests {
    use super::*;
    use dirigent_protocol::ToolCallStatus;
    use serde_json::json;

    /// Build a `ToolCall` fixture; every field that is not a parameter is empty or `None`.
    fn make_tool_call(
        id: &str,
        tool_name: &str,
        status: ToolCallStatus,
        metadata: Option<serde_json::Value>,
    ) -> dirigent_protocol::ToolCall {
        dirigent_protocol::ToolCall {
            id: id.to_string(),
            tool_name: tool_name.to_string(),
            status,
            content: vec![],
            raw_input: None,
            raw_output: None,
            title: None,
            error: None,
            metadata,
            origin: None,
        }
    }

    /// Build a default `Meta` whose `extra` map holds the given `claudeCode` entry.
    fn meta_with_claude_code(claude_code: serde_json::Value) -> dirigent_protocol::Meta {
        let mut meta = dirigent_protocol::Meta::default();
        meta.extra.insert("claudeCode".to_string(), claude_code);
        meta
    }

    #[test]
    fn test_extract_kind_present() {
        let call = make_tool_call(
            "call_123",
            "bash",
            ToolCallStatus::Pending,
            Some(json!({ "acp_kind": "search" })),
        );
        assert_eq!(extract_kind(&call), Some("search".to_string()));
    }

    #[test]
    fn test_extract_kind_missing_metadata() {
        let call = make_tool_call("call_123", "bash", ToolCallStatus::Pending, None);
        assert_eq!(extract_kind(&call), None);
    }

    #[test]
    fn test_extract_kind_missing_field() {
        let call = make_tool_call(
            "call_123",
            "bash",
            ToolCallStatus::Pending,
            Some(json!({ "other_field": "value" })),
        );
        assert_eq!(extract_kind(&call), None);
    }

    #[test]
    fn test_extract_kind_malformed_metadata() {
        // acp_kind is not a string
        let call = make_tool_call(
            "call_123",
            "bash",
            ToolCallStatus::Pending,
            Some(json!({ "acp_kind": 123 })),
        );
        assert_eq!(extract_kind(&call), None);
    }

    #[test]
    fn test_tool_call_status_to_string_all_variants() {
        assert_eq!(
            tool_call_status_to_string(&ToolCallStatus::Pending),
            "pending"
        );
        assert_eq!(
            tool_call_status_to_string(&ToolCallStatus::Running),
            "in_progress"
        );
        assert_eq!(
            tool_call_status_to_string(&ToolCallStatus::Completed),
            "completed"
        );
        assert_eq!(tool_call_status_to_string(&ToolCallStatus::Error), "failed");
    }

    #[test]
    fn test_unwrap_tool_call_content_empty() {
        assert!(unwrap_tool_call_content(&[]).is_empty());
    }

    #[test]
    fn test_unwrap_tool_call_content_extracts_content_blocks() {
        let wrapped = vec![dirigent_protocol::ToolCallContent::Content {
            content: dirigent_protocol::ContentBlock::Text {
                text: "hi".to_string(),
            },
        }];
        let blocks = unwrap_tool_call_content(&wrapped);
        assert_eq!(blocks.len(), 1);
        assert!(matches!(
            &blocks[0],
            dirigent_protocol::ContentBlock::Text { text } if text == "hi"
        ));
    }

    #[test]
    fn test_rebuild_meta_basic() {
        let call = make_tool_call("call_123", "some_tool", ToolCallStatus::Pending, None);
        let meta = rebuild_meta(&None, &call);
        assert!(meta.is_some());
        let meta_obj = meta.unwrap();
        assert_eq!(meta_obj["claudeCode"]["toolName"], "some_tool");
    }

    #[test]
    fn test_rebuild_meta_preserves_existing_fields() {
        let existing_meta = dirigent_protocol::Meta {
            provider: Some(dirigent_protocol::ProviderMeta {
                name: "test_provider".to_string(),
                original_ids: None,
                raw_excerpt: None,
            }),
            extra: std::collections::HashMap::from([(
                "customField".to_string(),
                json!("customValue"),
            )]),
        };
        let call = make_tool_call("call_123", "bash", ToolCallStatus::Running, None);
        let meta = rebuild_meta(&Some(existing_meta), &call);
        assert!(meta.is_some());
        let meta_obj = meta.unwrap();
        // Verify claudeCode.toolName is present
        assert_eq!(meta_obj["claudeCode"]["toolName"], "bash");
        // Verify existing fields are preserved
        assert_eq!(meta_obj["provider"]["name"], "test_provider");
        assert_eq!(meta_obj["customField"], "customValue");
    }

    #[test]
    fn test_rebuild_meta_merges_claude_code_fields() {
        let existing_meta = meta_with_claude_code(json!({
            "toolResponse": "some response",
            "otherField": "other value"
        }));
        let call = make_tool_call("call_123", "test_tool", ToolCallStatus::Completed, None);
        let meta = rebuild_meta(&Some(existing_meta), &call);
        assert!(meta.is_some());
        let meta_obj = meta.unwrap();
        // Verify toolName is present (we set it)
        assert_eq!(meta_obj["claudeCode"]["toolName"], "test_tool");
        // Verify existing claudeCode fields are preserved
        assert_eq!(meta_obj["claudeCode"]["toolResponse"], "some response");
        assert_eq!(meta_obj["claudeCode"]["otherField"], "other value");
    }

    #[test]
    fn test_rebuild_meta_does_not_override_tool_name() {
        // Existing meta has a different toolName
        let existing_meta = meta_with_claude_code(json!({
            "toolName": "wrong_tool",
            "toolResponse": "response"
        }));
        let call = make_tool_call("call_123", "correct_tool", ToolCallStatus::Completed, None);
        let meta = rebuild_meta(&Some(existing_meta), &call);
        assert!(meta.is_some());
        let meta_obj = meta.unwrap();
        // Verify our toolName is used (not the one from existing meta)
        assert_eq!(meta_obj["claudeCode"]["toolName"], "correct_tool");
        // Verify other fields are preserved
        assert_eq!(meta_obj["claudeCode"]["toolResponse"], "response");
    }

    #[test]
    fn test_rebuild_meta_preserves_tool_response() {
        // Test that rebuild_meta preserves toolResponse from incoming Claude updates
        let existing_meta = meta_with_claude_code(json!({
            "toolResponse": {
                "mode": "content",
                "numFiles": 0,
                "filenames": [],
                "content": "some output",
                "numLines": 58,
                "appliedLimit": 100
            },
            "toolName": "OldToolName"
        }));
        let call = make_tool_call("call_456", "Grep", ToolCallStatus::Running, None);
        let meta = rebuild_meta(&Some(existing_meta), &call);
        assert!(meta.is_some());
        let meta_obj = meta.unwrap();
        // toolName should be updated to current tool_call.tool_name
        assert_eq!(meta_obj["claudeCode"]["toolName"], "Grep");
        // toolResponse should be preserved completely
        assert!(meta_obj["claudeCode"]["toolResponse"].is_object());
        assert_eq!(meta_obj["claudeCode"]["toolResponse"]["mode"], "content");
        assert_eq!(meta_obj["claudeCode"]["toolResponse"]["numFiles"], 0);
        assert_eq!(meta_obj["claudeCode"]["toolResponse"]["numLines"], 58);
    }

    #[test]
    fn test_rebuild_meta_round_trip_no_data_loss() {
        // Test incoming → internal → outgoing preserves all fields
        let mut existing_meta = meta_with_claude_code(json!({
            "toolResponse": {
                "mode": "content",
                "numFiles": 3,
                "filenames": ["file1.rs", "file2.rs", "file3.rs"],
                "content": "grep results here",
                "numLines": 42,
                "appliedLimit": 100,
                "customField": "should be preserved",
                "nestedObject": {
                    "deep": "value"
                }
            },
            "additionalField": "also preserved"
        }));
        existing_meta
            .extra
            .insert("customTopLevel".to_string(), json!("preserved too"));
        let call = make_tool_call("call_789", "Grep", ToolCallStatus::Completed, None);
        let meta = rebuild_meta(&Some(existing_meta), &call);
        assert!(meta.is_some());
        let meta_obj = meta.unwrap();
        // Verify NO data loss
        assert_eq!(meta_obj["claudeCode"]["toolName"], "Grep");
        assert_eq!(meta_obj["claudeCode"]["toolResponse"]["mode"], "content");
        assert_eq!(meta_obj["claudeCode"]["toolResponse"]["numFiles"], 3);
        assert_eq!(meta_obj["claudeCode"]["toolResponse"]["numLines"], 42);
        assert_eq!(
            meta_obj["claudeCode"]["toolResponse"]["customField"],
            "should be preserved"
        );
        assert_eq!(
            meta_obj["claudeCode"]["toolResponse"]["nestedObject"]["deep"],
            "value"
        );
        assert_eq!(meta_obj["claudeCode"]["additionalField"], "also preserved");
        assert_eq!(meta_obj["customTopLevel"], "preserved too");
    }
}
@@ -0,0 +1,453 @@
//! Event Translation Layer
//!
//! This module provides translation from Dirigent protocol Events to ACP notifications.
//! It handles the mapping between internal event representations and the wire format
//! expected by ACP clients.
use dirigent_protocol::{Event, SessionUpdate};
use super::content_transform::{
extract_kind, rebuild_meta, tool_call_status_to_string, unwrap_tool_call_content,
};
use super::types::{
models_to_config_option, modes_to_config_option, ConfigOption, ConfigOptionChoice,
ConfigOptionType, SessionUpdateParams, SessionUpdateVariant,
};
/// Translate a Dirigent protocol Event to ACP notifications
///
/// Not all Dirigent events need to be forwarded to ACP clients. This function
/// returns a vector of notifications for events that should be sent, and an empty
/// vector for events that should be filtered out.
///
/// ## Mapped Events
///
/// - `Event::SessionUpdate` with `SessionUpdate::AgentMessageChunk` -> `AgentMessageChunk`
/// - `Event::SessionUpdate` with `SessionUpdate::AgentThoughtChunk` -> `AgentMessageChunk`
///   (thoughts are rendered as ordinary agent text)
/// - `Event::SessionUpdate` with `SessionUpdate::ToolCall` -> `ToolCall`
/// - `Event::SessionUpdate` with `SessionUpdate::ToolCallUpdate` -> `ToolCallUpdate`
/// - `Event::SessionUpdate` with `SessionUpdate::Unknown` -> `Unknown` (forwarded as-is)
/// - `Event::MessageCompleted` -> `MessageComplete`
/// - `Event::SessionIdle` -> `SessionIdle`
/// - `Event::SessionMetadataReceived` -> `ConfigOptionUpdate` and, when a mode is
///   known, `CurrentModeUpdate` (see below)
///
/// ## Session metadata
///
/// - If the event carries a non-empty list of agent-provided config options, those
///   are emitted as one `ConfigOptionUpdate`, followed by a `CurrentModeUpdate` if
///   one of the options has id `"mode"` or category `"mode"`. The legacy
///   `modes`/`models` fields are ignored in that case.
/// - Otherwise (no list, or an empty one) the config options are built from the
///   legacy `modes`/`models` fields. Here the `CurrentModeUpdate` (if `modes` is
///   present) is emitted *before* the `ConfigOptionUpdate`.
/// - If neither source yields anything, no update is emitted.
///
/// ## Filtered Events
///
/// - `Event::SessionUpdate` with `SessionUpdate::UserMessageChunk` - not streamed to clients
/// - `Event::MessageFailed` / `Event::Error` - no equivalent variant in the current structure
/// - `Event::AgentRequest` - not forwarded yet (needs a request/response mechanism)
/// - `Event::SessionsListed` - List operations don't need streaming
/// - `Event::SessionCreated` - Handled via RPC response
/// - `Event::SessionUpdated` - Metadata updates, not content
/// - `Event::SessionMetadataUpdated` - Metadata updates
/// - `Event::SessionDeleted` - Handled via RPC
/// - `Event::Connected` / `Event::Disconnected` - System events
/// - `Event::ConnectorCreated` / `Event::ConnectorRemoved` - System events
/// - `Event::MessagesListed` - List operations
/// - `Event::MessageStarted` - Initial message, content comes via chunks
/// - Every other variant listed in the catch-all arms at the end of the match
///   (session lifecycle, ACP client connection, inspector, archivist and system
///   task events)
///
/// # Parameters
///
/// - `event`: The Dirigent protocol event to translate
///
/// # Returns
///
/// Vec of `SessionUpdateParams` to be sent. Most events return 0 or 1 update;
/// `Event::SessionMetadataReceived` can return up to 2.
pub fn translate_event(event: &Event) -> Vec<SessionUpdateParams> {
match event {
// SessionUpdate events - the main streaming content
// (a filtered update yields `None`, which becomes an empty vec)
Event::SessionUpdate {
session_id, update, ..
} => translate_session_update(session_id, update)
.map(|u| vec![u])
.unwrap_or_default(),
// Message completion
Event::MessageCompleted { message, .. } => vec![SessionUpdateParams {
session_id: message.session_id.clone(),
update: SessionUpdateVariant::MessageComplete {
message_id: Some(message.id.clone()),
},
..Default::default()
}],
// Session idle
Event::SessionIdle { session_id, .. } => vec![SessionUpdateParams {
session_id: session_id.clone(),
update: SessionUpdateVariant::SessionIdle {},
..Default::default()
}],
// Session metadata received - forward as BOTH config_option_update AND current_mode_update
//
// We emit both notification types to support:
// - New clients (acp-beta flag): Use config_option_update
// - Legacy clients: Use current_mode_update (mode only - no legacy model updates exist)
//
// Clients safely ignore notification types they don't understand.
// See: https://agentclientprotocol.com/rfds/session-config-options
Event::SessionMetadataReceived {
session_id,
models,
modes,
config_options: event_config_options,
..
} => {
let mut updates = vec![];
// If the event already has config_options from the agent, prefer those
// over converting from modes/models (agent-provided options are authoritative).
// An empty list does not count: it falls through to the legacy path below.
if let Some(agent_config_options) = event_config_options {
if !agent_config_options.is_empty() {
// Emit ConfigOptionUpdate with agent-provided options
// (field-by-field conversion from the protocol type to the wire type)
updates.push(SessionUpdateParams {
session_id: session_id.clone(),
update: SessionUpdateVariant::ConfigOptionUpdate {
config_options: agent_config_options.iter().map(|co| ConfigOption {
id: co.id.clone(),
name: co.name.clone(),
description: co.description.clone(),
category: co.category.clone(),
option_type: match co.option_type {
dirigent_protocol::ConfigOptionType::Select => ConfigOptionType::Select,
},
current_value: co.current_value.clone(),
options: Some(co.options.iter().map(|v| ConfigOptionChoice {
value: v.value.clone(),
name: v.name.clone(),
description: v.description.clone(),
}).collect()),
}).collect(),
},
..Default::default()
});
// Also emit CurrentModeUpdate for legacy clients if a mode option exists
// (first option whose id is "mode" or whose category is "mode")
if let Some(mode_opt) = agent_config_options.iter().find(|co| co.id == "mode" || co.category.as_deref() == Some("mode")) {
updates.push(SessionUpdateParams {
session_id: session_id.clone(),
update: SessionUpdateVariant::CurrentModeUpdate {
mode_id: mode_opt.current_value.clone(),
},
..Default::default()
});
}
// Agent-provided options win: skip the legacy modes/models conversion.
return updates;
}
}
// Fall back to building config_options from legacy modes/models fields
let mut config_options: Vec<ConfigOption> = vec![];
if let Some(modes_state) = modes {
config_options.push(modes_to_config_option(modes_state));
// Also emit CurrentModeUpdate for legacy clients
// Legacy protocol only supports mode updates (no model updates)
updates.push(SessionUpdateParams {
session_id: session_id.clone(),
update: SessionUpdateVariant::CurrentModeUpdate {
mode_id: modes_state.current_mode_id.clone(),
},
..Default::default()
});
}
if let Some(models_state) = models {
config_options.push(models_to_config_option(models_state));
// Note: Legacy protocol has no CurrentModelUpdate - only new ConfigOptionUpdate
}
// In this path the ConfigOptionUpdate comes after any CurrentModeUpdate.
if !config_options.is_empty() {
updates.push(SessionUpdateParams {
session_id: session_id.clone(),
update: SessionUpdateVariant::ConfigOptionUpdate { config_options },
..Default::default()
});
}
updates
}
// Message failure - not forwarded in new structure (no equivalent variant)
Event::MessageFailed { .. } => vec![],
// System-wide error - not forwarded in new structure (no equivalent variant)
Event::Error { .. } => vec![],
// Agent request - will be handled in Phase 2 (bidirectional request/response)
// For now, not forwarded (requires new SSE variant and response mechanism)
Event::AgentRequest { .. } => vec![],
// Events that should not be forwarded via this translation path
// (they may be handled elsewhere or are not relevant to ACP clients)
Event::SessionsListed { .. }
| Event::SessionCreated { .. }
| Event::SessionUpdated { .. }
| Event::SessionMetadataUpdated { .. }
| Event::SessionDeleted { .. }
| Event::SessionClosed { .. }
| Event::SessionSystemMessageSet { .. }
| Event::SessionError { .. }
| Event::SessionTransferred { .. }
| Event::ForwardingPanic { .. }
| Event::MessagesListed { .. }
| Event::MessageStarted { .. }
| Event::TurnComplete { .. }
| Event::ConnectorCreated { .. }
| Event::ConnectorRemoved { .. }
| Event::ConnectorStateChanged { .. }
| Event::Connected
| Event::Disconnected => vec![],
// ACP client connection events - these are handled by the UI directly
// via the main SSE endpoint, not translated to session updates
Event::AcpClientConnected { .. }
| Event::AcpClientDisconnected { .. }
| Event::AcpClientSessionOpened { .. }
| Event::AcpClientSessionRouted { .. } => vec![],
// Inspector events - not relevant for ACP session updates
Event::InspectorSnapshot { .. }
| Event::InspectorNodeRegistered { .. }
| Event::InspectorNodeRemoved { .. }
| Event::InspectorStateChanged { .. }
| Event::InspectorPropertiesUpdated { .. } => vec![],
// Archivist-to-frontend signal — not relevant for ACP clients
Event::SessionRegistered { .. } => vec![],
// System task events — internal UI concern, not relevant for ACP clients
Event::SystemTaskStatusChanged { .. } => vec![],
}
}
/// Convert a single `SessionUpdate` into the `SessionUpdateParams` sent to ACP clients.
///
/// Returns `None` for updates that are not forwarded (currently only user
/// message chunks). In every other case the result carries `session_id`, the
/// translated update variant, and defaults for all remaining fields.
fn translate_session_update(
    session_id: &str,
    update: &SessionUpdate,
) -> Option<SessionUpdateParams> {
    let variant = match update {
        // User message chunks are typically not streamed to other clients.
        SessionUpdate::UserMessageChunk { .. } => return None,

        // Agent text and agent thoughts share one wire representation:
        // thoughts are shown as ordinary agent text in the UI.
        SessionUpdate::AgentMessageChunk { content, .. }
        | SessionUpdate::AgentThoughtChunk { content, .. } => {
            SessionUpdateVariant::AgentMessageChunk {
                content: content.clone(),
            }
        }

        // A tool call was initiated.
        SessionUpdate::ToolCall {
            tool_call, _meta, ..
        } => SessionUpdateVariant::ToolCall {
            tool_call_id: tool_call.id.clone(),
            title: tool_call.title.clone(),
            kind: extract_kind(tool_call),
            raw_input: tool_call.raw_input.clone(),
            status: Some(tool_call_status_to_string(&tool_call.status)),
            content: unwrap_tool_call_content(&tool_call.content),
            _meta: rebuild_meta(_meta, tool_call),
        },

        // An existing tool call changed (progress, output, error).
        SessionUpdate::ToolCallUpdate {
            tool_call, _meta, ..
        } => SessionUpdateVariant::ToolCallUpdate {
            tool_call_id: tool_call.id.clone(),
            status: Some(tool_call_status_to_string(&tool_call.status)),
            content: unwrap_tool_call_content(&tool_call.content),
            raw_output: tool_call.raw_output.clone(),
            error: tool_call.error.clone(),
            _meta: rebuild_meta(_meta, tool_call),
        },

        // Unrecognized update types are passed through untouched so the
        // server acts as a transparent proxy for them.
        SessionUpdate::Unknown { data } => SessionUpdateVariant::Unknown { data: data.clone() },
    };

    Some(SessionUpdateParams {
        session_id: session_id.to_string(),
        update: variant,
        ..Default::default()
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use dirigent_protocol::{ContentBlock, Message, MessageRole, MessageStatus};

    /// An agent message chunk becomes exactly one `AgentMessageChunk` update
    /// carrying the original text and session ID.
    #[test]
    fn test_translate_session_update_agent_chunk() {
        let chunk = SessionUpdate::AgentMessageChunk {
            message_id: "msg-1".to_string(),
            content: ContentBlock::Text {
                text: "Hello".to_string(),
            },
            _meta: None,
        };
        let event = Event::SessionUpdate {
            connector_id: "conn-1".to_string(),
            session_id: "sess-1".to_string(),
            update: chunk,
        };

        let translated = translate_event(&event);
        assert_eq!(translated.len(), 1);

        let first = &translated[0];
        assert_eq!(first.session_id, "sess-1");
        match &first.update {
            SessionUpdateVariant::AgentMessageChunk {
                content: ContentBlock::Text { text },
            } => assert_eq!(text, "Hello"),
            SessionUpdateVariant::AgentMessageChunk { .. } => panic!("Expected Text content"),
            _ => panic!("Expected AgentMessageChunk variant"),
        }
    }

    /// A thought chunk is surfaced to the client as an `AgentMessageChunk`.
    #[test]
    fn test_translate_session_update_thought_chunk() {
        let thought = SessionUpdate::AgentThoughtChunk {
            message_id: "msg-1".to_string(),
            content: ContentBlock::Text {
                text: "Thinking...".to_string(),
            },
            _meta: None,
        };
        let event = Event::SessionUpdate {
            connector_id: "conn-1".to_string(),
            session_id: "sess-1".to_string(),
            update: thought,
        };

        let translated = translate_event(&event);
        assert_eq!(translated.len(), 1);

        let first = &translated[0];
        assert_eq!(first.session_id, "sess-1");
        // The expected variant is the agent-message one, not a dedicated thought variant.
        match &first.update {
            SessionUpdateVariant::AgentMessageChunk {
                content: ContentBlock::Text { text },
            } => assert_eq!(text, "Thinking..."),
            SessionUpdateVariant::AgentMessageChunk { .. } => panic!("Expected Text content"),
            _ => panic!("Expected AgentMessageChunk variant"),
        }
    }

    /// A completed message yields a `MessageComplete` update with the message's ID.
    #[test]
    fn test_translate_message_completed() {
        let created_at = chrono::Utc::now();
        let message = Message {
            id: "msg-1".to_string(),
            session_id: "sess-1".to_string(),
            role: MessageRole::Assistant,
            created_at,
            content: vec![],
            status: MessageStatus::Completed,
            metadata: None,
        };
        let event = Event::MessageCompleted {
            connector_id: "conn-1".to_string(),
            message,
        };

        let translated = translate_event(&event);
        assert_eq!(translated.len(), 1);

        let first = &translated[0];
        assert_eq!(first.session_id, "sess-1");
        match &first.update {
            SessionUpdateVariant::MessageComplete { message_id } => {
                assert_eq!(message_id.as_deref(), Some("msg-1"));
            }
            _ => panic!("Expected MessageComplete variant"),
        }
    }

    /// A session-idle event yields a single `SessionIdle` update for that session.
    #[test]
    fn test_translate_session_idle() {
        let event = Event::SessionIdle {
            connector_id: "test-connector".to_string(),
            session_id: "sess-1".to_string(),
        };

        let translated = translate_event(&event);
        assert_eq!(translated.len(), 1);

        let first = &translated[0];
        assert_eq!(first.session_id, "sess-1");
        assert!(
            matches!(&first.update, SessionUpdateVariant::SessionIdle {}),
            "Expected SessionIdle variant"
        );
    }

    /// `MessageFailed` events are filtered: translation produces no updates.
    #[test]
    fn test_translate_message_failed() {
        let event = Event::MessageFailed {
            message_id: "msg-1".to_string(),
            error: "Connection lost".to_string(),
        };

        let translated = translate_event(&event);
        assert!(
            translated.is_empty(),
            "MessageFailed events should be filtered"
        );
    }

    /// None of these events should produce a client notification.
    #[test]
    fn test_translate_filtered_events() {
        let silenced = vec![
            Event::Connected,
            Event::Disconnected,
            Event::SessionDeleted {
                session_id: "s".to_string(),
            },
            Event::Error {
                message: "System error".to_string(),
            },
            Event::MessageFailed {
                message_id: "msg-1".to_string(),
                error: "Failed".to_string(),
            },
        ];

        for event in silenced.iter() {
            let translated = translate_event(event);
            assert!(
                translated.is_empty(),
                "Expected {:?} to be filtered",
                event
            );
        }
    }
}
+478
View File
@@ -0,0 +1,478 @@
//! SSE (Server-Sent Events) Notifications for ACP Server
//!
//! This module provides the SSE notification infrastructure for streaming events
//! to connected ACP clients. It handles per-client subscriptions using broadcast
//! channels and provides translation from Dirigent protocol events to ACP notifications.
//!
//! ## Architecture
//!
//! The module is organized into several sub-modules:
//!
//! - `types` - Data structures for SSE notifications (SessionUpdateParams, SessionUpdateVariant, etc.)
//! - `notifier` - SseNotifier for managing client subscriptions and broadcasting
//! - `event_translator` - Translation from Dirigent Events to ACP notifications
//! - `content_transform` - Helper functions for content and metadata transformation
//!
//! ## SSE Wire Format
//!
//! The SSE stream uses the following format:
//!
//! ```text
//! event: session/update
//! data: {"sessionId": "...", "update": {"sessionUpdate": "agent_message_chunk", ...}}
//!
//! event: session/update
//! data: {"sessionId": "...", "update": {"sessionUpdate": "message_complete", ...}}
//! ```
//!
//! ## Example
//!
//! ```rust,ignore
//! use dirigent_acp_api::sse::{SseNotifier, SessionUpdateParams, translate_event};
//!
//! // Create the notifier
//! let notifier = SseNotifier::new();
//!
//! // Subscribe a client
//! let stream = notifier.subscribe("client-123");
//!
//! // Translate and broadcast an event
//! let updates = translate_event(&event);
//! for update in updates {
//! notifier.broadcast("client-123", update);
//! }
//!
//! // Unsubscribe when done
//! notifier.unsubscribe("client-123");
//! ```
mod content_transform;
mod event_translator;
mod notifier;
mod types;
// Re-export public types
pub use content_transform::{
extract_kind, rebuild_meta, tool_call_status_to_string, unwrap_tool_call_content,
};
pub use event_translator::translate_event;
pub use notifier::SseNotifier;
pub use types::{
models_to_config_option, modes_to_config_option, AcpNotification, ConfigOption,
ConfigOptionChoice, ConfigOptionType, SessionUpdateParams, SessionUpdateVariant, SlashCommand,
};
// ============================================================================
// Tests (moved from original sse.rs)
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // ------------------------------------------------------------------------
    // AcpNotification: serialization, event names, SSE framing, round-trips
    // ------------------------------------------------------------------------

    /// `MessageChunk` serializes with a `message_chunk` tag and camelCase keys.
    ///
    /// The needles deliberately omit the closing quote of the value, so each
    /// one matches as a prefix of the serialized value.
    #[test]
    fn test_message_chunk_serialization() {
        let chunk = AcpNotification::MessageChunk {
            session_id: "sess-123".to_string(),
            message_id: "msg-456".to_string(),
            content: "Hello, world!".to_string(),
            content_type: Some("text".to_string()),
        };
        let encoded = serde_json::to_string(&chunk).unwrap();
        let has = |needle: &str| encoded.contains(needle);

        assert!(has(r#""type":"message_chunk"#));
        assert!(has(r#""sessionId":"sess-123"#));
        assert!(has(r#""messageId":"msg-456"#));
        assert!(has(r#""content":"Hello, world!"#));
        assert!(has(r#""contentType":"text"#));
    }

    /// `MessageComplete` serializes with a `message_complete` tag and both IDs.
    #[test]
    fn test_message_complete_serialization() {
        let complete = AcpNotification::MessageComplete {
            session_id: "sess-123".to_string(),
            message_id: "msg-456".to_string(),
        };
        let encoded = serde_json::to_string(&complete).unwrap();
        let has = |needle: &str| encoded.contains(needle);

        assert!(has(r#""type":"message_complete"#));
        assert!(has(r#""sessionId":"sess-123"#));
        assert!(has(r#""messageId":"msg-456"#));
    }

    /// `SessionIdle` serializes with a `session_idle` tag and the session ID.
    #[test]
    fn test_session_idle_serialization() {
        let idle = AcpNotification::SessionIdle {
            session_id: "sess-123".to_string(),
        };
        let encoded = serde_json::to_string(&idle).unwrap();
        let has = |needle: &str| encoded.contains(needle);

        assert!(has(r#""type":"session_idle"#));
        assert!(has(r#""sessionId":"sess-123"#));
    }

    /// `SessionError` serializes with a `session_error` tag and the error text.
    #[test]
    fn test_session_error_serialization() {
        let failure = AcpNotification::SessionError {
            session_id: "sess-123".to_string(),
            error: "Something went wrong".to_string(),
        };
        let encoded = serde_json::to_string(&failure).unwrap();
        let has = |needle: &str| encoded.contains(needle);

        assert!(has(r#""type":"session_error"#));
        assert!(has(r#""error":"Something went wrong"#));
    }

    /// `ToolCallUpdate` serializes its populated fields and omits a `None` error.
    #[test]
    fn test_tool_call_update_serialization() {
        let tool_update = AcpNotification::ToolCallUpdate {
            session_id: "sess-123".to_string(),
            message_id: "msg-456".to_string(),
            tool_call_id: "call-789".to_string(),
            tool_name: "bash".to_string(),
            status: "running".to_string(),
            title: Some("Running command".to_string()),
            error: None,
        };
        let encoded = serde_json::to_string(&tool_update).unwrap();
        let has = |needle: &str| encoded.contains(needle);

        assert!(has(r#""type":"tool_call_update"#));
        assert!(has(r#""toolName":"bash"#));
        assert!(has(r#""status":"running"#));
        assert!(has(r#""title":"Running command"#));
        // `error` is `None`, so the key must be absent from the output.
        assert!(!has(r#""error""#));
    }

    /// Every notification variant reports its expected SSE event name.
    #[test]
    fn test_event_type() {
        let cases = vec![
            (
                AcpNotification::MessageChunk {
                    session_id: "s".to_string(),
                    message_id: "m".to_string(),
                    content: "c".to_string(),
                    content_type: None,
                },
                "message/chunk",
            ),
            (
                AcpNotification::MessageComplete {
                    session_id: "s".to_string(),
                    message_id: "m".to_string(),
                },
                "message/complete",
            ),
            (
                AcpNotification::SessionIdle {
                    session_id: "s".to_string(),
                },
                "session/idle",
            ),
            (
                AcpNotification::SessionError {
                    session_id: "s".to_string(),
                    error: "e".to_string(),
                },
                "session/error",
            ),
            (
                AcpNotification::ToolCallUpdate {
                    session_id: "s".to_string(),
                    message_id: "m".to_string(),
                    tool_call_id: "t".to_string(),
                    tool_name: "bash".to_string(),
                    status: "running".to_string(),
                    title: None,
                    error: None,
                },
                "tool/update",
            ),
        ];

        for (notification, expected) in cases {
            assert_eq!(notification.event_type(), expected);
        }
    }

    /// The SSE frame starts with the event line, carries a data line, and ends
    /// with a blank line.
    #[test]
    fn test_to_sse_string() {
        let idle = AcpNotification::SessionIdle {
            session_id: "sess-123".to_string(),
        };
        let frame = idle.to_sse_string();

        assert!(frame.starts_with("event: session/idle\n"));
        assert!(frame.contains("data: "));
        assert!(frame.contains("sess-123"));
        assert!(frame.ends_with("\n\n"));
    }

    /// Serializing and then deserializing any notification yields an equal value.
    #[test]
    fn test_notification_roundtrip() {
        let samples = vec![
            AcpNotification::MessageChunk {
                session_id: "s".to_string(),
                message_id: "m".to_string(),
                content: "c".to_string(),
                content_type: Some("text".to_string()),
            },
            AcpNotification::MessageComplete {
                session_id: "s".to_string(),
                message_id: "m".to_string(),
            },
            AcpNotification::SessionIdle {
                session_id: "s".to_string(),
            },
            AcpNotification::SessionError {
                session_id: "s".to_string(),
                error: "e".to_string(),
            },
            AcpNotification::ToolCallUpdate {
                session_id: "s".to_string(),
                message_id: "m".to_string(),
                tool_call_id: "t".to_string(),
                tool_name: "bash".to_string(),
                status: "running".to_string(),
                title: Some("title".to_string()),
                error: None,
            },
        ];

        for original in &samples {
            let encoded = serde_json::to_string(original).unwrap();
            let decoded: AcpNotification = serde_json::from_str(&encoded).unwrap();
            assert_eq!(original, &decoded);
        }
    }

    // ------------------------------------------------------------------------
    // SessionUpdateVariant: the discriminator key must be `sessionUpdate`
    // ------------------------------------------------------------------------

    /// A `ToolCall` variant is tagged with `sessionUpdate` (never `type`) and
    /// its fields are flattened next to the tag.
    #[test]
    fn test_session_update_variant_uses_session_update_tag_tool_call() {
        let meta = json!({
            "claudeCode": {
                "toolName": "bash"
            }
        });
        let tool_call = SessionUpdateVariant::ToolCall {
            tool_call_id: "call_456".to_string(),
            title: Some("Running command".to_string()),
            kind: Some("command".to_string()),
            raw_input: Some(json!({"command": "ls"})),
            status: Some("in_progress".to_string()),
            content: vec![],
            _meta: Some(meta),
        };
        let value = serde_json::to_value(&tool_call).unwrap();

        // The correct tag name must be present with the expected value.
        assert!(
            value.get("sessionUpdate").is_some(),
            "Missing 'sessionUpdate' field in JSON: {:?}",
            value
        );
        assert_eq!(
            value["sessionUpdate"], "tool_call",
            "Expected sessionUpdate value 'tool_call', got: {:?}",
            value["sessionUpdate"]
        );

        // The incorrect tag name must be absent.
        let legacy_tag = value.get("type");
        assert!(
            legacy_tag.is_none(),
            "Should not have 'type' field in ACP output, found: {:?}",
            legacy_tag
        );

        // Flattened fields sit at the same level as the tag.
        let flattened = [
            ("toolCallId", "Missing toolCallId field"),
            ("title", "Missing title field"),
            ("status", "Missing status field"),
        ];
        for (key, message) in flattened {
            assert!(value.get(key).is_some(), "{}", message);
        }
    }

    /// Each sampled variant serializes with a `sessionUpdate` tag, without a
    /// `type` tag, and deserializes back to an equal value.
    #[test]
    fn test_session_update_variant_roundtrip_with_session_update_tag() {
        let samples = vec![
            SessionUpdateVariant::SessionIdle {},
            SessionUpdateVariant::MessageComplete {
                message_id: Some("msg_1".to_string()),
            },
            SessionUpdateVariant::AgentMessageChunk {
                content: dirigent_protocol::ContentBlock::Text {
                    text: "test".to_string(),
                },
            },
            SessionUpdateVariant::ToolCall {
                tool_call_id: "call_1".to_string(),
                title: None,
                kind: None,
                raw_input: None,
                status: Some("pending".to_string()),
                content: vec![],
                _meta: Some(json!({
                    "claudeCode": {
                        "toolName": "test_tool"
                    }
                })),
            },
        ];

        for variant in samples {
            let encoded = serde_json::to_string(&variant).unwrap();
            let as_value: serde_json::Value = serde_json::from_str(&encoded).unwrap();

            assert!(
                as_value.get("sessionUpdate").is_some(),
                "Missing sessionUpdate in serialized JSON for variant: {:?}",
                variant
            );
            assert!(
                as_value.get("type").is_none(),
                "Found unexpected 'type' tag for variant: {:?}",
                variant
            );

            let decoded: SessionUpdateVariant = serde_json::from_str(&encoded).unwrap();
            assert_eq!(
                variant, decoded,
                "Roundtrip failed for variant: {:?}",
                variant
            );
        }
    }

    // ------------------------------------------------------------------------
    // camelCase compliance of serialized field names
    // ------------------------------------------------------------------------

    /// Top-level and variant-level keys are camelCase; snake_case twins are absent.
    #[test]
    fn test_all_fields_use_camel_case() {
        let tool_call = SessionUpdateVariant::ToolCall {
            tool_call_id: "tool_123".to_string(),
            title: Some("test".to_string()),
            kind: Some("search".to_string()),
            raw_input: Some(json!({"key": "value"})),
            status: Some("pending".to_string()),
            content: vec![],
            _meta: None,
        };
        let params = SessionUpdateParams {
            session_id: "sess_123".to_string(),
            update: tool_call,
            ..Default::default()
        };
        let value = serde_json::to_value(&params).unwrap();

        // Top-level keys.
        assert!(
            value.get("sessionId").is_some(),
            "Should have sessionId (not session_id)"
        );
        assert!(
            value.get("session_id").is_none(),
            "Should NOT have session_id"
        );

        // Keys of the nested ToolCall variant.
        let nested = &value["update"];
        assert!(
            nested.get("toolCallId").is_some(),
            "Should have toolCallId (not tool_call_id)"
        );
        assert!(
            nested.get("tool_call_id").is_none(),
            "Should NOT have tool_call_id"
        );
        assert!(
            nested.get("rawInput").is_some(),
            "Should have rawInput (not raw_input)"
        );
        assert!(
            nested.get("raw_input").is_none(),
            "Should NOT have raw_input"
        );
    }

    /// No quoted snake_case key appears anywhere in the serialized output of
    /// several representative updates.
    #[test]
    fn test_no_snake_case_in_serialized_json() {
        // snake_case spellings that must never show up as quoted strings.
        let forbidden_patterns = [
            "session_id",
            "tool_call_id",
            "raw_input",
            "raw_output",
            "message_id",
        ];

        let samples = vec![
            SessionUpdateParams {
                session_id: "s1".to_string(),
                update: SessionUpdateVariant::ToolCall {
                    tool_call_id: "tc1".to_string(),
                    title: Some("t".to_string()),
                    kind: Some("k".to_string()),
                    raw_input: Some(json!({})),
                    status: Some("pending".to_string()),
                    content: vec![],
                    _meta: None,
                },
                ..Default::default()
            },
            SessionUpdateParams {
                session_id: "s2".to_string(),
                update: SessionUpdateVariant::ToolCallUpdate {
                    tool_call_id: "tc2".to_string(),
                    status: Some("completed".to_string()),
                    content: vec![],
                    raw_output: Some(json!({})),
                    error: None,
                    _meta: None,
                },
                ..Default::default()
            },
            SessionUpdateParams {
                session_id: "s3".to_string(),
                update: SessionUpdateVariant::MessageComplete {
                    message_id: Some("m3".to_string()),
                },
                ..Default::default()
            },
        ];

        for params in &samples {
            let encoded = serde_json::to_string(params).unwrap();
            for pattern in forbidden_patterns.iter() {
                let quoted = format!("\"{}\"", pattern);
                assert!(
                    !encoded.contains(&quoted),
                    "Found forbidden snake_case field '{}' in JSON: {}",
                    pattern,
                    encoded
                );
            }
        }
    }
}
+403
View File
@@ -0,0 +1,403 @@
//! SSE Notifier for managing client subscriptions
//!
//! This module provides the subscription management infrastructure for SSE streaming.
//! The `SseNotifier` manages per-client broadcast channels for targeted notifications.
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::{Arc, RwLock};
use tokio::sync::broadcast;
use tokio_stream::wrappers::errors::BroadcastStreamRecvError;
use tokio_stream::wrappers::BroadcastStream;
use tokio_stream::Stream;
use tracing::{debug, trace, warn};
use super::types::SessionUpdateParams;
/// Default broadcast channel capacity (messages buffered per client).
///
/// Used by `SseNotifier::new`; `SseNotifier::with_capacity` lets callers choose
/// a different value.
const DEFAULT_CHANNEL_CAPACITY: usize = 256;
/// Internal state for SSE subscriptions
///
/// Kept private and only accessed through the `RwLock` held by `SseNotifier`.
#[derive(Debug, Default)]
struct SseNotifierState {
/// Map from client_id to their broadcast sender.
///
/// An entry is inserted by `subscribe` and removed by `unsubscribe`; one sender
/// may have several receivers when the same client subscribes more than once.
subscriptions: HashMap<String, broadcast::Sender<SessionUpdateParams>>,
}
/// SSE Notifier for managing client subscriptions and broadcasting notifications
///
/// The `SseNotifier` manages per-client subscriptions using tokio broadcast channels.
/// Each client has their own broadcast channel, allowing targeted notifications.
///
/// ## Thread Safety
///
/// The `SseNotifier` is designed to be cloned and shared across async tasks.
/// Internal state is protected by `Arc<RwLock<...>>` for thread-safe access, so
/// every clone operates on the same subscription map.
///
/// ## Example
///
/// ```rust,ignore
/// let notifier = SseNotifier::new();
///
/// // Subscribe a client and get their stream
/// let stream = notifier.subscribe("client-123");
///
/// // In another task, broadcast notifications
/// notifier.broadcast("client-123", notification);
///
/// // When client disconnects
/// notifier.unsubscribe("client-123");
/// ```
#[derive(Debug, Clone)]
pub struct SseNotifier {
/// Shared subscription map (client_id -> broadcast sender).
state: Arc<RwLock<SseNotifierState>>,
/// Capacity passed to `broadcast::channel` when a client's channel is created.
channel_capacity: usize,
}
impl Default for SseNotifier {
fn default() -> Self {
Self::new()
}
}
impl SseNotifier {
    /// Create a new SSE notifier with default capacity
    ///
    /// Each client channel buffers up to `DEFAULT_CHANNEL_CAPACITY` (256) messages.
    pub fn new() -> Self {
        Self::with_capacity(DEFAULT_CHANNEL_CAPACITY)
    }

    /// Create a new SSE notifier with custom channel capacity
    ///
    /// # Parameters
    ///
    /// - `capacity`: The maximum number of messages that can be buffered per client.
    ///   A value of `0` is raised to `1`.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            state: Arc::new(RwLock::new(SseNotifierState::default())),
            // `broadcast::channel` panics on a zero capacity. That panic would fire
            // inside `subscribe` while the write lock is held, poisoning the lock and
            // making every later call on this notifier panic as well.
            channel_capacity: capacity.max(1),
        }
    }

    /// Subscribe a client and return an async stream of notifications
    ///
    /// Creates a broadcast channel for the client (on first subscription) and
    /// returns a stream that can be used with Axum's `Sse<impl Stream>` response type.
    ///
    /// If the client is already subscribed, a new receiver is created from the
    /// existing sender (allowing multiple connections per client).
    ///
    /// # Parameters
    ///
    /// - `client_id`: Unique identifier for the client
    ///
    /// # Returns
    ///
    /// A pinned stream of `SessionUpdateParams` wrapped in `Result` for error handling.
    /// The stream is compatible with Axum's SSE handler.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock is poisoned.
    pub fn subscribe(
        &self,
        client_id: &str,
    ) -> Pin<Box<dyn Stream<Item = Result<SessionUpdateParams, BroadcastStreamRecvError>> + Send>>
    {
        let mut state = self.state.write().expect("Lock poisoned");

        // Get or create the sender for this client. The receiver returned by
        // `broadcast::channel` is dropped right away; the real one is created below.
        let sender = state
            .subscriptions
            .entry(client_id.to_string())
            .or_insert_with(|| {
                debug!("Creating new broadcast channel for client: {}", client_id);
                let (tx, _rx) = broadcast::channel(self.channel_capacity);
                tx
            });

        let receiver = sender.subscribe();
        debug!(
            "Client subscribed to SSE: {} (subscribers: {})",
            client_id,
            sender.receiver_count()
        );

        Box::pin(BroadcastStream::new(receiver))
    }

    /// Unsubscribe a client and cleanup resources
    ///
    /// Removes the client's entry and drops its sender, which closes the channel.
    /// Per tokio's broadcast semantics, streams previously returned by
    /// [`subscribe`](Self::subscribe) for this client still yield any updates that
    /// were already buffered and then end.
    ///
    /// # Parameters
    ///
    /// - `client_id`: The ID of the client to unsubscribe
    ///
    /// # Returns
    ///
    /// `true` if the client was subscribed and removed, `false` if not found
    ///
    /// # Panics
    ///
    /// Panics if the internal lock is poisoned.
    pub fn unsubscribe(&self, client_id: &str) -> bool {
        let mut state = self.state.write().expect("Lock poisoned");
        match state.subscriptions.remove(client_id) {
            Some(sender) => {
                debug!(
                    "Client unsubscribed from SSE: {} (was {} receivers)",
                    client_id,
                    sender.receiver_count()
                );
                // `sender` is dropped at the end of this arm, closing the channel.
                true
            }
            None => {
                debug!("Client was not subscribed: {}", client_id);
                false
            }
        }
    }

    /// Broadcast a session update to a specific client
    ///
    /// Sends the session update to all receivers subscribed to the specified client.
    /// If the client has an entry but no live receivers, the update is dropped and
    /// `Ok(0)` is returned.
    ///
    /// # Parameters
    ///
    /// - `client_id`: The ID of the client to send to
    /// - `update_params`: The session update parameters to send
    ///
    /// # Returns
    ///
    /// - `Ok(n)` where n is the number of receivers that received the update
    /// - `Err(())` if the client is not subscribed
    ///
    /// # Panics
    ///
    /// Panics if the internal lock is poisoned.
    pub fn broadcast(
        &self,
        client_id: &str,
        update_params: SessionUpdateParams,
    ) -> Result<usize, ()> {
        let state = self.state.read().expect("Lock poisoned");
        if let Some(sender) = state.subscriptions.get(client_id) {
            match sender.send(update_params) {
                Ok(n) => {
                    trace!(
                        "Broadcast session update to client {}: {} receivers",
                        client_id,
                        n
                    );
                    Ok(n)
                }
                Err(_) => {
                    // No active receivers - this is okay, the update is just dropped
                    trace!("Broadcast to client {}: no active receivers", client_id);
                    Ok(0)
                }
            }
        } else {
            warn!("Attempted broadcast to unsubscribed client: {}", client_id);
            Err(())
        }
    }

    /// Broadcast a session update to all subscribed clients
    ///
    /// Sends a clone of the update to every client channel. A client without live
    /// receivers is logged at trace level and does not stop the broadcast.
    ///
    /// # Parameters
    ///
    /// - `update_params`: The session update parameters to broadcast
    ///
    /// # Returns
    ///
    /// The total number of receivers that received the update
    ///
    /// # Panics
    ///
    /// Panics if the internal lock is poisoned.
    pub fn broadcast_all(&self, update_params: SessionUpdateParams) -> usize {
        let state = self.state.read().expect("Lock poisoned");
        let mut total_receivers = 0;
        for (client_id, sender) in state.subscriptions.iter() {
            match sender.send(update_params.clone()) {
                Ok(n) => {
                    total_receivers += n;
                    trace!("Broadcast to client {}: {} receivers", client_id, n);
                }
                Err(_) => {
                    trace!("Broadcast to client {}: no active receivers", client_id);
                }
            }
        }
        debug!(
            "Broadcast to all clients: {} total receivers across {} clients",
            total_receivers,
            state.subscriptions.len()
        );
        total_receivers
    }

    /// Get the number of subscribed clients
    ///
    /// Counts entries in the subscription map, not individual receivers.
    pub fn client_count(&self) -> usize {
        let state = self.state.read().expect("Lock poisoned");
        state.subscriptions.len()
    }

    /// Check if a client is subscribed
    ///
    /// Returns `true` while the client has an entry in the subscription map,
    /// i.e. after `subscribe` and before `unsubscribe`.
    pub fn is_subscribed(&self, client_id: &str) -> bool {
        let state = self.state.read().expect("Lock poisoned");
        state.subscriptions.contains_key(client_id)
    }

    /// Get the list of subscribed client IDs
    ///
    /// The order of the returned IDs is unspecified (it follows `HashMap` iteration).
    pub fn subscribed_clients(&self) -> Vec<String> {
        let state = self.state.read().expect("Lock poisoned");
        state.subscriptions.keys().cloned().collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sse::types::SessionUpdateVariant;
    use tokio_stream::StreamExt;

    /// Build the `SessionIdle` update used as the payload in these tests.
    fn idle_update(session_id: &str) -> SessionUpdateParams {
        SessionUpdateParams {
            session_id: session_id.to_string(),
            update: SessionUpdateVariant::SessionIdle {},
            ..Default::default()
        }
    }

    /// A fresh notifier has no subscribed clients.
    #[test]
    fn test_sse_notifier_new() {
        assert_eq!(SseNotifier::new().client_count(), 0);
    }

    /// The requested capacity is stored on the notifier.
    #[test]
    fn test_sse_notifier_with_capacity() {
        let sized = SseNotifier::with_capacity(512);
        assert_eq!(sized.channel_capacity, 512);
    }

    /// Subscribing registers the client; unsubscribing removes it exactly once.
    #[tokio::test]
    async fn test_subscribe_unsubscribe() {
        let hub = SseNotifier::new();

        let _stream = hub.subscribe("client-1");
        assert!(hub.is_subscribed("client-1"));
        assert_eq!(hub.client_count(), 1);

        assert!(hub.unsubscribe("client-1"));
        assert!(!hub.is_subscribed("client-1"));
        assert_eq!(hub.client_count(), 0);

        // A second removal finds nothing.
        assert!(!hub.unsubscribe("client-1"));
    }

    /// A targeted broadcast reaches the single receiver of that client.
    #[tokio::test]
    async fn test_broadcast_to_client() {
        let hub = SseNotifier::new();
        let mut stream = hub.subscribe("client-1");
        let update = idle_update("sess-1");

        assert_eq!(hub.broadcast("client-1", update.clone()), Ok(1));

        let received = stream.next().await.unwrap().unwrap();
        assert_eq!(received, update);
    }

    /// Broadcasting to a client that never subscribed is an error.
    #[tokio::test]
    async fn test_broadcast_to_unsubscribed_client() {
        let hub = SseNotifier::new();
        let outcome = hub.broadcast("unknown-client", idle_update("sess-1"));
        assert!(outcome.is_err());
    }

    /// `broadcast_all` delivers the update to every subscribed client.
    #[tokio::test]
    async fn test_broadcast_all() {
        let hub = SseNotifier::new();
        let mut first = hub.subscribe("client-1");
        let mut second = hub.subscribe("client-2");
        let update = idle_update("sess-1");

        assert_eq!(hub.broadcast_all(update.clone()), 2);

        let from_first = first.next().await.unwrap().unwrap();
        let from_second = second.next().await.unwrap().unwrap();
        assert_eq!(from_first, update);
        assert_eq!(from_second, update);
    }

    /// The client listing contains every subscribed ID.
    #[test]
    fn test_subscribed_clients() {
        let hub = SseNotifier::new();
        let _s1 = hub.subscribe("client-1");
        let _s2 = hub.subscribe("client-2");

        let ids = hub.subscribed_clients();
        assert_eq!(ids.len(), 2);
        for expected in ["client-1", "client-2"] {
            assert!(ids.contains(&expected.to_string()));
        }
    }

    /// Two subscriptions under one client ID share a channel and both receive.
    #[tokio::test]
    async fn test_multiple_receivers_same_client() {
        let hub = SseNotifier::new();
        let mut first = hub.subscribe("client-1");
        let mut second = hub.subscribe("client-1");
        let update = idle_update("sess-1");

        // Both receivers are counted.
        assert_eq!(hub.broadcast("client-1", update.clone()), Ok(2));

        let from_first = first.next().await.unwrap().unwrap();
        let from_second = second.next().await.unwrap().unwrap();
        assert_eq!(from_first, update);
        assert_eq!(from_second, update);
    }
}
+548
View File
@@ -0,0 +1,548 @@
//! SSE Type Definitions for ACP Server
//!
//! This module contains all the data structures and types used for SSE notifications.
//! These types define the wire format for ACP protocol messages.
use serde::{Deserialize, Serialize};
use dirigent_protocol::ContentBlock;
// ============================================================================
// SessionUpdateParams
// ============================================================================
/// Params for session/update JSON-RPC notification (ACP spec compliant)
///
/// This matches the structure sent by Claude and expected by Zed.
///
/// Keys are serialized in camelCase (`sessionId`, `update`). The container-level
/// `default` attribute lets fields missing from the input fall back to their
/// `Default` values during deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "camelCase", default)]
pub struct SessionUpdateParams {
/// The session ID this update belongs to
pub session_id: String,
/// The update content
pub update: SessionUpdateVariant,
/// Optional event type override (default: "session/update")
///
/// Set by `raw_event` to mark a raw event, and read by `event_type`,
/// `is_raw_event` and `raw_data`. It is never part of the JSON payload:
/// `#[serde(skip)]` excludes it from serialization and leaves it `None`
/// after deserialization.
#[serde(skip)]
pub event_type_override: Option<String>,
}
impl SessionUpdateParams {
    /// SSE event name to emit for this update.
    ///
    /// Returns the override when one is set, otherwise "session/update"
    /// (the event type ACP uses for session updates).
    pub fn event_type(&self) -> &str {
        match self.event_type_override.as_deref() {
            Some(custom) => custom,
            None => "session/update",
        }
    }

    /// Build a raw event with a custom event type.
    ///
    /// The resulting value is sent over SSE as the bare `data` JSON, without the
    /// `SessionUpdateParams` wrapper. Used for `_rpc_response` deferred responses
    /// pushed via SSE after gateway transfer.
    pub fn raw_event(event_type: &str, data: serde_json::Value) -> Self {
        let update = SessionUpdateVariant::Unknown { data };
        Self {
            // Raw events do not use the session ID.
            session_id: String::new(),
            update,
            event_type_override: Some(event_type.to_string()),
        }
    }

    /// Whether this is a raw event, i.e. an event type override is set.
    pub fn is_raw_event(&self) -> bool {
        self.event_type_override.is_some()
    }

    /// Payload to serialize directly for raw events.
    ///
    /// Returns the inner JSON only when an override is set *and* the update is the
    /// `Unknown` variant; otherwise `None`, in which case the caller should
    /// serialize the full struct.
    pub fn raw_data(&self) -> Option<&serde_json::Value> {
        match (&self.event_type_override, &self.update) {
            (Some(_), SessionUpdateVariant::Unknown { data }) => Some(data),
            _ => None,
        }
    }

    /// Serialize to the JSON string placed in the SSE `data` field.
    ///
    /// Raw events serialize only their raw data; normal events serialize the
    /// whole struct. A serialization failure falls back to "{}".
    pub fn to_sse_json(&self) -> String {
        let encoded = match self.raw_data() {
            Some(payload) => serde_json::to_string(payload),
            None => serde_json::to_string(self),
        };
        encoded.unwrap_or_else(|_| "{}".to_string())
    }
}
// ============================================================================
// ConfigOption Types
// ============================================================================
/// A single configuration option for session settings (outgoing ACP wire format)
///
/// Used in `config_option_update` notifications to send the complete set of
/// configuration options (modes, models, etc.) to the client.
///
/// Note: This is the **outgoing** wire format for ACP Server SSE.
/// For the **incoming** format (parsed from agents), see `dirigent_protocol::ConfigOption`.
///
/// Keys are serialized in camelCase; optional fields that are `None` are omitted
/// from the output.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct ConfigOption {
/// Unique identifier for this option (e.g., "mode", "model")
pub id: String,
/// Human-readable display name (e.g., "Session Mode", "Model")
pub name: String,
/// Optional description
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// Semantic category for UX grouping (e.g., "mode", "model", "thought_level")
#[serde(skip_serializing_if = "Option::is_none")]
pub category: Option<String>,
/// Input type (e.g., "select", "toggle", "text"); serialized under the key `type`
#[serde(rename = "type")]
pub option_type: ConfigOptionType,
/// Currently selected value (serialized as `currentValue`)
pub current_value: String,
/// Available options for "select" type
#[serde(skip_serializing_if = "Option::is_none")]
pub options: Option<Vec<ConfigOptionChoice>>,
}
/// Type of configuration option input
///
/// Variants are serialized in snake_case, i.e. "select", "toggle" and "text".
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ConfigOptionType {
/// Serialized as "select"
Select,
/// Serialized as "toggle"
Toggle,
/// Serialized as "text"
Text,
}
/// A single choice within a select-type config option
///
/// Matches ACP's `SessionConfigSelectOption` structure.
/// Keys are serialized in camelCase; a `None` description is omitted.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct ConfigOptionChoice {
/// Value identifier for this choice (sent back when selected)
pub value: String,
/// Human-readable display name
pub name: String,
/// Optional description
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
}
// ============================================================================
// SessionUpdateVariant
// ============================================================================
/// Variants of session updates (ACP spec compliant)
///
/// Serialized as an internally tagged enum: the `sessionUpdate` field holds the
/// variant's snake_case name and the variant's own fields sit next to it in the
/// same JSON object. Field names that differ from their Rust identifiers are
/// renamed individually (mostly to camelCase) via per-field `rename` attributes.
/// The final `Unknown` variant is marked `untagged` and acts as the fallback for
/// payloads that match none of the tagged variants.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "sessionUpdate", rename_all = "snake_case")]
pub enum SessionUpdateVariant {
/// Agent message chunk (streaming text)
#[serde(rename = "agent_message_chunk")]
AgentMessageChunk {
/// The content block (text, image, etc.)
content: ContentBlock,
},
/// Message generation complete
#[serde(rename = "message_complete")]
MessageComplete {
/// The message ID that completed (omitted from output when `None`)
#[serde(rename = "messageId", skip_serializing_if = "Option::is_none")]
message_id: Option<String>,
},
/// Session is idle and ready for input
// Declared with empty braces, so it is a struct-like variant with no fields.
#[serde(rename = "session_idle")]
SessionIdle {},
/// Available slash commands update
#[serde(rename = "available_commands_update")]
AvailableCommandsUpdate {
/// List of available slash commands
#[serde(rename = "availableCommands")]
available_commands: Vec<SlashCommand>,
},
/// Connector changed (session transferred to a different connector)
#[serde(rename = "connector_changed")]
ConnectorChanged {
/// The new connector ID
#[serde(rename = "newConnectorId")]
new_connector_id: String,
/// The new internal session ID in the target connector
#[serde(rename = "newInternalSessionId")]
new_internal_session_id: String,
/// Whether a new session was created (true) or existing session loaded (false)
#[serde(rename = "isNewSession")]
is_new_session: bool,
},
/// Tool call started/initiated
#[serde(rename = "tool_call")]
ToolCall {
/// The tool call ID
#[serde(rename = "toolCallId")]
tool_call_id: String,
/// Optional title for the tool call
#[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>,
/// Optional kind/category (e.g., "search", "edit")
#[serde(skip_serializing_if = "Option::is_none")]
kind: Option<String>,
/// Raw input parameters
#[serde(rename = "rawInput", skip_serializing_if = "Option::is_none")]
raw_input: Option<serde_json::Value>,
/// Current status (pending, in_progress, completed, failed)
#[serde(skip_serializing_if = "Option::is_none")]
status: Option<String>,
/// Content blocks (e.g., tool output)
///
/// Defaults to an empty list when absent on input and is omitted from
/// output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
content: Vec<ContentBlock>,
/// Metadata (can include claudeCode.toolName, toolResponse, etc.)
// No rename attribute: the JSON key is `_meta`, same as the field name.
#[serde(skip_serializing_if = "Option::is_none")]
_meta: Option<serde_json::Value>,
},
/// Tool call update (status change, output, etc.)
#[serde(rename = "tool_call_update")]
ToolCallUpdate {
/// The tool call ID being updated
#[serde(rename = "toolCallId")]
tool_call_id: String,
/// Updated status
#[serde(skip_serializing_if = "Option::is_none")]
status: Option<String>,
/// Updated content blocks
///
/// Defaults to an empty list when absent on input and is omitted from
/// output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
content: Vec<ContentBlock>,
/// Raw output from the tool
#[serde(rename = "rawOutput", skip_serializing_if = "Option::is_none")]
raw_output: Option<serde_json::Value>,
/// Error message if tool call failed
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
/// Metadata (can include toolResponse from Claude)
// No rename attribute: the JSON key is `_meta`, same as the field name.
#[serde(skip_serializing_if = "Option::is_none")]
_meta: Option<serde_json::Value>,
},
/// Agent request needing client response (e.g., permission prompt)
///
/// This variant is used for bidirectional requests where the agent initiates
/// a request (like `session/request_permission`) that requires a response from
/// the client. The client should respond via the `/acp/agent_response` endpoint.
#[serde(rename = "agent_request")]
AgentRequest {
/// The request ID from the agent (to correlate response)
// Kept as raw JSON, so the ID's JSON type is not constrained here.
#[serde(rename = "requestId")]
request_id: serde_json::Value,
/// The method being requested (e.g., "session/request_permission")
method: String,
/// The request parameters
params: serde_json::Value,
},
/// Session modes update (full state) - NOT SUPPORTED BY ZED
/// Kept for future ACP protocol support
#[serde(rename = "modes_update")]
ModesUpdate {
/// Session mode state (flattened)
///
/// Flattening places the state's fields directly in the update object
/// rather than under a `modes` key.
#[serde(flatten)]
modes: dirigent_protocol::SessionModeState,
},
/// Session models update (full state) - NOT SUPPORTED BY ZED
/// Kept for future ACP protocol support
#[serde(rename = "models_update")]
ModelsUpdate {
/// Session model state (flattened)
///
/// Flattening places the state's fields directly in the update object
/// rather than under a `models` key.
#[serde(flatten)]
models: dirigent_protocol::SessionModelState,
},
/// Current mode update (standard ACP notification)
/// Signals which mode is currently selected. Zed accepts this.
#[serde(rename = "current_mode_update")]
CurrentModeUpdate {
/// The ID of the currently selected mode
/// Note: Zed expects "currentModeId" not "modeId"
#[serde(rename = "currentModeId")]
mode_id: String,
},
/// Current model update - NOT SUPPORTED BY ZED
/// Kept for completeness but Zed only accepts current_mode_update
#[serde(rename = "current_model_update")]
CurrentModelUpdate {
/// The ID of the currently selected model
#[serde(rename = "modelId")]
model_id: String,
},
/// Config option update - send full configuration options to client
///
/// This is used after session transfer to provide the target connector's
/// actual modes/models instead of Gateway's placeholder values.
/// See: https://agentclientprotocol.com/rfds/session-config-options
#[serde(rename = "config_option_update")]
ConfigOptionUpdate {
/// List of configuration options to display
#[serde(rename = "configOptions")]
config_options: Vec<ConfigOption>,
},
/// Unknown update type (forward compatibility - pass through as raw JSON)
// NOTE(review): `data` is flattened, so it presumably has to be a JSON object
// for serialization to succeed — confirm against serde's flatten rules.
#[serde(untagged)]
Unknown {
#[serde(flatten)]
data: serde_json::Value,
},
}
impl SessionUpdateVariant {
/// Returns the variant name for logging (without data)
pub fn variant_name(&self) -> &'static str {
match self {
SessionUpdateVariant::AgentMessageChunk { .. } => "AgentMessageChunk",
SessionUpdateVariant::MessageComplete { .. } => "MessageComplete",
SessionUpdateVariant::SessionIdle { .. } => "SessionIdle",
SessionUpdateVariant::AvailableCommandsUpdate { .. } => "AvailableCommandsUpdate",
SessionUpdateVariant::ConnectorChanged { .. } => "ConnectorChanged",
SessionUpdateVariant::ToolCall { .. } => "ToolCall",
SessionUpdateVariant::ToolCallUpdate { .. } => "ToolCallUpdate",
SessionUpdateVariant::AgentRequest { .. } => "AgentRequest",
SessionUpdateVariant::ModesUpdate { .. } => "ModesUpdate",
SessionUpdateVariant::ModelsUpdate { .. } => "ModelsUpdate",
SessionUpdateVariant::CurrentModeUpdate { .. } => "CurrentModeUpdate",
SessionUpdateVariant::CurrentModelUpdate { .. } => "CurrentModelUpdate",
SessionUpdateVariant::ConfigOptionUpdate { .. } => "ConfigOptionUpdate",
SessionUpdateVariant::Unknown { .. } => "Unknown",
}
}
}
impl Default for SessionUpdateVariant {
fn default() -> Self {
SessionUpdateVariant::SessionIdle {}
}
}
// ============================================================================
// SlashCommand
// ============================================================================
/// Slash command definition
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SlashCommand {
/// Name of the command
pub name: String,
/// Description of the command
pub description: String,
/// Optional free-form JSON attached to the command; omitted from serialized
/// output when `None`.
// NOTE(review): presumably describes the input the command accepts; the exact
// schema is not visible here — confirm against the ACP spec.
#[serde(skip_serializing_if = "Option::is_none")]
pub input: Option<serde_json::Value>,
}
// ============================================================================
// AcpNotification (Legacy)
// ============================================================================
/// ACP Notification types for SSE streaming (DEPRECATED - keeping for compatibility)
///
/// These notifications are sent to clients over the SSE connection to inform
/// them about session events, message streaming, and errors.
///
/// Serialized as an internally tagged enum: the `type` field holds the variant
/// name in snake_case, and every variant's fields are renamed to camelCase.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(
tag = "type",
rename_all = "snake_case",
rename_all_fields = "camelCase"
)]
pub enum AcpNotification {
/// A chunk of message content has been received
///
/// Sent during streaming to provide incremental content updates.
/// The client should append this content to the message being built.
MessageChunk {
/// The session ID this chunk belongs to
session_id: String,
/// The message ID this chunk belongs to
message_id: String,
/// The content chunk (typically text)
content: String,
/// Optional content type (e.g., "text", "thought"); omitted from output when `None`
#[serde(skip_serializing_if = "Option::is_none")]
content_type: Option<String>,
},
/// A message has been completed
///
/// Sent when the agent has finished generating a message.
/// The client should finalize the message display.
MessageComplete {
/// The session ID
session_id: String,
/// The completed message ID
message_id: String,
},
/// A session has become idle
///
/// Sent when the session transitions to an idle state, meaning
/// no active generation is occurring.
SessionIdle {
/// The session ID that became idle
session_id: String,
},
/// An error occurred in a session
///
/// Sent when an error occurs during processing.
SessionError {
/// The session ID where the error occurred
session_id: String,
/// Human-readable error message
error: String,
},
/// A tool call has started or been updated
///
/// Sent when a tool call is initiated or its status changes.
ToolCallUpdate {
/// The session ID
session_id: String,
/// The message ID containing this tool call
message_id: String,
/// The tool call ID
tool_call_id: String,
/// The tool name
tool_name: String,
/// Current status (pending, running, completed, error)
// A plain string: the set of values listed above is not enforced by the type.
status: String,
/// Optional title for the tool call; omitted from output when `None`
#[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>,
/// Optional error message; omitted from output when `None`
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
},
}
impl AcpNotification {
    /// Name placed in the `event:` field of the SSE stream for this notification.
    pub fn event_type(&self) -> &'static str {
        match self {
            Self::MessageChunk { .. } => "message/chunk",
            Self::MessageComplete { .. } => "message/complete",
            Self::SessionIdle { .. } => "session/idle",
            Self::SessionError { .. } => "session/error",
            Self::ToolCallUpdate { .. } => "tool/update",
        }
    }

    /// Render the notification as one SSE frame.
    ///
    /// The frame has this shape, terminated by a blank line:
    /// ```text
    /// event: <event_type>
    /// data: <json_data>
    /// ```
    /// If JSON serialization fails, the data line carries `{}` instead.
    pub fn to_sse_string(&self) -> String {
        let payload = match serde_json::to_string(self) {
            Ok(json) => json,
            Err(_) => "{}".to_string(),
        };
        let event = self.event_type();
        format!("event: {}\ndata: {}\n\n", event, payload)
    }
}
// ============================================================================
// ConfigOption Conversion Functions
// ============================================================================
/// Build the "mode" [`ConfigOption`] from a session's mode state.
///
/// The result is a select-type option with id and category `"mode"`, named
/// "Session Mode", whose current value is the state's current mode ID and whose
/// choices mirror the available modes (ID, name, description) in order.
pub fn modes_to_config_option(modes: &dirigent_protocol::SessionModeState) -> ConfigOption {
    // One selectable choice per available mode.
    let choices = modes
        .available_modes
        .iter()
        .map(|mode| ConfigOptionChoice {
            value: mode.id.clone(),
            name: mode.name.clone(),
            description: mode.description.clone(),
        })
        .collect();

    ConfigOption {
        id: String::from("mode"),
        name: String::from("Session Mode"),
        description: None,
        category: Some(String::from("mode")),
        option_type: ConfigOptionType::Select,
        current_value: modes.current_mode_id.clone(),
        options: Some(choices),
    }
}
/// Build the "model" [`ConfigOption`] from a session's model state.
///
/// The result is a select-type option with id and category `"model"`, named
/// "Model", whose current value is the state's current model ID and whose
/// choices mirror the available models (model ID, name, description) in order.
pub fn models_to_config_option(models: &dirigent_protocol::SessionModelState) -> ConfigOption {
    // One selectable choice per available model.
    let choices = models
        .available_models
        .iter()
        .map(|model| ConfigOptionChoice {
            value: model.model_id.clone(),
            name: model.name.clone(),
            description: model.description.clone(),
        })
        .collect();

    ConfigOption {
        id: String::from("model"),
        name: String::from("Model"),
        description: None,
        category: Some(String::from("model")),
        option_type: ConfigOptionType::Select,
        current_value: models.current_model_id.clone(),
        options: Some(choices),
    }
}
+338
View File
@@ -0,0 +1,338 @@
//! Common test utilities for bidirectional flow testing.
//!
//! This module provides mock implementations and test helpers for testing
//! the bidirectional request/response flow in the ACP Server.
use anyhow::Result;
use serde_json::{json, Value};
use std::sync::Arc;
use tokio::sync::{mpsc, oneshot, Mutex};
use std::collections::HashMap;
/// Mock event for testing event bridge
///
/// A recorded event: its type name paired with its JSON payload.
#[derive(Debug, Clone)]
pub struct MockEvent {
/// Event type name (e.g. "session/update", the value `extract_agent_request` checks for)
pub event_type: String,
/// JSON payload delivered with the event
pub data: Value,
}
/// Mock SSE client for testing
///
/// Simulates an HTTP client that receives SSE events and posts responses.
///
/// The type is `Clone` because `TestContext` stores one copy in its registry
/// and hands another back to the caller. A clone is a second handle to the same
/// client: the event log sits behind an `Arc`, and the response sender feeds
/// the same channel.
#[derive(Clone)]
pub struct MockSseClient {
    /// Identifier of this client
    pub client_id: String,
    /// Events received via SSE
    pub received_events: Arc<Mutex<Vec<MockEvent>>>,
    /// Sender for simulating HTTP POST responses
    ///
    /// Each message pairs the response body with a one-shot sender through
    /// which the receiving side reports the outcome back to `send_response`.
    pub response_tx: mpsc::UnboundedSender<(Value, oneshot::Sender<Result<()>>)>,
}
impl MockSseClient {
    /// Create a client with an empty event log.
    ///
    /// Returns the client together with the receiving end of its response
    /// channel, on which everything passed to `send_response` arrives.
    pub fn new(client_id: String) -> (Self, mpsc::UnboundedReceiver<(Value, oneshot::Sender<Result<()>>)>) {
        let (response_tx, response_rx) = mpsc::unbounded_channel();
        let client = Self {
            client_id,
            received_events: Arc::new(Mutex::new(Vec::new())),
            response_tx,
        };
        (client, response_rx)
    }

    /// Record an incoming SSE event at the end of the event log.
    pub async fn receive_sse(&self, event_type: String, data: Value) {
        let recorded = MockEvent { event_type, data };
        self.received_events.lock().await.push(recorded);
    }

    /// Snapshot of every event recorded so far, oldest first.
    pub async fn get_events(&self) -> Vec<MockEvent> {
        let log = self.received_events.lock().await;
        log.clone()
    }

    /// Most recently recorded event whose type equals `event_type`, if any.
    pub async fn get_latest_event(&self, event_type: &str) -> Option<MockEvent> {
        let log = self.received_events.lock().await;
        // Walk backwards so the first match is the newest one.
        log.iter()
            .rev()
            .find(|event| event.event_type == event_type)
            .cloned()
    }

    /// Discard all recorded events.
    pub async fn clear_events(&self) {
        self.received_events.lock().await.clear();
    }

    /// Simulate posting a response to /acp/agent_response.
    ///
    /// The response travels over the response channel along with a one-shot
    /// acknowledgement sender; the result reported through that
    /// acknowledgement is returned. Fails if the channel is closed or the
    /// acknowledgement sender is dropped without replying.
    pub async fn send_response(&self, response: Value) -> Result<()> {
        let (ack_tx, ack_rx) = oneshot::channel();
        if self.response_tx.send((response, ack_tx)).is_err() {
            return Err(anyhow::anyhow!("Failed to send response"));
        }
        ack_rx.await?
    }
}
/// Mock connector for testing
///
/// Simulates an ACP connector that can send agent requests and receive responses.
///
/// The type is `Clone` because `TestContext` stores one copy in its registry
/// and hands another back to the caller. A clone is a second handle to the same
/// connector: the pending-response table sits behind an `Arc`, and the request
/// sender feeds the same channel.
#[derive(Clone)]
pub struct MockConnector {
    /// Identifier of this connector
    pub connector_id: String,
    /// Sending half of the channel that carries this connector's agent requests
    ///
    /// Each message is `(request_id, session_id, method, params)`.
    pub request_tx: mpsc::UnboundedSender<(Value, String, String, Value)>,
    /// Pending responses (request_id -> sender)
    pub pending_responses: Arc<Mutex<HashMap<Value, oneshot::Sender<Value>>>>,
}
impl MockConnector {
    /// Create a connector with no pending responses.
    ///
    /// Returns the connector together with the receiving end of its request
    /// channel, on which every request passed to `send_agent_request` arrives
    /// as `(request_id, session_id, method, params)`.
    pub fn new(
        connector_id: String,
    ) -> (Self, mpsc::UnboundedReceiver<(Value, String, String, Value)>) {
        let (request_tx, request_rx) = mpsc::unbounded_channel();
        let connector = Self {
            connector_id,
            request_tx,
            pending_responses: Arc::new(Mutex::new(HashMap::new())),
        };
        (connector, request_rx)
    }

    /// Simulate the agent issuing a request.
    ///
    /// Registers a one-shot reply slot under `request_id`, forwards the request
    /// over the request channel, and returns the receiver on which the eventual
    /// reply is delivered.
    ///
    /// # Panics
    /// Panics if the request channel's receiver has been dropped.
    pub async fn send_agent_request(
        &self,
        session_id: String,
        request_id: Value,
        method: String,
        params: Value,
    ) -> oneshot::Receiver<Value> {
        let (reply_tx, reply_rx) = oneshot::channel();

        // The scope ends the lock before the request is put on the channel.
        {
            let mut waiting = self.pending_responses.lock().await;
            waiting.insert(request_id.clone(), reply_tx);
        }

        self.request_tx
            .send((request_id, session_id, method, params))
            .expect("Failed to send agent request");

        reply_rx
    }

    /// Deliver `response` to whoever is waiting on `request_id`
    /// (simulating a response from the ACP Server).
    ///
    /// Fails if no request is pending under that ID, or if the waiting
    /// receiver has already been dropped.
    pub async fn complete_response(&self, request_id: Value, response: Value) -> Result<()> {
        let mut waiting = self.pending_responses.lock().await;
        match waiting.remove(&request_id) {
            Some(reply_tx) => reply_tx
                .send(response)
                .map_err(|_| anyhow::anyhow!("Failed to send response to connector")),
            None => Err(anyhow::anyhow!("No pending response for request_id: {}", request_id)),
        }
    }
}
/// Test context for integration tests
///
/// Provides a complete test environment with mocked components.
/// Both registries are wrapped in `Arc<Mutex<..>>`; entries are added by
/// `create_client` / `create_connector` and looked up by `get_client` /
/// `get_connector`.
pub struct TestContext {
/// Mock SSE clients by client_id
pub clients: Arc<Mutex<HashMap<String, MockSseClient>>>,
/// Mock connectors by connector_id
pub connectors: Arc<Mutex<HashMap<String, MockConnector>>>,
}
impl TestContext {
    /// Create a context with empty client and connector registries.
    pub fn new() -> Self {
        let clients = Arc::new(Mutex::new(HashMap::new()));
        let connectors = Arc::new(Mutex::new(HashMap::new()));
        Self { clients, connectors }
    }

    /// Create a mock SSE client and register it under `client_id`.
    ///
    /// Returns the client and the receiving end of its response channel. A
    /// clone of the client is kept in the registry, replacing any earlier
    /// entry with the same ID.
    pub async fn create_client(
        &self,
        client_id: String,
    ) -> (MockSseClient, mpsc::UnboundedReceiver<(Value, oneshot::Sender<Result<()>>)>) {
        let (client, response_rx) = MockSseClient::new(client_id.clone());
        self.clients.lock().await.insert(client_id, client.clone());
        (client, response_rx)
    }

    /// Create a mock connector and register it under `connector_id`.
    ///
    /// Returns the connector and the receiving end of its request channel. A
    /// clone of the connector is kept in the registry, replacing any earlier
    /// entry with the same ID.
    pub async fn create_connector(
        &self,
        connector_id: String,
    ) -> (MockConnector, mpsc::UnboundedReceiver<(Value, String, String, Value)>) {
        let (connector, request_rx) = MockConnector::new(connector_id.clone());
        self.connectors.lock().await.insert(connector_id, connector.clone());
        (connector, request_rx)
    }

    /// Look up a registered client by ID, returning a clone of it.
    pub async fn get_client(&self, client_id: &str) -> Option<MockSseClient> {
        self.clients.lock().await.get(client_id).cloned()
    }

    /// Look up a registered connector by ID, returning a clone of it.
    pub async fn get_connector(&self, connector_id: &str) -> Option<MockConnector> {
        self.connectors.lock().await.get(connector_id).cloned()
    }
}
impl Default for TestContext {
fn default() -> Self {
Self::new()
}
}
/// Build a sample JSON-RPC 2.0 `session/request_permission` request.
///
/// `request_id` becomes the message `id`. The params are fixed: session
/// "test-session" and a "Write" tool call on "/tmp/test.txt" with content "test".
pub fn sample_permission_request(request_id: u64) -> Value {
    let params = json!({
        "sessionId": "test-session",
        "tool": "Write",
        "parameters": {
            "path": "/tmp/test.txt",
            "content": "test"
        }
    });

    json!({
        "jsonrpc": "2.0",
        "id": request_id,
        "method": "session/request_permission",
        "params": params
    })
}
/// Build a sample JSON-RPC 2.0 permission response.
///
/// `request_id` becomes the message `id`; the result's `selectedOptionId` is
/// "allow" when `allow` is true and "deny" otherwise.
pub fn sample_permission_response(request_id: u64, allow: bool) -> Value {
    let selected_option = if allow { "allow" } else { "deny" };

    json!({
        "jsonrpc": "2.0",
        "id": request_id,
        "result": {
            "selectedOptionId": selected_option
        }
    })
}
/// Pull `(request_id, method, params)` out of an `agent_request` SSE event.
///
/// Returns `None` unless the event type is "session/update" and its payload
/// has an `update` object whose `sessionUpdate` is the string "agent_request"
/// and which contains `requestId`, a string `method`, and `params`.
pub fn extract_agent_request(event: &MockEvent) -> Option<(Value, String, Value)> {
    if event.event_type != "session/update" {
        return None;
    }

    let update = event.data.get("update")?;
    match update.get("sessionUpdate")?.as_str()? {
        "agent_request" => {
            let request_id = update.get("requestId")?.clone();
            let method = update.get("method")?.as_str()?.to_string();
            let params = update.get("params")?.clone();
            Some((request_id, method, params))
        }
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A recorded SSE event shows up in the client's event log.
    #[tokio::test]
    async fn test_mock_client() {
        let (client, _response_rx) = MockSseClient::new("test-client".to_string());

        let payload = json!({"test": "data"});
        client.receive_sse("session/update".to_string(), payload).await;

        let recorded = client.get_events().await;
        assert_eq!(recorded.len(), 1);
        assert_eq!(recorded[0].event_type, "session/update");
    }

    /// A request sent through the connector arrives on the request channel,
    /// and completing it delivers the response to the waiting receiver.
    #[tokio::test]
    async fn test_mock_connector() {
        let (connector, mut request_rx) = MockConnector::new("test-connector".to_string());

        let response_rx = connector
            .send_agent_request(
                "session-1".to_string(),
                json!(0),
                "session/request_permission".to_string(),
                json!({"tool": "Write"}),
            )
            .await;

        // The request must have been forwarded with its ID, session and method intact.
        let (request_id, session_id, method, _params) = request_rx.recv().await.unwrap();
        assert_eq!(request_id, json!(0));
        assert_eq!(session_id, "session-1");
        assert_eq!(method, "session/request_permission");

        connector
            .complete_response(json!(0), json!({"result": "success"}))
            .await
            .unwrap();

        let delivered = response_rx.await.unwrap();
        assert_eq!(delivered, json!({"result": "success"}));
    }

    /// Clients and connectors created through the context can be looked up again.
    #[tokio::test]
    async fn test_test_context() {
        let ctx = TestContext::new();

        let (_client, _) = ctx.create_client("client-1".to_string()).await;
        let (_connector, _) = ctx.create_connector("connector-1".to_string()).await;

        assert!(ctx.get_client("client-1").await.is_some());
        assert!(ctx.get_connector("connector-1").await.is_some());
        assert!(ctx.get_client("non-existent").await.is_none());
    }

    /// The sample builders produce the expected method and selected option.
    #[test]
    fn test_sample_helpers() {
        let sample_request = sample_permission_request(0);
        assert_eq!(sample_request["method"], "session/request_permission");

        let sample_response = sample_permission_response(0, true);
        assert_eq!(sample_response["result"]["selectedOptionId"], "allow");
    }

    /// A well-formed agent_request event yields its ID, method and params.
    #[test]
    fn test_extract_agent_request() {
        let update = json!({
            "sessionUpdate": "agent_request",
            "requestId": 0,
            "method": "session/request_permission",
            "params": {"tool": "Write"}
        });
        let event = MockEvent {
            event_type: "session/update".to_string(),
            data: json!({
                "sessionId": "session-1",
                "update": update
            }),
        };

        let (request_id, method, params) = extract_agent_request(&event).unwrap();
        assert_eq!(request_id, json!(0));
        assert_eq!(method, "session/request_permission");
        assert_eq!(params["tool"], "Write");
    }
}
@@ -0,0 +1,327 @@
//! Integration test for concurrent agent requests (T050)
//!
//! This test verifies that the system can handle multiple agent requests
//! simultaneously without cross-contamination.
//!
//! Test scenario:
//! 1. Register multiple pending requests concurrently
//! 2. Complete them in an order different from registration (reversed or staggered)
//! 3. Verify each response goes to the correct request
//! 4. Verify no cross-contamination
use dirigent_acp_api::agent_requests::AgentRequestTracker;
use serde_json::json;
use tokio::task::JoinSet;
/// Five requests from one client, answered in reverse order, each reach
/// their own receiver.
#[tokio::test]
async fn test_concurrent_requests_basic() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";

    // Register ids 0..5, remembering which receiver belongs to which id.
    let receivers: Vec<_> = (0..5)
        .map(|id| (id, tracker.register(client_id, json!(id))))
        .collect();
    assert_eq!(tracker.pending_count(), 5);

    // Answer newest-first so completion order differs from registration order.
    for id in (0..5).rev() {
        let reply = json!({"request": id, "result": "success"});
        tracker.complete(client_id, json!(id), reply).unwrap();
    }
    assert_eq!(tracker.pending_count(), 0);

    // Every receiver must hold the reply carrying its own id.
    for (id, rx) in receivers {
        let reply = rx.await.unwrap();
        assert_eq!(reply["request"], id);
    }
}
/// Two clients with three requests each are tracked and completed
/// independently of one another.
#[tokio::test]
async fn test_concurrent_requests_multiple_clients() {
    let tracker = AgentRequestTracker::new();
    let first = "client-1";
    let second = "client-2";

    // Interleave registrations so both clients hold ids 0, 1 and 2.
    let (first_receivers, second_receivers): (Vec<_>, Vec<_>) = (0..3)
        .map(|id| {
            let rx_first = tracker.register(first, json!(id));
            let rx_second = tracker.register(second, json!(id));
            ((id, rx_first), (id, rx_second))
        })
        .unzip();

    assert_eq!(tracker.pending_count(), 6);
    assert_eq!(tracker.client_pending_count(first), 3);
    assert_eq!(tracker.client_pending_count(second), 3);

    // Finish the first client; the second client's requests must stay pending.
    for id in 0..3 {
        tracker
            .complete(first, json!(id), json!({"client": 1, "request": id}))
            .unwrap();
    }
    assert_eq!(tracker.client_pending_count(first), 0);
    assert_eq!(tracker.client_pending_count(second), 3);

    // Now finish the second client.
    for id in 0..3 {
        tracker
            .complete(second, json!(id), json!({"client": 2, "request": id}))
            .unwrap();
    }
    assert_eq!(tracker.pending_count(), 0);

    // Each reply must carry both the right client number and the right id.
    for (client_no, receivers) in vec![(1, first_receivers), (2, second_receivers)] {
        for (id, rx) in receivers {
            let reply = rx.await.unwrap();
            assert_eq!(reply["client"], client_no);
            assert_eq!(reply["request"], id);
        }
    }
}
/// The same request ID used by two different clients is tracked as two
/// separate requests.
#[tokio::test]
async fn test_concurrent_requests_same_id_different_clients() {
    let tracker = AgentRequestTracker::new();
    let first = "client-1";
    let second = "client-2";
    // Both clients deliberately use the same ID.
    let shared_id = json!(0);

    let rx_first = tracker.register(first, shared_id.clone());
    let rx_second = tracker.register(second, shared_id.clone());
    assert_eq!(tracker.pending_count(), 2);

    // Answer each client with a reply that names it.
    tracker
        .complete(first, shared_id.clone(), json!({"client": "client-1"}))
        .unwrap();
    tracker
        .complete(second, shared_id, json!({"client": "client-2"}))
        .unwrap();
    assert_eq!(tracker.pending_count(), 0);

    // Replies must not have crossed over.
    let reply_first = rx_first.await.unwrap();
    let reply_second = rx_second.await.unwrap();
    assert_eq!(reply_first["client"], "client-1");
    assert_eq!(reply_second["client"], "client-2");
}
/// Requests registered up front are completed from separate tasks running
/// concurrently, and each receiver still gets its own reply.
#[tokio::test]
async fn test_concurrent_async_completion() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";
    let num_requests = 10;

    let receivers: Vec<_> = (0..num_requests)
        .map(|id| (id, tracker.register(client_id, json!(id))))
        .collect();
    assert_eq!(tracker.pending_count(), num_requests);

    // One task per request; staggered delays (0, 10 or 20 ms) make the
    // completions overlap.
    let mut completions = JoinSet::new();
    for id in 0..num_requests {
        let worker = tracker.clone();
        completions.spawn(async move {
            let stagger_ms = ((id % 3) * 10) as u64;
            tokio::time::sleep(tokio::time::Duration::from_millis(stagger_ms)).await;
            worker.complete(client_id, json!(id), json!({"request": id, "result": "success"}))
        });
    }

    // Every task must finish and every completion must succeed.
    while let Some(joined) = completions.join_next().await {
        assert!(joined.unwrap().is_ok());
    }
    assert_eq!(tracker.pending_count(), 0);

    for (id, rx) in receivers {
        let reply = rx.await.unwrap();
        assert_eq!(reply["request"], id);
    }
}
/// Twenty tasks each register, complete and await their own request
/// concurrently.
#[tokio::test]
async fn test_concurrent_register_and_complete() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";
    let num_requests = 20;

    let mut tasks = JoinSet::new();
    for id in 0..num_requests {
        let worker = tracker.clone();
        tasks.spawn(async move {
            let rx = worker.register(client_id, json!(id));

            // Short pause so the tasks' lifetimes overlap.
            tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;

            worker
                .complete(client_id, json!(id), json!({"request": id}))
                .unwrap();

            // The task's own receiver must get the reply it just sent.
            let reply = rx.await.unwrap();
            assert_eq!(reply["request"], id);
            id
        });
    }

    // Gather the ID each task returns; a panicked task fails the unwrap.
    let mut finished = Vec::new();
    while let Some(joined) = tasks.join_next().await {
        finished.push(joined.unwrap());
    }

    assert_eq!(finished.len(), num_requests);
    assert_eq!(tracker.pending_count(), 0);
}
/// Fifteen concurrent tasks mix immediate completion, timeout and delayed
/// completion, five of each.
#[tokio::test]
async fn test_concurrent_mixed_operations() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";

    let mut tasks = JoinSet::new();
    for id in 0..15 {
        let worker = tracker.clone();
        tasks.spawn(async move {
            let rx = worker.register(client_id, json!(id));
            tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;

            // The ID modulo 3 picks the behavior; each task returns a label
            // naming what it did.
            let behavior = id % 3;
            if behavior == 0 {
                // Complete normally.
                worker
                    .complete(client_id, json!(id), json!({"request": id, "type": "complete"}))
                    .unwrap();
                let reply = rx.await.unwrap();
                assert_eq!(reply["type"], "complete");
                "completed"
            } else if behavior == 1 {
                // Time out: the receiver must resolve to an error.
                worker.timeout(client_id, json!(id));
                assert!(rx.await.is_err());
                "timeout"
            } else {
                // Complete after an extra delay.
                tokio::time::sleep(tokio::time::Duration::from_millis(20)).await;
                worker
                    .complete(client_id, json!(id), json!({"request": id, "type": "delayed"}))
                    .unwrap();
                let reply = rx.await.unwrap();
                assert_eq!(reply["type"], "delayed");
                "delayed"
            }
        });
    }

    let mut outcomes = Vec::new();
    while let Some(joined) = tasks.join_next().await {
        outcomes.push(joined.unwrap());
    }
    assert_eq!(outcomes.len(), 15);

    // Five tasks per behavior.
    let tally = |label: &str| outcomes.iter().filter(|outcome| **outcome == label).count();
    assert_eq!(tally("completed"), 5);
    assert_eq!(tally("timeout"), 5);
    assert_eq!(tally("delayed"), 5);

    // Nothing may be left pending.
    assert_eq!(tracker.pending_count(), 0);
}
/// Stress test: one hundred concurrent register/complete round trips, none
/// lost or duplicated.
#[tokio::test]
async fn test_high_concurrency() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";
    let num_requests = 100;

    let mut tasks = JoinSet::new();
    for id in 0..num_requests {
        let worker = tracker.clone();
        tasks.spawn(async move {
            let rx = worker.register(client_id, json!(id));

            // Deterministic spread of delays between 0 and 19 ms.
            let delay_ms = ((id * 7) % 20) as u64;
            tokio::time::sleep(tokio::time::Duration::from_millis(delay_ms)).await;

            worker
                .complete(client_id, json!(id), json!({"request": id}))
                .unwrap();

            // Return the ID echoed back in the reply.
            let reply = rx.await.unwrap();
            reply["request"].as_u64().unwrap()
        });
    }

    let mut echoed = Vec::new();
    while let Some(joined) = tasks.join_next().await {
        echoed.push(joined.unwrap());
    }
    assert_eq!(echoed.len(), num_requests);

    // After sorting, the echoed IDs must be exactly 0, 1, 2, ... in sequence.
    echoed.sort();
    let expected: Vec<u64> = (0..num_requests as u64).collect();
    assert_eq!(echoed, expected);

    assert_eq!(tracker.pending_count(), 0);
}
@@ -0,0 +1,286 @@
//! Integration test for client disconnection (T051)
//!
//! This test verifies that pending agent requests are cleaned up when a client
//! disconnects, and that other clients are unaffected.
//!
//! Test scenario:
//! 1. Register pending requests for multiple clients
//! 2. Simulate client disconnection
//! 3. Verify cleanup occurs for disconnected client
//! 4. Verify other clients are unaffected
use dirigent_acp_api::agent_requests::AgentRequestTracker;
use serde_json::json;
/// Clearing a client drops all of its pending requests, and their receivers
/// resolve to errors.
#[tokio::test]
async fn test_disconnect_single_client() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";

    // Three pending requests with ids 0, 1 and 2.
    let receivers: Vec<_> = (0..3)
        .map(|id| tracker.register(client_id, json!(id)))
        .collect();
    assert_eq!(tracker.pending_count(), 3);
    assert_eq!(tracker.client_pending_count(client_id), 3);

    // Simulate the disconnect.
    tracker.clear(Some(client_id));
    assert_eq!(tracker.pending_count(), 0);
    assert_eq!(tracker.client_pending_count(client_id), 0);

    // With the senders gone, every receiver must report an error.
    for rx in receivers {
        assert!(rx.await.is_err());
    }
}
/// Clearing one client leaves the other clients' pending requests intact
/// and completable.
#[tokio::test]
async fn test_disconnect_multiple_clients() {
    let tracker = AgentRequestTracker::new();
    let (alpha, beta, gamma) = ("client-1", "client-2", "client-3");

    // alpha holds two requests, beta three, gamma one.
    let alpha_rx0 = tracker.register(alpha, json!(0));
    let alpha_rx1 = tracker.register(alpha, json!(1));
    let beta_receivers = vec![
        tracker.register(beta, json!(0)),
        tracker.register(beta, json!(1)),
        tracker.register(beta, json!(2)),
    ];
    let gamma_rx0 = tracker.register(gamma, json!(0));

    assert_eq!(tracker.pending_count(), 6);
    assert_eq!(tracker.client_pending_count(alpha), 2);
    assert_eq!(tracker.client_pending_count(beta), 3);
    assert_eq!(tracker.client_pending_count(gamma), 1);

    // Disconnect beta only.
    tracker.clear(Some(beta));

    assert_eq!(tracker.pending_count(), 3);
    assert_eq!(tracker.client_pending_count(alpha), 2);
    assert_eq!(tracker.client_pending_count(beta), 0);
    assert_eq!(tracker.client_pending_count(gamma), 1);

    // beta's receivers are dead.
    for rx in beta_receivers {
        assert!(rx.await.is_err());
    }

    // alpha and gamma can still be completed normally.
    tracker
        .complete(alpha, json!(0), json!({"result": "client1-0"}))
        .unwrap();
    assert_eq!(alpha_rx0.await.unwrap()["result"], "client1-0");

    tracker
        .complete(gamma, json!(0), json!({"result": "client3-0"}))
        .unwrap();
    assert_eq!(gamma_rx0.await.unwrap()["result"], "client3-0");

    tracker
        .complete(alpha, json!(1), json!({"result": "client1-1"}))
        .unwrap();
    assert_eq!(alpha_rx1.await.unwrap()["result"], "client1-1");

    assert_eq!(tracker.pending_count(), 0);
}
/// After a client is cleared, the same client ID and request IDs can be
/// registered again and completed.
#[tokio::test]
async fn test_disconnect_then_reconnect() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";

    // First connection.
    let stale_rx0 = tracker.register(client_id, json!(0));
    let stale_rx1 = tracker.register(client_id, json!(1));
    assert_eq!(tracker.pending_count(), 2);

    // Disconnect: both old receivers must fail.
    tracker.clear(Some(client_id));
    assert_eq!(tracker.pending_count(), 0);
    assert!(stale_rx0.await.is_err());
    assert!(stale_rx1.await.is_err());

    // Second connection reuses the client ID and the request IDs.
    let fresh_rx0 = tracker.register(client_id, json!(0));
    let fresh_rx1 = tracker.register(client_id, json!(1));
    assert_eq!(tracker.pending_count(), 2);

    tracker
        .complete(client_id, json!(0), json!({"result": "new-0"}))
        .unwrap();
    tracker
        .complete(client_id, json!(1), json!({"result": "new-1"}))
        .unwrap();

    // The new receivers get the new replies.
    assert_eq!(fresh_rx0.await.unwrap()["result"], "new-0");
    assert_eq!(fresh_rx1.await.unwrap()["result"], "new-1");
    assert_eq!(tracker.pending_count(), 0);
}
/// Clearing a client that has nothing pending changes nothing.
#[tokio::test]
async fn test_disconnect_no_pending_requests() {
    let tracker = AgentRequestTracker::new();
    let idle_client = "test-client";

    // Nothing has been registered for this client.
    assert_eq!(tracker.client_pending_count(idle_client), 0);

    // The disconnect has nothing to remove.
    tracker.clear(Some(idle_client));
    assert_eq!(tracker.pending_count(), 0);
}
/// `clear(None)` removes the pending requests of every client at once and
/// makes all of their receivers resolve to an error.
#[tokio::test]
async fn test_clear_all_clients() {
    let tracker = AgentRequestTracker::new();
    let clients = ["client-1", "client-2", "client-3"];

    // One pending request per client, registered in client order.
    let receivers: Vec<_> = clients
        .iter()
        .map(|&client| tracker.register(client, json!(0)))
        .collect();
    assert_eq!(tracker.pending_count(), 3);

    // Clear everything (simulating server shutdown).
    tracker.clear(None);

    // Neither the global nor any per-client count has anything left.
    assert_eq!(tracker.pending_count(), 0);
    for &client in clients.iter() {
        assert_eq!(tracker.client_pending_count(client), 0);
    }

    // Every receiver resolves to an error.
    for receiver in receivers {
        assert!(receiver.await.is_err());
    }
}
/// A request completed before the disconnect keeps its response; only the
/// requests still pending at disconnect time are failed.
#[tokio::test]
async fn test_disconnect_race_with_completion() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";

    // Three requests in flight.
    let answered = tracker.register(client_id, json!(0));
    let dropped_a = tracker.register(client_id, json!(1));
    let dropped_b = tracker.register(client_id, json!(2));
    assert_eq!(tracker.pending_count(), 3);

    // Completing request 0 removes exactly that entry.
    tracker
        .complete(client_id, json!(0), json!({"result": "0"}))
        .unwrap();
    assert_eq!(tracker.pending_count(), 2);

    // The disconnect then clears the two that remain.
    tracker.clear(Some(client_id));
    assert_eq!(tracker.pending_count(), 0);

    // The completed request still delivers its response...
    assert_eq!(answered.await.unwrap()["result"], "0");

    // ...while the cleared ones resolve to an error.
    assert!(dropped_a.await.is_err());
    assert!(dropped_b.await.is_err());
}
/// Completing a request after its client was cleared returns an error instead
/// of panicking.
#[tokio::test]
async fn test_partial_disconnect_completion() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";

    // Keep the receiver alive for the whole test.
    let _rx = tracker.register(client_id, json!(0));
    assert_eq!(tracker.pending_count(), 1);

    // Disconnect.
    tracker.clear(Some(client_id));
    assert_eq!(tracker.pending_count(), 0);

    // A late completion for the cleared request is rejected.
    let late = tracker.complete(client_id, json!(0), json!({"result": "late"}));
    assert!(late.is_err());
}
/// Runs 50 completion attempts concurrently with one disconnect and checks
/// that every task finishes and that nothing is left pending afterwards.
///
/// Each completion task sleeps `i % 5` ms and the disconnect task sleeps 10 ms.
/// The individual completion outcomes are collected but not asserted on.
#[tokio::test]
async fn test_concurrent_disconnect_and_complete() {
    use tokio::task::JoinSet;

    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";

    // Register many requests; the receivers are dropped immediately.
    for i in 0..50 {
        let _ = tracker.register(client_id, json!(i));
    }
    assert_eq!(tracker.pending_count(), 50);

    let mut tasks = JoinSet::new();

    // Disconnect after a delay.
    let disconnect_tracker = tracker.clone();
    tasks.spawn(async move {
        tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
        disconnect_tracker.clear(Some(client_id));
        "disconnected"
    });

    // One completion attempt per registered request.
    for i in 0..50 {
        let completion_tracker = tracker.clone();
        tasks.spawn(async move {
            // Small staggered delay before attempting the completion.
            tokio::time::sleep(tokio::time::Duration::from_millis((i % 5) as u64)).await;
            let response = json!({"request": i});
            if completion_tracker
                .complete(client_id, json!(i), response)
                .is_ok()
            {
                "completed"
            } else {
                "failed"
            }
        });
    }

    // Drain the set; a panicked task fails the test via `unwrap`.
    let mut outcomes = Vec::new();
    while let Some(joined) = tasks.join_next().await {
        outcomes.push(joined.unwrap());
    }

    // 1 disconnect + 50 completion attempts.
    assert_eq!(outcomes.len(), 51);

    let disconnects = outcomes
        .iter()
        .filter(|outcome| **outcome == "disconnected")
        .count();
    assert_eq!(disconnects, 1);

    // All requests should be cleaned up.
    assert_eq!(tracker.pending_count(), 0);
}
@@ -0,0 +1,153 @@
//! Integration tests for session/list RPC method
use dirigent_acp_api::{
NoOpConnectorOperations, RpcHandler, SessionManager,
};
use dirigent_acp_api::agent_requests::AgentRequestTracker;
use dirigent_acp_api::sse::SseNotifier;
use serde_json::json;
use std::sync::Arc;
/// Build an `RpcHandler` backed by the no-op connector, with a fresh session
/// manager, SSE notifier and agent-request tracker.
fn create_test_handler() -> RpcHandler<NoOpConnectorOperations> {
    let sessions = SessionManager::new();
    let notifier = SseNotifier::new();
    let tracker = Arc::new(AgentRequestTracker::new());
    RpcHandler::new(sessions, NoOpConnectorOperations, notifier, tracker)
}
/// `session/list` with an explicit connector returns a non-empty `sessions`
/// array whose first entry has a string `sessionId`.
#[tokio::test]
async fn test_session_list_returns_sessions() {
    let handler = create_test_handler();

    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "session/list",
        "params": {
            "connectorId": "stub-connector"
        }
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();

    // NoOp returns one stub session.
    let listed = &reply["result"]["sessions"];
    assert!(listed.is_array());
    let listed = listed.as_array().unwrap();
    assert!(!listed.is_empty());
    assert!(listed[0]["sessionId"].is_string());
}
/// `session/list` without a `params` object still answers with a `sessions`
/// array (the handler is expected to fall back to a default connector).
#[tokio::test]
async fn test_session_list_no_params() {
    let handler = create_test_handler();

    // No "params" member at all.
    let payload = json!({
        "jsonrpc": "2.0",
        "id": 2,
        "method": "session/list"
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();

    // Should succeed using the default connector from NoOp.
    assert!(reply["result"]["sessions"].is_array());
}
/// Listing sessions on behalf of an initialized client records a mapping in
/// the `SessionManager` for the returned session.
#[tokio::test]
async fn test_session_list_creates_mappings() {
    // Keep a clone of the manager so the mapping can be inspected afterwards.
    let session_manager = SessionManager::new();
    let handler = RpcHandler::new(
        session_manager.clone(),
        NoOpConnectorOperations,
        SseNotifier::new(),
        Arc::new(AgentRequestTracker::new()),
    );

    // Step 1: initialize, which yields the client ID used by the next call.
    let init_payload = json!({
        "jsonrpc": "2.0",
        "id": 0,
        "method": "initialize",
        "params": {}
    })
    .to_string();
    let init_reply = handler
        .handle_request(&init_payload, Some("test-client"))
        .await;
    let init_reply = serde_json::to_value(&init_reply).unwrap();
    let client_id = init_reply["result"]["clientId"].as_str().unwrap();

    // Step 2: list sessions as that client.
    let list_payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "session/list",
        "params": {
            "connectorId": "stub-connector"
        }
    })
    .to_string();
    let list_reply = handler
        .handle_request(&list_payload, Some(client_id))
        .await;
    let list_reply = serde_json::to_value(&list_reply).unwrap();

    let sessions = list_reply["result"]["sessions"].as_array().unwrap();
    assert!(!sessions.is_empty());

    // Step 3: the first listed session must now have a mapping.
    let session_id = sessions[0]["sessionId"].as_str().unwrap();
    assert!(
        session_manager.get_mapping(session_id).is_some(),
        "Session mapping should be created for listed sessions"
    );
}
/// `session/load` with only the standard ACP fields (`sessionId`, `cwd`,
/// `mcpServers`) and no `connectorId` still returns a session.
#[tokio::test]
async fn test_session_load_without_connector_id() {
    let handler = create_test_handler();

    // Standard ACP shape: no connectorId.
    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "session/load",
        "params": {
            "sessionId": "sess-abc",
            "cwd": "G:\\dev\\projects\\test",
            "mcpServers": []
        }
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();

    let loaded = &reply["result"];
    assert!(
        loaded["sessionId"].is_string(),
        "session/load should succeed without connectorId"
    );
    assert!(loaded["createdAt"].is_string());
}
/// `initialize` advertises `listSessions`, `loadSession` and the nested
/// `sessionCapabilities.list` object in `agentCapabilities`.
#[tokio::test]
async fn test_initialize_advertises_list_sessions() {
    let handler = create_test_handler();

    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "initialize",
        "params": {}
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();
    let capabilities = &reply["result"]["agentCapabilities"];

    assert_eq!(
        capabilities["listSessions"], true,
        "listSessions capability should be advertised"
    );
    assert_eq!(
        capabilities["loadSession"], true,
        "loadSession capability should still be advertised"
    );

    // Nested sessionCapabilities.list must be advertised too (required by Zed v0.9.4+).
    let nested_list = &capabilities["sessionCapabilities"]["list"];
    assert!(
        nested_list.is_object(),
        "sessionCapabilities.list should be advertised as an empty object"
    );
}
@@ -0,0 +1,134 @@
//! Integration tests for session/resume RPC method
use dirigent_acp_api::{
NoOpConnectorOperations, RpcHandler, SessionManager,
};
use dirigent_acp_api::agent_requests::AgentRequestTracker;
use dirigent_acp_api::sse::SseNotifier;
use serde_json::json;
use std::sync::Arc;
/// Build an `RpcHandler` backed by the no-op connector, with a fresh session
/// manager, SSE notifier and agent-request tracker.
fn create_test_handler() -> RpcHandler<NoOpConnectorOperations> {
    let sessions = SessionManager::new();
    let notifier = SseNotifier::new();
    let tracker = Arc::new(AgentRequestTracker::new());
    RpcHandler::new(sessions, NoOpConnectorOperations, notifier, tracker)
}
/// `session/resume` with an explicit connector returns a result carrying
/// string `sessionId`, `connectorId` and `createdAt` fields.
#[tokio::test]
async fn test_session_resume_returns_session() {
    let handler = create_test_handler();

    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "session/resume",
        "params": {
            "sessionId": "sess-123",
            "connectorId": "stub-connector"
        }
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();

    let resumed = &reply["result"];
    assert!(resumed["sessionId"].is_string());
    assert!(resumed["connectorId"].is_string());
    assert!(resumed["createdAt"].is_string());
}
/// `session/resume` without a `params` object is answered with a JSON-RPC
/// error object.
#[tokio::test]
async fn test_session_resume_missing_params() {
    let handler = create_test_handler();

    // No "params" member at all.
    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "session/resume"
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();

    // Should return an error for the missing params.
    assert!(reply["error"].is_object());
}
/// Resuming a session records a mapping for the returned session ID in the
/// `SessionManager`.
#[tokio::test]
async fn test_session_resume_creates_mapping() {
    // Keep a clone of the manager so the mapping can be inspected afterwards.
    let session_manager = SessionManager::new();
    let handler = RpcHandler::new(
        session_manager.clone(),
        NoOpConnectorOperations,
        SseNotifier::new(),
        Arc::new(AgentRequestTracker::new()),
    );

    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "session/resume",
        "params": {
            "sessionId": "sess-456",
            "connectorId": "stub-connector"
        }
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();

    // Look the mapping up under the ID the handler reported back.
    let session_id = reply["result"]["sessionId"].as_str().unwrap();
    assert!(
        session_manager.get_mapping(session_id).is_some(),
        "Session mapping should be created for resumed session"
    );
}
/// `session/resume` with only `sessionId` and `cwd` (no `connectorId`) still
/// returns a session.
#[tokio::test]
async fn test_session_resume_without_connector_id() {
    let handler = create_test_handler();

    // Standard ACP shape: no connectorId, so a default connector must be used.
    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "session/resume",
        "params": {
            "sessionId": "sess-789",
            "cwd": "G:\\dev\\projects\\test"
        }
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();

    let resumed = &reply["result"];
    assert!(
        resumed["sessionId"].is_string(),
        "Should succeed without connectorId"
    );
    assert!(resumed["createdAt"].is_string());
}
/// `initialize` advertises both `sessionCapabilities.list` and
/// `sessionCapabilities.resume` as objects.
#[tokio::test]
async fn test_initialize_advertises_session_resume() {
    let handler = create_test_handler();

    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "initialize",
        "params": {}
    })
    .to_string();

    let reply = handler.handle_request(&payload, None).await;
    let reply = serde_json::to_value(&reply).unwrap();
    let session_caps = &reply["result"]["agentCapabilities"]["sessionCapabilities"];

    assert!(
        session_caps["list"].is_object(),
        "sessionCapabilities.list should be advertised as an empty object"
    );
    assert!(
        session_caps["resume"].is_object(),
        "sessionCapabilities.resume should be advertised as an empty object"
    );
}
@@ -0,0 +1,229 @@
//! Integration test for timeout handling (T049)
//!
//! This test verifies that the system handles timeouts gracefully when a client
//! fails to respond to an agent request within the timeout period.
//!
//! Test scenario:
//! 1. Register a pending agent request
//! 2. Wait for timeout (using reduced timeout for testing)
//! 3. Verify timeout occurs
//! 4. Verify cleanup happens correctly
//! 5. Verify no resource leaks
use dirigent_acp_api::agent_requests::AgentRequestTracker;
use serde_json::json;
use tokio::time::{timeout, Duration};
/// A request nobody answers makes a bounded wait on its receiver elapse, and
/// `tracker.timeout` then removes the pending entry.
#[tokio::test]
async fn test_timeout_basic() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";
    let request_id = json!(0);

    let receiver = tracker.register(client_id, request_id.clone());
    assert_eq!(tracker.pending_count(), 1);

    // Nobody completes the request, so a short bounded wait must elapse.
    let waited = timeout(Duration::from_millis(100), receiver).await;
    assert!(waited.is_err(), "Expected timeout but request completed");

    // Trigger the cleanup by hand (in production, the event bridge does this).
    tracker.timeout(client_id, request_id);

    assert_eq!(tracker.pending_count(), 0);
}
/// Calling `tracker.timeout` before the receiver is awaited removes the entry
/// and makes the receiver resolve to an error.
#[tokio::test]
async fn test_timeout_cleanup() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";
    let request_id = json!(123);

    let receiver = tracker.register(client_id, request_id.clone());
    assert_eq!(tracker.pending_count(), 1);

    // Time the request out before anyone waits on it.
    tracker.timeout(client_id, request_id);
    assert_eq!(tracker.pending_count(), 0);

    // The receiver should get an error (channel closed).
    let outcome = receiver.await;
    assert!(
        outcome.is_err(),
        "Expected receiver to get error after timeout"
    );
}
/// Timing out one client's request leaves another client's request, with the
/// same request ID, pending and completable.
#[tokio::test]
async fn test_timeout_multiple_clients() {
    let tracker = AgentRequestTracker::new();
    let timed_out_client = "client-1";
    let healthy_client = "client-2";

    // Both clients use request ID 0.
    let timed_out_rx = tracker.register(timed_out_client, json!(0));
    let healthy_rx = tracker.register(healthy_client, json!(0));
    assert_eq!(tracker.pending_count(), 2);
    assert_eq!(tracker.client_pending_count(timed_out_client), 1);
    assert_eq!(tracker.client_pending_count(healthy_client), 1);

    // Time out client-1 only.
    tracker.timeout(timed_out_client, json!(0));

    // Only client-1's entry is gone.
    assert_eq!(tracker.pending_count(), 1);
    assert_eq!(tracker.client_pending_count(timed_out_client), 0);
    assert_eq!(tracker.client_pending_count(healthy_client), 1);

    // client-1's receiver resolves to an error.
    assert!(timed_out_rx.await.is_err());

    // client-2's request still completes normally.
    let completion = tracker.complete(healthy_client, json!(0), json!({"result": "success"}));
    assert!(completion.is_ok());

    let delivered = healthy_rx.await.unwrap();
    assert_eq!(delivered, json!({"result": "success"}));

    // All cleaned up.
    assert_eq!(tracker.pending_count(), 0);
}
/// Timing out the same request twice is harmless: the second call finds
/// nothing to remove and must not panic.
#[tokio::test]
async fn test_timeout_no_double_cleanup() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";
    let request_id = json!(0);

    // Keep the receiver alive for the whole test.
    let _receiver = tracker.register(client_id, request_id.clone());
    assert_eq!(tracker.pending_count(), 1);

    // The first call removes the entry; the second is a no-op.
    for _ in 0..2 {
        tracker.timeout(client_id, request_id.clone());
        assert_eq!(tracker.pending_count(), 0);
    }
}
/// A timeout that arrives after the request was completed is a no-op and does
/// not disturb the response already sent to the receiver.
#[tokio::test]
async fn test_timeout_race_with_complete() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";
    let request_id = json!(0);
    let expected = json!({"result": "success"});

    let receiver = tracker.register(client_id, request_id.clone());
    assert_eq!(tracker.pending_count(), 1);

    // Complete first...
    let completion = tracker.complete(client_id, request_id.clone(), expected.clone());
    assert!(completion.is_ok());
    assert_eq!(tracker.pending_count(), 0);

    // ...then let the late timeout arrive.
    tracker.timeout(client_id, request_id);
    assert_eq!(tracker.pending_count(), 0);

    // The receiver still gets the response.
    let delivered = receiver.await.unwrap();
    assert_eq!(delivered, expected);
}
/// Ten requests are timed out from ten spawned tasks with staggered delays
/// (`i * 10` ms); afterwards nothing is pending and every receiver errors.
#[tokio::test]
async fn test_concurrent_timeouts() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";

    // Register 10 requests, remembering each receiver with its request index.
    let receivers: Vec<_> = (0..10)
        .map(|i| (i, tracker.register(client_id, json!(i))))
        .collect();
    assert_eq!(tracker.pending_count(), 10);

    // One task per request, each timing its request out after its own delay.
    let shared = tracker.clone();
    let mut handles = Vec::new();
    for i in 0..10 {
        let task_tracker = shared.clone();
        handles.push(tokio::spawn(async move {
            // Deterministic stagger based on the index.
            tokio::time::sleep(Duration::from_millis((i * 10) as u64)).await;
            task_tracker.timeout(client_id, json!(i));
        }));
    }

    // Wait for every timeout task; a panicked task fails the test.
    for handle in handles {
        handle.await.unwrap();
    }

    // All should be cleaned up.
    assert_eq!(tracker.pending_count(), 0);

    // All receivers should get errors.
    for (_index, receiver) in receivers {
        assert!(receiver.await.is_err());
    }
}
/// Uses real time to check that a waiting receiver is released by an explicit
/// `tracker.timeout` call rather than by the outer `tokio::time::timeout`.
///
/// Only a lower bound is asserted on the elapsed wall-clock time. The previous
/// `< 300ms` upper bound could fail on a loaded machine without any defect in
/// the tracker, and `result.is_ok()` already proves the receiver resolved
/// before the 1 s outer timeout fired.
#[tokio::test]
async fn test_timeout_with_actual_delay() {
    let tracker = AgentRequestTracker::new();
    let client_id = "test-client";
    let request_id = json!(0);

    let start = std::time::Instant::now();

    // Register request
    let receiver = tracker.register(client_id, request_id);

    // Spawn task to time the request out after 200ms
    let tracker_clone = tracker.clone();
    tokio::spawn(async move {
        tokio::time::sleep(Duration::from_millis(200)).await;
        tracker_clone.timeout(client_id, json!(0));
    });

    // Wait on the receiver with a longer outer timeout
    let result = timeout(Duration::from_secs(1), receiver).await;
    let elapsed = start.elapsed();

    // Should complete due to the timeout() call, not tokio::time::timeout
    assert!(result.is_ok(), "Should complete when timeout() is called");
    assert!(result.unwrap().is_err(), "Receiver should get error");

    // The receiver cannot resolve before the 200ms sleep has elapsed; a small
    // margin is kept below that value.
    assert!(
        elapsed >= Duration::from_millis(180),
        "Expected at least ~200ms but took {:?}",
        elapsed
    );

    // Should be cleaned up
    assert_eq!(tracker.pending_count(), 0);
}
+148
View File
@@ -0,0 +1,148 @@
# Package: dirigent_anth
Claude Code JSONL session parser and toolkit.
## Quick Facts
- **Type**: Library, plus two binaries (`anth_bear`, `anth_usage`)
- **Main Entry**: src/lib.rs
- **Dependencies**: serde, serde_json, chrono, chrono-tz, uuid, camino, dirs, thiserror, tracing, regex, portable-pty, vt100 (optional: dirigent_config)
- **Status**: Core parsing complete — ready for downstream consumers
## Purpose
Reads Claude Code's local JSONL session storage (`~/.claude/projects/`) and produces typed, deduplicated, correlated Rust data structures. The types are the product — downstream consumers (archivist import, shell usage analyzers, session browsers) depend on these structs.
## Key Features
- **Session Discovery**: Scan `~/.claude/projects/` for all Claude Code projects and sessions
- **JSONL Parsing**: Lenient line-by-line parser that handles unknown fields and message types
- **Streaming Dedup**: Collapse streamed assistant messages to their final version
- **Tool Correlation**: ID-based pairing of tool_use → tool_result across parallel calls
- **Conversation Tree**: Reconstruct uuid/parentUuid threading with branch detection
- **Noise Classification**: Identify meta messages, warmup, interruptions, API errors
- **Sub-Agent Loading**: Recursive parsing of sub-agent JSONL with metadata
- **Timestamp Parsing**: Handle ISO 8601, Unix seconds, and Unix milliseconds
## Architecture
### Design Principles
1. **Types are the product** — Well-typed Rust structs that downstream consumers import
2. **Lenient parsing** — Unknown fields ignored, unknown message types logged and skipped
3. **Stream-oriented** — Line-by-line BufReader parsing, never loads entire files
4. **Sync-first** — File parsing is CPU-bound; no async overhead
5. **Cross-platform** — camino::Utf8PathBuf throughout for Windows/Unix compatibility
### Module Organization
- **`types.rs`** — All public data types (Content, ContentBlock, RawMessage variants, ToolCall, etc.)
- **`error.rs`** — AntError enum with I/O, JSON parse, home-not-found, invalid-path variants
- **`parser.rs`** — JSONL line parser and file parser with lenient error handling
- **`dedup.rs`** — Streaming deduplication of assistant messages by uuid
- **`correlation.rs`** — Tool call ↔ result pairing by tool_use_id
- **`tree.rs`** — Conversation tree from uuid/parentUuid relationships
- **`noise.rs`** — Noise pattern classification (meta, warmup, interruptions, etc.)
- **`discovery.rs`** — Filesystem scanning for Claude projects and sessions
- **`subagent.rs`** — Sub-agent JSONL and metadata loading
- **`util.rs`** — Timestamp parsing utilities
## Public API
### Quick Start
```rust
use dirigent_anth::{discover_claude_home, discover_projects, load_session};
// Discover all projects
let home = discover_claude_home()?;
let projects = discover_projects(&home)?;
// Load a session with full parsing
for project in &projects {
for session_ref in &project.sessions {
let session = load_session(session_ref)?;
println!("Messages: {}, Tools: {}, Subagents: {}",
session.messages.len(),
session.tool_exchanges.len(),
session.subagents.len());
}
}
```
### Key Functions
| Function | Purpose |
|----------|---------|
| `discover_claude_home()` | Find `~/.claude/` directory |
| `discover_projects(home)` | Scan for all project directories |
| `parse_session(path)` | Parse a JSONL file into messages |
| `parse_session_deduped(path)` | Parse with streaming dedup applied |
| `dedup_messages(msgs)` | Deduplicate streamed assistant messages |
| `correlate_tools(msgs)` | Pair tool calls with results by ID |
| `ConversationTree::build(msgs)` | Build conversation tree |
| `classify_noise(msg)` | Classify a message as noise |
| `load_subagents(dir)` | Load sub-agent sessions from artifacts |
| `load_session(ref)` | Full parse: dedup + correlate + tree + subagents |
| `parse_timestamp(value)` | Parse ISO/Unix timestamps |
## Data Model
### Claude Code JSONL Format
Each line in `~/.claude/projects/<encoded-path>/<session-uuid>.jsonl` is a JSON object with a `type` field discriminator. Five types: `user`, `assistant`, `progress`, `system`, `queue-operation`.
- **Outer wrapper**: camelCase fields (sessionId, parentUuid, isSidechain, gitBranch)
- **Inner message body**: snake_case fields (stop_reason, tool_use_id, is_error)
- **Content**: Either a plain string or array of typed content blocks
### Content Blocks
| Type | Fields |
|------|--------|
| text | `text` |
| tool_use | `id`, `name`, `input` |
| tool_result | `tool_use_id`, `content`, `is_error` |
| thinking | `thinking` |
| image | `source` |
Unknown content block types are silently dropped (lenient deserialization).
## Testing
```bash
cargo test --package dirigent_anth
```
Tests use synthetic JSONL fixtures in `tests/fixtures/`:
- `minimal_session.jsonl` — Basic session with all message types
- `streaming_dedup.jsonl` — Streaming dedup scenario
- `tool_correlation.jsonl` — Parallel and sequential tool calls
- `branching_tree.jsonl` — Conversation with branches
- `noise_patterns.jsonl` — All noise pattern types
- `subagent/` — Sub-agent session with parent and metadata
## Error Handling
- Individual unparseable JSONL lines are logged and skipped (lenient)
- I/O errors and missing directories are propagated as AntError
- Unknown message types are skipped via serde
- Unknown content blocks are silently filtered
## Related Packages
- **dirigent_archivist** — Future consumer for session import
- No required dependencies on other dirigent packages; `dirigent_config` is an optional dependency enabled by the `dirigent-paths` feature
## Future Enhancements
- Bash command analysis module (shell usage analytics)
- Archivist event transform/import
- CLI tool with scan/analyze/import subcommands
- SQLite caching layer
- Watch mode for new session monitoring
## Documentation
- **Package README**: `./README.md` - User-facing overview
- **API Docs**: Run `cargo doc --package dirigent_anth --open`
- **Design Plan**: `docs/superpowers/plans/2026-03-23-dirigent-ant-design.md`
+37
View File
@@ -0,0 +1,37 @@
[package]
name = "dirigent_anth"
version = "0.1.0"
edition = "2021"
[lib]
path = "src/lib.rs"
[[bin]]
name = "anth_bear"
path = "src/bin/anth.rs"
[[bin]]
name = "anth_usage"
path = "src/bin/anth_usage.rs"
[features]
default = []
dirigent-paths = ["dep:dirigent_config"]
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.10"
uuid = { version = "1.11", features = ["serde"] }
camino = { version = "1.1", features = ["serde1"] }
dirs = "6.0"
thiserror = "2.0"
tracing = "0.1"
regex = "1"
portable-pty = "0.8"
vt100 = "0.15"
dirigent_config = { path = "../dirigent_config", optional = true }
[dev-dependencies]
tempfile = "3.0"
+331
View File
@@ -0,0 +1,331 @@
use chrono::{Datelike, NaiveDate, NaiveTime, Utc};
use chrono_tz::Tz;
use serde::Serialize;
/// Structured data extracted from a usage screen.
#[derive(Debug, Serialize, Default)]
pub struct UsageData {
/// One entry per "Current session…" / "Current week…" section for which a
/// "% used" value was found.
pub gauges: Vec<UsageGauge>,
/// Usage breakdown; stays `None` (and is omitted from the serialized output)
/// unless at least one contribution factor or subagent row was parsed.
#[serde(skip_serializing_if = "Option::is_none")]
pub contributions: Option<ContributionInfo>,
}
/// A single usage gauge (one "Current session…" or "Current week…" section).
#[derive(Debug, Serialize)]
pub struct UsageGauge {
/// The trimmed header line that introduced the gauge.
pub name: String,
/// The number parsed from the gauge's "<n>% used" line.
pub percent_used: u32,
/// Text that followed "Resets " on the screen, exactly as displayed;
/// omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
pub resets: Option<String>,
/// `resets` converted to an RFC 3339 timestamp in UTC, when it could be parsed.
#[serde(skip_serializing_if = "Option::is_none")]
pub resets_iso: Option<String>,
}
/// Breakdown of what contributed to the usage; empty lists are omitted from
/// the serialized output.
#[derive(Debug, Serialize, Default)]
pub struct ContributionInfo {
/// Lines of the form "<n>% of your usage …".
#[serde(skip_serializing_if = "Vec::is_empty")]
pub factors: Vec<ContributionFactor>,
/// Rows parsed from the table that follows a "Subagents" line.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub subagents: Vec<SubagentUsage>,
}
/// One "<n>% of your usage …" line.
#[derive(Debug, Serialize)]
pub struct ContributionFactor {
/// The whole line the factor was parsed from (including the percentage).
pub description: String,
/// The number that precedes the first '%' on that line.
pub percent: u32,
}
/// One row of the subagent table.
#[derive(Debug, Serialize)]
pub struct SubagentUsage {
/// Row text before the trailing percentage, trimmed.
pub name: String,
/// The number immediately before the row's last '%'.
pub percent: u32,
}
/// Result of [`process_usage_screen`]: the cleaned-up screen text together
/// with the data extracted from it.
///
/// Derives `Debug` like the other public types in this module so callers can
/// log or inspect it.
#[derive(Debug)]
pub struct ProcessedOutput {
    /// The retained screen lines, joined with `\n`.
    pub raw_screen: String,
    /// Gauges and contribution info parsed from those lines.
    pub data: UsageData,
}
/// Trim a captured usage screen down to its relevant lines and parse it.
///
/// The retained region starts at the first line that, once trimmed, begins
/// with '─' and contains at least six '─' characters (or at the first line if
/// there is none) and ends after the last non-blank line.
pub fn process_usage_screen(raw: &str) -> ProcessedOutput {
    let lines: Vec<&str> = raw.lines().collect();

    // A horizontal rule: starts with '─' and has six or more of them.
    let is_rule = |line: &&str| {
        let text = line.trim();
        text.starts_with('─') && text.chars().filter(|&c| c == '─').count() >= 6
    };
    let first = lines.iter().position(is_rule).unwrap_or(0);

    // One past the last non-blank line; the whole input if every line is blank.
    let past_last = match lines.iter().rposition(|line| !line.trim().is_empty()) {
        Some(index) => index + 1,
        None => lines.len(),
    };

    let kept = &lines[first..past_last];
    ProcessedOutput {
        raw_screen: kept.join("\n"),
        data: extract_usage_data(kept),
    }
}
/// Walk the screen lines once and collect gauges, contribution factors and
/// the subagent table.
///
/// Every line is checked against all three patterns independently:
/// * a "Current session…" / "Current week…" header without a '%' starts a gauge,
/// * a "<n>% of your usage" line adds a contribution factor,
/// * a line starting with "Subagents" introduces the subagent table, which is
///   parsed from the lines that follow it.
fn extract_usage_data(lines: &[&str]) -> UsageData {
    let mut data = UsageData::default();

    for (index, raw_line) in lines.iter().enumerate() {
        let text = raw_line.trim();

        let is_gauge_header = !text.contains('%')
            && (text.starts_with("Current session") || text.starts_with("Current week"));
        if is_gauge_header {
            // The gauge's values are on the lines right after the header.
            if let Some(gauge) = find_gauge(&lines[index..], text) {
                data.gauges.push(gauge);
            }
        }

        if let Some(factor) = parse_contribution_factor(text) {
            let info = data
                .contributions
                .get_or_insert_with(ContributionInfo::default);
            info.factors.push(factor);
        }

        if text.starts_with("Subagents") {
            let rows = parse_subagent_table(&lines[index + 1..]);
            if !rows.is_empty() {
                let info = data
                    .contributions
                    .get_or_insert_with(ContributionInfo::default);
                info.subagents = rows;
            }
        }
    }

    data
}
/// Build the gauge whose header is `lines[0]`.
///
/// Up to four lines after the header are scanned for a "<n>% used" value and
/// a "Resets …" line. The scan stops early at the next gauge header, and the
/// first match of each kind is kept, so values belonging to a following gauge
/// can never overwrite this one's. (Previously the last match in the window
/// won, which mis-attributed a neighbour's percentage when sections were
/// adjacent or this gauge had no "Resets" line.)
///
/// Returns `None` when no "% used" value is found; a missing "Resets" line
/// only leaves `resets` / `resets_iso` unset.
fn find_gauge(lines: &[&str], name: &str) -> Option<UsageGauge> {
    // Same header test the caller uses to start a gauge.
    let is_gauge_header = |text: &str| {
        !text.contains('%')
            && (text.starts_with("Current session") || text.starts_with("Current week"))
    };

    let mut percent: Option<u32> = None;
    let mut resets_raw: Option<String> = None;

    let window = lines
        .iter()
        .skip(1)
        .take(4)
        .map(|line| line.trim())
        .take_while(|text| !is_gauge_header(text));

    for text in window {
        if percent.is_none() {
            percent = extract_percent_used(text);
        }
        if resets_raw.is_none() {
            // Strip the prefix exactly once; the remainder is kept as displayed.
            resets_raw = text.strip_prefix("Resets ").map(str::to_string);
        }
    }

    percent.map(|percent_used| {
        let resets_iso = resets_raw.as_deref().and_then(parse_reset_to_iso);
        UsageGauge {
            name: name.to_string(),
            percent_used,
            resets: resets_raw,
            resets_iso,
        }
    })
}
/// Parse reset strings like:
/// "12:30pm (Europe/Vienna)" → today at 12:30 in that tz
/// "May 12, 9am (Europe/Vienna)" → May 12 at 09:00
/// "May 12, 9:30am (Europe/Vienna)" → May 12 at 09:30
/// "Jun 1, 12pm (America/New_York)" → Jun 1 at 12:00
///
/// Claude Code uses JS `Intl.DateTimeFormat` style output.
fn parse_reset_to_iso(s: &str) -> Option<String> {
// Split off the timezone from parentheses
let (datetime_part, tz_str) = {
let open = s.rfind('(')?;
let close = s.rfind(')')?;
let tz = s[open + 1..close].trim();
let dt = s[..open].trim();
(dt, tz)
};
let tz: Tz = tz_str.parse().ok()?;
let now = Utc::now().with_timezone(&tz);
let (date, time_str) = if datetime_part.contains(',') {
// "May 12, 9am" or "May 12, 9:30am"
let comma_pos = datetime_part.find(',')?;
let date_part = datetime_part[..comma_pos].trim();
let time_part = datetime_part[comma_pos + 1..].trim();
let date = parse_month_day(date_part, now.year())?;
(date, time_part)
} else {
// "12:30pm" — today in the given timezone
(now.date_naive(), datetime_part)
};
let time = parse_12h_time(time_str)?;
let naive = date.and_time(time);
let local = naive.and_local_timezone(tz).earliest()?;
let utc = local.with_timezone(&Utc);
Some(utc.to_rfc3339())
}
/// Turn a "<month> <day>" string such as "May 12", "Jun 1" or "December 25"
/// into a date in the given `year`.
///
/// The month may be a three-letter abbreviation or the full English name, in
/// any letter case. Returns `None` unless the input has exactly two
/// whitespace-separated words, the month is recognised, the day is a number,
/// and the resulting date exists.
fn parse_month_day(s: &str, year: i32) -> Option<NaiveDate> {
    let mut words = s.split_whitespace();
    let (month_word, day_word) = match (words.next(), words.next(), words.next()) {
        (Some(month), Some(day), None) => (month, day),
        _ => return None,
    };

    const MONTHS: [(&str, &str); 12] = [
        ("jan", "january"),
        ("feb", "february"),
        ("mar", "march"),
        ("apr", "april"),
        ("may", "may"),
        ("jun", "june"),
        ("jul", "july"),
        ("aug", "august"),
        ("sep", "september"),
        ("oct", "october"),
        ("nov", "november"),
        ("dec", "december"),
    ];
    let wanted = month_word.to_lowercase();
    let month_index = MONTHS
        .iter()
        .position(|&(short, long)| wanted == short || wanted == long)?;

    let day: u32 = day_word.parse().ok()?;
    NaiveDate::from_ymd_opt(year, month_index as u32 + 1, day)
}
/// Parse a 12-hour clock time such as "9am", "12pm", "9:30am" or "12:30pm".
///
/// Matching is case-insensitive, and whitespace between the number and the
/// am/pm marker is tolerated. "12am" is midnight and "12pm" is noon.
///
/// Returns `None` when the am/pm marker is missing, the hour or minute is not
/// a number, the hour is greater than 12, or the minute is out of range.
fn parse_12h_time(s: &str) -> Option<NaiveTime> {
    let s = s.trim().to_lowercase();

    let (num_part, is_pm) = if let Some(rest) = s.strip_suffix("pm") {
        (rest, true)
    } else if let Some(rest) = s.strip_suffix("am") {
        (rest, false)
    } else {
        return None;
    };
    let num_part = num_part.trim_end();

    let (hour, minute) = match num_part.split_once(':') {
        Some((h, m)) => (h.parse::<u32>().ok()?, m.parse::<u32>().ok()?),
        None => (num_part.parse::<u32>().ok()?, 0),
    };

    // Reject impossible hours up front. Besides refusing inputs like "13am",
    // this keeps `h + 12` below from overflowing on absurdly large numbers.
    if hour > 12 {
        return None;
    }

    let hour_24 = match (hour, is_pm) {
        (12, true) => 12,
        (12, false) => 0,
        (h, true) => h + 12,
        (h, false) => h,
    };

    // `from_hms_opt` performs the remaining range check on the minute.
    NaiveTime::from_hms_opt(hour_24, minute, 0)
}
/// Read the percentage from a gauge line of the form "<anything> <n>% used".
///
/// Only the last whitespace-separated word before "% used" is parsed, so a
/// bar drawing in front of the number is ignored. Returns `None` when the
/// line does not end in "% used" or that word is not an unsigned integer.
fn extract_percent_used(line: &str) -> Option<u32> {
    const SUFFIX: &str = "% used";

    let text = line.trim();
    if !text.ends_with(SUFFIX) {
        return None;
    }

    let before_suffix = text.trim_end_matches(SUFFIX).trim();
    let number = before_suffix
        .split_whitespace()
        .next_back()
        .unwrap_or(before_suffix);
    number.parse().ok()
}
/// Parse a "<n>% of your usage …" line into a contribution factor.
///
/// The percentage is the text before the first '%', which must be an unsigned
/// integer once trimmed; the description is the whole line as given. Returns
/// `None` for any other line.
fn parse_contribution_factor(line: &str) -> Option<ContributionFactor> {
    if !line.contains("% of your usage") {
        return None;
    }

    let (leading, _rest) = line.split_once('%')?;
    let percent = leading.trim().parse::<u32>().ok()?;

    Some(ContributionFactor {
        description: line.to_string(),
        percent,
    })
}
/// Parse the rows that follow a "Subagents" heading.
///
/// Each row is expected to look like "<name> <n>%". Parsing stops at the
/// first blank line, at a line starting with '─', or at a line containing
/// "to day" / "to cancel" (presumably the screen's key-hint footer; confirm
/// against real output). Rows without a parsable trailing percentage, with
/// an empty name, or whose name contains "% of" are skipped.
///
/// The name/number split uses `rsplit_once`, which honours the byte width of
/// the separating whitespace character. The previous `rfind(..) + 1` slice
/// assumed a one-byte separator and panicked on a multibyte one such as a
/// non-breaking space.
fn parse_subagent_table(lines: &[&str]) -> Vec<SubagentUsage> {
    let mut subs = Vec::new();

    for line in lines {
        let t = line.trim();
        if t.is_empty() || t.starts_with('─') || t.contains("to day") || t.contains("to cancel") {
            break;
        }

        let Some(pos) = t.rfind('%').map(Some).unwrap_or(None) else {
            continue;
        };
        let before_pct = &t[..pos];

        // Last whitespace-separated word is the number; the rest is the name.
        let (name_part, num_part) = match before_pct.rsplit_once(char::is_whitespace) {
            Some((name, num)) => (name, num),
            None => ("", before_pct),
        };

        if let Ok(pct) = num_part.parse::<u32>() {
            let name = name_part.trim().to_string();
            if !name.is_empty() && !name.contains("% of") {
                subs.push(SubagentUsage { name, percent: pct });
            }
        }
    }

    subs
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a whole-minute `NaiveTime` that is known to be valid.
    fn hm(hour: u32, minute: u32) -> NaiveTime {
        NaiveTime::from_hms_opt(hour, minute, 0).unwrap()
    }

    /// Noon-hour time with minutes.
    #[test]
    fn parse_time_only() {
        let parsed = parse_12h_time("12:30pm").unwrap();
        assert_eq!(parsed, hm(12, 30));
    }

    /// Morning time without minutes.
    #[test]
    fn parse_time_am() {
        let parsed = parse_12h_time("9am").unwrap();
        assert_eq!(parsed, hm(9, 0));
    }

    /// "12am" is midnight.
    #[test]
    fn parse_time_12am() {
        let parsed = parse_12h_time("12am").unwrap();
        assert_eq!(parsed, hm(0, 0));
    }

    /// Morning time with minutes.
    #[test]
    fn parse_time_with_minutes() {
        let parsed = parse_12h_time("9:30am").unwrap();
        assert_eq!(parsed, hm(9, 30));
    }

    /// A time-only reset string yields an RFC 3339 timestamp expressed in UTC.
    #[test]
    fn parse_reset_time_only() {
        let parsed = parse_reset_to_iso("12:30pm (Europe/Vienna)");
        assert!(parsed.is_some());
        let timestamp = parsed.unwrap();
        assert!(timestamp.contains("T"));
        // Should end in +00:00 (UTC via rfc3339)
        assert!(timestamp.ends_with("+00:00"));
    }

    /// A dated reset string converts 09:00 Vienna time to UTC.
    #[test]
    fn parse_reset_date_and_time() {
        // CEST is UTC+2, CET is UTC+1 — depends on whether May 12 is summer time
        let timestamp = parse_reset_to_iso("May 12, 9am (Europe/Vienna)").unwrap();
        let is_cest = timestamp.contains("T07:00:00");
        let is_cet = timestamp.contains("T08:00:00");
        assert!(is_cest || is_cet);
    }

    /// Abbreviated month plus day in an explicit year.
    #[test]
    fn parse_month_day_basic() {
        let parsed = parse_month_day("May 12", 2026).unwrap();
        assert_eq!(parsed, NaiveDate::from_ymd_opt(2026, 5, 12).unwrap());
    }
}
+252
View File
@@ -0,0 +1,252 @@
//! Minimal CLI for dirigent_anth — validate parsing and search sessions.
//!
//! Usage:
//! cargo run --package dirigent_anth --bin ant # validate all sessions
//! cargo run --package dirigent_anth --bin ant -- search "query" # search user and assistant messages
//! cargo run --package dirigent_anth --bin ant -- stats # show statistics
use dirigent_anth::*;
use std::io::BufRead;
/// Entry point: locate the Claude home, discover its projects, then dispatch
/// to the sub-command named by the first CLI argument (`validate` when absent).
///
/// Exits with status 1 when discovery fails, when `search` has no query, or
/// when the command is unknown.
fn main() {
    let args: Vec<String> = std::env::args().skip(1).collect();

    let home = discover_claude_home().unwrap_or_else(|e| {
        eprintln!("Could not find Claude home: {e}");
        std::process::exit(1)
    });
    let projects = discover_projects(&home).unwrap_or_else(|e| {
        eprintln!("Could not discover projects: {e}");
        std::process::exit(1)
    });

    let command = args.first().map(String::as_str);
    match command {
        None | Some("validate") => cmd_validate(&projects),
        Some("stats") => cmd_stats(&projects),
        Some("search") => match args.get(1).map(String::as_str) {
            Some(query) if !query.is_empty() => cmd_search(&projects, query),
            _ => {
                eprintln!("Usage: ant search <query>");
                std::process::exit(1);
            }
        },
        Some(other) => {
            eprintln!("Unknown command: {other}");
            eprintln!("Commands: validate (default), search <query>, stats");
            std::process::exit(1);
        }
    }
}
/// Validate that the parser can handle all sessions without errors.
///
/// Each session is checked twice: `validate_lines` counts JSONL lines that fail
/// to parse, and `load_session` runs the full pipeline. A per-session line is
/// printed only for sessions that had skipped lines; pipeline failures are
/// reported on stderr and collected. After the summary, the process exits with
/// status 1 if any session failed to load.
fn cmd_validate(projects: &[ClaudeProject]) {
    let mut total_sessions = 0;
    let mut total_ok = 0;
    let mut total_messages = 0;
    let mut total_skipped_lines = 0;
    let mut errors: Vec<(String, String)> = Vec::new();
    for project in projects {
        println!(
            "Project: {} ({} sessions)",
            project.original_path,
            project.sessions.len()
        );
        for session in &project.sessions {
            total_sessions += 1;
            // `get(..8)` is `None` when the id is shorter than 8 bytes or byte 8
            // is not a character boundary; fall back to the full id instead of
            // panicking on the slice.
            let short_id = session.id.get(..8).unwrap_or(session.id.as_str());
            // Raw line-level validation: count how many lines parse vs skip
            let (_raw_ok, raw_skip) = validate_lines(&session.jsonl_path);
            total_skipped_lines += raw_skip;
            // Full pipeline validation
            match load_session(session) {
                Ok(parsed) => {
                    total_ok += 1;
                    total_messages += parsed.messages.len();
                    let tools = parsed.tool_exchanges.len();
                    let subs = parsed.subagents.len();
                    let branches = if parsed.tree.is_linear() {
                        "linear"
                    } else {
                        "branched"
                    };
                    if raw_skip > 0 {
                        // Separator added so the id is not fused to the message
                        // count (same " — " as the error line below).
                        println!(
                            " {} — {} msgs, {} tools, {} subagents, {} | {raw_skip} lines skipped",
                            short_id,
                            parsed.messages.len(),
                            tools,
                            subs,
                            branches,
                        );
                    }
                }
                Err(e) => {
                    errors.push((session.id.clone(), e.to_string()));
                    eprintln!(" {} — ERROR: {e}", short_id);
                }
            }
        }
    }
    println!("\n--- Validation Summary ---");
    println!("Projects: {}", projects.len());
    println!("Sessions: {total_sessions} ({total_ok} ok, {} errors)", errors.len());
    println!("Messages: {total_messages}");
    if total_skipped_lines > 0 {
        println!("Skipped: {total_skipped_lines} unparseable lines");
    }
    if !errors.is_empty() {
        println!("\nErrors:");
        for (id, err) in &errors {
            println!(" {id}: {err}");
        }
        std::process::exit(1);
    }
}
/// Tally the lines of a JSONL file as `(parsed, skipped)`.
///
/// Blank lines count toward neither total. A line that cannot be read or that
/// `parse_line` rejects counts as skipped. A file that cannot be opened yields
/// `(0, 0)`.
fn validate_lines(path: &camino::Utf8Path) -> (usize, usize) {
    let reader = match std::fs::File::open(path.as_std_path()) {
        Ok(file) => std::io::BufReader::new(file),
        Err(_) => return (0, 0),
    };

    let (mut parsed, mut skipped) = (0usize, 0usize);
    for (idx, entry) in reader.lines().enumerate() {
        match entry {
            Err(_) => skipped += 1,
            Ok(text) if text.trim().is_empty() => {}
            Ok(text) => {
                // Line numbers handed to the parser are 1-based.
                if parse_line(&text, idx + 1).is_none() {
                    skipped += 1;
                } else {
                    parsed += 1;
                }
            }
        }
    }
    (parsed, skipped)
}
/// Search user and assistant message text for a query string (case-insensitive).
///
/// User messages are searched only when their content is plain text; assistant
/// messages are searched over their text blocks joined with a space. Every
/// other message kind is ignored, and sessions that fail to parse are skipped
/// silently. One line is printed per hit, followed by the total match count.
fn cmd_search(projects: &[ClaudeProject], query: &str) {
    let query_lower = query.to_lowercase();
    let mut hits = 0;
    for project in projects {
        for session in &project.sessions {
            let messages = match parse_session_deduped(&session.jsonl_path) {
                Ok(m) => m,
                Err(_) => continue,
            };
            // `get(..8)` is `None` for ids shorter than 8 bytes or when byte 8
            // is not a character boundary; use the full id rather than panic.
            let short_id = session.id.get(..8).unwrap_or(session.id.as_str());
            for msg in &messages {
                // Decide the role label and the searchable text in one pass.
                let (role, text) = match msg {
                    types::RawMessage::User(u) => match &u.message.content {
                        types::Content::Text(s) => ("user", s.clone()),
                        types::Content::Blocks(_) => continue,
                    },
                    types::RawMessage::Assistant(a) => {
                        let parts: Vec<&str> = a
                            .message
                            .content
                            .iter()
                            .filter_map(|block| match block {
                                types::ContentBlock::Text { text } => Some(text.as_str()),
                                _ => None,
                            })
                            .collect();
                        ("assistant", parts.join(" "))
                    }
                    _ => continue,
                };
                if text.to_lowercase().contains(&query_lower) {
                    let preview = truncate(&text, 120);
                    println!(
                        "[{}] {} {} | {}",
                        &project.original_path,
                        short_id,
                        role,
                        preview
                    );
                    hits += 1;
                }
            }
        }
    }
    println!("\n{hits} matches for \"{query}\"");
}
/// Show aggregate statistics across all sessions.
///
/// Sessions that fail to load still count toward the session total but add
/// nothing else. The tool ranking lists at most 15 tools, most-used first;
/// ties are broken by tool name so the output is the same on every run.
fn cmd_stats(projects: &[ClaudeProject]) {
    let mut total_sessions = 0;
    let mut total_messages = 0;
    let mut total_tools = 0;
    let mut total_subagents = 0;
    let mut tool_counts: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
    for project in projects {
        for session in &project.sessions {
            total_sessions += 1;
            if let Ok(parsed) = load_session(session) {
                total_messages += parsed.messages.len();
                total_tools += parsed.tool_exchanges.len();
                total_subagents += parsed.subagents.len();
                for ex in &parsed.tool_exchanges {
                    let name = format!("{:?}", ex.call.name);
                    *tool_counts.entry(name).or_default() += 1;
                }
            }
        }
    }
    println!("--- Statistics ---");
    println!("Projects: {}", projects.len());
    println!("Sessions: {total_sessions}");
    println!("Messages: {total_messages}");
    println!("Tool calls: {total_tools}");
    println!("Sub-agents: {total_subagents}");
    if !tool_counts.is_empty() {
        println!("\nTool usage:");
        let mut sorted: Vec<_> = tool_counts.into_iter().collect();
        // Count descending, then name ascending: HashMap iteration order is
        // unspecified, so without the tie-break equal counts would shuffle.
        sorted.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
        for (name, count) in sorted.iter().take(15) {
            println!(" {name:20} {count}");
        }
    }
}
/// Flatten `s` onto one line and cap it at `max` bytes for preview output.
///
/// `\n` becomes a space and `\r` is removed. If the result is longer than `max`
/// bytes it is cut and `...` is appended. The cut is moved back to the nearest
/// character boundary, so multi-byte text never causes a slicing panic (the
/// kept prefix may therefore be slightly shorter than `max`).
fn truncate(s: &str, max: usize) -> String {
    let s = s.replace('\n', " ").replace('\r', "");
    if s.len() <= max {
        return s;
    }
    // `max < s.len()` here, and index 0 is always a boundary, so this ends.
    let mut end = max;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    format!("{}...", &s[..end])
}
+192
View File
@@ -0,0 +1,192 @@
use portable_pty::{CommandBuilder, NativePtySystem, PtySize, PtySystem};
use std::io::{Read, Write};
use std::path::PathBuf;
use std::time::Duration;
/// Number of rows requested for the PTY and used for the vt100 parser.
const ROWS: u16 = 80;
/// Number of columns requested for the PTY, used for the vt100 parser, and
/// passed as the width when rendering screen rows to text.
const COLS: u16 = 120;
/// Command-line options of the `anth_usage` binary, as filled in by `parse_args`.
struct Args {
/// `--debug`: enables the diagnostic output emitted through the `debug!` macro.
debug: bool,
/// `--raw`: print the captured screen text instead of the JSON usage data.
raw: bool,
/// `--no-trust`: if the folder-trust prompt appears, exit with an error
/// instead of confirming it.
no_trust: bool,
/// `--workdir <path>`: explicit working directory; takes precedence over `--cwd`.
workdir: Option<PathBuf>,
/// `--cwd`: use the process's current directory as the working directory.
use_cwd: bool,
}
/// Parse the process's command-line arguments into [`Args`].
///
/// Recognised flags: `--debug`, `--raw`, `--no-trust`, `--cwd` and
/// `--workdir <path>`. An unknown argument, or `--workdir` without a following
/// path, prints a message plus the usage line to stderr and exits with status 2
/// (a missing path used to panic instead).
fn parse_args() -> Args {
    const USAGE: &str =
        "Usage: anth_usage [--debug] [--raw] [--no-trust] [--workdir <path>] [--cwd]";

    let mut args = Args {
        debug: false,
        raw: false,
        no_trust: false,
        workdir: None,
        use_cwd: false,
    };
    let mut iter = std::env::args().skip(1);
    while let Some(arg) = iter.next() {
        match arg.as_str() {
            "--debug" => args.debug = true,
            "--raw" => args.raw = true,
            "--no-trust" => args.no_trust = true,
            "--cwd" => args.use_cwd = true,
            "--workdir" => match iter.next() {
                Some(path) => args.workdir = Some(PathBuf::from(path)),
                None => {
                    // Bad user input is a usage error, not a bug: report it
                    // the same way as an unknown flag.
                    eprintln!("--workdir requires a path argument");
                    eprintln!("{USAGE}");
                    std::process::exit(2);
                }
            },
            other => {
                eprintln!("Unknown argument: {other}");
                eprintln!("{USAGE}");
                std::process::exit(2);
            }
        }
    }
    args
}
fn resolve_workdir(args: &Args) -> PathBuf {
if let Some(ref dir) = args.workdir {
return dir.clone();
}
if args.use_cwd {
return std::env::current_dir().expect("failed to get current directory");
}
#[cfg(feature = "dirigent-paths")]
{
if let Ok(paths) = dirigent_config::DirigentPaths::resolve() {
let noproject = paths.noproject_home_dir();
if noproject.exists() {
return noproject;
}
}
}
dirs::home_dir().expect("failed to resolve home directory")
}
fn grab_screen(parser: &vt100::Parser) -> String {
let screen = parser.screen();
let mut output = String::new();
for line in screen.rows(0, COLS) {
output.push_str(&line);
output.push('\n');
}
output
}
// Print a formatted diagnostic line to stderr, but only when the `debug` field
// of the first argument is true. Everything after the first argument is passed
// to `eprintln!` unchanged.
macro_rules! debug {
($args:expr, $($tt:tt)*) => {
if $args.debug {
eprintln!($($tt)*);
}
};
}
/// Start `claude` inside a pseudo-terminal, send it `/usage`, and print the result.
///
/// Steps: open a PTY of `ROWS` x `COLS`, spawn `claude` in the resolved working
/// directory, forward its output from a background thread into a channel, feed
/// that output to a vt100 parser, confirm the folder-trust prompt if it is
/// shown (or exit with status 1 under `--no-trust`), send `/usage`, then print
/// either the raw screen (`--raw`) or the parsed usage data as pretty JSON.
///
/// Synchronisation with the child is purely time-based (fixed sleeps). Failures
/// while setting up or writing to the PTY panic via `expect`.
fn main() {
let args = parse_args();
let workdir = resolve_workdir(&args);
debug!(args, "Working directory: {}", workdir.display());
let pty_system = NativePtySystem::default();
let pair = pty_system
.openpty(PtySize {
rows: ROWS,
cols: COLS,
pixel_width: 0,
pixel_height: 0,
})
.expect("failed to open pty");
let mut cmd = CommandBuilder::new("claude");
cmd.cwd(&workdir);
let mut child = pair.slave.spawn_command(cmd).expect("failed to spawn claude");
// The slave handle is not used again after the spawn.
drop(pair.slave);
let mut writer = pair.master.take_writer().expect("failed to get writer");
let reader = pair.master.try_clone_reader().expect("failed to get reader");
let (tx, rx) = std::sync::mpsc::channel();
// Background reader: forwards every chunk read from the PTY into the channel
// and stops at end-of-file or on the first read error.
std::thread::spawn(move || {
let mut reader = reader;
let mut buf = [0u8; 4096];
loop {
match reader.read(&mut buf) {
Ok(0) => break,
Ok(n) => {
// A failed send is ignored here; the loop keeps reading.
let _ = tx.send(buf[..n].to_vec());
}
Err(_) => break,
}
}
});
// Wait for claude to render
std::thread::sleep(Duration::from_secs(5));
debug!(
args,
"Child alive: {}",
matches!(child.try_wait(), Ok(None))
);
// Grab screen
let mut parser = vt100::Parser::new(ROWS, COLS, 0);
// Drain whatever output has arrived so far without blocking.
while let Ok(data) = rx.try_recv() {
parser.process(&data);
}
let output = grab_screen(&parser);
debug!(args, "=== SCREEN ===\n{output}=== END ===");
// Handle trust prompt
if output.contains("Yes, I trust this folder") {
if args.no_trust {
eprintln!("Folder is not trusted: {}", workdir.display());
eprintln!("Run claude in this folder manually to trust it, or omit --no-trust.");
let _ = child.kill();
std::process::exit(1);
}
debug!(args, "Sending enter for trust...");
// A bare carriage return is sent as the confirmation.
writer.write_all(b"\r").expect("failed to confirm trust");
std::thread::sleep(Duration::from_secs(3));
while let Ok(data) = rx.try_recv() {
parser.process(&data);
}
debug!(
args,
"=== AFTER TRUST ===\n{}=== END ===",
grab_screen(&parser)
);
}
// Send /usage
debug!(args, "Sending /usage...");
writer
.write_all(b"/usage\r")
.expect("failed to send /usage");
// Fixed wait for the usage screen to be drawn before capturing it.
std::thread::sleep(Duration::from_secs(3));
while let Ok(data) = rx.try_recv() {
parser.process(&data);
}
let raw_output = grab_screen(&parser);
let processed = dirigent_anth::anth_usage::process_usage_screen(&raw_output);
if args.raw {
println!("{}", processed.raw_screen);
} else {
println!(
"{}",
serde_json::to_string_pretty(&processed.data).expect("failed to serialize usage data")
);
}
// Best-effort termination of the child; a failure to kill is ignored.
let _ = child.kill();
}
+157
View File
@@ -0,0 +1,157 @@
use portable_pty::{Child, CommandBuilder, NativePtySystem, PtySize, PtySystem};
use std::io::{Read, Write};
use std::sync::mpsc::{self, Receiver};
use std::time::Duration;
use vt100::Parser;
/// Terminal height, in rows, used by `PtySession::spawn_claude`.
const DEFAULT_ROWS: u16 = 80;
/// Terminal width, in columns, used by `PtySession::spawn_claude`.
const DEFAULT_COLS: u16 = 120;
/// A `claude` process running inside a pseudo-terminal, together with a vt100
/// parser that tracks what the process has drawn on its screen.
pub struct PtySession {
/// Terminal emulator state; fed with every chunk received from the PTY.
parser: Parser,
/// Write side of the PTY. `send` panics and `try_send` returns an error when
/// this is `None`.
writer: Option<Box<dyn Write + Send>>,
/// Receives the output chunks forwarded by the background reader thread.
rx: Receiver<Vec<u8>>,
/// Screen width in columns, used when rendering screen rows to text.
cols: u16,
/// Handle to the spawned child process; queried by `is_alive`.
#[allow(dead_code)]
child: Box<dyn Child + Send + Sync>,
}
impl PtySession {
    /// Spawn `claude` with `args` in a PTY of the default size
    /// (`DEFAULT_ROWS` × `DEFAULT_COLS`).
    ///
    /// # Panics
    ///
    /// See [`PtySession::spawn_claude_with_size`].
    pub fn spawn_claude(args: &[&str]) -> Self {
        Self::spawn_claude_with_size(args, DEFAULT_ROWS, DEFAULT_COLS)
    }

    /// Spawn `claude` with `args` in a PTY of `rows` × `cols`.
    ///
    /// The child's working directory is set to the user's home directory when
    /// it can be resolved. A background thread forwards everything the child
    /// writes into an internal channel until end-of-file, a read error, or
    /// until this session (the receiving end) is dropped.
    ///
    /// # Panics
    ///
    /// Panics if the PTY cannot be opened, the command cannot be spawned, or
    /// the PTY reader/writer handles cannot be obtained.
    pub fn spawn_claude_with_size(args: &[&str], rows: u16, cols: u16) -> Self {
        let pty_system = NativePtySystem::default();
        let pair = pty_system
            .openpty(PtySize {
                rows,
                cols,
                pixel_width: 0,
                pixel_height: 0,
            })
            .expect("failed to open pty");
        let mut cmd = CommandBuilder::new("claude");
        for arg in args {
            cmd.arg(*arg);
        }
        if let Some(home) = dirs::home_dir() {
            cmd.cwd(home);
        }
        let child = pair
            .slave
            .spawn_command(cmd)
            .expect("failed to spawn claude");
        drop(pair.slave);
        let writer = pair.master.take_writer().expect("failed to get writer");
        let reader = pair
            .master
            .try_clone_reader()
            .expect("failed to get reader");
        let (tx, rx) = mpsc::channel::<Vec<u8>>();
        std::thread::spawn(move || {
            let mut reader = reader;
            let mut chunk = [0u8; 4096];
            loop {
                match reader.read(&mut chunk) {
                    Ok(0) => break,
                    Ok(n) => {
                        // Stop once the session (receiver) has gone away.
                        if tx.send(chunk[..n].to_vec()).is_err() {
                            break;
                        }
                    }
                    Err(_) => break,
                }
            }
        });
        Self {
            parser: Parser::new(rows, cols, 0),
            writer: Some(writer),
            rx,
            cols,
            child,
        }
    }

    /// Render the parser's current screen, one `\n`-terminated line per row.
    fn screen_text(&self) -> String {
        let mut text = String::new();
        for row in self.parser.screen().rows(0, self.cols) {
            text.push_str(&row);
            text.push('\n');
        }
        text
    }

    /// Return the current screen contents as text.
    ///
    /// Pending output is drained first, then further output is collected for
    /// up to roughly 200 ms. The collection window ends early once the reader
    /// thread has finished, since no more output can arrive.
    pub fn grab_screen(&mut self) -> String {
        while let Ok(data) = self.rx.try_recv() {
            self.parser.process(&data);
        }
        let deadline = std::time::Instant::now() + Duration::from_millis(200);
        while std::time::Instant::now() < deadline {
            match self.rx.recv_timeout(Duration::from_millis(50)) {
                Ok(data) => self.parser.process(&data),
                Err(mpsc::RecvTimeoutError::Timeout) => {}
                // Sender gone: `recv_timeout` would now return immediately on
                // every call, so looping on would only burn CPU.
                Err(mpsc::RecvTimeoutError::Disconnected) => break,
            }
        }
        self.screen_text()
    }

    /// Wait until `needle` appears on screen; see [`PtySession::wait_for_any`].
    pub fn wait_for(&mut self, needle: &str, timeout: Duration) -> bool {
        self.wait_for_any(&[needle], timeout)
    }

    /// Wait until any of `needles` appears on screen or `timeout` elapses.
    ///
    /// Returns `true` as soon as the rendered screen contains one of the
    /// needles, and `false` on timeout. If the reader thread has finished, the
    /// screen is checked one last time and `false` is returned immediately
    /// instead of spinning until the deadline.
    pub fn wait_for_any(&mut self, needles: &[&str], timeout: Duration) -> bool {
        let deadline = std::time::Instant::now() + timeout;
        while std::time::Instant::now() < deadline {
            let disconnected = match self.rx.recv_timeout(Duration::from_millis(100)) {
                Ok(data) => {
                    self.parser.process(&data);
                    false
                }
                Err(mpsc::RecvTimeoutError::Timeout) => false,
                Err(mpsc::RecvTimeoutError::Disconnected) => true,
            };
            let content = self.screen_text();
            if needles.iter().any(|needle| content.contains(*needle)) {
                return true;
            }
            if disconnected {
                // All buffered output has been processed and no more can come.
                return false;
            }
        }
        false
    }

    /// Report whether the child process is still running.
    pub fn is_alive(&mut self) -> bool {
        matches!(self.child.try_wait(), Ok(None))
    }

    /// Write `input` to the child's terminal.
    ///
    /// # Panics
    ///
    /// Panics if the writer is gone or the write fails; use
    /// [`PtySession::try_send`] for a non-panicking variant.
    pub fn send(&mut self, input: &[u8]) {
        self.writer
            .as_mut()
            .expect("writer gone")
            .write_all(input)
            .expect("failed to write to pty");
    }

    /// Write `input` to the child's terminal, returning any I/O error.
    ///
    /// # Errors
    ///
    /// Returns a `BrokenPipe` error when the writer is gone, or the error
    /// reported by the underlying write.
    pub fn try_send(&mut self, input: &[u8]) -> std::io::Result<()> {
        match self.writer.as_mut() {
            Some(w) => w.write_all(input),
            None => Err(std::io::Error::new(std::io::ErrorKind::BrokenPipe, "writer gone")),
        }
    }

    /// Write `text` followed by a carriage return, returning any I/O error.
    pub fn try_send_line(&mut self, text: &str) -> std::io::Result<()> {
        self.try_send(text.as_bytes())?;
        self.try_send(b"\r")
    }

    /// Send a carriage return. Panics under the same conditions as `send`.
    pub fn send_enter(&mut self) {
        self.send(b"\r");
    }

    /// Send `text` followed by a carriage return. Panics under the same
    /// conditions as `send`.
    pub fn send_line(&mut self, text: &str) {
        self.send(text.as_bytes());
        self.send_enter();
    }
}
+107
View File
@@ -0,0 +1,107 @@
//! Tool call correlation — matches assistant ToolUse blocks with their
//! corresponding user ToolResult blocks by ID across a message sequence.
use std::collections::HashMap;
use crate::types::{
Content, ContentBlock, RawAssistantMessage, RawMessage, RawUserMessage, ToolCall,
ToolExchange, ToolName, ToolResultData,
};
/// Collect every `ToolUse` block of an assistant message as a [`ToolCall`].
///
/// Each call records the uuid of the message it came from (an empty string
/// when the message has none).
fn extract_tool_calls(msg: &RawAssistantMessage) -> Vec<ToolCall> {
    let origin = msg.uuid.clone().unwrap_or_default();
    let mut calls = Vec::new();
    for block in msg.message.content.iter() {
        if let ContentBlock::ToolUse { id, name, input, .. } = block {
            calls.push(ToolCall {
                id: id.clone(),
                name: ToolName::from(name.clone()),
                input: input.clone(),
                source_message_uuid: origin.clone(),
            });
        }
    }
    calls
}
/// Collect every `ToolResult` block of a user message as [`ToolResultData`].
///
/// Plain-text user messages carry no tool results and yield an empty vector.
/// A result's content is reduced to text: a text payload is taken as-is, a
/// block payload becomes its text blocks joined with `\n`, and a payload with
/// no text blocks (or no payload at all) becomes `None`.
fn extract_tool_results(msg: &RawUserMessage) -> Vec<ToolResultData> {
    let blocks = match &msg.message.content {
        Content::Text(_) => return Vec::new(),
        Content::Blocks(blocks) => blocks,
    };
    let origin = msg.uuid.clone().unwrap_or_default();

    let mut results = Vec::new();
    for block in blocks.iter() {
        let (tool_use_id, content, is_error) = match block {
            ContentBlock::ToolResult { tool_use_id, content, is_error } => {
                (tool_use_id, content, is_error)
            }
            _ => continue,
        };

        let text = match content {
            None => None,
            Some(Content::Text(s)) => Some(s.clone()),
            Some(Content::Blocks(inner)) => {
                let mut pieces: Vec<&str> = Vec::new();
                for part in inner.iter() {
                    if let ContentBlock::Text { text } = part {
                        pieces.push(text.as_str());
                    }
                }
                if pieces.is_empty() {
                    None
                } else {
                    Some(pieces.join("\n"))
                }
            }
        };

        results.push(ToolResultData {
            tool_use_id: tool_use_id.clone(),
            content: text,
            is_error: *is_error,
            source_message_uuid: origin.clone(),
        });
    }
    results
}
/// Correlate tool calls with their results across a message sequence.
///
/// Iterates messages in order, collecting ToolUse blocks from assistant
/// messages and matching them by ID to ToolResult blocks in subsequent user
/// messages. Matched exchanges appear in the order their results were seen.
/// Any tool calls that never received a result are appended afterwards with
/// `result: None`, in the order the calls first appeared, so the output is
/// deterministic. Results whose id matches no pending call are dropped.
pub fn correlate_tools(messages: &[RawMessage]) -> Vec<ToolExchange> {
    // Each pending call remembers when its id was first seen so that leftovers
    // can be emitted in call order rather than HashMap iteration order.
    let mut pending: HashMap<String, (usize, ToolCall)> = HashMap::new();
    let mut next_seq = 0usize;
    let mut exchanges: Vec<ToolExchange> = Vec::new();
    for msg in messages {
        match msg {
            RawMessage::Assistant(asst) => {
                for call in extract_tool_calls(asst) {
                    // A repeated id replaces the stored call (as before) but
                    // keeps the position of its first appearance.
                    let seq = pending.get(&call.id).map_or(next_seq, |(seq, _)| *seq);
                    next_seq += 1;
                    pending.insert(call.id.clone(), (seq, call));
                }
            }
            RawMessage::User(user) => {
                for result in extract_tool_results(user) {
                    if let Some((_, call)) = pending.remove(&result.tool_use_id) {
                        exchanges.push(ToolExchange { call, result: Some(result) });
                    }
                }
            }
            _ => {}
        }
    }
    // Emit unmatched calls (no result found)
    let mut unmatched: Vec<(usize, ToolCall)> = pending.into_values().collect();
    unmatched.sort_unstable_by_key(|(seq, _)| *seq);
    exchanges.extend(
        unmatched
            .into_iter()
            .map(|(_, call)| ToolExchange { call, result: None }),
    );
    exchanges
}
+116
View File
@@ -0,0 +1,116 @@
//! Streaming deduplication for assistant messages.
use crate::types::{RawAssistantMessage, RawMessage};
/// Deduplicate streamed assistant messages.
///
/// Claude Code writes multiple JSONL lines for the same assistant message
/// as it streams. Each shares the same `uuid` with progressively more
/// content blocks. Within each run of consecutive assistant messages that
/// share a uuid, only the last entry is kept.
///
/// Assistant messages without a uuid have no identity to compare, so they are
/// never merged with a neighbour. Non-assistant messages pass through
/// unchanged and end any run in progress.
pub fn dedup_messages(messages: Vec<RawMessage>) -> Vec<RawMessage> {
    let mut result: Vec<RawMessage> = Vec::new();
    let mut buffered: Option<RawAssistantMessage> = None;
    for msg in messages {
        match msg {
            RawMessage::Assistant(asst) => {
                // A re-stream requires both uuids to be present and equal;
                // `None == None` must not count as "same message".
                let is_restream = matches!(
                    (&buffered, &asst.uuid),
                    (Some(prev), Some(uuid)) if prev.uuid.as_ref() == Some(uuid)
                );
                if !is_restream {
                    // Different message: flush the previous one first.
                    if let Some(prev) = buffered.take() {
                        result.push(RawMessage::Assistant(prev));
                    }
                }
                // Messages are moved rather than cloned: the newest version
                // simply replaces whatever was buffered.
                buffered = Some(asst);
            }
            other => {
                // Non-assistant: flush any buffered assistant first, then push this
                if let Some(prev) = buffered.take() {
                    result.push(RawMessage::Assistant(prev));
                }
                result.push(other);
            }
        }
    }
    // Flush remaining buffer
    if let Some(prev) = buffered {
        result.push(RawMessage::Assistant(prev));
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{AssistantInner, ContentBlock};

    /// Build an assistant message carrying a single text block.
    fn make_assistant(uuid: &str, stop_reason: Option<&str>, text: &str) -> RawMessage {
        let inner = AssistantInner {
            model: None,
            id: None,
            message_type: None,
            role: None,
            content: vec![ContentBlock::Text { text: text.to_string() }],
            stop_reason: stop_reason.map(str::to_string),
            stop_sequence: None,
            usage: None,
        };
        RawMessage::Assistant(RawAssistantMessage {
            uuid: Some(uuid.to_string()),
            parent_uuid: None,
            timestamp: None,
            session_id: None,
            cwd: None,
            version: None,
            git_branch: None,
            is_sidechain: false,
            request_id: None,
            message: inner,
        })
    }

    #[test]
    fn dedup_single_streamed_message() {
        let streamed = vec![
            make_assistant("a-1", None, "Part 1"),
            make_assistant("a-1", None, "Part 1 more"),
            make_assistant("a-1", Some("end_turn"), "Part 1 final"),
        ];
        let kept = dedup_messages(streamed);
        assert_eq!(kept.len(), 1);
        match &kept[0] {
            RawMessage::Assistant(survivor) => {
                assert_eq!(survivor.message.stop_reason.as_deref(), Some("end_turn"));
                match &survivor.message.content[0] {
                    ContentBlock::Text { text } => assert_eq!(text, "Part 1 final"),
                    _ => panic!("Expected text block"),
                }
            }
            _ => {}
        }
    }

    #[test]
    fn dedup_two_distinct_assistants() {
        let first = make_assistant("a-1", Some("end_turn"), "First");
        let second = make_assistant("a-2", Some("end_turn"), "Second");
        let kept = dedup_messages(vec![first, second]);
        assert_eq!(kept.len(), 2);
    }

    #[test]
    fn dedup_empty_input() {
        assert!(dedup_messages(Vec::new()).is_empty());
    }
}
+342
View File
@@ -0,0 +1,342 @@
use std::collections::HashMap;
use camino::{Utf8Path, Utf8PathBuf};
use crate::types::*;
use crate::error::{AntError, Result};
/// Locate the Claude Code home directory (`~/.claude/`).
///
/// # Errors
///
/// Returns `AntError::HomeNotFound` when the user's home directory cannot be
/// determined or contains no `.claude` entry, and `AntError::InvalidPath` when
/// the resulting path is not valid UTF-8.
pub fn discover_claude_home() -> Result<Utf8PathBuf> {
    let claude_dir = match dirs::home_dir() {
        Some(home) => home.join(".claude"),
        None => return Err(AntError::HomeNotFound),
    };
    if claude_dir.exists() {
        Utf8PathBuf::try_from(claude_dir).map_err(|e| AntError::InvalidPath(e.to_string()))
    } else {
        Err(AntError::HomeNotFound)
    }
}
/// Rewrite every backslash in `path` as a forward slash so stored paths use a
/// single separator style.
fn normalize_to_forward_slashes(path: &str) -> String {
    path.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
/// Resolve the original filesystem path for a Claude project directory.
///
/// Priority:
/// 1. `projectPath` from `sessions-index.json` (authoritative, cheap)
/// 2. `cwd` from the first user message in any session JSONL (authoritative, costs one file parse)
/// 3. `decode_project_path` (lossy fallback for empty project directories)
pub fn resolve_original_path(dir_name: &str, sessions: &[SessionRef]) -> String {
// 1. Try sessions-index.json projectPath
for session in sessions {
if let Some(ref idx) = session.index_entry {
if let Some(ref path) = idx.project_path {
if !path.is_empty() {
return normalize_to_forward_slashes(path);
}
}
}
}
// 2. Try cwd from first user message in any session
for session in sessions {
if let Ok(msgs) = crate::parser::parse_session(&session.jsonl_path) {
for msg in &msgs {
if let crate::types::RawMessage::User(user) = msg {
if let Some(ref cwd) = user.cwd {
if !cwd.is_empty() {
return normalize_to_forward_slashes(cwd);
}
}
}
}
}
}
// 3. Lossy fallback
decode_project_path(dir_name)
}
/// List every project directory found under `<home>/projects`.
///
/// Returns an empty list when the `projects` directory does not exist.
/// Entries that are not directories, or whose names/paths are not valid
/// UTF-8, are skipped.
///
/// # Errors
///
/// Propagates I/O errors from reading the `projects` directory and any error
/// returned by `discover_sessions` for a project.
pub fn discover_projects(home: &Utf8Path) -> Result<Vec<ClaudeProject>> {
    let root = home.join("projects");
    if !root.as_std_path().exists() {
        return Ok(Vec::new());
    }

    let mut found = Vec::new();
    for dir_entry in std::fs::read_dir(root.as_std_path())? {
        let native = dir_entry?.path();
        if !native.is_dir() {
            continue;
        }
        let dir_name = match native.file_name().and_then(|name| name.to_str()) {
            Some(name) => name.to_owned(),
            None => continue,
        };
        let project_path = match Utf8PathBuf::try_from(native) {
            Ok(path) => path,
            Err(_) => continue,
        };

        let sessions = discover_sessions(&project_path)?;
        let original_path = resolve_original_path(&dir_name, &sessions);
        found.push(ClaudeProject {
            path: project_path,
            original_path,
            sessions,
        });
    }
    Ok(found)
}
/// Decode an encoded project folder name back to the original path (lossy).
///
/// **Warning**: Claude Code's encoding replaces `\`, `/`, AND `_` all with
/// `-`, making this decoding ambiguous. For example, `G--dev-projects-adk-rust`
/// could be `G:/dev/projects/adk-rust` or `G:/dev/projects/adk/rust`. Prefer
/// [`resolve_original_path`] which reads ground truth from `sessions-index.json`
/// or session JSONL files. This function is a last-resort fallback for empty
/// project directories with no sessions or index.
///
/// Decoding rules:
/// - The name is split on `--`; each piece after the first becomes
///   `/<piece with '-' replaced by '/'>`.
/// - A first piece that is a single ASCII uppercase letter is a Windows drive
///   (`G` → `G:`).
/// - A first piece starting with `-` is a Unix absolute path (`-home-user` →
///   `/home/user`).
/// - Any other first piece is copied unchanged.
/// - An empty first piece contributes nothing, so `--foo` decodes to `/foo`
///   rather than `//foo`; an empty name decodes to `/`.
pub fn decode_project_path(encoded: &str) -> String {
    let mut segments = encoded.split("--");
    // `split` always yields at least one (possibly empty) piece.
    let first = segments.next().unwrap_or("");
    let mut result = String::with_capacity(encoded.len() + 1);

    let is_drive_letter =
        first.len() == 1 && first.starts_with(|c: char| c.is_ascii_uppercase());
    if is_drive_letter {
        // Windows drive letter: "G" → "G:"
        result.push_str(first);
        result.push(':');
    } else if let Some(component) = first.strip_prefix('-') {
        // Unix-style absolute path: the leading "-" encoded the root "/".
        // A piece produced by splitting on "--" has at most one leading dash.
        result.push('/');
        result.push_str(&component.replace('-', "/"));
    } else {
        result.push_str(first);
    }

    // Remaining "--"-separated pieces are further path components; single
    // dashes inside them stand for path separators.
    for part in segments {
        result.push('/');
        result.push_str(&part.replace('-', "/"));
    }

    if result.is_empty() {
        result.push('/');
    }
    result
}
/// List the session `.jsonl` files that live directly in `project_dir`.
///
/// The file stem is the session id. A sibling directory named after the id is
/// recorded as the session's artifacts directory, and the matching entry of
/// `sessions-index.json` (when that file loads) is attached. Files with a
/// different extension or a non-UTF-8 name/path are ignored.
///
/// # Errors
///
/// Propagates I/O errors from reading the directory.
pub fn discover_sessions(project_dir: &Utf8Path) -> Result<Vec<SessionRef>> {
    let index = load_session_index(project_dir);
    let mut found = Vec::new();

    for dir_entry in std::fs::read_dir(project_dir.as_std_path())? {
        let native = dir_entry?.path();

        let is_jsonl = native.extension().and_then(|ext| ext.to_str()) == Some("jsonl");
        if !is_jsonl {
            continue;
        }
        let id = match native.file_stem().and_then(|stem| stem.to_str()) {
            Some(stem) => stem.to_owned(),
            None => continue,
        };
        let jsonl_path = match Utf8PathBuf::try_from(native) {
            Ok(path) => path,
            Err(_) => continue,
        };

        // Artifacts live in a directory with the same name as the session.
        let candidate = project_dir.join(&id);
        let artifacts_dir = if candidate.as_std_path().is_dir() {
            Some(candidate)
        } else {
            None
        };

        let index_entry = match &index {
            Some(entries) => entries.get(&id).cloned(),
            None => None,
        };

        found.push(SessionRef {
            id,
            jsonl_path,
            artifacts_dir,
            index_entry,
        });
    }
    Ok(found)
}
/// Read `sessions-index.json` from `project_dir` as a map keyed by session id.
///
/// Returns `None` when the file is missing, unreadable, or not valid JSON of
/// the expected shape.
fn load_session_index(project_dir: &Utf8Path) -> Option<HashMap<String, SessionIndexEntry>> {
    let index_path = project_dir.join("sessions-index.json");
    let native = index_path.as_std_path();
    if native.exists() {
        let raw = std::fs::read_to_string(native).ok()?;
        serde_json::from_str(&raw).ok()
    } else {
        None
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Create `name` inside `dir` with the given contents.
    fn put_file(dir: &Utf8Path, name: &str, contents: &str) {
        std::fs::write(dir.join(name).as_std_path(), contents).unwrap();
    }

    /// Create the sub-directory `name` inside `dir`.
    fn put_dir(dir: &Utf8Path, name: &str) {
        std::fs::create_dir(dir.join(name).as_std_path()).unwrap();
    }

    #[test]
    fn decode_project_path_windows() {
        let decoded = decode_project_path("G--dev-projects-dirigent");
        assert_eq!(decoded, "G:/dev/projects/dirigent");
    }

    #[test]
    fn decode_project_path_windows_users() {
        let decoded = decode_project_path("C--Users-g4b-tmp");
        assert_eq!(decoded, "C:/Users/g4b/tmp");
    }

    #[test]
    fn decode_project_path_unix() {
        let decoded = decode_project_path("-home-user-projects-foo");
        assert_eq!(decoded, "/home/user/projects/foo");
    }

    #[test]
    fn discover_sessions_in_temp_dir() {
        let scratch = TempDir::new().unwrap();
        let project_dir = Utf8Path::from_path(scratch.path()).unwrap();

        // Two fake sessions, only the first of which has an artifacts directory.
        put_file(project_dir, "abc-def-123.jsonl", "{}\n");
        put_file(project_dir, "xyz-456-789.jsonl", "{}\n");
        put_dir(project_dir, "abc-def-123");

        let sessions = discover_sessions(project_dir).unwrap();
        assert_eq!(sessions.len(), 2);

        let by_id = |wanted: &str| sessions.iter().find(|s| s.id == wanted).unwrap();
        assert!(by_id("abc-def-123").artifacts_dir.is_some());
        assert!(by_id("xyz-456-789").artifacts_dir.is_none());
    }

    #[test]
    fn discover_sessions_ignores_non_jsonl() {
        let scratch = TempDir::new().unwrap();
        let project_dir = Utf8Path::from_path(scratch.path()).unwrap();

        put_file(project_dir, "session.jsonl", "{}\n");
        put_file(project_dir, "sessions-index.json", "{}");
        put_dir(project_dir, "some-dir");

        let sessions = discover_sessions(project_dir).unwrap();
        assert_eq!(sessions.len(), 1);
        assert_eq!(sessions[0].id, "session");
    }

    #[test]
    fn discover_sessions_loads_index_entry() {
        let scratch = TempDir::new().unwrap();
        let project_dir = Utf8Path::from_path(scratch.path()).unwrap();

        put_file(project_dir, "abc-123.jsonl", "{}\n");
        let index_json = r#"{
"abc-123": {
"sessionId": "abc-123",
"firstPrompt": "Hello",
"summary": "A test session",
"messageCount": 5
}
}"#;
        put_file(project_dir, "sessions-index.json", index_json);

        let sessions = discover_sessions(project_dir).unwrap();
        assert_eq!(sessions.len(), 1);

        let entry = sessions[0].index_entry.as_ref().unwrap();
        assert_eq!(entry.session_id.as_deref(), Some("abc-123"));
        assert_eq!(entry.first_prompt.as_deref(), Some("Hello"));
        assert_eq!(entry.message_count, Some(5));
    }

    #[test]
    fn resolve_original_path_prefers_index_project_path() {
        let index_entry = SessionIndexEntry {
            session_id: Some("test-session".to_string()),
            first_prompt: None,
            summary: None,
            message_count: None,
            created: None,
            modified: None,
            git_branch: None,
            project_path: Some(r"G:\dev\projects\bevy_sprite3d".to_string()),
        };
        let sessions = vec![SessionRef {
            id: "test-session".to_string(),
            jsonl_path: Utf8PathBuf::from("/tmp/fake.jsonl"),
            artifacts_dir: None,
            index_entry: Some(index_entry),
        }];

        // The index keeps the underscore that the directory name has lost.
        let resolved = resolve_original_path("G--dev-projects-bevy-sprite3d", &sessions);
        assert_eq!(resolved, "G:/dev/projects/bevy_sprite3d");
    }

    #[test]
    fn resolve_original_path_falls_back_to_decode() {
        let no_sessions: Vec<SessionRef> = Vec::new();
        let resolved = resolve_original_path("G--dev-projects-dirigent", &no_sessions);
        assert_eq!(resolved, "G:/dev/projects/dirigent");
    }

    #[test]
    fn discover_projects_empty_when_no_projects_dir() {
        let scratch = TempDir::new().unwrap();
        let home_dir = Utf8Path::from_path(scratch.path()).unwrap();

        // Without a "projects" subdirectory the result is an empty list, not an error.
        let projects = discover_projects(home_dir).unwrap();
        assert!(projects.is_empty());
    }
}
+19
View File
@@ -0,0 +1,19 @@
/// Error type shared by this crate's fallible functions.
#[derive(Debug, thiserror::Error)]
pub enum AntError {
/// An underlying filesystem or I/O operation failed. Created automatically
/// from `std::io::Error` (for example via `?`).
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// A line could not be parsed as JSON. `line` is the line number and
/// `source` the underlying `serde_json` error.
#[error("JSON parse error at line {line}: {source}")]
JsonParse {
line: usize,
source: serde_json::Error,
},
/// The Claude home directory could not be located.
#[error("Claude home directory not found")]
HomeNotFound,
/// A path was rejected; the payload is a human-readable reason.
#[error("Invalid path: {0}")]
InvalidPath(String),
}
/// Shorthand for `std::result::Result` with [`AntError`] as the error type.
pub type Result<T> = std::result::Result<T, AntError>;
+52
View File
@@ -0,0 +1,52 @@
//! dirigent_anth — Claude Code Session Parser & Toolkit
//!
//! Reads Claude Code's local JSONL session storage and produces typed,
//! deduplicated, correlated Rust data structures.
//!
//! # Design
//!
//! See `docs/superpowers/plans/2026-03-23-dirigent-ant-design.md`
pub mod claude_grab;
pub mod anth_usage;
pub mod correlation;
pub mod dedup;
pub mod discovery;
pub mod error;
pub mod noise;
pub mod parser;
pub mod subagent;
pub mod tree;
pub mod types;
pub mod util;
/// Run the whole pipeline for one session.
///
/// Parses and deduplicates the session's JSONL, builds the conversation tree,
/// correlates tool calls with results, loads sub-agents from the artifacts
/// directory (when the session has one) and links them to the tool exchanges.
///
/// # Errors
///
/// Returns any error raised while parsing the session file or loading the
/// sub-agents.
pub fn load_session(session_ref: &types::SessionRef) -> error::Result<types::ParsedSession> {
    let messages = parser::parse_session_deduped(&session_ref.jsonl_path)?;
    let tree = tree::ConversationTree::build(&messages);
    let tool_exchanges = correlation::correlate_tools(&messages);

    let mut subagents = match session_ref.artifacts_dir.as_ref() {
        Some(dir) => subagent::load_subagents(dir)?,
        None => Vec::new(),
    };
    subagent::link_subagents_to_calls(&mut subagents, &tool_exchanges);

    let parsed = types::ParsedSession {
        messages,
        tree,
        tool_exchanges,
        subagents,
    };
    Ok(parsed)
}
pub use correlation::correlate_tools;
pub use dedup::dedup_messages;
pub use discovery::{decode_project_path, discover_claude_home, discover_projects, discover_sessions, resolve_original_path};
pub use error::{AntError, Result};
pub use noise::{classify_noise, NoiseKind};
pub use parser::{parse_line, parse_session, parse_session_deduped};
pub use subagent::{link_subagents_to_calls, load_subagents};
pub use tree::{message_parent_uuid, message_uuid, ConversationNode, ConversationTree};
pub use types::*;
pub use util::parse_timestamp;
+72
View File
@@ -0,0 +1,72 @@
use crate::types::*;
/// Classification of noise patterns in Claude Code JSONL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NoiseKind {
/// User message whose `is_meta` flag is set.
Meta,
/// User message whose text is exactly `Warmup`.
Warmup,
/// User message whose text starts with `[Request interrupted`.
Interrupted,
/// User message whose text starts with `This session is being continued`.
Continuation,
/// User message whose text starts with `API Error`.
ApiError,
/// User message whose text starts with `Caveat: The messages below`.
SystemCaveat,
/// Any queue-operation record.
QueueOp,
}
/// Classify a message as noise, if applicable.
/// Returns None for normal messages.
pub fn classify_noise(message: &RawMessage) -> Option<NoiseKind> {
match message {
RawMessage::QueueOperation(_) => Some(NoiseKind::QueueOp),
RawMessage::User(user) => {
if user.is_meta.unwrap_or(false) {
return Some(NoiseKind::Meta);
}
if let Some(text) = extract_user_text(user) {
if text == "Warmup" {
return Some(NoiseKind::Warmup);
}
if text.starts_with("[Request interrupted") {
return Some(NoiseKind::Interrupted);
}
if text.starts_with("This session is being continued") {
return Some(NoiseKind::Continuation);
}
if text.starts_with("API Error") {
return Some(NoiseKind::ApiError);
}
if text.starts_with("Caveat: The messages below") {
return Some(NoiseKind::SystemCaveat);
}
}
None
}
_ => None,
}
}
/// Borrow the plain-text body of a user message.
///
/// Yields `None` when the content is a list of blocks (tool_result blocks,
/// not plain text).
fn extract_user_text(user: &RawUserMessage) -> Option<&str> {
    if let Content::Text(text) = &user.message.content {
        Some(text.as_str())
    } else {
        None
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize a single JSONL line and run it through the classifier.
    fn classify(line: &str) -> Option<NoiseKind> {
        let parsed: RawMessage = serde_json::from_str(line).unwrap();
        classify_noise(&parsed)
    }

    #[test]
    fn normal_assistant_is_not_noise() {
        let line = r#"{"type":"assistant","uuid":"x","timestamp":"2026-01-01T00:00:00Z","sessionId":"s","message":{"id":"m","role":"assistant","content":[{"type":"text","text":"Hello"}],"stop_reason":"end_turn"}}"#;
        assert_eq!(classify(line), None);
    }

    #[test]
    fn queue_op_is_noise() {
        let line = r#"{"type":"queue-operation","operation":"enqueue","timestamp":"2026-01-01T00:00:00Z","sessionId":"s"}"#;
        assert_eq!(classify(line), Some(NoiseKind::QueueOp));
    }
}
+50
View File
@@ -0,0 +1,50 @@
//! JSONL line parser for Claude Code session files.
use std::io::BufRead;
use camino::Utf8Path;
use crate::error::Result;
use crate::types::RawMessage;
/// Deserialize one JSONL line into a [`RawMessage`].
///
/// `line_number` is only used for diagnostics. A line that fails to
/// deserialize is reported through `tracing::warn!` and yields `None`.
pub fn parse_line(line: &str, line_number: usize) -> Option<RawMessage> {
    serde_json::from_str::<RawMessage>(line)
        .map_err(|e| {
            tracing::warn!(line = line_number, error = %e, "Skipping unparseable JSONL line");
        })
        .ok()
}
/// Read every message from the JSONL file at `path`.
///
/// Blank lines are ignored and lines that fail to deserialize are skipped
/// (see [`parse_line`]); line numbers passed to the parser are 1-based.
/// Failing to open or read the file is returned as an error.
pub fn parse_session(path: &Utf8Path) -> Result<Vec<RawMessage>> {
    let reader = std::io::BufReader::new(std::fs::File::open(path.as_std_path())?);

    let mut parsed = Vec::new();
    for (index, raw) in reader.lines().enumerate() {
        let text = raw?;
        if !text.trim().is_empty() {
            // `extend` with an `Option` pushes the message only when parsing succeeded.
            parsed.extend(parse_line(&text, index + 1));
        }
    }
    Ok(parsed)
}
/// Parse a session JSONL file and collapse streaming duplicates.
///
/// Claude Code writes several JSONL lines for one assistant message while it
/// streams. The lines are first read with [`parse_session`] and then passed
/// through [`crate::dedup::dedup_messages`], which keeps a single final
/// version per uuid.
pub fn parse_session_deduped(path: &Utf8Path) -> Result<Vec<RawMessage>> {
    Ok(crate::dedup::dedup_messages(parse_session(path)?))
}
+215
View File
@@ -0,0 +1,215 @@
//! Sub-agent session loading.
//!
//! Claude Code spawns sub-agents for Agent tool calls and stores their
//! conversations under `<session-artifacts-dir>/subagents/`. Each sub-agent
//! has a JSONL file and an optional `.meta.json` with metadata such as the
//! agent type.
use camino::Utf8Path;
use crate::error::Result;
use crate::parser::parse_session;
use crate::types::{SubAgentMeta, SubAgentSession, ToolExchange};
/// Load all sub-agent sessions from a session's artifacts directory.
///
/// Expects files at: `<session_artifacts_dir>/subagents/agent-<id>.jsonl`
/// with optional companion: `<session_artifacts_dir>/subagents/agent-<id>.meta.json`
///
/// Returns an empty `Vec` if the `subagents/` subdirectory does not exist.
pub fn load_subagents(session_artifacts_dir: &Utf8Path) -> Result<Vec<SubAgentSession>> {
let subagents_dir = session_artifacts_dir.join("subagents");
if !subagents_dir.as_std_path().exists() {
return Ok(Vec::new());
}
let mut subagents = Vec::new();
for entry in std::fs::read_dir(subagents_dir.as_std_path())? {
let entry = entry?;
let path = entry.path();
// Only process agent-*.jsonl files
let file_name = match path.file_name().and_then(|n| n.to_str()) {
Some(name) => name.to_string(),
None => continue,
};
if !file_name.starts_with("agent-") || !file_name.ends_with(".jsonl") {
continue;
}
// Extract agent ID: "agent-abc123.jsonl" → "abc123"
let agent_id = file_name
.strip_prefix("agent-")
.and_then(|s| s.strip_suffix(".jsonl"))
.unwrap_or(&file_name)
.to_string();
let jsonl_path = match camino::Utf8PathBuf::try_from(path.clone()) {
Ok(p) => p,
Err(_) => continue,
};
// Parse the JSONL session
let messages = parse_session(&jsonl_path)?;
// Try to load companion metadata file
let meta_path = path.with_file_name(format!("agent-{}.meta.json", agent_id));
let meta = if meta_path.exists() {
let content = std::fs::read_to_string(&meta_path)?;
serde_json::from_str::<SubAgentMeta>(&content)
.unwrap_or(SubAgentMeta { agent_type: None })
} else {
SubAgentMeta { agent_type: None }
};
subagents.push(SubAgentSession {
agent_id,
meta,
messages,
parent_tool_call_id: None,
});
}
Ok(subagents)
}
/// Try to link sub-agent sessions to their parent Agent tool calls.
///
/// For each Agent tool call in `tool_exchanges`, parses the tool result text
/// for `agentId: <id>` and matches it against sub-agent sessions. On match,
/// sets `SubAgentSession.parent_tool_call_id` to the tool call's ID. If
/// several Agent calls name the same sub-agent, the last one in
/// `tool_exchanges` wins.
///
/// This is best-effort: if the agentId text format changes or a result is
/// missing, the sub-agent is still usable but without tool_use linkage.
pub fn link_subagents_to_calls(
    subagents: &mut [SubAgentSession],
    tool_exchanges: &[ToolExchange],
) {
    use regex::Regex;
    use std::sync::OnceLock;

    // Compiled on first use and shared by every later call, instead of
    // rebuilding the regex once per parsed session.
    static AGENT_ID_RE: OnceLock<Regex> = OnceLock::new();

    if subagents.is_empty() || tool_exchanges.is_empty() {
        return;
    }

    let re = AGENT_ID_RE
        .get_or_init(|| Regex::new(r"agentId:\s*(\S+)").expect("valid regex"));

    for exchange in tool_exchanges {
        // Only look at Agent tool calls
        if exchange.call.name != crate::types::ToolName::Agent {
            continue;
        }

        // Extract agentId from the tool result text
        let agent_id = exchange
            .result
            .as_ref()
            .and_then(|r| r.content.as_deref())
            .and_then(|text| re.captures(text))
            .and_then(|caps| caps.get(1))
            .map(|m| m.as_str());
        let agent_id = match agent_id {
            Some(id) => id,
            None => continue,
        };

        // Find matching sub-agent and set the linkage
        if let Some(subagent) = subagents.iter_mut().find(|s| s.agent_id == agent_id) {
            subagent.parent_tool_call_id = Some(exchange.call.id.clone());
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{ToolCall, ToolName, ToolResultData};

    /// Build a sub-agent session with no messages and no parent link.
    fn unlinked(agent_id: &str, agent_type: Option<&str>) -> SubAgentSession {
        SubAgentSession {
            agent_id: agent_id.to_string(),
            meta: SubAgentMeta {
                agent_type: agent_type.map(String::from),
            },
            messages: vec![],
            parent_tool_call_id: None,
        }
    }

    /// Build a tool exchange; `result` is `(content, source message uuid)`.
    fn exchange(
        call_id: &str,
        name: ToolName,
        input: serde_json::Value,
        call_uuid: &str,
        result: Option<(&str, &str)>,
    ) -> ToolExchange {
        ToolExchange {
            call: ToolCall {
                id: call_id.to_string(),
                name,
                input,
                source_message_uuid: call_uuid.to_string(),
            },
            result: result.map(|(content, result_uuid)| ToolResultData {
                tool_use_id: call_id.to_string(),
                content: Some(content.to_string()),
                is_error: false,
                source_message_uuid: result_uuid.to_string(),
            }),
        }
    }

    #[test]
    fn test_link_subagents_to_calls_matches_agent_id() {
        let mut sessions = vec![
            unlinked("abc123def", Some("Explore")),
            unlinked("xyz789", None),
        ];
        let calls = vec![
            exchange(
                "toolu_01ABC",
                ToolName::Agent,
                serde_json::json!({"description": "test"}),
                "msg-1",
                Some((
                    "agentId: abc123def (use SendMessage with to: 'abc123def' to continue)\n<usage>total_tokens: 1000</usage>",
                    "msg-2",
                )),
            ),
            exchange("toolu_02DEF", ToolName::Read, serde_json::json!({}), "msg-3", None),
        ];

        link_subagents_to_calls(&mut sessions, &calls);

        assert_eq!(sessions[0].parent_tool_call_id, Some("toolu_01ABC".to_string()));
        assert_eq!(sessions[1].parent_tool_call_id, None);
    }

    #[test]
    fn test_link_subagents_empty_inputs() {
        let mut no_sessions: Vec<SubAgentSession> = Vec::new();
        let no_calls: Vec<ToolExchange> = Vec::new();
        // Must simply return without panicking.
        link_subagents_to_calls(&mut no_sessions, &no_calls);
    }

    #[test]
    fn test_link_subagents_no_match() {
        let mut sessions = vec![unlinked("no_match", None)];
        let calls = vec![exchange(
            "toolu_99",
            ToolName::Agent,
            serde_json::json!({}),
            "msg-1",
            Some(("agentId: different_id\n<usage>tokens: 500</usage>", "msg-2")),
        )];

        link_subagents_to_calls(&mut sessions, &calls);

        assert_eq!(sessions[0].parent_tool_call_id, None);
    }
}
+171
View File
@@ -0,0 +1,171 @@
//! Conversation tree module — builds a parent/child tree from `RawMessage`s.
//!
//! Claude Code sessions are not purely linear: the user can edit earlier
//! messages, producing branches. Each message carries a `uuid` and a
//! `parentUuid` that describe the relationship. This module reconstructs
//! the tree so callers can walk threads, detect branches, and select the
//! main thread.
use std::collections::HashMap;
use crate::types::RawMessage;
// ---------------------------------------------------------------------------
// Node & tree types
// ---------------------------------------------------------------------------
/// A single node in the conversation tree.
///
/// Nodes refer to each other by UUID string rather than by reference; look
/// children up in [`ConversationTree::nodes`].
#[derive(Debug)]
pub struct ConversationNode {
/// The UUID of this message.
pub uuid: String,
/// The raw message stored at this node (an owned clone of the input message).
pub message: RawMessage,
/// UUIDs of direct children, in insertion order.
pub children: Vec<String>,
}
/// The full conversation tree for a session.
///
/// A session may have multiple roots when the first message has no
/// `parentUuid`, or when a message refers to a parent that is not present
/// in the slice provided to [`ConversationTree::build`].
#[derive(Debug)]
pub struct ConversationTree {
/// Root node UUIDs (messages with no parent or with an unknown parent).
pub roots: Vec<String>,
/// All nodes indexed by UUID.
// NOTE: a `HashMap`, so iterating over it yields nodes in no particular order.
pub nodes: HashMap<String, ConversationNode>,
}
// ---------------------------------------------------------------------------
// UUID / parent-UUID helpers
// ---------------------------------------------------------------------------
/// Borrow the `uuid` of a message, if it has one.
///
/// User, assistant, progress and system messages may carry a UUID; the
/// remaining variants (e.g. `QueueOperation`) never do and yield `None`.
pub fn message_uuid(msg: &RawMessage) -> Option<&str> {
    let uuid = match msg {
        RawMessage::User(user) => &user.uuid,
        RawMessage::Assistant(assistant) => &assistant.uuid,
        RawMessage::Progress(progress) => &progress.uuid,
        RawMessage::System(system) => &system.uuid,
        RawMessage::QueueOperation(_)
        | RawMessage::FileHistorySnapshot(_)
        | RawMessage::LastPrompt(_) => return None,
    };
    uuid.as_deref()
}
/// Borrow the `parent_uuid` of a message, if it has one.
///
/// User, assistant, progress and system messages may name a parent; the
/// remaining variants carry no parent UUID and yield `None`.
pub fn message_parent_uuid(msg: &RawMessage) -> Option<&str> {
    let parent = match msg {
        RawMessage::User(user) => &user.parent_uuid,
        RawMessage::Assistant(assistant) => &assistant.parent_uuid,
        RawMessage::Progress(progress) => &progress.parent_uuid,
        RawMessage::System(system) => &system.parent_uuid,
        RawMessage::QueueOperation(_)
        | RawMessage::FileHistorySnapshot(_)
        | RawMessage::LastPrompt(_) => return None,
    };
    parent.as_deref()
}
// ---------------------------------------------------------------------------
// ConversationTree impl
// ---------------------------------------------------------------------------
impl ConversationTree {
    /// Build a conversation tree from a sequence of messages.
    ///
    /// Messages without a UUID (e.g. `QueueOperation`) are silently skipped.
    /// If a message's `parentUuid` is absent, not found in the set, or equal
    /// to the message's own UUID, that message is treated as a root.
    ///
    /// When the same UUID occurs more than once (for example in a session
    /// that was not deduplicated), the node keeps the *last* message with
    /// that UUID, takes its parent from that message, and is linked exactly
    /// once, at the position of its first occurrence.
    pub fn build(messages: &[RawMessage]) -> Self {
        let mut nodes: HashMap<String, ConversationNode> =
            HashMap::with_capacity(messages.len());
        // Distinct UUIDs in first-seen order; drives child/root ordering.
        let mut first_seen: Vec<String> = Vec::with_capacity(messages.len());

        // First pass: insert every addressable message as a node.
        for msg in messages {
            if let Some(uuid) = message_uuid(msg) {
                if let Some(existing) = nodes.get_mut(uuid) {
                    // Repeated UUID: keep the latest version of the message.
                    existing.message = msg.clone();
                } else {
                    first_seen.push(uuid.to_string());
                    nodes.insert(
                        uuid.to_string(),
                        ConversationNode {
                            uuid: uuid.to_string(),
                            message: msg.clone(),
                            children: Vec::new(),
                        },
                    );
                }
            }
        }

        // Second pass: wire up parent→child edges, one edge per distinct UUID.
        let mut roots: Vec<String> = Vec::new();
        for uuid in first_seen {
            // Copy the parent id out first so the shared borrow of `nodes`
            // ends before the mutable lookup below.
            let parent_id: Option<String> = nodes
                .get(&uuid)
                .and_then(|node| message_parent_uuid(&node.message))
                // A self-referential parent would make the node unreachable.
                .filter(|parent| *parent != uuid.as_str())
                .map(|parent| parent.to_string());

            let parent_node = match parent_id {
                Some(ref id) => nodes.get_mut(id),
                None => None,
            };
            match parent_node {
                Some(node) => node.children.push(uuid),
                // No parent, or parent not in the provided slice — treat as root.
                None => roots.push(uuid),
            }
        }

        ConversationTree { roots, nodes }
    }

    /// Walk the *main thread*: start from the first root and always follow
    /// the first child at each step.
    ///
    /// In a linear session this is the complete conversation. In a branching
    /// session this is the path taken before any edits.
    ///
    /// Returns an empty `Vec` when the tree has no roots. The walk visits at
    /// most `self.nodes.len()` nodes, so it terminates even if the public
    /// fields were populated with a cyclic structure.
    pub fn main_thread(&self) -> Vec<&ConversationNode> {
        let mut thread = Vec::new();
        let mut next = self.roots.first();
        while thread.len() < self.nodes.len() {
            let node = match next.and_then(|id| self.nodes.get(id)) {
                Some(node) => node,
                None => break,
            };
            thread.push(node);
            next = node.children.first();
        }
        thread
    }

    /// Returns `true` when every node has at most one child (no branches).
    ///
    /// Only child counts are inspected; the number of roots is not.
    pub fn is_linear(&self) -> bool {
        self.nodes.values().all(|n| n.children.len() <= 1)
    }

    /// Returns all nodes that have more than one child (branch points).
    ///
    /// The nodes come from iterating the `nodes` map, so their order is
    /// unspecified.
    pub fn branch_points(&self) -> Vec<&ConversationNode> {
        self.nodes
            .values()
            .filter(|n| n.children.len() > 1)
            .collect()
    }
}
+847
View File
@@ -0,0 +1,847 @@
//! Core types for parsing Claude Code JSONL session data.
use camino::Utf8PathBuf;
use serde::{Deserialize, Serialize};
// ---------------------------------------------------------------------------
// Content types
// ---------------------------------------------------------------------------
/// Content is either a plain string or an array of content blocks.
///
/// Uses a custom deserializer so that `Blocks` variant applies lenient
/// deserialization — unknown content block types (e.g. `tool_reference`)
/// are silently skipped instead of failing the entire message.
///
/// Serialization is derived and untagged, so a value serializes as either a
/// bare string or a bare array.
#[derive(Debug, Clone, Serialize)]
#[serde(untagged)]
pub enum Content {
/// Content given as a single JSON string.
Text(String),
/// Content given as a JSON array; holds only the blocks that deserialized.
Blocks(Vec<ContentBlock>),
}
impl<'de> serde::Deserialize<'de> for Content {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let value = serde_json::Value::deserialize(deserializer)?;
match value {
serde_json::Value::String(s) => Ok(Content::Text(s)),
serde_json::Value::Array(arr) => {
let blocks = arr
.into_iter()
.filter_map(|v| {
serde_json::from_value::<ContentBlock>(v.clone())
.ok()
.or_else(|| {
tracing::debug!(
"Skipping unknown content block: {:?}",
v.get("type")
);
None
})
})
.collect();
Ok(Content::Blocks(blocks))
}
other => Err(serde::de::Error::custom(format!(
"expected string or array for Content, got {}",
match &other {
serde_json::Value::Null => "null",
serde_json::Value::Bool(_) => "bool",
serde_json::Value::Number(_) => "number",
serde_json::Value::Object(_) => "object",
_ => "unknown",
}
))),
}
}
}
/// Typed content block inside messages.
///
/// Internally tagged on the JSON `type` field with snake_case names
/// (`text`, `tool_use`, `tool_result`, `thinking`, `image`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentBlock {
/// A `"type": "text"` block.
Text {
text: String,
},
/// A `"type": "tool_use"` block.
ToolUse {
id: String,
name: String,
// Tool arguments, kept as raw JSON.
input: serde_json::Value,
// Optional raw JSON; `None` when the key is absent.
#[serde(default)]
caller: Option<serde_json::Value>,
},
/// A `"type": "tool_result"` block; `tool_use_id` names the `tool_use` it answers.
ToolResult {
tool_use_id: String,
// Result payload: a string or nested blocks; `None` when absent.
#[serde(default)]
content: Option<Content>,
// Defaults to `false` when the key is absent.
#[serde(default)]
is_error: bool,
},
/// A `"type": "thinking"` block.
Thinking {
thinking: String,
},
/// A `"type": "image"` block; `source` is kept as raw JSON.
Image {
source: serde_json::Value,
},
}
// ---------------------------------------------------------------------------
// Lenient content block deserialization
// ---------------------------------------------------------------------------
/// Deserializes a `Vec<ContentBlock>` leniently — unknown block types are
/// silently skipped instead of failing the entire message.
fn deserialize_content_blocks<'de, D>(
deserializer: D,
) -> std::result::Result<Vec<ContentBlock>, D::Error>
where
D: serde::Deserializer<'de>,
{
use serde::Deserialize as _;
let raw: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
Ok(raw
.into_iter()
.filter_map(|v| {
serde_json::from_value::<ContentBlock>(v.clone()).ok().or_else(|| {
tracing::debug!("Skipping unknown content block: {:?}", v.get("type"));
None
})
})
.collect())
}
// ---------------------------------------------------------------------------
// Top-level JSONL line discriminator
// ---------------------------------------------------------------------------
/// Top-level JSONL line discriminator.
///
/// Internally tagged on the JSON `type` field with kebab-case names, e.g.
/// `user`, `queue-operation`, `file-history-snapshot`. A line whose `type`
/// matches none of the variants fails to deserialize.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type", rename_all = "kebab-case")]
pub enum RawMessage {
/// `"type": "user"`
User(RawUserMessage),
/// `"type": "assistant"`
Assistant(RawAssistantMessage),
/// `"type": "progress"`
Progress(RawProgressMessage),
/// `"type": "system"`
System(RawSystemMessage),
/// `"type": "queue-operation"`
QueueOperation(RawQueueOperation),
/// `"type": "file-history-snapshot"`
FileHistorySnapshot(RawFileHistorySnapshot),
/// `"type": "last-prompt"`
LastPrompt(RawLastPrompt),
}
// ---------------------------------------------------------------------------
// User message
// ---------------------------------------------------------------------------
/// A `"type": "user"` JSONL line.
///
/// JSON keys are camelCase (`parentUuid`, `sessionId`, `isSidechain`, …).
/// Every field except `message` falls back to its default when the key is
/// absent.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawUserMessage {
/// UUID of this line.
#[serde(default)]
pub uuid: Option<String>,
/// UUID of the parent line, if any.
#[serde(default)]
pub parent_uuid: Option<String>,
/// Timestamp exactly as written in the file (not parsed here).
#[serde(default)]
pub timestamp: Option<String>,
#[serde(default)]
pub session_id: Option<String>,
#[serde(default)]
pub cwd: Option<String>,
#[serde(default)]
pub version: Option<String>,
#[serde(default)]
pub git_branch: Option<String>,
/// `false` when the key is absent.
#[serde(default)]
pub is_sidechain: bool,
/// Meta flag; `None` when the key is absent.
#[serde(default)]
pub is_meta: Option<bool>,
#[serde(default)]
pub user_type: Option<String>,
/// The nested message payload (required).
pub message: UserMessageInner,
}
/// The `message` object nested inside a user JSONL line.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct UserMessageInner {
/// Role string as written in the file.
pub role: String,
/// Either plain text or a list of content blocks.
pub content: Content,
}
// ---------------------------------------------------------------------------
// Assistant message
// ---------------------------------------------------------------------------
/// A `"type": "assistant"` JSONL line.
///
/// JSON keys are camelCase (`parentUuid`, `requestId`, …). Every field
/// except `message` falls back to its default when the key is absent.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawAssistantMessage {
/// UUID of this line.
#[serde(default)]
pub uuid: Option<String>,
/// UUID of the parent line, if any.
#[serde(default)]
pub parent_uuid: Option<String>,
/// Timestamp exactly as written in the file (not parsed here).
#[serde(default)]
pub timestamp: Option<String>,
#[serde(default)]
pub session_id: Option<String>,
#[serde(default)]
pub cwd: Option<String>,
#[serde(default)]
pub version: Option<String>,
#[serde(default)]
pub git_branch: Option<String>,
/// `false` when the key is absent.
#[serde(default)]
pub is_sidechain: bool,
#[serde(default)]
pub request_id: Option<String>,
/// The nested message payload (required).
pub message: AssistantInner,
}
// NOTE: AssistantInner is the Anthropic API response object nested inside
// the Claude Code JSONL wrapper. The API uses snake_case (stop_reason, etc.)
// unlike the outer JSONL wrapper which uses camelCase.
/// The `message` object nested inside an assistant JSONL line.
///
/// All fields are optional on input; absent keys take their default.
#[derive(Debug, Clone, Deserialize)]
pub struct AssistantInner {
#[serde(default)]
pub model: Option<String>,
#[serde(default)]
pub id: Option<String>,
/// The inner object's own `type` key (renamed because `type` is a keyword).
#[serde(default, rename = "type")]
pub message_type: Option<String>,
#[serde(default)]
pub role: Option<String>,
/// Content blocks; elements that fail to deserialize are skipped, and a
/// missing key yields an empty list.
#[serde(default, deserialize_with = "deserialize_content_blocks")]
pub content: Vec<ContentBlock>,
#[serde(default)]
pub stop_reason: Option<String>,
#[serde(default)]
pub stop_sequence: Option<String>,
/// Usage object kept as raw JSON.
#[serde(default)]
pub usage: Option<serde_json::Value>,
}
// ---------------------------------------------------------------------------
// Progress message
// ---------------------------------------------------------------------------
/// A `"type": "progress"` JSONL line.
///
/// JSON keys are camelCase; every field falls back to its default when the
/// key is absent.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawProgressMessage {
/// UUID of this line.
#[serde(default)]
pub uuid: Option<String>,
/// UUID of the parent line, if any.
#[serde(default)]
pub parent_uuid: Option<String>,
/// Timestamp exactly as written in the file (not parsed here).
#[serde(default)]
pub timestamp: Option<String>,
#[serde(default)]
pub session_id: Option<String>,
#[serde(default)]
pub cwd: Option<String>,
#[serde(default)]
pub version: Option<String>,
#[serde(default)]
pub git_branch: Option<String>,
/// `false` when the key is absent.
#[serde(default)]
pub is_sidechain: bool,
/// Payload kept as raw JSON.
#[serde(default)]
pub data: Option<serde_json::Value>,
}
// ---------------------------------------------------------------------------
// System message
// ---------------------------------------------------------------------------
/// A `"type": "system"` JSONL line.
///
/// JSON keys are camelCase; every field falls back to its default when the
/// key is absent.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawSystemMessage {
/// UUID of this line.
#[serde(default)]
pub uuid: Option<String>,
/// UUID of the parent line, if any.
#[serde(default)]
pub parent_uuid: Option<String>,
/// Timestamp exactly as written in the file (not parsed here).
#[serde(default)]
pub timestamp: Option<String>,
#[serde(default)]
pub session_id: Option<String>,
#[serde(default)]
pub cwd: Option<String>,
#[serde(default)]
pub version: Option<String>,
#[serde(default)]
pub git_branch: Option<String>,
/// `false` when the key is absent.
#[serde(default)]
pub is_sidechain: bool,
/// Payload kept as raw JSON.
#[serde(default)]
pub data: Option<serde_json::Value>,
}
// ---------------------------------------------------------------------------
// Queue operation
// ---------------------------------------------------------------------------
/// A `"type": "queue-operation"` JSONL line.
///
/// Carries no `uuid`, so it never becomes a node in the conversation tree.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawQueueOperation {
/// Operation name as written in the file, e.g. `"enqueue"` (required).
pub operation: String,
/// Timestamp exactly as written in the file (not parsed here).
#[serde(default)]
pub timestamp: Option<String>,
/// From the `sessionId` key.
#[serde(default)]
pub session_id: Option<String>,
}
// ---------------------------------------------------------------------------
// File history snapshot
// ---------------------------------------------------------------------------
/// A `"type": "file-history-snapshot"` JSONL line.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawFileHistorySnapshot {
/// From the `messageId` key.
#[serde(default)]
pub message_id: Option<String>,
/// From the `isSnapshotUpdate` key; `false` when absent.
#[serde(default)]
pub is_snapshot_update: bool,
/// Snapshot payload kept as raw JSON.
#[serde(default)]
pub snapshot: Option<serde_json::Value>,
}
// ---------------------------------------------------------------------------
// Last prompt
// ---------------------------------------------------------------------------
/// A `"type": "last-prompt"` JSONL line.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawLastPrompt {
/// From the `lastPrompt` key.
#[serde(default)]
pub last_prompt: Option<String>,
/// From the `sessionId` key.
#[serde(default)]
pub session_id: Option<String>,
}
// ---------------------------------------------------------------------------
// Tool types (used by the correlation module)
// ---------------------------------------------------------------------------
/// Known tool names used by Claude Code.
///
/// Names are matched case-sensitively; anything that is not one of the
/// known names is preserved verbatim in [`ToolName::Other`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ToolName {
    Bash,
    Read,
    Write,
    Edit,
    Grep,
    Glob,
    Agent,
    Skill,
    WebSearch,
    WebFetch,
    TodoWrite,
    NotebookEdit,
    /// Any tool name not listed above, stored as written.
    Other(String),
}

impl From<String> for ToolName {
    /// Map a tool name string onto its variant.
    ///
    /// Unknown names move the input `String` into [`ToolName::Other`]
    /// without allocating a copy.
    fn from(s: String) -> Self {
        match s.as_str() {
            "Bash" => ToolName::Bash,
            "Read" => ToolName::Read,
            "Write" => ToolName::Write,
            "Edit" => ToolName::Edit,
            "Grep" => ToolName::Grep,
            "Glob" => ToolName::Glob,
            "Agent" => ToolName::Agent,
            "Skill" => ToolName::Skill,
            "WebSearch" => ToolName::WebSearch,
            "WebFetch" => ToolName::WebFetch,
            "TodoWrite" => ToolName::TodoWrite,
            "NotebookEdit" => ToolName::NotebookEdit,
            // Reuse the owned buffer instead of re-allocating the name.
            _ => ToolName::Other(s),
        }
    }
}
/// A tool call extracted from an assistant message.
#[derive(Debug, Clone)]
pub struct ToolCall {
/// Tool-use ID (e.g. `toolu_…`); sub-agent linking copies it into `parent_tool_call_id`.
pub id: String,
/// Which tool was invoked.
pub name: ToolName,
/// Tool arguments as raw JSON.
pub input: serde_json::Value,
/// UUID of the message this call was extracted from.
pub source_message_uuid: String,
}
/// A tool result extracted from a user message.
#[derive(Debug, Clone)]
pub struct ToolResultData {
/// ID of the tool call this result answers.
pub tool_use_id: String,
/// Result content as text, if any.
pub content: Option<String>,
/// Whether the result was flagged as an error.
pub is_error: bool,
/// UUID of the message this result was extracted from.
pub source_message_uuid: String,
}
/// A correlated tool call + result pair.
#[derive(Debug, Clone)]
pub struct ToolExchange {
/// The tool invocation.
pub call: ToolCall,
/// The matching result; `None` when no result was found for the call.
pub result: Option<ToolResultData>,
}
// ---------------------------------------------------------------------------
// Discovery types (used by the discovery module)
// ---------------------------------------------------------------------------
/// A discovered Claude Code project directory.
#[derive(Debug, Clone)]
pub struct ClaudeProject {
/// Path of the project directory.
pub path: Utf8PathBuf,
/// NOTE(review): presumably the decoded path of the original working
/// directory this project maps to — confirm against the discovery module.
pub original_path: String,
/// Sessions found for this project (not yet parsed).
pub sessions: Vec<SessionRef>,
}
/// Reference to a session (not yet parsed).
#[derive(Debug, Clone)]
pub struct SessionRef {
/// Session identifier.
pub id: String,
/// Path of the session's JSONL file.
pub jsonl_path: Utf8PathBuf,
/// Artifacts directory, if one exists; sub-agents are loaded from it.
pub artifacts_dir: Option<Utf8PathBuf>,
/// Matching entry from `sessions-index.json`, when available.
pub index_entry: Option<SessionIndexEntry>,
}
/// From sessions-index.json (when available).
///
/// JSON keys are camelCase (`sessionId`, `firstPrompt`, …); every field is
/// optional and defaults to `None` when the key is absent.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionIndexEntry {
#[serde(default)]
pub session_id: Option<String>,
#[serde(default)]
pub first_prompt: Option<String>,
#[serde(default)]
pub summary: Option<String>,
#[serde(default)]
pub message_count: Option<u32>,
/// Kept as raw JSON rather than a typed timestamp.
#[serde(default)]
pub created: Option<serde_json::Value>,
/// Kept as raw JSON rather than a typed timestamp.
#[serde(default)]
pub modified: Option<serde_json::Value>,
#[serde(default)]
pub git_branch: Option<String>,
#[serde(default)]
pub project_path: Option<String>,
}
// ---------------------------------------------------------------------------
// Sub-agent types
// ---------------------------------------------------------------------------
/// Sub-agent metadata from .meta.json.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SubAgentMeta {
/// From the `agentType` key; `None` when the key is absent.
#[serde(default)]
pub agent_type: Option<String>,
}
/// A parsed sub-agent session.
#[derive(Debug, Clone)]
pub struct SubAgentSession {
/// The `<id>` part of the `agent-<id>.jsonl` file name.
pub agent_id: String,
/// Contents of the companion `.meta.json`, or empty metadata.
pub meta: SubAgentMeta,
/// Messages parsed from the sub-agent's JSONL file.
pub messages: Vec<RawMessage>,
/// ID of the parent Agent tool call; `None` until a link is established.
pub parent_tool_call_id: Option<String>,
}
// ---------------------------------------------------------------------------
// MessageMeta (convenience, future use)
// ---------------------------------------------------------------------------
/// Common metadata extracted from any message. Defined for future consumers.
///
/// Unlike the `Raw*` message structs, `uuid` and `session_id` are not
/// optional here.
#[derive(Debug, Clone)]
pub struct MessageMeta {
pub uuid: String,
pub parent_uuid: Option<String>,
/// Timestamp as a string (not parsed).
pub timestamp: Option<String>,
pub session_id: String,
pub cwd: Option<String>,
pub version: Option<String>,
pub git_branch: Option<String>,
pub is_sidechain: bool,
}
// ---------------------------------------------------------------------------
// ParsedSession
// ---------------------------------------------------------------------------
/// A fully parsed session with all correlations built.
#[derive(Debug)]
pub struct ParsedSession {
/// The session's messages.
pub messages: Vec<RawMessage>,
/// Parent/child tree built from `messages`.
pub tree: crate::tree::ConversationTree,
/// Tool calls paired with their results.
pub tool_exchanges: Vec<ToolExchange>,
/// Sub-agent sessions loaded for this session; empty when there are none.
pub subagents: Vec<SubAgentSession>,
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_content_text_string() {
let json = r#""Hello world""#;
let content: Content = serde_json::from_str(json).unwrap();
match content {
Content::Text(s) => assert_eq!(s, "Hello world"),
_ => panic!("Expected Content::Text"),
}
}
#[test]
fn parse_content_blocks() {
let json = r#"[{"type": "text", "text": "Hello"}]"#;
let content: Content = serde_json::from_str(json).unwrap();
match content {
Content::Blocks(blocks) => {
assert_eq!(blocks.len(), 1);
match &blocks[0] {
ContentBlock::Text { text } => assert_eq!(text, "Hello"),
_ => panic!("Expected ContentBlock::Text"),
}
}
_ => panic!("Expected Content::Blocks"),
}
}
#[test]
fn parse_tool_use_block() {
let json = r#"{"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}}"#;
let block: ContentBlock = serde_json::from_str(json).unwrap();
match block {
ContentBlock::ToolUse { id, name, .. } => {
assert_eq!(id, "toolu_123");
assert_eq!(name, "Bash");
}
_ => panic!("Expected ContentBlock::ToolUse"),
}
}
#[test]
fn parse_tool_result_block() {
let json = r#"{"type": "tool_result", "tool_use_id": "toolu_123", "content": "output text", "is_error": false}"#;
let block: ContentBlock = serde_json::from_str(json).unwrap();
match block {
ContentBlock::ToolResult {
tool_use_id,
is_error,
..
} => {
assert_eq!(tool_use_id, "toolu_123");
assert!(!is_error);
}
_ => panic!("Expected ContentBlock::ToolResult"),
}
}
#[test]
fn parse_thinking_block() {
let json = r#"{"type": "thinking", "thinking": "Let me consider..."}"#;
let block: ContentBlock = serde_json::from_str(json).unwrap();
match block {
ContentBlock::Thinking { thinking } => {
assert_eq!(thinking, "Let me consider...");
}
_ => panic!("Expected ContentBlock::Thinking"),
}
}
#[test]
fn parse_queue_operation() {
let json = r#"{"type": "queue-operation", "operation": "enqueue", "timestamp": "2026-03-14T21:15:17.531Z", "sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73"}"#;
let msg: RawMessage = serde_json::from_str(json).unwrap();
match msg {
RawMessage::QueueOperation(op) => {
assert_eq!(op.operation, "enqueue");
assert_eq!(
op.session_id.as_deref(),
Some("00f72d8d-fc54-485c-a082-310ffcabdb73")
);
}
_ => panic!("Expected RawMessage::QueueOperation"),
}
}
#[test]
fn parse_user_message_with_string_content() {
let json = r#"{
"parentUuid": "b1ab1ac7-fdb6-4e25-bc17-4c060b470b4a",
"isSidechain": false,
"userType": "external",
"cwd": "G:\\dev\\projects\\dirigent",
"sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73",
"version": "2.1.71",
"gitBranch": "main",
"type": "user",
"message": {
"role": "user",
"content": "Hello world"
},
"isMeta": false,
"uuid": "1d843a4a-b99d-4c02-91a3-7cfe3dcac9f0",
"timestamp": "2026-03-14T21:08:58.586Z"
}"#;
let msg: RawMessage = serde_json::from_str(json).unwrap();
match msg {
RawMessage::User(u) => {
assert_eq!(u.uuid.as_deref(), Some("1d843a4a-b99d-4c02-91a3-7cfe3dcac9f0"));
assert_eq!(u.session_id.as_deref(), Some("00f72d8d-fc54-485c-a082-310ffcabdb73"));
assert_eq!(u.is_meta, Some(false));
match &u.message.content {
Content::Text(s) => assert_eq!(s, "Hello world"),
_ => panic!("Expected Content::Text"),
}
}
_ => panic!("Expected RawMessage::User"),
}
}
#[test]
fn parse_assistant_message_with_tool_use() {
let json = r#"{
"parentUuid": "77793647-f957-4aec-8b04-a9c07e01e37b",
"isSidechain": false,
"userType": "external",
"cwd": "G:\\dev\\projects\\dirigent",
"sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73",
"version": "2.1.71",
"gitBranch": "main",
"message": {
"model": "claude-opus-4-6",
"id": "msg_01NcwYjEydGEyZCSCgwmcnYd",
"type": "message",
"role": "assistant",
"content": [
{
"type": "tool_use",
"id": "toolu_01DP5mkAQnAi2o54idq24cPn",
"name": "Agent",
"input": {
"description": "Investigate config sources of truth",
"subagent_type": "Explore",
"prompt": "test prompt"
},
"caller": { "type": "direct" }
}
],
"stop_reason": null,
"stop_sequence": null,
"usage": {
"input_tokens": 3,
"cache_creation_input_tokens": 20147,
"cache_read_input_tokens": 0,
"output_tokens": 9,
"service_tier": "standard"
}
},
"requestId": "req_011CZ3fYWGjcQCgh5d58d2k8",
"type": "assistant",
"uuid": "6cad0d13-d0ae-47fa-a6b1-b7b45a2b5e0b",
"timestamp": "2026-03-14T21:15:27.916Z"
}"#;
let msg: RawMessage = serde_json::from_str(json).unwrap();
match msg {
RawMessage::Assistant(a) => {
assert_eq!(a.uuid.as_deref(), Some("6cad0d13-d0ae-47fa-a6b1-b7b45a2b5e0b"));
assert_eq!(a.message.model.as_deref(), Some("claude-opus-4-6"));
assert_eq!(a.message.content.len(), 1);
match &a.message.content[0] {
ContentBlock::ToolUse { name, id, .. } => {
assert_eq!(name, "Agent");
assert_eq!(id, "toolu_01DP5mkAQnAi2o54idq24cPn");
}
_ => panic!("Expected ContentBlock::ToolUse"),
}
assert!(a.message.stop_reason.is_none());
assert!(a.message.usage.is_some());
}
_ => panic!("Expected RawMessage::Assistant"),
}
}
#[test]
fn unknown_content_block_type_skipped_in_assistant() {
let json = r#"{
"parentUuid": null,
"isSidechain": false,
"sessionId": "test",
"message": {
"role": "assistant",
"content": [
{"type": "text", "text": "known"},
{"type": "future_type", "data": "something"}
]
},
"type": "assistant",
"uuid": "test-uuid",
"timestamp": "2026-01-01T00:00:00Z"
}"#;
let msg: RawMessage = serde_json::from_str(json).unwrap();
match msg {
RawMessage::Assistant(a) => {
assert_eq!(a.message.content.len(), 1);
match &a.message.content[0] {
ContentBlock::Text { text } => assert_eq!(text, "known"),
_ => panic!("Expected ContentBlock::Text"),
}
}
_ => panic!("Expected RawMessage::Assistant"),
}
}
// -----------------------------------------------------------------------
// Regression tests for parse failure audit (2026-04-04)
// -----------------------------------------------------------------------
#[test]
fn tool_reference_in_tool_result_content_does_not_fail() {
    // Audit suggestions 1 & 3: a `tool_reference` block nested inside
    // `tool_result.content` must be dropped quietly instead of failing the
    // whole message.
    let json = r#"{
"type": "user",
"uuid": "test-uuid",
"parentUuid": null,
"isSidechain": false,
"sessionId": "test-session",
"message": {
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_abc123",
"content": [
{"type": "text", "text": "File contents here"},
{"type": "tool_reference", "tool_name": "TodoWrite"}
],
"is_error": false
}
]
}
}"#;
    let parsed: RawMessage = serde_json::from_str(json).unwrap();

    // Peel the message apart one layer at a time; each layer panics with its
    // own message if the shape is not the expected one.
    let user = match parsed {
        RawMessage::User(inner) => inner,
        _ => panic!("Expected RawMessage::User"),
    };
    let outer_blocks = match &user.message.content {
        Content::Blocks(blocks) => blocks,
        _ => panic!("Expected Content::Blocks"),
    };
    assert_eq!(outer_blocks.len(), 1);

    let (tool_use_id, content) = match &outer_blocks[0] {
        ContentBlock::ToolResult { tool_use_id, content, .. } => (tool_use_id, content),
        _ => panic!("Expected ContentBlock::ToolResult"),
    };
    assert_eq!(tool_use_id, "toolu_abc123");

    // The nested content keeps the text block only; the tool_reference is gone.
    let inner_blocks = match content.as_ref().unwrap() {
        Content::Blocks(inner) => inner,
        _ => panic!("Expected inner Content::Blocks"),
    };
    assert_eq!(inner_blocks.len(), 1);
    let text = match &inner_blocks[0] {
        ContentBlock::Text { text } => text,
        _ => panic!("Expected inner ContentBlock::Text"),
    };
    assert_eq!(text, "File contents here");
}
#[test]
fn file_history_snapshot_parses() {
    // Audit suggestion 2: a `file-history-snapshot` line must deserialize
    // into its own variant rather than being a parse failure.
    let json = r#"{
"type": "file-history-snapshot",
"messageId": "abc-123",
"isSnapshotUpdate": false,
"snapshot": {
"messageId": "abc-123",
"trackedFileBackups": {
"src/main.rs": {"backupFileName": "main.rs.bak", "backupTime": "2026-01-01T00:00:00Z", "version": "1"}
},
"timestamp": "2026-01-01T00:00:00Z"
}
}"#;
    let parsed: RawMessage = serde_json::from_str(json).unwrap();
    let entry = match parsed {
        RawMessage::FileHistorySnapshot(inner) => inner,
        _ => panic!("Expected RawMessage::FileHistorySnapshot"),
    };

    assert_eq!(entry.message_id.as_deref(), Some("abc-123"));
    assert!(!entry.is_snapshot_update);
    assert!(entry.snapshot.is_some());
}
#[test]
fn last_prompt_parses() {
    // Audit suggestion 2: a `last-prompt` line must deserialize into its own
    // variant rather than being a parse failure.
    let json = r#"{
"type": "last-prompt",
"lastPrompt": "Fix the bug in auth middleware",
"sessionId": "session-uuid-123"
}"#;
    let parsed: RawMessage = serde_json::from_str(json).unwrap();
    let entry = match parsed {
        RawMessage::LastPrompt(inner) => inner,
        _ => panic!("Expected RawMessage::LastPrompt"),
    };

    assert_eq!(entry.last_prompt.as_deref(), Some("Fix the bug in auth middleware"));
    assert_eq!(entry.session_id.as_deref(), Some("session-uuid-123"));
}
#[test]
fn unknown_content_block_in_user_message_skipped() {
    // Audit suggestion 3: user-message content is parsed as leniently as
    // assistant content, so an unrecognised block type is dropped quietly.
    let json = r#"{
"type": "user",
"uuid": "test-uuid",
"isSidechain": false,
"sessionId": "test",
"message": {
"role": "user",
"content": [
{"type": "text", "text": "known"},
{"type": "future_unknown_type", "data": "something"}
]
}
}"#;
    let parsed: RawMessage = serde_json::from_str(json).unwrap();
    let user = match parsed {
        RawMessage::User(inner) => inner,
        _ => panic!("Expected RawMessage::User"),
    };
    let blocks = match &user.message.content {
        Content::Blocks(blocks) => blocks,
        _ => panic!("Expected Content::Blocks"),
    };

    // Only the known text block remains.
    assert_eq!(blocks.len(), 1);
    let text = match &blocks[0] {
        ContentBlock::Text { text } => text,
        _ => panic!("Expected ContentBlock::Text"),
    };
    assert_eq!(text, "known");
}
#[test]
fn tool_name_from_string() {
    // Names that map onto a dedicated variant.
    let dedicated = [
        ("Bash", ToolName::Bash),
        ("Read", ToolName::Read),
        ("Agent", ToolName::Agent),
        ("WebSearch", ToolName::WebSearch),
    ];
    for (raw, expected) in dedicated.iter() {
        assert_eq!(&ToolName::from(raw.to_string()), expected);
    }

    // Any other name is carried verbatim inside `Other`.
    let custom = ToolName::from("CustomTool".to_string());
    assert_eq!(custom, ToolName::Other("CustomTool".to_string()));
}
}
+70
View File
@@ -0,0 +1,70 @@
use chrono::{DateTime, Utc};
/// Parse a timestamp from the various formats found in Claude Code data.
///
/// Supported inputs:
/// - ISO 8601 / RFC 3339 string: `"2026-03-22T17:00:13.192Z"`
/// - Unix milliseconds (number > 1e12): `1769461914249`
/// - Unix seconds (number <= 1e12): `1769461914`
/// - Numbers that are not exactly representable as `i64` (for example
///   fractional seconds such as `1769461914.25`) are read as `f64` and
///   classified with the same seconds/milliseconds threshold.
///
/// Returns `None` for any other JSON type, for a string that is not valid
/// RFC 3339, and for a number that chrono rejects as out of range.
pub fn parse_timestamp(value: &serde_json::Value) -> Option<DateTime<Utc>> {
    // Values strictly above this are treated as milliseconds, otherwise seconds.
    const MILLIS_THRESHOLD: i64 = 1_000_000_000_000;

    match value {
        serde_json::Value::String(s) => DateTime::parse_from_rfc3339(s)
            .ok()
            .map(|dt| dt.with_timezone(&Utc)),
        serde_json::Value::Number(n) => {
            if let Some(int) = n.as_i64() {
                if int > MILLIS_THRESHOLD {
                    DateTime::from_timestamp_millis(int)
                } else {
                    DateTime::from_timestamp(int, 0)
                }
            } else {
                // Not an i64: a float, or an unsigned value above i64::MAX.
                let float = n.as_f64()?;
                if float > MILLIS_THRESHOLD as f64 {
                    // `as` saturates on overflow; chrono then rejects the
                    // out-of-range value and we return `None`.
                    DateTime::from_timestamp_millis(float as i64)
                } else {
                    // Split into whole seconds and a non-negative sub-second
                    // part so that negative fractional values stay correct.
                    let whole = float.floor();
                    let nanos = ((float - whole) * 1e9) as u32;
                    DateTime::from_timestamp(whole as i64, nanos)
                }
            }
        }
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Datelike;

    /// An RFC 3339 string yields the calendar date it spells out.
    #[test]
    fn parse_timestamp_iso8601() {
        let input = serde_json::json!("2026-03-22T17:00:13.192Z");
        let parsed = parse_timestamp(&input).unwrap();
        assert_eq!((parsed.year(), parsed.month(), parsed.day()), (2026, 3, 22));
    }

    /// A 13-digit integer is accepted and read as milliseconds.
    #[test]
    fn parse_timestamp_unix_millis() {
        let input = serde_json::json!(1769461914249_i64);
        let parsed = parse_timestamp(&input).unwrap();
        assert!(parsed.year() >= 2025);
    }

    /// A 10-digit integer is accepted and read as seconds.
    #[test]
    fn parse_timestamp_unix_seconds() {
        let input = serde_json::json!(1769461914_i64);
        let parsed = parse_timestamp(&input).unwrap();
        assert!(parsed.year() >= 2025);
    }

    /// JSON `null` is not a timestamp.
    #[test]
    fn parse_timestamp_null_returns_none() {
        let input = serde_json::json!(null);
        let parsed = parse_timestamp(&input);
        assert!(parsed.is_none());
    }

    /// A string that is not RFC 3339 is rejected rather than guessed at.
    #[test]
    fn parse_timestamp_invalid_string_returns_none() {
        let input = serde_json::json!("not a date");
        let parsed = parse_timestamp(&input);
        assert!(parsed.is_none());
    }
}
@@ -0,0 +1,6 @@
{"type":"user","uuid":"r-001","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Help me"}}
{"type":"assistant","uuid":"a-001","parentUuid":"r-001","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-001","message":{"model":"claude-opus-4-6","id":"msg-001","type":"message","role":"assistant","content":[{"type":"text","text":"Sure"}],"stop_reason":"end_turn","usage":{"input_tokens":10,"output_tokens":5}}}
{"type":"user","uuid":"u-002","parentUuid":"a-001","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Do option A"}}
{"type":"assistant","uuid":"a-003","parentUuid":"u-002","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-002","message":{"model":"claude-opus-4-6","id":"msg-003","type":"message","role":"assistant","content":[{"type":"text","text":"Doing A"}],"stop_reason":"end_turn","usage":{"input_tokens":15,"output_tokens":5}}}
{"type":"user","uuid":"u-002b","parentUuid":"a-001","timestamp":"2026-03-23T10:00:04.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Actually, do option B"}}
{"type":"assistant","uuid":"a-003b","parentUuid":"u-002b","timestamp":"2026-03-23T10:00:05.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-003","message":{"model":"claude-opus-4-6","id":"msg-003b","type":"message","role":"assistant","content":[{"type":"text","text":"Doing B"}],"stop_reason":"end_turn","usage":{"input_tokens":15,"output_tokens":5}}}
@@ -0,0 +1,6 @@
{"type":"queue-operation","operation":"enqueue","timestamp":"2026-03-14T21:00:00.000Z","sessionId":"test-session-001"}
{"type":"queue-operation","operation":"dequeue","timestamp":"2026-03-14T21:00:00.001Z","sessionId":"test-session-001"}
{"type":"user","uuid":"u-001","parentUuid":null,"timestamp":"2026-03-14T21:00:01.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Hello, help me with this project"}}
{"type":"assistant","uuid":"a-001","parentUuid":"u-001","timestamp":"2026-03-14T21:00:02.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-001","message":{"model":"claude-opus-4-6","id":"msg-001","type":"message","role":"assistant","content":[{"type":"text","text":"I'll help you."},{"type":"tool_use","id":"toolu_01","name":"Bash","input":{"command":"ls","description":"List files"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":50}}}
{"type":"user","uuid":"u-002","parentUuid":"a-001","timestamp":"2026-03-14T21:00:03.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01","content":"file1.rs\nfile2.rs","is_error":false}]}}
{"type":"assistant","uuid":"a-002","parentUuid":"u-002","timestamp":"2026-03-14T21:00:04.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-002","message":{"model":"claude-opus-4-6","id":"msg-002","type":"message","role":"assistant","content":[{"type":"text","text":"I can see two Rust files in the directory."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":30}}}
@@ -0,0 +1,9 @@
{"type":"queue-operation","operation":"enqueue","timestamp":"2026-03-14T21:00:00.000Z","sessionId":"test-session-noise"}
{"type":"user","uuid":"u-n-001","parentUuid":null,"timestamp":"2026-03-14T21:00:01.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"message":{"role":"user","content":"system injected stuff"}}
{"type":"user","uuid":"u-n-002","parentUuid":"u-n-001","timestamp":"2026-03-14T21:00:02.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Warmup"}}
{"type":"user","uuid":"u-n-003","parentUuid":"u-n-002","timestamp":"2026-03-14T21:00:03.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"[Request interrupted by user"}}
{"type":"user","uuid":"u-n-004","parentUuid":"u-n-003","timestamp":"2026-03-14T21:00:04.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"This session is being continued from a previous conversation"}}
{"type":"user","uuid":"u-n-005","parentUuid":"u-n-004","timestamp":"2026-03-14T21:00:05.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"API Error: rate limit exceeded"}}
{"type":"user","uuid":"u-n-006","parentUuid":"u-n-005","timestamp":"2026-03-14T21:00:06.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Caveat: The messages below were generated by the user"}}
{"type":"user","uuid":"u-n-007","parentUuid":"u-n-006","timestamp":"2026-03-14T21:00:07.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Please help me fix this bug"}}
{"type":"assistant","uuid":"a-n-001","parentUuid":"u-n-007","timestamp":"2026-03-14T21:00:08.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"message":{"id":"msg-n-001","role":"assistant","content":[{"type":"text","text":"Sure, let me help."}],"stop_reason":"end_turn"}}
@@ -0,0 +1,6 @@
{"type":"user","uuid":"u-100","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"What files are here?"}}
{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me"}],"stop_reason":null,"usage":{"input_tokens":50,"output_tokens":3}}}
{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.100Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me look"},{"type":"tool_use","id":"toolu_100","name":"Bash","input":{"command":""}}],"stop_reason":null,"usage":{"input_tokens":50,"output_tokens":12}}}
{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.200Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me look at this."},{"type":"tool_use","id":"toolu_100","name":"Bash","input":{"command":"ls"}}],"stop_reason":"tool_use","usage":{"input_tokens":50,"output_tokens":20}}}
{"type":"user","uuid":"u-101","parentUuid":"a-100","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_100","content":"main.rs\nlib.rs","is_error":false}]}}
{"type":"assistant","uuid":"a-101","parentUuid":"u-101","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-101","message":{"model":"claude-opus-4-6","id":"msg-101","type":"message","role":"assistant","content":[{"type":"text","text":"Done."}],"stop_reason":"end_turn","usage":{"input_tokens":100,"output_tokens":5}}}
@@ -0,0 +1,4 @@
{"type":"user","uuid":"u-300","parentUuid":null,"timestamp":"2026-03-23T12:00:00.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Search the codebase"}}
{"type":"assistant","uuid":"a-300","parentUuid":"u-300","timestamp":"2026-03-23T12:00:01.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-300","message":{"model":"claude-opus-4-6","id":"msg-300","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_300","name":"Agent","input":{"description":"Search codebase","subagent_type":"Explore","prompt":"Find all config files"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":20}}}
{"type":"user","uuid":"u-301","parentUuid":"a-300","timestamp":"2026-03-23T12:00:30.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_300","content":"Found 3 config files","is_error":false}]}}
{"type":"assistant","uuid":"a-301","parentUuid":"u-301","timestamp":"2026-03-23T12:00:31.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-301","message":{"model":"claude-opus-4-6","id":"msg-301","type":"message","role":"assistant","content":[{"type":"text","text":"I found the config files."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":10}}}
@@ -0,0 +1,2 @@
{"type":"user","uuid":"sa-u1","parentUuid":null,"timestamp":"2026-03-23T12:00:02.000Z","sessionId":"agent-abc123","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":true,"isMeta":false,"message":{"role":"user","content":"Find all config files"}}
{"type":"assistant","uuid":"sa-a1","parentUuid":"sa-u1","timestamp":"2026-03-23T12:00:03.000Z","sessionId":"agent-abc123","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":true,"requestId":"req-sa1","message":{"model":"claude-opus-4-6","id":"msg-sa1","type":"message","role":"assistant","content":[{"type":"text","text":"Found config.toml, settings.json, .env"}],"stop_reason":"end_turn","usage":{"input_tokens":50,"output_tokens":15}}}
@@ -0,0 +1 @@
{"agentType": "Explore"}
@@ -0,0 +1,6 @@
{"type":"user","uuid":"u-200","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Fix the bug"}}
{"type":"assistant","uuid":"a-200","parentUuid":"u-200","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-200","message":{"model":"claude-opus-4-6","id":"msg-200","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_200","name":"Bash","input":{"command":"cargo test"}},{"type":"tool_use","id":"toolu_201","name":"Read","input":{"file_path":"src/main.rs"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":50}}}
{"type":"user","uuid":"u-201","parentUuid":"a-200","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_200","content":"test result output","is_error":false},{"type":"tool_result","tool_use_id":"toolu_201","content":"fn main() {}","is_error":false}]}}
{"type":"assistant","uuid":"a-201","parentUuid":"u-201","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-201","message":{"model":"claude-opus-4-6","id":"msg-201","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_202","name":"Write","input":{"file_path":"src/fix.rs","content":"fixed"}}],"stop_reason":"tool_use","usage":{"input_tokens":150,"output_tokens":30}}}
{"type":"user","uuid":"u-202","parentUuid":"a-201","timestamp":"2026-03-23T10:00:04.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_202","content":"File written successfully","is_error":false}]}}
{"type":"assistant","uuid":"a-202","parentUuid":"u-202","timestamp":"2026-03-23T10:00:05.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-202","message":{"model":"claude-opus-4-6","id":"msg-202","type":"message","role":"assistant","content":[{"type":"text","text":"Bug is fixed."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":20}}}
@@ -0,0 +1,294 @@
use camino::{Utf8Path, Utf8PathBuf};
use chrono::Datelike;
use dirigent_anth::{
correlation::correlate_tools,
dedup::dedup_messages,
noise::{classify_noise, NoiseKind},
parse_session,
tree::ConversationTree,
types::{ContentBlock, RawMessage},
util::parse_timestamp,
};
#[test]
fn parse_minimal_session() {
    let fixture = Utf8Path::new("tests/fixtures/minimal_session.jsonl");
    let messages = parse_session(fixture).unwrap();
    assert_eq!(messages.len(), 6, "Expected 6 messages, got {}", messages.len());

    // Map each message to its wire-level type label. The match has no
    // wildcard arm, so adding a `RawMessage` variant forces this test to be
    // revisited.
    let label_of = |message: &RawMessage| -> &'static str {
        match message {
            RawMessage::User(_) => "user",
            RawMessage::Assistant(_) => "assistant",
            RawMessage::Progress(_) => "progress",
            RawMessage::System(_) => "system",
            RawMessage::QueueOperation(_) => "queue-operation",
            RawMessage::FileHistorySnapshot(_) => "file-history-snapshot",
            RawMessage::LastPrompt(_) => "last-prompt",
        }
    };
    let count_of = |wanted: &str| messages.iter().filter(|m| label_of(*m) == wanted).count();

    assert_eq!(count_of("queue-operation"), 2);
    assert_eq!(count_of("user"), 2);
    assert_eq!(count_of("assistant"), 2);
}
#[test]
fn parse_line_returns_none_for_invalid_json() {
    // First input is not JSON at all; second is valid JSON but an empty object.
    let rejected = ["not valid json", "{}"];
    for line in rejected.iter() {
        assert!(dirigent_anth::parse_line(line, 1).is_none());
    }
}
#[test]
fn dedup_streaming_session() {
    let fixture = Utf8Path::new("tests/fixtures/streaming_dedup.jsonl");
    let raw = parse_session(fixture).unwrap();
    // The fixture holds 6 lines, three of them versions of one assistant message.
    assert_eq!(raw.len(), 6, "Raw messages: expected 6, got {}", raw.len());

    let deduped = dedup_messages(raw);
    // Expected survivors: U1, A1 (final version), U2, A2.
    assert_eq!(deduped.len(), 4, "Deduped messages: expected 4, got {}", deduped.len());

    // The first assistant message left over must be the final streamed version.
    let kept = deduped
        .iter()
        .find_map(|message| match message {
            RawMessage::Assistant(assistant) => Some(assistant),
            _ => None,
        })
        .unwrap();
    assert!(kept.message.stop_reason.is_some(), "Deduped assistant should have stop_reason set");
    assert_eq!(kept.message.stop_reason.as_deref(), Some("tool_use"));
    assert_eq!(kept.message.content.len(), 2, "Final version should have 2 content blocks");
}
#[test]
fn dedup_preserves_non_streamed_messages() {
    let fixture = Utf8Path::new("tests/fixtures/minimal_session.jsonl");
    let raw = parse_session(fixture).unwrap();
    let original_len = raw.len();

    // minimal_session has no streamed duplicates, so nothing may be dropped.
    let deduped_len = dedup_messages(raw).len();
    assert_eq!(deduped_len, original_len);
}
#[test]
fn correlate_parallel_tools() {
    let fixture = Utf8Path::new("tests/fixtures/tool_correlation.jsonl");
    let messages = dirigent_anth::parse_session_deduped(fixture).unwrap();
    let exchanges = correlate_tools(&messages);

    // The fixture has three calls: Bash + Read issued together, then Write.
    assert_eq!(exchanges.len(), 3);

    // Every call must have been matched with a result ...
    assert!(exchanges.iter().all(|exchange| exchange.result.is_some()));

    // ... and the match must be by tool-use ID.
    exchanges.iter().for_each(|exchange| {
        assert_eq!(exchange.call.id, exchange.result.as_ref().unwrap().tool_use_id);
    });
}
#[test]
fn correlate_no_tools_returns_empty() {
    // A lone plain-text user message: no tool calls and no tool results.
    let plain_user: RawMessage = serde_json::from_str(
        r#"{"type":"user","uuid":"x","timestamp":"2026-01-01T00:00:00Z","sessionId":"s","message":{"role":"user","content":"hello"}}"#,
    )
    .unwrap();
    let messages = vec![plain_user];

    assert!(correlate_tools(&messages).is_empty());
}
#[test]
fn build_branching_tree() {
    let fixture = Utf8Path::new("tests/fixtures/branching_tree.jsonl");
    let messages = dirigent_anth::parse_session(fixture).unwrap();
    let tree = ConversationTree::build(&messages);

    assert_eq!(tree.roots.len(), 1);
    assert!(!tree.is_linear());
    // A1 is the only node with two children.
    assert_eq!(tree.branch_points().len(), 1);

    // Main thread follows the first branch: R → A1 → U2 → A3.
    let main_thread = tree.main_thread();
    assert_eq!(main_thread.len(), 4);
}
#[test]
fn linear_conversation_is_linear() {
    // The minimal fixture is expected to build into a tree reported as linear.
    let fixture = Utf8Path::new("tests/fixtures/minimal_session.jsonl");
    let messages = dirigent_anth::parse_session(fixture).unwrap();
    assert!(ConversationTree::build(&messages).is_linear());
}
#[test]
fn classify_noise_from_fixture() {
    let fixture = Utf8Path::new("tests/fixtures/noise_patterns.jsonl");
    let messages = dirigent_anth::parse_session(fixture).unwrap();
    assert_eq!(messages.len(), 9, "Expected 9 messages in noise fixture");

    let classifications: Vec<Option<NoiseKind>> =
        messages.iter().map(|message| classify_noise(message)).collect();

    // Expected classification per fixture line, in file order.
    let expected = [
        Some(NoiseKind::QueueOp),
        Some(NoiseKind::Meta),
        Some(NoiseKind::Warmup),
        Some(NoiseKind::Interrupted),
        Some(NoiseKind::Continuation),
        Some(NoiseKind::ApiError),
        Some(NoiseKind::SystemCaveat),
        None, // normal user
        None, // normal assistant
    ];
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(&classifications[index], want);
    }
}
#[test]
fn load_subagent_from_fixture() {
    let artifacts_dir = Utf8Path::new("tests/fixtures/subagent/parent");
    let subagents = dirigent_anth::load_subagents(artifacts_dir).unwrap();
    assert_eq!(subagents.len(), 1);

    // Inspect the single subagent that was loaded.
    let only = &subagents[0];
    assert_eq!(only.agent_id, "abc123");
    assert_eq!(only.meta.agent_type.as_deref(), Some("Explore"));
    assert_eq!(only.messages.len(), 2);
}
#[test]
fn load_subagents_empty_dir() {
    // A missing artifacts directory yields an empty list, not an error.
    let missing_dir = Utf8Path::new("tests/fixtures/nonexistent");
    let loaded = dirigent_anth::load_subagents(missing_dir).unwrap();
    assert!(loaded.is_empty());
}
#[test]
fn load_full_session_with_subagents() {
    use dirigent_anth::types::SessionRef;

    // Parent session whose artifacts directory holds a subagent transcript.
    let parent = SessionRef {
        id: "parent".to_string(),
        jsonl_path: Utf8PathBuf::from("tests/fixtures/subagent/parent.jsonl"),
        artifacts_dir: Some(Utf8PathBuf::from("tests/fixtures/subagent/parent")),
        index_entry: None,
    };
    let loaded = dirigent_anth::load_session(&parent).unwrap();

    // Every derived part of the session must be populated.
    assert!(!loaded.messages.is_empty());
    assert!(!loaded.subagents.is_empty());
    assert!(!loaded.tree.roots.is_empty());
    assert!(!loaded.tool_exchanges.is_empty());
}
#[test]
fn load_session_without_artifacts() {
    use dirigent_anth::types::SessionRef;

    // No artifacts directory is given, so no subagents can be discovered.
    let minimal = SessionRef {
        id: "minimal".to_string(),
        jsonl_path: Utf8PathBuf::from("tests/fixtures/minimal_session.jsonl"),
        artifacts_dir: None,
        index_entry: None,
    };
    let loaded = dirigent_anth::load_session(&minimal).unwrap();

    // 2 queue-ops + 2 users + 2 assistants.
    assert_eq!(loaded.messages.len(), 6);
    assert!(loaded.subagents.is_empty());
    assert!(loaded.tree.is_linear());
}
#[test]
fn content_as_string_or_blocks() {
    use dirigent_anth::types::Content;

    // A bare JSON string becomes the text form.
    let from_string: Content = serde_json::from_str(r#""hello""#).unwrap();
    assert!(matches!(from_string, Content::Text(_)));

    // A JSON array of blocks becomes the block form.
    let from_array: Content = serde_json::from_str(r#"[{"type":"text","text":"hi"}]"#).unwrap();
    assert!(matches!(from_array, Content::Blocks(_)));

    // An empty array is still the block form, just with nothing in it.
    let from_empty: Content = serde_json::from_str(r#"[]"#).unwrap();
    assert!(matches!(from_empty, Content::Blocks(ref blocks) if blocks.is_empty()));
}
#[test]
fn missing_optional_fields_dont_crash() {
    // Bare-bones assistant line: only `type` and `message.content` are given.
    let json = r#"{
"type": "assistant",
"message": {
"content": [{"type": "text", "text": "hi"}]
}
}"#;
    let parsed = serde_json::from_str::<RawMessage>(json).unwrap();
    assert!(matches!(parsed, RawMessage::Assistant(_)));
}
#[test]
fn tool_result_content_string_and_blocks() {
    // Case 1: `content` supplied as a plain string.
    let with_content = r#"{"type":"tool_result","tool_use_id":"t1","content":"output text","is_error":false}"#;
    match serde_json::from_str::<ContentBlock>(with_content).unwrap() {
        ContentBlock::ToolResult { content, is_error, .. } => {
            assert!(!is_error);
            assert!(content.is_some());
        }
        _ => panic!("Expected ToolResult"),
    }

    // Case 2: `content` and `is_error` both omitted.
    let without_content = r#"{"type":"tool_result","tool_use_id":"t2"}"#;
    match serde_json::from_str::<ContentBlock>(without_content).unwrap() {
        ContentBlock::ToolResult { content, is_error, .. } => {
            assert!(!is_error);
            assert!(content.is_none());
        }
        _ => panic!("Expected ToolResult"),
    }
}
#[test]
fn extra_unknown_fields_are_ignored() {
    // `unknownField` and `anotherExtra` are extra keys; they must not stop
    // the line from parsing as a user message.
    let json = r#"{
"type": "user",
"uuid": "x",
"timestamp": "2026-01-01T00:00:00Z",
"sessionId": "s",
"unknownField": "should be ignored",
"anotherExtra": 42,
"message": {"role": "user", "content": "hello"}
}"#;
    let parsed = serde_json::from_str::<RawMessage>(json).unwrap();
    assert!(matches!(parsed, RawMessage::User(_)));
}
#[test]
fn timestamp_parsing_all_formats() {
    // ISO 8601 string.
    let iso_input = serde_json::json!("2026-03-22T17:00:13.192Z");
    let from_iso = parse_timestamp(&iso_input).unwrap();
    assert_eq!(from_iso.year(), 2026);

    // Unix milliseconds.
    let millis_input = serde_json::json!(1769461914249_i64);
    let from_millis = parse_timestamp(&millis_input).unwrap();
    assert!(from_millis.year() >= 2025);

    // Unix seconds.
    let seconds_input = serde_json::json!(1769461914_i64);
    let from_seconds = parse_timestamp(&seconds_input).unwrap();
    assert!(from_seconds.year() >= 2025);
}
+101
View File
@@ -0,0 +1,101 @@
use dirigent_anth::anth_usage::process_usage_screen;
const SAMPLE: &str = r#"
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Status Config Usage Stats
Session
Total cost: $0.0000
Total duration (API): 0s
Total duration (wall): 4s
Total code changes: 0 lines added, 0 lines removed
Usage: 0 input, 0 output, 0 cache read, 0 cache write
Current session
███████ 14% used
Resets 12:30pm (Europe/Vienna)
Current week (all models)
██████ 12% used
Resets May 12, 9am (Europe/Vienna)
Current week (Sonnet only)
0% used
Resets May 12, 9am (Europe/Vienna)
What's contributing to your limits usage?
Approximate, based on local sessions on this machine — does not include other devices or claude.ai
Last 24h · these are independent characteristics of your usage, not a breakdown
97% of your usage came from subagent-heavy sessions
Each subagent runs its own requests. Be deliberate about spawning them — and
consider configuring a cheaper model for simpler subagents.
16% of your usage was at >150k context
Longer sessions are more expensive even when cached. /compact mid-task, /clear
when switches to new tasks.
Subagents % of usage
Explore 3%
claude-code-guide 2%
d to day · w to week
Esc to cancel
"#;
#[test]
fn parses_gauges() {
    let result = process_usage_screen(SAMPLE);
    let gauges = &result.data.gauges;
    assert_eq!(gauges.len(), 3);

    // Gauge 0: the current session.
    let session = &gauges[0];
    assert_eq!(session.name, "Current session");
    assert_eq!(session.percent_used, 14);
    assert_eq!(session.resets.as_deref(), Some("12:30pm (Europe/Vienna)"));

    // Gauge 1: the week across all models.
    let week_all = &gauges[1];
    assert_eq!(week_all.name, "Current week (all models)");
    assert_eq!(week_all.percent_used, 12);
    assert_eq!(week_all.resets.as_deref(), Some("May 12, 9am (Europe/Vienna)"));

    // Gauge 2: the Sonnet-only week, with no usage in the sample.
    let week_sonnet = &gauges[2];
    assert_eq!(week_sonnet.name, "Current week (Sonnet only)");
    assert_eq!(week_sonnet.percent_used, 0);

    // Each gauge in the sample carries a reset line, so each needs an ISO form.
    assert!(session.resets_iso.is_some());
    assert!(week_all.resets_iso.is_some());
    assert!(week_sonnet.resets_iso.is_some());

    // The weekly reset must land on May 12.
    let week_iso = week_all.resets_iso.as_ref().unwrap();
    assert!(week_iso.starts_with("2026-05-12") || week_iso.contains("05-12"));
}
#[test]
fn parses_contributions() {
    let result = process_usage_screen(SAMPLE);
    let contributions = result.data.contributions.as_ref().unwrap();

    // The two "N% of your usage ..." statements from the sample.
    let factors = &contributions.factors;
    assert_eq!(factors.len(), 2);
    assert_eq!(factors[0].percent, 97);
    assert!(factors[0].description.contains("subagent-heavy"));
    assert_eq!(factors[1].percent, 16);

    // The two rows of the "Subagents % of usage" table.
    let subagents = &contributions.subagents;
    assert_eq!(subagents.len(), 2);
    assert_eq!(subagents[0].name, "Explore");
    assert_eq!(subagents[0].percent, 3);
    assert_eq!(subagents[1].name, "claude-code-guide");
    assert_eq!(subagents[1].percent, 2);
}
#[test]
fn raw_screen_starts_with_rule() {
    // The retained raw screen must open with the horizontal rule character.
    let raw_screen = process_usage_screen(SAMPLE).raw_screen;
    assert!(raw_screen.starts_with('─'));
}
+761
View File
@@ -0,0 +1,761 @@
# Package: dirigent_archivist
Persistent storage for all agentic interactions in Dirigent.
## Quick Facts
- **Type**: Library
- **Main Entry**: src/lib.rs
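- **Dependencies**: dirigent_protocol, dirigent_anth, camino, uuid, chrono, serde, serde_json, toml, tokio, tracing, thiserror, anyhow, sha2, hex, lru, async-trait, futures; optional (feature-gated): dirigent_chatgpt, dirigent_codex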
- **Status**: Complete - Production ready with comprehensive tests
## Purpose
The Archivist provides file-based archival storage for all session data, messages, and attachments in Dirigent. It implements an archive-first architecture with connector API fallback, using NDJSON, JSON, and TSV formats for durability and human-readability.
## Key Features
- **File-based Storage**: NDJSON for messages, JSON for metadata, TSV for indices
- **Content-Addressable Files**: SHA-256 based storage for attachments with automatic deduplication
- **Session Lineage**: Track splits, continuations, and mutations with parent references
- **Connector Registry**: Coordinate UID assignment across connectors with collision detection
- **Event Streaming**: Real-time updates via EventHandler subscribing to dirigent_protocol events
- **Archive-First Design**: Read from archive first, fall back to connector API when needed
- **Caching**: In-memory caching of connector and session mappings for performance
## Architecture
The Archivist is built on three core architectural principles:
### 1. Archive-First Read Strategy
The Archivist is the primary source of truth for historical data:
- UI and APIs query the archive first
- Only fall back to connector APIs if data is not in archive
- This enables offline access and consistent history across restarts
### 2. Write-Through Event Capture (Append-Only)
The EventHandler subscribes to the global event stream from dirigent_core:
- Captures session creation, message streaming, and tool calls in real-time
- Uses MessageAccumulator to assemble streaming chunks into complete messages
- Writes complete messages to archive immediately upon finalization
- No polling required - fully event-driven
- **Append-only writes**: Messages are appended as events arrive, NOT in chronological order
- File order reflects event timing, not message timestamps
### 3. File-Based Storage with Sort-on-Read
All data is stored in human-readable, grep-able formats:
- **NDJSON** (Newline-Delimited JSON): Incremental append-only logs for messages and mappings
- **JSON**: Structured metadata for sessions and connectors
- **TSV** (Tab-Separated Values): Fast indices for cross-references
- **Content-Addressed Files**: Binary attachments stored by SHA-256 hash for deduplication
- **Sort-on-Read**: `get_messages()` sorts by timestamp and message_id to ensure chronological order despite append-only writes
## Backend Trait Layer (Phase 2)
The archivist uses a trait-based backend abstraction. `ArchiveBackend`
defines the mandatory session and message primitives every backend must
provide, plus `as_xxx()` accessors returning optional sub-traits:
- `SearchBackend` — reserved for Phase 3+ indexed backends (not wired)
- `DagBackend` — session lineage DAG edges
- `MetaEventsBackend` — ACP connection lifecycle events
- `ConnectorRegistryBackend` — per-archive connector metadata
- `SessionMappingBackend` — native↔scroll session ID mapping
`JsonlBackend` is the Phase 2 concrete implementation (file-based
NDJSON/JSON/TSV) and opts into every sub-trait except `SearchBackend`
(content search continues to be served by ripgrep via
`crates/api/src/archivist/search_task.rs`).
The `Archivist` struct (in `src/coordinator/`) owns a registry of backends
keyed by archive name and performs orchestration (alias detection, session
lineage, move/copy, DAG walks, archive lifecycle). Consumers hold
`Arc<Archivist>` directly — the coordinator is concrete, not a trait.
See `docs/plans/2026-04-18-archivist-phase2-design.md` for design rationale.
## Multi-Backend Registry (Phase 3)
The coordinator (`Archivist`) holds `Vec<Arc<ArchiveRegistration>>` sorted
by `read_priority` instead of a flat `HashMap<name, Arc<dyn ArchiveBackend>>`.
Each registration carries:
- `backend: Arc<dyn ArchiveBackend>` + its declared capabilities
- `failure_mode`: `Required` (must succeed) | `BestEffort` (errors log + drift health)
- `read_priority`: lower = tried first for reads; also selects the default
write target when no archive is named
- `write_active`: participates in fanout writes
- `enabled`: kill-switch without removing config
- `write_policy`: `Inline` (default; `await` per call) or `Queued`
(mpsc + batch_window + overflow policy)
- Runtime state: `last_health`, `last_error`, `consecutive_failures`
(all `Arc<RwLock<_>>`, shared with the writer task when queued)
- Optional `writer: Option<WriterHandle>` (Some iff `write_policy = Queued`)
Backends are declared in `dirigent.toml` under `[[archives]]` and
constructed at boot via `Archivist::from_config(cfg, &BackendRegistry)`.
Add a new backend type by implementing `BackendFactory` and registering
it on the `BackendRegistry` before `from_config`.
### Reads
`get_session`, `get_messages_paged`, `count_messages`, `get_meta_events`,
`get_children`, etc. walk the registry in priority order via
`read_walk_per_session(scroll_id, predicate, op)`. The predicate
capability-filters; `Unavailable` backends are skipped. The first backend
that returns `Some(value)` wins and its name is cached against the
`scroll_id` in a positive LRU (capacity 10_000). Subsequent reads for the
same `scroll_id` short-circuit to the cached backend before falling back
to the full priority walk.
Collection-shape reads (`list_sessions_paged`, `list_connectors`,
`list_meta_sessions`, `find_meta_session_by_client`) use
`read_walk_collection` — first enabled backend that can answer wins, no
cache, no aggregation across backends. Phase 3 explicitly defers
cross-backend merge/dedup to a later phase.
### Writes
Mutating methods (`append_messages`, `register_session`, `update_session_*`,
`append_meta_events`, `append_dag_edge`, `clear_session_messages`,
`update_connector_fingerprint`) resolve a primary (per-call `archive:
Some(name)` override or the default-write target) and fan out to every
other `enabled && write_active` backend that has the required capability.
Capability-mismatched backends are skipped with a debug `capability_skip`
log (never an error). `Required` failures propagate to the caller;
`BestEffort` failures log + drift health.
`register_connector` currently does NOT fan out — alias detection + the
tri-state `Accepted`/`Aliased`/`Rejected` return shape make replication
non-trivial. Fanout for connectors is deferred; single-backend setups are
unaffected.
For `write_policy = Queued` backends, the primary/secondary write paths
enqueue a `WriteOp` into the backend's writer task instead of awaiting.
Errors drift the backend's health but do not propagate to the caller.
Coalescing merges consecutive `AppendMessages`/`AppendMetaEvents` for the
same `scroll_id` within `batch_window_ms`.
### Cross-backend operations
- `delete_session(scroll_id, _)` fans out to every enabled backend that has
the session. Copies in `write_active=false` backends produce
`ArchivistError::DeleteOnReadOnlyBackend` (write-active copies are still
deleted); cache invalidated regardless of outcome.
- `copy_session(scroll_id, from, to)` reads from `from`, writes to `to`,
including DAG and meta-events when both sides have the capability. The
source remains canonical (the cache is NOT rewritten).
- `move_session(scroll_id, from, to)` is `copy + delete-from-source`. If
the source-side delete fails after the copy succeeded,
`ArchivistError::PartialMove { copied_to, delete_error }` is returned so
the caller knows the session now lives in both places.
The Phase 2 connector-aware `move_session(scroll_id, target_connector_uid, _)`
and `copy_session(scroll_id, target_connector_uid, _)` survived the Phase
3 rename as `move_session_to_connector` / `copy_session_to_connector`.
Their bulk variant is `move_sessions_to_connector`.
### Health
`HealthStatus` drifts on every coordinator call that observes a backend:
- Successful write → `Healthy`; `consecutive_failures` reset to 0.
- Successful read → `Healthy` (only rescues `Degraded`; does not reset the counter).
- Write failure → `Degraded { reason }`; `consecutive_failures += 1`; after
K = 5 consecutive failures drifts to `Unavailable { reason }`. Reads skip
`Unavailable` backends; writes against an `Unavailable` `Required`
backend fail, while writes against an `Unavailable` `BestEffort` backend
are still attempted.
- Read failure alone never drifts past `Degraded`; writes are the
authoritative health signal.
`list_archives_with_health()` returns a `Vec<ArchiveStatus>` snapshot of
every registration: name, type, capabilities, health, last_error, and
queue_depth (for queued backends).
### Lifecycle
Phase 3 is **startup-only**. `add_archive` / `remove_archive` /
`set_default_archive` on the coordinator return
`ArchivistError::DynamicRegistryUnsupported`. To change the registry,
edit `dirigent.toml` and restart the server. `Archivist::shutdown()`
drains queued writer tasks (sends `WriteOp::Shutdown` on each writer's
mpsc and awaits ack); call it before process exit.
Test-only constructors `Archivist::from_registrations(regs)` and
`SessionMetadata::stub(scroll_id)` live under `#[cfg(any(test, feature =
"test-utils"))]` for integration tests that bypass the factory.
See `docs/plans/2026-04-19-archivist-phase3-design.md` for the full
design rationale, and `examples/multi_backend.rs` for a runnable
end-to-end example.
## Module Organization
### Core Modules
- **`lib.rs`**: Public API surface and re-exports
- **`types.rs`**: Core data structures (session metadata, message records, connector info, API types)
- **`error.rs`**: Error types and Result alias for archivist operations
### Backend Layer (`backend/`)
- **`traits.rs`**: `ArchiveBackend` trait + 5 optional sub-traits
- **`capability.rs`**: `ArchiveCapability` enum + `CapabilitySet` type
- **`health.rs`**: `HealthStatus` enum returned by `health_check`
- **`contract.rs`**: Reusable behavioral tests for any `&dyn ArchiveBackend` (cfg-gated)
- **`mock.rs`**: In-memory `MockBackend` for coordinator unit tests (cfg-gated)
### Concrete Backends (`backends/`)
- **`jsonl/`**: The file-based `JsonlBackend` — the only Phase 2 backend.
Reuses `storage/` primitives for NDJSON/JSON/TSV operations.
### Coordinator (`coordinator/`)
- **`mod.rs`**: The `Archivist` struct + constructors
- **`archives.rs`**: Archive lifecycle (add/remove/list/default)
- **`connectors.rs`**: Connector registration + alias detection
- **`sessions.rs`**: Session registration, metadata updates, move/copy
- **`meta.rs`**: Meta events, DAG walks, cleanup
### Storage Layer (`storage/`)
Low-level file I/O primitives used by `JsonlBackend`. All storage operations are async and use tokio.
- **`paths.rs`**: ArchivePaths utility for consistent directory structure and path resolution
- **`ndjson.rs`**: Newline-delimited JSON operations (read_ndjson, append_ndjson)
- **`json.rs`**: JSON operations (read_json, write_json)
- **`tsv.rs`**: Tab-separated value operations for connector index
- **`files.rs`**: Content-addressable file storage with SHA-256 hashing and deduplication
### Supporting Modules
- **`registry.rs`**: Archive registry persistence (multi-archive metadata)
- **`migration.rs`**: Single-archive → multi-archive migration path
- **`session.rs`**: Session lineage types shared across layers
- **`accumulator.rs`**: MessageAccumulator for assembling streaming message chunks
- **`backfill.rs`**: Backfill helpers for importing historical sessions
- **`import/`**: External conversation importers (e.g. Claude export)
### Events
- **`events.rs`**: EventHandler for subscribing to dirigent_protocol events and archiving them
## Configuration
The Archivist archive root is determined by `DirigentPaths` resolution:
- Set `DIRIGENT_DATA_DIR` to override the data directory; archives will be stored at `<data_dir>/archives/`
- Defaults to `~/.local/share/dirigent/archives/` (or platform equivalent)
```bash
DIRIGENT_DATA_DIR=/path/to/data dx serve
```
## Archive Structure
```
dirigent_archive/
├── .contexts/
│ └── {scroll_id:uuidv7}/ # One directory per session
│ ├── session.json # Session metadata
│ ├── messages.jsonl # Incremental message log (.ndjson also supported)
│ └── lineage.json # Session lineage info (optional)
├── .db/
│ └── connectors/
│ ├── index.tsv # Fast connector lookup (TSV)
│ └── {connector_uid}/
│ ├── connector.json # Connector metadata
│ └── sessions.jsonl # Session mappings (.ndjson also supported)
└── .files/
└── {sha256-hash} # Content-addressable file storage
```
### Why Hidden Directories?
The `.contexts`, `.db`, and `.files` directories are hidden (prefixed with `.`) to keep the archive root clean for future rendered outputs (like `chat.md` exports). This is similar to how `.git` hides implementation details in a codebase.
## File Formats
### Session Metadata (`session.json`)
```json
{
"version": 1,
"scroll_id": "01936e8f-e5a7-7000-8000-000000000001",
"created_at": "2025-01-01T12:00:00Z",
"updated_at": "2025-01-01T12:30:00Z",
"title": "Implement user authentication",
"connector_uid": "01936e8f-e5a7-7000-8000-000000000002",
"native_session_id": "abc123",
"agent_id": null,
"parent_scroll_id": null,
"continuation": null,
"tags": ["backend", "auth"],
"metadata": {
"source": "OpenCode",
"model": "claude-3-5-sonnet"
}
}
```
### Messages Log (`messages.jsonl`)
One JSON object per line, **append-only**:
```jsonl
{"version":1,"message_id":"01936e8f-e5a7-7000-8000-000000000003","session":"01936e8f-e5a7-7000-8000-000000000001","parent_id":null,"ts":"2025-01-01T12:01:00Z","role":"user","author":"alice","content_md":"How do I implement JWT auth?","attachments":[],"metadata":{}}
{"version":1,"message_id":"01936e8f-e5a7-7000-8000-000000000004","session":"01936e8f-e5a7-7000-8000-000000000001","parent_id":"01936e8f-e5a7-7000-8000-000000000003","ts":"2025-01-01T12:01:10Z","role":"assistant","author":"claude","content_md":"Here's how to implement JWT authentication...","attachments":[],"metadata":{"model":"claude-3-5-sonnet"}}
```
**IMPORTANT - Ordering**: The order of lines in the message log file (`messages.jsonl` or `messages.ndjson`) reflects **event arrival order**, NOT chronological order. Assistant replies often arrive after subsequent user messages due to streaming latency, resulting in non-chronological file order. Always use the `Archivist::get_messages()` API to retrieve messages, which sorts by `ts` (timestamp) and `message_id` (UUIDv7) to guarantee chronological order.
**File Format Compatibility**: The archivist supports both `.ndjson` and `.jsonl` file extensions for newline-delimited JSON files. When reading, `.jsonl` is preferred if present, with automatic fallback to `.ndjson` for backward compatibility. Write operations use `.jsonl` (canonical format). Both formats are identical in content - the difference is purely the file extension.
### Connector Index (`index.tsv`)
Tab-separated values with header row:
```tsv
connector_uid type title client_native_id alias_of created_at
01936e8f-e5a7-7000-8000-000000000002 OpenCode OpenCode Local opencode@http://localhost:12225 2025-01-01T12:00:00Z
```
### Session Mappings (`sessions.jsonl`)
Maps native session IDs from connectors to scroll IDs in the archive:
```jsonl
{"version":1,"connector_uid":"01936e8f-e5a7-7000-8000-000000000002","native_session_id":"abc123","scroll_id":"01936e8f-e5a7-7000-8000-000000000001","created_at":"2025-01-01T12:00:00Z","alias_of":null}
```
## Message Ordering Guarantees
### The Problem: Append Order ≠ Chronological Order
In the event-driven architecture, messages are written to the message log file (`messages.jsonl`) as completion events arrive. Due to streaming latency:
- User messages complete nearly instantly and are written immediately
- Assistant messages stream over time and complete later
- A second user message can be written before the first assistant reply completes
Example scenario:
```
T0: User sends "tell me a joke about snakes" (ts=18:23:36.947)
T1: Assistant starts streaming reply (ts=18:23:36.969)
T2: User sends "now one about tigers" (ts=18:23:49.429) <- completes and writes BEFORE assistant finishes
T3: Assistant finishes "snakes" reply <- writes AFTER "tigers" user message
```
File order in the message log file:
```
1. user "snakes" (18:23:36.947)
2. user "tigers" (18:23:49.429) <- written second
3. assistant "snakes" (18:23:36.969) <- written third, but timestamp is earlier!
```
### The Solution: Sort-on-Read
The `Archivist::get_messages()` implementation sorts messages before returning:
1. **Primary sort**: `ts` (timestamp) ascending
2. **Secondary sort**: `message_id` (UUIDv7) ascending for stable tie-breaking
This guarantees chronological order regardless of NDJSON append order:
```
1. user "snakes" (18:23:36.947)
2. assistant "snakes" (18:23:36.969)
3. user "tigers" (18:23:49.429)
```
### Why This Approach?
- **Maintains durability**: Append-only writes preserve crash safety
- **No migration needed**: Existing archives work without rewrites
- **Simple implementation**: No buffered writes or complex write-time ordering
- **Performance trade-off**: Small CPU cost on read (sorting) vs. complex write-time coordination
### Consumer Guidance
- **DO**: Use `Archivist::get_messages()` to retrieve messages
- **DON'T**: Read the message log file directly and assume file order = chronological order
- **UI/API**: Always sort by `ts` then `message_id` for defense in depth
- **Tie-breaking**: Use `message_id` (UUIDv7) as secondary sort for stable ordering when timestamps match
## Key Types
### SessionMetadata
Stores all metadata about a session including:
- **scroll_id**: UUIDv7 identifier for the session
- **connector_uid**: Which connector owns this session
- **native_session_id**: Original session ID from the connector (optional)
- **title**: Optional human-readable session title (see Title Management below)
- **parent_scroll_id**: For session lineage (splits, continuations)
- **continuation**: Type of continuation (SPLIT, COMPACT, REFERENCE, EDIT)
- **tags**: User-defined categorization
- **metadata**: Free-form JSON for connector-specific fields
#### Title Management
Session titles are fully supported and persist across restarts. Titles are stored in the `SessionMetadata` struct and saved to the `session.json` file.
**Setting Titles:**
```rust
// Update title for an existing session
archivist.update_session_metadata(
scroll_id,
Some("My Custom Session Title".to_string()),
None, // model
None // archive
).await?;
```
**Default Behavior:**
- New sessions can specify an initial title during registration
- If no title is provided, sessions default to `None`
- The UI typically displays "Untitled" for sessions without titles
**Title Loading:**
- Titles are automatically loaded when retrieving session metadata via `get_session_metadata()`
- Session lists include titles via `list_sessions()` and `list_sessions_all()`
- Titles are part of the `SessionMetadata` struct returned by all session queries
**UI Integration:**
- The web UI displays session titles in the session list and sidebar
- Users can rename sessions via the "Rename" button in the session list view
- Renaming calls `api::archivist::rename_session()` which uses `update_session_metadata()`
- Title changes are persisted immediately and survive application restarts
### MessageRecord
Represents a single message in the archive:
- **message_id**: UUIDv7 identifier
- **session**: scroll_id this message belongs to
- **role**: "user", "assistant", or "system"
- **content_md**: Message content in Markdown format
- **attachments**: References to attached files
- **metadata**: Free-form JSON for connector-specific fields
### ConnectorRecord
Metadata about a connector:
- **connector_uid**: UUIDv7 identifier
- **type**: "OpenCode", "ACP", or custom
- **client_native_id**: Unique identifier from client (e.g., "opencode@http://localhost:12225")
- **alias_of**: If this connector is an alias of another (for deduplication)
## Archivist Public API
The `Archivist` struct (in `coordinator/`) is the main public entry point
for archival operations. Consumers hold `Arc<Archivist>` and call inherent
methods — there is no `Archivist` trait anymore. The coordinator resolves
the target backend per call (via `archive: Option<String>`) and delegates
to `ArchiveBackend` methods.
Key method families (see `coordinator/*.rs` for full signatures):
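- **Archive lifecycle** (`archives.rs`): `add_archive`, `remove_archive`,
`list_archives`, `set_default_archive` (in Phase 3 the registry is
startup-only, so `add_archive` / `remove_archive` / `set_default_archive`
return `ArchivistError::DynamicRegistryUnsupported`; see the Lifecycle
section)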
- **Connectors** (`connectors.rs`): `register_connector` with tri-state
result (Accepted / Aliased / Rejected), `list_connectors`
- **Sessions** (`sessions.rs`): `register_session`, `get_session_metadata`,
`update_session_metadata`, `list_sessions_paged`, `move_session`,
`copy_session`, `resolve_session`
- **Messages**: `append_messages`, `get_messages` (sorts by `ts` then
`message_id` for stable chronological order)
- **Meta / DAG** (`meta.rs`): meta-event recording, session lineage DAG
walks, cleanup routines
## List Filter vs. Full-Text Search
Two distinct query paths exist — do not conflate them.
**List filter** — `Archivist::list_sessions_paged(SessionListQuery)` returns a
cursor-paged list of sessions, AND-filtered by `title_query` (substring on
title), `tags`, `model_filter` (substring on `metadata.model`), `project_id`,
`connector_uid`, and `include_hidden`. This is the right tool for "narrow the
list of visible sessions."
**Full-text search** — `api::search_sessions` (in the `api` package, backed by
`api::archivist::search_task::SearchTask`) spawns `rg --json` over the
archive's `.contexts/` tree to find messages containing text. It streams
`SearchExcerpt`s with parsed NDJSON content and supports cancellation via
`CancellationToken`. This is the right tool for "find messages containing
text."
**Do not extend `list_sessions_paged` to do content search.** Content search
belongs in the ripgrep pipeline. Future improvements to content search
(indexed backends, relevance scoring) are Phase 2d / Phase 3 concerns.
## JsonlBackend Implementation
The Phase 2 production backend — an implementation of `ArchiveBackend` plus
every sub-trait except `SearchBackend`:
- **Thread-safe**: Uses RwLock for in-memory caches
- **Async**: All operations use tokio for non-blocking I/O
- **Caching**: In-memory caches for connector and session mappings
- **Collision Detection**: Tri-state registration for connectors and sessions
Located under `src/backends/jsonl/` and split by concern (`backend.rs`,
`connectors.rs`, `dag.rs`, `mapping.rs`, `meta.rs`).
### Caching Strategy
`JsonlBackend` maintains two in-memory caches:
1. **connector_cache**: `HashMap<Uuid, ConnectorRecord>`
- Populated on registration
- Loading from the TSV index on startup is a planned enhancement (not yet implemented)
2. **session_cache**: `HashMap<(Uuid, String), Uuid>`
- Maps (connector_uid, native_session_id) to scroll_id
- Populated on registration and session resolution
- Enables fast session lookups without disk I/O
## Event Handling
The EventHandler subscribes to dirigent_protocol events and archives them in real-time:
```rust
// Create archivist and event handler
let archivist = Archivist::new_with_single_archive(archive_path).await?;
let handler = EventHandler::new(Arc::new(archivist));
// Subscribe to event stream from dirigent_core
let events = event_stream.subscribe();
// Run event loop (blocking)
handler.run(events).await;
```
### Supported Events
- **SessionCreated**: Registers new sessions with the archivist
- **MessageCompleted**: Writes finalized messages to the archive
- **SessionUpdate**: Accumulates streaming message chunks
- AgentMessageChunk
- UserMessageChunk
- AgentThoughtChunk
- ToolCall
### MessageAccumulator
Assembles streaming message chunks into complete messages:
- Accumulates text chunks by message_id
- Tracks thinking blocks separately
- Stores tool calls with input/output
- Finalizes messages on MessageCompleted event
- Converts to MessageRecord for archival
## Integration with dirigent_core
The Archivist integrates with dirigent_core via the global event stream:
1. **CoreRuntime** emits events for all connector operations
2. **EventHandler** subscribes to event stream
3. **MessageAccumulator** assembles streaming chunks
4. **Archivist** writes complete messages to archive
This enables:
- Automatic archival of all sessions and messages
- No polling required - fully event-driven
- Consistent history across restarts
- Offline access to historical data
## Testing
The package has comprehensive test coverage across multiple dimensions:
### Unit Tests
Located in each module (`src/*.rs`, `src/storage/*.rs`):
- Type serialization/deserialization
- UUIDv7 generation and ordering
- Timestamp formatting (RFC 3339)
- Storage operations (NDJSON, JSON, TSV, files)
- Connector registration tri-state logic
- Session registration and alias detection
### Integration Tests
Located in `tests/`:
- `integration_tests.rs`: Full `Archivist` + `JsonlBackend` lifecycle, event
handler integration, multi-connector scenarios, session lineage, message
accumulation
- `list_sessions_paged_test.rs`, `pagination_test.rs`: List filter + cursor
pagination coverage
- `import_claude_idempotency_test.rs`: Claude export re-import idempotency
### Backend Contract Tests
`src/backend/contract.rs` holds reusable async assertions that any
`&dyn ArchiveBackend` must pass. `JsonlBackend` and `MockBackend` both
run the contract suite; new backends added in Phase 3+ should do the same.
### Examples
Located in `examples/`:
- `basic_usage.rs`: Core archivist operations
- `event_handling.rs`: EventHandler and MessageAccumulator
- `file_storage.rs`: Content-addressable file storage
Run tests:
```bash
cargo test --package dirigent_archivist
```
Run examples:
```bash
cargo run --package dirigent_archivist --example basic_usage
cargo run --package dirigent_archivist --example event_handling
cargo run --package dirigent_archivist --example file_storage
```
## Performance Characteristics
- **Append Operations**: O(1) with sequential file writes
- **Session Lookup**: O(1) on an in-memory cache hit, O(n) on a cache miss
- **Message Retrieval**: O(n log n) where n = number of messages (O(n) NDJSON parsing plus the sort-on-read by `ts` and `message_id`)
- **File Storage**: O(1) content-addressable lookup with SHA-256 hashing
- **Connector Index**: O(n) TSV scan, suitable for hundreds of connectors
### Scalability Considerations
- **Large Sessions**: NDJSON is append-only, so reading large sessions requires parsing all lines
- **Many Sessions**: TSV indices are suitable for thousands of sessions per connector
- **File Deduplication**: SHA-256 hashing provides automatic deduplication across sessions
- **Concurrent Access**: RwLock allows multiple concurrent readers, single writer
## Error Handling
The Archivist uses thiserror for rich error types:
```rust
pub enum ArchivistError {
IoError(std::io::Error),
SerdeError(serde_json::Error),
SessionUnknown(Uuid),
CollisionInconsistent(Uuid),
// ... etc
}
```
All public APIs return `Result<T, ArchivistError>` for explicit error handling.
## Development Notes
- All storage operations are async (using tokio)
- Content-addressable storage uses SHA-256 hashes (hex-encoded)
- Archive directory structure mirrors session/message hierarchy
- UUIDv7 provides time-ordered, sortable identifiers
- RFC 3339 UTC timestamps for all time-based fields
- Schema versioning via `version` field in all records
## Related Packages
- **dirigent_protocol**: Shared types and protocol definitions (dependency)
- **dirigent_core**: Runtime integration for SSE event capture (integration point)
- **api**: Server functions for archive queries (e.g. `api::search_sessions`, `api::archivist::rename_session()`)
- **web**: UI for archive browsing and search (session list, sidebar, rename)
## Phase 4: `ArchiveFilter` (2026-04-21)
Every `ArchiveRegistration` carries a `filter: ArchiveFilter`. The filter
describes which sessions/writes the backend wants to receive. Fields:
- `include_connectors: Option<HashSet<Uuid>>` — if Some, only these
connector UIDs pass. `None` means no connector gate.
- `exclude_connectors: HashSet<Uuid>` — always rejected.
- `include_tags: HashSet<String>` — if non-empty, the session must carry
at least one matching tag.
- `exclude_tags: HashSet<String>` — any matching tag rejects.
- `include_hidden: bool` — default `true`. If `false`, sessions whose
metadata has `"hidden": true` are skipped.
### Primary-always-writes invariant
The per-call primary (either the `archive: Some(name)` argument or the
default write-target) is **never** filtered. If a caller explicitly asks
to write to archive X, the filter on X is not consulted. Filters only
gate secondary fanout.
### Boot validator
At boot (`coordinator/boot.rs`), the validator rejects configurations
where:
- No write-active + enabled registration has an **unrestricted** filter
(`ArchiveFilter::default()` is unrestricted). Prevents configurations
that silently drop all writes.
- An archive's filter has `include_connectors = Some(empty set)` —
equivalent to "reject everything", which is almost certainly a config
bug.
See `docs/plans/2026-04-21-archivist-phase4-design.md` §4 for the full
design rationale.
## Phase 5: Importers (2026-04-21)
The `import::` module centres on an `Importer` trait with per-source
implementations under `import::sources::*`. Each source produces a
`ParsedConversation` (ChatGPT) / `ParsedSession` (Codex) / session
directory walk (Claude) and feeds the results through the common
`import_sessions` orchestrator, which fires `ImportProgressEvent`s on a
bounded `ImportProgressSink`.
### `Importer` trait
Every importer declares a `config_shape()` so UIs can render a dynamic
form; a `discover()` that returns an `ImportDiscovery` preview; and an
`import()` that does the actual work. All three methods are async.
The trait lives in `import::trait_def`. Shape types (`ImportConfig`,
`ImportTarget`, `ConfigField`, `ConfigFieldKind`, `ImportError`) are
serialisable and safe to cross the WASM boundary.
### Registry
`ImporterRegistry::with_defaults()` registers the importer for every
enabled `importer-*` feature. Currently: `claude`, `chatgpt`, `codex`. The
registry is constructed at boot and stored on `AppState`.
### Progress sink
`ImportProgressSink::channel()` returns a bounded mpsc pair.
Non-terminal events use `try_send` (dropped on full); terminal events
use `send().await` so consumers always see the final state.
### Source crates
- `dirigent_chatgpt` — parses `conversations.json` from the OpenAI data
export.
- `dirigent_codex` — parses `*.jsonl` session files under
`~/.codex/sessions`.
Both crates hold pure parser types with zero dirigent-specific types.
See `docs/plans/2026-04-21-archivist-phase5-design.md`.
## Future Enhancements
- Indexed `SearchBackend` implementations (tantivy/sqlite) — currently
content search is ripgrep-based in the `api` package
- Session splitting and lineage management (mutations.ndjson)
- Knowledge overview generation (chat.md exports)
- Embedding storage and search (embeds/)
- Network RPC interface for remote archivist
- Compaction and pruning policies
- Additional concrete backends (e.g. SQLite, remote)
## Documentation
- **Package README**: `./README.md` - User-facing overview
- **Architecture Docs**: `../../docs/building/05_archivist/` - Design and planning
- **API Docs**: Run `cargo doc --package dirigent_archivist --open`
- **Examples**: See `examples/` directory for working code samples
+69
View File
@@ -0,0 +1,69 @@
[package]
name = "dirigent_archivist"
version = "0.1.0"
edition = "2021"
[lib]
path = "src/lib.rs"
[features]
# All built-in importers are on by default. To ship a slimmer build, disable
# default features (`default-features = false`) and enable only the
# `importer-*` flags you need.
default = ["importer-claude", "importer-chatgpt", "importer-codex"]
# Exposes the sub-trait contract test harness (`backend::contract`) to
# downstream crates so new backends can reuse the same behavioral checks.
test-utils = []
# Per-source importer feature gates. Each flag guards the corresponding
# `ImporterRegistry::with_defaults` registration and (where relevant) the
# source module itself.
importer-claude = []
importer-chatgpt = ["dep:dirigent_chatgpt"]
importer-codex = ["dep:dirigent_codex"]
[dependencies]
# Core dependencies
dirigent_protocol = { path = "../dirigent_protocol" }
dirigent_anth = { path = "../dirigent_anth" }
dirigent_chatgpt = { path = "../dirigent_chatgpt", optional = true }
dirigent_codex = { path = "../dirigent_codex", optional = true }
camino = "1.1"
# UUID support with v7 and serde
uuid = { version = "1.11", features = ["v5", "v7", "serde"] }
# Date/time handling
chrono = { version = "0.4", features = ["serde"] }
# Serialization
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.8"
# Async runtime and file operations
tokio = { version = "1.42", features = ["fs", "sync", "time", "io-util", "macros", "rt-multi-thread"] }
# Logging
tracing = "0.1"
# Error handling
thiserror = "2.0"
anyhow = "1"
# Hashing for content-addressable storage
sha2 = "0.10"
hex = "0.4"
# LRU read cache for registry backends
lru = "0.12"
# Async traits
async-trait = "0.1"
# Async futures
futures = "0.3"
[dev-dependencies]
tempfile = "3.0"
walkdir = "2"
+338
View File
@@ -0,0 +1,338 @@
# Dirigent Archivist
Persistent storage for all agentic interactions in Dirigent.
## Overview
The Archivist automatically archives every conversation, message, and file from your AI sessions into a local, grep-able, human-readable archive. No cloud required - your data stays on your machine in formats you can read and search manually.
## Why Archivist?
- **Offline Access**: All conversations are saved locally, accessible even when connectors are offline
- **Manual Curation**: Files are in plain JSON/NDJSON/TSV - grep, edit, or analyze them with any tool
- **Knowledge Base**: Build a searchable archive of all your AI interactions across projects
- **Session Lineage**: Track conversation branches, splits, and continuations
- **File Deduplication**: Attachments are stored once, referenced multiple times (content-addressable)
- **Archive-First**: UI reads from local archive first, only falls back to remote connectors when needed
## Quick Start
The Archivist runs automatically when you start Dirigent. The archive location is determined by the `DIRIGENT_DATA_DIR` environment variable (archives are stored at `<data_dir>/archives/`):
```bash
# Override data directory (archives at /path/to/data/archives/)
DIRIGENT_DATA_DIR=/path/to/data dx serve
```
That's it! Every session and message will be automatically archived.
## Archive Structure
Your archive is organized like this:
```
dirigent_archive/
├── .contexts/ # Session data
│ └── 01936e8f-e5a7-7000-8000.../
│ ├── session.json # Session metadata
│ └── messages.ndjson # All messages (one JSON per line)
├── .db/
│ └── connectors/ # Connector registry
│ ├── index.tsv # Fast lookup table
│ └── 01936e8f-e5a7.../
│ ├── connector.json # Connector info
│ └── sessions.ndjson # Session ID mappings
└── .files/ # Attachments (by SHA-256)
└── a1b2c3d4... # Content-addressable storage
```
### Why Hidden Directories?
The `.contexts`, `.db`, and `.files` directories start with `.` to keep them internal (like `.git`). In the future, you'll be able to export rendered markdown files into the archive root for easy reading.
## File Formats
### Session Metadata (`.contexts/{id}/session.json`)
```json
{
"version": 1,
"scroll_id": "01936e8f-e5a7-7000-8000-000000000001",
"created_at": "2025-01-01T12:00:00Z",
"updated_at": "2025-01-01T12:30:00Z",
"title": "Implement user authentication",
"connector_uid": "01936e8f-e5a7-7000-8000-000000000002",
"tags": ["backend", "auth"],
"metadata": {
"source": "OpenCode",
"model": "claude-3-5-sonnet"
}
}
```
### Messages (`.contexts/{id}/messages.ndjson`)
Newline-delimited JSON - one message per line, **append-only**:
```jsonl
{"version":1,"message_id":"...","session":"...","role":"user","ts":"2025-01-01T12:01:00Z","content_md":"How do I implement JWT auth?","attachments":[],"metadata":{}}
{"version":1,"message_id":"...","session":"...","role":"assistant","ts":"2025-01-01T12:01:10Z","content_md":"Here's how to implement JWT authentication...","attachments":[],"metadata":{"model":"claude-3-5-sonnet"}}
```
**IMPORTANT**: Messages are written as events arrive, NOT in chronological order. Assistant replies often appear after subsequent user messages due to streaming latency. When reading programmatically, use the Archivist API which sorts by timestamp (`ts`) to ensure correct order. For manual inspection, sort by the `ts` field.
### Connector Index (`.db/connectors/index.tsv`)
Tab-separated values for fast scanning:
```tsv
connector_uid type title client_native_id alias_of created_at
01936e8f... OpenCode OpenCode Local opencode@http://localhost:12225 2025-01-01T12:00:00Z
```
## Searching Your Archive
Since everything is plain text, you can use standard Unix tools:
```bash
# Find all sessions about "authentication"
grep -r "authentication" dirigent_archive/.contexts/*/session.json
# Find messages mentioning a specific error
grep "ECONNREFUSED" dirigent_archive/.contexts/*/messages.ndjson
# List all sessions for a connector
cat dirigent_archive/.db/connectors/*/sessions.ndjson | jq .
# Get all user messages from a session (sorted by timestamp)
cat dirigent_archive/.contexts/01936e8f.../messages.ndjson | jq -s 'sort_by(.ts) | .[] | select(.role=="user")'
# View messages in chronological order
cat dirigent_archive/.contexts/01936e8f.../messages.ndjson | jq -s 'sort_by(.ts)'
```
**Note on ordering**: Remember that the file order is append-only (event arrival order). Always sort by `ts` (timestamp) when reading manually to see messages in chronological order.
## Integration with Dirigent
The Archivist integrates seamlessly with Dirigent's core runtime:
1. **Automatic Archiving**: Every session and message is archived in real-time as events arrive
2. **Event-Driven**: No polling - listens to dirigent_core's event stream
3. **Append-Only Writes**: Messages written as completion events arrive (preserves durability)
4. **Sort-on-Read**: API returns messages in chronological order despite append-only file order
5. **UI Integration**: Web UI reads from archive first, shows data even when connectors are offline
6. **Connector Coordination**: Assigns stable UUIDs to connectors with collision detection
## Key Concepts
### Scroll IDs
Every session gets a unique `scroll_id` (UUIDv7) that's independent of the connector's native session ID. This allows:
- Sessions to move between connectors
- Stable references even if connector data is deleted
- Time-ordered sorting (UUIDv7 encodes timestamp)
### Session Lineage
Sessions can have parent sessions, creating a tree of related conversations:
- **Split**: Fork conversation at a specific message
- **Compact**: Summarized version of parent
- **Reference**: Points to parent without duplication
- **Edit**: Modified version of parent
### Content-Addressable Storage
Files are stored by their SHA-256 hash, so:
- Same file uploaded twice uses same storage
- Files can be shared across sessions without duplication
- You can verify file integrity by hash
## Configuration
### Environment Variables
- `DIRIGENT_DATA_DIR`: Override data directory; archives are stored at `<data_dir>/archives/`
### Example Configurations
```bash
# Use custom data directory (archives at /home/user/mydata/archives/)
DIRIGENT_DATA_DIR=/home/user/mydata dx serve
# Use global data directory
DIRIGENT_DATA_DIR=/home/user/.dirigent dx serve
# Use temporary data directory (testing)
DIRIGENT_DATA_DIR=/tmp/dirigent_test dx serve
```
## Programmatic Access
While the Archivist runs automatically, you can also use it programmatically:
```rust
use dirigent_archivist::Archivist;
use std::path::PathBuf;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Create an archivist over a single archive directory.
// Internally this wires up a `JsonlBackend` for the archive.
let archivist = Archivist::new_with_single_archive(
PathBuf::from("./dirigent_archive")
).await?;
// List sessions for a connector
let sessions = archivist.list_sessions(connector_uid).await?;
for session in sessions {
println!("{}: {}", session.scroll_id, session.title.unwrap_or_default());
}
Ok(())
}
```
`Archivist` is a concrete struct that owns a registry of `ArchiveBackend`
implementations keyed by archive name. In Phase 2 the only backend is
`JsonlBackend` (file-based NDJSON/JSON/TSV). See `examples/` for more
detailed usage.
## Performance
The Archivist is designed for human-scale workloads (thousands of sessions, millions of messages):
- **Fast Writes**: Appending a message to an NDJSON file is O(1) - existing lines are never rewritten
- **Cached Reads**: Common lookups cached in memory
- **Grep-able**: TSV indices can be scanned in milliseconds
- **Incremental**: Only new messages are written, no full re-writes
### Scalability Notes
- Large sessions (1000+ messages) may take a few seconds to load
- TSV indices are suitable for 100-1000 connectors
- File deduplication saves space for repeated attachments
## Querying and Curation
### Future: Knowledge Overviews
The Archivist is designed to support knowledge curation workflows:
- Export sessions as clean markdown files
- Create summaries and overviews across sessions
- Tag and categorize conversations
- Build a personal knowledge base
These features are planned for future releases.
### Current: Manual Curation
For now, you can manually curate your archive:
- Edit `session.json` to add tags
- Grep through messages for specific topics
- Copy/organize sessions into project folders
- Use jq/awk/sed to extract insights
## Advanced Features
### Session Splitting
Create a new conversation branch from any point in history:
```rust
// Future API (not yet implemented)
let new_session = archivist.split_session(
session_id,
at_message_id,
Continuation::Split
).await?;
```
### Attachment Storage
Files are automatically deduplicated using SHA-256:
```rust
// Store file (content-addressable)
let file_id = archivist.store_file(
&file_data,
"spec.pdf",
Some("application/pdf")
).await?;
// Reference in message
let attachment = AttachmentRef {
file_id, // "sha256:abc123..."
name: "spec.pdf".to_string(),
mime_type: Some("application/pdf".to_string()),
};
```
### Multi-Archive Support
`Archivist` natively manages multiple named archives via an on-disk
registry. Each archive is backed by its own `ArchiveBackend` (currently
`JsonlBackend`) and selected per call via an optional `archive` argument.
This enables:
- Separate archives per project
- A default archive plus specialized side archives
- Moving or copying sessions between archives
Future backends (e.g. SQLite, indexed, remote) will plug into the same
trait layer without changing the coordinator API.
## Troubleshooting
### Archive Not Created
If the archive directory doesn't appear:
1. Check `DIRIGENT_DATA_DIR` is set correctly (or that the default data directory is writable)
2. Ensure write permissions on parent directory
3. Check logs for I/O errors
### Missing Sessions
If sessions don't appear in archive:
1. Verify EventHandler is running
2. Check for event subscription errors in logs
3. Ensure connector emits `SessionCreated` events
### Large Archive Size
If archive grows too large:
1. Check for duplicate files in `.files/`
2. Consider archiving old sessions separately
3. Future: Use compaction features (not yet implemented)
## Development Status
**Current** (Phase 2 complete):
- Automatic archival of sessions and messages
- Event-driven integration with dirigent_core
- File-based storage with NDJSON/JSON/TSV (`JsonlBackend`)
- Content-addressable file storage
- Multi-archive coordinator with per-archive backends
- Trait-based backend abstraction (`ArchiveBackend` + sub-traits)
**Future**:
- Indexed `SearchBackend` implementations (full-text search)
- Additional concrete backends (SQLite, remote)
- Session splitting and lineage management
- Knowledge overview generation
- Network RPC interface
## Documentation
- **Developer Guide**: `CLAUDE.md` - Package architecture and implementation details
- **Architecture**: `docs/building/05_archivist/vision.md` - Design rationale
- **API Docs**: `cargo doc --package dirigent_archivist --open`
- **Examples**: See `examples/` for working code
## Contributing
The Archivist is part of the Dirigent project. See the main repository for contribution guidelines.
## License
Part of the Dirigent project.
@@ -0,0 +1,198 @@
//! Basic usage example for dirigent_archivist
//!
//! This example demonstrates:
//! - Creating an Archivist
//! - Registering a connector
//! - Registering a session
//! - Appending messages to a session
//! - Listing sessions for a connector
//! - Retrieving messages for a session
use chrono::Utc;
use dirigent_archivist::{
Archivist, MessageRecord, RegisterConnectorRequest, RegisterSessionRequest,
Result,
};
use std::path::PathBuf;
use uuid::Uuid;
/// Walks through the core `Archivist` workflow against a throwaway archive:
/// register a connector and a session, append two messages, read everything
/// back, print the on-disk layout, and finally delete the archive directory.
///
/// # Errors
///
/// Any failing archivist or filesystem call is propagated with `?`. In that
/// case the cleanup step at the end is not reached and the temporary
/// directory is left behind.
///
/// # Panics
///
/// Panics if the scroll ID resolved from the native session ID differs from
/// the one returned at registration time.
#[tokio::main]
async fn main() -> Result<()> {
    // Create a temporary archive directory for this example
    let temp_dir = std::env::temp_dir().join(format!("dirigent_example_{}", Uuid::now_v7()));
    println!("Creating archive at: {}", temp_dir.display());

    // Step 1: Create an Archivist
    // `temp_dir` is cloned because it is needed again for the tree dump and cleanup.
    let archivist = Archivist::new_with_single_archive(temp_dir.clone()).await?;
    println!("Archivist created successfully");

    // Step 2: Register a connector
    println!("\n--- Registering Connector ---");
    let connector_req = RegisterConnectorRequest {
        r#type: "OpenCode".to_string(),
        title: "OpenCode Local".to_string(),
        client_native_id: "opencode@http://localhost:12225".to_string(),
        custom_uid: None, // Let archivist generate a UID
        metadata: serde_json::json!({
            "version": "0.1.0",
            "protocol": "OpenCode HTTP API"
        }),
        fingerprint: None,
    };
    let connector_resp = archivist.register_connector(connector_req, None).await?;
    println!("Connector registered: {:?}", connector_resp);
    let connector_uid = connector_resp.connector_uid;

    // Step 3: Register a session
    println!("\n--- Registering Session ---");
    let session_req = RegisterSessionRequest {
        connector_uid,
        native_session_id: "session-abc123".to_string(),
        title: Some("Example chat session".to_string()),
        custom_scroll_id: None, // Let archivist generate a scroll ID
        metadata: serde_json::json!({
            "project_path": "/home/user/projects/example",
            "model": "claude-3-5-sonnet"
        }),
        completeness: Default::default(),
        parent_scroll_id: None,
        is_subagent: false,
        continuation: None,
        agent_id: None,
        subagent_type: None,
        spawning_tool_use_id: None,
    };
    let session_resp = archivist.register_session(session_req, None).await?;
    println!("Session registered: {:?}", session_resp);
    let scroll_id = session_resp.scroll_id;

    // Step 4: Append messages to the session
    println!("\n--- Appending Messages ---");

    // User message
    let user_msg = MessageRecord {
        version: 1,
        message_id: Uuid::now_v7(),
        session: scroll_id,
        parent_id: None,
        ts: Utc::now(),
        role: "user".to_string(),
        author: Some("alice".to_string()),
        content_md: "Hello! Can you help me write a function to calculate fibonacci numbers?"
            .to_string(),
        content_parts: None,
        attachments: vec![],
        metadata: serde_json::json!({}),
    };

    // Assistant message, threaded under the user message via `parent_id`
    let assistant_msg = MessageRecord {
        version: 1,
        message_id: Uuid::now_v7(),
        session: scroll_id,
        parent_id: Some(user_msg.message_id),
        ts: Utc::now(),
        role: "assistant".to_string(),
        author: Some("claude".to_string()),
        content_md: r#"Sure! Here's a recursive fibonacci function in Rust:
```rust
fn fibonacci(n: u32) -> u64 {
match n {
0 => 0,
1 => 1,
_ => fibonacci(n - 1) + fibonacci(n - 2),
}
}
```
This is the classic recursive implementation, though it's not the most efficient for large values of n."#
            .to_string(),
        content_parts: None,
        attachments: vec![],
        metadata: serde_json::json!({
            "model": "claude-3-5-sonnet",
            "latency_ms": 1245
        }),
    };

    // Neither record is used after this call, so both are moved rather than cloned.
    archivist
        .append_messages(scroll_id, vec![user_msg, assistant_msg], None)
        .await?;
    println!("Appended 2 messages to session");

    // Step 5: List all sessions for the connector
    println!("\n--- Listing Sessions ---");
    let page = archivist
        .list_sessions_paged(
            dirigent_archivist::SessionListQuery::default()
                .with_connector(connector_uid)
                .with_limit(100),
        )
        .await?;
    let sessions = page.items;
    println!("Found {} session(s) for connector:", sessions.len());
    for session in &sessions {
        println!(
            " - {} ({}): {:?}",
            session.scroll_id,
            session.created_at.format("%Y-%m-%d %H:%M:%S"),
            session.title
        );
    }

    // Step 6: Retrieve all messages for the session
    println!("\n--- Retrieving Messages ---");
    let messages = archivist.get_messages(scroll_id, None).await?;
    println!("Retrieved {} message(s):", messages.len());
    for msg in &messages {
        println!("\n[{}] {}", msg.role, msg.ts.format("%Y-%m-%d %H:%M:%S"));
        println!("{}", msg.content_md);
    }

    // Step 7: Demonstrate session resolution (native session ID -> scroll ID)
    println!("\n--- Resolving Session ---");
    let resolved_scroll_id = archivist
        .resolve_session(connector_uid, "session-abc123", None)
        .await?;
    println!(
        "Resolved native session 'session-abc123' to scroll_id: {}",
        resolved_scroll_id
    );
    assert_eq!(resolved_scroll_id, scroll_id);

    // Step 8: Show archive structure
    println!("\n--- Archive Structure ---");
    println!("Archive root: {}", temp_dir.display());
    println!("\nDirectory structure:");
    show_directory_tree(&temp_dir, 0)?;

    // Cleanup
    println!("\n--- Cleanup ---");
    std::fs::remove_dir_all(&temp_dir)?;
    println!("Removed temporary archive");

    Ok(())
}
/// Recursively prints `path` and everything beneath it as an indented tree.
///
/// Directories are printed with a trailing `/` and their entries are visited
/// in sorted path order. `depth` is the nesting level of `path`; the indent
/// unit is repeated once per level. Directory entries that cannot be read
/// are skipped rather than aborting the walk.
///
/// The label is the final path component. Paths that have none (a filesystem
/// root, or a path ending in `..`) are labelled with the full path instead of
/// panicking.
///
/// # Errors
///
/// Returns an error if a directory cannot be opened for listing.
fn show_directory_tree(path: &std::path::Path, depth: usize) -> Result<()> {
    let indent = " ".repeat(depth);

    // `file_name()` is `None` for roots and for paths terminating in `..`.
    let label = match path.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => path.display().to_string(),
    };

    if !path.is_dir() {
        println!("{}{}", indent, label);
        return Ok(());
    }

    println!("{}{}/", indent, label);

    // Best-effort listing: unreadable entries are dropped, the rest are sorted
    // so the output is deterministic.
    let mut entries: Vec<_> = std::fs::read_dir(path)?.filter_map(|e| e.ok()).collect();
    entries.sort_by_key(|e| e.path());

    for entry in entries {
        show_directory_tree(&entry.path(), depth + 1)?;
    }

    Ok(())
}
@@ -0,0 +1,156 @@
// Demonstration of archivist types serialization
// Run with: cargo run --package dirigent_archivist --example demo_types
use chrono::Utc;
use dirigent_archivist::*;
use uuid::Uuid;
/// Builds one sample value of each archivist record and API type and prints
/// its JSON serialization to stdout.
///
/// Values labelled as whole files (`session.json`, `connector.json`) and the
/// API responses are pretty-printed; values labelled as NDJSON/JSONL lines
/// are printed with compact single-line `to_string`.
///
/// # Panics
///
/// Panics if any `serde_json` serialization call fails (every call is
/// followed by `unwrap`).
fn main() {
println!("=== ARCHIVIST TYPES DEMONSTRATION ===\n");
// Demo 1: SessionMetadata (matches session.json format)
println!("1. SessionMetadata (session.json):");
let session_metadata = SessionMetadata {
version: 1,
scroll_id: Uuid::now_v7(),
created_at: Utc::now(),
updated_at: Utc::now(),
title: Some("Example Session".to_string()),
connector_uid: Uuid::now_v7(),
native_session_id: Some("abc123".to_string()),
agent_id: Some("claude-3-5".to_string()),
parent_scroll_id: None,
continuation: Some(Continuation::Split),
tags: vec!["example".to_string(), "test".to_string()],
metadata: serde_json::json!({
"source": "OpenCode",
"project": "dirigent"
}),
no_update: false,
kind: SessionKind::Chat,
acp_client_id: None,
is_connected: None,
current_session_id: None,
models: None,
modes: None,
config_options: None,
completeness: SessionCompleteness::default(),
matrix_room_id: None,
matrix_sharing_active: false,
matrix_shared_at: None,
is_subagent: false,
subagent_type: None,
spawning_tool_use_id: None,
};
println!(
"{}\n",
serde_json::to_string_pretty(&session_metadata).unwrap()
);
// Demo 2: MessageRecord (matches messages.ndjson format)
// Reuses the scroll ID from Demo 1 so the samples reference each other.
println!("2. MessageRecord (messages.ndjson line):");
let message = MessageRecord {
version: 1,
message_id: Uuid::now_v7(),
session: session_metadata.scroll_id,
parent_id: None,
ts: Utc::now(),
role: "user".to_string(),
author: Some("alice".to_string()),
content_md: "How do I implement archivist types?".to_string(),
content_parts: None,
attachments: vec![AttachmentRef {
file_id: "sha256:abc123".to_string(),
name: "spec.pdf".to_string(),
mime_type: Some("application/pdf".to_string()),
}],
metadata: serde_json::json!({
"connector_msg_id": "msg-456"
}),
};
// NDJSON format (one line)
println!("{}\n", serde_json::to_string(&message).unwrap());
// Demo 3: ConnectorRecord (matches connector.json format)
// Reuses the connector UID from Demo 1.
println!("3. ConnectorRecord (connector.json):");
let connector = ConnectorRecord {
version: 1,
connector_uid: session_metadata.connector_uid,
r#type: "OpenCode".to_string(),
title: "OpenCode Local".to_string(),
client_native_id: "opencode@http://localhost:12225".to_string(),
alias_of: None,
created_at: Utc::now(),
metadata: serde_json::json!({}),
fingerprint: None,
};
println!("{}\n", serde_json::to_string_pretty(&connector).unwrap());
// Demo 4: SessionMapping (matches sessions.ndjson format)
// Ties the Demo 3 connector and the native ID "abc123" to the Demo 1 scroll ID.
println!("4. SessionMapping (sessions.ndjson line):");
let mapping = SessionMapping {
version: 1,
connector_uid: connector.connector_uid,
native_session_id: "abc123".to_string(),
scroll_id: session_metadata.scroll_id,
created_at: Utc::now(),
alias_of: None,
};
println!("{}\n", serde_json::to_string(&mapping).unwrap());
// Demo 5: FileRecord (matches file_index.jsonl format)
println!("5. FileRecord (file_index.jsonl line):");
let file_record = FileRecord {
version: 1,
file_id: "sha256:abc123def456".to_string(),
path: ".files/ab/cd/abc123def456".to_string(),
size: 123456,
mime: Some("application/pdf".to_string()),
original_name: "spec.pdf".to_string(),
sessions: vec![session_metadata.scroll_id],
metadata: serde_json::json!({
"source": "upload"
}),
};
println!("{}\n", serde_json::to_string(&file_record).unwrap());
// Demo 6: Enum serialization
// Prints the serialized form of individual enum variants.
println!("6. Enum Serialization:");
println!(
" Continuation::Split: {}",
serde_json::to_string(&Continuation::Split).unwrap()
);
println!(
" Continuation::Compact: {}",
serde_json::to_string(&Continuation::Compact).unwrap()
);
println!(
" RegisterStatus::Accepted: {}",
serde_json::to_string(&RegisterStatus::Accepted).unwrap()
);
println!(
" RegisterStatus::Aliased: {}",
serde_json::to_string(&RegisterStatus::Aliased).unwrap()
);
println!();
// Demo 7: API types
println!("7. RegisterConnectorResponse:");
let response = RegisterConnectorResponse {
status: RegisterStatus::Accepted,
connector_uid: Uuid::now_v7(),
alias_of: None,
note: Some("Successfully registered".to_string()),
};
println!("{}\n", serde_json::to_string_pretty(&response).unwrap());
// Demo 8: RegisterSessionResponse (shadows the `response` binding from Demo 7)
println!("8. RegisterSessionResponse:");
let response = RegisterSessionResponse {
status: RegisterStatus::Aliased,
scroll_id: Uuid::now_v7(),
alias_of: Some(Uuid::now_v7()),
};
println!("{}\n", serde_json::to_string_pretty(&response).unwrap());
println!("=== ALL TYPES MATCH VISION.MD SPECIFICATION ===");
}
@@ -0,0 +1,277 @@
//! Event handling example for dirigent_archivist
//!
//! This example demonstrates:
//! - Creating an EventHandler
//! - Subscribing to dirigent_protocol events
//! - Accumulating streaming message chunks
//! - Finalizing complete messages
//! - Automatic archival via event stream
use chrono::Utc;
use dirigent_archivist::{Archivist, EventHandler, Result};
use dirigent_protocol::streaming::{BusEvent, BusReceiver, EventOrigin, EventRouting};
use dirigent_protocol::{
ContentBlock, Event, Message, MessageMetadata, MessagePart, MessageRole, MessageStatus,
Session, SessionMetadata, SessionUpdate, ToolCall, ToolCallStatus,
};
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use tokio::sync::mpsc;
use uuid::Uuid;
/// Packages a bare `Event` as a `BusEvent`.
///
/// The event is placed behind an `Arc`, tagged with the `Runtime` origin,
/// and given the default routing.
fn wrap(event: Event) -> BusEvent {
    let routing = EventRouting::default();
    let origin = EventOrigin::Runtime;
    let event = Arc::new(event);

    BusEvent {
        routing,
        origin,
        event,
    }
}
/// Drives an `EventHandler` with a hand-built event stream, then reads the
/// archive back.
///
/// The example sends, in order: one `SessionCreated`, four agent text chunks,
/// one agent thought chunk, one tool call, a `MessageCompleted` for the
/// assistant message, three user text chunks, and a `MessageCompleted` for
/// the user message. It then lists the sessions and messages found in the
/// archive, closes the channel, awaits the handler task, and deletes the
/// temporary archive directory.
///
/// # Errors
///
/// Archivist and filesystem failures are propagated with `?`; the temporary
/// directory is not removed in that case.
///
/// # Panics
///
/// Panics if a channel send fails, if `connector_id` does not parse as a
/// UUID, or if the handler task cannot be joined.
#[tokio::main]
async fn main() -> Result<()> {
// Create a temporary archive directory for this example
let temp_dir = std::env::temp_dir().join(format!("dirigent_event_example_{}", Uuid::now_v7()));
println!("Creating archive at: {}", temp_dir.display());
// Step 1: Create archivist and event handler
// The archivist is shared: the handler gets one `Arc` clone, and this
// function keeps the other for the verification queries in Step 5.
let archivist = Archivist::new_with_single_archive(temp_dir.clone()).await?;
let archivist = Arc::new(archivist);
let handler = EventHandler::new(archivist.clone());
println!("EventHandler created successfully");
// Step 2: Create a mock event stream. In production this is built
// by `SharingBus::subscribe_all()`; here we fabricate a `BusReceiver`
// directly so the example stays self-contained.
let (tx, rx) = mpsc::channel::<BusEvent>(100);
let bus_rx = BusReceiver {
id: 0,
rx,
lagged: Arc::new(AtomicU64::new(0)),
};
// Step 3: Spawn event handler task
// The handler and receiver are moved into the task; `handler_task` is
// awaited during cleanup after the sender has been dropped.
let handler_task = tokio::spawn(async move {
handler.run(bus_rx).await;
});
// Step 4: Simulate event flow
// NOTE(review): the sleeps after each send presumably give the spawned
// handler time to process the event; nothing here waits on an explicit
// completion signal - confirm before relying on the timings.
println!("\n--- Simulating Event Stream ---");
// Generate connector and session IDs
// IDs are kept as strings because the protocol events carry string IDs;
// `connector_id` is parsed back into a `Uuid` in Step 5.
let connector_id = Uuid::now_v7().to_string();
let session_id = Uuid::now_v7().to_string();
let message_id = Uuid::now_v7().to_string();
// Event 1: SessionCreated
println!("\n1. Sending SessionCreated event...");
let session_created = Event::SessionCreated {
connector_id: connector_id.clone(),
session: Session {
id: session_id.clone(),
title: "Example streaming session".to_string(),
created_at: Utc::now(),
updated_at: Utc::now(),
metadata: SessionMetadata {
project_path: "/home/user/project".to_string(),
model: Some("claude-3-5-sonnet".to_string()),
total_messages: 0,
system_message: None,
current_mode_id: None,
_meta: None,
project_id: None,
},
cwd: None,
models: None,
modes: None,
config_options: None,
acp_client_id: None,
},
};
tx.send(wrap(session_created)).await.unwrap();
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
// Event 2-5: Streaming message chunks (AgentMessageChunk)
// All four chunks share `message_id`, as do the thought chunk, tool call,
// and completion event that follow.
println!("2. Sending streaming message chunks...");
let chunks = vec!["Hello! ", "I'm here to ", "help you with ", "your code."];
for (i, chunk) in chunks.iter().enumerate() {
let chunk_event = Event::SessionUpdate {
connector_id: connector_id.clone(),
session_id: session_id.clone(),
update: SessionUpdate::AgentMessageChunk {
message_id: message_id.clone(),
content: ContentBlock::Text {
text: chunk.to_string(),
},
_meta: None,
},
};
tx.send(wrap(chunk_event)).await.unwrap();
println!(" Chunk {}: {:?}", i + 1, chunk);
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
}
// Event 6: Thinking chunk
println!("3. Sending thinking chunk...");
let thinking_event = Event::SessionUpdate {
connector_id: connector_id.clone(),
session_id: session_id.clone(),
update: SessionUpdate::AgentThoughtChunk {
message_id: message_id.clone(),
content: ContentBlock::Text {
text: "Let me consider the best approach...".to_string(),
},
_meta: None,
},
};
tx.send(wrap(thinking_event)).await.unwrap();
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
// Event 7: Tool call
// Sent already in the `Completed` state with both raw input and raw output.
println!("4. Sending tool call event...");
let tool_call_event = Event::SessionUpdate {
connector_id: connector_id.clone(),
session_id: session_id.clone(),
update: SessionUpdate::ToolCall {
message_id: message_id.clone(),
tool_call: ToolCall {
id: "tool_call_123".to_string(),
tool_name: "read_file".to_string(),
status: ToolCallStatus::Completed,
content: vec![],
raw_input: Some(serde_json::json!({
"path": "/home/user/project/main.rs"
})),
raw_output: Some(serde_json::json!({
"content": "fn main() { println!(\"Hello\"); }"
})),
title: None,
error: None,
metadata: None,
origin: None,
},
_meta: None,
},
};
tx.send(wrap(tool_call_event)).await.unwrap();
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
// Event 8: MessageCompleted (triggers finalization)
// The completed message carries the concatenation of the text chunks sent above.
println!("5. Sending MessageCompleted event...");
let message_completed = Event::MessageCompleted {
connector_id: connector_id.clone(),
message: Message {
id: message_id.clone(),
session_id: session_id.clone(),
role: MessageRole::Assistant,
created_at: Utc::now(),
content: vec![MessagePart::Text {
text: chunks.concat(),
}],
status: MessageStatus::Completed,
metadata: Some(MessageMetadata {
cost: None,
tokens_input: None,
tokens_output: None,
response_time_ms: None,
latency_ms: Some(1500),
model: Some("claude-3-5-sonnet".to_string()),
other: None,
}),
},
};
tx.send(wrap(message_completed)).await.unwrap();
tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
// Event 9: Second message (user response)
// Uses a fresh message ID and is streamed as `UserMessageChunk` updates.
println!("6. Sending user message...");
let user_message_id = Uuid::now_v7().to_string();
let user_chunks = vec!["Thanks! ", "Can you explain ", "the code?"];
for (i, chunk) in user_chunks.iter().enumerate() {
let chunk_event = Event::SessionUpdate {
connector_id: connector_id.clone(),
session_id: session_id.clone(),
update: SessionUpdate::UserMessageChunk {
message_id: user_message_id.clone(),
content: ContentBlock::Text {
text: chunk.to_string(),
},
_meta: None,
},
};
tx.send(wrap(chunk_event)).await.unwrap();
println!(" User chunk {}: {:?}", i + 1, chunk);
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
}
// Completion event for the user message, with no message metadata attached.
let user_message_completed = Event::MessageCompleted {
connector_id: connector_id.clone(),
message: Message {
id: user_message_id.clone(),
session_id: session_id.clone(),
role: MessageRole::User,
created_at: Utc::now(),
content: vec![MessagePart::Text {
text: user_chunks.concat(),
}],
status: MessageStatus::Completed,
metadata: None,
},
};
tx.send(wrap(user_message_completed)).await.unwrap();
tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
// Step 5: Verify archived data
println!("\n--- Verifying Archived Data ---");
// Parse connector_uid from connector_id string
let connector_uid =
Uuid::parse_str(&connector_id).expect("connector_id should be a valid UUID");
// List sessions
let page = archivist
.list_sessions_paged(
dirigent_archivist::SessionListQuery::default()
.with_connector(connector_uid)
.with_limit(100),
)
.await?;
let sessions = page.items;
println!("Found {} session(s) in archive", sessions.len());
for session in &sessions {
println!(" Session: {} - {:?}", session.scroll_id, session.title);
}
// Get messages
// Only the first listed session is inspected; nothing is printed if the list is empty.
if let Some(session) = sessions.first() {
let messages = archivist.get_messages(session.scroll_id, None).await?;
println!("\nFound {} message(s):", messages.len());
for msg in &messages {
println!("\n[{}] {} chars", msg.role, msg.content_md.len());
// The preview is cut by `char`, not by byte, so it never splits a UTF-8 sequence.
println!(
"Content preview: {}",
&msg.content_md.chars().take(100).collect::<String>()
);
}
}
// Step 6: Cleanup
println!("\n--- Cleanup ---");
// Drop the event sender to close the channel
drop(tx);
// Wait for handler to finish
handler_task.await.expect("Handler task failed");
// Remove temporary archive
std::fs::remove_dir_all(&temp_dir)?;
println!("Removed temporary archive");
println!("\nExample completed successfully!");
Ok(())
}
@@ -0,0 +1,214 @@
//! File storage example for dirigent_archivist
//!
//! This example demonstrates:
//! - Storing files with content-addressing
//! - Retrieving files by file_id
//! - Automatic deduplication of identical content
//! - Session tracking for file references
use dirigent_archivist::storage::{files, ndjson, paths::ArchivePaths};
use dirigent_archivist::types::FileRecord;
use dirigent_archivist::Result;
use uuid::Uuid;
/// Tour of the file store in `dirigent_archivist`.
///
/// Creates a throwaway archive under the system temp directory, stores text
/// and binary payloads through `files::store_file`, reads them back with
/// `files::get_file`, prints the file index and some totals, and finally
/// removes the archive directory again.
///
/// # Errors
/// Any error from the store, the index reader, or the filesystem aborts the
/// example via `?`; in that case the cleanup step at the end is not reached.
#[tokio::main]
async fn main() -> Result<()> {
    // Scratch archive; the UUID suffix keeps concurrent runs apart.
    let archive_root =
        std::env::temp_dir().join(format!("dirigent_files_example_{}", Uuid::now_v7()));
    println!("Creating archive at: {}", archive_root.display());
    let paths = ArchivePaths::new(archive_root.clone());

    // Example 1: store a text payload on behalf of a first session.
    println!("\n--- Example 1: Store a File ---");
    let text_payload = b"This is a sample document with some text content.";
    let first_session = Uuid::now_v7();
    let first_id = files::store_file(
        &paths,
        text_payload,
        String::from("document.txt"),
        Some(String::from("text/plain")),
        first_session,
    )
    .await?;
    println!("Stored file with ID: {}", first_id);
    println!("Session: {}", first_session);

    // Example 2: read the payload back by its id.
    println!("\n--- Example 2: Retrieve the File ---");
    let first_bytes = files::get_file(&paths, &first_id).await?;
    println!("Retrieved {} bytes", first_bytes.len());
    println!("Content: {}", String::from_utf8_lossy(&first_bytes));

    // Example 3: the very same bytes, but under another name and session.
    // The example prints whether both store calls returned the same id.
    println!("\n--- Example 3: Deduplication Demo ---");
    let second_session = Uuid::now_v7();
    let duplicate_id = files::store_file(
        &paths,
        text_payload,
        String::from("duplicate.txt"),
        Some(String::from("text/plain")),
        second_session,
    )
    .await?;
    println!("Stored same content with different name");
    println!("File ID 1: {}", first_id);
    println!("File ID 2: {}", duplicate_id);
    println!("Same file_id? {}", first_id == duplicate_id);
    println!("\nDeduplication: Same content produces same file_id, stored only once!");

    // Example 4: dump every record of the NDJSON file index.
    println!("\n--- Example 4: File Index ---");
    let index_path = paths.root().join(".files").join("file_index.jsonl");
    let index_entries: Vec<FileRecord> = ndjson::read_ndjson(&index_path).await?;
    println!("File index contains {} record(s)", index_entries.len());
    for entry in &index_entries {
        println!("\nFile: {}", entry.file_id);
        println!(" Original name: {}", entry.original_name);
        println!(" MIME type: {:?}", entry.mime);
        println!(" Size: {} bytes", entry.size);
        println!(" Referenced by {} session(s):", entry.sessions.len());
        entry
            .sessions
            .iter()
            .for_each(|owner| println!(" - {}", owner));
    }

    // Example 5: a payload with different bytes.
    println!("\n--- Example 5: Store Different Content ---");
    let other_payload = b"This is completely different content with more data!";
    let third_session = Uuid::now_v7();
    let other_id = files::store_file(
        &paths,
        other_payload,
        String::from("different.txt"),
        Some(String::from("text/plain")),
        third_session,
    )
    .await?;
    println!("Stored different content");
    println!("File ID 3: {}", other_id);
    println!("Different from file_id1? {}", first_id != other_id);

    // Example 6: every possible byte value once (0x00..=0xFF), then a
    // round trip to check the bytes come back unchanged.
    println!("\n--- Example 6: Binary Content ---");
    let all_bytes: Vec<u8> = (0..=u8::MAX).collect();
    let fourth_session = Uuid::now_v7();
    let binary_id = files::store_file(
        &paths,
        &all_bytes,
        String::from("binary.dat"),
        Some(String::from("application/octet-stream")),
        fourth_session,
    )
    .await?;
    println!("Stored binary content (256 bytes)");
    println!("File ID: {}", binary_id);
    let round_tripped = files::get_file(&paths, &binary_id).await?;
    println!("Retrieved {} bytes", round_tripped.len());
    let binary_matches = round_tripped == all_bytes;
    println!("Binary content verified: {}", binary_matches);

    // Example 7: show what ended up on disk.
    println!("\n--- Example 7: Archive Structure ---");
    println!("Archive root: {}", archive_root.display());
    show_files_directory(&paths)?;

    // Example 8: totals computed from a fresh read of the index.
    println!("\n--- Final Statistics ---");
    let final_entries: Vec<FileRecord> = ndjson::read_ndjson(&index_path).await?;
    println!("Total unique files stored: {}", final_entries.len());
    let session_refs: usize = final_entries.iter().map(|rec| rec.sessions.len()).sum();
    println!("Total session references: {}", session_refs);
    let bytes_on_disk: u64 = final_entries.iter().map(|rec| rec.size).sum();
    println!("Total storage used: {} bytes", bytes_on_disk);

    // The session count below is the literal 2 (the two store calls made with
    // the first payload above); it is not read back from the index.
    println!("\nContent-addressing benefit:");
    println!(" File '{}' is referenced by {} sessions", first_id, 2);
    println!(" But stored only once on disk!");

    // Remove the scratch archive.
    println!("\n--- Cleanup ---");
    std::fs::remove_dir_all(&archive_root)?;
    println!("Removed temporary archive");
    println!("\nExample completed successfully!");
    Ok(())
}
/// Print the layout of the archive's `.files` directory to stdout.
///
/// Reports the size of `file_index.jsonl` when it exists, then walks the
/// directories directly under `.files`. For each of them it prints the
/// directory name, the files it contains, and - for nested directories - the
/// entries one level further down. Non-directory entries directly under
/// `.files` are not listed by the walk.
///
/// # Errors
/// Propagates any I/O error raised while listing directories or reading
/// file metadata.
fn show_files_directory(paths: &ArchivePaths) -> Result<()> {
    let store_dir = paths.root().join(".files");
    if !store_dir.exists() {
        println!("No files directory found");
        return Ok(());
    }
    println!("\n.files/ directory:");

    // The index lives next to the shard directories.
    let index_file = store_dir.join("file_index.jsonl");
    if index_file.exists() {
        let index_size = std::fs::metadata(&index_file)?.len();
        println!(" file_index.jsonl ({} bytes)", index_size);
    }

    for shard in std::fs::read_dir(&store_dir)? {
        let shard_path = shard?.path();
        // Only directories are shards; plain files at this level are skipped.
        if !shard_path.is_dir() {
            continue;
        }
        println!(" {}/", shard_path.file_name().unwrap().to_string_lossy());

        for child in std::fs::read_dir(&shard_path)? {
            let child_path = child?.path();

            // A file stored directly inside the shard.
            if !child_path.is_dir() {
                let child_size = std::fs::metadata(&child_path)?.len();
                println!(
                    " {} ({} bytes)",
                    child_path.file_name().unwrap().to_string_lossy(),
                    child_size
                );
                continue;
            }

            // A sub-shard: print its name, then everything inside it.
            println!(" {}/", child_path.file_name().unwrap().to_string_lossy());
            for leaf in std::fs::read_dir(&child_path)? {
                let leaf_path = leaf?.path();
                let leaf_size = std::fs::metadata(&leaf_path)?.len();
                println!(
                    " {} ({} bytes)",
                    leaf_path.file_name().unwrap().to_string_lossy(),
                    leaf_size
                );
            }
        }
    }
    Ok(())
}
@@ -0,0 +1,199 @@
//! Example: two `JsonlBackend`s side by side, demonstrating boot-from-config,
//! priority-ordered read routing, write fanout, and a health snapshot.
//!
//! Layout:
//! - `primary` → `read_priority = 0`, `failure_mode = required` (default)
//! - `mirror` → `read_priority = 10`, `failure_mode = best_effort`
//!
//! The primary is the default write target (lowest priority among
//! Required+write-active backends). `append_messages` fans out inline to the
//! mirror too. Reads walk the registrations in priority order, so the primary
//! answers first; if it is missing a session, the walk falls through to the
//! mirror.
//!
//! Run with:
//!
//! cargo run --package dirigent_archivist --example multi_backend
use std::sync::Arc;
use chrono::Utc;
use dirigent_archivist::coordinator::Archivist;
use dirigent_archivist::registry::{ArchivesConfig, BackendRegistry};
use dirigent_archivist::types::{
MessageRecord, RegisterConnectorRequest, RegisterSessionRequest,
};
use uuid::Uuid;
/// Runs the multi-backend walkthrough end to end.
///
/// Steps, in order: build an `ArchivesConfig` from an inline TOML document
/// describing two `jsonl` archives, boot an `Archivist` from it, print the
/// archive list with health, register a connector and a session, append two
/// messages, read the session metadata and messages back, print a final
/// health snapshot, and shut the archivist down.
///
/// Any failing step aborts the example through `?`.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
// One scratch directory per archive.
// NOTE(review): `tempfile::tempdir()` handles normally delete their directory
// when dropped, which would explain the absence of explicit cleanup - confirm.
let dir_a = tempfile::tempdir()?;
let dir_b = tempfile::tempdir()?;
// Build a two-archive config entirely from TOML so the example doubles as
// a faithful demonstration of the config surface.
let cfg_src = format!(
r#"
[[archives]]
name = "primary"
type = "jsonl"
read_priority = 0
[archives.params]
path = "{a}"
[[archives]]
name = "mirror"
type = "jsonl"
failure_mode = "best_effort"
read_priority = 10
[archives.params]
path = "{b}"
"#,
// The paths are spliced into double-quoted TOML strings above; backslashes
// are rewritten to forward slashes, presumably so Windows paths are not
// parsed as TOML escape sequences.
a = dir_a.path().to_string_lossy().replace('\\', "/"),
b = dir_b.path().to_string_lossy().replace('\\', "/"),
);
let cfg: ArchivesConfig = toml::from_str(&cfg_src)?;
// Registry created via `with_jsonl()`; the config above only uses the
// "jsonl" archive type.
let registry = BackendRegistry::with_jsonl();
let archivist = Arc::new(Archivist::from_config(cfg, &registry, None).await?);
println!("\n=== Multi-backend Archivist example ===\n");
println!("Boot complete. Archives (ordered by read_priority):");
for s in archivist.list_archives_with_health().await {
println!(
" - name={:<8} type={:<6} priority={:<3} enabled={} write_active={} failure_mode={:?} health={:?}",
s.name,
s.type_name,
s.read_priority,
s.enabled,
s.write_active,
s.failure_mode,
s.health,
);
}
// ------------------------------------------------------------------
// Register a connector. The primary owns the canonical record; fanout
// mirrors it to the secondary.
// ------------------------------------------------------------------
let connector_resp = archivist
.register_connector(
RegisterConnectorRequest {
r#type: "Example".into(),
title: "multi-backend demo".into(),
client_native_id: "example://multi_backend".into(),
custom_uid: None,
metadata: serde_json::json!({ "demo": true }),
fingerprint: None,
},
None,
)
.await?;
// The uid from the response is what ties the session below to this connector.
let connector_uid = connector_resp.connector_uid;
println!(
"\nRegistered connector: uid={} status={:?}",
connector_uid, connector_resp.status
);
// ------------------------------------------------------------------
// Register a session under that connector. `register_session` writes
// the mapping and `session.json` on the primary first, then fans out
// to any enabled secondaries.
// ------------------------------------------------------------------
let session_resp = archivist
.register_session(
RegisterSessionRequest {
connector_uid,
native_session_id: "demo-session-1".into(),
title: Some("multi-backend demo session".into()),
custom_scroll_id: None,
metadata: serde_json::json!({ "model": "demo" }),
completeness: Default::default(),
parent_scroll_id: None,
is_subagent: false,
continuation: None,
agent_id: None,
subagent_type: None,
spawning_tool_use_id: None,
},
None,
)
.await?;
// All later reads and writes address the session through this scroll_id.
let scroll_id = session_resp.scroll_id;
println!(
"Registered session: scroll_id={} status={:?}",
scroll_id, session_resp.status
);
// ------------------------------------------------------------------
// Append a couple of messages. `append_messages` writes to the primary
// and then fans out inline to the mirror.
// ------------------------------------------------------------------
let user_msg = MessageRecord {
version: 1,
message_id: Uuid::now_v7(),
session: scroll_id,
parent_id: None,
ts: Utc::now(),
role: "user".into(),
author: Some("alice".into()),
content_md: "Hello from the multi-backend example!".into(),
content_parts: None,
attachments: vec![],
metadata: serde_json::json!({}),
};
// The assistant reply points back at the user message through `parent_id`.
let asst_msg = MessageRecord {
version: 1,
message_id: Uuid::now_v7(),
session: scroll_id,
parent_id: Some(user_msg.message_id),
ts: Utc::now(),
role: "assistant".into(),
author: Some("claude".into()),
content_md: "Greetings. I have been written to two archives.".into(),
content_parts: None,
attachments: vec![],
metadata: serde_json::json!({}),
};
archivist
.append_messages(scroll_id, vec![user_msg, asst_msg], None)
.await?;
println!("\nAppended 2 messages — fanned out to primary + mirror.");
// ------------------------------------------------------------------
// Read path: the priority walk tries the primary first (priority=0).
// It finds the session there and never consults the mirror.
// ------------------------------------------------------------------
let meta = archivist.get_session_metadata(scroll_id, None).await?;
println!(
"\nRead session via priority walk: title={:?} completeness={:?}",
meta.title, meta.completeness
);
println!(
"Read cache size after read: {}",
archivist.read_cache_size().await
);
let messages = archivist.get_messages(scroll_id, None).await?;
println!("Read {} message(s) from the archive:", messages.len());
for m in &messages {
println!(" - [{}] {}", m.role, m.content_md);
}
// ------------------------------------------------------------------
// Final health snapshot. Both backends should still be Available and
// have no queued writes (both run Inline write policies by default).
// ------------------------------------------------------------------
println!("\nFinal health snapshot:");
for s in archivist.list_archives_with_health().await {
println!(
" - {:<8} health={:?} queue_depth={:?} last_error={:?}",
s.name, s.health, s.queue_depth, s.last_error
);
}
// Clean shutdown drains any queued writer tasks. Both backends here run
// Inline, so this is effectively a no-op but remains the correct API.
archivist.shutdown().await?;
println!("\nShutdown complete.");
Ok(())
}
@@ -0,0 +1,923 @@
//! Message accumulator for incremental message assembly.
//!
//! This is a thin wrapper around [`dirigent_protocol::accumulator::MessageAccumulator`]
//! that delegates chunk/tool/thinking operations to the protocol accumulator and
//! converts [`AccumulatedMessage`] to [`MessageRecord`] on `finalize()`.
//!
//! The accumulator preserves the order of content parts (text, thinking, tool calls)
//! as they arrive in the event stream, enabling inline tool rendering in the UI.
use chrono::{DateTime, Utc};
use dirigent_protocol::accumulator::{
AccumulatedMessage, AccumulatedPart,
MessageAccumulator as ProtocolAccumulator,
};
#[cfg(test)]
use dirigent_protocol::MessagePart;
use dirigent_protocol::ContentBlock;
use serde_json::Value;
use std::collections::HashMap;
use uuid::Uuid;
use crate::error::Result;
use crate::types::MessageRecord;
// Re-export ToolCallData from the protocol for backward compatibility.
pub use dirigent_protocol::accumulator::ToolCallData;
/// Accumulator for assembling streaming message deltas into [`MessageRecord`]s.
///
/// Wraps the protocol-level [`ProtocolAccumulator`] and adds archivist-specific
/// concerns: per-message metadata, UUID parsing, and markdown generation.
///
/// Callers feed chunks, thinking text and tool calls keyed by a message id
/// string, then call `finalize` with that id to obtain the assembled record.
#[derive(Debug, Default)]
pub struct MessageAccumulator {
/// Protocol accumulator that every add/update/query method delegates to.
inner: ProtocolAccumulator,
/// Per-message metadata not tracked by the protocol accumulator.
///
/// Keyed by message id. `finalize` removes the entry for the finalized
/// message and falls back to `Value::Null` when there is none.
/// NOTE(review): no method in the visible `impl` inserts into this map, so
/// as far as this file shows the fallback is always taken - confirm.
metadata: HashMap<String, Value>,
}
impl MessageAccumulator {
/// Create a new, empty message accumulator.
///
/// This constructor always returns `Ok`; the `Result` wrapper is part of the
/// existing signature and callers are expected to unwrap or propagate it.
pub fn new() -> Result<Self> {
Ok(Self {
inner: ProtocolAccumulator::new(),
metadata: HashMap::new(),
})
}
/// Add a content chunk to the message buffer
///
/// Forwards the chunk to the protocol accumulator under `message_id`,
/// together with the session id, connector id and role it belongs to.
/// The owned `String` arguments are only borrowed for the delegated call.
pub fn add_chunk(
&mut self,
message_id: String,
session_id: String,
connector_id: String,
role: String,
content: ContentBlock,
) {
self.inner
.add_chunk(&message_id, &session_id, &connector_id, &role, content);
}
/// Add thinking content to the message buffer
///
/// Unlike `add_chunk` there is no `role` parameter; the role of the
/// resulting message is decided by the protocol accumulator.
pub fn add_thinking(
&mut self,
message_id: String,
session_id: String,
connector_id: String,
content: String,
) {
self.inner.add_thinking(&message_id, &session_id, &connector_id, &content);
}
/// Add or update a tool call in the message buffer
///
/// This method handles both initial ToolCall events and ToolCallUpdate events.
/// If a tool call with the given ID already exists, it updates the existing entry.
/// Otherwise, it adds a new entry.
///
/// This ensures that each tool_call_id appears exactly ONCE in the final message,
/// with the most recent input/output data.
///
/// The merge itself is implemented by the protocol accumulator; this method
/// only forwards the call.
pub fn add_or_update_tool_call(&mut self, message_id: String, tool_call: ToolCallData) {
self.inner.add_or_update_tool_call(&message_id, tool_call);
}
/// Add a tool call to the message buffer (DEPRECATED - use add_or_update_tool_call)
///
/// Kept as a compatibility alias: it simply calls `add_or_update_tool_call`.
#[deprecated(note = "Use add_or_update_tool_call instead to avoid duplicates")]
pub fn add_tool_call(&mut self, message_id: String, tool_call: ToolCallData) {
self.add_or_update_tool_call(message_id, tool_call);
}
/// Update an existing tool call in the message buffer
///
/// Builds a [`ToolCallData`] from the given id, input and output and hands it
/// to the protocol accumulator's add-or-update path, but only when a buffer
/// already exists for `message_id`. Without a buffer this is a no-op.
///
/// A missing `input` is forwarded as `Value::Null`; `output` is forwarded
/// unchanged.
///
/// NOTE(review): the only guard here is `has_buffer`. If a buffer exists but
/// holds no tool call with this id, the add-or-update path is documented to
/// add a new entry, which would carry the empty `tool_name` set below -
/// confirm against the protocol accumulator.
pub fn update_tool_call(
&mut self,
message_id: String,
tool_call_id: &str,
input: Option<Value>,
output: Option<Value>,
) {
// The tool name is not known at this call site, so the update is built
// with an empty name and delegated to the protocol accumulator's
// add-or-update semantics.
let tool_call = ToolCallData {
id: tool_call_id.to_string(),
tool_name: String::new(), // presumably the existing entry's name is kept on update - TODO confirm
input: input.unwrap_or(Value::Null),
output,
};
// Only delegate if a buffer exists for this message (matching original behavior).
if self.inner.has_buffer(&message_id) {
self.inner.add_or_update_tool_call(&message_id, tool_call);
}
}
/// Get all message IDs for a given session that have active buffers
pub fn get_message_ids_for_session(&self, session_id: &str) -> Vec<String> {
self.inner.message_ids_for_session(session_id)
}
/// Get message IDs for buffers that have been inactive longer than the threshold
///
/// `_now` is accepted for signature compatibility but is not used: only
/// `threshold` is forwarded to the protocol accumulator.
/// NOTE(review): presumably the protocol accumulator measures inactivity
/// against its own clock - confirm.
pub fn get_stale_message_ids(
&self,
_now: DateTime<Utc>,
threshold: std::time::Duration,
) -> Vec<String> {
self.inner.stale_message_ids(threshold)
}
/// Get all message IDs that have active buffers
pub fn get_all_message_ids(&self) -> Vec<String> {
self.inner.active_message_ids()
}
/// Finalize a message and produce a complete `(MessageRecord, connector_id, native_session_id)`.
///
/// Returns `None` if no buffer exists for the given `message_id`.
/// The `connector_id` and `native_session_id` in the tuple are the raw values
/// recorded on the accumulated message (as passed into
/// `add_chunk`/`add_thinking`), returned alongside the record so callers can
/// resolve the canonical scroll_id themselves.
///
/// Any metadata stored for `message_id` is removed from the accumulator and
/// attached to the record; when none is stored the record gets `Value::Null`.
pub fn finalize(&mut self, message_id: &str) -> Option<(MessageRecord, String, String)> {
let accumulated = self.inner.finalize(message_id)?;
// Copy the raw ids out first: `accumulated` is moved into the conversion below.
let connector_id = accumulated.connector_id.clone();
let native_session_id = accumulated.session_id.clone();
// Take stored metadata for this message (if any).
let metadata = self
.metadata
.remove(message_id)
.unwrap_or(Value::Null);
let record = accumulated_to_record(accumulated, metadata);
Some((record, connector_id, native_session_id))
}
}
// ---------------------------------------------------------------------------
// Conversion helpers
// ---------------------------------------------------------------------------
/// Convert an [`AccumulatedMessage`] into a [`MessageRecord`] for archival.
fn accumulated_to_record(accumulated: AccumulatedMessage, metadata: Value) -> MessageRecord {
// Build content_md by iterating parts in order
let mut content_md = String::new();
for part in &accumulated.parts {
match part {
AccumulatedPart::Text { text } => {
content_md.push_str(text);
}
AccumulatedPart::Thinking { text } => {
content_md.push_str("\n\n<thinking>\n");
content_md.push_str(text);
content_md.push_str("\n</thinking>");
}
AccumulatedPart::Tool { data } => {
content_md.push_str(&format!(
"\n\n**Tool**: {}\n```json\n{}\n```",
data.tool_name,
serde_json::to_string_pretty(&data.input)
.unwrap_or_else(|_| "{}".to_string())
));
}
}
}
// Convert accumulated parts to protocol MessageParts for rich rendering
let message_parts = accumulated.to_message_parts();
// Serialize content_parts for storage (None if empty to save space)
let content_parts = if message_parts.is_empty() {
None
} else {
serde_json::to_value(&message_parts).ok()
};
// Parse UUIDs from strings
// Strip "msg-" prefix if present (ACP connectors use this format)
let message_id_str = accumulated
.message_id
.strip_prefix("msg-")
.unwrap_or(&accumulated.message_id);
if message_id_str != accumulated.message_id.as_str() {
tracing::debug!(
"Stripped 'msg-' prefix from message_id: {} -> {}",
accumulated.message_id,
message_id_str
);
}
let message_uuid = match Uuid::parse_str(message_id_str) {
Ok(uuid) => uuid,
Err(_) => {
tracing::warn!(
"Failed to parse message_id as UUID: {}",
accumulated.message_id
);
Uuid::now_v7()
}
};
// Strip "msg-" prefix from session_id if present (for consistency)
let session_id_str = accumulated
.session_id
.strip_prefix("msg-")
.unwrap_or(&accumulated.session_id);
if session_id_str != accumulated.session_id.as_str() {
tracing::debug!(
"Stripped 'msg-' prefix from session_id: {} -> {}",
accumulated.session_id,
session_id_str
);
}
let session_uuid = match Uuid::parse_str(session_id_str) {
Ok(uuid) => uuid,
Err(_) => {
tracing::warn!(
"Failed to parse session_id as UUID: {}",
accumulated.session_id
);
Uuid::now_v7()
}
};
MessageRecord {
version: 1,
message_id: message_uuid,
session: session_uuid,
parent_id: None,
ts: accumulated.created_at.unwrap_or_else(Utc::now),
role: accumulated.role,
author: None,
content_md,
content_parts,
attachments: Vec::new(),
metadata,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_accumulator_creation() {
let acc = MessageAccumulator::new().unwrap();
assert_eq!(acc.get_all_message_ids().len(), 0);
}
#[test]
fn test_add_text_chunk() {
let mut acc = MessageAccumulator::new().unwrap();
acc.add_chunk(
"msg_1".to_string(),
"session_1".to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: "Hello, ".to_string(),
},
);
acc.add_chunk(
"msg_1".to_string(),
"session_1".to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: "world!".to_string(),
},
);
// Two consecutive text chunks should be coalesced.
// Finalize and check the result.
let (record, _, _) = acc.finalize("msg_1").unwrap();
assert_eq!(record.content_md, "Hello, world!");
}
#[test]
fn test_add_thinking_chunk() {
let mut acc = MessageAccumulator::new().unwrap();
acc.add_thinking(
"msg_2".to_string(),
"session_2".to_string(),
"connector_1".to_string(),
"Let me think... ".to_string(),
);
acc.add_thinking(
"msg_2".to_string(),
"session_2".to_string(),
"connector_1".to_string(),
"I need to analyze this.".to_string(),
);
// Finalize and verify thinking was coalesced
let (record, _, _) = acc.finalize("msg_2").unwrap();
assert!(record.content_md.contains("Let me think... I need to analyze this."));
assert_eq!(record.role, "assistant");
}
#[test]
fn test_finalize_text_only() {
let mut acc = MessageAccumulator::new().unwrap();
acc.add_chunk(
"01936e8f-e5a7-7000-8000-000000000001".to_string(),
"01936e8f-e5a7-7000-8000-000000000002".to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: "Hello, ".to_string(),
},
);
acc.add_chunk(
"01936e8f-e5a7-7000-8000-000000000001".to_string(),
"01936e8f-e5a7-7000-8000-000000000002".to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: "world!".to_string(),
},
);
let (record, _, _) = acc
.finalize("01936e8f-e5a7-7000-8000-000000000001")
.unwrap();
assert_eq!(record.content_md, "Hello, world!");
assert_eq!(record.role, "user");
assert!(record.ts <= Utc::now());
}
#[test]
fn test_finalize_with_thinking() {
let mut acc = MessageAccumulator::new().unwrap();
acc.add_chunk(
"01936e8f-e5a7-7000-8000-000000000003".to_string(),
"01936e8f-e5a7-7000-8000-000000000004".to_string(),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "Here's my response.".to_string(),
},
);
acc.add_thinking(
"01936e8f-e5a7-7000-8000-000000000003".to_string(),
"01936e8f-e5a7-7000-8000-000000000004".to_string(),
"connector_1".to_string(),
"Let me analyze this carefully.".to_string(),
);
let (record, _, _) = acc
.finalize("01936e8f-e5a7-7000-8000-000000000003")
.unwrap();
assert!(record.content_md.contains("Here's my response."));
assert!(record.content_md.contains("<thinking>"));
assert!(record.content_md.contains("Let me analyze this carefully."));
assert!(record.content_md.contains("</thinking>"));
}
#[test]
fn test_finalize_nonexistent_message() {
let mut acc = MessageAccumulator::new().unwrap();
let result = acc.finalize("nonexistent");
assert!(result.is_none());
}
#[test]
fn test_add_tool_call() {
let mut acc = MessageAccumulator::new().unwrap();
// First add a text chunk to create the buffer
acc.add_chunk(
"msg_tool".to_string(),
"session_tool".to_string(),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "I'll use a tool.".to_string(),
},
);
// Add a tool call
let tool_call = ToolCallData {
id: "call_123".to_string(),
tool_name: "search".to_string(),
input: serde_json::json!({"query": "test"}),
output: Some(serde_json::json!({"results": ["a", "b"]})),
};
#[allow(deprecated)]
acc.add_tool_call("msg_tool".to_string(), tool_call);
// Finalize and verify
let (record, _, _) = acc.finalize("msg_tool").unwrap();
let parts =
serde_json::from_value::<Vec<MessagePart>>(record.content_parts.unwrap()).unwrap();
assert_eq!(parts.len(), 2); // One Text, one Tool
assert!(matches!(parts[1], MessagePart::Tool { .. }));
if let MessagePart::Tool { tool, .. } = &parts[1] {
assert_eq!(tool, "search");
}
}
#[test]
fn test_finalize_with_tool_calls() {
let mut acc = MessageAccumulator::new().unwrap();
acc.add_chunk(
"01936e8f-e5a7-7000-8000-000000000005".to_string(),
"01936e8f-e5a7-7000-8000-000000000006".to_string(),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "Let me search for that.".to_string(),
},
);
let tool_call = ToolCallData {
id: "call_456".to_string(),
tool_name: "web_search".to_string(),
input: serde_json::json!({"query": "Rust async"}),
output: None,
};
#[allow(deprecated)]
acc.add_tool_call(
"01936e8f-e5a7-7000-8000-000000000005".to_string(),
tool_call,
);
let (record, _, _) = acc
.finalize("01936e8f-e5a7-7000-8000-000000000005")
.unwrap();
assert!(record.content_md.contains("Let me search for that."));
assert!(record.content_md.contains("**Tool**: web_search"));
assert!(record.content_md.contains("Rust async"));
}
#[test]
fn test_concurrent_messages() {
let mut acc = MessageAccumulator::new().unwrap();
// Add chunks for two different messages
acc.add_chunk(
"msg_a".to_string(),
"session_1".to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: "Message A".to_string(),
},
);
acc.add_chunk(
"msg_b".to_string(),
"session_1".to_string(),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "Message B".to_string(),
},
);
acc.add_chunk(
"msg_a".to_string(),
"session_1".to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: " continued".to_string(),
},
);
// Both messages should be buffered
assert_eq!(acc.get_all_message_ids().len(), 2);
// Finalize and check
let (record_a, _, _) = acc.finalize("msg_a").unwrap();
assert_eq!(record_a.content_md, "Message A continued");
let (record_b, _, _) = acc.finalize("msg_b").unwrap();
assert_eq!(record_b.content_md, "Message B");
}
#[test]
fn test_get_message_ids_for_session() {
let mut acc = MessageAccumulator::new().unwrap();
// Add messages to different sessions
acc.add_chunk(
"msg_1".to_string(),
"session_a".to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: "Message 1".to_string(),
},
);
acc.add_chunk(
"msg_2".to_string(),
"session_a".to_string(),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "Message 2".to_string(),
},
);
acc.add_chunk(
"msg_3".to_string(),
"session_b".to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: "Message 3".to_string(),
},
);
// Get message IDs for session_a
let mut session_a_ids = acc.get_message_ids_for_session("session_a");
session_a_ids.sort();
assert_eq!(session_a_ids, vec!["msg_1", "msg_2"]);
// Get message IDs for session_b
let session_b_ids = acc.get_message_ids_for_session("session_b");
assert_eq!(session_b_ids, vec!["msg_3"]);
// Get message IDs for non-existent session
let empty_ids = acc.get_message_ids_for_session("session_c");
assert!(empty_ids.is_empty());
}
#[test]
fn test_finalize_with_msg_prefix() {
let mut acc = MessageAccumulator::new().unwrap();
// Use message_id and session_id with "msg-" prefix (ACP format)
let uuid_str = "01936e8f-e5a7-7000-8000-000000000007";
let session_uuid_str = "01936e8f-e5a7-7000-8000-000000000008";
acc.add_chunk(
format!("msg-{}", uuid_str),
format!("msg-{}", session_uuid_str),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "Testing msg- prefix handling.".to_string(),
},
);
let (record, _, _) = acc.finalize(&format!("msg-{}", uuid_str)).unwrap();
// Verify that the UUID was correctly parsed (not regenerated)
assert_eq!(record.message_id.to_string(), uuid_str);
assert_eq!(record.session.to_string(), session_uuid_str);
assert_eq!(record.content_md, "Testing msg- prefix handling.");
}
#[test]
fn test_finalize_without_msg_prefix() {
let mut acc = MessageAccumulator::new().unwrap();
// Use message_id and session_id without "msg-" prefix
let uuid_str = "01936e8f-e5a7-7000-8000-000000000009";
let session_uuid_str = "01936e8f-e5a7-7000-8000-00000000000a";
acc.add_chunk(
uuid_str.to_string(),
session_uuid_str.to_string(),
"connector_1".to_string(),
"user".to_string(),
ContentBlock::Text {
text: "Testing without prefix.".to_string(),
},
);
let (record, _, _) = acc.finalize(uuid_str).unwrap();
// Verify that the UUID was correctly parsed
assert_eq!(record.message_id.to_string(), uuid_str);
assert_eq!(record.session.to_string(), session_uuid_str);
assert_eq!(record.content_md, "Testing without prefix.");
}
#[test]
fn test_interleaved_tool_calls() {
let mut acc = MessageAccumulator::new().unwrap();
let msg_id = "01936e8f-e5a7-7000-8000-000000000010";
let session_id = "01936e8f-e5a7-7000-8000-000000000011";
// Text chunk 1
acc.add_chunk(
msg_id.to_string(),
session_id.to_string(),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "Let me search for that. ".to_string(),
},
);
// Tool call 1
acc.add_or_update_tool_call(
msg_id.to_string(),
ToolCallData {
id: "call_1".to_string(),
tool_name: "search".to_string(),
input: serde_json::json!({"query": "rust"}),
output: None,
},
);
// Text chunk 2
acc.add_chunk(
msg_id.to_string(),
session_id.to_string(),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "Now let me check the documentation. ".to_string(),
},
);
// Tool call 2
acc.add_or_update_tool_call(
msg_id.to_string(),
ToolCallData {
id: "call_2".to_string(),
tool_name: "read_docs".to_string(),
input: serde_json::json!({"path": "README.md"}),
output: None,
},
);
// Text chunk 3
acc.add_chunk(
msg_id.to_string(),
session_id.to_string(),
"connector_1".to_string(),
"assistant".to_string(),
ContentBlock::Text {
text: "Based on my research...".to_string(),
},
);
let (record, _, _) = acc.finalize(msg_id).unwrap();
// Verify content_md has correct order (text1, tool1, text2, tool2, text3)
let content = &record.content_md;
let search_pos = content.find("**Tool**: search").expect("search tool not found");
let docs_pos = content
.find("**Tool**: read_docs")
.expect("read_docs tool not found");
let text1_pos = content.find("Let me search").expect("text1 not found");
let text2_pos = content.find("Now let me check").expect("text2 not found");
let text3_pos = content.find("Based on my research").expect("text3 not found");
// Verify order: text1 < search < text2 < read_docs < text3
assert!(
text1_pos < search_pos,
"text1 should come before search tool"
);
assert!(
search_pos < text2_pos,
"search tool should come before text2"
);
assert!(
text2_pos < docs_pos,
"text2 should come before read_docs tool"
);
assert!(
docs_pos < text3_pos,
"read_docs tool should come before text3"
);
// Verify content_parts structure
let parts =
serde_json::from_value::<Vec<MessagePart>>(record.content_parts.unwrap()).unwrap();
assert_eq!(
parts.len(),
5,
"Should have 5 parts: text, tool, text, tool, text"
);
// Verify each part type in order
assert!(matches!(parts[0], MessagePart::Text { .. }));
assert!(matches!(parts[1], MessagePart::Tool { .. }));
assert!(matches!(parts[2], MessagePart::Text { .. }));
assert!(matches!(parts[3], MessagePart::Tool { .. }));
assert!(matches!(parts[4], MessagePart::Text { .. }));
}
/// Consecutive text chunks must merge into a single text part, and a tool
/// call between two runs of text must keep those runs as separate parts.
#[test]
fn test_text_coalescing_with_tool_separation() {
    let mut acc = MessageAccumulator::new().unwrap();
    let msg_id = "msg1";
    let session_id = "session1";

    // First run of text: two chunks that are expected to coalesce.
    for fragment in ["Hello ", "world. "] {
        acc.add_chunk(
            msg_id.to_string(),
            session_id.to_string(),
            "connector_1".to_string(),
            "assistant".to_string(),
            ContentBlock::Text {
                text: fragment.to_string(),
            },
        );
    }

    // The tool call is the separator between the two text runs.
    let separator = ToolCallData {
        id: "call_1".to_string(),
        tool_name: "search".to_string(),
        input: serde_json::json!({}),
        output: None,
    };
    acc.add_or_update_tool_call(msg_id.to_string(), separator);

    // Second run of text: coalesces on its own, after the tool.
    for fragment in ["More ", "text."] {
        acc.add_chunk(
            msg_id.to_string(),
            session_id.to_string(),
            "connector_1".to_string(),
            "assistant".to_string(),
            ContentBlock::Text {
                text: fragment.to_string(),
            },
        );
    }

    let (record, _, _) = acc.finalize(msg_id).unwrap();

    // Expected layout: [coalesced text, tool, coalesced text].
    let parts =
        serde_json::from_value::<Vec<MessagePart>>(record.content_parts.unwrap()).unwrap();
    assert_eq!(parts.len(), 3);

    match &parts[0] {
        MessagePart::Text { text } => assert_eq!(text, "Hello world. "),
        _ => panic!("Expected Text part"),
    }

    assert!(matches!(parts[1], MessagePart::Tool { .. }));

    match &parts[2] {
        MessagePart::Text { text } => assert_eq!(text, "More text."),
        _ => panic!("Expected Text part"),
    }
}
/// Repeated updates for the same tool-call id must collapse into one tool
/// part: a later empty input must not erase an earlier non-empty input, and
/// the output supplied last must be attached.
#[test]
fn test_tool_call_progressive_updates() {
    let mut acc = MessageAccumulator::new().unwrap();
    let msg_id = "msg1";
    let session_id = "session1";

    // A leading text chunk creates the message buffer.
    acc.add_chunk(
        msg_id.to_string(),
        session_id.to_string(),
        "connector_1".to_string(),
        "assistant".to_string(),
        ContentBlock::Text {
            text: "Using grep... ".to_string(),
        },
    );

    // Successive states of the same call, as (input, output) pairs:
    //   1. announced with empty input and no output,
    //   2. input filled in,
    //   3. output delivered alongside an empty input (must not overwrite).
    let updates = [
        (serde_json::json!({}), None),
        (serde_json::json!({"pattern": "rust"}), None),
        (
            serde_json::json!({}),
            Some(serde_json::json!({"results": ["match1", "match2"]})),
        ),
    ];
    for (input, output) in updates {
        acc.add_or_update_tool_call(
            msg_id.to_string(),
            ToolCallData {
                id: "call_1".to_string(),
                tool_name: "grep".to_string(),
                input,
                output,
            },
        );
    }

    let (record, _, _) = acc.finalize(msg_id).unwrap();

    // Two parts only: the text, then one tool part merged from three updates.
    let parts =
        serde_json::from_value::<Vec<MessagePart>>(record.content_parts.unwrap()).unwrap();
    assert_eq!(parts.len(), 2);
    assert!(matches!(parts[0], MessagePart::Text { .. }));

    match &parts[1] {
        MessagePart::Tool {
            tool, input, output, ..
        } => {
            assert_eq!(tool, "grep");
            // The non-empty input from the second update survives.
            assert_eq!(input, &serde_json::json!({"pattern": "rust"}));
            // The output from the third update is present.
            assert!(output.is_some());
        }
        _ => panic!("Expected Tool part"),
    }
}
/// Back-to-back thinking chunks must be merged into one `Thinking` part.
#[test]
fn test_thinking_coalescing() {
    let mut acc = MessageAccumulator::new().unwrap();
    let msg_id = "msg1";
    let session_id = "session1";

    // Feed the thoughts in order; they are expected to be concatenated.
    for thought in ["First thought. ", "Second thought."] {
        acc.add_thinking(
            msg_id.to_string(),
            session_id.to_string(),
            "connector_1".to_string(),
            thought.to_string(),
        );
    }

    let (record, _, _) = acc.finalize(msg_id).unwrap();

    // Exactly one part, holding both thoughts joined together.
    let parts =
        serde_json::from_value::<Vec<MessagePart>>(record.content_parts.unwrap()).unwrap();
    assert_eq!(parts.len(), 1);

    match &parts[0] {
        MessagePart::Thinking { text } => assert_eq!(text, "First thought. Second thought."),
        _ => panic!("Expected Thinking part"),
    }
}
}
@@ -0,0 +1,18 @@
//! Archive backend capability enumeration.
//!
//! Mandatory session + message primitives are NOT listed here — every
//! backend has them. This enum represents the *optional* sub-traits a
//! backend opts into, surfaced through `ArchiveBackend::as_xxx()` accessors.
use serde::{Deserialize, Serialize};
/// One optional capability a backend can advertise in its capability set.
///
/// Each variant corresponds to an optional sub-trait that a backend exposes
/// through the matching `ArchiveBackend::as_xxx()` accessor. The mandatory
/// session and message primitives have no variant here.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum ArchiveCapability {
/// Content search (`SearchBackend`, via `as_search()`).
Search,
/// Parent/child session edges (`DagBackend`, via `as_dag()`).
Dag,
/// Meta-event storage and meta-session lookup (`MetaEventsBackend`, via `as_meta_events()`).
MetaEvents,
/// Connector record registry (`ConnectorRegistryBackend`, via `as_connector_registry()`).
ConnectorRegistry,
/// Native-session-id to scroll-id mapping (`SessionMappingBackend`, via `as_session_mapping()`).
SessionMapping,
}
/// The set of optional capabilities a backend advertises.
pub type CapabilitySet = std::collections::HashSet<ArchiveCapability>;
@@ -0,0 +1,108 @@
//! Reusable sub-trait contract tests.
//!
//! Pass any `&dyn ArchiveBackend` to verify it honors the behavioral
//! contract of each sub-trait it exposes. Phase 2 runs this against
//! `JsonlBackend`; Phase 3+ reuses it for every new backend.
#![cfg(any(test, feature = "test-utils"))]
use uuid::Uuid;
use crate::backend::ArchiveBackend;
/// Checks the empty-state behavior of the connector registry sub-trait.
///
/// The registry is obtained through `as_connector_registry()`; when the
/// backend does not expose it, the function returns without checking
/// anything. The backend is expected to hold no connectors yet.
///
/// # Panics
///
/// Panics if any registry call returns an error or any expectation fails.
pub async fn verify_connector_registry_contract(backend: &dyn ArchiveBackend) {
    let registry = match backend.as_connector_registry() {
        Some(registry) => registry,
        None => return,
    };

    // Listing an empty registry yields an empty vector rather than an error.
    let connectors = registry.list_connectors().await.expect("list_connectors");
    assert!(
        connectors.is_empty(),
        "fresh backend should have no connectors"
    );

    // Looking up a UID that was never stored yields Ok(None).
    let unknown_uid = Uuid::new_v4();
    let by_uid = registry
        .get_connector(unknown_uid)
        .await
        .expect("get_connector");
    assert!(by_uid.is_none());

    // Resolving a native id that was never stored yields Ok(None).
    let by_native_id = registry
        .resolve_connector_uid("nonexistent@host")
        .await
        .expect("resolve_connector_uid");
    assert!(by_native_id.is_none());
}
/// Checks the empty-state behavior of the session mapping sub-trait.
///
/// Returns without checking anything when `as_session_mapping()` is `None`.
///
/// # Panics
///
/// Panics if a mapping call returns an error or a lookup for an unknown
/// native session id returns a hit.
pub async fn verify_session_mapping_contract(backend: &dyn ArchiveBackend) {
    let mapping = match backend.as_session_mapping() {
        Some(mapping) => mapping,
        None => return,
    };

    // An unknown (connector, native id) pair has no mapping.
    let unknown_connector = Uuid::new_v4();
    let lookup = mapping
        .get_mapping(unknown_connector, "absent")
        .await
        .expect("get_mapping");
    assert!(lookup.is_none());

    // An unknown native id has no owner.
    let owner = mapping.find_owner("absent").await.expect("find_owner");
    assert!(owner.is_none());
}
/// Checks the empty-state behavior of the DAG sub-trait.
///
/// Returns without checking anything when `as_dag()` is `None`.
///
/// # Panics
///
/// Panics if a DAG call returns an error or a query for a freshly generated
/// id returns any children or edges.
pub async fn verify_dag_contract(backend: &dyn ArchiveBackend) {
    let dag = match backend.as_dag() {
        Some(dag) => dag,
        None => return,
    };

    // A random parent id has no children.
    let unknown_parent = Uuid::new_v4();
    let children = dag
        .get_children(unknown_parent)
        .await
        .expect("get_children");
    assert!(children.is_empty());

    // A random root id has no edges.
    let unknown_root = Uuid::new_v4();
    let edges = dag
        .get_dag_edges(unknown_root)
        .await
        .expect("get_dag_edges");
    assert!(edges.is_empty());
}
/// Checks the empty-state behavior of the meta-events sub-trait.
///
/// Returns without checking anything when `as_meta_events()` is `None`.
/// The backend is expected to hold no meta sessions yet.
///
/// # Panics
///
/// Panics if a meta-events call returns an error or any query returns data.
pub async fn verify_meta_events_contract(backend: &dyn ArchiveBackend) {
    let meta = match backend.as_meta_events() {
        Some(meta) => meta,
        None => return,
    };

    // A random session id has no recorded events.
    let unknown_session = Uuid::new_v4();
    let events = meta
        .get_meta_events(unknown_session)
        .await
        .expect("get_meta_events");
    assert!(events.is_empty());

    // An unknown client id matches no meta session.
    let by_client = meta
        .find_meta_session_by_client("absent")
        .await
        .expect("find_meta_session_by_client");
    assert!(by_client.is_none());

    // The full listing is empty as well.
    let all = meta
        .list_meta_sessions()
        .await
        .expect("list_meta_sessions");
    assert!(all.is_empty());
}
/// One-shot helper: runs every sub-trait contract whose capability is present.
///
/// The checks run sequentially in the order connector registry, session
/// mapping, DAG, meta events. Each check returns early on its own when the
/// backend does not expose the corresponding sub-trait, and panics when an
/// expectation fails. The individual checks assert empty-state behavior, so
/// the backend passed in should not have been populated beforehand.
pub async fn verify_all_contracts(backend: &dyn ArchiveBackend) {
verify_connector_registry_contract(backend).await;
verify_session_mapping_contract(backend).await;
verify_dag_contract(backend).await;
verify_meta_events_contract(backend).await;
}
@@ -0,0 +1,10 @@
//! Health status reported by `ArchiveBackend::health_check`.
use serde::{Deserialize, Serialize};
/// Result of a backend self-check, as returned by `ArchiveBackend::health_check`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum HealthStatus {
/// The backend reported no problem.
Healthy,
/// NOTE(review): presumably "usable but impaired"; no producer of this
/// variant is visible here — confirm intended semantics. `reason` is a
/// human-readable explanation.
Degraded { reason: String },
/// The backend cannot be used; `reason` is a human-readable explanation
/// (e.g. `JsonlBackend` reports a missing or non-directory archive root).
Unavailable { reason: String },
}
@@ -0,0 +1,574 @@
//! In-memory `ArchiveBackend` for coordinator unit tests.
//!
//! Supports every sub-trait except `SearchBackend`. State lives in `Mutex<HashMap<…>>`.
#![cfg(any(test, feature = "test-utils"))]
use std::collections::HashMap;
use std::sync::Mutex;
use async_trait::async_trait;
use uuid::Uuid;
use crate::backend::{
ArchiveBackend, ArchiveCapability, CapabilitySet, ConnectorRegistryBackend,
DagBackend, HealthStatus, MetaEventsBackend, SessionMappingBackend,
};
use crate::error::{ArchivistError, Result};
use crate::types::{
ConnectorRecord, DagEdge, MessageCursor, MessagePage, MessageRecord,
MetaEventRecord, SessionListQuery, SessionMapping, SessionMetadata, SessionPage,
};
/// In-memory backend used by tests.
///
/// All state sits behind `std::sync::Mutex`es (or atomics for the failure
/// counters), so the mock can be shared by reference between tasks.
pub struct MockBackend {
    /// Capabilities advertised by `capabilities()`; also gates the `as_xxx()` accessors.
    capabilities: CapabilitySet,
    /// Session metadata keyed by scroll id.
    sessions: Mutex<HashMap<Uuid, SessionMetadata>>,
    /// Messages per session, in append order.
    messages: Mutex<HashMap<Uuid, Vec<MessageRecord>>>,
    /// Connector records keyed by connector UID.
    connectors: Mutex<HashMap<Uuid, ConnectorRecord>>,
    /// `(connector_uid, native_session_id)` → scroll id.
    mappings: Mutex<HashMap<(Uuid, String), Uuid>>,
    /// Meta events per session, in append order.
    meta_events: Mutex<HashMap<Uuid, Vec<MetaEventRecord>>>,
    /// Every DAG edge appended so far.
    dag_edges: Mutex<Vec<DagEdge>>,
    /// Number of upcoming write checks that should fail.
    fail_next_writes: std::sync::atomic::AtomicUsize,
    /// Number of upcoming read checks that should fail.
    fail_next_reads: std::sync::atomic::AtomicUsize,
    /// When set, every read/write check fails with this reason.
    permanent_error: Mutex<Option<String>>,
    /// How many times `append_messages` got past its failure check, per session.
    append_calls: Mutex<HashMap<Uuid, usize>>,
    /// Artificial delay applied by mutating operations; zero disables it.
    per_op_delay: Mutex<std::time::Duration>,
}
impl MockBackend {
pub fn new() -> Self {
let mut capabilities = CapabilitySet::new();
capabilities.insert(ArchiveCapability::Dag);
capabilities.insert(ArchiveCapability::MetaEvents);
capabilities.insert(ArchiveCapability::ConnectorRegistry);
capabilities.insert(ArchiveCapability::SessionMapping);
Self {
capabilities,
sessions: Mutex::new(HashMap::new()),
messages: Mutex::new(HashMap::new()),
connectors: Mutex::new(HashMap::new()),
mappings: Mutex::new(HashMap::new()),
meta_events: Mutex::new(HashMap::new()),
dag_edges: Mutex::new(Vec::new()),
fail_next_writes: std::sync::atomic::AtomicUsize::new(0),
fail_next_reads: std::sync::atomic::AtomicUsize::new(0),
permanent_error: std::sync::Mutex::new(None),
append_calls: std::sync::Mutex::new(std::collections::HashMap::new()),
per_op_delay: std::sync::Mutex::new(std::time::Duration::ZERO),
}
}
}
impl MockBackend {
/// Build a mock with the exact capability set provided. All other state
/// starts empty (same as `new()`).
pub fn with_capabilities(capabilities: CapabilitySet) -> Self {
let mut m = Self::new();
m.capabilities = capabilities;
m
}
/// Test helper: does this mock have any meta events for the given session?
pub fn has_meta_events(&self, scroll_id: uuid::Uuid) -> bool {
self.meta_events
.lock()
.unwrap()
.get(&scroll_id)
.map(|v| !v.is_empty())
.unwrap_or(false)
}
/// Queue up `count` injected write failures. The next `count` calls to
/// any mutating API return `ArchivistError::Other("injected write failure")`
/// before touching state.
pub fn inject_write_failures(&self, count: usize) {
self.fail_next_writes
.store(count, std::sync::atomic::Ordering::SeqCst);
}
/// Queue up `count` injected read failures for per-scroll_id reads.
pub fn inject_read_failures(&self, count: usize) {
self.fail_next_reads
.store(count, std::sync::atomic::Ordering::SeqCst);
}
/// Simulate a permanently broken backend.
pub fn break_permanently(&self, reason: impl Into<String>) {
*self.permanent_error.lock().unwrap() = Some(reason.into());
}
pub fn clear_failures(&self) {
self.fail_next_writes
.store(0, std::sync::atomic::Ordering::SeqCst);
self.fail_next_reads
.store(0, std::sync::atomic::Ordering::SeqCst);
*self.permanent_error.lock().unwrap() = None;
}
/// Test helper: how many `MessageRecord`s this mock has for the given session.
pub fn appended_count(&self, scroll_id: uuid::Uuid) -> usize {
self.messages
.lock()
.unwrap()
.get(&scroll_id)
.map(|v| v.len())
.unwrap_or(0)
}
/// Test helper: how many times `append_messages` was invoked for the
/// given session (regardless of message count per invocation).
pub fn append_call_count(&self, scroll_id: uuid::Uuid) -> usize {
self.append_calls
.lock()
.unwrap()
.get(&scroll_id)
.copied()
.unwrap_or(0)
}
/// Test helper: artificially slow every mutating backend operation by
/// sleeping `d` before it touches state. Used to simulate a slow backend
/// for backpressure tests.
pub fn set_per_op_delay(&self, d: std::time::Duration) {
*self.per_op_delay.lock().unwrap() = d;
}
async fn maybe_delay(&self) {
let d = *self.per_op_delay.lock().unwrap();
if !d.is_zero() {
tokio::time::sleep(d).await;
}
}
pub(crate) fn check_write_failure(&self) -> Result<()> {
if let Some(reason) = self.permanent_error.lock().unwrap().clone() {
return Err(ArchivistError::Other(reason));
}
let prev = self
.fail_next_writes
.fetch_update(
std::sync::atomic::Ordering::SeqCst,
std::sync::atomic::Ordering::SeqCst,
|n| if n > 0 { Some(n - 1) } else { None },
)
.ok();
if prev.is_some() {
return Err(ArchivistError::Other("injected write failure".into()));
}
Ok(())
}
pub(crate) fn check_read_failure(&self) -> Result<()> {
if let Some(reason) = self.permanent_error.lock().unwrap().clone() {
return Err(ArchivistError::Other(reason));
}
let prev = self
.fail_next_reads
.fetch_update(
std::sync::atomic::Ordering::SeqCst,
std::sync::atomic::Ordering::SeqCst,
|n| if n > 0 { Some(n - 1) } else { None },
)
.ok();
if prev.is_some() {
return Err(ArchivistError::Other("injected read failure".into()));
}
Ok(())
}
}
impl Default for MockBackend {
fn default() -> Self {
Self::new()
}
}
/// Mandatory backend surface, served from the in-memory maps.
///
/// Mutating operations first run the write-failure check and then the
/// optional artificial delay; per-session reads run the read-failure check.
#[async_trait]
impl ArchiveBackend for MockBackend {
    /// Returns the capability set the mock was built with.
    fn capabilities(&self) -> &CapabilitySet {
        &self.capabilities
    }

    /// Always reports `Healthy`; injected or permanent failures are not reflected here.
    async fn health_check(&self) -> HealthStatus {
        HealthStatus::Healthy
    }

    /// Inserts or replaces the metadata stored under `meta.scroll_id`.
    async fn put_session(&self, meta: SessionMetadata) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let key = meta.scroll_id;
        self.sessions.lock().unwrap().insert(key, meta);
        Ok(())
    }

    /// Returns a clone of the stored metadata, or `None` when unknown.
    async fn get_session(&self, scroll_id: Uuid) -> Result<Option<SessionMetadata>> {
        self.check_read_failure()?;
        let sessions = self.sessions.lock().unwrap();
        Ok(sessions.get(&scroll_id).cloned())
    }

    /// Returns the first page of sessions, newest `updated_at` first
    /// (ties broken by descending `scroll_id`).
    ///
    /// Only the `connector_uids` filter and `limit` of the query are
    /// honored. `next_cursor` is always `None`; `total_count` is the number
    /// of sessions that passed the connector filter.
    async fn list_sessions_paged(&self, query: SessionListQuery) -> Result<SessionPage> {
        let mut matching: Vec<SessionMetadata> = {
            let sessions = self.sessions.lock().unwrap();
            sessions
                .values()
                .filter(|s| {
                    query.connector_uids.is_empty()
                        || query.connector_uids.contains(&s.connector_uid)
                })
                .cloned()
                .collect()
        };
        matching.sort_by(|lhs, rhs| {
            rhs.updated_at
                .cmp(&lhs.updated_at)
                .then_with(|| rhs.scroll_id.cmp(&lhs.scroll_id))
        });

        let total_count = matching.len();
        // Page size is capped at MAX_PAGE_LIMIT and is never below one.
        let page_size = query.limit.min(crate::types::MAX_PAGE_LIMIT).max(1);
        matching.truncate(page_size);

        Ok(SessionPage {
            items: matching,
            next_cursor: None,
            total_count: Some(total_count),
        })
    }

    /// Removes the session and its messages.
    ///
    /// Returns `ArchivistError::SessionUnknown` when no metadata is stored
    /// for `scroll_id`; in that case the messages are left untouched.
    async fn delete_session(&self, scroll_id: Uuid) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let removed = self.sessions.lock().unwrap().remove(&scroll_id);
        if removed.is_none() {
            return Err(ArchivistError::SessionUnknown(scroll_id));
        }
        self.messages.lock().unwrap().remove(&scroll_id);
        Ok(())
    }

    /// Appends `msgs` to the session's list and bumps its call counter.
    async fn append_messages(
        &self,
        scroll_id: Uuid,
        msgs: Vec<MessageRecord>,
    ) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        {
            let mut calls = self.append_calls.lock().unwrap();
            *calls.entry(scroll_id).or_default() += 1;
        }
        {
            let mut messages = self.messages.lock().unwrap();
            messages.entry(scroll_id).or_default().extend(msgs);
        }
        Ok(())
    }

    /// Returns up to `limit` (at least one) messages ordered by
    /// `(ts, message_id)`, starting strictly after `cursor` when given.
    ///
    /// `next_cursor` points at the last returned message when more remain.
    async fn get_messages_paged(
        &self,
        scroll_id: Uuid,
        cursor: Option<MessageCursor>,
        limit: usize,
    ) -> Result<MessagePage> {
        self.check_read_failure()?;
        let mut remaining: Vec<MessageRecord> = {
            let messages = self.messages.lock().unwrap();
            messages.get(&scroll_id).cloned().unwrap_or_default()
        };
        remaining.sort_by(|lhs, rhs| {
            lhs.ts
                .cmp(&rhs.ts)
                .then_with(|| lhs.message_id.cmp(&rhs.message_id))
        });
        if let Some(after) = cursor.as_ref() {
            remaining.retain(|m| (m.ts, m.message_id) > (after.ts, after.message_id));
        }

        let page_size = limit.max(1);
        let has_more = remaining.len() > page_size;
        remaining.truncate(page_size);

        let next_cursor = if has_more {
            remaining.last().map(|m| MessageCursor {
                ts: m.ts,
                message_id: m.message_id,
            })
        } else {
            None
        };
        Ok(MessagePage {
            items: remaining,
            next_cursor,
        })
    }

    /// Number of messages stored for the session (zero when unknown).
    async fn count_messages(&self, scroll_id: Uuid) -> Result<usize> {
        self.check_read_failure()?;
        let messages = self.messages.lock().unwrap();
        Ok(messages.get(&scroll_id).map_or(0, Vec::len))
    }

    /// Drops all messages of the session; the metadata is kept.
    async fn clear_session_messages(&self, scroll_id: Uuid) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        self.messages.lock().unwrap().remove(&scroll_id);
        Ok(())
    }

    /// `Some(self)` only when the `Dag` capability is in the set.
    fn as_dag(&self) -> Option<&dyn DagBackend> {
        let enabled = self.capabilities.contains(&ArchiveCapability::Dag);
        enabled.then_some(self as &dyn DagBackend)
    }

    /// `Some(self)` only when the `MetaEvents` capability is in the set.
    fn as_meta_events(&self) -> Option<&dyn MetaEventsBackend> {
        let enabled = self.capabilities.contains(&ArchiveCapability::MetaEvents);
        enabled.then_some(self as &dyn MetaEventsBackend)
    }

    /// `Some(self)` only when the `ConnectorRegistry` capability is in the set.
    fn as_connector_registry(&self) -> Option<&dyn ConnectorRegistryBackend> {
        let enabled = self
            .capabilities
            .contains(&ArchiveCapability::ConnectorRegistry);
        enabled.then_some(self as &dyn ConnectorRegistryBackend)
    }

    /// `Some(self)` only when the `SessionMapping` capability is in the set.
    fn as_session_mapping(&self) -> Option<&dyn SessionMappingBackend> {
        let enabled = self
            .capabilities
            .contains(&ArchiveCapability::SessionMapping);
        enabled.then_some(self as &dyn SessionMappingBackend)
    }
}
/// Connector registry served from the in-memory `connectors` map.
///
/// Only the mutating methods honor injected write failures and the
/// artificial delay; the lookups never fail.
#[async_trait]
impl ConnectorRegistryBackend for MockBackend {
    /// Inserts or replaces the record stored under `record.connector_uid`.
    async fn put_connector(&self, record: ConnectorRecord) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let key = record.connector_uid;
        self.connectors.lock().unwrap().insert(key, record);
        Ok(())
    }

    /// Returns a clone of the record, or `None` when the UID is unknown.
    async fn get_connector(&self, connector_uid: Uuid) -> Result<Option<ConnectorRecord>> {
        let connectors = self.connectors.lock().unwrap();
        Ok(connectors.get(&connector_uid).cloned())
    }

    /// Returns clones of all records, in the map's iteration order.
    async fn list_connectors(&self) -> Result<Vec<ConnectorRecord>> {
        let connectors = self.connectors.lock().unwrap();
        Ok(connectors.values().cloned().collect())
    }

    /// Returns the UID of a record whose `client_native_id` equals the argument.
    async fn resolve_connector_uid(&self, client_native_id: &str) -> Result<Option<Uuid>> {
        let connectors = self.connectors.lock().unwrap();
        Ok(connectors
            .values()
            .find_map(|c| (c.client_native_id == client_native_id).then_some(c.connector_uid)))
    }

    /// Sets the fingerprint of an existing record.
    ///
    /// Returns `ArchivistError::ConnectorUnknown` when the UID is not registered.
    async fn update_connector_fingerprint(
        &self,
        connector_uid: Uuid,
        fingerprint: String,
    ) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let mut connectors = self.connectors.lock().unwrap();
        match connectors.get_mut(&connector_uid) {
            Some(record) => {
                record.fingerprint = Some(fingerprint);
                Ok(())
            }
            None => Err(ArchivistError::ConnectorUnknown(connector_uid)),
        }
    }
}
/// Session mapping served from the in-memory `mappings` map, which stores
/// only `(connector_uid, native_session_id) -> scroll_id`.
#[async_trait]
impl SessionMappingBackend for MockBackend {
    /// Inserts or replaces the mapping for `(connector_uid, native_session_id)`.
    async fn put_mapping(
        &self,
        connector_uid: Uuid,
        native_session_id: &str,
        scroll_id: Uuid,
    ) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let key = (connector_uid, native_session_id.to_string());
        self.mappings.lock().unwrap().insert(key, scroll_id);
        Ok(())
    }

    /// Returns the scroll id mapped to the pair, or `None`.
    async fn get_mapping(
        &self,
        connector_uid: Uuid,
        native_session_id: &str,
    ) -> Result<Option<Uuid>> {
        let key = (connector_uid, native_session_id.to_string());
        let mappings = self.mappings.lock().unwrap();
        Ok(mappings.get(&key).copied())
    }

    /// Lists the mappings of one connector.
    ///
    /// The mock does not store creation time or aliases, so each returned
    /// row carries `created_at = now`, `alias_of = None` and `version = 1`.
    async fn list_mappings_for_connector(
        &self,
        connector_uid: Uuid,
    ) -> Result<Vec<SessionMapping>> {
        let mappings = self.mappings.lock().unwrap();
        Ok(mappings
            .iter()
            .filter_map(|((owner, native_id), scroll_id)| {
                if *owner != connector_uid {
                    return None;
                }
                Some(SessionMapping {
                    version: 1,
                    connector_uid: *owner,
                    native_session_id: native_id.clone(),
                    scroll_id: *scroll_id,
                    created_at: chrono::Utc::now(),
                    alias_of: None,
                })
            })
            .collect())
    }

    /// Returns `(connector_uid, scroll_id)` of a mapping with this native id,
    /// whichever the map yields first.
    async fn find_owner(&self, native_session_id: &str) -> Result<Option<(Uuid, Uuid)>> {
        let mappings = self.mappings.lock().unwrap();
        Ok(mappings.iter().find_map(|((owner, native_id), scroll_id)| {
            (native_id.as_str() == native_session_id).then_some((*owner, *scroll_id))
        }))
    }

    /// Replaces all mappings of `connector_uid` with `mappings`.
    ///
    /// Every supplied row is stored under `connector_uid`, regardless of the
    /// row's own `connector_uid` field.
    async fn rewrite_connector_mappings(
        &self,
        connector_uid: Uuid,
        mappings: Vec<SessionMapping>,
    ) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let mut table = self.mappings.lock().unwrap();
        table.retain(|(owner, _), _| *owner != connector_uid);
        table.extend(
            mappings
                .into_iter()
                .map(|row| ((connector_uid, row.native_session_id), row.scroll_id)),
        );
        Ok(())
    }
}
/// DAG edges served from the in-memory `dag_edges` list.
#[async_trait]
impl DagBackend for MockBackend {
    /// Appends the edge; duplicates are not detected.
    async fn append_dag_edge(&self, edge: DagEdge) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let mut edges = self.dag_edges.lock().unwrap();
        edges.push(edge);
        Ok(())
    }

    /// Returns the metadata of every direct child of `parent`.
    ///
    /// Children that have an edge but no stored metadata are left out.
    async fn get_children(&self, parent: Uuid) -> Result<Vec<SessionMetadata>> {
        self.check_read_failure()?;
        let edges = self.dag_edges.lock().unwrap();
        let sessions = self.sessions.lock().unwrap();
        let mut children = Vec::new();
        for edge in edges.iter().filter(|e| e.parent == parent) {
            if let Some(child) = sessions.get(&edge.child) {
                children.push(child.clone());
            }
        }
        Ok(children)
    }

    /// Returns the edges whose `parent` equals `root` (direct edges only).
    async fn get_dag_edges(&self, root: Uuid) -> Result<Vec<DagEdge>> {
        self.check_read_failure()?;
        let edges = self.dag_edges.lock().unwrap();
        let mut outgoing = Vec::new();
        for edge in edges.iter() {
            if edge.parent == root {
                outgoing.push(edge.clone());
            }
        }
        Ok(outgoing)
    }
}
/// Meta events served from the in-memory `meta_events` map; meta-session
/// queries read the shared `sessions` map.
#[async_trait]
impl MetaEventsBackend for MockBackend {
    /// Appends `events` to the session's event list, creating it on first use.
    async fn append_meta_events(
        &self,
        scroll_id: Uuid,
        events: Vec<MetaEventRecord>,
    ) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let mut store = self.meta_events.lock().unwrap();
        store.entry(scroll_id).or_default().extend(events);
        Ok(())
    }

    /// Returns a copy of the session's events (empty when none are stored).
    async fn get_meta_events(&self, scroll_id: Uuid) -> Result<Vec<MetaEventRecord>> {
        self.check_read_failure()?;
        let store = self.meta_events.lock().unwrap();
        Ok(store.get(&scroll_id).cloned().unwrap_or_default())
    }

    /// Updates connection state on an existing session.
    ///
    /// Returns `ArchivistError::SessionUnknown` when the session is not stored.
    async fn update_meta_session_status(
        &self,
        scroll_id: Uuid,
        is_connected: bool,
        current_session_id: Option<Uuid>,
    ) -> Result<()> {
        self.check_write_failure()?;
        self.maybe_delay().await;
        let mut sessions = self.sessions.lock().unwrap();
        match sessions.get_mut(&scroll_id) {
            Some(session) => {
                session.is_connected = Some(is_connected);
                session.current_session_id = current_session_id;
                Ok(())
            }
            None => Err(ArchivistError::SessionUnknown(scroll_id)),
        }
    }

    /// Returns every stored session whose kind is `AcpConnection`.
    async fn list_meta_sessions(&self) -> Result<Vec<SessionMetadata>> {
        let sessions = self.sessions.lock().unwrap();
        let mut meta_sessions = Vec::new();
        for session in sessions.values() {
            if matches!(session.kind, crate::types::SessionKind::AcpConnection) {
                meta_sessions.push(session.clone());
            }
        }
        Ok(meta_sessions)
    }

    /// Returns a stored session whose `acp_client_id` equals `client_id`.
    ///
    /// The session kind is not checked.
    async fn find_meta_session_by_client(
        &self,
        client_id: &str,
    ) -> Result<Option<SessionMetadata>> {
        let sessions = self.sessions.lock().unwrap();
        let hit = sessions
            .values()
            .find(|session| session.acp_client_id.as_deref() == Some(client_id));
        Ok(hit.cloned())
    }
}
#[cfg(test)]
mod failure_injection_tests {
    use super::*;

    /// Two queued write failures reject exactly the next two writes; the
    /// third write goes through.
    #[tokio::test]
    async fn injected_write_failure_returns_error_then_recovers() {
        let backend = MockBackend::new();
        backend.inject_write_failures(2);
        let scroll = uuid::Uuid::nil();

        for _ in 0..2 {
            let rejected = backend.append_messages(scroll, Vec::new()).await;
            assert!(rejected.is_err());
        }

        // The queue is drained, so the backend behaves normally again.
        let accepted = backend.append_messages(scroll, Vec::new()).await;
        assert!(accepted.is_ok());
    }
}
@@ -0,0 +1,20 @@
//! Archive backend trait layer.
//!
//! See `docs/plans/2026-04-18-archivist-phase2-design.md` for the design.
pub mod capability;
pub mod health;
pub mod traits;
#[cfg(any(test, feature = "test-utils"))]
pub mod contract;
#[cfg(any(test, feature = "test-utils"))]
pub mod mock;
pub use capability::{ArchiveCapability, CapabilitySet};
pub use health::HealthStatus;
pub use traits::{
ArchiveBackend, ConnectorRegistryBackend, DagBackend, MetaEventsBackend,
SearchBackend, SessionMappingBackend,
};
@@ -0,0 +1,167 @@
//! Archive backend trait definitions.
//!
//! `ArchiveBackend` is mandatory for every backend: session + message
//! primitives plus self-description (capabilities, health). Optional
//! sub-traits (`SearchBackend`, `DagBackend`, `MetaEventsBackend`,
//! `ConnectorRegistryBackend`, `SessionMappingBackend`) are surfaced
//! via `as_xxx() -> Option<&dyn SubTrait>` accessors returning a
//! borrow from `self`.
//!
//! See `docs/plans/2026-04-18-archivist-phase2-design.md` §Trait Definitions.
use async_trait::async_trait;
use uuid::Uuid;
use crate::backend::capability::CapabilitySet;
use crate::backend::health::HealthStatus;
use crate::error::Result;
use crate::types::{
ConnectorRecord, DagEdge, MessageCursor, MessagePage, MessageRecord,
MetaEventRecord, SessionListQuery, SessionMapping, SessionMetadata,
SessionPage,
};
// ---------------------------------------------------------------------------
// Mandatory backend surface
// ---------------------------------------------------------------------------
/// An archive storage backend.
///
/// All backends must implement session metadata and message primitives;
/// optional capabilities are exposed through `as_xxx()` accessors that
/// return `None` when unsupported. `JsonlBackend` implements every
/// sub-trait except `SearchBackend`.
#[async_trait]
pub trait ArchiveBackend: Send + Sync {
// --- Self-description ---
/// The set of optional capabilities this backend advertises.
fn capabilities(&self) -> &CapabilitySet;
/// Reports the backend's current health.
async fn health_check(&self) -> HealthStatus;
// --- Session metadata ---
/// Stores `meta`. NOTE(review): the in-memory mock replaces any existing
/// entry with the same `scroll_id`; confirm upsert is the intended contract.
async fn put_session(&self, meta: SessionMetadata) -> Result<()>;
/// Fetches the metadata for `scroll_id`; `Ok(None)` when it is not stored.
async fn get_session(&self, scroll_id: Uuid) -> Result<Option<SessionMetadata>>;
/// Lists sessions matching `query`, one page at a time.
async fn list_sessions_paged(&self, query: SessionListQuery) -> Result<SessionPage>;
/// Deletes the session. NOTE(review): the in-memory mock also drops the
/// session's messages and returns `SessionUnknown` for a missing session;
/// confirm other backends agree.
async fn delete_session(&self, scroll_id: Uuid) -> Result<()>;
// --- Messages ---
/// Appends `messages` to the session identified by `scroll_id`.
async fn append_messages(
&self,
scroll_id: Uuid,
messages: Vec<MessageRecord>,
) -> Result<()>;
/// Returns one page of at most `limit` messages for the session.
/// NOTE(review): the in-memory mock orders by `(ts, message_id)` and
/// treats `cursor` as exclusive; confirm this is the required ordering.
async fn get_messages_paged(
&self,
scroll_id: Uuid,
cursor: Option<MessageCursor>,
limit: usize,
) -> Result<MessagePage>;
/// Number of messages stored for the session.
async fn count_messages(&self, scroll_id: Uuid) -> Result<usize>;
/// Removes all messages of the session.
async fn clear_session_messages(&self, scroll_id: Uuid) -> Result<()>;
// --- Optional capability accessors ---
// Each accessor defaults to `None`; a backend overrides the ones it supports.
/// Content-search view of this backend, if supported.
fn as_search(&self) -> Option<&dyn SearchBackend> {
None
}
/// DAG view of this backend, if supported.
fn as_dag(&self) -> Option<&dyn DagBackend> {
None
}
/// Meta-events view of this backend, if supported.
fn as_meta_events(&self) -> Option<&dyn MetaEventsBackend> {
None
}
/// Connector-registry view of this backend, if supported.
fn as_connector_registry(&self) -> Option<&dyn ConnectorRegistryBackend> {
None
}
/// Session-mapping view of this backend, if supported.
fn as_session_mapping(&self) -> Option<&dyn SessionMappingBackend> {
None
}
}
// ---------------------------------------------------------------------------
// Optional sub-traits
// ---------------------------------------------------------------------------
/// Content search. Reserved in Phase 2; not wired to `JsonlBackend`.
///
/// `packages/api/src/archivist/search_task.rs` continues to serve content
/// search via ripgrep — this trait exists as a forward-compatible hook for
/// indexed backends (ChromaDB, tantivy, …) arriving in Phase 3+.
///
/// Exposed through `ArchiveBackend::as_search`, which returns `None` unless
/// a backend overrides it.
#[async_trait]
pub trait SearchBackend: Send + Sync {
// Deliberately left without methods; Phase 3 adds the concrete
// query/result shapes when a real indexed backend lands.
}
/// Optional sub-trait for parent/child relationships between sessions,
/// exposed through `ArchiveBackend::as_dag`.
#[async_trait]
pub trait DagBackend: Send + Sync {
/// Records one edge.
async fn append_dag_edge(&self, edge: DagEdge) -> Result<()>;
/// Returns the metadata of the sessions that are children of `parent`.
async fn get_children(&self, parent: Uuid) -> Result<Vec<SessionMetadata>>;
/// Returns edges for `root`. NOTE(review): the in-memory mock returns only
/// edges whose parent is `root`; confirm whether the whole subtree is expected.
async fn get_dag_edges(&self, root: Uuid) -> Result<Vec<DagEdge>>;
}
/// Optional sub-trait for meta events and meta-session bookkeeping,
/// exposed through `ArchiveBackend::as_meta_events`.
#[async_trait]
pub trait MetaEventsBackend: Send + Sync {
/// Appends `events` to the session identified by `scroll_id`.
async fn append_meta_events(
&self,
scroll_id: Uuid,
events: Vec<MetaEventRecord>,
) -> Result<()>;
/// Returns the meta events stored for `scroll_id`.
async fn get_meta_events(&self, scroll_id: Uuid) -> Result<Vec<MetaEventRecord>>;
/// Updates the connection flag and current session id of a meta session.
async fn update_meta_session_status(
&self,
scroll_id: Uuid,
is_connected: bool,
current_session_id: Option<Uuid>,
) -> Result<()>;
/// Lists meta sessions. NOTE(review): the in-memory mock selects sessions
/// whose kind is `AcpConnection`; confirm that is the definition.
async fn list_meta_sessions(&self) -> Result<Vec<SessionMetadata>>;
/// Finds the meta session associated with `client_id`; `Ok(None)` when absent.
async fn find_meta_session_by_client(
&self,
client_id: &str,
) -> Result<Option<SessionMetadata>>;
}
/// Optional sub-trait for storing connector records,
/// exposed through `ArchiveBackend::as_connector_registry`.
#[async_trait]
pub trait ConnectorRegistryBackend: Send + Sync {
/// Stores `record`.
async fn put_connector(&self, record: ConnectorRecord) -> Result<()>;
/// Fetches the record for `connector_uid`; `Ok(None)` when it is not stored.
async fn get_connector(&self, connector_uid: Uuid) -> Result<Option<ConnectorRecord>>;
/// Lists all stored connector records; an empty registry yields an empty vector.
async fn list_connectors(&self) -> Result<Vec<ConnectorRecord>>;
/// Maps a client-native connector id to its UID; `Ok(None)` when unknown.
async fn resolve_connector_uid(&self, client_native_id: &str) -> Result<Option<Uuid>>;
/// Replaces the fingerprint of an existing connector.
async fn update_connector_fingerprint(
&self,
connector_uid: Uuid,
fingerprint: String,
) -> Result<()>;
}
/// Optional sub-trait mapping a connector's native session ids to scroll ids,
/// exposed through `ArchiveBackend::as_session_mapping`.
#[async_trait]
pub trait SessionMappingBackend: Send + Sync {
/// Records that `native_session_id` on `connector_uid` maps to `scroll_id`.
async fn put_mapping(
&self,
connector_uid: Uuid,
native_session_id: &str,
scroll_id: Uuid,
) -> Result<()>;
/// Looks up the scroll id for the pair; `Ok(None)` when no mapping exists.
async fn get_mapping(
&self,
connector_uid: Uuid,
native_session_id: &str,
) -> Result<Option<Uuid>>;
/// Lists every mapping recorded for `connector_uid`.
async fn list_mappings_for_connector(
&self,
connector_uid: Uuid,
) -> Result<Vec<SessionMapping>>;
/// Finds a mapping by native session id alone. NOTE(review): the returned
/// pair appears to be `(connector_uid, scroll_id)`, as produced by the
/// in-memory mock; confirm the order against other implementations.
async fn find_owner(&self, native_session_id: &str) -> Result<Option<(Uuid, Uuid)>>;
/// Replace the entire mapping table for `connector_uid` with `mappings`.
///
/// Phase 2 uses this to remove an individual mapping — callers read the
/// current table via `list_mappings_for_connector`, filter out the
/// unwanted row, and call this method with the remainder. Implementations
/// must also invalidate any in-memory cache entries that reference the
/// removed rows so subsequent `get_mapping` / `find_owner` calls don't
/// return stale hits.
async fn rewrite_connector_mappings(
&self,
connector_uid: Uuid,
mappings: Vec<SessionMapping>,
) -> Result<()>;
}
@@ -0,0 +1,624 @@
//! `JsonlBackend` — the Phase 2 concrete backend.
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use async_trait::async_trait;
use chrono::Utc;
use tokio::sync::RwLock;
use uuid::Uuid;
use crate::backend::{
ArchiveBackend, ArchiveCapability, CapabilitySet, ConnectorRegistryBackend,
DagBackend, HealthStatus, MetaEventsBackend, SessionMappingBackend,
};
use crate::error::{ArchivistError, Result};
use crate::storage::{
append_ndjson, read_connector_index, read_json, read_ndjson, write_json, ArchivePaths,
};
use crate::types::{
ConnectorRecord, MessageCursor, MessagePage, MessageRecord, SessionCompleteness,
SessionKind, SessionListQuery, SessionMapping, SessionMetadata, SessionPage,
};
/// NDJSON/JSON/TSV file-based `ArchiveBackend`.
pub struct JsonlBackend {
/// Path helper rooted at the archive directory.
pub(crate) paths: ArchivePaths,
/// In-memory connector records; starts empty.
/// NOTE(review): presumably keyed by `connector_uid` — confirm.
pub(crate) connector_cache: RwLock<HashMap<Uuid, ConnectorRecord>>,
/// In-memory `(connector_uid, native_session_id)` → scroll id cache; starts empty.
pub(crate) session_cache: RwLock<HashMap<(Uuid, String), Uuid>>,
/// Capabilities returned by `capabilities()`.
pub(crate) capabilities: CapabilitySet,
}
impl JsonlBackend {
    /// Opens (and if necessary initializes) an archive at `archive_root`.
    ///
    /// The `.contexts`, `.db/connectors` and `.files` directories are created
    /// when missing, both caches start empty, and the backend advertises the
    /// `Dag`, `MetaEvents`, `ConnectorRegistry` and `SessionMapping`
    /// capabilities. Matches `FileBasedArchivist::new`.
    ///
    /// # Errors
    ///
    /// Fails when one of the directories cannot be created.
    pub async fn new(archive_root: PathBuf) -> Result<Self> {
        let paths = ArchivePaths::new(archive_root);

        let required_dirs = [
            paths.root().join(".contexts"),
            paths.root().join(".db").join("connectors"),
            paths.root().join(".files"),
        ];
        for dir in required_dirs {
            tokio::fs::create_dir_all(dir).await?;
        }

        let capabilities = HashSet::from([
            ArchiveCapability::Dag,
            ArchiveCapability::MetaEvents,
            ArchiveCapability::ConnectorRegistry,
            ArchiveCapability::SessionMapping,
        ]);

        Ok(Self {
            paths,
            connector_cache: RwLock::new(HashMap::new()),
            session_cache: RwLock::new(HashMap::new()),
            capabilities,
        })
    }

    /// The path helper this backend was created with.
    pub fn paths(&self) -> &ArchivePaths {
        &self.paths
    }

    /// Loads a session's messages and orders them by `(ts, message_id)`.
    ///
    /// Any failure to read the message file is treated as "no messages":
    /// the result is then an empty vector, not an error.
    pub(crate) async fn read_messages_sorted(
        &self,
        scroll_id: Uuid,
    ) -> Result<Vec<MessageRecord>> {
        let path = self.paths.messages_path_for_read(scroll_id);
        let mut records: Vec<MessageRecord> = read_ndjson(&path).await.unwrap_or_default();
        records.sort_by(|lhs, rhs| {
            lhs.ts
                .cmp(&rhs.ts)
                .then_with(|| lhs.message_id.cmp(&rhs.message_id))
        });
        Ok(records)
    }

    /// Finds the `(connector_uid, native_session_id)` pair mapped to
    /// `scroll_id`.
    ///
    /// The in-memory session cache is consulted first. On a miss, the
    /// connector index is read and each listed connector's sessions file is
    /// scanned. An unreadable index yields `None`; an unreadable sessions
    /// file is skipped.
    async fn find_mapping_for_scroll_id(&self, scroll_id: Uuid) -> Option<(Uuid, String)> {
        // Fast path: the cache, held only for the duration of the lookup.
        {
            let cache = self.session_cache.read().await;
            let cached_hit = cache.iter().find(|(_, cached)| **cached == scroll_id);
            if let Some(((connector_uid, native_id), _)) = cached_hit {
                return Some((*connector_uid, native_id.clone()));
            }
        }

        // Slow path: walk the on-disk index.
        let index_path = self.paths.connector_index_tsv();
        let rows = read_connector_index(&index_path).await.ok()?;
        for row in &rows {
            let sessions_path = self.paths.sessions_path_for_read(row.connector_uid);
            let mappings: Vec<SessionMapping> = match read_ndjson(&sessions_path).await {
                Ok(loaded) => loaded,
                Err(_) => continue,
            };
            let on_disk_hit = mappings.into_iter().find(|m| m.scroll_id == scroll_id);
            if let Some(mapping) = on_disk_hit {
                return Some((row.connector_uid, mapping.native_session_id));
            }
        }
        None
    }

    /// Loads the metadata of every session mapped to `connector_uid`,
    /// hidden sessions included; `list_sessions_paged` applies visibility
    /// filters itself.
    ///
    /// A mapping whose `session.json` does not exist is surfaced as a
    /// placeholder with `SessionCompleteness::Discovered`.
    ///
    /// # Errors
    ///
    /// Fails when the sessions file cannot be read, or when a `session.json`
    /// fails to load for any reason other than "not found".
    async fn load_sessions_for_connector(
        &self,
        connector_uid: Uuid,
    ) -> Result<Vec<SessionMetadata>> {
        let sessions_path = self.paths.sessions_path_for_read(connector_uid);
        let mappings: Vec<SessionMapping> = read_ndjson(&sessions_path).await?;

        let mut sessions = Vec::new();
        for mapping in mappings {
            let metadata_path = self.paths.session_json(mapping.scroll_id);
            let session = match read_json::<SessionMetadata>(&metadata_path).await {
                Ok(found) => found,
                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                    tracing::debug!(
                        scroll_id = %mapping.scroll_id,
                        "session.json missing, surfacing as Discovered stub"
                    );
                    Self::discovered_stub_for_mapping(connector_uid, &mapping)
                }
                Err(err) => return Err(err.into()),
            };
            sessions.push(session);
        }
        Ok(sessions)
    }

    /// Builds the placeholder metadata used when a mapping exists but its
    /// `session.json` does not: ids and timestamps come from the mapping,
    /// everything else is empty or default.
    fn discovered_stub_for_mapping(
        connector_uid: Uuid,
        mapping: &SessionMapping,
    ) -> SessionMetadata {
        SessionMetadata {
            version: 1,
            scroll_id: mapping.scroll_id,
            created_at: mapping.created_at,
            updated_at: mapping.created_at,
            title: None,
            connector_uid,
            native_session_id: Some(mapping.native_session_id.clone()),
            agent_id: None,
            parent_scroll_id: None,
            continuation: None,
            tags: Vec::new(),
            metadata: serde_json::json!({}),
            no_update: false,
            kind: SessionKind::Chat,
            acp_client_id: None,
            is_connected: None,
            current_session_id: None,
            models: None,
            modes: None,
            config_options: None,
            completeness: SessionCompleteness::Discovered,
            matrix_room_id: None,
            matrix_sharing_active: false,
            matrix_shared_at: None,
            is_subagent: false,
            subagent_type: None,
            spawning_tool_use_id: None,
        }
    }
}
/// Returns true if `session` satisfies every filter in `query`.
///
/// `connector_uid` is already honored by the caller (it picks which connector
/// directories to scan), so we do not re-check it here.
fn matches_query(
session: &SessionMetadata,
query: &crate::types::SessionListQuery,
) -> bool {
// Visibility
if !query.include_hidden && (session.no_update || session.is_subagent) {
return false;
}
// Project scope — project_ids lives in metadata.project_id
if !query.project_ids.is_empty() {
let session_project_id = session
.metadata
.get("project_id")
.and_then(|v| v.as_str());
match session_project_id {
Some(pid) => {
if !query.project_ids.iter().any(|q| q.as_str() == pid) {
return false;
}
}
None => return false,
}
}
// Project path filter — exact match on metadata.project_path
if let Some(ref path) = query.project_path {
let session_path = session
.metadata
.get("project_path")
.and_then(|v| v.as_str());
if session_path != Some(path.as_str()) {
return false;
}
}
// Title filter — case-insensitive substring.
if let Some(q) = query.title_query.as_ref() {
let needle = q.to_lowercase();
let haystack = match session.title.as_ref() {
Some(t) => t.to_lowercase(),
None => return false,
};
if !haystack.contains(&needle) {
return false;
}
}
// Tag filter — all requested tags must be present on the session.
if !query.tags.is_empty() {
for required in &query.tags {
if !session.tags.iter().any(|t| t == required) {
return false;
}
}
}
// Model filter — case-insensitive substring on metadata.model.
if let Some(q) = query.model_filter.as_ref() {
let needle = q.to_lowercase();
let haystack = session
.metadata
.get("model")
.and_then(|v| v.as_str())
.map(|s| s.to_lowercase());
match haystack {
Some(h) if h.contains(&needle) => {}
_ => return false,
}
}
true
}
#[async_trait]
impl ArchiveBackend for JsonlBackend {
/// Returns a reference to the capability set stored on this backend.
fn capabilities(&self) -> &CapabilitySet {
&self.capabilities
}
/// Reports the backend as healthy when the archive root can be stat'ed and
/// is a directory; otherwise returns `Unavailable` with the reason.
async fn health_check(&self) -> HealthStatus {
    let stat = tokio::fs::metadata(self.paths.root()).await;
    let reason: String = match stat {
        Ok(meta) if meta.is_dir() => return HealthStatus::Healthy,
        Ok(_) => "archive root is not a directory".into(),
        Err(e) => format!("stat archive root failed: {e}"),
    };
    HealthStatus::Unavailable { reason }
}
/// Writes `meta` to the session's `session.json`, creating the session
/// directory first if it does not exist yet.
async fn put_session(&self, meta: SessionMetadata) -> Result<()> {
    let scroll_id = meta.scroll_id;
    let dir = self.paths.session_dir(scroll_id);
    tokio::fs::create_dir_all(&dir).await?;
    let target = self.paths.session_json(scroll_id);
    write_json(&target, &meta).await?;
    Ok(())
}
/// Loads the metadata stored in the session's `session.json`.
///
/// Returns `Ok(None)` when the file does not exist; any other read error
/// is propagated to the caller.
async fn get_session(&self, scroll_id: Uuid) -> Result<Option<SessionMetadata>> {
// Sessions are addressed by scroll_id alone; this method takes no archive selector.
let session_json_path = self.paths.session_json(scroll_id);
match read_json(&session_json_path).await {
Ok(metadata) => Ok(Some(metadata)),
// A missing file means "no such session", not a failure.
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(e) => Err(e.into()),
}
}
/// Returns one page of sessions matching `query`, newest first.
///
/// Sessions are gathered from the connectors named in the query, or from
/// every non-alias connector in the index when none are named. A connector
/// whose sessions cannot be loaded is logged and skipped. Results are
/// ordered by `(updated_at, scroll_id)` descending and the cursor keeps
/// only entries strictly below it in that order. `total_count` is the
/// number of entries left after the cursor was applied, before the page
/// limit is enforced.
async fn list_sessions_paged(&self, query: SessionListQuery) -> Result<SessionPage> {
    use crate::types::{SessionCursor, SessionPage, MAX_PAGE_LIMIT};

    // Pick the connectors to scan.
    let targets: Vec<Uuid> = if query.connector_uids.is_empty() {
        let index_path = self.paths.connector_index_tsv();
        read_connector_index(&index_path)
            .await?
            .into_iter()
            .filter(|row| row.alias_of.is_none())
            .map(|row| row.connector_uid)
            .collect()
    } else {
        query.connector_uids.clone()
    };

    // Gather every matching session across the selected connectors.
    let mut page_items: Vec<SessionMetadata> = Vec::new();
    for connector_uid in targets {
        match self.load_sessions_for_connector(connector_uid).await {
            Ok(sessions) => {
                page_items.extend(sessions.into_iter().filter(|s| matches_query(s, &query)));
            }
            Err(e) => {
                tracing::warn!(
                    connector_uid = %connector_uid,
                    error = %e,
                    "Failed to list sessions for connector during paged scan, skipping"
                );
            }
        }
    }

    // Newest first, with scroll_id as the tie-breaker.
    page_items.sort_by(|a, b| (b.updated_at, b.scroll_id).cmp(&(a.updated_at, a.scroll_id)));

    // Drop everything at or before the cursor position.
    if let Some(cursor) = query.cursor.as_ref() {
        let boundary = (cursor.updated_at, cursor.scroll_id);
        page_items.retain(|s| (s.updated_at, s.scroll_id) < boundary);
    }

    let total_count = page_items.len();

    // Clamp the requested limit into [1, MAX_PAGE_LIMIT] and cut the page.
    let effective_limit = query.limit.min(MAX_PAGE_LIMIT).max(1);
    let has_more = total_count > effective_limit;
    page_items.truncate(effective_limit);

    let next_cursor = match page_items.last() {
        Some(last) if has_more => Some(SessionCursor {
            updated_at: last.updated_at,
            scroll_id: last.scroll_id,
        }),
        _ => None,
    };

    Ok(SessionPage {
        items: page_items,
        next_cursor,
        total_count: Some(total_count),
    })
}
/// Removes a session's directory from disk and drops its entry from the
/// in-memory session cache.
///
/// Returns `ArchivistError::SessionUnknown` when the session directory does
/// not exist. A directory that lacks `session.json` is still deleted; in
/// that case the owning connector is looked up through the connector
/// session mappings so the cache entry can be evicted. Any other failure
/// to read `session.json` is returned before anything is deleted.
async fn delete_session(&self, scroll_id: Uuid) -> Result<()> {
    let session_dir = self.paths.session_dir(scroll_id);
    let session_json_path = self.paths.session_json(scroll_id);
    if !session_dir.exists() {
        return Err(ArchivistError::SessionUnknown(scroll_id));
    }

    // Work out which cache entry belongs to this session. session.json is
    // the primary source; when it is missing (append_messages can leave a
    // session in that state) fall back to the connector mappings instead
    // of refusing to delete.
    let owner: Option<(Uuid, Option<String>)> =
        match read_json::<SessionMetadata>(&session_json_path).await {
            Ok(metadata) => Some((metadata.connector_uid, metadata.native_session_id)),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                tracing::warn!(
                    scroll_id = %scroll_id,
                    "session.json missing during delete, falling back to connector mappings"
                );
                self.find_mapping_for_scroll_id(scroll_id)
                    .await
                    .map(|(connector_uid, native_id)| (connector_uid, Some(native_id)))
            }
            Err(e) => return Err(e.into()),
        };

    // Delete the session directory and all its contents.
    tokio::fs::remove_dir_all(&session_dir).await.map_err(|e| {
        tracing::error!("Failed to delete session directory {:?}: {}", session_dir, e);
        ArchivistError::Io(e)
    })?;
    tracing::info!(
        "Deleted session directory for scroll_id: {}",
        scroll_id
    );

    // Evict the cache entry, if the owner and native id are known.
    if let Some((connector_uid, Some(native_id))) = &owner {
        let mut cache = self.session_cache.write().await;
        cache.remove(&(*connector_uid, native_id.clone()));
    }

    // Note: the mapping row is NOT removed from the connector's sessions
    // file, which is append-only. Because load_sessions_for_connector
    // reports mappings without a session.json as Discovered stubs, a deleted
    // session can still show up in listings until the mapping is rewritten.
    // A future enhancement could add a "deleted" flag or periodic compaction.
    match owner {
        Some((connector_uid, _)) => tracing::info!(
            "Successfully deleted session {} (connector: {})",
            scroll_id,
            connector_uid
        ),
        None => tracing::info!(
            "Successfully deleted session {} (connector: unknown)",
            scroll_id
        ),
    }
    Ok(())
}
/// Appends `messages` to the session's message log and bumps the
/// `updated_at` timestamp in `session.json`.
///
/// The session directory is created if needed. When `session.json` does
/// not exist, a minimal record is rebuilt from the connector session
/// mappings; if no mapping is found either, the messages stay written, an
/// error is logged and `Ok(())` is returned without creating metadata.
///
/// # Errors
///
/// Returns an error if a message cannot be appended, if `session.json`
/// exists but cannot be read (it is left untouched in that case, although
/// the messages have already been appended), or if the metadata write
/// fails.
async fn append_messages(
    &self,
    scroll_id: Uuid,
    messages: Vec<MessageRecord>,
) -> Result<()> {
    // Ensure session directory exists (handles resync case where directory was deleted)
    self.paths.ensure_dirs(scroll_id).await?;

    // Append each message to messages.jsonl
    let messages_path = self.paths.messages_path_for_write(scroll_id);
    for message in &messages {
        append_ndjson(&messages_path, message).await?;
    }

    // Update session.json timestamp (or create if missing)
    let session_json_path = self.paths.session_json(scroll_id);
    let now = Utc::now();
    let session_metadata = match read_json::<SessionMetadata>(&session_json_path).await {
        Ok(mut metadata) => {
            metadata.updated_at = now;
            metadata
        }
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            // session.json doesn't exist, create minimal metadata.
            // This handles resync case where directory was deleted but mapping still exists
            tracing::info!(
                scroll_id = %scroll_id,
                "Creating minimal session.json during append (was missing)"
            );
            // Look up the correct connector_uid and native_session_id via session mappings
            let (connector_uid, native_session_id) =
                match self.find_mapping_for_scroll_id(scroll_id).await {
                    Some(mapping) => mapping,
                    None => {
                        tracing::error!(
                            scroll_id = %scroll_id,
                            "Cannot reconstruct session.json: no connector mapping found. \
                             Messages written but session metadata will remain missing."
                        );
                        return Ok(());
                    }
                };
            SessionMetadata {
                version: 1,
                scroll_id,
                created_at: now,
                updated_at: now,
                title: None,
                connector_uid,
                native_session_id: Some(native_session_id),
                agent_id: None,
                parent_scroll_id: None,
                continuation: None,
                tags: Vec::new(),
                metadata: serde_json::json!({}),
                no_update: false,
                kind: SessionKind::Chat,
                acp_client_id: None,
                is_connected: None,
                current_session_id: None,
                models: None,
                modes: None,
                config_options: None,
                completeness: SessionCompleteness::default(),
                matrix_room_id: None,
                matrix_sharing_active: false,
                matrix_shared_at: None,
                is_subagent: false,
                subagent_type: None,
                spawning_tool_use_id: None,
            }
        }
        // Any other failure means the file is there but unreadable.
        // Rebuilding it here would silently discard the stored title, tags
        // and other fields, so surface the error instead.
        Err(e) => return Err(e.into()),
    };
    write_json(&session_json_path, &session_metadata).await?;
    Ok(())
}
/// Returns one page of the session's messages in `(ts, message_id)` order.
///
/// `limit` is clamped into `[1, MAX_PAGE_LIMIT]`. When `cursor` is given,
/// only messages strictly after it are considered. `next_cursor` points at
/// the last returned message whenever more messages remain.
async fn get_messages_paged(
    &self,
    scroll_id: Uuid,
    cursor: Option<MessageCursor>,
    limit: usize,
) -> Result<MessagePage> {
    use crate::types::MAX_PAGE_LIMIT;

    // Same clamping policy as the session listing.
    let page_size = limit.min(MAX_PAGE_LIMIT).max(1);

    let mut remaining = self.read_messages_sorted(scroll_id).await?;
    if let Some(after) = &cursor {
        let boundary = (after.ts, after.message_id);
        remaining.retain(|m| (m.ts, m.message_id) > boundary);
    }

    // Decide whether another page exists before cutting this one.
    let has_more = remaining.len() > page_size;
    remaining.truncate(page_size);

    let next_cursor = if has_more {
        remaining.last().map(|m| MessageCursor {
            ts: m.ts,
            message_id: m.message_id,
        })
    } else {
        None
    };

    Ok(MessagePage {
        items: remaining,
        next_cursor,
    })
}
/// Counts the messages stored for a session as the number of non-blank
/// lines in its message file. A missing file counts as zero messages.
async fn count_messages(&self, scroll_id: Uuid) -> Result<usize> {
    let path = self.paths.messages_path_for_read(scroll_id);
    let text = match tokio::fs::read_to_string(&path).await {
        Ok(text) => text,
        // No file yet: the session simply has no messages.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
        Err(e) => return Err(e.into()),
    };

    // One message per line; whitespace-only lines are ignored.
    let mut total = 0usize;
    for line in text.lines() {
        if !line.trim().is_empty() {
            total += 1;
        }
    }
    Ok(total)
}
/// Empties the session's message files and refreshes `updated_at` in
/// `session.json`.
///
/// Returns `ArchivistError::SessionUnknown` when `session.json` does not
/// exist. Both the current `.jsonl` file and the legacy `.ndjson` file are
/// truncated when present.
async fn clear_session_messages(&self, scroll_id: Uuid) -> Result<()> {
    // The session must exist before anything is touched.
    let meta_path = self.paths.session_json(scroll_id);
    let loaded = read_json::<SessionMetadata>(&meta_path).await;
    let mut meta = match loaded {
        Ok(meta) => meta,
        Err(e) => {
            if e.kind() == std::io::ErrorKind::NotFound {
                return Err(ArchivistError::SessionUnknown(scroll_id));
            }
            return Err(e.into());
        }
    };

    let current_file = self.paths.messages_path_for_write(scroll_id);
    #[allow(deprecated)]
    let legacy_file = self.paths.messages_ndjson(scroll_id);

    // Truncate the new-format file first, then the legacy one; both may exist.
    let had_current = current_file.exists();
    if had_current {
        tokio::fs::write(&current_file, "").await?;
    }
    let had_legacy = legacy_file.exists();
    if had_legacy {
        tokio::fs::write(&legacy_file, "").await?;
    }

    if had_current || had_legacy {
        tracing::info!(
            scroll_id = %scroll_id,
            "Cleared all messages from session"
        );
    }

    // Record the change on the session itself.
    meta.updated_at = Utc::now();
    write_json(&meta_path, &meta).await?;
    tracing::info!(
        scroll_id = %scroll_id,
        "Updated session metadata after clearing messages"
    );
    Ok(())
}
/// Exposes this backend through the `DagBackend` interface; always `Some`.
fn as_dag(&self) -> Option<&dyn DagBackend> {
Some(self)
}
/// Exposes this backend through the `MetaEventsBackend` interface; always `Some`.
fn as_meta_events(&self) -> Option<&dyn MetaEventsBackend> {
Some(self)
}
/// Exposes this backend through the `ConnectorRegistryBackend` interface; always `Some`.
fn as_connector_registry(&self) -> Option<&dyn ConnectorRegistryBackend> {
Some(self)
}
/// Exposes this backend through the `SessionMappingBackend` interface; always `Some`.
fn as_session_mapping(&self) -> Option<&dyn SessionMappingBackend> {
Some(self)
}
}
#[cfg(test)]
mod contract_tests {
    use super::*;
    use tempfile::tempdir;

    /// Runs the shared backend contract suite against a `JsonlBackend`
    /// rooted in a fresh temporary directory.
    #[tokio::test]
    async fn jsonl_backend_honors_all_contracts() {
        let scratch = tempdir().expect("tempdir");
        let root = scratch.path().to_path_buf();
        let backend = JsonlBackend::new(root).await.expect("new");
        crate::backend::contract::verify_all_contracts(&backend).await;
    }
}
@@ -0,0 +1,161 @@
//! `ConnectorRegistryBackend` impl for `JsonlBackend`.
use async_trait::async_trait;
use uuid::Uuid;
use crate::backend::ConnectorRegistryBackend;
use crate::backends::jsonl::backend::JsonlBackend;
use crate::error::{ArchivistError, Result};
use crate::storage::{
read_connector_index, read_json, write_connector_index, write_json,
};
use crate::types::{ConnectorIndexRow, ConnectorRecord};
#[async_trait]
impl ConnectorRegistryBackend for JsonlBackend {
/// Stores `record` as the connector's `connector.json`, records it in the
/// connector index and refreshes the in-memory connector cache.
///
/// The index holds at most one row per `connector_uid`: putting a
/// connector that is already indexed replaces its row instead of appending
/// a duplicate.
async fn put_connector(&self, record: ConnectorRecord) -> Result<()> {
    // Write connector.json
    let connector_dir = self.paths.connector_dir(record.connector_uid);
    tokio::fs::create_dir_all(&connector_dir).await?;
    write_json(&connector_dir.join("connector.json"), &record).await?;

    // Upsert the row in index.tsv (read-modify-write).
    let index_path = self.paths.connector_index_tsv();
    let mut rows = read_connector_index(&index_path).await?;
    let row = ConnectorIndexRow {
        connector_uid: record.connector_uid,
        r#type: record.r#type.clone(),
        title: record.title.clone(),
        client_native_id: record.client_native_id.clone(),
        alias_of: record.alias_of,
        created_at: record.created_at,
        fingerprint: record.fingerprint.clone(),
    };
    // Index consumers iterate every row, so a duplicate row would make the
    // same connector (and its sessions) show up twice.
    match rows
        .iter()
        .position(|existing| existing.connector_uid == record.connector_uid)
    {
        Some(idx) => rows[idx] = row,
        None => rows.push(row),
    }
    write_connector_index(&index_path, &rows).await?;

    // Update cache
    self.connector_cache
        .write()
        .await
        .insert(record.connector_uid, record);
    Ok(())
}
/// Looks up a connector record, consulting the in-memory cache first and
/// falling back to the connector's `connector.json` on disk.
///
/// Returns `Ok(None)` when the file does not exist.
async fn get_connector(&self, connector_uid: Uuid) -> Result<Option<ConnectorRecord>> {
    // The read guard lives only for this statement, so it is released
    // before any disk access.
    let cached = self.connector_cache.read().await.get(&connector_uid).cloned();
    if let Some(record) = cached {
        return Ok(Some(record));
    }

    let path = self
        .paths
        .connector_dir(connector_uid)
        .join("connector.json");
    let outcome = read_json::<ConnectorRecord>(&path).await;
    match outcome {
        Ok(record) => Ok(Some(record)),
        Err(e) => {
            if e.kind() == std::io::ErrorKind::NotFound {
                Ok(None)
            } else {
                Err(e.into())
            }
        }
    }
}
/// Lists every primary (non-alias) connector named in the connector index.
///
/// Index rows whose `connector.json` is missing are skipped; any other
/// read error aborts the listing.
async fn list_connectors(&self) -> Result<Vec<ConnectorRecord>> {
    let index_path = self.paths.connector_index_tsv();
    let rows = read_connector_index(&index_path).await?;

    let mut records = Vec::new();
    for row in rows.into_iter().filter(|row| row.alias_of.is_none()) {
        let path = self
            .paths
            .connector_dir(row.connector_uid)
            .join("connector.json");
        match read_json::<ConnectorRecord>(&path).await {
            Ok(record) => records.push(record),
            // Indexed but not on disk: leave it out of the listing.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => return Err(e.into()),
        }
    }
    Ok(records)
}
/// Resolves a client-supplied connector identifier to a connector UID.
///
/// If the identifier parses as a UUID and a connector with that UID is
/// cached or has a `connector.json` on disk, that UID is returned.
/// Otherwise the connector index is searched for a row whose
/// `client_native_id` equals the identifier. A miss is logged and reported
/// as `Ok(None)`.
async fn resolve_connector_uid(
    &self,
    client_native_id: &str,
) -> Result<Option<Uuid>> {
    // Common case: the identifier already is the connector UID.
    if let Ok(candidate) = Uuid::parse_str(client_native_id) {
        // The read guard is released at the end of this statement.
        let cached = self.connector_cache.read().await.contains_key(&candidate);
        // Disk is only consulted when the cache does not know the UID.
        if cached
            || self
                .paths
                .connector_dir(candidate)
                .join("connector.json")
                .exists()
        {
            return Ok(Some(candidate));
        }
    }

    // Fall back to a lookup by client_native_id in the connector index.
    let index_path = self.paths.connector_index_tsv();
    let rows = read_connector_index(&index_path).await?;
    let resolved = rows
        .iter()
        .find(|row| row.client_native_id == client_native_id)
        .map(|row| row.connector_uid);

    // Not found is not an error here; wrapping it is a coordinator concern.
    if resolved.is_none() {
        tracing::warn!(
            "Failed to resolve connector_uid for client_native_id '{}'. \
             This connector may not be registered with the archivist.",
            client_native_id
        );
    }
    Ok(resolved)
}
/// Sets the fingerprint of an existing connector in `connector.json`, the
/// in-memory cache and the connector index.
///
/// # Errors
///
/// Returns `ArchivistError::ConnectorUnknown` when the connector has no
/// `connector.json`. Any other failure to read that file is propagated
/// as-is rather than being reported as an unknown connector.
async fn update_connector_fingerprint(
    &self,
    connector_uid: Uuid,
    fingerprint: String,
) -> Result<()> {
    // 1. Read and update connector.json
    let connector_dir = self.paths.connector_dir(connector_uid);
    let connector_json = connector_dir.join("connector.json");
    let mut record = match read_json::<ConnectorRecord>(&connector_json).await {
        Ok(record) => record,
        // Only a missing file means the connector is not registered.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            return Err(ArchivistError::ConnectorUnknown(connector_uid));
        }
        // An unreadable or corrupt file is a real I/O problem; hiding it
        // behind ConnectorUnknown would send callers down the wrong path.
        Err(e) => return Err(e.into()),
    };
    record.fingerprint = Some(fingerprint.clone());
    write_json(&connector_json, &record).await?;

    // 2. Update in-memory cache
    self.connector_cache
        .write()
        .await
        .insert(connector_uid, record);

    // 3. Update index.tsv
    let index_path = self.paths.connector_index_tsv();
    let mut rows = read_connector_index(&index_path).await?;
    if let Some(row) = rows.iter_mut().find(|r| r.connector_uid == connector_uid) {
        row.fingerprint = Some(fingerprint);
    }
    write_connector_index(&index_path, &rows).await?;
    Ok(())
}
}
@@ -0,0 +1,69 @@
//! `DagBackend` impl for `JsonlBackend`.
use async_trait::async_trait;
use uuid::Uuid;
use crate::backend::DagBackend;
use crate::backends::jsonl::backend::JsonlBackend;
use crate::error::Result;
use crate::storage::{append_ndjson, read_ndjson};
use crate::types::{DagEdge, SessionMetadata};
#[async_trait]
impl DagBackend for JsonlBackend {
/// Appends one edge to the DAG file, creating the file's parent directory
/// first when the path has one.
async fn append_dag_edge(&self, edge: DagEdge) -> Result<()> {
    let target = self.paths.dag_path();
    match target.parent() {
        Some(dir) => tokio::fs::create_dir_all(dir).await?,
        None => {}
    }
    append_ndjson(&target, &edge).await?;
    Ok(())
}
/// Returns the metadata of every session recorded as a direct child of
/// `parent` in the DAG file.
///
/// A DAG file that cannot be read is treated as containing no edges.
/// Children whose metadata is missing or cannot be loaded are logged and
/// left out of the result; they never fail the call.
async fn get_children(&self, parent: Uuid) -> Result<Vec<SessionMetadata>> {
    let dag_path = self.paths.dag_path();
    let edges: Vec<DagEdge> = read_ndjson(&dag_path).await.unwrap_or_default();
    let child_ids: Vec<Uuid> = edges
        .iter()
        .filter(|e| e.parent == parent)
        .map(|e| e.child)
        .collect();

    let mut children = Vec::new();
    for child_id in child_ids {
        match crate::backend::ArchiveBackend::get_session(self, child_id).await {
            Ok(Some(meta)) => children.push(meta),
            Ok(None) => {
                tracing::warn!(
                    child_scroll_id = %child_id,
                    "DAG child session not found"
                );
            }
            Err(e) => {
                // A load failure is not the same as an absent session, so
                // it gets its own message to keep logs accurate.
                tracing::warn!(
                    child_scroll_id = %child_id,
                    error = %e,
                    "Failed to load DAG child session"
                );
            }
        }
    }
    Ok(children)
}
/// Returns the edges whose parent is `root`.
///
/// This is a single-level read; walking the DAG recursively is left to
/// the coordinator. A DAG file that cannot be read yields no edges.
async fn get_dag_edges(&self, root: Uuid) -> Result<Vec<DagEdge>> {
    let dag_path = self.paths.dag_path();
    let mut edges: Vec<DagEdge> = match read_ndjson(&dag_path).await {
        Ok(edges) => edges,
        Err(_) => Vec::new(),
    };
    // Keep only the direct children of `root`, preserving file order.
    edges.retain(|edge| edge.parent == root);
    Ok(edges)
}
}
@@ -0,0 +1,179 @@
//! `SessionMappingBackend` impl for `JsonlBackend`.
use async_trait::async_trait;
use chrono::Utc;
use uuid::Uuid;
use crate::backend::SessionMappingBackend;
use crate::backends::jsonl::backend::JsonlBackend;
use crate::error::Result;
use crate::storage::{append_ndjson, read_connector_index, read_ndjson, write_ndjson};
use crate::types::SessionMapping;
#[async_trait]
impl SessionMappingBackend for JsonlBackend {
/// Records that `native_session_id` on `connector_uid` maps to `scroll_id`.
///
/// Ported from the mapping-persistence tail of
/// `FileBasedArchivist::register_session`: the connector directory is
/// ensured, a `SessionMapping` row is appended to the connector's sessions
/// file, and the in-memory session cache is primed. No alias detection
/// happens here; the caller has already chosen `scroll_id`.
async fn put_mapping(
    &self,
    connector_uid: Uuid,
    native_session_id: &str,
    scroll_id: Uuid,
) -> Result<()> {
    let native_id = native_session_id.to_string();
    let created_at = Utc::now();

    // The directory has to exist before the row can be appended.
    self.paths.ensure_connector_dir(connector_uid).await?;

    let row = SessionMapping {
        version: 1,
        connector_uid,
        native_session_id: native_id.clone(),
        scroll_id,
        created_at,
        alias_of: None,
    };
    let target = self.paths.sessions_path_for_write(connector_uid);
    append_ndjson(&target, &row).await?;

    // Make the new mapping resolvable without a disk scan.
    let mut cache = self.session_cache.write().await;
    cache.insert((connector_uid, native_id), scroll_id);
    Ok(())
}
/// Resolves `(connector_uid, native_session_id)` to a scroll id.
///
/// Ported from `FileBasedArchivist::resolve_session`. The cache is checked
/// first; on a miss the connector's sessions file is scanned and a hit is
/// written back to the cache. Unlike the archivist trait, an unknown
/// session yields `Ok(None)` rather than `Err(SessionUnknown)`.
async fn get_mapping(
    &self,
    connector_uid: Uuid,
    native_session_id: &str,
) -> Result<Option<Uuid>> {
    let key = (connector_uid, native_session_id.to_string());

    // Fast path; the read guard is dropped at the end of the statement.
    let cached = self.session_cache.read().await.get(&key).copied();
    if let Some(scroll_id) = cached {
        return Ok(Some(scroll_id));
    }

    // Slow path: scan the connector's mapping rows on disk.
    let path = self.paths.sessions_path_for_read(connector_uid);
    let rows: Vec<SessionMapping> = read_ndjson(&path).await?;
    let found = rows
        .iter()
        .find(|row| row.native_session_id == native_session_id)
        .map(|row| row.scroll_id);

    if let Some(scroll_id) = found {
        self.session_cache.write().await.insert(key, scroll_id);
    }
    Ok(found)
}
/// Returns every mapping row stored for `connector_uid`.
///
/// The path comes from `sessions_path_for_read`. A file that cannot be
/// read produces an empty list instead of an error.
async fn list_mappings_for_connector(
    &self,
    connector_uid: Uuid,
) -> Result<Vec<SessionMapping>> {
    let path = self.paths.sessions_path_for_read(connector_uid);
    let rows: Vec<SessionMapping> = match read_ndjson(&path).await {
        Ok(rows) => rows,
        Err(_) => Vec::new(),
    };
    Ok(rows)
}
/// Finds which connector owns `native_session_id` and returns
/// `(connector_uid, scroll_id)`.
///
/// Ported from `FileBasedArchivist::find_session_owner`. The in-memory
/// cache is scanned first; otherwise every primary (non-alias) connector's
/// sessions file is read in index order, and a hit is cached for later
/// lookups.
async fn find_owner(
    &self,
    native_session_id: &str,
) -> Result<Option<(Uuid, Uuid)>> {
    // Fast path: any cached entry with this native id. The guard is
    // released when this block ends.
    let cached = {
        let cache = self.session_cache.read().await;
        cache.iter().find_map(|(key, scroll_id)| {
            (key.1 == native_session_id).then(|| (key.0, *scroll_id))
        })
    };
    if cached.is_some() {
        return Ok(cached);
    }

    // Slow path: walk the connector index, skipping alias connectors.
    let index_path = self.paths.connector_index_tsv();
    let rows = read_connector_index(&index_path).await?;
    for row in rows.iter().filter(|row| row.alias_of.is_none()) {
        let path = self.paths.sessions_path_for_read(row.connector_uid);
        let mappings: Vec<SessionMapping> = read_ndjson(&path).await?;
        let hit = mappings
            .iter()
            .find(|m| m.native_session_id == native_session_id)
            .map(|m| m.scroll_id);
        if let Some(scroll_id) = hit {
            // Remember the owner so the next lookup stays in memory.
            self.session_cache
                .write()
                .await
                .insert((row.connector_uid, native_session_id.to_string()), scroll_id);
            return Ok(Some((row.connector_uid, scroll_id)));
        }
    }
    Ok(None)
}
/// Replaces the full mapping table of `connector_uid` with `mappings`,
/// both on disk and in the in-memory session cache.
///
/// The file is rewritten first and the cache is only touched once that
/// write has succeeded, so a failed write leaves the cache describing the
/// table that is still on disk.
async fn rewrite_connector_mappings(
    &self,
    connector_uid: Uuid,
    mappings: Vec<SessionMapping>,
) -> Result<()> {
    // Ensure the connector directory exists before we write.
    self.paths.ensure_connector_dir(connector_uid).await?;

    // Truncate + re-write the canonical `.jsonl` table.
    let write_path = self.paths.sessions_path_for_write(connector_uid);
    write_ndjson(&write_path, &mappings).await?;

    // Drop every cached entry of this connector, then re-prime from the new
    // mapping set. Any (connector_uid, native_id) entry not present in
    // `mappings` is gone afterwards.
    let mut cache = self.session_cache.write().await;
    cache.retain(|(cu, _), _| *cu != connector_uid);
    for m in &mappings {
        cache.insert(
            (connector_uid, m.native_session_id.clone()),
            m.scroll_id,
        );
    }
    Ok(())
}
}
@@ -0,0 +1,200 @@
//! `MetaEventsBackend` impl for `JsonlBackend`.
use async_trait::async_trait;
use chrono::Utc;
use uuid::Uuid;
use crate::backend::MetaEventsBackend;
use crate::backends::jsonl::backend::JsonlBackend;
use crate::error::{ArchivistError, Result};
use crate::storage::{append_ndjson, read_json, read_ndjson, write_json};
use crate::types::{
MetaEventRecord, SessionCompleteness, SessionKind, SessionMetadata,
};
#[async_trait]
impl MetaEventsBackend for JsonlBackend {
/// Appends `events` to the session's event log and bumps the `updated_at`
/// timestamp in `session.json`.
///
/// When `session.json` does not exist, a minimal `AcpConnection` record is
/// created with `scroll_id` standing in for the connector UID.
///
/// # Errors
///
/// Returns an error if an event cannot be appended, if `session.json`
/// exists but cannot be read (it is left untouched in that case, although
/// the events have already been appended), or if the metadata write fails.
async fn append_meta_events(
    &self,
    scroll_id: Uuid,
    events: Vec<MetaEventRecord>,
) -> Result<()> {
    // Ensure session directory exists
    self.paths.ensure_dirs(scroll_id).await?;

    // Append each event to events.jsonl
    let events_path = self.paths.events_path(scroll_id);
    for event in &events {
        append_ndjson(&events_path, event).await?;
    }

    // Update session.json timestamp
    let session_json_path = self.paths.session_json(scroll_id);
    let now = Utc::now();
    let session_metadata = match read_json::<SessionMetadata>(&session_json_path).await {
        Ok(mut metadata) => {
            metadata.updated_at = now;
            metadata
        }
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            // session.json doesn't exist, this shouldn't happen for meta sessions
            // but we'll handle it gracefully
            tracing::warn!(
                scroll_id = %scroll_id,
                "session.json missing when appending meta events, creating minimal metadata"
            );
            SessionMetadata {
                version: 1,
                scroll_id,
                created_at: now,
                updated_at: now,
                title: None,
                connector_uid: scroll_id, // Use scroll_id as placeholder
                native_session_id: None,
                agent_id: None,
                parent_scroll_id: None,
                continuation: None,
                tags: Vec::new(),
                metadata: serde_json::json!({}),
                no_update: false,
                kind: SessionKind::AcpConnection,
                acp_client_id: None,
                is_connected: None,
                current_session_id: None,
                models: None,
                modes: None,
                config_options: None,
                completeness: SessionCompleteness::default(),
                matrix_room_id: None,
                matrix_sharing_active: false,
                matrix_shared_at: None,
                is_subagent: false,
                subagent_type: None,
                spawning_tool_use_id: None,
            }
        }
        // The file exists but could not be read. Replacing it with a stub
        // would wipe the stored client id and connection state, so report
        // the failure instead.
        Err(e) => return Err(e.into()),
    };
    write_json(&session_json_path, &session_metadata).await?;
    Ok(())
}
/// Returns the session's meta events ordered by timestamp, with the event
/// id as tie-breaker. An event file that cannot be read yields an empty
/// list.
async fn get_meta_events(&self, scroll_id: Uuid) -> Result<Vec<MetaEventRecord>> {
    let path = self.paths.events_path(scroll_id);
    let mut events: Vec<MetaEventRecord> = match read_ndjson(&path).await {
        Ok(events) => events,
        Err(_) => Vec::new(),
    };
    // Compare (ts, event_id) pairs so the order is stable across reads.
    events.sort_by(|a, b| (&a.ts, &a.event_id).cmp(&(&b.ts, &b.event_id)));
    Ok(events)
}
async fn update_meta_session_status(
&self,
scroll_id: Uuid,
is_connected: bool,
current_session_id: Option<Uuid>,
) -> Result<()> {
// Load existing session metadata
let session_json_path = self.paths.session_json(scroll_id);
let mut session_metadata: SessionMetadata = match read_json(&session_json_path).await {
Ok(metadata) => metadata,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
return Err(ArchivistError::SessionUnknown(scroll_id));
}
Err(e) => return Err(e.into()),
};
// Update connection status fields
session_metadata.is_connected = Some(is_connected);
session_metadata.current_session_id = current_session_id;
session_metadata.updated_at = Utc::now();
// Write updated metadata back to disk
write_json(&session_json_path, &session_metadata).await?;
tracing::info!(
scroll_id = %scroll_id,
is_connected = %is_connected,
current_session_id = ?current_session_id,
"Updated meta session status"
);
Ok(())
}
/// Lists every `AcpConnection` session found under the archive's
/// `.contexts` directory, most recently updated first.
///
/// Returns an empty list when `.contexts` does not exist. Directories
/// without a `session.json` are skipped silently; files that fail to load
/// for another reason are logged and skipped.
async fn list_meta_sessions(&self) -> Result<Vec<SessionMetadata>> {
    let contexts_dir = self.paths.root().join(".contexts");
    if !contexts_dir.exists() {
        return Ok(Vec::new());
    }

    let mut found = Vec::new();
    let mut dir = tokio::fs::read_dir(&contexts_dir).await?;
    loop {
        let entry = match dir.next_entry().await? {
            Some(entry) => entry,
            None => break,
        };
        // Only directories can hold a session.
        let is_dir = entry.file_type().await?.is_dir();
        if !is_dir {
            continue;
        }

        let meta_path = entry.path().join("session.json");
        match read_json::<SessionMetadata>(&meta_path).await {
            Ok(meta) if meta.kind == SessionKind::AcpConnection => found.push(meta),
            // Some other kind of session: not a meta session.
            Ok(_) => {}
            // No session.json in this directory.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => {
                tracing::warn!(
                    path = ?meta_path,
                    error = %e,
                    "Failed to read session.json while listing meta sessions"
                );
            }
        }
    }

    // Newest first.
    found.sort_by(|a, b| b.updated_at.cmp(&a.updated_at));
    Ok(found)
}
/// Returns the first meta session, in `list_meta_sessions` order, whose
/// `acp_client_id` equals `client_id`.
async fn find_meta_session_by_client(
    &self,
    client_id: &str,
) -> Result<Option<SessionMetadata>> {
    for session in self.list_meta_sessions().await? {
        if session.acp_client_id.as_deref() == Some(client_id) {
            return Ok(Some(session));
        }
    }
    Ok(None)
}
}
@@ -0,0 +1,12 @@
//! NDJSON/JSON/TSV file-based backend.
//!
//! Ports the body of the former `FileBasedArchivist`. Uses the existing
//! `crate::storage` free-function primitives unchanged.
mod backend;
mod connectors;
mod dag;
mod mapping;
mod meta;
pub use backend::JsonlBackend;
@@ -0,0 +1,5 @@
//! Concrete backend implementations for `ArchiveBackend`.
pub mod jsonl;
pub use jsonl::JsonlBackend;
+558
View File
@@ -0,0 +1,558 @@
//! Backfill functionality for importing existing sessions from connectors.
//!
//! This module provides utilities to import sessions and messages from connectors
//! that support listing operations (like OpenCode connectors) into the Archivist.
use futures::future::BoxFuture;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::coordinator::Archivist;
use crate::error::{ArchivistError, Result};
use crate::types::{MessageRecord, RegisterSessionRequest, RegisterStatus};
use dirigent_protocol::{Message, Session};
/// Statistics collected during a backfill operation.
///
/// This provides a summary of what was imported and any errors encountered.
/// The derived `Default` yields zero counts and an empty error list.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct BackfillStats {
    /// Total number of sessions found in the connector
    pub sessions_found: usize,
    /// Number of sessions successfully imported (new registrations)
    pub sessions_imported: usize,
    /// Number of sessions skipped (already archived)
    pub sessions_skipped: usize,
    /// Total number of messages imported across all sessions
    pub messages_imported: usize,
    /// Error messages for sessions that failed to import
    pub errors: Vec<String>,
}

impl BackfillStats {
    /// Create a new BackfillStats with all counts at zero
    pub fn new() -> Self {
        Self::default()
    }
}
/// Backfill sessions from a connector into the archive.
///
/// This function imports existing sessions from a connector by:
/// 1. Attempting to register each session with the archivist
/// 2. For newly registered sessions, fetching messages via the provided closure
/// 3. Appending fetched messages to the archive
/// 4. Collecting statistics on successes, failures, and skips
///
/// # Arguments
///
/// * `archivist` - The archivist to backfill into
/// * `connector_uid` - The UID of the connector being backfilled
/// * `sessions` - List of sessions to import (from connector's list_sessions())
/// * `fetch_messages` - Async closure to fetch messages for a given native session ID
///
/// # Returns
///
/// Statistics about the backfill operation including counts and errors
///
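/// # Error Handling
///
/// This function continues processing all sessions even if individual sessions fail.
/// Errors are collected in `BackfillStats.errors` rather than aborting the operation.
///
/// A session is registered before its messages are fetched. If the fetch or the
/// append step then fails, the session has already been counted in
/// `sessions_imported` and stays registered without messages; a later backfill
/// run will presumably resolve it as already archived and skip it instead of
/// retrying the message import.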
///
/// # Example
///
/// ```no_run
/// use dirigent_archivist::{Archivist, backfill_from_sessions};
/// use dirigent_protocol::{Session, Message};
/// use uuid::Uuid;
///
/// # async fn example(archivist: &Archivist, sessions: Vec<Session>) {
/// let connector_uid = Uuid::now_v7();
///
/// let stats = backfill_from_sessions(
/// archivist,
/// connector_uid,
/// sessions,
/// |session_id| {
/// Box::pin(async move {
/// // Fetch messages from connector
/// // Return Vec<Message>
/// Ok(vec![])
/// })
/// }
/// ).await.unwrap();
///
/// println!("Imported {} sessions, {} messages",
/// stats.sessions_imported,
/// stats.messages_imported);
/// # }
/// ```
pub async fn backfill_from_sessions<F>(
archivist: &Archivist,
connector_uid: Uuid,
sessions: Vec<Session>,
fetch_messages: F,
) -> Result<BackfillStats>
where
F: Fn(&str) -> BoxFuture<'static, Result<Vec<Message>>> + Send + Sync,
{
let mut stats = BackfillStats::new();
stats.sessions_found = sessions.len();
// Sessions are processed one at a time; every failure path below records
// an error string and moves on to the next session.
for session in sessions {
let native_session_id = session.id.clone();
// Try to resolve the session - if it exists, skip it
match archivist
.resolve_session(connector_uid, &native_session_id, None)
.await
{
Ok(_scroll_id) => {
// Session already archived, skip
stats.sessions_skipped += 1;
continue;
}
Err(ArchivistError::SessionUnknown(_)) => {
// Session not found, proceed with import
}
Err(e) => {
// Unexpected error during resolution
stats.errors.push(format!(
"Failed to resolve session {}: {}",
native_session_id, e
));
continue;
}
}
// Register the session
let register_req = RegisterSessionRequest {
connector_uid,
native_session_id: native_session_id.clone(),
title: Some(session.title.clone()),
custom_scroll_id: None, // Let archivist generate
// Metadata that fails to serialize is replaced by an empty JSON object.
metadata: serde_json::to_value(&session.metadata)
.unwrap_or_else(|_| serde_json::json!({})),
completeness: Default::default(),
parent_scroll_id: None,
is_subagent: false,
continuation: None,
agent_id: None,
subagent_type: None,
spawning_tool_use_id: None,
};
// Only an `Accepted` registration yields a scroll_id to import into;
// every other outcome ends this iteration.
let scroll_id = match archivist.register_session(register_req, None).await {
Ok(response) => {
match response.status {
RegisterStatus::Accepted => {
// Counted as imported here, before any message has been fetched.
stats.sessions_imported += 1;
response.scroll_id
}
RegisterStatus::Aliased => {
// Already exists (shouldn't happen since we checked, but handle gracefully)
stats.sessions_skipped += 1;
continue;
}
RegisterStatus::Rejected => {
// Registration rejected (collision inconsistency)
stats.errors.push(format!(
"Session registration rejected for {}: UID collision",
native_session_id
));
continue;
}
}
}
Err(e) => {
stats.errors.push(format!(
"Failed to register session {}: {}",
native_session_id, e
));
continue;
}
};
// Fetch messages for this session
// NOTE(review): the session is already registered at this point, so a
// failure here leaves it in the archive without messages.
let messages = match fetch_messages(&native_session_id).await {
Ok(msgs) => msgs,
Err(e) => {
stats.errors.push(format!(
"Failed to fetch messages for session {}: {}",
native_session_id, e
));
continue;
}
};
// Convert protocol messages to message records
let message_records: Vec<MessageRecord> = messages
.into_iter()
.map(|msg| convert_message_to_record(msg, scroll_id))
.collect();
// Remember the count now; the vector is moved into append_messages below.
let message_count = message_records.len();
// Append messages to the archive
if let Err(e) = archivist
.append_messages(scroll_id, message_records, None)
.await
{
stats.errors.push(format!(
"Failed to append messages for session {}: {}",
native_session_id, e
));
continue;
}
// Messages are only counted once the append has succeeded.
stats.messages_imported += message_count;
}
Ok(stats)
}
/// Build the archival [`MessageRecord`] for one protocol [`Message`].
///
/// Every message part is rendered to a markdown fragment and the fragments
/// are joined with a blank line to form `content_md`. The untouched parts are
/// additionally serialized into `content_parts` (left as `None` if that
/// serialization fails). Message metadata is serialized to JSON, falling back
/// to an empty object when it is absent or cannot be serialized.
///
/// The record always receives a freshly generated v7 UUID as `message_id`;
/// the protocol message's own `id` is not carried over.
pub fn convert_message_to_record(msg: Message, scroll_id: Uuid) -> MessageRecord {
    // One markdown fragment per part, in the original part order.
    let fragments: Vec<String> = msg
        .content
        .iter()
        .map(|part| match part {
            dirigent_protocol::MessagePart::Text { text } => text.clone(),
            dirigent_protocol::MessagePart::Thinking { text } => {
                format!("<thinking>\n{}\n</thinking>", text)
            }
            dirigent_protocol::MessagePart::Code { language, code } => {
                format!("```{}\n{}\n```", language, code)
            }
            dirigent_protocol::MessagePart::Tool {
                tool,
                tool_call_id: _,
                input,
                output,
            } => {
                let call = format!("**Tool: {}**\n\nInput:\n```json\n{}\n```", tool, input);
                match output {
                    // The output section is only emitted when the call has one.
                    Some(out) => format!("{}\n\nOutput:\n```json\n{}\n```", call, out),
                    None => call,
                }
            }
            dirigent_protocol::MessagePart::File { path, content } => {
                format!("**File: {}**\n\n```\n{}\n```", path, content)
            }
        })
        .collect();

    // Structured copy of the parts, kept alongside the markdown rendering.
    let content_parts = serde_json::to_value(&msg.content).ok();

    let role = String::from(match msg.role {
        dirigent_protocol::MessageRole::User => "user",
        dirigent_protocol::MessageRole::Assistant => "assistant",
    });

    let metadata = match msg.metadata {
        Some(meta) => serde_json::to_value(meta).unwrap_or_else(|_| serde_json::json!({})),
        None => serde_json::json!({}),
    };

    MessageRecord {
        version: 1,
        // Always a new id; `msg.id` is intentionally not consulted here.
        message_id: Uuid::now_v7(),
        session: scroll_id,
        parent_id: None,
        ts: msg.created_at,
        role,
        // The protocol message carries no author information.
        author: None,
        content_md: fragments.join("\n\n"),
        content_parts,
        // Attachments are not extracted from message parts.
        attachments: Vec::new(),
        metadata,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::coordinator::Archivist;
    use chrono::Utc;
    use dirigent_protocol::{MessageRole, MessageStatus, SessionMetadata};
    use tempfile::TempDir;

    /// Boots an `Archivist` over a single JSONL backend rooted in a fresh
    /// temporary directory. The `TempDir` is returned so the caller keeps the
    /// directory alive for the duration of the test.
    async fn setup_test_archivist() -> (Archivist, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        // Use `from_single_backend` so each test is isolated (no shared
        // registry file in the tempdir's parent racing against siblings).
        let backend = std::sync::Arc::new(
            crate::backends::JsonlBackend::new(temp_dir.path().to_path_buf())
                .await
                .unwrap(),
        );
        let archivist = Archivist::from_single_backend("main".into(), backend)
            .await
            .unwrap();
        (archivist, temp_dir)
    }

    /// Registers the connector every backfill test relies on and returns the
    /// UID it was registered under.
    async fn register_test_connector(archivist: &Archivist) -> Uuid {
        let connector_uid = Uuid::now_v7();
        let connector_req = crate::types::RegisterConnectorRequest {
            custom_uid: Some(connector_uid),
            r#type: "OpenCode".to_string(),
            title: "Test Connector".to_string(),
            client_native_id: "test-connector".to_string(),
            metadata: serde_json::json!({}),
            fingerprint: None,
        };
        archivist
            .register_connector(connector_req, None)
            .await
            .unwrap();
        connector_uid
    }

    /// Minimal protocol session with the given id and title.
    fn create_test_session(id: &str, title: &str) -> Session {
        Session {
            id: id.to_string(),
            title: title.to_string(),
            created_at: Utc::now(),
            updated_at: Utc::now(),
            metadata: SessionMetadata {
                project_path: "/test".to_string(),
                model: Some("test-model".to_string()),
                total_messages: 0,
                system_message: None,
                current_mode_id: None,
                _meta: None,
                project_id: None,
            },
            cwd: None,
            config_options: None,
            acp_client_id: None,
            models: None,
            modes: None,
        }
    }

    /// Completed protocol message holding a single text part.
    fn create_test_message(id: &str, session_id: &str, role: MessageRole, text: &str) -> Message {
        Message {
            id: id.to_string(),
            session_id: session_id.to_string(),
            role,
            created_at: Utc::now(),
            content: vec![dirigent_protocol::MessagePart::Text {
                text: text.to_string(),
            }],
            status: MessageStatus::Completed,
            metadata: None,
        }
    }

    #[tokio::test]
    async fn test_backfill_new_sessions() {
        let (archivist, _temp) = setup_test_archivist().await;
        let connector_uid = register_test_connector(&archivist).await;

        // Create test sessions
        let sessions = vec![
            create_test_session("session-1", "Session 1"),
            create_test_session("session-2", "Session 2"),
        ];

        // Mock message fetcher
        let fetch_messages = |session_id: &str| {
            let sid = session_id.to_string();
            Box::pin(async move {
                Ok(vec![
                    create_test_message("msg-1", &sid, MessageRole::User, "Hello"),
                    create_test_message("msg-2", &sid, MessageRole::Assistant, "Hi there"),
                ])
            }) as BoxFuture<'static, Result<Vec<Message>>>
        };

        // Backfill
        let stats = backfill_from_sessions(&archivist, connector_uid, sessions, fetch_messages)
            .await
            .unwrap();

        // Verify stats
        assert_eq!(stats.sessions_found, 2);
        assert_eq!(stats.sessions_imported, 2);
        assert_eq!(stats.sessions_skipped, 0);
        assert_eq!(stats.messages_imported, 4); // 2 messages per session
        assert_eq!(stats.errors.len(), 0);
    }

    #[tokio::test]
    async fn test_backfill_skips_existing_sessions() {
        let (archivist, _temp) = setup_test_archivist().await;
        let connector_uid = register_test_connector(&archivist).await;

        // Pre-register one session
        let session1 = create_test_session("session-1", "Session 1");
        let req = RegisterSessionRequest {
            connector_uid,
            native_session_id: session1.id.clone(),
            title: Some(session1.title.clone()),
            custom_scroll_id: None,
            metadata: serde_json::json!({}),
            completeness: Default::default(),
            parent_scroll_id: None,
            is_subagent: false,
            continuation: None,
            agent_id: None,
            subagent_type: None,
            spawning_tool_use_id: None,
        };
        archivist.register_session(req, None).await.unwrap();

        // Create sessions including the pre-registered one
        let sessions = vec![session1, create_test_session("session-2", "Session 2")];

        // Mock message fetcher
        let fetch_messages = |session_id: &str| {
            let sid = session_id.to_string();
            Box::pin(async move {
                Ok(vec![create_test_message(
                    "msg-1",
                    &sid,
                    MessageRole::User,
                    "Test",
                )])
            }) as BoxFuture<'static, Result<Vec<Message>>>
        };

        // Backfill
        let stats = backfill_from_sessions(&archivist, connector_uid, sessions, fetch_messages)
            .await
            .unwrap();

        // Verify stats - session-1 should be skipped
        assert_eq!(stats.sessions_found, 2);
        assert_eq!(stats.sessions_imported, 1); // Only session-2
        assert_eq!(stats.sessions_skipped, 1); // session-1 already exists
        assert_eq!(stats.messages_imported, 1); // Only messages from session-2
        assert_eq!(stats.errors.len(), 0);
    }

    #[tokio::test]
    async fn test_backfill_handles_fetch_errors() {
        let (archivist, _temp) = setup_test_archivist().await;
        let connector_uid = register_test_connector(&archivist).await;

        let sessions = vec![create_test_session("session-1", "Session 1")];

        // Mock message fetcher that fails
        let fetch_messages = |_session_id: &str| {
            Box::pin(async move {
                Err(ArchivistError::InvalidRequest(
                    "Failed to fetch messages".to_string(),
                ))
            }) as BoxFuture<'static, Result<Vec<Message>>>
        };

        // Backfill
        let stats = backfill_from_sessions(&archivist, connector_uid, sessions, fetch_messages)
            .await
            .unwrap();

        // Verify stats - session registered but messages failed
        assert_eq!(stats.sessions_found, 1);
        assert_eq!(stats.sessions_imported, 1); // Session was registered
        assert_eq!(stats.messages_imported, 0); // But no messages imported
        assert_eq!(stats.errors.len(), 1); // Error recorded
        assert!(stats.errors[0].contains("Failed to fetch messages"));
    }

    #[test]
    fn test_backfill_stats_default() {
        let stats = BackfillStats::default();
        assert_eq!(stats.sessions_found, 0);
        assert_eq!(stats.sessions_imported, 0);
        assert_eq!(stats.sessions_skipped, 0);
        assert_eq!(stats.messages_imported, 0);
        assert_eq!(stats.errors.len(), 0);
    }

    #[test]
    fn test_convert_message_to_record() {
        let scroll_id = Uuid::now_v7();
        let msg = create_test_message("msg-1", "session-1", MessageRole::User, "Hello world");
        let record = convert_message_to_record(msg, scroll_id);
        assert_eq!(record.session, scroll_id);
        assert_eq!(record.role, "user");
        assert_eq!(record.content_md, "Hello world");
        assert_eq!(record.version, 1);
    }

    #[test]
    fn test_convert_message_with_thinking() {
        let scroll_id = Uuid::now_v7();
        let msg = Message {
            id: "msg-1".to_string(),
            session_id: "session-1".to_string(),
            role: MessageRole::Assistant,
            created_at: Utc::now(),
            content: vec![dirigent_protocol::MessagePart::Thinking {
                text: "Let me think...".to_string(),
            }],
            status: MessageStatus::Completed,
            metadata: None,
        };
        let record = convert_message_to_record(msg, scroll_id);
        assert!(record.content_md.contains("<thinking>"));
        assert!(record.content_md.contains("Let me think..."));
        assert!(record.content_md.contains("</thinking>"));
    }
}
@@ -0,0 +1,70 @@
//! Admin / inspection methods on `Archivist`.
//!
//! Split out because they aren't part of the hot-path coordinator API:
//! `shutdown` drains queued writer tasks, `list_archives_with_health`
//! snapshots every registration's health + queue depth, and the cache
//! admin methods delegate to `ReadCache`.
use std::sync::Arc;
use tokio::sync::oneshot;
use crate::error::Result;
use crate::registry::writer::WriteOp;
use crate::registry::{ArchiveRegistration, ArchiveStatus};
use super::Archivist;
impl Archivist {
    /// Ask every queued writer task to shut down and wait for it to finish.
    ///
    /// Registrations without a writer are skipped. For each writer a
    /// `WriteOp::Shutdown` carrying a oneshot sender is sent and the matching
    /// receiver is awaited; afterwards the writer's join handle (if still
    /// present) is taken and awaited. Failures of the send, the
    /// acknowledgement, or the join are ignored, so this always returns
    /// `Ok(())`.
    pub async fn shutdown(&self) -> Result<()> {
        let snapshot: Vec<Arc<ArchiveRegistration>> = self.registrations.read().await.clone();
        for registration in &snapshot {
            let Some(writer) = registration.writer.as_ref() else {
                continue;
            };

            let (ack_tx, ack_rx) = oneshot::channel();
            // A failed send means the writer task is already gone, so there is
            // no acknowledgement to wait for.
            if writer.sender.send(WriteOp::Shutdown(ack_tx)).await.is_ok() {
                let _ = ack_rx.await;
            }

            // Kept as a single `if let` on purpose: the lock guard is a
            // temporary of the scrutinee and therefore stays held while the
            // task is being joined.
            if let Some(task) = writer.join.lock().await.take() {
                let _ = task.await;
            }
        }
        Ok(())
    }

    /// Collect an [`ArchiveStatus`] for every registration, in the order the
    /// registrations are currently stored.
    ///
    /// `queue_depth` is `None` for registrations that have no writer.
    pub async fn list_archives_with_health(&self) -> Vec<ArchiveStatus> {
        let snapshot: Vec<Arc<ArchiveRegistration>> = self.registrations.read().await.clone();
        let mut statuses = Vec::with_capacity(snapshot.len());
        for registration in &snapshot {
            // Read the two cells in separate statements so the first guard is
            // released before the second lock is requested.
            let health = registration.last_health.read().await.clone();
            let last_error = registration.last_error.read().await.clone();
            let queue_depth = registration
                .writer
                .as_ref()
                .map(|writer| writer.queue_depth_now());

            let status = ArchiveStatus {
                name: registration.name.clone(),
                type_name: registration.type_name.to_string(),
                enabled: registration.enabled,
                write_active: registration.write_active,
                failure_mode: registration.failure_mode,
                read_priority: registration.read_priority,
                capabilities: registration.capabilities().clone(),
                health,
                last_error,
                queue_depth,
            };
            statuses.push(status);
        }
        statuses
    }

    /// Clear the coordinator's read cache.
    pub async fn clear_read_cache(&self) {
        self.read_cache.clear().await;
    }

    /// Report the read cache's current length, as given by `ReadCache::len`.
    pub async fn read_cache_size(&self) -> usize {
        self.read_cache.len().await
    }
}
@@ -0,0 +1,77 @@
//! Archive lifecycle methods for `Archivist`.
//!
//! Phase 3 is **startup-only**: the archive registry is constructed from
//! `dirigent.toml` at boot and not mutated at runtime. Accordingly,
//! `add_archive`, `remove_archive`, and `set_default_archive` all return
//! [`ArchivistError::DynamicRegistryUnsupported`]. The `list_archives`
//! and `get_default_archive` read-paths continue to operate against the
//! new `Vec<Arc<ArchiveRegistration>>` storage.
use std::path::PathBuf;
use crate::coordinator::Archivist;
use crate::error::{ArchivistError, Result};
use crate::registry::FailureMode;
impl Archivist {
/// **Deprecated in Phase 3.** Archive registry is configured at boot
/// via `dirigent.toml`; runtime mutation is not supported.
pub async fn add_archive(&self, _name: String, _path: PathBuf) -> Result<()> {
Err(ArchivistError::DynamicRegistryUnsupported)
}
/// **Deprecated in Phase 3.** Archive registry is configured at boot
/// via `dirigent.toml`; runtime mutation is not supported.
pub async fn remove_archive(&self, _name: String, _force: bool) -> Result<()> {
Err(ArchivistError::DynamicRegistryUnsupported)
}
/// List all configured archives. Session counts are reported as `0`
/// because the Phase 3 multi-backend coordinator does not persist a
/// per-archive connector index; counts will be reintroduced by the
/// admin-status query in Task 23.
pub async fn list_archives(&self) -> Result<Vec<super::types::ArchiveInfo>> {
let regs = self.registrations.read().await;
let primary_name = regs
.iter()
.filter(|r| {
r.enabled && r.write_active && r.failure_mode == FailureMode::Required
})
.min_by_key(|r| r.read_priority)
.map(|r| r.name.clone());
Ok(regs
.iter()
.map(|r| super::types::ArchiveInfo {
name: r.name.clone(),
path: PathBuf::new(),
created_at: chrono::Utc::now(),
session_count: 0,
is_default: primary_name.as_deref() == Some(r.name.as_str()),
})
.collect())
}
/// Get the name of the "default" archive — interpreted in Phase 3 as
/// the enabled, write-active, `Required` backend with the lowest
/// `read_priority`.
pub async fn get_default_archive(&self) -> Result<String> {
let regs = self.registrations.read().await;
regs.iter()
.filter(|r| {
r.enabled && r.write_active && r.failure_mode == FailureMode::Required
})
.min_by_key(|r| r.read_priority)
.map(|r| r.name.clone())
.ok_or_else(|| ArchivistError::PrimaryUnavailable {
name: "<default>".into(),
reason: "no required write-active backend".into(),
})
}
/// **Deprecated in Phase 3.** Archive registry is configured at boot
/// via `dirigent.toml`; runtime mutation is not supported.
pub async fn set_default_archive(&self, _name: String) -> Result<()> {
Err(ArchivistError::DynamicRegistryUnsupported)
}
}
@@ -0,0 +1,281 @@
//! Boot-time construction of the `Archivist` coordinator from a parsed
//! `ArchivesConfig` and a `BackendRegistry` of factories.
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::backend::HealthStatus;
use crate::error::ArchivistBootError;
use crate::registry::{
cache::ReadCache, ArchiveRegistration, ArchivesConfig, BackendRegistry, FailureMode,
WritePolicy,
};
use super::Archivist;
impl Archivist {
/// Construct the coordinator from a parsed `[[archives]]` config block
/// and a registry of backend factories.
///
/// Steps, in order:
///
/// - Validates the config via `config.validate()` (duplicate-name /
///   no-primary rules), then applies the filter-level rules described in
///   the body.
/// - When `base_dir` is given, rewrites every relative string-valued
///   `params.path` so it is joined onto `base_dir`.
/// - Builds a backend through the factory for each config entry.
///   NOTE(review): the build loop does not check `entry.enabled`, so
///   disabled entries are instantiated and health-checked too — confirm
///   this is intended.
/// - Runs a startup `health_check` per backend; a `Required` backend that
///   reports `HealthStatus::Unavailable` aborts boot.
/// - Spawns a writer task for every backend whose runtime policy is
///   `WritePolicy::Queued`; `WritePolicy::Inline` backends get
///   `writer = None`.
/// - Sorts registrations by `read_priority` (ties by declaration order).
///
/// The returned coordinator has an empty `registry_path` and a fresh
/// `ReadCache`.
///
/// # Errors
///
/// - Whatever `config.validate()` returns.
/// - `ArchivistBootError::FilterRejectsEverything` when an entry's filter
///   has an empty `include_connectors` list.
/// - `ArchivistBootError::NoUnrestrictedPrimary` when entries exist but no
///   enabled, write-active entry has an unrestricted filter.
/// - `ArchivistBootError::UnknownType` / `ArchivistBootError::BackendBuild`
///   when the factory fails.
/// - `ArchivistBootError::UnavailableRequiredBackend` when a `Required`
///   backend is unavailable at startup.
pub async fn from_config(
mut config: ArchivesConfig,
registry: &BackendRegistry,
base_dir: Option<&std::path::Path>,
) -> Result<Self, ArchivistBootError> {
config.validate()?;
// Filter-level validation (Phase 4, Task 19).
//
// 1. At least one enabled write-active archive must have an
// unrestricted filter. Otherwise there is no default home for
// a session that does not match any filter, and the primary
// target would silently exclude sessions despite being the
// "write-always" backend.
// 2. No archive may declare a filter whose `include_connectors`
// set is `Some(empty)` — that form rejects every session
// unconditionally and is almost always a config typo.
let mut has_unrestricted_write_active = false;
for entry in &config.entries {
if let Some(inc) = &entry.filter.include_connectors {
if inc.is_empty() {
return Err(ArchivistBootError::FilterRejectsEverything {
archive: entry.name.clone(),
});
}
}
if entry.enabled && entry.write_active && entry.filter.is_unrestricted() {
has_unrestricted_write_active = true;
}
}
// An empty config is allowed through; the rule only applies once at
// least one archive is declared.
if !config.entries.is_empty() && !has_unrestricted_write_active {
return Err(ArchivistBootError::NoUnrestrictedPrimary);
}
// Resolve relative `params.path` values against `base_dir` so that
// archives declared with relative paths land under the data directory
// rather than the binary's CWD.
if let Some(base) = base_dir {
for entry in &mut config.entries {
// Only table-shaped params with a string `path` key are touched.
if let toml::Value::Table(ref mut table) = entry.params {
if let Some(toml::Value::String(ref mut path_str)) = table.get_mut("path") {
let p = std::path::Path::new(path_str.as_str());
if p.is_relative() {
// The joined path is stored back as a string; `to_string_lossy`
// replaces any non-UTF-8 sequences in `base`.
*path_str = base.join(&*path_str).to_string_lossy().into_owned();
}
}
}
}
}
let mut registrations: Vec<Arc<ArchiveRegistration>> = Vec::new();
for entry in config.entries.into_iter() {
// `entry.params` is moved into the factory here; the remaining
// fields of `entry` are consumed one by one further down.
let backend = registry
.build(&entry.name, &entry.type_name, entry.params)
.await
.map_err(|e| match e {
crate::registry::BackendBuildError::UnknownType(t) => {
ArchivistBootError::UnknownType {
name: entry.name.clone(),
type_name: t,
}
}
other => ArchivistBootError::BackendBuild {
name: entry.name.clone(),
source: other,
},
})?;
let initial_health = backend.health_check().await;
// Only `Required` backends are fatal when unavailable at boot.
if entry.failure_mode == FailureMode::Required {
if let HealthStatus::Unavailable { reason } = &initial_health {
return Err(ArchivistBootError::UnavailableRequiredBackend {
name: entry.name.clone(),
reason: reason.clone(),
});
}
}
let runtime_policy: WritePolicy = entry.write_policy.into_runtime();
// Build shared drift state up-front so the writer task (if any)
// and the registration's health-drift helpers mutate the SAME
// `Arc<RwLock<_>>` cells. This keeps Task 22's drift semantics
// coherent across the inline and queued paths.
let health_state: Arc<RwLock<HealthStatus>> =
Arc::new(RwLock::new(initial_health.clone()));
let error_state: Arc<
RwLock<Option<(chrono::DateTime<chrono::Utc>, String)>>,
> = Arc::new(RwLock::new(None));
let failure_counter: Arc<RwLock<u32>> = Arc::new(RwLock::new(0u32));
// Queued backends get a writer task that receives clones of the
// shared state cells; inline backends have no writer.
let writer = match &runtime_policy {
WritePolicy::Inline => None,
WritePolicy::Queued {
batch_window_ms,
capacity,
overflow,
} => Some(crate::registry::writer::spawn_writer(
backend.clone(),
entry.name.clone(),
*capacity,
std::time::Duration::from_millis(*batch_window_ms),
*overflow,
health_state.clone(),
error_state.clone(),
failure_counter.clone(),
)),
};
// Leak `type_name` to satisfy &'static str on the registration; safe at boot,
// and a constant number of entries (O(archives in config)).
let type_name_static: &'static str = Box::leak(entry.type_name.into_boxed_str());
let registration = ArchiveRegistration::new_with_shared_state(
entry.name,
type_name_static,
backend,
entry.write_active,
entry.failure_mode,
entry.read_priority,
entry.enabled,
runtime_policy,
writer,
health_state,
error_state,
failure_counter,
)
.with_filter(entry.filter);
registrations.push(Arc::new(registration));
}
// Sort by `read_priority`. Rust's sort is stable, so ties keep declaration order.
registrations.sort_by_key(|r| r.read_priority);
Ok(Self {
registrations: RwLock::new(registrations),
read_cache: Arc::new(ReadCache::new()),
// Left empty: this constructor does not set a registry path.
registry_path: std::path::PathBuf::new(),
})
}
}
// Boot tests for `Archivist::from_config`, focused on how `base_dir`
// interacts with the `path` parameter of a JSONL archive.
#[cfg(test)]
mod tests {
use super::*;
use crate::registry::{ArchivesConfig, BackendRegistry};
/// Parse a TOML snippet into an `ArchivesConfig`, panicking on invalid input.
fn parse(toml_src: &str) -> ArchivesConfig {
toml::from_str(toml_src).unwrap()
}
/// A relative `path` must be created underneath the supplied `base_dir`.
#[tokio::test]
async fn relative_archive_path_resolved_against_base_dir() {
let base = tempfile::tempdir().unwrap();
let cfg = parse(
r#"
[[archives]]
name = "main"
type = "jsonl"
[archives.params]
path = "my_archive"
"#,
);
let registry = BackendRegistry::with_jsonl();
let archivist = Archivist::from_config(cfg, &registry, Some(base.path()))
.await
.unwrap();
// The archive should have been created under base_dir/my_archive.
// Verify by checking the .contexts directory exists.
let expected = base.path().join("my_archive").join(".contexts");
assert!(
expected.exists(),
"expected {expected:?} to exist after boot with relative path"
);
// Also verify the archivist is functional (has one registration).
let archives = archivist.list_archives().await.unwrap();
assert_eq!(archives.len(), 1);
}
/// An absolute `path` must be used verbatim even when a `base_dir` is given.
#[tokio::test]
async fn absolute_archive_path_not_affected_by_base_dir() {
let base = tempfile::tempdir().unwrap();
let archive_dir = tempfile::tempdir().unwrap();
// Backslashes are swapped for forward slashes before the path is
// interpolated into the TOML string below.
let abs_path = archive_dir.path().to_string_lossy().replace('\\', "/");
let cfg = parse(&format!(
r#"
[[archives]]
name = "main"
type = "jsonl"
[archives.params]
path = "{abs_path}"
"#,
));
let registry = BackendRegistry::with_jsonl();
let archivist = Archivist::from_config(cfg, &registry, Some(base.path()))
.await
.unwrap();
// The archive should be at the absolute path, NOT under base_dir.
let expected = archive_dir.path().join(".contexts");
assert!(
expected.exists(),
"expected {expected:?} to exist (absolute path should be used as-is)"
);
// Verify nothing was created under base_dir with the archive name.
// If base_dir resolution incorrectly touched the absolute path, we'd
// see stray directories under base_dir.
let base_entries: Vec<_> = std::fs::read_dir(base.path())
.unwrap()
.collect();
assert!(
base_entries.is_empty(),
"base_dir should be untouched when archive path is absolute, found: {base_entries:?}"
);
let archives = archivist.list_archives().await.unwrap();
assert_eq!(archives.len(), 1);
}
/// Passing `None` as `base_dir` must leave an absolute `path` untouched.
#[tokio::test]
async fn none_base_dir_preserves_existing_behavior() {
let archive_dir = tempfile::tempdir().unwrap();
// Same backslash-to-slash replacement as in the test above.
let abs_path = archive_dir.path().to_string_lossy().replace('\\', "/");
let cfg = parse(&format!(
r#"
[[archives]]
name = "main"
type = "jsonl"
[archives.params]
path = "{abs_path}"
"#,
));
let registry = BackendRegistry::with_jsonl();
let archivist = Archivist::from_config(cfg, &registry, None)
.await
.unwrap();
let expected = archive_dir.path().join(".contexts");
assert!(
expected.exists(),
"expected {expected:?} to exist with None base_dir and absolute path"
);
let archives = archivist.list_archives().await.unwrap();
assert_eq!(archives.len(), 1);
}
}
@@ -0,0 +1,285 @@
//! Connector orchestration for `Archivist`.
//!
//! Alias detection and tri-state registration logic live here; persistence is
//! delegated to each backend's `ConnectorRegistryBackend` sub-trait. Ported
//! from `FileBasedArchivist::register_connector` and
//! `MultiArchiveArchivist::resolve_connector_uid`.
use chrono::Utc;
use uuid::Uuid;
use crate::backend::ArchiveCapability;
use crate::coordinator::Archivist;
use crate::error::{ArchivistError, Result};
use crate::types::{
ConnectorRecord, RegisterConnectorRequest, RegisterConnectorResponse, RegisterStatus,
};
impl Archivist {
/// Register a connector with alias detection.
///
/// Ported from `FileBasedArchivist::register_connector`. Decision order:
///
/// 1. If `custom_uid` collides with an existing connector:
/// - same `client_native_id` → `Aliased` (idempotent re-registration).
/// - different `client_native_id` → `CollisionInconsistent` error.
/// 2. If the `client_native_id` is already registered under a different
/// UID → `Aliased` to that pre-existing UID.
/// 3. If a `fingerprint` matches a pre-existing connector → `Aliased` to
/// that UID. (Identity persistence across connector re-adds.)
/// 4. Otherwise → `Accepted`; a new `ConnectorRecord` is persisted via
/// `ConnectorRegistryBackend::put_connector`.
// TODO(phase3 task 16): register_connector fanout requires replicating the
// ConnectorRecord to secondaries. Since connectors are identity-shaped (UIDs
// must match across backends), the tri-state alias detection must stay
// canonical on the primary, but the accepted record should be mirrored to
// secondaries. Deferred to a follow-up within Phase 3 — the core Task 16
// plan covers append_messages and the session mutators which are the hot
// paths. Current behaviour: single-primary via `resolve_backend`.
pub async fn register_connector(
&self,
req: RegisterConnectorRequest,
archive: Option<String>,
) -> Result<RegisterConnectorResponse> {
let backend = self.resolve_backend(archive).await?;
let registry = backend.as_connector_registry().ok_or_else(|| {
ArchivistError::CapabilityNotSupported {
capability: ArchiveCapability::ConnectorRegistry,
backend: "selected".into(),
}
})?;
// Generate connector UID (use custom_uid or generate new)
let connector_uid = req.custom_uid.unwrap_or_else(Uuid::now_v7);
// Load existing (non-alias) connectors for collision detection.
let existing_connectors = registry.list_connectors().await?;
// 1. Check for UID collision.
if let Some(existing) = existing_connectors
.iter()
.find(|c| c.connector_uid == connector_uid)
{
if existing.client_native_id == req.client_native_id {
// Same UID with same client_native_id -> ALIASED (idempotent).
return Ok(RegisterConnectorResponse {
status: RegisterStatus::Aliased,
connector_uid,
alias_of: Some(connector_uid),
note: Some("Connector already registered with this UID".to_string()),
});
} else {
// Same UID with different client_native_id -> REJECTED.
return Err(ArchivistError::CollisionInconsistent(connector_uid));
}
}
// 2. Check for existing client_native_id (different UID collision).
if let Some(existing) = existing_connectors
.iter()
.find(|c| c.client_native_id == req.client_native_id)
{
return Ok(RegisterConnectorResponse {
status: RegisterStatus::Aliased,
connector_uid: existing.connector_uid,
alias_of: Some(existing.connector_uid),
note: Some("Connector already registered with different UID".to_string()),
});
}
// 3. Check for fingerprint match (identity persistence across re-adds).
//
// Note: the original `FileBasedArchivist` additionally refreshed the
// matched connector's `title`/`metadata` on disk and in cache here.
// That refresh bypassed both the TSV index and any backend abstraction
// (direct `read_json`/`write_json` against `connector.json`). The
// `ConnectorRegistryBackend` trait does not yet expose an
// "update metadata" method, and `put_connector` would append a
// duplicate row to the index rather than mutate in place. The refresh
// was best-effort (`let _ = write_json(...)`) and is not exercised by
// existing tests; deliberately skipped here. Re-introduce via a
// dedicated backend method if a consumer relies on it.
if let Some(ref fp) = req.fingerprint {
if let Some(existing) = existing_connectors
.iter()
.find(|c| c.fingerprint.as_deref() == Some(fp.as_str()))
{
let matched_uid = existing.connector_uid;
return Ok(RegisterConnectorResponse {
status: RegisterStatus::Aliased,
connector_uid: matched_uid,
alias_of: Some(matched_uid),
note: Some(format!("Matched by fingerprint: {}", fp)),
});
}
}
// 4. No collision -> ACCEPTED, create and persist new connector.
let now = Utc::now();
let connector_record = ConnectorRecord {
version: 1,
connector_uid,
r#type: req.r#type,
title: req.title,
client_native_id: req.client_native_id,
alias_of: None,
created_at: now,
metadata: req.metadata,
fingerprint: req.fingerprint,
};
registry.put_connector(connector_record).await?;
Ok(RegisterConnectorResponse {
status: RegisterStatus::Accepted,
connector_uid,
alias_of: None,
note: None,
})
}
/// Resolve a connector UID by scanning every registered backend.
///
/// Ported from `MultiArchiveArchivist::resolve_connector_uid`: enabled
/// backends with a connector registry are tried in registration order, and
/// the first one that recognises `client_native_id` wins. As a secondary
/// path, if `client_native_id` parses as a UUID and the backend did not
/// recognise it as a native id, the backend is asked whether it holds a
/// connector record at that UID.
///
/// Backend errors do not abort the walk: they are reported through
/// `record_read_failure` and the next backend is tried.
///
/// # Errors
///
/// Returns `ConnectorUnknown(Uuid::nil())` when no backend resolves the id —
/// including the case where every backend failed with an error.
pub async fn resolve_connector_uid(&self, client_native_id: &str) -> Result<Uuid> {
    // The UUID interpretation of the id is the same for every backend, so
    // parse it once instead of once per loop iteration.
    let id_as_uuid = Uuid::parse_str(client_native_id).ok();

    // Hand-rolled walk rather than `read_walk_collection`: we want
    // "try every backend" semantics — a backend that returns `Ok(None)`
    // should NOT win the walk. `read_walk_collection` treats any `Ok(_)`
    // as a hit, so it would stop at the first backend that answered at
    // all. Health drift is still wired through `record_read_*`.
    let regs: Vec<_> = self.registrations.read().await.clone();
    for reg in regs.iter() {
        if !reg.enabled {
            continue;
        }
        let Some(registry) = reg.backend.as_connector_registry() else {
            continue;
        };
        match registry.resolve_connector_uid(client_native_id).await {
            Ok(Some(uid)) => {
                self.record_read_success(reg).await;
                return Ok(uid);
            }
            Ok(None) => {
                self.record_read_success(reg).await;
                // Secondary path: treat the id itself as a connector UID.
                if let Some(parsed) = id_as_uuid {
                    match registry.get_connector(parsed).await {
                        Ok(Some(_)) => return Ok(parsed),
                        Ok(None) => {}
                        Err(_) => {
                            self.record_read_failure(reg).await;
                        }
                    }
                }
            }
            Err(_) => {
                self.record_read_failure(reg).await;
            }
        }
    }
    Err(ArchivistError::ConnectorUnknown(Uuid::nil()))
}
/// Return the connectors (non-aliases only) of one archive.
///
/// With `archive = Some(name)` the named backend is queried directly:
/// an unknown name yields `ArchiveNameUnknown`, and a backend without a
/// connector registry yields `CapabilityNotSupported`. With `None`, the
/// lookup is delegated to `read_walk_collection` over the backends that
/// expose a connector registry, and the first answer is returned; if the
/// walk produces no answer the result is an empty list.
pub async fn list_connectors(
    &self,
    archive: Option<String>,
) -> Result<Vec<ConnectorRecord>> {
    // No explicit archive: walk the registry-capable backends.
    let Some(name) = archive else {
        let first_answer = self
            .read_walk_collection(
                |reg| reg.backend.as_connector_registry().is_some(),
                |backend| async move {
                    let cr = backend
                        .as_connector_registry()
                        .expect("predicate ensured");
                    cr.list_connectors().await
                },
            )
            .await?;
        return Ok(first_answer.unwrap_or_default());
    };

    // Explicit archive: resolve it by name and query it directly.
    let reg = match self.find_registration(&name).await {
        Some(found) => found,
        None => return Err(ArchivistError::ArchiveNameUnknown(name)),
    };
    let Some(registry) = reg.backend.as_connector_registry() else {
        return Err(ArchivistError::CapabilityNotSupported {
            capability: ArchiveCapability::ConnectorRegistry,
            backend: reg.name.clone(),
        });
    };
    registry.list_connectors().await
}
/// Set a new stable fingerprint on an existing connector.
///
/// The primary backend is updated first; a failure there is recorded and
/// returned immediately. Afterwards every other enabled, write-active
/// registration is updated as well. Secondaries without a connector
/// registry are skipped with a debug log. A failing secondary is recorded,
/// and aborts the call only when its failure mode is `Required`.
///
/// NOTE: read-mutate-write on the backend side; falls through to inline
/// under `WritePolicy::Queued` (no `WriteOp` variant).
///
/// # Errors
///
/// Returns `PrimaryUnavailable` when the primary lacks the connector
/// registry capability, and otherwise the error of the primary or of the
/// first failing `Required` secondary.
pub async fn update_connector_fingerprint(
    &self,
    connector_uid: Uuid,
    fingerprint: String,
    archive: Option<String>,
) -> Result<()> {
    let primary = self.resolve_primary(archive).await?;
    let snapshot: Vec<std::sync::Arc<crate::registry::ArchiveRegistration>> =
        self.registrations.read().await.clone();

    let Some(primary_registry) = primary.backend.as_connector_registry() else {
        return Err(ArchivistError::PrimaryUnavailable {
            name: primary.name.clone(),
            reason: "backend lacks ConnectorRegistry capability".into(),
        });
    };

    // Primary first: nothing is written to secondaries if this fails.
    match primary_registry
        .update_connector_fingerprint(connector_uid, fingerprint.clone())
        .await
    {
        Ok(_) => {
            self.record_write_success(&primary).await;
        }
        Err(e) => {
            self.record_write_failure(&primary, &format!("{e}")).await;
            return Err(e);
        }
    }

    // Then every other enabled, write-active registration.
    let secondaries = snapshot
        .iter()
        .filter(|r| r.name != primary.name && r.enabled && r.write_active);
    for reg in secondaries {
        let Some(sec_reg) = reg.backend.as_connector_registry() else {
            tracing::debug!(
                backend = reg.name.as_str(),
                type_name = reg.type_name,
                op = "update_connector_fingerprint",
                "capability_skip"
            );
            continue;
        };
        match sec_reg
            .update_connector_fingerprint(connector_uid, fingerprint.clone())
            .await
        {
            Ok(_) => {
                self.record_write_success(reg).await;
            }
            Err(e) => {
                self.record_write_failure(reg, &format!("{e}")).await;
                // Only a `Required` secondary turns its failure into an error.
                if reg.failure_mode == crate::registry::FailureMode::Required {
                    return Err(e);
                }
            }
        }
    }
    Ok(())
}
}
@@ -0,0 +1,526 @@
//! Meta events, DAG, and cleanup orchestration for `Archivist`.
//!
//! Ported from `FileBasedArchivist` in `archivist.rs`. Meta events and DAG
//! methods are thin delegates over `as_meta_events()` / `as_dag()`;
//! `get_session_tree` performs a recursive DAG walk; `cleanup_empty_sessions`
//! pages through all sessions and deletes those with zero messages (skipping
//! `SessionKind::AcpConnection` meta sessions, which track events rather than
//! messages).
use uuid::Uuid;
use crate::backend::ArchiveCapability;
use crate::coordinator::Archivist;
use crate::error::{ArchivistError, Result};
use crate::types::{
DagEdge, MetaEventRecord, SessionKind, SessionListQuery, SessionMetadata, MAX_PAGE_LIMIT,
};
impl Archivist {
// ------------------------------------------------------------------
// Meta events
// ------------------------------------------------------------------
/// Append `events` to the meta-event log of `scroll_id`.
///
/// The primary (resolved from `archive`) is written first and must expose the
/// `MetaEvents` capability. Under `WritePolicy::Inline` the append is awaited
/// and its outcome recorded; under `WritePolicy::Queued` the operation is
/// handed to the registration's writer and an enqueue error is returned.
///
/// The events are then fanned out to every other enabled, write-active
/// registration that `filter_allows` accepts and that exposes `MetaEvents`.
/// A secondary failure aborts the call only when that registration is
/// `FailureMode::Required`; otherwise it is recorded (inline) or logged
/// (queued) and the fan-out continues.
///
/// # Errors
/// Returns `ArchivistError::PrimaryUnavailable` when the primary lacks the
/// `MetaEvents` capability, the primary's own write/enqueue error, or the
/// error of a failing `Required` secondary.
///
/// # Panics
/// Panics if a registration with `WritePolicy::Queued` has no writer handle.
pub async fn append_meta_events(
    &self,
    scroll_id: Uuid,
    events: Vec<MetaEventRecord>,
    archive: Option<String>,
) -> Result<()> {
    let primary = self.resolve_primary(archive).await?;
    let regs: Vec<std::sync::Arc<crate::registry::ArchiveRegistration>> =
        self.registrations.read().await.clone();
    // Primary must have MetaEvents capability even in the queued path —
    // the writer task dispatches to `as_meta_events()`, so we'd silently
    // drop events on an incapable primary. Fail fast here.
    let primary_meta = primary.backend.as_meta_events().ok_or_else(|| {
        ArchivistError::PrimaryUnavailable {
            name: primary.name.clone(),
            reason: "backend lacks MetaEvents capability".into(),
        }
    })?;
    match &primary.write_policy {
        crate::registry::WritePolicy::Inline => {
            if let Err(e) = primary_meta
                .append_meta_events(scroll_id, events.clone())
                .await
            {
                self.record_write_failure(&primary, &e.to_string()).await;
                return Err(e);
            }
            self.record_write_success(&primary).await;
        }
        crate::registry::WritePolicy::Queued { .. } => {
            let writer = primary
                .writer
                .as_ref()
                .expect("queued policy implies writer handle present");
            writer
                .enqueue(crate::registry::writer::WriteOp::AppendMetaEvents {
                    scroll_id,
                    events: events.clone(),
                })
                .await?;
        }
    }
    let session_metadata_for_filter = self
        .load_metadata_for_filter(scroll_id, &regs, &primary.name)
        .await;
    for reg in regs.iter() {
        if reg.name == primary.name {
            continue;
        }
        if !reg.enabled || !reg.write_active {
            continue;
        }
        if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) {
            tracing::debug!(
                archive = %reg.name,
                scroll_id = %scroll_id,
                op = "append_meta_events",
                "filter_skip"
            );
            continue;
        }
        let Some(me) = reg.backend.as_meta_events() else {
            tracing::debug!(
                backend = reg.name.as_str(),
                type_name = reg.type_name,
                op = "append_meta_events",
                "capability_skip"
            );
            continue;
        };
        match &reg.write_policy {
            crate::registry::WritePolicy::Inline => {
                if let Err(e) = me.append_meta_events(scroll_id, events.clone()).await {
                    self.record_write_failure(reg, &e.to_string()).await;
                    if reg.failure_mode == crate::registry::FailureMode::Required {
                        return Err(e);
                    }
                } else {
                    self.record_write_success(reg).await;
                }
            }
            crate::registry::WritePolicy::Queued { .. } => {
                let writer = reg
                    .writer
                    .as_ref()
                    .expect("queued policy implies writer handle present");
                if let Err(e) = writer
                    .enqueue(crate::registry::writer::WriteOp::AppendMetaEvents {
                        scroll_id,
                        events: events.clone(),
                    })
                    .await
                {
                    // A best-effort secondary may lose the write, but the
                    // loss must at least be visible in the logs.
                    tracing::warn!(
                        archive = %reg.name,
                        scroll_id = %scroll_id,
                        op = "append_meta_events",
                        error = %e,
                        "secondary_enqueue_failed"
                    );
                    if reg.failure_mode == crate::registry::FailureMode::Required {
                        return Err(e);
                    }
                }
            }
        }
    }
    Ok(())
}
/// Fetch the meta events stored for `scroll_id`.
///
/// Returns an empty list when the read walk produces no answer.
pub async fn get_meta_events(
    &self,
    scroll_id: Uuid,
    _archive: Option<String>,
) -> Result<Vec<MetaEventRecord>> {
    // The `_archive` override is not consulted for reads: the per-session
    // walk chooses among the backends that support `MetaEvents`.
    let walked = self
        .read_walk_per_session(
            scroll_id,
            |reg| reg.backend.as_meta_events().is_some(),
            |backend| async move {
                let meta = backend.as_meta_events().expect("predicate ensured");
                meta.get_meta_events(scroll_id).await.map(Some)
            },
        )
        .await?;
    Ok(walked.unwrap_or_default())
}
/// Record the connection state of an ACP meta-session.
///
/// The primary (resolved from `archive`) is updated first; a primary failure
/// is recorded and returned immediately. Every other enabled, write-active
/// registration exposing `MetaEvents` then receives the same update. A
/// secondary failure is recorded and aborts the call only when that
/// registration is `FailureMode::Required`.
///
/// NOTE: read-mutate-write on the backend side (the impl rewrites fields
/// on the stored session); falls through to inline under
/// `WritePolicy::Queued` (no `WriteOp` variant).
pub async fn update_meta_session_status(
    &self,
    scroll_id: Uuid,
    is_connected: bool,
    current_session_id: Option<Uuid>,
    archive: Option<String>,
) -> Result<()> {
    let primary = self.resolve_primary(archive).await?;
    let snapshot: Vec<std::sync::Arc<crate::registry::ArchiveRegistration>> =
        self.registrations.read().await.clone();

    let Some(primary_meta) = primary.backend.as_meta_events() else {
        return Err(ArchivistError::PrimaryUnavailable {
            name: primary.name.clone(),
            reason: "backend lacks MetaEvents capability".into(),
        });
    };
    match primary_meta
        .update_meta_session_status(scroll_id, is_connected, current_session_id)
        .await
    {
        Ok(_) => {
            self.record_write_success(&primary).await;
        }
        Err(err) => {
            self.record_write_failure(&primary, &err.to_string()).await;
            return Err(err);
        }
    }

    // Mirror the status change onto the remaining write targets.
    for reg in snapshot.iter() {
        let is_secondary_target =
            reg.name != primary.name && reg.enabled && reg.write_active;
        if !is_secondary_target {
            continue;
        }
        let Some(meta) = reg.backend.as_meta_events() else {
            tracing::debug!(
                backend = reg.name.as_str(),
                type_name = reg.type_name,
                op = "update_meta_session_status",
                "capability_skip"
            );
            continue;
        };
        match meta
            .update_meta_session_status(scroll_id, is_connected, current_session_id)
            .await
        {
            Ok(_) => {
                self.record_write_success(reg).await;
            }
            Err(err) => {
                self.record_write_failure(reg, &err.to_string()).await;
                // Only a Required secondary turns its failure into ours.
                if reg.failure_mode == crate::registry::FailureMode::Required {
                    return Err(err);
                }
            }
        }
    }
    Ok(())
}
/// List the meta sessions known to the first backend that answers.
///
/// Returns an empty list when the read walk produces no answer.
pub async fn list_meta_sessions(
    &self,
    _archive: Option<String>,
) -> Result<Vec<SessionMetadata>> {
    // Collection-shaped read: the walk hands back the answer of the first
    // eligible backend supporting `MetaEvents`. The `_archive` override is
    // not honoured here; routing decides.
    let walked = self
        .read_walk_collection(
            |reg| reg.backend.as_meta_events().is_some(),
            |backend| async move {
                let meta = backend.as_meta_events().expect("predicate ensured");
                meta.list_meta_sessions().await
            },
        )
        .await?;
    Ok(walked.unwrap_or_default())
}
/// Look up the meta session associated with `client_id`.
///
/// Yields `None` both when no backend answered and when the answering
/// backend has no matching session.
pub async fn find_meta_session_by_client(
    &self,
    client_id: &str,
    _archive: Option<String>,
) -> Result<Option<SessionMetadata>> {
    // Own the id so each invocation of the op closure can take its own copy
    // into the future it returns.
    let owned_client_id = client_id.to_owned();
    // Collection-shaped read: the first eligible backend supporting
    // `MetaEvents` wins. Its answer is already an `Option`, so the walker's
    // outer `Option` is collapsed into it below.
    let walked = self
        .read_walk_collection(
            |reg| reg.backend.as_meta_events().is_some(),
            |backend| {
                let wanted = owned_client_id.clone();
                async move {
                    let meta = backend.as_meta_events().expect("predicate ensured");
                    meta.find_meta_session_by_client(&wanted).await
                }
            },
        )
        .await?;
    Ok(walked.unwrap_or(None))
}
// ------------------------------------------------------------------
// DAG
// ------------------------------------------------------------------
/// Append a parent → child `edge` to the session DAG.
///
/// The primary (resolved from `archive`) is written first and must expose the
/// `Dag` capability. Under `WritePolicy::Inline` the append is awaited and its
/// outcome recorded; under `WritePolicy::Queued` the operation is handed to
/// the registration's writer and an enqueue error is returned.
///
/// The edge is then fanned out to every other enabled, write-active
/// registration that `filter_allows` accepts (evaluated against the parent
/// session) and that exposes `Dag`. A secondary failure aborts the call only
/// when that registration is `FailureMode::Required`; otherwise it is
/// recorded (inline) or logged (queued) and the fan-out continues.
///
/// # Errors
/// Returns `ArchivistError::PrimaryUnavailable` when the primary lacks the
/// `Dag` capability, the primary's own write/enqueue error, or the error of a
/// failing `Required` secondary.
///
/// # Panics
/// Panics if a registration with `WritePolicy::Queued` has no writer handle.
pub async fn append_dag_edge(
    &self,
    edge: DagEdge,
    archive: Option<String>,
) -> Result<()> {
    let primary = self.resolve_primary(archive).await?;
    let regs: Vec<std::sync::Arc<crate::registry::ArchiveRegistration>> =
        self.registrations.read().await.clone();
    // Primary must have DAG capability — even in the queued path the
    // writer task dispatches to `as_dag()`, so silently accepting a
    // non-DAG primary would lose the edge.
    let primary_dag = primary.backend.as_dag().ok_or_else(|| {
        ArchivistError::PrimaryUnavailable {
            name: primary.name.clone(),
            reason: "backend lacks Dag capability".into(),
        }
    })?;
    match &primary.write_policy {
        crate::registry::WritePolicy::Inline => {
            if let Err(e) = primary_dag.append_dag_edge(edge.clone()).await {
                self.record_write_failure(&primary, &e.to_string()).await;
                return Err(e);
            }
            self.record_write_success(&primary).await;
        }
        crate::registry::WritePolicy::Queued { .. } => {
            let writer = primary
                .writer
                .as_ref()
                .expect("queued policy implies writer handle present");
            writer
                .enqueue(crate::registry::writer::WriteOp::AppendDagEdge(edge.clone()))
                .await?;
        }
    }
    // DAG edges are indexed under the parent scroll_id, so use that for
    // filter evaluation (the session whose DAG is being extended).
    let parent_scroll_id = edge.parent;
    let session_metadata_for_filter = self
        .load_metadata_for_filter(parent_scroll_id, &regs, &primary.name)
        .await;
    for reg in regs.iter() {
        if reg.name == primary.name {
            continue;
        }
        if !reg.enabled || !reg.write_active {
            continue;
        }
        if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) {
            tracing::debug!(
                archive = %reg.name,
                scroll_id = %parent_scroll_id,
                op = "append_dag_edge",
                "filter_skip"
            );
            continue;
        }
        let Some(d) = reg.backend.as_dag() else {
            tracing::debug!(
                backend = reg.name.as_str(),
                type_name = reg.type_name,
                op = "append_dag_edge",
                "capability_skip"
            );
            continue;
        };
        match &reg.write_policy {
            crate::registry::WritePolicy::Inline => {
                if let Err(e) = d.append_dag_edge(edge.clone()).await {
                    self.record_write_failure(reg, &e.to_string()).await;
                    if reg.failure_mode == crate::registry::FailureMode::Required {
                        return Err(e);
                    }
                } else {
                    self.record_write_success(reg).await;
                }
            }
            crate::registry::WritePolicy::Queued { .. } => {
                let writer = reg
                    .writer
                    .as_ref()
                    .expect("queued policy implies writer handle present");
                if let Err(e) = writer
                    .enqueue(crate::registry::writer::WriteOp::AppendDagEdge(edge.clone()))
                    .await
                {
                    // A best-effort secondary may lose the edge, but the
                    // loss must at least be visible in the logs.
                    tracing::warn!(
                        archive = %reg.name,
                        scroll_id = %parent_scroll_id,
                        op = "append_dag_edge",
                        error = %e,
                        "secondary_enqueue_failed"
                    );
                    if reg.failure_mode == crate::registry::FailureMode::Required {
                        return Err(e);
                    }
                }
            }
        }
    }
    Ok(())
}
/// Fetch the direct children of `scroll_id` in the session DAG.
///
/// Returns an empty list when the read walk produces no answer.
pub async fn get_children(
    &self,
    scroll_id: Uuid,
    _archive: Option<String>,
) -> Result<Vec<SessionMetadata>> {
    // The `_archive` override is not consulted for reads: the per-session
    // walk chooses among the backends that support `Dag`.
    let walked = self
        .read_walk_per_session(
            scroll_id,
            |reg| reg.backend.as_dag().is_some(),
            |backend| async move {
                let dag = backend.as_dag().expect("predicate ensured");
                dag.get_children(scroll_id).await.map(Some)
            },
        )
        .await?;
    Ok(walked.unwrap_or_default())
}
/// Collect every DAG edge reachable from `root_scroll_id`.
///
/// Mirrors the shape of `FileBasedArchivist::get_session_tree`: the result
/// holds the edges to children, grandchildren, and so on. Edges are fetched
/// one parent at a time through `get_dag_edges`; a visited set ensures each
/// node is expanded at most once, so cycles cannot loop forever.
pub async fn get_session_tree(
    &self,
    root_scroll_id: Uuid,
    archive: Option<String>,
) -> Result<Vec<DagEdge>> {
    // TODO(phase3): consider multi-backend DAG walk in a future phase —
    // the whole traversal is pinned to the one backend resolved here,
    // because a consistent walk needs every `get_dag_edges` call to hit the
    // SAME backend as the root, which the walker API does not yet expose.
    let backend = self.resolve_backend(archive).await?;
    let Some(dag) = backend.as_dag() else {
        return Err(ArchivistError::CapabilityNotSupported {
            capability: ArchiveCapability::Dag,
            backend: "selected".into(),
        });
    };

    let mut visited = std::collections::HashSet::new();
    let mut pending = vec![root_scroll_id];
    let mut collected = Vec::new();
    while let Some(node) = pending.pop() {
        if visited.contains(&node) {
            continue;
        }
        visited.insert(node);
        let outgoing = dag.get_dag_edges(node).await?;
        pending.extend(outgoing.iter().map(|edge| edge.child));
        collected.extend(outgoing);
    }
    Ok(collected)
}
// ------------------------------------------------------------------
// Cleanup
// ------------------------------------------------------------------
/// Remove every session that holds no messages.
///
/// Ported from `FileBasedArchivist::cleanup_empty_sessions`. Sessions
/// (hidden ones included) are fetched page by page through
/// `list_sessions_paged`; each one has its messages counted and is deleted
/// when the count is zero. Meta sessions (`SessionKind::AcpConnection`) are
/// never deleted — they track connection events in `events.jsonl`, not
/// messages, so an empty message log is expected. Count and delete failures
/// are logged and the session is left in place.
///
/// Returns `(deleted, total_scanned)`.
pub async fn cleanup_empty_sessions(
    &self,
    archive: Option<String>,
) -> Result<(usize, usize)> {
    let backend = self.resolve_backend(archive).await?;
    let (mut deleted, mut total) = (0usize, 0usize);
    let mut query = SessionListQuery {
        include_hidden: true,
        limit: MAX_PAGE_LIMIT,
        ..SessionListQuery::default()
    };
    loop {
        let page = backend.list_sessions_paged(query.clone()).await?;
        for session in page.items.iter() {
            total += 1;
            // Meta sessions legitimately have no messages; never treat them
            // as empty.
            if session.kind == SessionKind::AcpConnection {
                tracing::debug!(
                    scroll_id = %session.scroll_id,
                    "Skipping meta session (AcpConnection) during cleanup"
                );
                continue;
            }
            match backend.count_messages(session.scroll_id).await {
                Err(e) => {
                    // Legacy semantics: an uncountable session is kept
                    // rather than risking deletion of a non-empty one.
                    tracing::warn!(
                        scroll_id = %session.scroll_id,
                        error = %e,
                        "Failed to count messages for session, skipping cleanup"
                    );
                }
                Ok(0) => {
                    if let Err(e) = backend.delete_session(session.scroll_id).await {
                        tracing::warn!(
                            scroll_id = %session.scroll_id,
                            error = %e,
                            "Failed to delete empty session during cleanup"
                        );
                    } else {
                        tracing::info!(
                            scroll_id = %session.scroll_id,
                            "Deleted empty session during cleanup"
                        );
                        deleted += 1;
                    }
                }
                Ok(_) => {}
            }
        }
        // Advance to the next page, or stop once the backend reports none.
        let Some(cursor) = page.next_cursor else {
            break;
        };
        query.cursor = Some(cursor);
    }
    tracing::info!(
        deleted = deleted,
        total = total,
        "Completed empty session cleanup"
    );
    Ok((deleted, total))
}
}
@@ -0,0 +1,231 @@
//! Concrete archivist coordinator.
//!
//! Owns a `Vec<Arc<ArchiveRegistration>>` sorted by `read_priority`, plus a
//! positive `scroll_id → backend` cache. The registry is constructed from
//! `dirigent.toml` at boot (Task 12). `Archivist::new` remains a legacy
//! convenience for the dev-instance migration path; later tasks migrate
//! consumers to `Archivist::from_config`.
mod admin;
mod archives;
mod boot;
mod connectors;
mod meta;
mod routing;
mod sessions;
pub mod types;
pub use types::{ArchiveInfo, ArchiveMetadata};
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::backend::ArchiveBackend;
use crate::error::{ArchivistError, Result};
use crate::registry::{
cache::ReadCache, ArchiveRegistration, FailureMode, WritePolicy,
};
/// Coordinator that owns the set of registered archive backends.
pub struct Archivist {
    /// Registered archives behind an async `RwLock`; the module docs describe
    /// this list as sorted by `read_priority`.
    pub(crate) registrations: RwLock<Vec<Arc<ArchiveRegistration>>>,
    /// Shared read cache; the module docs describe it as a positive
    /// `scroll_id → backend` cache.
    #[allow(dead_code)] // wired up in later tasks (cache-backed reads)
    pub(crate) read_cache: Arc<ReadCache>,
    /// Path handed to `Archivist::new`; the other constructors in this file
    /// store an empty `PathBuf`.
    #[allow(dead_code)] // retained for future admin endpoints / diagnostics
    pub(crate) registry_path: PathBuf,
}
impl Archivist {
/// Legacy constructor kept for the dev-instance migration path.
///
/// A non-empty `registry_path` yields one `JsonlBackend` archive named
/// "main" rooted at `registry_path.parent()` (or at `registry_path` itself
/// when it has no parent). An empty path yields a coordinator with no
/// registrations. Retained until Task 28 migrates consumers to
/// `from_config`.
pub async fn new(registry_path: PathBuf) -> Result<Self> {
    use crate::backends::JsonlBackend;

    let registrations: Vec<Arc<ArchiveRegistration>> =
        if registry_path.as_os_str().is_empty() {
            Vec::new()
        } else {
            let archive_root = match registry_path.parent() {
                Some(parent) => parent.to_path_buf(),
                None => registry_path.clone(),
            };
            let backend: Arc<dyn ArchiveBackend> =
                Arc::new(JsonlBackend::new(archive_root).await?);
            let initial_health = backend.health_check().await;
            let main = ArchiveRegistration::new(
                "main".into(),
                "jsonl",
                backend,
                /* write_active */ true,
                FailureMode::Required,
                /* read_priority */ 0,
                /* enabled */ true,
                WritePolicy::Inline,
                /* writer */ None,
                initial_health,
            );
            vec![Arc::new(main)]
        };

    Ok(Self {
        registrations: RwLock::new(registrations),
        read_cache: Arc::new(ReadCache::new()),
        registry_path,
    })
}
/// Build a coordinator whose only archive is a `JsonlBackend` named "main"
/// rooted at `archive_root`.
///
/// The registration is enabled, write-active, `Required`, inline-written and
/// has read priority 0; `registry_path` is left empty.
pub async fn new_with_single_archive(archive_root: PathBuf) -> Result<Self> {
    use crate::backends::JsonlBackend;

    let backend: Arc<dyn ArchiveBackend> =
        Arc::new(JsonlBackend::new(archive_root).await?);
    let initial_health = backend.health_check().await;
    let main = ArchiveRegistration::new(
        "main".into(),
        "jsonl",
        backend,
        /* write_active */ true,
        FailureMode::Required,
        /* read_priority */ 0,
        /* enabled */ true,
        WritePolicy::Inline,
        /* writer */ None,
        initial_health,
    );
    let registrations = vec![Arc::new(main)];

    Ok(Self {
        registrations: RwLock::new(registrations),
        read_cache: Arc::new(ReadCache::new()),
        registry_path: PathBuf::new(),
    })
}
/// Wrap an already-built backend in a single-archive coordinator.
///
/// Intended for tests that need to keep a handle on the backend alongside
/// the coordinator. The backend is registered under `name` with type name
/// "external"; it is enabled, write-active, `Required`, inline-written and
/// has read priority 0.
pub async fn from_single_backend(
    name: String,
    backend: Arc<dyn ArchiveBackend>,
) -> Result<Self> {
    let initial_health = backend.health_check().await;
    let registration = ArchiveRegistration::new(
        name,
        "external",
        backend,
        /* write_active */ true,
        FailureMode::Required,
        /* read_priority */ 0,
        /* enabled */ true,
        WritePolicy::Inline,
        /* writer */ None,
        initial_health,
    );
    let registrations = vec![Arc::new(registration)];

    Ok(Self {
        registrations: RwLock::new(registrations),
        read_cache: Arc::new(ReadCache::new()),
        registry_path: PathBuf::new(),
    })
}
/// Pick one backend, optionally by name.
///
/// `Some(name)` → the backend registered under that name, or
/// `ArchiveNameUnknown` when there is none. `None` → the enabled,
/// write-active, `Required` registration with the lowest `read_priority`,
/// or `PrimaryUnavailable` when no registration qualifies. An empty
/// registry yields `NoArchiveConfigured`.
#[allow(dead_code)] // wired up in later tasks
pub(crate) async fn resolve_backend(
    &self,
    archive: Option<String>,
) -> Result<Arc<dyn ArchiveBackend>> {
    let regs = self.registrations.read().await;
    if regs.is_empty() {
        return Err(ArchivistError::NoArchiveConfigured);
    }

    // Named lookup: existence is the only requirement.
    if let Some(name) = archive {
        let found = regs.iter().find(|r| r.name == name);
        return match found {
            Some(reg) => Ok(reg.backend.clone()),
            None => Err(ArchivistError::ArchiveNameUnknown(name)),
        };
    }

    // Default: best-priority registration eligible as a required writer.
    let default_target = regs
        .iter()
        .filter(|r| r.enabled && r.write_active && r.failure_mode == FailureMode::Required)
        .min_by_key(|r| r.read_priority);
    match default_target {
        Some(reg) => Ok(reg.backend.clone()),
        None => Err(ArchivistError::PrimaryUnavailable {
            name: "<default>".into(),
            reason: "no required write-active backend".into(),
        }),
    }
}
/// Pick the registration that acts as primary for a write.
///
/// `None` → the default write target: the enabled, write-active, `Required`
/// registration with the lowest `read_priority`. `Some(name)` → the
/// registration with that name, which must be enabled and write-active.
///
/// Fails with `NoArchiveConfigured` on an empty registry,
/// `ArchiveNameUnknown` for an unregistered name, and `PrimaryUnavailable`
/// when the chosen (or any default) target cannot take writes.
#[allow(dead_code)] // wired up in Task 16
pub(crate) async fn resolve_primary(
    &self,
    archive: Option<String>,
) -> Result<Arc<crate::registry::ArchiveRegistration>> {
    let regs = self.registrations.read().await;
    if regs.is_empty() {
        return Err(ArchivistError::NoArchiveConfigured);
    }

    let Some(name) = archive else {
        // Default write target.
        return regs
            .iter()
            .filter(|r| {
                r.enabled
                    && r.write_active
                    && r.failure_mode == crate::registry::FailureMode::Required
            })
            .min_by_key(|r| r.read_priority)
            .cloned()
            .ok_or_else(|| ArchivistError::PrimaryUnavailable {
                name: "<default>".into(),
                reason: "no required write-active backend".into(),
            });
    };

    // Explicitly named target: must exist and be able to accept writes.
    let found = regs.iter().find(|r| r.name == name);
    let reg = match found {
        Some(reg) => reg,
        None => return Err(ArchivistError::ArchiveNameUnknown(name)),
    };
    let blocker = if !reg.enabled {
        Some("backend is disabled")
    } else if !reg.write_active {
        Some("backend is not write-active")
    } else {
        None
    };
    if let Some(reason) = blocker {
        return Err(ArchivistError::PrimaryUnavailable {
            name: reg.name.clone(),
            reason: reason.into(),
        });
    }
    Ok(reg.clone())
}
}
#[cfg(any(test, feature = "test-utils"))]
impl Archivist {
    /// Test-only: assemble a coordinator around registrations the caller has
    /// already built, with a fresh read cache and an empty registry path.
    pub fn from_registrations(
        regs: Vec<std::sync::Arc<crate::registry::ArchiveRegistration>>,
    ) -> Self {
        let registrations = tokio::sync::RwLock::new(regs);
        let read_cache = std::sync::Arc::new(crate::registry::cache::ReadCache::new());
        let registry_path = std::path::PathBuf::new();
        Self {
            registrations,
            read_cache,
            registry_path,
        }
    }
}
#[cfg(test)]
mod tests;
@@ -0,0 +1,136 @@
//! Read priority walk shared by every per-scroll_id and collection-shape
//! coordinator method.
//!
//! The walk honours per-backend `enabled`, caller-supplied capability
//! predicates, and current health. Per-scroll_id reads populate a positive
//! LRU cache keyed on `scroll_id`, so the second read for the same session
//! can short-circuit the priority walk.
use std::sync::Arc;
use uuid::Uuid;
use crate::backend::ArchiveBackend;
use crate::error::Result;
use crate::registry::ArchiveRegistration;
use super::Archivist;
impl Archivist {
/// Walk enabled + healthy registrations in `read_priority` order.
///
/// `predicate` decides whether a backend can serve the read (typically a
/// capability check). `op` is invoked on the first matching backend:
/// - `Ok(Some(value))` — wins the walk; per-scroll_id cache is updated; returned.
/// - `Ok(None)` — backend doesn't have it; continue.
/// - `Err(_)` — drift the backend's health and continue.
pub(crate) async fn read_walk_per_session<T, F, Fut, P>(
&self,
scroll_id: Uuid,
predicate: P,
op: F,
) -> Result<Option<T>>
where
T: Send,
P: Fn(&ArchiveRegistration) -> bool + Send + Sync,
F: Fn(Arc<dyn ArchiveBackend>) -> Fut + Send + Sync,
Fut: std::future::Future<Output = Result<Option<T>>> + Send,
{
// Cache hit: try the cached backend first.
if let Some(cached_name) = self.read_cache.get(scroll_id).await {
if let Some(reg) = self.find_registration(&cached_name).await {
if predicate(&reg) && reg.enabled && !self.is_unavailable(&reg).await {
match op(reg.backend.clone()).await {
Ok(Some(value)) => return Ok(Some(value)),
Ok(None) => {
// Cached entry no longer holds — invalidate and fall through.
self.read_cache.invalidate(scroll_id).await;
}
Err(_) => {
self.record_read_failure(&reg).await;
self.read_cache.invalidate(scroll_id).await;
}
}
}
}
}
// Priority walk.
let regs: Vec<Arc<ArchiveRegistration>> = self.registrations.read().await.clone();
for reg in regs.iter() {
if !reg.enabled || !predicate(reg) || self.is_unavailable(reg).await {
continue;
}
match op(reg.backend.clone()).await {
Ok(Some(value)) => {
self.record_read_success(reg).await;
self.read_cache.put(scroll_id, reg.name.clone()).await;
return Ok(Some(value));
}
Ok(None) => {
self.record_read_success(reg).await;
continue;
}
Err(_) => {
self.record_read_failure(reg).await;
continue;
}
}
}
Ok(None)
}
/// Collection-shape read variant: returns the first enabled/healthy backend's
/// result, no cache. `op`'s return type is `Result<T>` (no `Option<T>`):
/// an error is treated as "backend couldn't serve this" and drifted; `Ok(T)`
/// is the answer.
pub(crate) async fn read_walk_collection<T, F, Fut, P>(
&self,
predicate: P,
op: F,
) -> Result<Option<T>>
where
T: Send,
P: Fn(&ArchiveRegistration) -> bool + Send + Sync,
F: Fn(Arc<dyn ArchiveBackend>) -> Fut + Send + Sync,
Fut: std::future::Future<Output = Result<T>> + Send,
{
let regs: Vec<Arc<ArchiveRegistration>> = self.registrations.read().await.clone();
for reg in regs.iter() {
if !reg.enabled || !predicate(reg) || self.is_unavailable(reg).await {
continue;
}
match op(reg.backend.clone()).await {
Ok(value) => {
self.record_read_success(reg).await;
return Ok(Some(value));
}
Err(_) => {
self.record_read_failure(reg).await;
continue;
}
}
}
Ok(None)
}
/// Return the registration whose name equals `name`, if one exists.
pub(crate) async fn find_registration(
    &self,
    name: &str,
) -> Option<Arc<ArchiveRegistration>> {
    let regs = self.registrations.read().await;
    for candidate in regs.iter() {
        if candidate.name == name {
            return Some(Arc::clone(candidate));
        }
    }
    None
}
/// Whether the registration's last recorded health is `Unavailable`.
async fn is_unavailable(&self, reg: &ArchiveRegistration) -> bool {
    let health = reg.last_health.read().await;
    match &*health {
        crate::backend::HealthStatus::Unavailable { .. } => true,
        _ => false,
    }
}
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,195 @@
//! Coordinator orchestration unit tests using `MockBackend`.
//!
//! These tests exercise connector registration and alias detection, DAG
//! walks, and empty-session cleanup without any disk I/O.
#![cfg(test)]
use std::sync::Arc;
use tokio::sync::RwLock;
use uuid::Uuid;
use crate::backend::mock::MockBackend;
use crate::backend::ArchiveBackend;
use crate::coordinator::Archivist;
use crate::registry::{
cache::ReadCache, ArchiveRegistration, FailureMode, WritePolicy,
};
use crate::types::{
DagEdge, MessageRecord, RegisterConnectorRequest, RegisterStatus, SessionCompleteness,
SessionKind, SessionMetadata,
};
/// Build a minimal `SessionMetadata` for tests.
///
/// Only `scroll_id` and `connector_uid` come from the caller; both timestamps
/// are set to the current time and every other field gets an empty or
/// neutral value (`SessionKind::Chat`, `SessionCompleteness::Complete`).
fn blank_session(scroll_id: Uuid, connector_uid: Uuid) -> SessionMetadata {
    let created_at = chrono::Utc::now();
    let updated_at = created_at;
    SessionMetadata {
        // Identity and timestamps.
        version: 1,
        scroll_id,
        connector_uid,
        created_at,
        updated_at,
        // Descriptive fields, all left empty.
        title: None,
        native_session_id: None,
        agent_id: None,
        parent_scroll_id: None,
        continuation: None,
        tags: Vec::new(),
        metadata: serde_json::Value::Null,
        no_update: false,
        // Session kind and ACP connection state.
        kind: SessionKind::Chat,
        acp_client_id: None,
        is_connected: None,
        current_session_id: None,
        // Model / mode configuration.
        models: None,
        modes: None,
        config_options: None,
        completeness: SessionCompleteness::Complete,
        // Matrix sharing.
        matrix_room_id: None,
        matrix_sharing_active: false,
        matrix_shared_at: None,
        // Subagent linkage.
        is_subagent: false,
        subagent_type: None,
        spawning_tool_use_id: None,
    }
}
/// Build a minimal "user" `MessageRecord` belonging to `session`.
///
/// The message gets a freshly generated v7 `message_id` and the current
/// timestamp; content, attachments and metadata are left empty.
fn blank_message(session: Uuid) -> MessageRecord {
    let message_id = Uuid::now_v7();
    let ts = chrono::Utc::now();
    MessageRecord {
        version: 1,
        message_id,
        session,
        parent_id: None,
        ts,
        role: "user".into(),
        author: None,
        content_md: String::new(),
        content_parts: None,
        attachments: Vec::new(),
        metadata: serde_json::Value::Null,
    }
}
/// Build a coordinator backed by one `MockBackend` registered as "main".
///
/// The registration is enabled, write-active, `Required`, inline-written and
/// has read priority 0.
async fn make_coordinator_with_single_mock() -> Archivist {
    let backend: Arc<dyn ArchiveBackend> = Arc::new(MockBackend::new());
    let initial_health = backend.health_check().await;
    let main = ArchiveRegistration::new(
        "main".into(),
        "mock",
        backend,
        /* write_active */ true,
        FailureMode::Required,
        /* read_priority */ 0,
        /* enabled */ true,
        WritePolicy::Inline,
        /* writer */ None,
        initial_health,
    );
    let registrations = RwLock::new(vec![Arc::new(main)]);
    let read_cache = Arc::new(ReadCache::new());
    let registry_path = std::path::PathBuf::from("/mock/.archives.json");
    Archivist {
        registrations,
        read_cache,
        registry_path,
    }
}
/// A first-time registration is accepted and receives a non-nil UID.
#[tokio::test]
async fn register_connector_assigns_uid_and_returns_accepted() {
    let coord = make_coordinator_with_single_mock().await;
    let resp = coord
        .register_connector(
            RegisterConnectorRequest {
                r#type: "OpenCode".into(),
                title: "test".into(),
                client_native_id: "opencode@localhost".into(),
                custom_uid: None,
                metadata: serde_json::Value::Null,
                fingerprint: None,
            },
            None,
        )
        .await
        .expect("register");
    assert!(matches!(resp.status, RegisterStatus::Accepted));
    assert_ne!(resp.connector_uid, Uuid::nil());
}
#[tokio::test]
async fn register_connector_aliases_on_duplicate_native_id() {
let coord = make_coordinator_with_single_mock().await;
let mk_req = || RegisterConnectorRequest {
r#type: "OpenCode".into(),
title: "test".into(),
client_native_id: "opencode@localhost".into(),
custom_uid: None,
metadata: serde_json::Value::Null,
fingerprint: None,
};
let first = coord.register_connector(mk_req(), None).await.unwrap();
let second = coord.register_connector(mk_req(), None).await.unwrap();
assert_eq!(second.connector_uid, first.connector_uid);
assert!(matches!(second.status, RegisterStatus::Aliased));
}
/// `get_session_tree` returns every edge below the root: two children plus
/// one grandchild make three edges.
#[tokio::test]
async fn get_session_tree_walks_full_dag() {
    let coord = make_coordinator_with_single_mock().await;
    let connector_uid = Uuid::now_v7();
    let (root, child_a, child_b, grand) = (
        Uuid::now_v7(),
        Uuid::now_v7(),
        Uuid::now_v7(),
        Uuid::now_v7(),
    );

    // Seed the sessions directly on the backend.
    let backend = {
        let regs = coord.registrations.read().await;
        regs[0].backend.clone()
    };
    for id in [root, child_a, child_b, grand] {
        let session = blank_session(id, connector_uid);
        backend.put_session(session).await.unwrap();
    }

    // root -> child_a -> grand, and root -> child_b.
    let links = [(root, child_a), (root, child_b), (child_a, grand)];
    for (parent, child) in links {
        let edge = DagEdge {
            parent,
            child,
            agent_id: String::new(),
            subagent_type: None,
            tool_use_id: None,
            ts: Some(chrono::Utc::now()),
        };
        coord.append_dag_edge(edge, None).await.unwrap();
    }

    let edges = coord.get_session_tree(root, None).await.unwrap();
    assert_eq!(edges.len(), 3, "expected 3 edges, got {}", edges.len());
}
/// Cleanup removes the session without messages and keeps the one with a
/// message, reporting one deletion out of two scanned sessions.
#[tokio::test]
async fn cleanup_empty_sessions_deletes_only_message_less_sessions() {
    let coord = make_coordinator_with_single_mock().await;
    let connector_uid = Uuid::now_v7();
    let (empty, populated) = (Uuid::now_v7(), Uuid::now_v7());

    // Seed both sessions directly on the backend; only one gets a message.
    let backend = {
        let regs = coord.registrations.read().await;
        regs[0].backend.clone()
    };
    for scroll_id in [empty, populated] {
        let session = blank_session(scroll_id, connector_uid);
        backend.put_session(session).await.unwrap();
    }
    let messages = vec![blank_message(populated)];
    backend.append_messages(populated, messages).await.unwrap();

    let (deleted, total) = coord.cleanup_empty_sessions(None).await.unwrap();
    assert_eq!(deleted, 1);
    assert_eq!(total, 2);

    let empty_after = backend.get_session(empty).await.unwrap();
    let populated_after = backend.get_session(populated).await.unwrap();
    assert!(empty_after.is_none());
    assert!(populated_after.is_some());
}
@@ -0,0 +1,60 @@
//! Shared data types used by the archivist coordinator.
//!
//! `ArchiveMetadata` is persisted per-archive in the registry file and
//! tracks creation time, path, and the set of connectors registered in
//! the archive. `ArchiveInfo` is the display-friendly projection returned
//! from listing APIs; it extends the metadata with computed fields like
//! session count and default-archive status.
use std::path::PathBuf;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
/// Metadata about a single archive.
///
/// This structure contains all the information needed to track and display
/// an archive without loading its full backend instance. It derives serde
/// `Serialize`/`Deserialize`; the module docs describe it as the per-archive
/// record persisted in the registry file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveMetadata {
    /// Unique name for this archive (e.g., "personal", "work", "experiments")
    pub name: String,
    /// Filesystem path to the archive root directory
    pub path: PathBuf,
    /// When this archive was first registered with the coordinator (UTC)
    pub created_at: DateTime<Utc>,
    /// List of connector UIDs registered in this archive
    ///
    /// This is updated as connectors are registered/unregistered and provides
    /// a quick way to see which connectors belong to which archive.
    pub connector_uids: Vec<Uuid>,
}
/// Display-friendly information about an archive.
///
/// This struct is returned by listing operations and includes computed
/// fields like session count and default status. It repeats the `name`,
/// `path` and `created_at` fields of `ArchiveMetadata` but does not carry
/// the connector UID list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveInfo {
    /// Unique name for this archive
    pub name: String,
    /// Filesystem path to the archive root directory
    pub path: PathBuf,
    /// When this archive was first registered (UTC)
    pub created_at: DateTime<Utc>,
    /// Total number of sessions across all connectors in this archive
    ///
    /// This is computed by counting sessions across all connectors and
    /// may be expensive for large archives.
    pub session_count: usize,
    /// Whether this is the current default archive
    pub is_default: bool,
}
+314
View File
@@ -0,0 +1,314 @@
//! Error types for the Archivist.
//!
//! This module defines all error types that can occur during archival operations,
//! including I/O errors, JSON errors, and domain-specific errors for connectors
//! and sessions.
use std::path::PathBuf;
use thiserror::Error;
use uuid::Uuid;
/// Result type alias for Archivist operations.
///
/// Shorthand for `std::result::Result<T, ArchivistError>`, so fallible
/// functions can be written as returning `Result<T>`.
pub type Result<T> = std::result::Result<T, ArchivistError>;
/// Errors that can occur during archival operations
///
/// The `Display` text of every variant comes from its `#[error(...)]`
/// template. Only [`ArchivistError::Io`] and [`ArchivistError::Json`] wrap a
/// foreign error via `#[from]`; all other variants carry plain data.
#[derive(Debug, Error)]
pub enum ArchivistError {
/// Connector with the given UID was not found
#[error("Connector not found: {0}")]
ConnectorUnknown(Uuid),
/// Session with the given scroll ID was not found
#[error("Session not found: {0}")]
SessionUnknown(Uuid),
/// UUID collision detected with inconsistent data
///
/// This occurs when a custom UUID is provided that matches an existing
/// entity but with different attributes (e.g., different connector type).
#[error("UUID collision: {0}")]
CollisionInconsistent(Uuid),
/// Invalid request (e.g., missing required fields, invalid format)
#[error("Invalid request: {0}")]
InvalidRequest(String),
/// I/O error during file operations
///
/// Created automatically from `std::io::Error` by the `?` operator.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// JSON serialization/deserialization error
///
/// Created automatically from `serde_json::Error` by the `?` operator.
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
// Multi-archive errors
/// Invalid archive name (empty or contains invalid characters)
#[error("Invalid archive name: {0}")]
InvalidArchiveName(String),
/// Archive already exists with the given name
#[error("Archive already exists: {0}")]
ArchiveAlreadyExists(String),
/// Archive not found with the given name
///
/// NOTE(review): overlaps in meaning with [`ArchivistError::ArchiveNameUnknown`] —
/// confirm which call sites are expected to use which variant.
#[error("Archive not found: {0}")]
ArchiveNotFound(String),
/// Archive path conflict (path is already used by another archive)
#[error("Archive path conflict: {0}")]
ArchivePathConflict(PathBuf),
/// Cannot remove default archive without force flag
#[error("Cannot remove default archive without force flag")]
CannotRemoveDefaultArchive,
/// Archive is not empty (has sessions)
#[error("Archive '{name}' is not empty ({session_count} sessions)")]
ArchiveNotEmpty {
/// Name of the archive that still contains sessions.
name: String,
/// Number of sessions found in that archive.
session_count: usize,
},
/// No archives configured
///
/// NOTE(review): near-duplicate of [`ArchivistError::NoArchiveConfigured`]
/// (singular) — confirm both are still needed.
#[error("No archives configured")]
NoArchivesConfigured,
/// Failed to load registry file
#[error("Failed to load registry: {0}")]
RegistryLoadError(String),
/// Failed to parse registry JSON
#[error("Failed to parse registry: {0}")]
RegistryParseError(String),
/// Failed to serialize registry to JSON
#[error("Failed to serialize registry: {0}")]
RegistrySerializeError(String),
/// Failed to write registry file
#[error("Failed to write registry: {0}")]
RegistryWriteError(String),
/// Backend is unavailable (e.g., disk full, connection lost, degraded state)
#[error("Backend {name} is unavailable")]
BackendUnavailable { name: String },
/// Backend does not support the requested capability
#[error("Backend {backend} does not support capability {capability:?}")]
CapabilityNotSupported {
/// The capability that was requested (rendered with `Debug` in the message).
capability: crate::backend::ArchiveCapability,
/// Name of the backend that lacks the capability.
backend: String,
},
/// Health check for a backend failed
#[error("Health check for backend {name} failed: {reason}")]
BackendHealthCheckFailed { name: String, reason: String },
/// Primary write backend is unavailable or misconfigured.
#[error("primary write backend `{name}` is unavailable: {reason}")]
PrimaryUnavailable { name: String, reason: String },
/// Session exists on a read-only (not write_active) backend; deletion impossible.
#[error("session {scroll_id} exists in read-only backend `{backend}`; cannot delete")]
DeleteOnReadOnlyBackend { backend: String, scroll_id: uuid::Uuid },
/// Move succeeded at the destination but source-side delete failed.
///
/// The nested error is only rendered into the message; it is not marked
/// `#[source]`, so `std::error::Error::source()` does not expose it.
#[error("partial move: copy to `{copied_to}` succeeded but source-side delete failed: {delete_error}")]
PartialMove {
/// Name of the destination the data was successfully copied to.
copied_to: String,
/// The error returned by the failed source-side delete (boxed because the
/// enum is recursive here).
delete_error: Box<ArchivistError>,
},
/// Queued-write backend's queue is full.
#[error("write queue full for backend `{backend}` (op `{op}`)")]
WriteQueueFull {
/// Name of the backend whose queue rejected the write.
backend: String,
/// Static label of the operation that could not be enqueued.
op: &'static str,
},
/// The coordinator has no archive configured (ephemeral mode).
#[error("no archive is configured (ephemeral mode)")]
NoArchiveConfigured,
/// A requested archive name does not exist in the registry.
#[error("archive name `{0}` is unknown")]
ArchiveNameUnknown(String),
/// Runtime mutation of the archive registry is not supported in Phase 3.
#[error("dynamic registry mutation is not supported (Phase 3 is startup-only)")]
DynamicRegistryUnsupported,
/// Catch-all for injected failures / legacy call sites. Prefer a typed variant when possible.
///
/// The message is the payload verbatim, so an empty string yields an empty
/// `Display` output.
#[error("{0}")]
Other(String),
}
/// Errors raised exclusively at boot, by `Archivist::from_config`.
///
/// Each variant describes one way the archive configuration can be rejected;
/// the `Display` text comes from the variant's `#[error(...)]` template.
#[derive(Debug, thiserror::Error)]
pub enum ArchivistBootError {
/// The config declares the same archive name more than once; the payload is
/// the duplicated name.
#[error("duplicate archive name `{0}` in config")]
DuplicateName(String),
/// Archive `name` declares a backend type (`type_name`) that is not known.
#[error("archive `{name}` declares unknown type `{type_name}`")]
UnknownType { name: String, type_name: String },
/// No `required` write-active backend is configured; at least one primary
/// is needed.
#[error("no `required` write-active backend configured (need at least one primary)")]
NoPrimary,
/// Building the backend for archive `name` failed.
///
/// The underlying build error is both rendered into the message and exposed
/// as the error source.
#[error("backend `{name}` failed to build: {source}")]
BackendBuild {
name: String,
#[source]
source: crate::registry::BackendBuildError,
},
/// A backend marked as required was unavailable at boot; `reason` carries
/// the explanation shown in the message.
#[error("required backend `{name}` is unavailable at boot: {reason}")]
UnavailableRequiredBackend { name: String, reason: String },
/// No enabled, write-active backend has an empty filter, so there is no
/// archive that accepts sessions without restriction.
#[error("no unrestricted write-active archive — at least one enabled, write_active backend must have an empty filter")]
NoUnrestrictedPrimary,
/// The filter configured for `archive` has an empty `include_connectors`
/// list and therefore rejects every session.
#[error("filter for archive `{archive}` rejects all sessions (include_connectors is empty)")]
FilterRejectsEverything { archive: String },
/// Config validation failed; converted automatically from
/// `crate::registry::ConfigValidationError` via `#[from]`.
#[error("config validation failed: {0}")]
Validation(#[from] crate::registry::ConfigValidationError),
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io;

    /// Tuple variants render their payload through the `#[error]` template.
    #[test]
    fn test_error_display() {
        let uuid = Uuid::now_v7();

        let err = ArchivistError::ConnectorUnknown(uuid);
        assert_eq!(err.to_string(), format!("Connector not found: {}", uuid));

        let err = ArchivistError::SessionUnknown(uuid);
        assert_eq!(err.to_string(), format!("Session not found: {}", uuid));

        let err = ArchivistError::CollisionInconsistent(uuid);
        assert_eq!(err.to_string(), format!("UUID collision: {}", uuid));

        let err = ArchivistError::InvalidRequest("missing field".to_string());
        assert_eq!(err.to_string(), "Invalid request: missing field");
    }

    /// Variants with named fields interpolate every field into the message.
    #[test]
    fn test_structured_error_display() {
        let uuid = Uuid::now_v7();

        let err = ArchivistError::ArchiveNotEmpty {
            name: "main".to_string(),
            session_count: 3,
        };
        assert_eq!(err.to_string(), "Archive 'main' is not empty (3 sessions)");

        let err = ArchivistError::WriteQueueFull {
            backend: "remote".to_string(),
            op: "append",
        };
        assert_eq!(
            err.to_string(),
            "write queue full for backend `remote` (op `append`)"
        );

        let err = ArchivistError::DeleteOnReadOnlyBackend {
            backend: "cold".to_string(),
            scroll_id: uuid,
        };
        assert_eq!(
            err.to_string(),
            format!(
                "session {} exists in read-only backend `cold`; cannot delete",
                uuid
            )
        );

        // The nested error is rendered through its own Display implementation.
        let err = ArchivistError::PartialMove {
            copied_to: "cold".to_string(),
            delete_error: Box::new(ArchivistError::InvalidRequest("locked".to_string())),
        };
        assert_eq!(
            err.to_string(),
            "partial move: copy to `cold` succeeded but source-side delete failed: Invalid request: locked"
        );
    }

    /// `std::io::Error` converts into `ArchivistError::Io` and keeps its kind and message.
    #[test]
    fn test_io_error_conversion() {
        let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found");

        let archivist_err: ArchivistError = io_err.into();

        match archivist_err {
            ArchivistError::Io(e) => {
                assert_eq!(e.kind(), io::ErrorKind::NotFound);
                assert_eq!(e.to_string(), "file not found");
            }
            _ => panic!("Expected Io variant"),
        }
    }

    /// `serde_json::Error` converts into `ArchivistError::Json`.
    #[test]
    fn test_json_error_conversion() {
        // Parsing invalid JSON is the simplest way to obtain a serde_json::Error.
        let json_err = serde_json::from_str::<serde_json::Value>("invalid json").unwrap_err();

        let archivist_err: ArchivistError = json_err.into();

        assert!(
            matches!(archivist_err, ArchivistError::Json(_)),
            "Expected Json variant"
        );
    }

    /// The crate-local `Result<T>` alias works with the `?` operator.
    #[test]
    fn test_result_type_with_question_mark() {
        fn test_function() -> Result<String> {
            // `?` converts a potential serde_json::Error into ArchivistError.
            let _data: serde_json::Value = serde_json::from_str(r#"{"key": "value"}"#)?;
            Ok("success".to_string())
        }

        let result = test_function();
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "success");
    }

    /// An `io::Error` raised in an inner function propagates as `ArchivistError::Io`.
    #[test]
    fn test_error_chain() {
        fn inner_function() -> std::io::Result<String> {
            Err(std::io::Error::new(
                std::io::ErrorKind::NotFound,
                "inner error",
            ))
        }

        fn outer_function() -> Result<String> {
            // `?` converts io::Error into ArchivistError through the #[from] impl.
            let _result = inner_function()?;
            Ok("success".to_string())
        }

        let result = outer_function();
        assert!(result.is_err());
        match result {
            Err(ArchivistError::Io(e)) => {
                assert_eq!(e.kind(), std::io::ErrorKind::NotFound);
            }
            _ => panic!("Expected Io error"),
        }
    }

    /// The derived `Debug` output names the variant and includes its payload.
    #[test]
    fn test_error_debug() {
        let uuid = Uuid::now_v7();
        let err = ArchivistError::ConnectorUnknown(uuid);

        let debug_str = format!("{:?}", err);
        assert!(debug_str.contains("ConnectorUnknown"));
        assert!(debug_str.contains(&uuid.to_string()));
    }

    /// Every variant that can be built without other crate modules has
    /// non-empty `Display` and `Debug` output.
    ///
    /// `CapabilityNotSupported` is left out because it needs a
    /// `crate::backend::ArchiveCapability` value.
    #[test]
    fn test_all_error_variants() {
        let uuid = Uuid::now_v7();

        let errors = vec![
            ArchivistError::ConnectorUnknown(uuid),
            ArchivistError::SessionUnknown(uuid),
            ArchivistError::CollisionInconsistent(uuid),
            ArchivistError::InvalidRequest("test".to_string()),
            ArchivistError::Io(io::Error::new(io::ErrorKind::Other, "test")),
            ArchivistError::Json(serde_json::from_str::<serde_json::Value>("bad").unwrap_err()),
            ArchivistError::InvalidArchiveName("test".to_string()),
            ArchivistError::ArchiveAlreadyExists("test".to_string()),
            ArchivistError::ArchiveNotFound("test".to_string()),
            ArchivistError::ArchivePathConflict(PathBuf::from("/tmp/archive")),
            ArchivistError::CannotRemoveDefaultArchive,
            ArchivistError::ArchiveNotEmpty {
                name: "test".to_string(),
                session_count: 1,
            },
            ArchivistError::NoArchivesConfigured,
            ArchivistError::RegistryLoadError("test".to_string()),
            ArchivistError::RegistryParseError("test".to_string()),
            ArchivistError::RegistrySerializeError("test".to_string()),
            ArchivistError::RegistryWriteError("test".to_string()),
            ArchivistError::BackendUnavailable {
                name: "test".to_string(),
            },
            ArchivistError::BackendHealthCheckFailed {
                name: "test".to_string(),
                reason: "test".to_string(),
            },
            ArchivistError::PrimaryUnavailable {
                name: "test".to_string(),
                reason: "test".to_string(),
            },
            ArchivistError::DeleteOnReadOnlyBackend {
                backend: "test".to_string(),
                scroll_id: uuid,
            },
            ArchivistError::PartialMove {
                copied_to: "test".to_string(),
                delete_error: Box::new(ArchivistError::Other("test".to_string())),
            },
            ArchivistError::WriteQueueFull {
                backend: "test".to_string(),
                op: "test",
            },
            ArchivistError::NoArchiveConfigured,
            ArchivistError::ArchiveNameUnknown("test".to_string()),
            ArchivistError::DynamicRegistryUnsupported,
            ArchivistError::Other("test".to_string()),
        ];

        for err in errors {
            let display = err.to_string();
            assert!(!display.is_empty(), "Error display should not be empty");
            let debug = format!("{:?}", err);
            assert!(!debug.is_empty(), "Error debug should not be empty");
        }
    }
}
File diff suppressed because it is too large Load Diff
+933
View File
@@ -0,0 +1,933 @@
//! Generic import infrastructure for bringing external sessions into the archive.
//!
//! This module provides the shared types and orchestration logic that all importers
//! (Claude, ChatGPT, etc.) reuse. Each importer implements discovery and message
//! conversion, then delegates to [`import_sessions`] for the actual import.
pub mod progress;
pub mod registry;
pub mod sources;
pub mod trait_def;
/// Backwards-compatible re-export — external callers (e.g. `api`) import
/// `dirigent_archivist::import::claude::{discover_claude_import,
/// import_claude_sessions}`. Keep the path stable until those callsites
/// migrate to the `Importer` trait.
pub use sources::claude;
#[cfg(feature = "importer-claude")]
pub use sources::claude::ClaudeImporter;
pub use progress::{ImportProgressEvent, ImportProgressSink, SessionOutcome, StatsDelta};
pub use registry::ImporterRegistry;
pub use trait_def::{ConfigField, ConfigFieldKind, ImportConfig, ImportConfigShape, ImportError, ImportTarget, Importer, ImporterInfo};
use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::coordinator::Archivist;
use crate::error::{ArchivistError, Result};
use crate::types::{
MessageRecord, RegisterConnectorRequest, RegisterSessionRequest, RegisterStatus,
SessionCompleteness,
};
/// Statistics collected during an import operation.
///
/// [`import_sessions`] fills this in while walking the discovered sessions.
/// Each session it processes ends up counted in exactly one of
/// `sessions_imported`, `sessions_updated` or `sessions_skipped`, or
/// contributes one entry to `errors`.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct ImportStats {
/// Number of sessions found by the importer's discovery phase.
pub sessions_discovered: usize,
/// Number of sessions successfully imported as new.
pub sessions_imported: usize,
/// Number of sessions skipped (already present with same or more messages).
pub sessions_skipped: usize,
/// Number of sessions that were updated with new messages.
pub sessions_updated: usize,
/// Total number of message records written to the archive.
pub messages_written: usize,
/// Number of messages that were already present (from existing sessions).
pub messages_already_present: usize,
/// Number of sessions skipped because the fingerprint matched (no source changes).
///
/// This is a sub-count: a fingerprint-skipped session is additionally counted
/// in `sessions_skipped` or `sessions_updated`. `#[serde(default)]` lets
/// payloads without this field deserialize with the value `0`.
#[serde(default)]
pub sessions_fingerprint_skipped: usize,
/// Errors encountered during import (non-fatal; import continues).
pub errors: Vec<String>,
}
impl ImportStats {
    /// Number of sessions that reached a final outcome: imported, skipped,
    /// updated, or failed (one `errors` entry counts as one failed session).
    pub fn total_sessions_processed(&self) -> usize {
        let failed = self.errors.len();
        [
            self.sessions_imported,
            self.sessions_skipped,
            self.sessions_updated,
            failed,
        ]
        .iter()
        .sum()
    }

    /// Returns `true` when at least one error message was recorded.
    pub fn has_errors(&self) -> bool {
        self.errors.first().is_some()
    }
}
/// Intermediate representation for a session discovered by any importer.
///
/// This is source-agnostic: each importer converts its native session format
/// into `DiscoveredSession` before handing it to [`import_sessions`].
///
/// `message_count`, `updated_at` and `file_size` together are the signals
/// that [`ImportSnapshot`] records and later compares on re-import.
#[derive(Debug, Clone)]
pub struct DiscoveredSession {
/// The session ID from the original source (e.g., Claude's JSONL filename).
///
/// [`import_sessions`] passes this to the `convert_messages` closure and
/// uses it to look up or register the archived session.
pub native_session_id: String,
/// Human-readable session title, if available.
pub title: Option<String>,
/// When the session was created in the source system.
pub created_at: Option<DateTime<Utc>>,
/// When the session was last updated in the source system.
///
/// When `None`, the import falls back to the current time for the archived
/// session's `updated_at`.
pub updated_at: Option<DateTime<Utc>>,
/// Number of messages in the source session (used for skip/update decisions).
pub message_count: usize,
/// Arbitrary source-specific metadata preserved for provenance.
///
/// Used as the initial metadata when a new session is registered. On
/// re-import, its `"model"` and `"project_path"` string entries are
/// compared against the archived metadata.
pub metadata: serde_json::Value,
/// Project path associated with the session, if known.
///
/// For new sessions this is looked up in the caller-supplied project map to
/// attach a `project_id`.
pub project_path: Option<String>,
/// Size of the source file in bytes, if available. Used for fingerprint-based
/// change detection to skip unchanged sessions on re-import.
pub file_size: Option<u64>,
}
/// Snapshot of source-side signals captured after a successful import.
///
/// Stored in the session's `metadata` JSON under the `"_import_snapshot"` key.
/// On re-import, comparing the current `DiscoveredSession` against the stored
/// snapshot lets us skip expensive full-parse when nothing has changed (O(1) gate).
///
/// `imported_at` is bookkeeping only; it is not part of the comparison done by
/// `ImportSnapshot::matches`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ImportSnapshot {
/// Number of messages in the source at the time of import.
pub source_message_count: usize,
/// Source-side `updated_at` timestamp at the time of import.
pub source_updated_at: Option<DateTime<Utc>>,
/// Source file size in bytes at the time of import.
///
/// `None` when the importer did not report a size; in that case the size is
/// ignored when comparing.
pub source_file_size: Option<u64>,
/// When this snapshot was recorded.
pub imported_at: DateTime<Utc>,
}
/// Key used to store [`ImportSnapshot`] in session metadata JSON.
///
/// The leading underscore distinguishes it from entries supplied by the
/// import source. Changing the value would make previously stored snapshots
/// unreadable, since lookups use this exact key.
const IMPORT_SNAPSHOT_KEY: &str = "_import_snapshot";
impl ImportSnapshot {
/// Check whether the source signals in `discovered` match this snapshot.
///
/// Returns `true` if all present signals match, meaning the session has not
/// changed since this snapshot was taken and a full re-parse can be skipped.
pub fn matches(&self, discovered: &DiscoveredSession) -> bool {
if self.source_message_count != discovered.message_count {
return false;
}
if self.source_updated_at != discovered.updated_at {
return false;
}
// file_size: only compare when both sides have a value.
if let (Some(snap_size), Some(disc_size)) = (self.source_file_size, discovered.file_size) {
if snap_size != disc_size {
return false;
}
}
true
}
/// Build a snapshot from a discovered session (captures current source signals).
pub fn from_discovered(discovered: &DiscoveredSession) -> Self {
Self {
source_message_count: discovered.message_count,
source_updated_at: discovered.updated_at,
source_file_size: discovered.file_size,
imported_at: Utc::now(),
}
}
/// Try to deserialize a snapshot from a session's metadata JSON.
pub fn from_metadata(metadata: &serde_json::Value) -> Option<Self> {
metadata
.get(IMPORT_SNAPSHOT_KEY)
.and_then(|v| serde_json::from_value(v.clone()).ok())
}
/// Serialize this snapshot into the session's metadata JSON under the
/// `_import_snapshot` key.
pub fn write_to_metadata(&self, metadata: &mut serde_json::Value) {
if let Some(obj) = metadata.as_object_mut() {
if let Ok(val) = serde_json::to_value(self) {
obj.insert(IMPORT_SNAPSHOT_KEY.to_string(), val);
}
} else {
tracing::warn!("cannot write import snapshot: metadata is not a JSON object");
}
}
}
/// Summary returned by the discovery phase before actual import begins.
///
/// This is a report only: it carries counts and names, not the sessions
/// themselves.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportDiscovery {
/// Human-readable name of the import source (e.g., "Claude Code").
pub source_name: String,
/// Filesystem path or URI that was scanned.
pub source_path: String,
/// Projects discovered, grouped by name.
pub projects: Vec<ImportProject>,
/// Total number of sessions found across all projects.
// NOTE(review): presumably equals the sum of `projects[..].session_count` —
// confirm against the importers that build this value.
pub total_sessions: usize,
/// Estimated total messages across all discovered sessions.
pub total_estimated_messages: usize,
}
/// A project grouping within an import discovery result.
///
/// Appears as an element of `ImportDiscovery::projects`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportProject {
/// Project name (typically derived from the directory path).
pub name: String,
/// Number of sessions belonging to this project.
pub session_count: usize,
}
/// Picks the `updated_at` value to store for an imported session.
///
/// The timestamp reported by the source (`discovered.updated_at`) wins; the
/// current time is used only when the source did not provide one.
fn resolve_updated_at(discovered: &DiscoveredSession) -> DateTime<Utc> {
    match discovered.updated_at {
        Some(source_timestamp) => source_timestamp,
        None => chrono::Utc::now(),
    }
}
/// Generic async orchestrator that imports discovered sessions into the archive.
///
/// This function handles the full import lifecycle:
/// 1. Registers the connector (idempotent via fingerprint).
/// 2. For each discovered session, checks whether it already exists in the archive.
/// 3. New sessions are registered and their messages are converted and appended.
/// 4. Existing sessions with fewer archived messages are logged and skipped (v1).
/// 5. Existing sessions with the same or more archived messages are skipped.
///
/// The `convert_messages` closure receives a `native_session_id` and returns
/// `MessageRecord`s with `Uuid::nil()` in the `session` field. This function
/// patches each record's `session` to the real `scroll_id` before appending.
///
/// # Arguments
///
/// * `archivist` - The archivist to import into.
/// * `connector_req` - Registration request for the import connector.
/// * `sessions` - Sessions discovered by the importer.
/// * `convert_messages` - Closure that converts a native session into `MessageRecord`s.
/// * `archive` - Optional archive name (`None` for default archive).
/// * `progress` - Sink for per-session progress events (use
/// [`ImportProgressSink::noop`] when progress reporting is not needed).
pub async fn import_sessions<F>(
archivist: &Archivist,
connector_req: RegisterConnectorRequest,
sessions: Vec<DiscoveredSession>,
convert_messages: F,
archive: Option<String>,
progress: &ImportProgressSink,
force_deep_scan: bool,
project_map: &HashMap<String, String>,
) -> Result<ImportStats>
where
F: Fn(&str) -> Result<Vec<MessageRecord>> + Send + Sync,
{
let mut stats = ImportStats::default();
stats.sessions_discovered = sessions.len();
// Step 1: Register the connector (idempotent).
let connector_resp = archivist
.register_connector(connector_req, archive.clone())
.await?;
let connector_uid = connector_resp.connector_uid;
tracing::info!(
connector_uid = %connector_uid,
status = ?connector_resp.status,
"Import connector registered"
);
// Step 2: Process each discovered session.
let total = sessions.len();
for (index, session) in sessions.iter().enumerate() {
let native_id = &session.native_session_id;
progress
.send(ImportProgressEvent::SessionStarted {
native_id: native_id.clone(),
index,
total,
})
.await;
// Per-session outcome + stats delta. Updated as we go; on the early
// `continue` paths we emit Failed/Skipped before moving on.
let mut messages_written_delta: u64 = 0;
let mut messages_already_present_delta: u64 = 0;
let mut session_changed = false;
// Helper: emit SessionFinished and fall out of the iteration.
macro_rules! emit_finished {
($outcome:expr) => {{
progress
.send(ImportProgressEvent::SessionFinished {
native_id: native_id.clone(),
outcome: $outcome,
stats_delta: StatsDelta {
messages_written: messages_written_delta,
messages_already_present: messages_already_present_delta,
},
})
.await;
}};
}
// --- Step 1: Resolve or create scroll_id BEFORE convert_messages ---
let (scroll_id, session_is_new) = match archivist
.resolve_session(connector_uid, native_id, archive.clone())
.await
{
Ok(id) => (id, false),
Err(ArchivistError::SessionUnknown(_)) => {
// Inject project_id from project_map if the session has a
// project_path that maps to a known project.
let mut metadata = session.metadata.clone();
if let Some(project_path) = session.project_path.as_deref() {
if let Some(pid) = project_map.get(project_path) {
if let Some(obj) = metadata.as_object_mut() {
obj.insert(
"project_id".to_string(),
serde_json::Value::String(pid.clone()),
);
}
}
}
let register_req = RegisterSessionRequest {
connector_uid,
native_session_id: native_id.clone(),
title: session.title.clone(),
custom_scroll_id: None,
metadata,
completeness: SessionCompleteness::Complete,
parent_scroll_id: None,
is_subagent: false,
continuation: None,
agent_id: None,
subagent_type: None,
spawning_tool_use_id: None,
};
match archivist
.register_session(register_req, archive.clone())
.await
{
Ok(resp) => match resp.status {
RegisterStatus::Accepted => (resp.scroll_id, true),
RegisterStatus::Aliased => {
stats.sessions_skipped += 1;
emit_finished!(SessionOutcome::Skipped);
continue;
}
RegisterStatus::Rejected => {
stats.errors.push(format!(
"Session registration rejected for {native_id}"
));
emit_finished!(SessionOutcome::Failed);
continue;
}
},
Err(e) => {
stats.errors.push(format!(
"Failed to register session {native_id}: {e}"
));
emit_finished!(SessionOutcome::Failed);
continue;
}
}
}
Err(e) => {
stats.errors.push(format!(
"Failed to resolve session {native_id}: {e}"
));
emit_finished!(SessionOutcome::Failed);
continue;
}
};
// --- Step 2: Hoist metadata read for existing sessions ---
// Load metadata once; reused for fingerprint check AND title/model diff.
let existing_meta = if !session_is_new {
match archivist
.get_session_metadata(scroll_id, archive.clone())
.await
{
Ok(m) => Some(m),
Err(e) => {
stats.errors.push(format!(
"Failed to read session metadata for {native_id}: {e}"
));
emit_finished!(SessionOutcome::Failed);
continue;
}
}
} else {
None
};
// --- Step 2b: Retroactive project_id linking for existing sessions ---
// Sessions imported before project detection (or before the project was
// created) have project_path but no project_id. Patch it now if the
// project_map has a match — this runs even for fingerprint-skipped
// sessions so re-import can link them without any source-side changes.
if !session_is_new {
if let Some(ref meta) = existing_meta {
let has_project_path = meta
.metadata
.get("project_path")
.and_then(|v| v.as_str())
.is_some();
let has_project_id = meta
.metadata
.get("project_id")
.and_then(|v| v.as_str())
.filter(|s| !s.is_empty())
.is_some();
if has_project_path && !has_project_id {
let stored_path = meta
.metadata
.get("project_path")
.and_then(|v| v.as_str())
.unwrap();
if let Some(pid) = project_map.get(stored_path) {
if let Ok(primary) =
archivist.resolve_primary(archive.clone()).await
{
let mut patched = meta.clone();
if let Some(obj) = patched.metadata.as_object_mut() {
obj.insert(
"project_id".to_string(),
serde_json::Value::String(pid.clone()),
);
}
patched.updated_at = resolve_updated_at(session);
match primary.backend.put_session(patched).await {
Ok(_) => {
tracing::info!(
scroll_id = %scroll_id,
project_id = %pid,
"Retroactively linked session to project"
);
session_changed = true;
}
Err(e) => {
tracing::warn!(
scroll_id = %scroll_id,
error = %e,
"Failed to retroactively link session to project"
);
}
}
}
}
}
}
}
// --- Step 3: Fingerprint gate — skip unchanged sessions ---
if !session_is_new && !force_deep_scan {
if let Some(ref meta) = existing_meta {
if let Some(snapshot) = ImportSnapshot::from_metadata(&meta.metadata) {
if snapshot.matches(session) {
stats.sessions_fingerprint_skipped += 1;
if session_changed {
tracing::debug!(
native_id = %native_id,
"Fingerprint match — skipping message scan (metadata was updated)"
);
stats.sessions_updated += 1;
emit_finished!(SessionOutcome::Updated);
} else {
tracing::debug!(
native_id = %native_id,
"Fingerprint match — skipping unchanged session"
);
stats.sessions_skipped += 1;
emit_finished!(SessionOutcome::Skipped);
}
continue;
}
}
}
}
// --- Step 4: Convert messages (EXPENSIVE — after fingerprint gate) ---
let source_records = match convert_messages(native_id) {
Ok(r) => r,
Err(e) => {
stats.errors.push(format!(
"Failed to convert messages for session {native_id}: {e}"
));
emit_finished!(SessionOutcome::Failed);
continue;
}
};
// Build existing_ids set — empty for brand-new sessions.
let existing_ids: std::collections::HashSet<Uuid> = if session_is_new {
std::collections::HashSet::new()
} else {
match archivist.get_messages(scroll_id, archive.clone()).await {
Ok(msgs) => msgs.into_iter().map(|m| m.message_id).collect(),
Err(e) => {
stats.errors.push(format!(
"Failed to read existing messages for session {native_id}: {e}"
));
emit_finished!(SessionOutcome::Failed);
continue;
}
}
};
// Patch placeholder session field and partition.
let mut new_messages: Vec<MessageRecord> = Vec::new();
let mut already_present_count: usize = 0;
for mut record in source_records {
if record.session == Uuid::nil() {
record.session = scroll_id;
}
if existing_ids.contains(&record.message_id) {
already_present_count += 1;
} else {
new_messages.push(record);
}
}
let new_count = new_messages.len();
if new_count > 0 {
if let Err(e) = archivist
.append_messages(scroll_id, new_messages, archive.clone())
.await
{
stats.errors.push(format!(
"Failed to append messages for session {native_id}: {e}"
));
emit_finished!(SessionOutcome::Failed);
continue;
}
stats.messages_written += new_count;
messages_written_delta = new_count as u64;
session_changed = true;
}
stats.messages_already_present += already_present_count;
messages_already_present_delta = already_present_count as u64;
// --- Step 5: Metadata diff (reuse hoisted metadata) ---
if !session_is_new {
// SAFETY: existing_meta is Some when !session_is_new (guarded above).
let current_meta = existing_meta.unwrap();
let new_title = session.title.as_ref();
let title_differs = new_title.is_some() && new_title != current_meta.title.as_ref();
let new_model = session
.metadata
.get("model")
.and_then(|v| v.as_str())
.map(String::from);
let current_model = current_meta
.metadata
.get("model")
.and_then(|v| v.as_str())
.map(String::from);
let model_differs = new_model.is_some() && new_model != current_model;
if title_differs || model_differs {
if let Err(e) = archivist
.update_session_metadata(
scroll_id,
if title_differs { new_title.cloned() } else { None },
if model_differs { new_model } else { None },
archive.clone(),
)
.await
{
stats.errors.push(format!(
"Failed to update session metadata for {native_id}: {e}"
));
emit_finished!(SessionOutcome::Failed);
continue;
}
session_changed = true;
}
let new_project_path = session
.metadata
.get("project_path")
.and_then(|v| v.as_str())
.map(String::from);
let current_project_path = current_meta
.metadata
.get("project_path")
.and_then(|v| v.as_str())
.map(String::from);
let project_path_differs =
new_project_path.is_some() && new_project_path != current_project_path;
if project_path_differs {
// project_path lives in the free-form metadata JSON.
// Re-read to pick up any title/model changes applied above.
let mut patched_meta = archivist
.get_session_metadata(scroll_id, archive.clone())
.await
.unwrap_or(current_meta);
if let Some(obj) = patched_meta.metadata.as_object_mut() {
let path_val = new_project_path.clone().unwrap_or_default();
obj.insert(
"project_path".to_string(),
serde_json::Value::String(path_val.clone()),
);
if let Some(pid) = project_map.get(&path_val) {
obj.insert(
"project_id".to_string(),
serde_json::Value::String(pid.clone()),
);
}
}
patched_meta.updated_at = resolve_updated_at(session);
if let Ok(primary) = archivist.resolve_primary(archive.clone()).await {
if let Err(e) = primary.backend.put_session(patched_meta).await {
tracing::warn!(
scroll_id = %scroll_id,
error = %e,
"Failed to update project_path in session metadata"
);
}
}
session_changed = true;
}
}
// --- Step 6: Write import snapshot after successful import/update ---
{
let snapshot = ImportSnapshot::from_discovered(session);
// Re-read metadata to get the latest state (may have been updated above).
let write_result = async {
let mut meta = archivist
.get_session_metadata(scroll_id, archive.clone())
.await?;
snapshot.write_to_metadata(&mut meta.metadata);
meta.updated_at = resolve_updated_at(session);
let primary = archivist.resolve_primary(archive.clone()).await?;
primary.backend.put_session(meta).await.map_err(|e| {
ArchivistError::InvalidRequest(format!(
"Failed to write import snapshot: {e}"
))
})
}
.await;
if let Err(e) = write_result {
tracing::warn!(
scroll_id = %scroll_id,
error = %e,
"Failed to write import snapshot (session still imported)"
);
}
}
// Accounting: exactly one of {imported, updated, skipped} per session.
let outcome = if session_is_new {
stats.sessions_imported += 1;
SessionOutcome::Imported
} else if session_changed {
stats.sessions_updated += 1;
SessionOutcome::Updated
} else {
stats.sessions_skipped += 1;
SessionOutcome::Skipped
};
emit_finished!(outcome);
}
Ok(stats)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `ImportStats` starts with every counter at zero and no errors.
    #[test]
    fn test_import_stats_default() {
        let stats = ImportStats::default();
        assert_eq!(stats.sessions_discovered, 0);
        assert_eq!(stats.sessions_imported, 0);
        assert_eq!(stats.sessions_skipped, 0);
        assert_eq!(stats.sessions_updated, 0);
        assert_eq!(stats.messages_written, 0);
        assert_eq!(stats.messages_already_present, 0);
        assert_eq!(stats.sessions_fingerprint_skipped, 0);
        assert!(stats.errors.is_empty());
    }

    /// The processed total is imported + skipped + updated + number of errors.
    /// The discovery count and the fingerprint sub-count must not inflate it.
    #[test]
    fn test_import_stats_total_sessions_processed() {
        let stats = ImportStats {
            sessions_discovered: 10,
            sessions_imported: 3,
            sessions_skipped: 2,
            sessions_updated: 1,
            sessions_fingerprint_skipped: 2,
            errors: vec!["oops".to_string()],
            ..ImportStats::default()
        };
        assert_eq!(stats.total_sessions_processed(), 7);
    }

    /// `has_errors` flips to `true` as soon as one error is recorded.
    #[test]
    fn test_import_stats_has_errors() {
        let mut stats = ImportStats::default();
        assert!(!stats.has_errors());
        stats.errors.push("something went wrong".to_string());
        assert!(stats.has_errors());
    }
}
#[cfg(test)]
mod idempotency_tests {
use super::*;
use crate::Archivist;
use chrono::Utc;
use uuid::Uuid;
async fn mk() -> (Archivist, std::path::PathBuf) {
let tmp = std::env::temp_dir().join(format!("import_idem_{}", Uuid::now_v7()));
// Use `from_single_backend` rather than `new_with_single_archive` so
// each test's archive is fully self-contained (no shared `.archives.json`
// in the parent tempdir racing against sibling tests).
let backend = std::sync::Arc::new(
crate::backends::JsonlBackend::new(tmp.clone()).await.unwrap(),
);
let a = Archivist::from_single_backend("main".into(), backend)
.await
.unwrap();
(a, tmp)
}
fn connector() -> RegisterConnectorRequest {
// Stable client_native_id so that re-registering within the same test
// (which uses an isolated temp dir per test) aliases onto the same
// connector_uid — otherwise each call would produce a fresh connector
// and defeat idempotency.
RegisterConnectorRequest {
r#type: "Fake".into(),
title: "fake".into(),
client_native_id: "fake@local:stable".into(),
custom_uid: None,
metadata: serde_json::json!({}),
fingerprint: None,
}
}
fn record(session: Uuid, id: Uuid, role: &str, content: &str) -> MessageRecord {
MessageRecord {
version: 1,
message_id: id,
session,
parent_id: None,
ts: Utc::now(),
role: role.to_string(),
author: None,
content_md: content.to_string(),
content_parts: None,
attachments: Vec::new(),
metadata: serde_json::json!({}),
}
}
#[tokio::test]
async fn import_skips_already_present_messages() {
let (archivist, tmp) = mk().await;
let a = Uuid::now_v7();
let b = Uuid::now_v7();
let c = Uuid::now_v7();
let discovered = vec![DiscoveredSession {
native_session_id: "s1".into(),
title: Some("t".into()),
created_at: None,
updated_at: None,
message_count: 3,
metadata: serde_json::json!({}),
project_path: None,
file_size: None,
}];
let convert = |_: &str| -> Result<Vec<MessageRecord>> {
Ok(vec![
record(Uuid::nil(), a, "user", "hi-a"),
record(Uuid::nil(), b, "user", "hi-b"),
record(Uuid::nil(), c, "user", "hi-c"),
])
};
let stats = import_sessions(&archivist, connector(), discovered.clone(), convert, None, &ImportProgressSink::noop(), true, &HashMap::new())
.await
.unwrap();
assert_eq!(stats.sessions_imported, 1);
assert_eq!(stats.messages_written, 3);
// Re-import with IDENTICAL records — nothing should be written.
let convert2 = |_: &str| -> Result<Vec<MessageRecord>> {
Ok(vec![
record(Uuid::nil(), a, "user", "hi-a"),
record(Uuid::nil(), b, "user", "hi-b"),
record(Uuid::nil(), c, "user", "hi-c"),
])
};
let stats2 = import_sessions(&archivist, connector(), discovered, convert2, None, &ImportProgressSink::noop(), true, &HashMap::new())
.await
.unwrap();
assert_eq!(stats2.messages_written, 0);
assert_eq!(stats2.messages_already_present, 3);
assert_eq!(stats2.sessions_skipped, 1);
assert_eq!(stats2.sessions_imported, 0);
assert_eq!(stats2.sessions_updated, 0);
let _ = tokio::fs::remove_dir_all(tmp).await;
}
#[tokio::test]
async fn import_appends_new_messages_only() {
let (archivist, tmp) = mk().await;
let a = Uuid::now_v7();
let b = Uuid::now_v7();
let c = Uuid::now_v7();
let d = Uuid::now_v7();
let discovered = vec![DiscoveredSession {
native_session_id: "s1".into(),
title: Some("t".into()),
created_at: None,
updated_at: None,
message_count: 2,
metadata: serde_json::json!({}),
project_path: None,
file_size: None,
}];
let convert1 = |_: &str| -> Result<Vec<MessageRecord>> {
Ok(vec![
record(Uuid::nil(), a, "user", "hi-a"),
record(Uuid::nil(), b, "user", "hi-b"),
])
};
let _ = import_sessions(&archivist, connector(), discovered.clone(), convert1, None, &ImportProgressSink::noop(), true, &HashMap::new())
.await
.unwrap();
// Second run: source has grown to 4 messages.
let convert2 = |_: &str| -> Result<Vec<MessageRecord>> {
Ok(vec![
record(Uuid::nil(), a, "user", "hi-a"),
record(Uuid::nil(), b, "user", "hi-b"),
record(Uuid::nil(), c, "user", "hi-c"),
record(Uuid::nil(), d, "user", "hi-d"),
])
};
let stats = import_sessions(&archivist, connector(), discovered, convert2, None, &ImportProgressSink::noop(), true, &HashMap::new())
.await
.unwrap();
assert_eq!(stats.messages_written, 2);
assert_eq!(stats.messages_already_present, 2);
assert_eq!(stats.sessions_updated, 1);
assert_eq!(stats.sessions_skipped, 0);
assert_eq!(stats.sessions_imported, 0);
let _ = tokio::fs::remove_dir_all(tmp).await;
}
/// A re-import with unchanged messages but a different title must count as a
/// session update (no messages written) and persist the new title.
#[tokio::test]
async fn import_updates_metadata_only() {
    let (archivist, tmp) = mk().await;
    let message_id = Uuid::now_v7();
    // The discovered session is identical between runs except for its title.
    let session_titled = |title: &str| {
        vec![DiscoveredSession {
            native_session_id: "s1".into(),
            title: Some(title.into()),
            created_at: None,
            updated_at: None,
            message_count: 1,
            metadata: serde_json::json!({}),
            project_path: None,
            file_size: None,
        }]
    };

    let first_pass = |_: &str| -> Result<Vec<MessageRecord>> {
        Ok(vec![record(Uuid::nil(), message_id, "user", "hi")])
    };
    let _ = import_sessions(
        &archivist,
        connector(),
        session_titled("old title"),
        first_pass,
        None,
        &ImportProgressSink::noop(),
        true,
        &HashMap::new(),
    )
    .await
    .unwrap();

    // Re-import with same messages but new title.
    let second_pass = |_: &str| -> Result<Vec<MessageRecord>> {
        Ok(vec![record(Uuid::nil(), message_id, "user", "hi")])
    };
    let outcome = import_sessions(
        &archivist,
        connector(),
        session_titled("new title"),
        second_pass,
        None,
        &ImportProgressSink::noop(),
        true,
        &HashMap::new(),
    )
    .await
    .unwrap();
    assert_eq!(outcome.messages_written, 0);
    assert_eq!(outcome.sessions_updated, 1);
    assert_eq!(outcome.sessions_skipped, 0);

    // Verify title landed on disk.
    let query = crate::types::SessionListQuery::default().with_limit(50);
    let listed = archivist.list_sessions_paged(query).await.unwrap();
    let has_new_title = listed
        .items
        .iter()
        .any(|meta| meta.title.as_deref() == Some("new title"));
    assert!(has_new_title);

    // Best-effort cleanup of the temporary archive directory.
    let _ = tokio::fs::remove_dir_all(tmp).await;
}
/// Re-importing a session whose messages and metadata are both unchanged must
/// be reported as skipped, not updated.
#[tokio::test]
async fn import_handles_metadata_unchanged() {
    let (archivist, tmp) = mk().await;
    let message_id = Uuid::now_v7();
    let sessions = vec![DiscoveredSession {
        native_session_id: "s1".into(),
        title: Some("t".into()),
        created_at: None,
        updated_at: None,
        message_count: 1,
        metadata: serde_json::json!({"model": "claude"}),
        project_path: None,
        file_size: None,
    }];

    let first_pass = |_: &str| -> Result<Vec<MessageRecord>> {
        Ok(vec![record(Uuid::nil(), message_id, "user", "hi")])
    };
    let _ = import_sessions(
        &archivist,
        connector(),
        sessions.clone(),
        first_pass,
        None,
        &ImportProgressSink::noop(),
        true,
        &HashMap::new(),
    )
    .await
    .unwrap();

    let second_pass = |_: &str| -> Result<Vec<MessageRecord>> {
        Ok(vec![record(Uuid::nil(), message_id, "user", "hi")])
    };
    let outcome = import_sessions(
        &archivist,
        connector(),
        sessions,
        second_pass,
        None,
        &ImportProgressSink::noop(),
        true,
        &HashMap::new(),
    )
    .await
    .unwrap();
    assert_eq!(outcome.sessions_skipped, 1);
    assert_eq!(outcome.sessions_updated, 0);
    assert_eq!(outcome.messages_written, 0);

    // Best-effort cleanup of the temporary archive directory.
    let _ = tokio::fs::remove_dir_all(tmp).await;
}
}
@@ -0,0 +1,117 @@
//! ImportProgressSink: bounded mpsc whose overflow policy drops the *newest*
//! non-terminal event. Terminal events (ImportDone / ImportFailed) are never
//! dropped — on a full channel the sender awaits free capacity instead, so
//! only a terminal event can backpressure the import task on a slow consumer.
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use super::ImportDiscovery;
use super::ImportStats;
/// Buffer size handed to `mpsc::channel` by [`ImportProgressSink::channel`].
const DEFAULT_CAPACITY: usize = 64;
/// Progress events published through an [`ImportProgressSink`].
///
/// Serialized with serde as an internally tagged enum: the variant name is
/// written in snake_case under the `kind` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "kind")]
pub enum ImportProgressEvent {
/// Discovery phase began for `source` (presumably the importer's source name — confirm with the emitter).
DiscoveryStarted { source: String },
/// Intermediate discovery counter; `estimated_total` is optional.
DiscoveryProgress { scanned: usize, estimated_total: Option<usize> },
/// Discovery finished; carries the full [`ImportDiscovery`] result.
DiscoveryDone { discovered: ImportDiscovery },
/// A session is about to be processed; `index` / `total` give its position (NOTE(review): 0- or 1-based is not visible here).
SessionStarted { native_id: String, index: usize, total: usize },
/// A session finished with `outcome`; `stats_delta` holds its message counters.
SessionFinished { native_id: String, outcome: SessionOutcome, stats_delta: StatsDelta },
/// Terminal event: the import completed with the final [`ImportStats`].
ImportDone { stats: ImportStats },
/// Terminal event: the import failed with the given error text.
ImportFailed { error: String },
}
impl ImportProgressEvent {
pub fn is_terminal(&self) -> bool {
matches!(self, ImportProgressEvent::ImportDone { .. } | ImportProgressEvent::ImportFailed { .. })
}
}
/// Per-session result reported in [`ImportProgressEvent::SessionFinished`].
/// Serialized by serde as a snake_case string (`imported`, `skipped`, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SessionOutcome { Imported, Skipped, Updated, Failed }
/// Message counters for a single session, carried by
/// [`ImportProgressEvent::SessionFinished`]. Both default to zero.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct StatsDelta {
/// Presumably the number of messages newly written for this session — confirm with the emitter.
pub messages_written: u64,
/// Presumably the number of messages that were already archived — confirm with the emitter.
pub messages_already_present: u64,
}
/// Handle through which an import publishes [`ImportProgressEvent`]s.
/// Built with [`ImportProgressSink::channel`] (live, backed by a bounded
/// tokio mpsc channel) or [`ImportProgressSink::noop`] (discards everything).
pub struct ImportProgressSink {
inner: SinkInner,
}
/// Private backing state of an [`ImportProgressSink`].
enum SinkInner {
/// Events are forwarded into the sending half of a bounded mpsc channel.
Live { tx: mpsc::Sender<ImportProgressEvent> },
/// Events are discarded.
Noop,
}
impl ImportProgressSink {
    /// Creates a live sink together with the receiving half of its bounded
    /// channel (capacity [`DEFAULT_CAPACITY`]).
    pub fn channel() -> (Self, mpsc::Receiver<ImportProgressEvent>) {
        let (tx, rx) = mpsc::channel(DEFAULT_CAPACITY);
        (Self { inner: SinkInner::Live { tx } }, rx)
    }

    /// Creates a sink that silently discards every event.
    pub fn noop() -> Self {
        Self { inner: SinkInner::Noop }
    }

    /// Publishes `evt` according to the sink's delivery policy.
    ///
    /// * Terminal events (see [`ImportProgressEvent::is_terminal`]) wait for
    ///   channel capacity, so they are delivered whenever a consumer still
    ///   exists. If the receiver has been dropped the event cannot be
    ///   delivered; this is logged at `warn` level instead of being ignored.
    /// * Non-terminal events are best-effort: when the channel is full the
    ///   event being sent is dropped (logged at `debug`), and a closed
    ///   channel is logged at `warn`.
    ///
    /// This method never returns an error and never panics on a slow or
    /// missing consumer.
    pub async fn send(&self, evt: ImportProgressEvent) {
        let tx = match &self.inner {
            SinkInner::Noop => return,
            SinkInner::Live { tx } => tx,
        };

        if evt.is_terminal() {
            // Awaiting `send` only fails when the receiver is gone; surface
            // that instead of swallowing it, mirroring the non-terminal path.
            if tx.send(evt).await.is_err() {
                tracing::warn!("import progress: consumer gone, terminal event not delivered");
            }
            return;
        }

        // Best-effort: drop non-terminal events when the channel is full.
        match tx.try_send(evt) {
            Ok(()) => {}
            Err(mpsc::error::TrySendError::Full(_)) => {
                tracing::debug!("import progress: dropped non-terminal event (queue full)");
            }
            Err(mpsc::error::TrySendError::Closed(_)) => {
                tracing::warn!("import progress: consumer gone");
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Even after the channel has been flooded with non-terminal events, a
    /// subsequent `ImportDone` must reach the consumer.
    #[tokio::test]
    async fn terminal_events_always_delivered() {
        let (sink, mut rx) = ImportProgressSink::channel();

        // Fill the channel with non-terminal events (mostly drop).
        for i in 0..1000 {
            let started = ImportProgressEvent::SessionStarted {
                native_id: format!("s{i}"),
                index: i,
                total: 1000,
            };
            sink.send(started).await;
        }

        // Consumer drains in background and reports whether it saw ImportDone.
        let consumer = tokio::spawn(async move {
            loop {
                match rx.recv().await {
                    Some(ImportProgressEvent::ImportDone { .. }) => break true,
                    Some(_) => continue,
                    None => break false,
                }
            }
        });

        sink.send(ImportProgressEvent::ImportDone { stats: ImportStats::default() }).await;

        let deadline = std::time::Duration::from_secs(2);
        let saw_done = tokio::time::timeout(deadline, consumer).await.unwrap().unwrap();
        assert!(saw_done);
    }

    /// Sending through a no-op sink completes without a consumer.
    #[tokio::test]
    async fn noop_sink_never_fails() {
        let sink = ImportProgressSink::noop();
        let done = ImportProgressEvent::ImportDone { stats: ImportStats::default() };
        sink.send(done).await;
    }
}
@@ -0,0 +1,93 @@
//! Dynamic registry of Importer implementations. Populated at boot.
use std::collections::HashMap;
use std::sync::Arc;
use super::trait_def::{Importer, ImporterInfo};
/// Collection of available [`Importer`] implementations, keyed by the value
/// each importer returns from `source_name()`.
pub struct ImporterRegistry {
importers: HashMap<&'static str, Arc<dyn Importer>>,
}
impl ImporterRegistry {
    /// Creates an empty registry.
    pub fn new() -> Self {
        Self {
            importers: HashMap::new(),
        }
    }

    /// Populate with all built-in importers. Feature flags select which ship.
    pub fn with_defaults() -> Self {
        let mut r = Self::new();
        #[cfg(feature = "importer-claude")]
        r.register(Arc::new(super::sources::claude::ClaudeImporter));
        #[cfg(feature = "importer-chatgpt")]
        r.register(Arc::new(super::sources::chatgpt::ChatGptImporter));
        #[cfg(feature = "importer-codex")]
        r.register(Arc::new(super::sources::codex::CodexImporter));
        r
    }

    /// Registers `importer` under its `source_name()`. Registering a second
    /// importer with the same name replaces the earlier one.
    pub fn register(&mut self, importer: Arc<dyn Importer>) {
        self.importers.insert(importer.source_name(), importer);
    }

    /// Looks up an importer by source name, returning a shared handle.
    pub fn get(&self, name: &str) -> Option<Arc<dyn Importer>> {
        self.importers.get(name).cloned()
    }

    /// Describes every registered importer, sorted by `source_name`.
    ///
    /// The backing `HashMap` iterates in an unspecified order that can differ
    /// between runs, so the result is sorted to give callers (e.g. a UI
    /// listing) a stable ordering.
    pub fn list(&self) -> Vec<ImporterInfo> {
        let mut infos: Vec<ImporterInfo> = self
            .importers
            .values()
            .map(|importer| {
                let source = importer.source_name();
                ImporterInfo {
                    source_name: source.to_string(),
                    display_name: pretty_name(source),
                    config_shape: importer.config_shape(),
                }
            })
            .collect();
        // Keys are unique, so an unstable sort is sufficient.
        infos.sort_unstable_by(|a, b| a.source_name.cmp(&b.source_name));
        infos
    }
}
/// Maps a source key to its human-readable display name. Unknown keys are
/// returned unchanged.
fn pretty_name(source: &str) -> String {
    let label = match source {
        "claude" => "Claude Code",
        "chatgpt" => "ChatGPT (OpenAI)",
        "codex" => "OpenAI Codex",
        unknown => unknown,
    };
    label.to_owned()
}
impl Default for ImporterRegistry {
fn default() -> Self {
Self::with_defaults()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The Claude importer is present exactly when its feature flag is on.
    #[test]
    fn defaults_include_claude_when_feature_enabled() {
        let registry = ImporterRegistry::with_defaults();
        let infos = registry.list();
        let claude = registry.get("claude");

        #[cfg(feature = "importer-claude")]
        {
            assert!(infos.iter().any(|info| info.source_name == "claude"));
            assert!(claude.is_some());
        }
        #[cfg(not(feature = "importer-claude"))]
        {
            let _ = infos;
            assert!(claude.is_none());
        }
    }

    /// Known sources get a display name; unknown ones pass through.
    #[test]
    fn pretty_name_known_sources() {
        let expectations = [
            ("claude", "Claude Code"),
            ("chatgpt", "ChatGPT (OpenAI)"),
            ("codex", "OpenAI Codex"),
            ("custom", "custom"),
        ];
        for (source, display) in expectations {
            assert_eq!(pretty_name(source), display);
        }
    }
}
@@ -0,0 +1,361 @@
//! ChatGPT importer: takes a path to a conversations.json file.
use std::path::PathBuf;
use async_trait::async_trait;
use chrono::Utc;
use uuid::Uuid;
use dirigent_chatgpt::{ContentPart, ParsedConversation, ParsedMessage};
use super::super::progress::ImportProgressSink;
use super::super::trait_def::{
ConfigField, ConfigFieldKind, ImportConfig, ImportConfigShape, ImportError, ImportTarget,
Importer,
};
use super::super::{
import_sessions, DiscoveredSession, ImportDiscovery, ImportProject, ImportStats,
};
use crate::coordinator::Archivist;
use crate::error::{ArchivistError, Result};
use crate::types::{MessageRecord, RegisterConnectorRequest};
/// Connector type string used for imported ChatGPT sessions (the `type` of
/// the `RegisterConnectorRequest` built during import).
pub const CHATGPT_CONNECTOR_TYPE: &str = "ChatGPT";
/// Fingerprint prefix for locally-imported ChatGPT exports. The full
/// fingerprint is `<prefix>:<canonical export path>`.
pub const CHATGPT_FINGERPRINT_PREFIX: &str = "import/local:chatgpt";
/// Namespace UUID for deterministic UUIDv5 derivations on ChatGPT message ids
/// that are not already valid UUIDs.
// Changing this value changes every derived message id, so previously
// imported messages would no longer match on re-import.
const CHATGPT_MESSAGE_NS: Uuid = Uuid::from_u128(0x4e58_a7cb_bf1c_4de2_b7c9_8c31_11b3_1112);
/// Stateless [`Importer`] for an OpenAI ChatGPT data export
/// (`conversations.json`), registered under the source name `chatgpt`.
pub struct ChatGptImporter;
#[async_trait]
impl Importer for ChatGptImporter {
fn source_name(&self) -> &'static str {
"chatgpt"
}
fn config_shape(&self) -> ImportConfigShape {
ImportConfigShape {
fields: vec![ConfigField {
key: "path".into(),
label: "conversations.json path".into(),
kind: ConfigFieldKind::File {
extension: Some("json".into()),
},
required: true,
help: Some(
"Unzipped OpenAI data export \u{2192} conversations.json".into(),
),
}],
example: ImportConfig {
source: "chatgpt".into(),
params: {
let mut m = std::collections::BTreeMap::new();
m.insert(
"path".into(),
serde_json::json!("~/Downloads/chatgpt-export/conversations.json"),
);
m
},
},
}
}
async fn discover(
&self,
cfg: &ImportConfig,
) -> std::result::Result<ImportDiscovery, ImportError> {
let path = require_path(cfg)?;
let convs = dirigent_chatgpt::parse_export(&path)
.map_err(|e| ImportError::Discovery(e.to_string()))?;
let total_sessions = convs.len();
let total_estimated_messages: usize = convs.iter().map(|c| c.messages.len()).sum();
// ChatGPT exports don't carry per-project information, so we bucket
// everything into a single synthetic project named after the file.
let project_name = path
.file_name()
.and_then(|s| s.to_str())
.unwrap_or("ChatGPT export")
.to_string();
Ok(ImportDiscovery {
source_name: "ChatGPT".to_string(),
source_path: path.display().to_string(),
projects: vec![ImportProject {
name: project_name,
session_count: total_sessions,
}],
total_sessions,
total_estimated_messages,
})
}
async fn import(
&self,
cfg: &ImportConfig,
archivist: &Archivist,
target: ImportTarget,
progress: ImportProgressSink,
) -> std::result::Result<ImportStats, ImportError> {
let path = require_path(cfg)?;
let convs = dirigent_chatgpt::parse_export(&path)
.map_err(|e| ImportError::Parser(e.to_string()))?;
// Build discovered-session list + keep the parsed convs handy for
// message conversion inside the closure.
let mut discovered: Vec<DiscoveredSession> = Vec::with_capacity(convs.len());
for c in &convs {
let metadata = serde_json::json!({
"source": "chatgpt",
"conversation_id": c.id,
"parser_metadata": c.metadata.clone(),
});
discovered.push(DiscoveredSession {
native_session_id: c.id.clone(),
title: c.title.clone(),
created_at: c.created_at,
updated_at: c.updated_at,
message_count: c.messages.len(),
metadata,
project_path: None,
file_size: None,
});
}
// Map native_id -> parsed conversation for O(1) lookup in `convert`.
let conv_lookup: std::collections::HashMap<String, ParsedConversation> = convs
.into_iter()
.map(|c| (c.id.clone(), c))
.collect();
// Fingerprint the import by the canonical path. Re-running against the
// same file aliases onto the same connector.
let canonical_path = path.canonicalize().unwrap_or_else(|_| path.clone());
let fingerprint = format!("{}:{}", CHATGPT_FINGERPRINT_PREFIX, canonical_path.display());
let connector_req = RegisterConnectorRequest {
r#type: CHATGPT_CONNECTOR_TYPE.to_string(),
title: format!("ChatGPT ({})", canonical_path.display()),
client_native_id: fingerprint.clone(),
custom_uid: None,
metadata: serde_json::json!({}),
fingerprint: Some(fingerprint),
};
let convert = |native_id: &str| -> Result<Vec<MessageRecord>> {
let conv = conv_lookup.get(native_id).ok_or_else(|| {
ArchivistError::InvalidRequest(format!(
"Parsed conversation not found for native_id: {}",
native_id
))
})?;
Ok(convert_conversation_to_records(conv))
};
import_sessions(
archivist,
connector_req,
discovered,
convert,
target.archive,
&progress,
false,
&target.project_map,
)
.await
.map_err(|e| ImportError::Archivist(e.to_string()))
}
}
// ---------------------------------------------------------------------------
// Conversion helpers
// ---------------------------------------------------------------------------
/// Reads the `path` parameter from `cfg` as a [`PathBuf`].
///
/// # Errors
/// Returns `ImportError::Config` when `path` is absent or not a JSON string.
fn require_path(cfg: &ImportConfig) -> std::result::Result<PathBuf, ImportError> {
    match cfg.params.get("path").and_then(|value| value.as_str()) {
        Some(raw) => Ok(PathBuf::from(raw)),
        None => Err(ImportError::Config("missing `path`".into())),
    }
}
/// Returns `native_id` as a UUID when it parses as one; otherwise derives a
/// deterministic UUIDv5 from its bytes under [`CHATGPT_MESSAGE_NS`].
fn parse_or_derive_uuid(native_id: &str) -> Uuid {
    match Uuid::parse_str(native_id) {
        Ok(parsed) => parsed,
        Err(_) => Uuid::new_v5(&CHATGPT_MESSAGE_NS, native_id.as_bytes()),
    }
}
/// Maps each parsed `ContentPart` onto the matching
/// `dirigent_protocol::MessagePart`, preserving order. A missing code
/// language becomes the default (empty) value; tool parts get no
/// `tool_call_id`.
fn parts_to_message_parts(parts: &[ContentPart]) -> Vec<dirigent_protocol::MessagePart> {
    let mut converted = Vec::with_capacity(parts.len());
    for part in parts {
        let mapped = match part {
            ContentPart::Text { text } => dirigent_protocol::MessagePart::Text {
                text: text.clone(),
            },
            ContentPart::Code { language, text } => dirigent_protocol::MessagePart::Code {
                language: language.clone().unwrap_or_default(),
                code: text.clone(),
            },
            ContentPart::Tool { name, input, output } => dirigent_protocol::MessagePart::Tool {
                tool: name.clone(),
                tool_call_id: None,
                input: input.clone(),
                output: output.clone(),
            },
        };
        converted.push(mapped);
    }
    converted
}
/// Renders content parts as one markdown-ish string for the `content_md`
/// fallback surface: text verbatim, code as a fenced block, tools as a
/// `[Tool: name]` placeholder. Parts are separated by a blank line.
fn parts_to_markdown(parts: &[ContentPart]) -> String {
    let mut rendered = String::new();
    for (position, part) in parts.iter().enumerate() {
        // Separator goes between parts only, never before the first one.
        if position > 0 {
            rendered.push_str("\n\n");
        }
        match part {
            ContentPart::Text { text } => rendered.push_str(text),
            ContentPart::Code { language, text } => {
                let lang = language.clone().unwrap_or_default();
                rendered.push_str(&format!("```{}\n{}\n```", lang, text));
            }
            ContentPart::Tool { name, .. } => {
                rendered.push_str(&format!("[Tool: {}]", name));
            }
        }
    }
    rendered
}
/// Convert a parsed ChatGPT conversation into a vector of `MessageRecord`s.
///
/// Each message's `session` field is left as `Uuid::nil()`; the generic
/// `import_sessions` orchestrator patches it to the real scroll id.
fn convert_conversation_to_records(conv: &ParsedConversation) -> Vec<MessageRecord> {
conv.messages
.iter()
.filter_map(convert_parsed_message)
.collect()
}
/// Converts one parsed ChatGPT message into a `MessageRecord`.
///
/// Returns `None` when the rendered markdown is blank and every part is
/// empty (nothing to archive). The record's `session` is `Uuid::nil()` and
/// its timestamp falls back to the current time when the export has none.
///
/// The message id is the native id (parsed as a UUID, or hashed to a UUIDv5).
/// When the native id is empty, the id is derived from role, the *source*
/// timestamp and the rendered content. The wall-clock fallback timestamp is
/// deliberately kept out of that key: hashing `Utc::now()` would give the
/// same message a different id on every run and defeat re-import
/// deduplication. Id-less messages with identical role, timestamp and
/// content therefore share one id.
fn convert_parsed_message(msg: &ParsedMessage) -> Option<MessageRecord> {
    // Skip messages with entirely empty text payloads (nothing to archive).
    let content_md = parts_to_markdown(&msg.content);
    if content_md.trim().is_empty() && msg.content.iter().all(is_part_empty) {
        return None;
    }

    let parts = parts_to_message_parts(&msg.content);
    let content_parts = serde_json::to_value(&parts).ok();

    let message_id = if msg.id.is_empty() {
        // Only a timestamp that came from the export may feed the id key;
        // a missing one contributes an empty string so the id stays stable.
        let ts_key = msg.ts.map(|t| t.to_rfc3339()).unwrap_or_default();
        let key = format!("{}:{}:{}", msg.role, ts_key, content_md);
        Uuid::new_v5(&CHATGPT_MESSAGE_NS, key.as_bytes())
    } else {
        parse_or_derive_uuid(&msg.id)
    };

    // The stored timestamp still falls back to "now" when the source has none.
    let ts = msg.ts.unwrap_or_else(Utc::now);

    Some(MessageRecord {
        version: 1,
        message_id,
        session: Uuid::nil(),
        parent_id: None,
        ts,
        role: msg.role.clone(),
        author: None,
        content_md,
        content_parts,
        attachments: Vec::new(),
        metadata: msg.metadata.clone(),
    })
}
/// Reports whether a content part carries no payload: text and code parts
/// are empty when their text is blank; tool parts are never empty.
fn is_part_empty(p: &ContentPart) -> bool {
    match p {
        ContentPart::Text { text } | ContentPart::Code { text, .. } => text.trim().is_empty(),
        ContentPart::Tool { .. } => false,
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a timestamp-less message with id `m1` and a single text part.
    fn text_message(role: &str, text: &str) -> ParsedMessage {
        ParsedMessage {
            id: "m1".into(),
            role: role.into(),
            ts: None,
            content: vec![ContentPart::Text { text: text.into() }],
            metadata: serde_json::Value::Null,
        }
    }

    #[test]
    fn parse_or_derive_uuid_parses_real_uuid() {
        let canonical = "12345678-1234-5678-1234-567812345678";
        assert_eq!(parse_or_derive_uuid(canonical).to_string(), canonical);
    }

    #[test]
    fn parse_or_derive_uuid_falls_back_to_v5() {
        let first = parse_or_derive_uuid("not-a-uuid");
        let second = parse_or_derive_uuid("not-a-uuid");
        assert_eq!(first, second, "deterministic UUIDv5 derivation");

        let other = parse_or_derive_uuid("different");
        assert_ne!(first, other);
    }

    #[test]
    fn parts_to_message_parts_covers_all_variants() {
        let text = ContentPart::Text { text: "hi".into() };
        let code = ContentPart::Code {
            language: Some("rust".into()),
            text: "fn main() {}".into(),
        };
        let tool = ContentPart::Tool {
            name: "browser".into(),
            input: serde_json::json!({"url": "https://example.com"}),
            output: Some(serde_json::json!({"status": 200})),
        };

        let converted = parts_to_message_parts(&[text, code, tool]);

        assert_eq!(converted.len(), 3);
        assert!(matches!(&converted[0], dirigent_protocol::MessagePart::Text { .. }));
        assert!(matches!(&converted[1], dirigent_protocol::MessagePart::Code { .. }));
        assert!(matches!(&converted[2], dirigent_protocol::MessagePart::Tool { .. }));
    }

    #[test]
    fn empty_parsed_message_is_skipped() {
        let blank = text_message("system", " ");
        assert!(convert_parsed_message(&blank).is_none());
    }

    #[test]
    fn non_empty_parsed_message_round_trips() {
        let greeting = text_message("user", "hello");
        let converted = convert_parsed_message(&greeting).expect("should convert");
        assert_eq!(converted.role, "user");
        assert_eq!(converted.content_md, "hello");
        assert_eq!(converted.session, Uuid::nil());
        assert!(converted.content_parts.is_some());
    }
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,331 @@
//! OpenAI Codex CLI importer: takes a path to a directory of JSONL session files.
use std::path::PathBuf;
use async_trait::async_trait;
use chrono::Utc;
use uuid::Uuid;
use dirigent_codex::{ParsedMessage, ParsedSession};
use super::super::progress::ImportProgressSink;
use super::super::trait_def::{
ConfigField, ConfigFieldKind, ImportConfig, ImportConfigShape, ImportError, ImportTarget,
Importer,
};
use super::super::{
import_sessions, DiscoveredSession, ImportDiscovery, ImportProject, ImportStats,
};
use crate::coordinator::Archivist;
use crate::error::{ArchivistError, Result};
use crate::types::{MessageRecord, RegisterConnectorRequest};
/// Connector type string used for imported Codex sessions (the `type` of the
/// `RegisterConnectorRequest` built during import).
pub const CODEX_CONNECTOR_TYPE: &str = "Codex";
/// Fingerprint prefix for locally-imported Codex sessions. The full
/// fingerprint is `<prefix>:<canonical sessions directory>`.
pub const CODEX_FINGERPRINT_PREFIX: &str = "import/local:codex";
/// Namespace UUID for deterministic UUIDv5 derivations of message ids that
/// Codex does not expose natively.
// Changing this value changes every derived message id, so previously
// imported messages would no longer match on re-import.
const CODEX_MESSAGE_NS: Uuid = Uuid::from_u128(0x9e28_b7d4_af9c_4fe2_a8d1_8c41_21b3_2222);
/// Stateless [`Importer`] for a directory of OpenAI Codex CLI JSONL session
/// files, registered under the source name `codex`.
pub struct CodexImporter;
#[async_trait]
impl Importer for CodexImporter {
    /// Registry key of this importer.
    fn source_name(&self) -> &'static str {
        "codex"
    }

    /// Describes the single required `path` parameter (a directory) and an
    /// example configuration.
    fn config_shape(&self) -> ImportConfigShape {
        let path_field = ConfigField {
            key: "path".into(),
            label: "Codex sessions directory".into(),
            kind: ConfigFieldKind::Path { directory: true },
            required: true,
            help: Some("Usually ~/.codex/sessions".into()),
        };

        let mut example_params = std::collections::BTreeMap::new();
        example_params.insert("path".into(), serde_json::json!("~/.codex/sessions"));

        ImportConfigShape {
            fields: vec![path_field],
            example: ImportConfig {
                source: "codex".into(),
                params: example_params,
            },
        }
    }

    /// Lists the session files under the configured directory and estimates
    /// the message count, without writing anything.
    ///
    /// `total_sessions` counts every discovered file, while the message
    /// estimate only covers files that parse successfully.
    ///
    /// # Errors
    /// `ImportError::Config` when `path` is missing, `ImportError::Discovery`
    /// when session discovery fails.
    async fn discover(
        &self,
        cfg: &ImportConfig,
    ) -> std::result::Result<ImportDiscovery, ImportError> {
        let sessions_dir = require_path(cfg)?;
        let files = dirigent_codex::discover_sessions(&sessions_dir)
            .map_err(|e| ImportError::Discovery(e.to_string()))?;

        // Parse each file to count messages. Best-effort estimate: files that
        // fail to parse contribute nothing.
        let message_estimate: usize = files
            .iter()
            .filter_map(|file| dirigent_codex::parse_file(file).ok())
            .map(|session| session.messages.len())
            .sum();
        let session_count = files.len();

        // Codex sessions live flat in one directory; bucket them into a
        // single synthetic project named after the directory.
        let project_name = match sessions_dir.file_name().and_then(|s| s.to_str()) {
            Some(dir_name) => dir_name.to_string(),
            None => "Codex sessions".to_string(),
        };

        Ok(ImportDiscovery {
            source_name: "Codex".to_string(),
            source_path: sessions_dir.display().to_string(),
            projects: vec![ImportProject {
                name: project_name,
                session_count,
            }],
            total_sessions: session_count,
            total_estimated_messages: message_estimate,
        })
    }

    /// Parses every session file and feeds the parsed sessions through the
    /// generic `import_sessions` orchestrator. Files that fail to parse are
    /// logged and skipped.
    ///
    /// # Errors
    /// `ImportError::Config` when `path` is missing, `ImportError::Discovery`
    /// when session discovery fails, `ImportError::Archivist` when the
    /// orchestrator fails.
    async fn import(
        &self,
        cfg: &ImportConfig,
        archivist: &Archivist,
        target: ImportTarget,
        progress: ImportProgressSink,
    ) -> std::result::Result<ImportStats, ImportError> {
        let sessions_dir = require_path(cfg)?;
        let files = dirigent_codex::discover_sessions(&sessions_dir)
            .map_err(|e| ImportError::Discovery(e.to_string()))?;

        // Parse every session file up front so that the `convert` closure
        // (called by `import_sessions`) can do O(1) lookups.
        let mut parsed_sessions: Vec<ParsedSession> = Vec::with_capacity(files.len());
        for file in &files {
            let session = match dirigent_codex::parse_file(file) {
                Ok(session) => session,
                Err(e) => {
                    tracing::warn!(
                        path = %file.display(),
                        error = %e,
                        "Skipping unreadable Codex session file"
                    );
                    continue;
                }
            };
            parsed_sessions.push(session);
        }

        // One discovered-session entry per successfully parsed file.
        let discovered: Vec<DiscoveredSession> = parsed_sessions
            .iter()
            .map(|session| DiscoveredSession {
                native_session_id: session.native_id.clone(),
                title: None,
                created_at: session.created_at,
                updated_at: session.updated_at,
                message_count: session.messages.len(),
                metadata: serde_json::json!({
                    "source": "codex",
                    "source_path": session.source_path.display().to_string(),
                    "native_id": session.native_id,
                }),
                project_path: None,
                // A file whose metadata cannot be read simply has no size.
                file_size: std::fs::metadata(&session.source_path).ok().map(|m| m.len()),
            })
            .collect();

        // native_id -> parsed session, for O(1) lookup in `convert`.
        let mut by_native_id: std::collections::HashMap<String, ParsedSession> =
            std::collections::HashMap::with_capacity(parsed_sessions.len());
        for session in parsed_sessions {
            by_native_id.insert(session.native_id.clone(), session);
        }

        // Fingerprint the import by the canonical directory path so that
        // re-running against the same directory aliases onto the same
        // connector. If canonicalization fails the raw path is used instead.
        let canonical = match sessions_dir.canonicalize() {
            Ok(resolved) => resolved,
            Err(_) => sessions_dir.clone(),
        };
        let fingerprint = format!("{}:{}", CODEX_FINGERPRINT_PREFIX, canonical.display());
        let connector_req = RegisterConnectorRequest {
            r#type: CODEX_CONNECTOR_TYPE.to_string(),
            title: format!("Codex ({})", canonical.display()),
            client_native_id: fingerprint.clone(),
            custom_uid: None,
            metadata: serde_json::json!({}),
            fingerprint: Some(fingerprint),
        };

        let convert = |native_id: &str| -> Result<Vec<MessageRecord>> {
            match by_native_id.get(native_id) {
                Some(session) => Ok(convert_session_to_records(session)),
                None => Err(ArchivistError::InvalidRequest(format!(
                    "Parsed session not found for native_id: {}",
                    native_id
                ))),
            }
        };

        let outcome = import_sessions(
            archivist,
            connector_req,
            discovered,
            convert,
            target.archive,
            &progress,
            false,
            &target.project_map,
        )
        .await;
        outcome.map_err(|e| ImportError::Archivist(e.to_string()))
    }
}
// ---------------------------------------------------------------------------
// Conversion helpers
// ---------------------------------------------------------------------------
/// Reads the `path` parameter from `cfg` as a [`PathBuf`].
///
/// # Errors
/// Returns `ImportError::Config` when `path` is absent or not a JSON string.
fn require_path(cfg: &ImportConfig) -> std::result::Result<PathBuf, ImportError> {
    match cfg.params.get("path").and_then(|value| value.as_str()) {
        Some(raw) => Ok(PathBuf::from(raw)),
        None => Err(ImportError::Config("missing `path`".into())),
    }
}
/// Converts each [`ParsedMessage`] of a session into a [`MessageRecord`],
/// dropping the ones `convert_parsed_message` rejects. A message's position
/// in the original list (before filtering) is passed on as its index.
/// Records keep `session = Uuid::nil()` for the generic orchestrator to patch.
fn convert_session_to_records(session: &ParsedSession) -> Vec<MessageRecord> {
    let mut records = Vec::new();
    for (position, message) in session.messages.iter().enumerate() {
        if let Some(record) = convert_parsed_message(&session.native_id, position, message) {
            records.push(record);
        }
    }
    records
}
/// Converts one parsed Codex message into a `MessageRecord`.
///
/// Returns `None` for blank messages. The record's `session` is
/// `Uuid::nil()` and its timestamp falls back to the current time when the
/// source has none.
///
/// Codex events don't carry per-message UUIDs, so the id is always a UUIDv5
/// over `(native_session_id, index, role, source timestamp)` under
/// [`CODEX_MESSAGE_NS`]. The wall-clock fallback timestamp is deliberately
/// kept out of that key: hashing `Utc::now()` would give a timestamp-less
/// message a different id on every run and defeat re-import deduplication.
fn convert_parsed_message(
    native_session_id: &str,
    index: usize,
    msg: &ParsedMessage,
) -> Option<MessageRecord> {
    // Skip purely empty messages — nothing to archive.
    if msg.content.trim().is_empty() {
        return None;
    }

    // Only a timestamp that came from the source may feed the id key; a
    // missing one contributes an empty string so the id stays stable. Index
    // disambiguates otherwise-identical back-to-back messages.
    let ts_key = msg.ts.map(|t| t.to_rfc3339()).unwrap_or_default();
    let key = format!(
        "{}:{}:{}:{}",
        native_session_id,
        index,
        msg.role,
        ts_key,
    );
    let message_id = Uuid::new_v5(&CODEX_MESSAGE_NS, key.as_bytes());

    // The stored timestamp still falls back to "now" when the source has none.
    let ts = msg.ts.unwrap_or_else(Utc::now);

    let parts = vec![dirigent_protocol::MessagePart::Text {
        text: msg.content.clone(),
    }];
    let content_parts = serde_json::to_value(&parts).ok();

    Some(MessageRecord {
        version: 1,
        message_id,
        session: Uuid::nil(),
        parent_id: None,
        ts,
        role: msg.role.clone(),
        author: None,
        content_md: msg.content.clone(),
        content_parts,
        attachments: Vec::new(),
        metadata: msg.metadata.clone(),
    })
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a message with the given role, content and optional timestamp.
    fn build_message(
        role: &str,
        content: &str,
        ts: Option<chrono::DateTime<Utc>>,
    ) -> ParsedMessage {
        ParsedMessage {
            ts,
            role: role.into(),
            content: content.into(),
            metadata: serde_json::Value::Null,
        }
    }

    /// Message without a timestamp.
    fn sample_message(role: &str, content: &str) -> ParsedMessage {
        build_message(role, content, None)
    }

    /// Message pinned to a fixed timestamp.
    fn sample_message_at(role: &str, content: &str, ts: chrono::DateTime<Utc>) -> ParsedMessage {
        build_message(role, content, Some(ts))
    }

    #[test]
    fn empty_content_is_skipped() {
        let blank = sample_message("user", " ");
        assert!(convert_parsed_message("s", 0, &blank).is_none());
    }

    #[test]
    fn non_empty_message_converts() {
        let greeting = sample_message("user", "hello");
        let converted = convert_parsed_message("s", 0, &greeting).expect("converts");
        assert_eq!(converted.role, "user");
        assert_eq!(converted.content_md, "hello");
        assert_eq!(converted.session, Uuid::nil());
        assert!(converted.content_parts.is_some());
    }

    #[test]
    fn message_id_is_deterministic_per_session_index() {
        // Fix ts so we don't accidentally hash Utc::now() into the id key.
        let fixed_ts = chrono::TimeZone::timestamp_opt(&Utc, 1_735_732_800, 0)
            .single()
            .unwrap();
        let message = sample_message_at("user", "hello", fixed_ts);
        let id_for = |session: &str, index: usize| {
            convert_parsed_message(session, index, &message)
                .unwrap()
                .message_id
        };

        let baseline = id_for("session-a", 0);
        assert_eq!(baseline, id_for("session-a", 0));
        // Different index → different id.
        assert_ne!(baseline, id_for("session-a", 1));
        // Different session → different id.
        assert_ne!(baseline, id_for("session-b", 0));
    }

    #[test]
    fn require_path_reports_missing_config() {
        let empty_cfg = ImportConfig {
            source: "codex".into(),
            params: Default::default(),
        };
        let err = require_path(&empty_cfg).expect_err("should fail");
        assert!(matches!(err, ImportError::Config(_)));
    }
}
@@ -0,0 +1,7 @@
//! Per-source importer implementations.
pub mod claude;
#[cfg(feature = "importer-chatgpt")]
pub mod chatgpt;
#[cfg(feature = "importer-codex")]
pub mod codex;
@@ -0,0 +1,113 @@
//! Importer trait and config-shape types consumed by the UI (dynamic form
//! rendering) and the CLI (future). Scripts can serialise ImportConfig as JSON.
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashMap};
use thiserror::Error;
use uuid::Uuid;
use crate::coordinator::Archivist;
use super::progress::ImportProgressSink;
/// A pluggable source of sessions that can be imported into the archive.
///
/// Implementations are `Send + Sync` so they can be shared as
/// `Arc<dyn Importer>`.
#[async_trait]
pub trait Importer: Send + Sync {
/// Stable, static identifier of the source (e.g. `"chatgpt"`, `"codex"`).
fn source_name(&self) -> &'static str;
/// Describes the parameters this importer accepts, plus an example config.
fn config_shape(&self) -> ImportConfigShape;
/// Inspects the source described by `cfg` and summarizes what it contains.
/// NOTE(review): the implementations visible in this chunk do not write to
/// the archive here — confirm for other importers.
async fn discover(
&self,
cfg: &ImportConfig,
) -> Result<super::ImportDiscovery, ImportError>;
/// Imports the source described by `cfg` into `archivist`, reporting
/// progress through `progress` and returning aggregate statistics.
async fn import(
&self,
cfg: &ImportConfig,
archivist: &Archivist,
target: ImportTarget,
progress: ImportProgressSink,
) -> Result<super::ImportStats, ImportError>;
/// Attempt to auto-detect default configuration values.
///
/// Importers that can discover their source location automatically
/// (e.g., Claude Code's `~/.claude` directory) should override this.
/// Returns `None` when auto-detection is not supported or fails.
fn detect_defaults(&self) -> Option<ImportConfig> {
None
}
}
/// Serializable description of one importer: its source name, a display
/// name, and the shape of its configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImporterInfo {
pub source_name: String,
pub display_name: String,
pub config_shape: ImportConfigShape,
}
/// Declares which parameters an importer accepts (`fields`) and provides a
/// filled-in `example` configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportConfigShape {
pub fields: Vec<ConfigField>,
pub example: ImportConfig,
}
/// One configurable parameter of an importer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfigField {
/// Parameter name; the importers in this crate read it from `ImportConfig::params` under this key.
pub key: String,
/// Human-readable label.
pub label: String,
/// Input type of the field.
pub kind: ConfigFieldKind,
/// Whether the parameter must be supplied.
pub required: bool,
/// Optional help text.
pub help: Option<String>,
}
/// Input type of a [`ConfigField`].
///
/// Serialized with serde as an internally tagged enum: the variant name is
/// written in snake_case under the `type` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum ConfigFieldKind {
/// A filesystem path; `directory` is set by importers that expect a directory.
Path { directory: bool },
/// A file, optionally restricted to one extension (e.g. `json`).
File { extension: Option<String> },
/// Free-form text.
String,
/// Boolean flag.
Bool,
/// One value out of `variants`.
Enum { variants: Vec<String> },
}
/// Configuration for one import run: which `source` to use and its
/// parameters as arbitrary JSON values keyed by name.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ImportConfig {
pub source: String,
/// Defaults to an empty map when the key is missing during deserialization.
#[serde(default)]
pub params: BTreeMap<String, serde_json::Value>,
}
/// Where an import should land. Every field is optional or defaults to empty.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ImportTarget {
/// Optional archive selector, forwarded to `import_sessions` by the importers.
pub archive: Option<String>,
/// NOTE(review): not read by the importers visible in this chunk — confirm usage.
pub connector_alias: Option<String>,
/// NOTE(review): not read by the importers visible in this chunk — confirm usage.
pub project_id: Option<Uuid>,
/// Maps normalized project_path -> project_id (as string UUID).
/// When a session's project_path is found in this map, the corresponding
/// project_id is injected into the session metadata during import.
#[serde(default)]
pub project_map: HashMap<String, String>,
}
/// Errors returned by [`Importer`] implementations. Most variants carry the
/// underlying error rendered as a string.
#[derive(Debug, Error)]
pub enum ImportError {
/// NOTE(review): presumably an unknown source name — not constructed in this chunk.
#[error("source not found: {0}")] SourceNotFound(String),
/// Invalid or incomplete `ImportConfig` (e.g. a missing `path` parameter).
#[error("config: {0}")] Config(String),
/// The source could not be enumerated or parsed during discovery.
#[error("discovery: {0}")] Discovery(String),
/// Underlying I/O failure; converts automatically from `std::io::Error`.
#[error("I/O: {0}")] Io(#[from] std::io::Error),
/// The archive layer rejected or failed the import.
#[error("archivist: {0}")] Archivist(String),
/// The source data could not be parsed during import.
#[error("parser: {0}")] Parser(String),
/// NOTE(review): presumably raised when an import is aborted — not constructed in this chunk.
#[error("cancelled")] Cancelled,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An `ImportConfig` survives a JSON serialize/deserialize round trip.
    #[test]
    fn config_round_trips() {
        let original = ImportConfig {
            source: String::from("claude"),
            params: BTreeMap::new(),
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ImportConfig = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.source, "claude");
    }
}
+45
View File
@@ -0,0 +1,45 @@
//! Dirigent Archivist
//!
//! Persistent storage for all agentic interactions in Dirigent.
//!
//! The Archivist provides file-based archival storage using NDJSON, JSON, and TSV
//! formats for durability and human-readability. It implements an archive-first
//! architecture with connector API fallback for session data.
//!
//! # Key Features
//!
//! - File-based storage for easy curation and grep-ability
//! - Content-addressable file storage for attachments
//! - Session lineage tracking (splits, continuations, mutations)
//! - Connector registry with UID coordination
//! - Real-time event streaming for archive updates
//!
//! # Architecture
//!
//! See `docs/building/05_archivist/vision.md` for detailed design.
pub mod accumulator;
pub mod backend;
pub mod backends;
pub mod backfill;
pub mod coordinator;
pub mod error;
pub mod events;
pub mod import;
pub mod registry;
pub mod session;
pub mod storage;
pub mod types;
// Re-export commonly used types
pub use accumulator::{MessageAccumulator, ToolCallData};
pub use backend::{
ArchiveBackend, ArchiveCapability, CapabilitySet, ConnectorRegistryBackend,
DagBackend, HealthStatus, MetaEventsBackend, SearchBackend, SessionMappingBackend,
};
pub use backends::JsonlBackend;
pub use backfill::{backfill_from_sessions, convert_message_to_record, BackfillStats};
pub use coordinator::{ArchiveInfo, ArchiveMetadata, Archivist};
pub use error::{ArchivistError, Result};
pub use events::EventHandler;
pub use types::*;
@@ -0,0 +1,116 @@
//! Positive LRU cache mapping `scroll_id` to the backend that holds the
//! authoritative session metadata, populated on the first successful read.
use std::num::NonZeroUsize;
use lru::LruCache;
use tokio::sync::Mutex;
use uuid::Uuid;
/// Number of entries a cache built with `ReadCache::new` can hold.
const DEFAULT_CAPACITY: usize = 10_000;
/// LRU cache from a `scroll_id` to the name of the backend that served it.
///
/// The map sits behind an async mutex, so every method takes `&self`.
pub struct ReadCache {
// Key: scroll id. Value: backend name.
inner: Mutex<LruCache<Uuid, String>>,
}
impl ReadCache {
    /// Creates a cache that holds up to `DEFAULT_CAPACITY` entries.
    pub fn new() -> Self {
        Self::with_capacity(DEFAULT_CAPACITY)
    }

    /// Creates a cache that holds up to `capacity` entries.
    ///
    /// A `capacity` of zero is clamped to one, because the underlying
    /// `LruCache` requires a non-zero size.
    pub fn with_capacity(capacity: usize) -> Self {
        let cap = NonZeroUsize::new(capacity.max(1))
            .expect("capacity was clamped to at least 1");
        Self {
            inner: Mutex::new(LruCache::new(cap)),
        }
    }

    /// Returns the backend name cached for `scroll_id`, if any.
    ///
    /// A hit counts as a use: the entry becomes the most recently used one.
    pub async fn get(&self, scroll_id: Uuid) -> Option<String> {
        let mut guard = self.inner.lock().await;
        guard.get(&scroll_id).cloned()
    }

    /// Records `backend_name` as the backend for `scroll_id`.
    ///
    /// Replaces any existing entry for the same id. When the cache is full,
    /// the least recently used entry is evicted.
    pub async fn put(&self, scroll_id: Uuid, backend_name: String) {
        let mut guard = self.inner.lock().await;
        guard.put(scroll_id, backend_name);
    }

    /// Removes the entry for `scroll_id`; does nothing if it is not cached.
    pub async fn invalidate(&self, scroll_id: Uuid) {
        let mut guard = self.inner.lock().await;
        guard.pop(&scroll_id);
    }

    /// Points `scroll_id` at `new_backend`.
    ///
    /// Behaves exactly like [`ReadCache::put`]; the separate name only
    /// documents intent at call sites.
    pub async fn rewrite(&self, scroll_id: Uuid, new_backend: String) {
        self.put(scroll_id, new_backend).await;
    }

    /// Drops every cached entry.
    pub async fn clear(&self) {
        let mut guard = self.inner.lock().await;
        guard.clear();
    }

    /// Returns the number of cached entries.
    pub async fn len(&self) -> usize {
        let guard = self.inner.lock().await;
        guard.len()
    }

    /// Returns `true` when nothing is cached.
    pub async fn is_empty(&self) -> bool {
        let guard = self.inner.lock().await;
        guard.is_empty()
    }
}
impl Default for ReadCache {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a deterministic UUID whose 16 bytes all equal `b`.
    fn id(b: u8) -> Uuid {
        Uuid::from_bytes([b; 16])
    }

    /// A stored entry is returned; an unknown id is a miss.
    #[tokio::test]
    async fn put_then_get() {
        let cache = ReadCache::new();
        cache.put(id(1), "main".into()).await;

        let hit = cache.get(id(1)).await;
        let miss = cache.get(id(2)).await;

        assert_eq!(hit.as_deref(), Some("main"));
        assert!(miss.is_none());
    }

    /// `invalidate` removes a previously stored entry.
    #[tokio::test]
    async fn invalidate_removes_entry() {
        let cache = ReadCache::new();
        cache.put(id(1), "main".into()).await;
        cache.invalidate(id(1)).await;

        let after = cache.get(id(1)).await;
        assert!(after.is_none());
    }

    /// `rewrite` replaces the backend stored for an id.
    #[tokio::test]
    async fn rewrite_changes_backend() {
        let cache = ReadCache::new();
        cache.put(id(1), "a".into()).await;
        cache.rewrite(id(1), "b".into()).await;

        let current = cache.get(id(1)).await;
        assert_eq!(current.as_deref(), Some("b"));
    }

    /// With capacity 2, a third insert evicts the oldest entry.
    #[tokio::test]
    async fn lru_evicts_oldest() {
        let cache = ReadCache::with_capacity(2);
        cache.put(id(1), "a".into()).await;
        cache.put(id(2), "b".into()).await;
        // Third insert pushes out id(1), the least recently used entry.
        cache.put(id(3), "c".into()).await;

        assert!(cache.get(id(1)).await.is_none());
        assert_eq!(cache.get(id(2)).await.as_deref(), Some("b"));
        assert_eq!(cache.get(id(3)).await.as_deref(), Some("c"));
    }

    /// `clear` leaves the cache with zero entries.
    #[tokio::test]
    async fn clear_empties() {
        let cache = ReadCache::new();
        cache.put(id(1), "a".into()).await;
        cache.put(id(2), "b".into()).await;
        cache.clear().await;

        let remaining = cache.len().await;
        assert_eq!(remaining, 0);
    }
}
@@ -0,0 +1,253 @@
//! Declarative `[[archives]]` config block parsed from `dirigent.toml`.
//!
//! The TOML schema is documented in `docs/plans/2026-04-19-archivist-phase3-design.md`.
use serde::{Deserialize, Serialize};
use super::filter::ArchiveFilter;
use super::registration::{FailureMode, OverflowPolicy};
/// Top-level wrapper around the `[[archives]]` array of tables.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ArchivesConfig {
/// One entry per `[[archives]]` table; empty when the key is absent.
#[serde(default, rename = "archives")]
pub entries: Vec<ArchiveConfig>,
}
/// A single `[[archives]]` entry.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ArchiveConfig {
/// Archive name; `ArchivesConfig::validate` rejects duplicates.
pub name: String,
/// Read from the TOML key `type` (e.g. `"jsonl"` in the tests).
/// NOTE(review): presumably selects the backend implementation — confirm.
#[serde(rename = "type")]
pub type_name: String,
/// Defaults to `true`. Must be set, together with `enabled` and a
/// `Required` failure mode, for the entry to count as a primary in
/// `ArchivesConfig::validate`.
#[serde(default = "default_write_active")]
pub write_active: bool,
/// Defaults to `FailureMode::default()` (the tests show this is `Required`).
#[serde(default)]
pub failure_mode: FailureMode,
/// Defaults to `0`. NOTE(review): ordering semantics (lower vs. higher
/// wins) are not visible here — confirm against the reader.
#[serde(default)]
pub read_priority: u32,
/// Defaults to `true`.
#[serde(default = "default_enabled")]
pub enabled: bool,
/// Defaults to the inline policy; accepts either a bare string tag or a
/// detailed table (see `WritePolicyConfig`).
#[serde(default)]
pub write_policy: WritePolicyConfig,
/// Per-archive include/exclude filter applied during non-primary write
/// fanout. Absent or `{}` means unrestricted.
#[serde(default)]
pub filter: ArchiveFilter,
/// Arbitrary TOML under `[archives.params]`; defaults to an empty table.
/// NOTE(review): presumably interpreted by the backend named in `type`.
#[serde(default = "default_params")]
pub params: toml::Value,
}
/// Serde default for `ArchiveConfig::params`: an empty TOML table.
fn default_params() -> toml::Value {
    let empty = toml::value::Table::new();
    toml::Value::Table(empty)
}
/// Serde default for `ArchiveConfig::write_active`: on unless configured off.
fn default_write_active() -> bool {
    const WRITE_ACTIVE_BY_DEFAULT: bool = true;
    WRITE_ACTIVE_BY_DEFAULT
}
/// Serde default for `ArchiveConfig::enabled`: on unless configured off.
fn default_enabled() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
/// Write policy as written in config, in either of two shapes.
///
/// The enum is untagged, so serde tries the variants in declaration order:
/// first the bare string form (`Tag`), then the table form (`Detailed`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum WritePolicyConfig {
/// Short form: a plain string such as `"inline"`.
Tag(WritePolicyTag),
/// Long form: a table with a `type` key plus policy-specific settings.
Detailed(WritePolicyDetailed),
}
/// Policies that can be written as a bare `snake_case` string.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum WritePolicyTag {
/// Written as `"inline"`; maps to the runtime inline policy.
Inline,
}
/// Table form of the write policy, selected by a `snake_case` `type` key.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum WritePolicyDetailed {
/// `type = "inline"`; no further settings.
Inline,
/// `type = "queued"`; every setting is optional.
Queued {
/// Defaults to 50. NOTE(review): presumably a batching window in
/// milliseconds, going by the name — confirm against the runtime.
#[serde(default = "default_batch_window_ms")]
batch_window_ms: u64,
/// Defaults to 1024. NOTE(review): presumably the queue size — confirm.
#[serde(default = "default_capacity")]
capacity: usize,
/// Defaults to `OverflowPolicy::default()`; the tests accept
/// `"drop_oldest"` as one value.
#[serde(default)]
overflow: OverflowPolicy,
},
}
/// Serde default for the `batch_window_ms` setting of a queued write policy.
fn default_batch_window_ms() -> u64 {
    const DEFAULT_BATCH_WINDOW_MS: u64 = 50;
    DEFAULT_BATCH_WINDOW_MS
}
/// Serde default for the `capacity` setting of a queued write policy.
fn default_capacity() -> usize {
    const DEFAULT_QUEUE_CAPACITY: usize = 1024;
    DEFAULT_QUEUE_CAPACITY
}
/// An omitted `write_policy` means the short-form inline policy.
impl Default for WritePolicyConfig {
    fn default() -> Self {
        Self::Tag(WritePolicyTag::Inline)
    }
}
impl WritePolicyConfig {
    /// Converts the parsed config form into the runtime `WritePolicy`.
    ///
    /// Both spellings of inline (bare tag and `type = "inline"` table) map to
    /// `WritePolicy::Inline`. A queued table carries its three settings over
    /// unchanged.
    pub fn into_runtime(self) -> super::registration::WritePolicy {
        use super::registration::WritePolicy;

        // Peel off the short form first; only the table form needs a closer look.
        let detailed = match self {
            Self::Tag(WritePolicyTag::Inline) => return WritePolicy::Inline,
            Self::Detailed(detailed) => detailed,
        };

        match detailed {
            WritePolicyDetailed::Inline => WritePolicy::Inline,
            WritePolicyDetailed::Queued {
                batch_window_ms,
                capacity,
                overflow,
            } => WritePolicy::Queued {
                batch_window_ms,
                capacity,
                overflow,
            },
        }
    }
}
use std::collections::BTreeSet;
/// Reasons `ArchivesConfig::validate` rejects a configuration.
#[derive(Debug, thiserror::Error, PartialEq)]
pub enum ConfigValidationError {
/// Two entries share the same `name`; the payload is that name.
#[error("duplicate archive name `{0}`")]
DuplicateName(String),
/// The config has entries, but none is enabled, write-active and
/// `Required` at the same time.
#[error("no `required` write-active backend configured (need at least one)")]
NoPrimary,
}
impl ArchivesConfig {
    /// Checks the structural rules of the `[[archives]]` block.
    ///
    /// # Errors
    ///
    /// - `DuplicateName` with the first repeated name, in declaration order.
    ///   Disabled entries are included in this check.
    /// - `NoPrimary` when there is at least one entry but none is enabled,
    ///   write-active and `FailureMode::Required` together.
    ///
    /// A config without entries is accepted (ephemeral mode).
    pub fn validate(&self) -> Result<(), ConfigValidationError> {
        let mut names: BTreeSet<&str> = BTreeSet::new();
        // `insert` returns false on a repeat, so `find` stops at the first duplicate.
        let duplicate = self
            .entries
            .iter()
            .map(|entry| entry.name.as_str())
            .find(|name| !names.insert(*name));
        if let Some(name) = duplicate {
            return Err(ConfigValidationError::DuplicateName(name.to_owned()));
        }

        // Empty config is allowed (ephemeral mode); otherwise a primary is mandatory.
        let primary_present = self.entries.iter().any(|entry| {
            entry.enabled && entry.write_active && entry.failure_mode == FailureMode::Required
        });
        if self.entries.is_empty() || primary_present {
            Ok(())
        } else {
            Err(ConfigValidationError::NoPrimary)
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Parses a TOML snippet into `ArchivesConfig`, panicking on malformed input.
fn parse(toml_src: &str) -> ArchivesConfig {
toml::from_str(toml_src).expect("parse")
}
// An empty document yields no entries and still validates (ephemeral mode).
#[test]
fn empty_config_is_ephemeral() {
let cfg: ArchivesConfig = toml::from_str("").unwrap();
assert!(cfg.entries.is_empty());
assert!(cfg.validate().is_ok());
}
// With only `name`, `type` and `params`, every other field takes its
// default, and those defaults make the entry a valid primary.
#[test]
fn minimal_single_archive() {
let cfg = parse(
r#"
[[archives]]
name = "main"
type = "jsonl"
[archives.params]
path = "dirigent_archive"
"#,
);
assert_eq!(cfg.entries.len(), 1);
let e = &cfg.entries[0];
assert_eq!(e.name, "main");
assert_eq!(e.type_name, "jsonl");
assert!(e.write_active);
assert_eq!(e.failure_mode, FailureMode::Required);
assert_eq!(e.read_priority, 0);
assert!(e.enabled);
assert!(matches!(e.write_policy, WritePolicyConfig::Tag(WritePolicyTag::Inline)));
cfg.validate().unwrap();
}
// Two entries with the same name fail validation with that name.
#[test]
fn duplicate_name_rejected() {
let cfg = parse(
r#"
[[archives]]
name = "main"
type = "jsonl"
[archives.params]
path = "a"
[[archives]]
name = "main"
type = "jsonl"
[archives.params]
path = "b"
"#,
);
assert_eq!(
cfg.validate(),
Err(ConfigValidationError::DuplicateName("main".into()))
);
}
// A lone best-effort archive does not count as a primary.
#[test]
fn no_primary_rejected() {
let cfg = parse(
r#"
[[archives]]
name = "mirror"
type = "jsonl"
failure_mode = "best_effort"
[archives.params]
path = "a"
"#,
);
assert_eq!(cfg.validate(), Err(ConfigValidationError::NoPrimary));
}
// The table form of `write_policy` selects the queued variant and carries
// all three explicit settings through.
#[test]
fn queued_write_policy_parses() {
let cfg = parse(
r#"
[[archives]]
name = "main"
type = "jsonl"
[archives.params]
path = "a"
[archives.write_policy]
type = "queued"
batch_window_ms = 100
capacity = 4096
overflow = "drop_oldest"
"#,
);
let entry = &cfg.entries[0];
match &entry.write_policy {
WritePolicyConfig::Detailed(WritePolicyDetailed::Queued {
batch_window_ms,
capacity,
overflow,
}) => {
assert_eq!(*batch_window_ms, 100);
assert_eq!(*capacity, 4096);
assert_eq!(*overflow, OverflowPolicy::DropOldest);
}
other => panic!("unexpected write_policy: {:?}", other),
}
}
}

Some files were not shown because too many files have changed in this diff Show More