sync from monorepo @ 2452e92e

This commit is contained in:
2026-05-08 01:59:04 +02:00
commit b03dc15371
459 changed files with 129586 additions and 0 deletions
+72
View File
@@ -0,0 +1,72 @@
# Package: dirigent_langfuse
Phase 4 stream backend that mirrors BusEvents to a Langfuse ingestion
endpoint.
## Scope
- `LangfuseFactory` registered as `kind = "langfuse"` in the
`StreamFactoryRegistry`.
- `LangfuseStream` implements `SessionStream`:
  - Maps each `BusEvent` via `mapping::bus_event_to_items`.
  - Buffers up to 32 items per flush; flushes eagerly when full and on
    shutdown.
  - POSTs `{host}/api/public/ingestion` with basic-auth
    `(public_key, secret_key)`.
## File map
- `src/lib.rs` — public API: `LangfuseStream`, `LangfuseConfig`,
`LangfuseFactory`.
- `src/client.rs` — `LangfuseClient` (reqwest wrapper with retry) and
the `LangfuseStream` implementation.
- `src/mapping.rs` — `bus_event_to_items` mapping.
- `src/factory.rs` — `StreamFactory` impl.
## Event → ingestion mapping
| BusEvent variant | Langfuse item |
|------------------|---------------|
| `SessionCreated` | `trace-create` (id = `scroll_id`) |
| `MessageStarted` | `generation-create` |
| `MessageCompleted` | `generation-update` with output |
| `SessionUpdate` (non-tool) | skipped |
| All others | skipped |
Events without a bound `scroll_id` (no late-bind hit) are dropped — the
implementation does NOT buffer pending events keyed by connector_id /
native_session_id in Phase 4. If buffering is needed later, extend
`LangfuseStream::on_event`.
## Failure modes
- Transport error → retried up to 3 times with exponential backoff; once
retries are exhausted, `StreamOutcome::Failed(StreamError::Transport)`.
Health drift applies; the stream goes Degraded after one failure and
Unavailable after five consecutive failures.
- 5xx response → retried up to 3 times with exponential backoff
(waits of 100ms → 200ms → 400ms; the doubling is capped at 1s), then
returned as `LangfuseError::Status(code)`.
- 4xx response → returned as `LangfuseError::Status(code)`; no retry.
- Empty scroll_id → `StreamOutcome::Skipped` (not a failure).
## Configuration
```toml
[[streams]]
name = "langfuse-prod"
type = "langfuse"
enabled = true
[streams.scope]
kind = "connector"
connector_uid = "01985d00-..."
[streams.params]
host = "https://langfuse.example.com"
public_key = "pk-lf-..."
secret_key = "sk-lf-..."
```
## Deferred
- Tool-call → span mapping (`SpanCreate`/`SpanUpdate`): scaffolded but
not yet populated.
- Buffering pending events keyed by `(connector_id, native_session_id)`
for late-bind scenarios.
+23
View File
@@ -0,0 +1,23 @@
[package]
name = "dirigent_langfuse"
version = "0.1.0"
edition = "2021"
[features]
default = []
server = ["dep:reqwest", "dep:tokio", "dep:dirigent_core", "dirigent_core/server"]
[dependencies]
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
dirigent_core = { path = "../dirigent_core", optional = true }
dirigent_protocol = { path = "../dirigent_protocol" }
reqwest = { version = "0.12", optional = true, features = ["json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
tokio = { version = "1", optional = true, features = ["rt", "sync", "macros"] }
toml = "0.8"
tracing = "0.1"
url = "2"
uuid = { version = "1", features = ["v4", "v7"] }
+204
View File
@@ -0,0 +1,204 @@
//! Langfuse ingestion client. Phase 4 feature-gated on `server`.
use std::sync::Arc;
#[cfg(feature = "server")]
use std::time::Duration;
use async_trait::async_trait;
use chrono::Utc;
use thiserror::Error;
#[cfg(feature = "server")]
use tokio::sync::Mutex;
#[cfg(feature = "server")]
use tracing::warn;
use dirigent_protocol::streaming::{
BusEvent, SessionStream, StreamKind, StreamOutcome, StreamScope, StreamSummary,
};
#[cfg(feature = "server")]
use dirigent_protocol::streaming::StreamError;
#[cfg(feature = "server")]
use crate::mapping::{bus_event_to_items, IngestItem};
/// Langfuse stream configuration (credentials + host).
///
/// `Debug` is implemented by hand so that the secret key never ends up in
/// logs or panic messages; all other fields are printed as usual.
#[derive(Clone)]
pub struct LangfuseConfig {
    /// Base URL of the Langfuse instance, e.g. `https://langfuse.example.com`.
    pub host: String,
    /// Public API key; used as the basic-auth user name.
    pub public_key: String,
    /// Secret API key; used as the basic-auth password.
    pub secret_key: String,
}

impl std::fmt::Debug for LangfuseConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LangfuseConfig")
            .field("host", &self.host)
            .field("public_key", &self.public_key)
            // Never print the credential itself.
            .field("secret_key", &"<redacted>")
            .finish()
    }
}
/// Errors produced while talking to the Langfuse ingestion endpoint.
#[derive(Debug, Error)]
// The variants are only constructed by the `server`-gated client, so
// silence dead-code warnings when that feature is off.
#[cfg_attr(not(feature = "server"), allow(dead_code))]
pub enum LangfuseError {
/// The HTTP client could not be built, or a request failed before any
/// response was received. Carries the underlying error text.
#[error("transport: {0}")]
Transport(String),
/// The endpoint answered with a non-success HTTP status. Carries the
/// numeric status code.
#[error("unexpected status: {0}")]
Status(u16),
/// Intended for JSON serialisation failures. Carries a description of
/// the failure.
#[error("serialisation: {0}")]
Serialisation(String),
}
/// Thin wrapper around `reqwest::Client` that POSTs batches to
/// `{host}/api/public/ingestion` with HTTP basic auth.
#[cfg(feature = "server")]
pub(crate) struct LangfuseClient {
/// Underlying HTTP client used for every ingestion request.
http: reqwest::Client,
/// Base URL of the Langfuse instance, as supplied in `LangfuseConfig::host`.
host: String,
/// Basic-auth credentials as `(public_key, secret_key)`.
auth: (String, String),
}
#[cfg(feature = "server")]
impl LangfuseClient {
    /// Builds a client for the Langfuse instance described by `config`.
    ///
    /// Every request made through the client uses a 10-second timeout.
    ///
    /// # Errors
    /// Returns [`LangfuseError::Transport`] if the underlying `reqwest`
    /// client cannot be constructed.
    pub fn new(config: LangfuseConfig) -> Result<Self, LangfuseError> {
        let http = reqwest::Client::builder()
            .timeout(Duration::from_secs(10))
            .build()
            .map_err(|e| LangfuseError::Transport(e.to_string()))?;
        Ok(Self {
            http,
            host: config.host,
            auth: (config.public_key, config.secret_key),
        })
    }

    /// POSTs `batch` to `{host}/api/public/ingestion` as `{ "batch": [...] }`.
    ///
    /// An empty batch is a no-op. Server errors (5xx) and transport errors
    /// are retried up to three times, sleeping 100 ms, 200 ms and 400 ms
    /// between attempts (the doubling is capped at 1 s). Any other
    /// non-success status is returned immediately without retrying.
    ///
    /// Any 2xx status counts as success; the response body is not inspected.
    ///
    /// # Errors
    /// - [`LangfuseError::Serialisation`] if the batch cannot be turned into JSON.
    /// - [`LangfuseError::Status`] for a non-success status (after retries for 5xx).
    /// - [`LangfuseError::Transport`] if the request still fails after all retries.
    pub async fn ingest_batch(&self, batch: Vec<IngestItem>) -> Result<(), LangfuseError> {
        const MAX_RETRIES: u32 = 3;
        const INITIAL_DELAY_MS: u64 = 100;
        const MAX_DELAY_MS: u64 = 1000;

        if batch.is_empty() {
            return Ok(());
        }

        // Serialise explicitly: interpolating `batch` straight into `json!`
        // would unwrap internally and panic on a serialisation failure.
        let items = serde_json::to_value(&batch)
            .map_err(|e| LangfuseError::Serialisation(e.to_string()))?;
        let payload = serde_json::json!({ "batch": items });
        let url = format!("{}/api/public/ingestion", self.host.trim_end_matches('/'));

        let mut attempt = 0u32;
        let mut delay_ms = INITIAL_DELAY_MS;
        loop {
            let resp = self
                .http
                .post(&url)
                .basic_auth(&self.auth.0, Some(&self.auth.1))
                .json(&payload)
                .send()
                .await;
            match resp {
                Ok(r) if r.status().is_success() => return Ok(()),
                Ok(r) if r.status().is_server_error() && attempt < MAX_RETRIES => {
                    warn!(status = %r.status(), attempt, "langfuse ingestion 5xx; retrying");
                }
                // 4xx, other statuses, or a 5xx once retries are exhausted.
                Ok(r) => return Err(LangfuseError::Status(r.status().as_u16())),
                Err(e) if attempt < MAX_RETRIES => {
                    warn!(error = %e, attempt, "langfuse transport error; retrying");
                }
                Err(e) => return Err(LangfuseError::Transport(e.to_string())),
            }
            // Only the two retry arms fall through to here.
            tokio::time::sleep(Duration::from_millis(delay_ms)).await;
            attempt += 1;
            delay_ms = (delay_ms * 2).min(MAX_DELAY_MS);
        }
    }
}
/// A live Langfuse stream. Buffers mapped ingestion items in memory and
/// sends them to Langfuse once at least `FLUSH_ITEMS` are pending, and
/// again on `shutdown`.
///
/// NOTE(review): no time-based flush is visible in this file — items can
/// stay buffered until the size threshold or shutdown is reached.
pub struct LangfuseStream {
/// Host and credentials this stream was built with.
pub config: LangfuseConfig,
/// Scope handed back from `SessionStream::scope`.
pub scope: StreamScope,
/// Stream name reported in `summary()`.
pub name: String,
/// Timestamp taken from `Utc::now()` when the stream was constructed.
pub active_since: chrono::DateTime<chrono::Utc>,
/// HTTP client used to send batches (only with the `server` feature).
#[cfg(feature = "server")]
client: Arc<LangfuseClient>,
/// Ingestion items waiting for the next flush (only with the `server` feature).
#[cfg(feature = "server")]
buffer: Arc<Mutex<Vec<IngestItem>>>,
}
/// Buffered-item count at which `on_event` drains the buffer and sends it
/// to Langfuse.
#[cfg(feature = "server")]
const FLUSH_ITEMS: usize = 32;
impl LangfuseStream {
    /// Creates a stream backed by a real ingestion client.
    ///
    /// `active_since` is stamped with the current UTC time and the item
    /// buffer starts out empty.
    ///
    /// # Errors
    /// Propagates the error from `LangfuseClient::new` when the HTTP
    /// client cannot be built.
    #[cfg(feature = "server")]
    pub fn new(
        name: String,
        config: LangfuseConfig,
        scope: StreamScope,
    ) -> Result<Arc<Self>, LangfuseError> {
        let client = LangfuseClient::new(config.clone())?;
        let stream = Self {
            config,
            scope,
            name,
            active_since: Utc::now(),
            client: Arc::new(client),
            buffer: Arc::new(Mutex::new(Vec::new())),
        };
        Ok(Arc::new(stream))
    }

    /// Creates a stream without any transport (the `server` feature is off).
    ///
    /// `active_since` is stamped with the current UTC time.
    #[cfg(not(feature = "server"))]
    pub fn new(name: String, config: LangfuseConfig, scope: StreamScope) -> Arc<Self> {
        let stream = Self {
            config,
            scope,
            name,
            active_since: Utc::now(),
        };
        Arc::new(stream)
    }

    /// Sends everything currently buffered to Langfuse.
    ///
    /// Returns `Ok(())` without a request when the buffer is empty. The
    /// buffer lock is released before the network call starts.
    #[cfg(feature = "server")]
    async fn flush(&self) -> Result<(), LangfuseError> {
        let pending = {
            let mut guard = self.buffer.lock().await;
            std::mem::take(&mut *guard)
        };
        if pending.is_empty() {
            return Ok(());
        }
        self.client.ingest_batch(pending).await
    }
}
#[async_trait]
impl SessionStream for LangfuseStream {
    /// Describes this stream: its name, kind (`Langfuse`), target host and
    /// the time it became active.
    fn summary(&self) -> StreamSummary {
        StreamSummary {
            name: self.name.clone(),
            kind: StreamKind::Langfuse,
            target: format!("langfuse: {}", self.config.host),
            active_since: self.active_since,
        }
    }

    /// Returns a copy of the scope this stream was configured with.
    fn scope(&self) -> StreamScope {
        self.scope.clone()
    }

    /// Maps `event` to ingestion items and buffers them.
    ///
    /// - Returns `Skipped` when the event maps to no items.
    /// - Returns `Ok` when the items were buffered, or buffered and sent.
    /// - Returns `Failed(StreamError::Transport(..))` when a send was
    ///   triggered (buffer reached `FLUSH_ITEMS`) and it failed. The
    ///   drained batch is not re-queued in that case.
    #[cfg(feature = "server")]
    async fn on_event(&self, event: &BusEvent) -> StreamOutcome {
        let items = bus_event_to_items(event);
        if items.is_empty() {
            return StreamOutcome::Skipped;
        }

        // Keep the lock only while touching the buffer, never across the
        // network call.
        let batch = {
            let mut buf = self.buffer.lock().await;
            buf.extend(items);
            if buf.len() < FLUSH_ITEMS {
                return StreamOutcome::Ok;
            }
            std::mem::take(&mut *buf)
        };

        match self.client.ingest_batch(batch).await {
            Ok(()) => StreamOutcome::Ok,
            Err(e) => StreamOutcome::Failed(StreamError::Transport(e.to_string())),
        }
    }

    /// Without the `server` feature there is no transport; every event is
    /// accepted and discarded.
    #[cfg(not(feature = "server"))]
    async fn on_event(&self, _event: &BusEvent) -> StreamOutcome {
        StreamOutcome::Ok
    }

    /// Flushes whatever is still buffered.
    ///
    /// The flush is best-effort: a failure cannot be returned from here, so
    /// it is logged rather than silently discarded.
    async fn shutdown(&self) {
        #[cfg(feature = "server")]
        {
            if let Err(e) = self.flush().await {
                warn!(
                    stream = %self.name,
                    error = %e,
                    "langfuse flush on shutdown failed; buffered items dropped"
                );
            }
        }
    }
}
+48
View File
@@ -0,0 +1,48 @@
//! Phase 4: factory that builds a `LangfuseStream` from the stream's
//! params (`host`, `public_key`, `secret_key`).
use std::sync::Arc;
use async_trait::async_trait;
use dirigent_core::sharing::{StreamBuildError, StreamConfig, StreamFactory};
use dirigent_protocol::streaming::SessionStream;
use crate::client::{LangfuseConfig, LangfuseStream};
/// Stream factory for the `"langfuse"` stream kind.
pub struct LangfuseFactory;

#[async_trait]
impl StreamFactory for LangfuseFactory {
    /// Kind string under which this factory is looked up.
    fn kind(&self) -> &'static str {
        "langfuse"
    }

    /// Builds a `LangfuseStream` from `cfg`.
    ///
    /// Three string params are required and checked in this order:
    /// `host`, `public_key`, `secret_key`.
    ///
    /// # Errors
    /// - `StreamBuildError::Config` when a required param is missing or is
    ///   not a string.
    /// - `StreamBuildError::Transport` when the stream cannot be constructed.
    async fn build(&self, cfg: &StreamConfig) -> Result<Arc<dyn SessionStream>, StreamBuildError> {
        let Some(host) = cfg.params.get("host").and_then(|v| v.as_str()) else {
            return Err(StreamBuildError::Config("missing `host` (url string)".into()));
        };
        let Some(public_key) = cfg.params.get("public_key").and_then(|v| v.as_str()) else {
            return Err(StreamBuildError::Config("missing `public_key`".into()));
        };
        let Some(secret_key) = cfg.params.get("secret_key").and_then(|v| v.as_str()) else {
            return Err(StreamBuildError::Config("missing `secret_key`".into()));
        };

        let settings = LangfuseConfig {
            host: host.to_string(),
            public_key: public_key.to_string(),
            secret_key: secret_key.to_string(),
        };

        match LangfuseStream::new(cfg.name.clone(), settings, cfg.scope.clone()) {
            Ok(stream) => Ok(stream as Arc<dyn SessionStream>),
            Err(e) => Err(StreamBuildError::Transport(e.to_string())),
        }
    }
}
+13
View File
@@ -0,0 +1,13 @@
//! Langfuse SessionStream implementation.
//!
//! Phase 4 scope: buffers mapped `BusEvent`s and, with the `server`
//! feature enabled, POSTs them to the Langfuse ingestion endpoint.
mod client;
#[cfg(feature = "server")]
mod factory;
mod mapping;
pub use client::{LangfuseConfig, LangfuseStream};
#[cfg(feature = "server")]
pub use factory::LangfuseFactory;
+173
View File
@@ -0,0 +1,173 @@
//! BusEvent → Langfuse ingestion mapping.
//!
//! Maps the common BusEvent kinds to Langfuse ingestion items (traces,
//! generations, spans). Events without a `scroll_id` are dropped —
//! Langfuse requires a trace id up-front.
// The items below are only wired into the stream when the `server`
// feature is on; the default-feature build keeps them for symmetry but
// does not reference them, so allow dead-code warnings there.
#![cfg_attr(not(feature = "server"), allow(dead_code))]
use chrono::{DateTime, Utc};
use serde::Serialize;
use uuid::Uuid;
use dirigent_protocol::{streaming::BusEvent, Event};
/// A single Langfuse ingestion item.
///
/// Batched into `{ "batch": [...] }` in `LangfuseClient::ingest_batch`.
#[derive(Debug, Clone, Serialize)]
pub struct IngestItem {
/// Unique id of this ingestion item (not of the trace or generation it
/// describes).
pub id: String, // UUIDv7
/// Time at which this item was produced.
pub timestamp: DateTime<Utc>,
/// Item type; serialised under the JSON key `type`.
#[serde(rename = "type")]
pub kind: IngestKind,
/// Type-specific payload as free-form JSON.
pub body: serde_json::Value,
}
/// Type tag of an ingestion item.
///
/// Serialised in kebab-case, e.g. `TraceCreate` becomes `"trace-create"`.
#[derive(Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "kebab-case")]
#[allow(dead_code)] // SpanCreate/SpanUpdate reserved for future tool-call mapping
pub enum IngestKind {
/// Creates a trace.
TraceCreate,
/// Creates a generation.
GenerationCreate,
/// Updates an existing generation.
GenerationUpdate,
/// Creates a span (not produced by the current mapping).
SpanCreate,
/// Updates a span (not produced by the current mapping).
SpanUpdate,
}
/// Converts one `BusEvent` into zero or more Langfuse ingestion items.
///
/// Events whose routing has no `scroll_id` yield nothing, because the
/// scroll id is used as the Langfuse trace id. Only `SessionCreated`,
/// `MessageStarted` and `MessageCompleted` are mapped; every other event
/// yields an empty vector.
pub fn bus_event_to_items(bus_event: &BusEvent) -> Vec<IngestItem> {
    // Without a bound scroll id there is no trace to attach to; the event
    // is dropped here (nothing is buffered for late binding).
    let trace_id = match bus_event.routing.scroll_id {
        Some(scroll_id) => scroll_id.to_string(),
        None => return Vec::new(),
    };
    let now = Utc::now();

    let (kind, body) = match &*bus_event.event {
        Event::SessionCreated { session, .. } => {
            // Prefer the session title; use the session id when the title is empty.
            let name = if session.title.is_empty() {
                &session.id
            } else {
                &session.title
            };
            (
                IngestKind::TraceCreate,
                serde_json::json!({
                    "id": trace_id,
                    "name": name,
                }),
            )
        }
        Event::MessageStarted { message, .. } => (
            IngestKind::GenerationCreate,
            serde_json::json!({
                "id": message.id,
                "traceId": trace_id,
                "name": format!("{:?}", message.role),
                "startTime": message.created_at,
            }),
        ),
        Event::MessageCompleted { message, .. } => (
            IngestKind::GenerationUpdate,
            serde_json::json!({
                "id": message.id,
                "traceId": trace_id,
                "endTime": now,
                "output": serialize_content(&message.content),
            }),
        ),
        // Already covered by `MessageCompleted`.
        Event::TurnComplete { .. } => return Vec::new(),
        // Tool-call updates and everything else are not mapped yet.
        _ => return Vec::new(),
    };

    vec![IngestItem {
        id: Uuid::now_v7().to_string(),
        timestamp: now,
        kind,
        body,
    }]
}
/// Serialises message parts to JSON, yielding `null` if serialisation fails.
fn serialize_content(parts: &[dirigent_protocol::MessagePart]) -> serde_json::Value {
    match serde_json::to_value(parts) {
        Ok(value) => value,
        Err(_) => serde_json::Value::Null,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use dirigent_protocol::streaming::{BusEvent, EventKind, EventOrigin, EventRouting};
    use dirigent_protocol::{Event, Message, MessageRole, MessageStatus};
    use std::sync::Arc;

    /// Wraps `event` in a `BusEvent` whose routing is bound to `scroll_id`.
    fn make_bus_event_with_scroll(event: Event, scroll_id: Uuid) -> BusEvent {
        let routing = EventRouting {
            scroll_id: Some(scroll_id),
            connector_uid: Some(Uuid::new_v4()),
            connector_id: Some("c".into()),
            native_session_id: Some("s".into()),
            kind: EventKind::Message,
        };
        BusEvent {
            routing,
            origin: EventOrigin::Runtime,
            event: Arc::new(event),
        }
    }

    /// A started message with a bound scroll id maps to exactly one
    /// `generation-create` item.
    #[test]
    fn message_started_produces_generation_create() {
        let message = Message {
            id: "m1".into(),
            session_id: "s".into(),
            role: MessageRole::Assistant,
            created_at: chrono::Utc::now(),
            content: vec![],
            status: MessageStatus::Streaming,
            metadata: None,
        };
        let started = Event::MessageStarted {
            connector_id: "c".into(),
            message,
        };
        let wrapped = make_bus_event_with_scroll(started, Uuid::new_v4());

        let produced = bus_event_to_items(&wrapped);

        assert_eq!(produced.len(), 1);
        assert!(matches!(produced[0].kind, IngestKind::GenerationCreate));
    }

    /// An event with default routing (no scroll id) is dropped.
    #[test]
    fn no_scroll_id_drops_event() {
        let unbound = BusEvent {
            routing: EventRouting::default(),
            origin: EventOrigin::Runtime,
            event: Arc::new(Event::Connected),
        };

        let produced = bus_event_to_items(&unbound);

        assert!(produced.is_empty());
    }

    /// `Connected` is not a mapped variant, so it yields nothing even when
    /// a scroll id is bound.
    #[test]
    fn unmapped_event_returns_empty() {
        let wrapped = make_bus_event_with_scroll(Event::Connected, Uuid::new_v4());

        let produced = bus_event_to_items(&wrapped);

        assert!(produced.is_empty());
    }
}