sync from monorepo @ 2452e92e
This commit is contained in:
@@ -0,0 +1,72 @@
|
||||
# Package: dirigent_langfuse
|
||||
|
||||
Phase 4 stream backend that mirrors BusEvents to a Langfuse ingestion
|
||||
endpoint.
|
||||
|
||||
## Scope
|
||||
|
||||
- `LangfuseFactory` registered as `kind = "langfuse"` in the
|
||||
`StreamFactoryRegistry`.
|
||||
- `LangfuseStream` implements `SessionStream`:
|
||||
- Maps each `BusEvent` via `mapping::bus_event_to_items`.
|
||||
- Buffers up to 32 items per flush; flushes eagerly when full and on
|
||||
shutdown.
|
||||
- POSTs `{host}/api/public/ingestion` with basic-auth
|
||||
`(public_key, secret_key)`.
|
||||
|
||||
## File map
|
||||
|
||||
- `src/lib.rs` — public API: `LangfuseStream`, `LangfuseConfig`,
|
||||
`LangfuseFactory`.
|
||||
- `src/client.rs` — `LangfuseClient` (reqwest wrapper with retry) and
|
||||
the `LangfuseStream` implementation.
|
||||
- `src/mapping.rs` — `bus_event_to_items` mapping.
|
||||
- `src/factory.rs` — `StreamFactory` impl.
|
||||
|
||||
## Event → ingestion mapping
|
||||
|
||||
| BusEvent variant | Langfuse item |
|
||||
|------------------|---------------|
|
||||
| `SessionCreated` | `trace-create` (id = `scroll_id`) |
|
||||
| `MessageStarted` | `generation-create` |
|
||||
| `MessageCompleted` | `generation-update` with output |
|
||||
| `SessionUpdate` (all variants, including tool calls for now) | skipped |
|
||||
| All others | skipped |
|
||||
|
||||
Events without a bound `scroll_id` (no late-bind hit) are dropped — the
|
||||
implementation does NOT buffer pending events keyed by connector_id /
|
||||
native_session_id in Phase 4. If buffering is needed later, extend
|
||||
`LangfuseStream::on_event`.
|
||||
|
||||
## Failure modes
|
||||
|
||||
- Transport error → retried with the same backoff as a 5xx; once retries are exhausted, `StreamOutcome::Failed(StreamError::Transport)`.
|
||||
Health drift applies; the stream goes Degraded after one failure and
|
||||
Unavailable after five consecutive failures.
|
||||
- 5xx response → retried up to 3 times with exponential backoff
|
||||
(100ms → 200ms → 400ms between attempts; the doubling is capped at 1s).
|
||||
- 4xx response → returned as `LangfuseError::Status(code)`; no retry.
|
||||
- Empty scroll_id → `StreamOutcome::Skipped` (not a failure).
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml
|
||||
[[streams]]
|
||||
name = "langfuse-prod"
|
||||
type = "langfuse"
|
||||
enabled = true
|
||||
[streams.scope]
|
||||
kind = "connector"
|
||||
connector_uid = "01985d00-..."
|
||||
[streams.params]
|
||||
host = "https://langfuse.example.com"
|
||||
public_key = "pk-lf-..."
|
||||
secret_key = "sk-lf-..."
|
||||
```
|
||||
|
||||
## Deferred
|
||||
|
||||
- Tool-call → span mapping (`SpanCreate`/`SpanUpdate`): scaffolded but
|
||||
not yet populated.
|
||||
- Buffering pending events keyed by `(connector_id, native_session_id)`
|
||||
for late-bind scenarios.
|
||||
@@ -0,0 +1,23 @@
|
||||
[package]
|
||||
name = "dirigent_langfuse"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
server = ["dep:reqwest", "dep:tokio", "dep:dirigent_core", "dirigent_core/server"]
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
dirigent_core = { path = "../dirigent_core", optional = true }
|
||||
dirigent_protocol = { path = "../dirigent_protocol" }
|
||||
reqwest = { version = "0.12", optional = true, features = ["json"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
thiserror = "2.0"
|
||||
# `time` is required by `tokio::time::sleep` in the ingestion retry loop; do not
# rely on another crate in the graph enabling it.
tokio = { version = "1", optional = true, features = ["rt", "sync", "macros", "time"] }
|
||||
toml = "0.8"
|
||||
tracing = "0.1"
|
||||
url = "2"
|
||||
uuid = { version = "1", features = ["v4", "v7"] }
|
||||
@@ -0,0 +1,204 @@
|
||||
//! Langfuse ingestion client. Phase 4 feature-gated on `server`.
|
||||
|
||||
use std::sync::Arc;
|
||||
#[cfg(feature = "server")]
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::Utc;
|
||||
use thiserror::Error;
|
||||
#[cfg(feature = "server")]
|
||||
use tokio::sync::Mutex;
|
||||
#[cfg(feature = "server")]
|
||||
use tracing::warn;
|
||||
|
||||
use dirigent_protocol::streaming::{
|
||||
BusEvent, SessionStream, StreamKind, StreamOutcome, StreamScope, StreamSummary,
|
||||
};
|
||||
#[cfg(feature = "server")]
|
||||
use dirigent_protocol::streaming::StreamError;
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
use crate::mapping::{bus_event_to_items, IngestItem};
|
||||
|
||||
/// Langfuse stream configuration (credentials + host).
///
/// `Debug` is implemented by hand so that `secret_key` is never written out
/// when a config value is logged or included in a panic message.
#[derive(Clone)]
pub struct LangfuseConfig {
    /// Base URL of the Langfuse instance; `/api/public/ingestion` is appended
    /// to it when sending batches.
    pub host: String,
    /// Public API key, sent as the HTTP basic-auth user name.
    pub public_key: String,
    /// Secret API key, sent as the HTTP basic-auth password.
    pub secret_key: String,
}

impl std::fmt::Debug for LangfuseConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LangfuseConfig")
            .field("host", &self.host)
            .field("public_key", &self.public_key)
            // Never print the credential itself.
            .field("secret_key", &"<redacted>")
            .finish()
    }
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[cfg_attr(not(feature = "server"), allow(dead_code))]
|
||||
pub enum LangfuseError {
|
||||
#[error("transport: {0}")]
|
||||
Transport(String),
|
||||
#[error("unexpected status: {0}")]
|
||||
Status(u16),
|
||||
#[error("serialisation: {0}")]
|
||||
Serialisation(String),
|
||||
}
|
||||
|
||||
/// Minimal `reqwest::Client` wrapper for the Langfuse ingestion API.
///
/// Batches are POSTed to `{host}/api/public/ingestion` using HTTP basic auth.
#[cfg(feature = "server")]
pub(crate) struct LangfuseClient {
    /// Underlying HTTP client.
    http: reqwest::Client,
    /// Base URL of the Langfuse instance, as supplied in the config.
    host: String,
    /// Basic-auth credentials as `(public_key, secret_key)`.
    auth: (String, String),
}
|
||||
|
||||
#[cfg(feature = "server")]
impl LangfuseClient {
    /// Builds a client for the instance described by `config`.
    ///
    /// Every request made through the client has a 10 second timeout.
    ///
    /// # Errors
    ///
    /// Returns `LangfuseError::Transport` if the underlying `reqwest` client
    /// cannot be constructed.
    pub fn new(config: LangfuseConfig) -> Result<Self, LangfuseError> {
        let http = reqwest::Client::builder()
            .timeout(Duration::from_secs(10))
            .build()
            .map_err(|e| LangfuseError::Transport(e.to_string()))?;
        Ok(Self {
            http,
            host: config.host,
            auth: (config.public_key, config.secret_key),
        })
    }

    /// POSTs `batch` as `{ "batch": [...] }` to the ingestion endpoint.
    ///
    /// An empty batch is a no-op. Server errors (5xx) and transport errors
    /// are retried up to three times, sleeping 100ms, 200ms and 400ms between
    /// attempts. Any other non-success status is returned immediately.
    ///
    /// # Errors
    ///
    /// - `LangfuseError::Serialisation` if the batch cannot be turned into JSON.
    /// - `LangfuseError::Status` for a non-success response that is not
    ///   retried, or for a 5xx that persists after the last retry.
    /// - `LangfuseError::Transport` if sending still fails after the last retry.
    ///
    /// NOTE(review): any 2xx response counts as success and the response body
    /// is not inspected — confirm whether the endpoint reports per-item
    /// failures in the body that should be surfaced here.
    pub async fn ingest_batch(&self, batch: Vec<IngestItem>) -> Result<(), LangfuseError> {
        const MAX_RETRIES: u32 = 3;
        const INITIAL_BACKOFF_MS: u64 = 100;
        const MAX_BACKOFF_MS: u64 = 1000;

        if batch.is_empty() {
            return Ok(());
        }
        let url = format!("{}/api/public/ingestion", self.host.trim_end_matches('/'));

        // Serialise explicitly: interpolating `batch` straight into `json!`
        // would unwrap internally and panic on a serialisation failure.
        let items = serde_json::to_value(&batch)
            .map_err(|e| LangfuseError::Serialisation(e.to_string()))?;
        let payload = serde_json::json!({ "batch": items });

        let mut attempt = 0u32;
        let mut delay_ms = INITIAL_BACKOFF_MS;
        loop {
            let resp = self
                .http
                .post(&url)
                .basic_auth(&self.auth.0, Some(&self.auth.1))
                .json(&payload)
                .send()
                .await;

            match resp {
                Ok(r) if r.status().is_success() => return Ok(()),
                Ok(r) if r.status().is_server_error() && attempt < MAX_RETRIES => {
                    warn!(status = %r.status(), attempt, "langfuse ingestion 5xx; retrying");
                }
                // Non-retryable status, or a 5xx with the retry budget spent.
                Ok(r) => return Err(LangfuseError::Status(r.status().as_u16())),
                Err(e) if attempt < MAX_RETRIES => {
                    warn!(error = %e, attempt, "langfuse transport error; retrying");
                }
                Err(e) => return Err(LangfuseError::Transport(e.to_string())),
            }

            // Only the two "retrying" arms fall through to here.
            tokio::time::sleep(Duration::from_millis(delay_ms)).await;
            attempt += 1;
            delay_ms = (delay_ms * 2).min(MAX_BACKOFF_MS);
        }
    }
}
|
||||
|
||||
/// A live Langfuse stream. Buffers items in-memory and flushes every N ms
|
||||
/// or M items, whichever is first.
|
||||
pub struct LangfuseStream {
|
||||
pub config: LangfuseConfig,
|
||||
pub scope: StreamScope,
|
||||
pub name: String,
|
||||
pub active_since: chrono::DateTime<chrono::Utc>,
|
||||
#[cfg(feature = "server")]
|
||||
client: Arc<LangfuseClient>,
|
||||
#[cfg(feature = "server")]
|
||||
buffer: Arc<Mutex<Vec<IngestItem>>>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
const FLUSH_ITEMS: usize = 32;
|
||||
|
||||
impl LangfuseStream {
|
||||
#[cfg(feature = "server")]
|
||||
pub fn new(
|
||||
name: String,
|
||||
config: LangfuseConfig,
|
||||
scope: StreamScope,
|
||||
) -> Result<Arc<Self>, LangfuseError> {
|
||||
let client = Arc::new(LangfuseClient::new(config.clone())?);
|
||||
Ok(Arc::new(Self {
|
||||
config,
|
||||
scope,
|
||||
name,
|
||||
active_since: Utc::now(),
|
||||
client,
|
||||
buffer: Arc::new(Mutex::new(Vec::new())),
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "server"))]
|
||||
pub fn new(name: String, config: LangfuseConfig, scope: StreamScope) -> Arc<Self> {
|
||||
Arc::new(Self {
|
||||
config,
|
||||
scope,
|
||||
name,
|
||||
active_since: Utc::now(),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
async fn flush(&self) -> Result<(), LangfuseError> {
|
||||
let mut buf = self.buffer.lock().await;
|
||||
if buf.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
let batch: Vec<_> = buf.drain(..).collect();
|
||||
drop(buf);
|
||||
self.client.ingest_batch(batch).await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SessionStream for LangfuseStream {
|
||||
fn summary(&self) -> StreamSummary {
|
||||
StreamSummary {
|
||||
name: self.name.clone(),
|
||||
kind: StreamKind::Langfuse,
|
||||
target: format!("langfuse: {}", self.config.host),
|
||||
active_since: self.active_since,
|
||||
}
|
||||
}
|
||||
fn scope(&self) -> StreamScope {
|
||||
self.scope.clone()
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
async fn on_event(&self, event: &BusEvent) -> StreamOutcome {
|
||||
let items = bus_event_to_items(event);
|
||||
if items.is_empty() {
|
||||
return StreamOutcome::Skipped;
|
||||
}
|
||||
|
||||
let mut buf = self.buffer.lock().await;
|
||||
buf.extend(items);
|
||||
if buf.len() >= FLUSH_ITEMS {
|
||||
let batch: Vec<_> = buf.drain(..).collect();
|
||||
drop(buf);
|
||||
match self.client.ingest_batch(batch).await {
|
||||
Ok(()) => StreamOutcome::Ok,
|
||||
Err(e) => StreamOutcome::Failed(StreamError::Transport(e.to_string())),
|
||||
}
|
||||
} else {
|
||||
StreamOutcome::Ok
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "server"))]
|
||||
async fn on_event(&self, _event: &BusEvent) -> StreamOutcome {
|
||||
StreamOutcome::Ok
|
||||
}
|
||||
|
||||
async fn shutdown(&self) {
|
||||
#[cfg(feature = "server")]
|
||||
{
|
||||
let _ = self.flush().await;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
//! Phase 4: factory that builds a `LangfuseStream` from a `StreamConfig`,
//! reading `host`, `public_key` and `secret_key` from the stream params.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use dirigent_core::sharing::{StreamBuildError, StreamConfig, StreamFactory};
|
||||
use dirigent_protocol::streaming::SessionStream;
|
||||
|
||||
use crate::client::{LangfuseConfig, LangfuseStream};
|
||||
|
||||
pub struct LangfuseFactory;
|
||||
|
||||
#[async_trait]
|
||||
impl StreamFactory for LangfuseFactory {
|
||||
fn kind(&self) -> &'static str { "langfuse" }
|
||||
|
||||
async fn build(&self, cfg: &StreamConfig) -> Result<Arc<dyn SessionStream>, StreamBuildError> {
|
||||
// Parse params. Required fields:
|
||||
// host: String (URL)
|
||||
// public_key: String
|
||||
// secret_key: String
|
||||
//
|
||||
// Phase 4 stub: parse-or-fail, then construct LangfuseStream with
|
||||
// the parsed config. Task 22 uses the host to build a reqwest client.
|
||||
|
||||
let host = cfg.params
|
||||
.get("host").and_then(|v| v.as_str())
|
||||
.ok_or_else(|| StreamBuildError::Config("missing `host` (url string)".into()))?;
|
||||
let public_key = cfg.params
|
||||
.get("public_key").and_then(|v| v.as_str())
|
||||
.ok_or_else(|| StreamBuildError::Config("missing `public_key`".into()))?;
|
||||
let secret_key = cfg.params
|
||||
.get("secret_key").and_then(|v| v.as_str())
|
||||
.ok_or_else(|| StreamBuildError::Config("missing `secret_key`".into()))?;
|
||||
|
||||
let lf_cfg = LangfuseConfig {
|
||||
host: host.to_string(),
|
||||
public_key: public_key.to_string(),
|
||||
secret_key: secret_key.to_string(),
|
||||
};
|
||||
|
||||
let stream = LangfuseStream::new(cfg.name.clone(), lf_cfg, cfg.scope.clone())
|
||||
.map_err(|e| StreamBuildError::Transport(e.to_string()))?;
|
||||
Ok(stream as Arc<dyn SessionStream>)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
//! Langfuse SessionStream implementation.
|
||||
//!
|
||||
//! Phase 4 scope: stub implementation. Task 22 adds the real HTTP
|
||||
//! client + event-to-ingestion mapping.
|
||||
|
||||
mod client;
|
||||
#[cfg(feature = "server")]
|
||||
mod factory;
|
||||
mod mapping;
|
||||
|
||||
pub use client::{LangfuseConfig, LangfuseStream};
|
||||
#[cfg(feature = "server")]
|
||||
pub use factory::LangfuseFactory;
|
||||
@@ -0,0 +1,173 @@
|
||||
//! BusEvent → Langfuse ingestion mapping.
|
||||
//!
|
||||
//! Maps the common BusEvent kinds to Langfuse ingestion items (traces,
|
||||
//! generations, spans). Events without a `scroll_id` are dropped —
|
||||
//! Langfuse requires a trace id up-front.
|
||||
|
||||
// The items below are only wired into the stream when the `server`
|
||||
// feature is on; the default-feature build keeps them for symmetry but
|
||||
// does not reference them, so allow dead-code warnings there.
|
||||
#![cfg_attr(not(feature = "server"), allow(dead_code))]
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::Serialize;
|
||||
use uuid::Uuid;
|
||||
|
||||
use dirigent_protocol::{streaming::BusEvent, Event};
|
||||
|
||||
/// A single Langfuse ingestion item.
|
||||
///
|
||||
/// Batched into `{ "batch": [...] }` in `LangfuseClient::ingest_batch`.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct IngestItem {
|
||||
pub id: String, // UUIDv7
|
||||
pub timestamp: DateTime<Utc>,
|
||||
#[serde(rename = "type")]
|
||||
pub kind: IngestKind,
|
||||
pub body: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
#[allow(dead_code)] // SpanCreate/SpanUpdate reserved for future tool-call mapping
|
||||
pub enum IngestKind {
|
||||
TraceCreate,
|
||||
GenerationCreate,
|
||||
GenerationUpdate,
|
||||
SpanCreate,
|
||||
SpanUpdate,
|
||||
}
|
||||
|
||||
pub fn bus_event_to_items(bus_event: &BusEvent) -> Vec<IngestItem> {
|
||||
let Some(scroll_id) = bus_event.routing.scroll_id else {
|
||||
// No scroll_id binding yet — drop. Upstream callers may choose to
|
||||
// buffer pending events keyed by (connector_id, native_id) until
|
||||
// SessionRegistered arrives; Phase 4 scope: drop and log.
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
let trace_id = scroll_id.to_string();
|
||||
let now = Utc::now();
|
||||
|
||||
match &*bus_event.event {
|
||||
Event::SessionCreated { session, .. } => {
|
||||
// `session.title` is a `String`; fall back to the id if empty.
|
||||
let name = if session.title.is_empty() {
|
||||
session.id.clone()
|
||||
} else {
|
||||
session.title.clone()
|
||||
};
|
||||
vec![IngestItem {
|
||||
id: Uuid::now_v7().to_string(),
|
||||
timestamp: now,
|
||||
kind: IngestKind::TraceCreate,
|
||||
body: serde_json::json!({
|
||||
"id": trace_id,
|
||||
"name": name,
|
||||
}),
|
||||
}]
|
||||
}
|
||||
Event::MessageStarted { message, .. } => {
|
||||
vec![IngestItem {
|
||||
id: Uuid::now_v7().to_string(),
|
||||
timestamp: now,
|
||||
kind: IngestKind::GenerationCreate,
|
||||
body: serde_json::json!({
|
||||
"id": message.id,
|
||||
"traceId": trace_id,
|
||||
"name": format!("{:?}", message.role),
|
||||
"startTime": message.created_at,
|
||||
}),
|
||||
}]
|
||||
}
|
||||
Event::MessageCompleted { message, .. } => {
|
||||
vec![IngestItem {
|
||||
id: Uuid::now_v7().to_string(),
|
||||
timestamp: now,
|
||||
kind: IngestKind::GenerationUpdate,
|
||||
body: serde_json::json!({
|
||||
"id": message.id,
|
||||
"traceId": trace_id,
|
||||
"endTime": now,
|
||||
"output": serialize_content(&message.content),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
Event::TurnComplete { .. } => Vec::new(), // captured by MessageCompleted
|
||||
// SessionUpdate::ToolCall* — would need a case-by-case mapping; out of
|
||||
// Phase 4 scope. Return empty for now.
|
||||
_ => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts message parts to a JSON value, yielding JSON `null` if
/// serialisation fails.
fn serialize_content(parts: &[dirigent_protocol::MessagePart]) -> serde_json::Value {
    // `serde_json::Value::default()` is `Value::Null`.
    serde_json::to_value(parts).unwrap_or_default()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use dirigent_protocol::streaming::{BusEvent, EventKind, EventOrigin, EventRouting};
    use dirigent_protocol::{Event, Message, MessageRole, MessageStatus};
    use std::sync::Arc;

    /// A minimal assistant message that is still streaming.
    fn streaming_message() -> Message {
        Message {
            id: "m1".into(),
            session_id: "s".into(),
            role: MessageRole::Assistant,
            created_at: chrono::Utc::now(),
            content: vec![],
            status: MessageStatus::Streaming,
            metadata: None,
        }
    }

    /// A `MessageStarted` event, i.e. one of the variants that IS mapped.
    fn message_started() -> Event {
        Event::MessageStarted {
            connector_id: "c".into(),
            message: streaming_message(),
        }
    }

    /// Wraps `event` in a `BusEvent` whose routing has `scroll_id` bound.
    fn make_bus_event_with_scroll(event: Event, scroll_id: Uuid) -> BusEvent {
        BusEvent {
            routing: EventRouting {
                scroll_id: Some(scroll_id),
                connector_uid: Some(Uuid::new_v4()),
                connector_id: Some("c".into()),
                native_session_id: Some("s".into()),
                kind: EventKind::Message,
            },
            origin: EventOrigin::Runtime,
            event: Arc::new(event),
        }
    }

    #[test]
    fn message_started_produces_generation_create() {
        let scroll_id = Uuid::new_v4();
        let bus_event = make_bus_event_with_scroll(message_started(), scroll_id);

        let items = bus_event_to_items(&bus_event);

        assert_eq!(items.len(), 1);
        assert!(matches!(items[0].kind, IngestKind::GenerationCreate));
        // The trace id must be the scroll id the event was routed with.
        assert_eq!(items[0].body["traceId"], scroll_id.to_string());
    }

    #[test]
    fn no_scroll_id_drops_event() {
        // Use a variant that is mapped when a scroll_id is present, so this
        // test fails if the missing-scroll_id guard is ever removed.
        let bus_event = BusEvent {
            routing: EventRouting {
                scroll_id: None,
                ..EventRouting::default()
            },
            origin: EventOrigin::Runtime,
            event: Arc::new(message_started()),
        };

        let items = bus_event_to_items(&bus_event);

        assert!(items.is_empty());
    }

    #[test]
    fn unmapped_event_returns_empty() {
        // `Connected` is not one of our mapped variants even when a scroll_id
        // is bound → expect 0 items.
        let scroll_id = Uuid::new_v4();
        let bus_event = make_bus_event_with_scroll(Event::Connected, scroll_id);

        let items = bus_event_to_items(&bus_event);

        assert!(items.is_empty());
    }
}
|
||||
Reference in New Issue
Block a user