From ed8bc3e5fda7553e501fdde70a3826a773a8033e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabor=20K=C3=B6rber?= Date: Thu, 7 May 2026 21:59:24 +0200 Subject: [PATCH] rename dirigent_ant to dirigent_anth, binaries to anth_bear/anth_usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the Claude Code session parser crate from dirigent_ant to dirigent_anth. Binary targets renamed: ant → anth_bear, ant_usage → anth_usage. Module claude_usage renamed to anth_usage throughout. Also normalizes CRLF → LF line endings across touched files. Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 148 +++ Cargo.toml | 40 + src/anth_usage.rs | 331 +++++++ src/bin/anth.rs | 252 ++++++ src/bin/anth_usage.rs | 192 ++++ src/claude_grab.rs | 157 ++++ src/correlation.rs | 107 +++ src/dedup.rs | 116 +++ src/discovery.rs | 342 +++++++ src/error.rs | 19 + src/lib.rs | 52 ++ src/noise.rs | 72 ++ src/parser.rs | 50 ++ src/subagent.rs | 215 +++++ src/tree.rs | 171 ++++ src/types.rs | 847 ++++++++++++++++++ src/util.rs | 70 ++ tests/fixtures/branching_tree.jsonl | 6 + tests/fixtures/minimal_session.jsonl | 6 + tests/fixtures/noise_patterns.jsonl | 9 + tests/fixtures/streaming_dedup.jsonl | 6 + tests/fixtures/subagent/parent.jsonl | 4 + .../parent/subagents/agent-abc123.jsonl | 2 + .../parent/subagents/agent-abc123.meta.json | 1 + tests/fixtures/tool_correlation.jsonl | 6 + tests/integration_tests.rs | 294 ++++++ tests/usage_parse.rs | 101 +++ 27 files changed, 3616 insertions(+) create mode 100644 CLAUDE.md create mode 100644 Cargo.toml create mode 100644 src/anth_usage.rs create mode 100644 src/bin/anth.rs create mode 100644 src/bin/anth_usage.rs create mode 100644 src/claude_grab.rs create mode 100644 src/correlation.rs create mode 100644 src/dedup.rs create mode 100644 src/discovery.rs create mode 100644 src/error.rs create mode 100644 src/lib.rs create mode 100644 src/noise.rs create mode 100644 src/parser.rs create mode 100644 src/subagent.rs create mode 100644 src/tree.rs create mode 100644 src/types.rs create mode 100644 src/util.rs create mode 100644 tests/fixtures/branching_tree.jsonl create mode 100644 tests/fixtures/minimal_session.jsonl create mode 100644 tests/fixtures/noise_patterns.jsonl create mode 100644 tests/fixtures/streaming_dedup.jsonl create mode 100644 tests/fixtures/subagent/parent.jsonl create mode 100644 tests/fixtures/subagent/parent/subagents/agent-abc123.jsonl create mode 100644 tests/fixtures/subagent/parent/subagents/agent-abc123.meta.json create mode 100644 tests/fixtures/tool_correlation.jsonl create mode 100644 tests/integration_tests.rs create mode 100644 tests/usage_parse.rs diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..051c429 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,148 @@ +# Package: dirigent_anth + +Claude Code JSONL session parser and toolkit. + +## Quick Facts +- **Type**: Library +- **Main Entry**: src/lib.rs +- **Dependencies**: serde, serde_json, chrono, uuid, camino, thiserror, tracing, dirs +- **Status**: Core parsing complete — ready for downstream consumers + +## Purpose + +Reads Claude Code's local JSONL session storage (`~/.claude/projects/`) and produces typed, deduplicated, correlated Rust data structures. The types are the product — downstream consumers (archivist import, shell usage analyzers, session browsers) depend on these structs. + +## Key Features + +- **Session Discovery**: Scan `~/.claude/projects/` for all Claude Code projects and sessions +- **JSONL Parsing**: Lenient line-by-line parser that handles unknown fields and message types +- **Streaming Dedup**: Collapse streamed assistant messages to their final version +- **Tool Correlation**: ID-based pairing of tool_use → tool_result across parallel calls +- **Conversation Tree**: Reconstruct uuid/parentUuid threading with branch detection +- **Noise Classification**: Identify meta messages, warmup, interruptions, API errors +- **Sub-Agent Loading**: Recursive parsing of sub-agent JSONL with metadata +- **Timestamp Parsing**: Handle ISO 8601, Unix seconds, and Unix milliseconds + +## Architecture + +### Design Principles + +1. **Types are the product** — Well-typed Rust structs that downstream consumers import +2. **Lenient parsing** — Unknown fields ignored, unknown message types logged and skipped +3. **Stream-oriented** — Line-by-line BufReader parsing, never loads entire files +4. **Sync-first** — File parsing is CPU-bound; no async overhead +5. **Cross-platform** — camino::Utf8PathBuf throughout for Windows/Unix compatibility + +### Module Organization + +- **`types.rs`** — All public data types (Content, ContentBlock, RawMessage variants, ToolCall, etc.) +- **`error.rs`** — AntError enum with I/O, JSON parse, home-not-found, invalid-path variants +- **`parser.rs`** — JSONL line parser and file parser with lenient error handling +- **`dedup.rs`** — Streaming deduplication of assistant messages by uuid +- **`correlation.rs`** — Tool call ↔ result pairing by tool_use_id +- **`tree.rs`** — Conversation tree from uuid/parentUuid relationships +- **`noise.rs`** — Noise pattern classification (meta, warmup, interruptions, etc.) +- **`discovery.rs`** — Filesystem scanning for Claude projects and sessions +- **`subagent.rs`** — Sub-agent JSONL and metadata loading +- **`util.rs`** — Timestamp parsing utilities + +## Public API + +### Quick Start + +```rust +use dirigent_anth::{discover_claude_home, discover_projects, load_session}; + +// Discover all projects +let home = discover_claude_home()?; +let projects = discover_projects(&home)?; + +// Load a session with full parsing +for project in &projects { + for session_ref in &project.sessions { + let session = load_session(session_ref)?; + println!("Messages: {}, Tools: {}, Subagents: {}", + session.messages.len(), + session.tool_exchanges.len(), + session.subagents.len()); + } +} +``` + +### Key Functions + +| Function | Purpose | +|----------|---------| +| `discover_claude_home()` | Find `~/.claude/` directory | +| `discover_projects(home)` | Scan for all project directories | +| `parse_session(path)` | Parse a JSONL file into messages | +| `parse_session_deduped(path)` | Parse with streaming dedup applied | +| `dedup_messages(msgs)` | Deduplicate streamed assistant messages | +| `correlate_tools(msgs)` | Pair tool calls with results by ID | +| `ConversationTree::build(msgs)` | Build conversation tree | +| `classify_noise(msg)` | Classify a message as noise | +| `load_subagents(dir)` | Load sub-agent sessions from artifacts | +| `load_session(ref)` | Full parse: dedup + correlate + tree + subagents | +| `parse_timestamp(value)` | Parse ISO/Unix timestamps | + +## Data Model + +### Claude Code JSONL Format + +Each line in `~/.claude/projects//.jsonl` is a JSON object with a `type` field discriminator. Five types: `user`, `assistant`, `progress`, `system`, `queue-operation`. + +- **Outer wrapper**: camelCase fields (sessionId, parentUuid, isSidechain, gitBranch) +- **Inner message body**: snake_case fields (stop_reason, tool_use_id, is_error) +- **Content**: Either a plain string or array of typed content blocks + +### Content Blocks + +| Type | Fields | +|------|--------| +| text | `text` | +| tool_use | `id`, `name`, `input` | +| tool_result | `tool_use_id`, `content`, `is_error` | +| thinking | `thinking` | +| image | `source` | + +Unknown content block types are silently dropped (lenient deserialization). + +## Testing + +```bash +cargo test --package dirigent_anth +``` + +Tests use synthetic JSONL fixtures in `tests/fixtures/`: +- `minimal_session.jsonl` — Basic session with all message types +- `streaming_dedup.jsonl` — Streaming dedup scenario +- `tool_correlation.jsonl` — Parallel and sequential tool calls +- `branching_tree.jsonl` — Conversation with branches +- `noise_patterns.jsonl` — All noise pattern types +- `subagent/` — Sub-agent session with parent and metadata + +## Error Handling + +- Individual unparseable JSONL lines are logged and skipped (lenient) +- I/O errors and missing directories are propagated as AntError +- Unknown message types are skipped via serde +- Unknown content blocks are silently filtered + +## Related Packages + +- **dirigent_archivist** — Future consumer for session import +- No current dependencies on other dirigent packages (standalone) + +## Future Enhancements + +- Bash command analysis module (shell usage analytics) +- Archivist event transform/import +- CLI tool with scan/analyze/import subcommands +- SQLite caching layer +- Watch mode for new session monitoring + +## Documentation + +- **Package README**: `./README.md` - User-facing overview +- **API Docs**: Run `cargo doc --package dirigent_anth --open` +- **Design Plan**: `docs/superpowers/plans/2026-03-23-dirigent-ant-design.md` diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..3065cd2 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "dirigent_anth" +version = "0.1.0" +edition = "2021" + +[lib] +path = "src/lib.rs" + +[[bin]] +name = "anth_bear" +path = "src/bin/anth.rs" + +[[bin]] +name = "anth_usage" +path = "src/bin/anth_usage.rs" + +[features] +default = [] +dirigent-paths = ["dep:dirigent_config"] + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +chrono = { version = "0.4", features = ["serde"] } +chrono-tz = "0.10" +uuid = { version = "1.11", features = ["serde"] } +camino = { version = "1.1", features = ["serde1"] } +dirs = "6.0" +thiserror = "2.0" +tracing = "0.1" +regex = "1" +portable-pty = "0.8" +vt100 = "0.15" +dirigent_config = { path = "../dirigent_config", optional = true } + +[dev-dependencies] +tempfile = "3.0" + +[lints] +workspace = true diff --git a/src/anth_usage.rs b/src/anth_usage.rs new file mode 100644 index 0000000..b9213e3 --- /dev/null +++ b/src/anth_usage.rs @@ -0,0 +1,331 @@ +use chrono::{Datelike, NaiveDate, NaiveTime, Utc}; +use chrono_tz::Tz; +use serde::Serialize; + +#[derive(Debug, Serialize, Default)] +pub struct UsageData { + pub gauges: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub contributions: Option, +} + +#[derive(Debug, Serialize)] +pub struct UsageGauge { + pub name: String, + pub percent_used: u32, + #[serde(skip_serializing_if = "Option::is_none")] + pub resets: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub resets_iso: Option, +} + +#[derive(Debug, Serialize, Default)] +pub struct ContributionInfo { + #[serde(skip_serializing_if = "Vec::is_empty")] + pub factors: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub subagents: Vec, +} + +#[derive(Debug, Serialize)] +pub struct ContributionFactor { + pub description: String, + pub percent: u32, +} + +#[derive(Debug, Serialize)] +pub struct SubagentUsage { + pub name: String, + pub percent: u32, +} + +pub struct ProcessedOutput { + pub raw_screen: String, + pub data: UsageData, +} + +pub fn process_usage_screen(raw: &str) -> ProcessedOutput { + let lines: Vec<&str> = raw.lines().collect(); + + let start = lines + .iter() + .position(|l| { + let t = l.trim(); + t.starts_with('─') && t.chars().filter(|&c| c == '─').count() >= 6 + }) + .unwrap_or(0); + + let end = lines + .iter() + .rposition(|l| !l.trim().is_empty()) + .map(|i| i + 1) + .unwrap_or(lines.len()); + + let clean_lines = &lines[start..end]; + let raw_screen = clean_lines.join("\n"); + + let data = extract_usage_data(clean_lines); + + ProcessedOutput { raw_screen, data } +} + +fn extract_usage_data(lines: &[&str]) -> UsageData { + let mut data = UsageData::default(); + let mut i = 0; + + while i < lines.len() { + let trimmed = lines[i].trim(); + + if (trimmed.starts_with("Current session") || trimmed.starts_with("Current week")) + && !trimmed.contains('%') + { + let name = trimmed.to_string(); + if let Some(gauge) = find_gauge(&lines[i..], &name) { + data.gauges.push(gauge); + } + } + + if let Some(factor) = parse_contribution_factor(trimmed) { + data.contributions + .get_or_insert_with(ContributionInfo::default) + .factors + .push(factor); + } + + if trimmed.starts_with("Subagents") { + let subs = parse_subagent_table(&lines[i + 1..]); + if !subs.is_empty() { + data.contributions + .get_or_insert_with(ContributionInfo::default) + .subagents = subs; + } + } + + i += 1; + } + + data +} + +fn find_gauge(lines: &[&str], name: &str) -> Option { + let mut percent = None; + let mut resets_raw = None; + + for line in lines.iter().skip(1).take(4) { + let t = line.trim(); + if let Some(pct) = extract_percent_used(t) { + percent = Some(pct); + } + if t.starts_with("Resets ") { + resets_raw = Some(t.trim_start_matches("Resets ").to_string()); + } + } + + percent.map(|p| { + let resets_iso = resets_raw.as_deref().and_then(parse_reset_to_iso); + UsageGauge { + name: name.to_string(), + percent_used: p, + resets: resets_raw, + resets_iso, + } + }) +} + +/// Parse reset strings like: +/// "12:30pm (Europe/Vienna)" → today at 12:30 in that tz +/// "May 12, 9am (Europe/Vienna)" → May 12 at 09:00 +/// "May 12, 9:30am (Europe/Vienna)" → May 12 at 09:30 +/// "Jun 1, 12pm (America/New_York)" → Jun 1 at 12:00 +/// +/// Claude Code uses JS `Intl.DateTimeFormat` style output. +fn parse_reset_to_iso(s: &str) -> Option { + // Split off the timezone from parentheses + let (datetime_part, tz_str) = { + let open = s.rfind('(')?; + let close = s.rfind(')')?; + let tz = s[open + 1..close].trim(); + let dt = s[..open].trim(); + (dt, tz) + }; + + let tz: Tz = tz_str.parse().ok()?; + let now = Utc::now().with_timezone(&tz); + + let (date, time_str) = if datetime_part.contains(',') { + // "May 12, 9am" or "May 12, 9:30am" + let comma_pos = datetime_part.find(',')?; + let date_part = datetime_part[..comma_pos].trim(); + let time_part = datetime_part[comma_pos + 1..].trim(); + + let date = parse_month_day(date_part, now.year())?; + (date, time_part) + } else { + // "12:30pm" — today in the given timezone + (now.date_naive(), datetime_part) + }; + + let time = parse_12h_time(time_str)?; + let naive = date.and_time(time); + let local = naive.and_local_timezone(tz).earliest()?; + let utc = local.with_timezone(&Utc); + + Some(utc.to_rfc3339()) +} + +/// Parse "May 12", "Jun 1", "December 25", etc. +fn parse_month_day(s: &str, year: i32) -> Option { + let parts: Vec<&str> = s.split_whitespace().collect(); + if parts.len() != 2 { + return None; + } + let month = match parts[0].to_lowercase().as_str() { + "jan" | "january" => 1, + "feb" | "february" => 2, + "mar" | "march" => 3, + "apr" | "april" => 4, + "may" => 5, + "jun" | "june" => 6, + "jul" | "july" => 7, + "aug" | "august" => 8, + "sep" | "september" => 9, + "oct" | "october" => 10, + "nov" | "november" => 11, + "dec" | "december" => 12, + _ => return None, + }; + let day: u32 = parts[1].parse().ok()?; + NaiveDate::from_ymd_opt(year, month, day) +} + +/// Parse "9am", "12pm", "9:30am", "12:30pm" +fn parse_12h_time(s: &str) -> Option { + let s = s.trim().to_lowercase(); + let is_pm = s.ends_with("pm"); + let is_am = s.ends_with("am"); + if !is_pm && !is_am { + return None; + } + + let num_part = &s[..s.len() - 2]; + + let (hour, minute) = if let Some((h, m)) = num_part.split_once(':') { + (h.parse::().ok()?, m.parse::().ok()?) + } else { + (num_part.parse::().ok()?, 0) + }; + + let hour_24 = match (hour, is_pm) { + (12, true) => 12, + (12, false) => 0, + (h, true) => h + 12, + (h, false) => h, + }; + + NaiveTime::from_hms_opt(hour_24, minute, 0) +} + +fn extract_percent_used(line: &str) -> Option { + let line = line.trim(); + if !line.ends_with("% used") { + return None; + } + let before_pct = line.trim_end_matches("% used").trim(); + before_pct + .rsplit_once(char::is_whitespace) + .map(|(_, n)| n) + .unwrap_or(before_pct) + .parse() + .ok() +} + +fn parse_contribution_factor(line: &str) -> Option { + if !line.contains("% of your usage") { + return None; + } + let pct_str = line.split('%').next()?; + let percent: u32 = pct_str.trim().parse().ok()?; + let description = line.to_string(); + Some(ContributionFactor { + description, + percent, + }) +} + +fn parse_subagent_table(lines: &[&str]) -> Vec { + let mut subs = Vec::new(); + for line in lines { + let t = line.trim(); + if t.is_empty() || t.starts_with('─') || t.contains("to day") || t.contains("to cancel") { + break; + } + if let Some(pos) = t.rfind('%') { + let num_start = t[..pos] + .rfind(char::is_whitespace) + .map(|i| i + 1) + .unwrap_or(0); + if let Ok(pct) = t[num_start..pos].parse::() { + let name = t[..num_start].trim().to_string(); + if !name.is_empty() && !name.contains("% of") { + subs.push(SubagentUsage { + name, + percent: pct, + }); + } + } + } + } + subs +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_time_only() { + let t = parse_12h_time("12:30pm").unwrap(); + assert_eq!(t, NaiveTime::from_hms_opt(12, 30, 0).unwrap()); + } + + #[test] + fn parse_time_am() { + let t = parse_12h_time("9am").unwrap(); + assert_eq!(t, NaiveTime::from_hms_opt(9, 0, 0).unwrap()); + } + + #[test] + fn parse_time_12am() { + let t = parse_12h_time("12am").unwrap(); + assert_eq!(t, NaiveTime::from_hms_opt(0, 0, 0).unwrap()); + } + + #[test] + fn parse_time_with_minutes() { + let t = parse_12h_time("9:30am").unwrap(); + assert_eq!(t, NaiveTime::from_hms_opt(9, 30, 0).unwrap()); + } + + #[test] + fn parse_reset_time_only() { + let iso = parse_reset_to_iso("12:30pm (Europe/Vienna)"); + assert!(iso.is_some()); + let iso = iso.unwrap(); + assert!(iso.contains("T")); + // Should end in +00:00 (UTC via rfc3339) + assert!(iso.ends_with("+00:00")); + } + + #[test] + fn parse_reset_date_and_time() { + let iso = parse_reset_to_iso("May 12, 9am (Europe/Vienna)").unwrap(); + assert!(iso.contains("T07:00:00") || iso.contains("T08:00:00")); + // CEST is UTC+2, CET is UTC+1 — depends on whether May 12 is summer time + } + + #[test] + fn parse_month_day_basic() { + let d = parse_month_day("May 12", 2026).unwrap(); + assert_eq!(d, NaiveDate::from_ymd_opt(2026, 5, 12).unwrap()); + } +} diff --git a/src/bin/anth.rs b/src/bin/anth.rs new file mode 100644 index 0000000..5ecdefa --- /dev/null +++ b/src/bin/anth.rs @@ -0,0 +1,252 @@ +//! Minimal CLI for dirigent_anth — validate parsing and search sessions. +//! +//! Usage: +//! cargo run --package dirigent_anth --bin ant # validate all sessions +//! cargo run --package dirigent_anth --bin ant -- search "query" # search user messages +//! cargo run --package dirigent_anth --bin ant -- stats # show statistics + +use dirigent_anth::*; +use std::io::BufRead; + +fn main() { + let args: Vec = std::env::args().skip(1).collect(); + + let home = match discover_claude_home() { + Ok(h) => h, + Err(e) => { + eprintln!("Could not find Claude home: {e}"); + std::process::exit(1); + } + }; + + let projects = match discover_projects(&home) { + Ok(p) => p, + Err(e) => { + eprintln!("Could not discover projects: {e}"); + std::process::exit(1); + } + }; + + match args.first().map(|s| s.as_str()) { + Some("search") => { + let query = args.get(1).map(|s| s.as_str()).unwrap_or(""); + if query.is_empty() { + eprintln!("Usage: ant search "); + std::process::exit(1); + } + cmd_search(&projects, query); + } + Some("stats") => cmd_stats(&projects), + Some("validate") | None => cmd_validate(&projects), + Some(other) => { + eprintln!("Unknown command: {other}"); + eprintln!("Commands: validate (default), search , stats"); + std::process::exit(1); + } + } +} + +/// Validate that the parser can handle all sessions without errors. +fn cmd_validate(projects: &[ClaudeProject]) { + let mut total_sessions = 0; + let mut total_ok = 0; + let mut total_messages = 0; + let mut total_skipped_lines = 0; + let mut errors: Vec<(String, String)> = Vec::new(); + + for project in projects { + println!( + "Project: {} ({} sessions)", + project.original_path, + project.sessions.len() + ); + + for session in &project.sessions { + total_sessions += 1; + + // Raw line-level validation: count how many lines parse vs skip + let (_raw_ok, raw_skip) = validate_lines(&session.jsonl_path); + total_skipped_lines += raw_skip; + + // Full pipeline validation + match load_session(session) { + Ok(parsed) => { + total_ok += 1; + total_messages += parsed.messages.len(); + let tools = parsed.tool_exchanges.len(); + let subs = parsed.subagents.len(); + let branches = if parsed.tree.is_linear() { + "linear" + } else { + "branched" + }; + + if raw_skip > 0 { + println!( + " {} — {} msgs, {} tools, {} subagents, {} | {raw_skip} lines skipped", + &session.id[..8.min(session.id.len())], + parsed.messages.len(), + tools, + subs, + branches, + ); + } + } + Err(e) => { + errors.push((session.id.clone(), e.to_string())); + eprintln!(" {} — ERROR: {e}", &session.id[..8.min(session.id.len())]); + } + } + } + } + + println!("\n--- Validation Summary ---"); + println!("Projects: {}", projects.len()); + println!("Sessions: {total_sessions} ({total_ok} ok, {} errors)", errors.len()); + println!("Messages: {total_messages}"); + if total_skipped_lines > 0 { + println!("Skipped: {total_skipped_lines} unparseable lines"); + } + + if !errors.is_empty() { + println!("\nErrors:"); + for (id, err) in &errors { + println!(" {id}: {err}"); + } + std::process::exit(1); + } +} + +/// Count parseable vs skipped lines in a JSONL file. +fn validate_lines(path: &camino::Utf8Path) -> (usize, usize) { + let file = match std::fs::File::open(path.as_std_path()) { + Ok(f) => f, + Err(_) => return (0, 0), + }; + let reader = std::io::BufReader::new(file); + let mut ok = 0; + let mut skip = 0; + + for (i, line) in reader.lines().enumerate() { + let line = match line { + Ok(l) => l, + Err(_) => { + skip += 1; + continue; + } + }; + if line.trim().is_empty() { + continue; + } + if parse_line(&line, i + 1).is_some() { + ok += 1; + } else { + skip += 1; + } + } + + (ok, skip) +} + +/// Search user messages for a query string (case-insensitive). +fn cmd_search(projects: &[ClaudeProject], query: &str) { + let query_lower = query.to_lowercase(); + let mut hits = 0; + + for project in projects { + for session in &project.sessions { + let messages = match parse_session_deduped(&session.jsonl_path) { + Ok(m) => m, + Err(_) => continue, + }; + + for msg in &messages { + let text = match msg { + types::RawMessage::User(u) => match &u.message.content { + types::Content::Text(s) => s.clone(), + types::Content::Blocks(_) => continue, + }, + types::RawMessage::Assistant(a) => { + let mut parts = Vec::new(); + for block in &a.message.content { + if let types::ContentBlock::Text { text } = block { + parts.push(text.as_str()); + } + } + parts.join(" ") + } + _ => continue, + }; + + if text.to_lowercase().contains(&query_lower) { + let role = match msg { + types::RawMessage::User(_) => "user", + types::RawMessage::Assistant(_) => "assistant", + _ => "other", + }; + let preview = truncate(&text, 120); + println!( + "[{}] {} {} | {}", + &project.original_path, + &session.id[..8.min(session.id.len())], + role, + preview + ); + hits += 1; + } + } + } + } + + println!("\n{hits} matches for \"{query}\""); +} + +/// Show aggregate statistics across all sessions. +fn cmd_stats(projects: &[ClaudeProject]) { + let mut total_sessions = 0; + let mut total_messages = 0; + let mut total_tools = 0; + let mut total_subagents = 0; + let mut tool_counts: std::collections::HashMap = std::collections::HashMap::new(); + + for project in projects { + for session in &project.sessions { + total_sessions += 1; + if let Ok(parsed) = load_session(session) { + total_messages += parsed.messages.len(); + total_tools += parsed.tool_exchanges.len(); + total_subagents += parsed.subagents.len(); + + for ex in &parsed.tool_exchanges { + let name = format!("{:?}", ex.call.name); + *tool_counts.entry(name).or_default() += 1; + } + } + } + } + + println!("--- Statistics ---"); + println!("Projects: {}", projects.len()); + println!("Sessions: {total_sessions}"); + println!("Messages: {total_messages}"); + println!("Tool calls: {total_tools}"); + println!("Sub-agents: {total_subagents}"); + + if !tool_counts.is_empty() { + println!("\nTool usage:"); + let mut sorted: Vec<_> = tool_counts.into_iter().collect(); + sorted.sort_by(|a, b| b.1.cmp(&a.1)); + for (name, count) in sorted.iter().take(15) { + println!(" {name:20} {count}"); + } + } +} + +fn truncate(s: &str, max: usize) -> String { + let s = s.replace('\n', " ").replace('\r', ""); + if s.len() <= max { + s + } else { + format!("{}...", &s[..max]) + } +} diff --git a/src/bin/anth_usage.rs b/src/bin/anth_usage.rs new file mode 100644 index 0000000..58fd3e0 --- /dev/null +++ b/src/bin/anth_usage.rs @@ -0,0 +1,192 @@ +use portable_pty::{CommandBuilder, NativePtySystem, PtySize, PtySystem}; +use std::io::{Read, Write}; +use std::path::PathBuf; +use std::time::Duration; + +const ROWS: u16 = 80; +const COLS: u16 = 120; + +struct Args { + debug: bool, + raw: bool, + no_trust: bool, + workdir: Option, + use_cwd: bool, +} + +fn parse_args() -> Args { + let mut args = Args { + debug: false, + raw: false, + no_trust: false, + workdir: None, + use_cwd: false, + }; + let mut iter = std::env::args().skip(1); + while let Some(arg) = iter.next() { + match arg.as_str() { + "--debug" => args.debug = true, + "--raw" => args.raw = true, + "--no-trust" => args.no_trust = true, + "--cwd" => args.use_cwd = true, + "--workdir" => { + args.workdir = Some(PathBuf::from( + iter.next().expect("--workdir requires a path argument"), + )); + } + other => { + eprintln!("Unknown argument: {other}"); + eprintln!( + "Usage: anth_usage [--debug] [--raw] [--no-trust] [--workdir ] [--cwd]" + ); + std::process::exit(2); + } + } + } + args +} + +fn resolve_workdir(args: &Args) -> PathBuf { + if let Some(ref dir) = args.workdir { + return dir.clone(); + } + if args.use_cwd { + return std::env::current_dir().expect("failed to get current directory"); + } + + #[cfg(feature = "dirigent-paths")] + { + if let Ok(paths) = dirigent_config::DirigentPaths::resolve() { + let noproject = paths.noproject_home_dir(); + if noproject.exists() { + return noproject; + } + } + } + + dirs::home_dir().expect("failed to resolve home directory") +} + +fn grab_screen(parser: &vt100::Parser) -> String { + let screen = parser.screen(); + let mut output = String::new(); + for line in screen.rows(0, COLS) { + output.push_str(&line); + output.push('\n'); + } + output +} + +macro_rules! debug { + ($args:expr, $($tt:tt)*) => { + if $args.debug { + eprintln!($($tt)*); + } + }; +} + +fn main() { + let args = parse_args(); + let workdir = resolve_workdir(&args); + + debug!(args, "Working directory: {}", workdir.display()); + + let pty_system = NativePtySystem::default(); + let pair = pty_system + .openpty(PtySize { + rows: ROWS, + cols: COLS, + pixel_width: 0, + pixel_height: 0, + }) + .expect("failed to open pty"); + + let mut cmd = CommandBuilder::new("claude"); + cmd.cwd(&workdir); + let mut child = pair.slave.spawn_command(cmd).expect("failed to spawn claude"); + drop(pair.slave); + + let mut writer = pair.master.take_writer().expect("failed to get writer"); + let reader = pair.master.try_clone_reader().expect("failed to get reader"); + + let (tx, rx) = std::sync::mpsc::channel(); + std::thread::spawn(move || { + let mut reader = reader; + let mut buf = [0u8; 4096]; + loop { + match reader.read(&mut buf) { + Ok(0) => break, + Ok(n) => { + let _ = tx.send(buf[..n].to_vec()); + } + Err(_) => break, + } + } + }); + + // Wait for claude to render + std::thread::sleep(Duration::from_secs(5)); + + debug!( + args, + "Child alive: {}", + matches!(child.try_wait(), Ok(None)) + ); + + // Grab screen + let mut parser = vt100::Parser::new(ROWS, COLS, 0); + while let Ok(data) = rx.try_recv() { + parser.process(&data); + } + let output = grab_screen(&parser); + debug!(args, "=== SCREEN ===\n{output}=== END ==="); + + // Handle trust prompt + if output.contains("Yes, I trust this folder") { + if args.no_trust { + eprintln!("Folder is not trusted: {}", workdir.display()); + eprintln!("Run claude in this folder manually to trust it, or omit --no-trust."); + let _ = child.kill(); + std::process::exit(1); + } + debug!(args, "Sending enter for trust..."); + writer.write_all(b"\r").expect("failed to confirm trust"); + + std::thread::sleep(Duration::from_secs(3)); + + while let Ok(data) = rx.try_recv() { + parser.process(&data); + } + debug!( + args, + "=== AFTER TRUST ===\n{}=== END ===", + grab_screen(&parser) + ); + } + + // Send /usage + debug!(args, "Sending /usage..."); + writer + .write_all(b"/usage\r") + .expect("failed to send /usage"); + + std::thread::sleep(Duration::from_secs(3)); + + while let Ok(data) = rx.try_recv() { + parser.process(&data); + } + let raw_output = grab_screen(&parser); + + let processed = dirigent_anth::anth_usage::process_usage_screen(&raw_output); + + if args.raw { + println!("{}", processed.raw_screen); + } else { + println!( + "{}", + serde_json::to_string_pretty(&processed.data).expect("failed to serialize usage data") + ); + } + + let _ = child.kill(); +} diff --git a/src/claude_grab.rs b/src/claude_grab.rs new file mode 100644 index 0000000..1d47902 --- /dev/null +++ b/src/claude_grab.rs @@ -0,0 +1,157 @@ +use portable_pty::{Child, CommandBuilder, NativePtySystem, PtySize, PtySystem}; +use std::io::{Read, Write}; +use std::sync::mpsc::{self, Receiver}; +use std::time::Duration; +use vt100::Parser; + +const DEFAULT_ROWS: u16 = 80; +const DEFAULT_COLS: u16 = 120; + +pub struct PtySession { + parser: Parser, + writer: Option>, + rx: Receiver>, + cols: u16, + #[allow(dead_code)] + child: Box, +} + +impl PtySession { + pub fn spawn_claude(args: &[&str]) -> Self { + Self::spawn_claude_with_size(args, DEFAULT_ROWS, DEFAULT_COLS) + } + + pub fn spawn_claude_with_size(args: &[&str], rows: u16, cols: u16) -> Self { + let pty_system = NativePtySystem::default(); + + let pair = pty_system + .openpty(PtySize { + rows, + cols, + pixel_width: 0, + pixel_height: 0, + }) + .expect("failed to open pty"); + + let mut cmd = CommandBuilder::new("claude"); + for arg in args { + cmd.arg(*arg); + } + if let Some(home) = dirs::home_dir() { + cmd.cwd(home); + } + let child = pair + .slave + .spawn_command(cmd) + .expect("failed to spawn claude"); + + drop(pair.slave); + + let writer = pair.master.take_writer().expect("failed to get writer"); + let reader = pair + .master + .try_clone_reader() + .expect("failed to get reader"); + + let (tx, rx) = mpsc::channel::>(); + std::thread::spawn(move || { + let mut reader = reader; + let mut chunk = [0u8; 4096]; + loop { + match reader.read(&mut chunk) { + Ok(0) => break, + Ok(n) => { + if tx.send(chunk[..n].to_vec()).is_err() { + break; + } + } + Err(_) => break, + } + } + }); + + Self { + parser: Parser::new(rows, cols, 0), + writer: Some(writer), + rx, + cols, + child, + } + } + + pub fn grab_screen(&mut self) -> String { + while let Ok(data) = self.rx.try_recv() { + self.parser.process(&data); + } + let deadline = std::time::Instant::now() + Duration::from_millis(200); + while std::time::Instant::now() < deadline { + match self.rx.recv_timeout(Duration::from_millis(50)) { + Ok(data) => self.parser.process(&data), + Err(_) => {} + } + } + + let screen = self.parser.screen(); + let mut output = String::new(); + for line in screen.rows(0, self.cols) { + output.push_str(&line); + output.push('\n'); + } + output + } + + pub fn wait_for(&mut self, needle: &str, timeout: Duration) -> bool { + self.wait_for_any(&[needle], timeout) + } + + pub fn wait_for_any(&mut self, needles: &[&str], timeout: Duration) -> bool { + let deadline = std::time::Instant::now() + timeout; + while std::time::Instant::now() < deadline { + match self.rx.recv_timeout(Duration::from_millis(100)) { + Ok(data) => self.parser.process(&data), + Err(_) => {} + } + let screen = self.parser.screen(); + let mut content = String::new(); + for line in screen.rows(0, self.cols) { + content.push_str(&line); + content.push('\n'); + } + for needle in needles { + if content.contains(needle) { + return true; + } + } + } + false + } + + pub fn is_alive(&mut self) -> bool { + matches!(self.child.try_wait(), Ok(None)) + } + + pub fn send(&mut self, input: &[u8]) { + self.writer.as_mut().expect("writer gone").write_all(input).expect("failed to write to pty"); + } + + pub fn try_send(&mut self, input: &[u8]) -> std::io::Result<()> { + match self.writer.as_mut() { + Some(w) => w.write_all(input), + None => Err(std::io::Error::new(std::io::ErrorKind::BrokenPipe, "writer gone")), + } + } + + pub fn try_send_line(&mut self, text: &str) -> std::io::Result<()> { + self.try_send(text.as_bytes())?; + self.try_send(b"\r") + } + + pub fn send_enter(&mut self) { + self.send(b"\r"); + } + + pub fn send_line(&mut self, text: &str) { + self.send(text.as_bytes()); + self.send_enter(); + } +} diff --git a/src/correlation.rs b/src/correlation.rs new file mode 100644 index 0000000..5e96bef --- /dev/null +++ b/src/correlation.rs @@ -0,0 +1,107 @@ +//! Tool call correlation — matches assistant ToolUse blocks with their +//! corresponding user ToolResult blocks by ID across a message sequence. + +use std::collections::HashMap; + +use crate::types::{ + Content, ContentBlock, RawAssistantMessage, RawMessage, RawUserMessage, ToolCall, + ToolExchange, ToolName, ToolResultData, +}; + +/// Extract tool calls from an assistant message's content blocks. +fn extract_tool_calls(msg: &RawAssistantMessage) -> Vec { + let source_uuid = msg.uuid.clone().unwrap_or_default(); + msg.message + .content + .iter() + .filter_map(|block| { + if let ContentBlock::ToolUse { id, name, input, .. } = block { + Some(ToolCall { + id: id.clone(), + name: ToolName::from(name.clone()), + input: input.clone(), + source_message_uuid: source_uuid.clone(), + }) + } else { + None + } + }) + .collect() +} + +/// Extract tool results from a user message's content blocks. +fn extract_tool_results(msg: &RawUserMessage) -> Vec { + let source_uuid = msg.uuid.clone().unwrap_or_default(); + match &msg.message.content { + Content::Blocks(blocks) => blocks + .iter() + .filter_map(|block| { + if let ContentBlock::ToolResult { tool_use_id, content, is_error } = block { + // Extract text content from the tool result + let text_content = content.as_ref().and_then(|c| match c { + Content::Text(s) => Some(s.clone()), + Content::Blocks(bs) => { + // Concatenate text blocks + let texts: Vec<&str> = bs + .iter() + .filter_map(|b| { + if let ContentBlock::Text { text } = b { + Some(text.as_str()) + } else { + None + } + }) + .collect(); + if texts.is_empty() { None } else { Some(texts.join("\n")) } + } + }); + Some(ToolResultData { + tool_use_id: tool_use_id.clone(), + content: text_content, + is_error: *is_error, + source_message_uuid: source_uuid.clone(), + }) + } else { + None + } + }) + .collect(), + Content::Text(_) => Vec::new(), + } +} + +/// Correlate tool calls with their results across a message sequence. +/// +/// Iterates messages in order, collecting ToolUse blocks from assistant +/// messages and matching them by ID to ToolResult blocks in subsequent user +/// messages. Any tool calls that never received a result are emitted with +/// `result: None`. +pub fn correlate_tools(messages: &[RawMessage]) -> Vec { + let mut pending: HashMap = HashMap::new(); + let mut exchanges: Vec = Vec::new(); + + for msg in messages { + match msg { + RawMessage::Assistant(asst) => { + for call in extract_tool_calls(asst) { + pending.insert(call.id.clone(), call); + } + } + RawMessage::User(user) => { + for result in extract_tool_results(user) { + if let Some(call) = pending.remove(&result.tool_use_id) { + exchanges.push(ToolExchange { call, result: Some(result) }); + } + } + } + _ => {} + } + } + + // Emit unmatched calls (no result found) + for (_id, call) in pending { + exchanges.push(ToolExchange { call, result: None }); + } + + exchanges +} diff --git a/src/dedup.rs b/src/dedup.rs new file mode 100644 index 0000000..27c2bd7 --- /dev/null +++ b/src/dedup.rs @@ -0,0 +1,116 @@ +//! Streaming deduplication for assistant messages. + +use crate::types::{RawAssistantMessage, RawMessage}; + +/// Deduplicate streamed assistant messages. +/// +/// Claude Code writes multiple JSONL lines for the same assistant message +/// as it streams. Each shares the same `uuid` with progressively more +/// content blocks. We keep only the last entry per uuid. +/// +/// Non-assistant messages pass through unchanged. +pub fn dedup_messages(messages: Vec) -> Vec { + let mut result: Vec = Vec::new(); + let mut buffered_assistant: Option = None; + + for msg in messages { + match msg { + RawMessage::Assistant(ref asst) => { + let current_uuid = asst.uuid.as_deref(); + + if let Some(ref buffered) = buffered_assistant { + let buffered_uuid = buffered.uuid.as_deref(); + if current_uuid == buffered_uuid { + // Same uuid — replace buffer with newer (more complete) version + buffered_assistant = Some(asst.clone()); + } else { + // Different uuid — flush old buffer, start new + result.push(RawMessage::Assistant(buffered.clone())); + buffered_assistant = Some(asst.clone()); + } + } else { + // No buffer yet — start buffering + buffered_assistant = Some(asst.clone()); + } + } + _ => { + // Non-assistant: flush any buffered assistant first, then push this + if let Some(buffered) = buffered_assistant.take() { + result.push(RawMessage::Assistant(buffered)); + } + result.push(msg); + } + } + } + + // Flush remaining buffer + if let Some(buffered) = buffered_assistant { + result.push(RawMessage::Assistant(buffered)); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{AssistantInner, ContentBlock}; + + fn make_assistant(uuid: &str, stop_reason: Option<&str>, text: &str) -> RawMessage { + RawMessage::Assistant(RawAssistantMessage { + uuid: Some(uuid.to_string()), + parent_uuid: None, + timestamp: None, + session_id: None, + cwd: None, + version: None, + git_branch: None, + is_sidechain: false, + request_id: None, + message: AssistantInner { + model: None, + id: None, + message_type: None, + role: None, + content: vec![ContentBlock::Text { text: text.to_string() }], + stop_reason: stop_reason.map(str::to_string), + stop_sequence: None, + usage: None, + }, + }) + } + + #[test] + fn dedup_single_streamed_message() { + let msgs = vec![ + make_assistant("a-1", None, "Part 1"), + make_assistant("a-1", None, "Part 1 more"), + make_assistant("a-1", Some("end_turn"), "Part 1 final"), + ]; + let deduped = dedup_messages(msgs); + assert_eq!(deduped.len(), 1); + if let RawMessage::Assistant(a) = &deduped[0] { + assert_eq!(a.message.stop_reason.as_deref(), Some("end_turn")); + match &a.message.content[0] { + ContentBlock::Text { text } => assert_eq!(text, "Part 1 final"), + _ => panic!("Expected text block"), + } + } + } + + #[test] + fn dedup_two_distinct_assistants() { + let msgs = vec![ + make_assistant("a-1", Some("end_turn"), "First"), + make_assistant("a-2", Some("end_turn"), "Second"), + ]; + let deduped = dedup_messages(msgs); + assert_eq!(deduped.len(), 2); + } + + #[test] + fn dedup_empty_input() { + let deduped = dedup_messages(vec![]); + assert!(deduped.is_empty()); + } +} diff --git a/src/discovery.rs b/src/discovery.rs new file mode 100644 index 0000000..5e830f2 --- /dev/null +++ b/src/discovery.rs @@ -0,0 +1,342 @@ +use std::collections::HashMap; +use camino::{Utf8Path, Utf8PathBuf}; +use crate::types::*; +use crate::error::{AntError, Result}; + +/// Discover the Claude Code home directory (~/.claude/). +pub fn discover_claude_home() -> Result { + let home = dirs::home_dir().ok_or(AntError::HomeNotFound)?; + let claude_dir = home.join(".claude"); + if !claude_dir.exists() { + return Err(AntError::HomeNotFound); + } + Utf8PathBuf::try_from(claude_dir.to_path_buf()) + .map_err(|e| AntError::InvalidPath(e.to_string())) +} + +/// Normalise a native path to forward slashes for consistent storage. +fn normalize_to_forward_slashes(path: &str) -> String { + path.replace('\\', "/") +} + +/// Resolve the original filesystem path for a Claude project directory. +/// +/// Priority: +/// 1. `projectPath` from `sessions-index.json` (authoritative, cheap) +/// 2. `cwd` from the first user message in any session JSONL (authoritative, costs one file parse) +/// 3. `decode_project_path` (lossy fallback for empty project directories) +pub fn resolve_original_path(dir_name: &str, sessions: &[SessionRef]) -> String { + // 1. Try sessions-index.json projectPath + for session in sessions { + if let Some(ref idx) = session.index_entry { + if let Some(ref path) = idx.project_path { + if !path.is_empty() { + return normalize_to_forward_slashes(path); + } + } + } + } + + // 2. Try cwd from first user message in any session + for session in sessions { + if let Ok(msgs) = crate::parser::parse_session(&session.jsonl_path) { + for msg in &msgs { + if let crate::types::RawMessage::User(user) = msg { + if let Some(ref cwd) = user.cwd { + if !cwd.is_empty() { + return normalize_to_forward_slashes(cwd); + } + } + } + } + } + } + + // 3. Lossy fallback + decode_project_path(dir_name) +} + +/// Discover all Claude Code project directories under the given home. +pub fn discover_projects(home: &Utf8Path) -> Result> { + let projects_dir = home.join("projects"); + if !projects_dir.as_std_path().exists() { + return Ok(Vec::new()); + } + + let mut projects = Vec::new(); + for entry in std::fs::read_dir(projects_dir.as_std_path())? { + let entry = entry?; + let path = entry.path(); + if !path.is_dir() { + continue; + } + let dir_name = match path.file_name().and_then(|n| n.to_str()) { + Some(name) => name.to_string(), + None => continue, + }; + + let utf8_path = match Utf8PathBuf::try_from(path.clone()) { + Ok(p) => p, + Err(_) => continue, + }; + + let sessions = discover_sessions(&utf8_path)?; + let original_path = resolve_original_path(&dir_name, &sessions); + + projects.push(ClaudeProject { + path: utf8_path, + original_path, + sessions, + }); + } + + Ok(projects) +} + +/// Decode an encoded project folder name back to the original path (lossy). +/// +/// **Warning**: Claude Code's encoding replaces `\`, `/`, AND `_` all with +/// `-`, making this decoding ambiguous. For example, `G--dev-projects-adk-rust` +/// could be `G:/dev/projects/adk-rust` or `G:/dev/projects/adk/rust`. Prefer +/// [`resolve_original_path`] which reads ground truth from `sessions-index.json` +/// or session JSONL files. This function is a last-resort fallback for empty +/// project directories with no sessions or index. +pub fn decode_project_path(encoded: &str) -> String { + // Split on "--" to recover path segments separated by the original separators. + let parts: Vec<&str> = encoded.split("--").collect(); + + if parts.is_empty() { + return encoded.to_string(); + } + + let mut result = String::new(); + + let first = parts[0]; + + if first.len() == 1 && first.chars().next().map_or(false, |c| c.is_ascii_uppercase()) { + // Windows drive letter: "G" → "G:" + result.push_str(first); + result.push(':'); + } else if first.starts_with('-') || first.is_empty() { + // Unix-style absolute path: the original path started with "/". + // The first segment has a leading "-" that encoded the root separator. + // Strip that leading "-" to recover the first directory component. + let component = first.trim_start_matches('-'); + result.push('/'); + if !component.is_empty() { + // Single dashes within the component are path separators. + result.push_str(&component.replace('-', "/")); + } + } else { + result.push_str(first); + } + + // Remaining "--"-separated parts are additional path components. + // Within each part, single "-" represent path separators. + for part in &parts[1..] { + result.push('/'); + result.push_str(&part.replace('-', "/")); + } + + result +} + +/// Discover all session JSONL files in a project directory. +pub fn discover_sessions(project_dir: &Utf8Path) -> Result> { + let index = load_session_index(project_dir); + let mut sessions = Vec::new(); + + for entry in std::fs::read_dir(project_dir.as_std_path())? { + let entry = entry?; + let path = entry.path(); + + // Only .jsonl files + let extension = path.extension().and_then(|e| e.to_str()); + if extension != Some("jsonl") { + continue; + } + + let stem = match path.file_stem().and_then(|s| s.to_str()) { + Some(s) => s.to_string(), + None => continue, + }; + + let utf8_path = match Utf8PathBuf::try_from(path) { + Ok(p) => p, + Err(_) => continue, + }; + + // Check for artifacts directory (same name as the session stem). + let artifacts_dir = { + let dir = project_dir.join(&stem); + if dir.as_std_path().is_dir() { + Some(dir) + } else { + None + } + }; + + let index_entry = index.as_ref().and_then(|idx| idx.get(&stem).cloned()); + + sessions.push(SessionRef { + id: stem, + jsonl_path: utf8_path, + artifacts_dir, + index_entry, + }); + } + + Ok(sessions) +} + +/// Load `sessions-index.json` if it exists in the given project directory. +fn load_session_index(project_dir: &Utf8Path) -> Option> { + let index_path = project_dir.join("sessions-index.json"); + if !index_path.as_std_path().exists() { + return None; + } + + let content = std::fs::read_to_string(index_path.as_std_path()).ok()?; + serde_json::from_str::>(&content).ok() +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn decode_project_path_windows() { + assert_eq!( + decode_project_path("G--dev-projects-dirigent"), + "G:/dev/projects/dirigent" + ); + } + + #[test] + fn decode_project_path_windows_users() { + assert_eq!( + decode_project_path("C--Users-g4b-tmp"), + "C:/Users/g4b/tmp" + ); + } + + #[test] + fn decode_project_path_unix() { + assert_eq!( + decode_project_path("-home-user-projects-foo"), + "/home/user/projects/foo" + ); + } + + #[test] + fn discover_sessions_in_temp_dir() { + let tmp = TempDir::new().unwrap(); + let project_dir = Utf8Path::from_path(tmp.path()).unwrap(); + + // Create fake session files. + std::fs::write(project_dir.join("abc-def-123.jsonl").as_std_path(), "{}\n").unwrap(); + std::fs::write(project_dir.join("xyz-456-789.jsonl").as_std_path(), "{}\n").unwrap(); + // Create an artifacts directory for one session. + std::fs::create_dir(project_dir.join("abc-def-123").as_std_path()).unwrap(); + + let sessions = discover_sessions(project_dir).unwrap(); + assert_eq!(sessions.len(), 2); + + let with_artifacts = sessions.iter().find(|s| s.id == "abc-def-123").unwrap(); + assert!(with_artifacts.artifacts_dir.is_some()); + + let without_artifacts = sessions.iter().find(|s| s.id == "xyz-456-789").unwrap(); + assert!(without_artifacts.artifacts_dir.is_none()); + } + + #[test] + fn discover_sessions_ignores_non_jsonl() { + let tmp = TempDir::new().unwrap(); + let project_dir = Utf8Path::from_path(tmp.path()).unwrap(); + + std::fs::write(project_dir.join("session.jsonl").as_std_path(), "{}\n").unwrap(); + std::fs::write( + project_dir.join("sessions-index.json").as_std_path(), + "{}", + ) + .unwrap(); + std::fs::create_dir(project_dir.join("some-dir").as_std_path()).unwrap(); + + let sessions = discover_sessions(project_dir).unwrap(); + assert_eq!(sessions.len(), 1); + assert_eq!(sessions[0].id, "session"); + } + + #[test] + fn discover_sessions_loads_index_entry() { + let tmp = TempDir::new().unwrap(); + let project_dir = Utf8Path::from_path(tmp.path()).unwrap(); + + std::fs::write(project_dir.join("abc-123.jsonl").as_std_path(), "{}\n").unwrap(); + + let index_json = r#"{ + "abc-123": { + "sessionId": "abc-123", + "firstPrompt": "Hello", + "summary": "A test session", + "messageCount": 5 + } + }"#; + std::fs::write( + project_dir.join("sessions-index.json").as_std_path(), + index_json, + ) + .unwrap(); + + let sessions = discover_sessions(project_dir).unwrap(); + assert_eq!(sessions.len(), 1); + + let entry = sessions[0].index_entry.as_ref().unwrap(); + assert_eq!(entry.session_id.as_deref(), Some("abc-123")); + assert_eq!(entry.first_prompt.as_deref(), Some("Hello")); + assert_eq!(entry.message_count, Some(5)); + } + + #[test] + fn resolve_original_path_prefers_index_project_path() { + let sessions = vec![SessionRef { + id: "test-session".to_string(), + jsonl_path: Utf8PathBuf::from("/tmp/fake.jsonl"), + artifacts_dir: None, + index_entry: Some(SessionIndexEntry { + session_id: Some("test-session".to_string()), + first_prompt: None, + summary: None, + message_count: None, + created: None, + modified: None, + git_branch: None, + project_path: Some(r"G:\dev\projects\bevy_sprite3d".to_string()), + }), + }]; + let result = resolve_original_path("G--dev-projects-bevy-sprite3d", &sessions); + assert_eq!(result, "G:/dev/projects/bevy_sprite3d"); + } + + #[test] + fn resolve_original_path_falls_back_to_decode() { + let sessions: Vec = vec![]; + let result = resolve_original_path("G--dev-projects-dirigent", &sessions); + assert_eq!(result, "G:/dev/projects/dirigent"); + } + + #[test] + fn discover_projects_empty_when_no_projects_dir() { + let tmp = TempDir::new().unwrap(); + let home_dir = Utf8Path::from_path(tmp.path()).unwrap(); + + // No "projects" subdirectory — should return empty vec, not an error. + let projects = discover_projects(home_dir).unwrap(); + assert!(projects.is_empty()); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..ef33e4e --- /dev/null +++ b/src/error.rs @@ -0,0 +1,19 @@ +#[derive(Debug, thiserror::Error)] +pub enum AntError { + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + #[error("JSON parse error at line {line}: {source}")] + JsonParse { + line: usize, + source: serde_json::Error, + }, + + #[error("Claude home directory not found")] + HomeNotFound, + + #[error("Invalid path: {0}")] + InvalidPath(String), +} + +pub type Result = std::result::Result; diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..e0b6621 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,52 @@ +//! dirigent_anth — Claude Code Session Parser & Toolkit +//! +//! Reads Claude Code's local JSONL session storage and produces typed, +//! deduplicated, correlated Rust data structures. +//! +//! # Design +//! +//! See `docs/superpowers/plans/2026-03-23-dirigent-ant-design.md` + +pub mod claude_grab; +pub mod anth_usage; +pub mod correlation; +pub mod dedup; +pub mod discovery; +pub mod error; +pub mod noise; +pub mod parser; +pub mod subagent; +pub mod tree; +pub mod types; +pub mod util; + +/// Load and fully parse a session: dedup, correlate, tree, subagents. +pub fn load_session(session_ref: &types::SessionRef) -> error::Result { + let messages = parser::parse_session_deduped(&session_ref.jsonl_path)?; + let tree = tree::ConversationTree::build(&messages); + let tool_exchanges = correlation::correlate_tools(&messages); + let mut subagents = if let Some(ref dir) = session_ref.artifacts_dir { + subagent::load_subagents(dir)? + } else { + Vec::new() + }; + subagent::link_subagents_to_calls(&mut subagents, &tool_exchanges); + + Ok(types::ParsedSession { + messages, + tree, + tool_exchanges, + subagents, + }) +} + +pub use correlation::correlate_tools; +pub use dedup::dedup_messages; +pub use discovery::{decode_project_path, discover_claude_home, discover_projects, discover_sessions, resolve_original_path}; +pub use error::{AntError, Result}; +pub use noise::{classify_noise, NoiseKind}; +pub use parser::{parse_line, parse_session, parse_session_deduped}; +pub use subagent::{link_subagents_to_calls, load_subagents}; +pub use tree::{message_parent_uuid, message_uuid, ConversationNode, ConversationTree}; +pub use types::*; +pub use util::parse_timestamp; diff --git a/src/noise.rs b/src/noise.rs new file mode 100644 index 0000000..6e986aa --- /dev/null +++ b/src/noise.rs @@ -0,0 +1,72 @@ +use crate::types::*; + +/// Classification of noise patterns in Claude Code JSONL. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NoiseKind { + Meta, + Warmup, + Interrupted, + Continuation, + ApiError, + SystemCaveat, + QueueOp, +} + +/// Classify a message as noise, if applicable. +/// Returns None for normal messages. +pub fn classify_noise(message: &RawMessage) -> Option { + match message { + RawMessage::QueueOperation(_) => Some(NoiseKind::QueueOp), + RawMessage::User(user) => { + if user.is_meta.unwrap_or(false) { + return Some(NoiseKind::Meta); + } + if let Some(text) = extract_user_text(user) { + if text == "Warmup" { + return Some(NoiseKind::Warmup); + } + if text.starts_with("[Request interrupted") { + return Some(NoiseKind::Interrupted); + } + if text.starts_with("This session is being continued") { + return Some(NoiseKind::Continuation); + } + if text.starts_with("API Error") { + return Some(NoiseKind::ApiError); + } + if text.starts_with("Caveat: The messages below") { + return Some(NoiseKind::SystemCaveat); + } + } + None + } + _ => None, + } +} + +/// Extract plain text from a user message's content. +fn extract_user_text(user: &RawUserMessage) -> Option<&str> { + match &user.message.content { + Content::Text(s) => Some(s.as_str()), + Content::Blocks(_) => None, // tool_result blocks, not plain text + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normal_assistant_is_not_noise() { + let json = r#"{"type":"assistant","uuid":"x","timestamp":"2026-01-01T00:00:00Z","sessionId":"s","message":{"id":"m","role":"assistant","content":[{"type":"text","text":"Hello"}],"stop_reason":"end_turn"}}"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + assert_eq!(classify_noise(&msg), None); + } + + #[test] + fn queue_op_is_noise() { + let json = r#"{"type":"queue-operation","operation":"enqueue","timestamp":"2026-01-01T00:00:00Z","sessionId":"s"}"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + assert_eq!(classify_noise(&msg), Some(NoiseKind::QueueOp)); + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..e7db24d --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,50 @@ +//! JSONL line parser for Claude Code session files. + +use std::io::BufRead; + +use camino::Utf8Path; + +use crate::error::Result; +use crate::types::RawMessage; + +/// Parse a single JSONL line into a RawMessage. +/// Returns None for lines that cannot be parsed (logged via tracing). +pub fn parse_line(line: &str, line_number: usize) -> Option { + match serde_json::from_str::(line) { + Ok(msg) => Some(msg), + Err(e) => { + tracing::warn!(line = line_number, error = %e, "Skipping unparseable JSONL line"); + None + } + } +} + +/// Parse all messages from a JSONL file. +/// Skips unparseable lines (lenient). Returns I/O errors. +pub fn parse_session(path: &Utf8Path) -> Result> { + let file = std::fs::File::open(path.as_std_path())?; + let reader = std::io::BufReader::new(file); + let mut messages = Vec::new(); + + for (i, line) in reader.lines().enumerate() { + let line = line?; + if line.trim().is_empty() { + continue; + } + if let Some(msg) = parse_line(&line, i + 1) { + messages.push(msg); + } + } + + Ok(messages) +} + +/// Parse a session JSONL file with streaming deduplication applied. +/// +/// Claude Code writes multiple JSONL lines for the same assistant message as +/// it streams. This function collapses those into a single final version per +/// uuid. See [`crate::dedup::dedup_messages`] for details. +pub fn parse_session_deduped(path: &Utf8Path) -> Result> { + let messages = parse_session(path)?; + Ok(crate::dedup::dedup_messages(messages)) +} diff --git a/src/subagent.rs b/src/subagent.rs new file mode 100644 index 0000000..8fff571 --- /dev/null +++ b/src/subagent.rs @@ -0,0 +1,215 @@ +//! Sub-agent session loading. +//! +//! Claude Code spawns sub-agents for Agent tool calls and stores their +//! conversations under `/subagents/`. Each sub-agent +//! has a JSONL file and an optional `.meta.json` with metadata such as the +//! agent type. + +use camino::Utf8Path; + +use crate::error::Result; +use crate::parser::parse_session; +use crate::types::{SubAgentMeta, SubAgentSession, ToolExchange}; + +/// Load all sub-agent sessions from a session's artifacts directory. +/// +/// Expects files at: `/subagents/agent-.jsonl` +/// with optional companion: `/subagents/agent-.meta.json` +/// +/// Returns an empty `Vec` if the `subagents/` subdirectory does not exist. +pub fn load_subagents(session_artifacts_dir: &Utf8Path) -> Result> { + let subagents_dir = session_artifacts_dir.join("subagents"); + if !subagents_dir.as_std_path().exists() { + return Ok(Vec::new()); + } + + let mut subagents = Vec::new(); + + for entry in std::fs::read_dir(subagents_dir.as_std_path())? { + let entry = entry?; + let path = entry.path(); + + // Only process agent-*.jsonl files + let file_name = match path.file_name().and_then(|n| n.to_str()) { + Some(name) => name.to_string(), + None => continue, + }; + + if !file_name.starts_with("agent-") || !file_name.ends_with(".jsonl") { + continue; + } + + // Extract agent ID: "agent-abc123.jsonl" → "abc123" + let agent_id = file_name + .strip_prefix("agent-") + .and_then(|s| s.strip_suffix(".jsonl")) + .unwrap_or(&file_name) + .to_string(); + + let jsonl_path = match camino::Utf8PathBuf::try_from(path.clone()) { + Ok(p) => p, + Err(_) => continue, + }; + + // Parse the JSONL session + let messages = parse_session(&jsonl_path)?; + + // Try to load companion metadata file + let meta_path = path.with_file_name(format!("agent-{}.meta.json", agent_id)); + let meta = if meta_path.exists() { + let content = std::fs::read_to_string(&meta_path)?; + serde_json::from_str::(&content) + .unwrap_or(SubAgentMeta { agent_type: None }) + } else { + SubAgentMeta { agent_type: None } + }; + + subagents.push(SubAgentSession { + agent_id, + meta, + messages, + parent_tool_call_id: None, + }); + } + + Ok(subagents) +} + +/// Try to link sub-agent sessions to their parent Agent tool calls. +/// +/// For each Agent tool call in `tool_exchanges`, parses the tool result text +/// for `agentId: ` and matches it against sub-agent sessions. On match, +/// sets `SubAgentSession.parent_tool_call_id` to the tool call's ID. +/// +/// This is best-effort: if the agentId text format changes or a result is +/// missing, the sub-agent is still usable but without tool_use linkage. +pub fn link_subagents_to_calls( + subagents: &mut [SubAgentSession], + tool_exchanges: &[ToolExchange], +) { + use regex::Regex; + + if subagents.is_empty() || tool_exchanges.is_empty() { + return; + } + + // Compile once, match many + let re = Regex::new(r"agentId:\s*(\S+)").expect("valid regex"); + + for exchange in tool_exchanges { + // Only look at Agent tool calls + if exchange.call.name != crate::types::ToolName::Agent { + continue; + } + + // Extract agentId from the tool result text + let agent_id = exchange + .result + .as_ref() + .and_then(|r| r.content.as_deref()) + .and_then(|text| re.captures(text)) + .and_then(|caps| caps.get(1)) + .map(|m| m.as_str()); + + let agent_id = match agent_id { + Some(id) => id, + None => continue, + }; + + // Find matching sub-agent and set the linkage + if let Some(subagent) = subagents.iter_mut().find(|s| s.agent_id == agent_id) { + subagent.parent_tool_call_id = Some(exchange.call.id.clone()); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{ToolCall, ToolName, ToolResultData}; + + #[test] + fn test_link_subagents_to_calls_matches_agent_id() { + let mut subagents = vec![ + SubAgentSession { + agent_id: "abc123def".to_string(), + meta: SubAgentMeta { agent_type: Some("Explore".to_string()) }, + messages: vec![], + parent_tool_call_id: None, + }, + SubAgentSession { + agent_id: "xyz789".to_string(), + meta: SubAgentMeta { agent_type: None }, + messages: vec![], + parent_tool_call_id: None, + }, + ]; + + let exchanges = vec![ + ToolExchange { + call: ToolCall { + id: "toolu_01ABC".to_string(), + name: ToolName::Agent, + input: serde_json::json!({"description": "test"}), + source_message_uuid: "msg-1".to_string(), + }, + result: Some(ToolResultData { + tool_use_id: "toolu_01ABC".to_string(), + content: Some("agentId: abc123def (use SendMessage with to: 'abc123def' to continue)\ntotal_tokens: 1000".to_string()), + is_error: false, + source_message_uuid: "msg-2".to_string(), + }), + }, + ToolExchange { + call: ToolCall { + id: "toolu_02DEF".to_string(), + name: ToolName::Read, + input: serde_json::json!({}), + source_message_uuid: "msg-3".to_string(), + }, + result: None, + }, + ]; + + link_subagents_to_calls(&mut subagents, &exchanges); + + assert_eq!(subagents[0].parent_tool_call_id, Some("toolu_01ABC".to_string())); + assert_eq!(subagents[1].parent_tool_call_id, None); + } + + #[test] + fn test_link_subagents_empty_inputs() { + let mut empty_subagents: Vec = vec![]; + let empty_exchanges: Vec = vec![]; + link_subagents_to_calls(&mut empty_subagents, &empty_exchanges); + // No panic + } + + #[test] + fn test_link_subagents_no_match() { + let mut subagents = vec![SubAgentSession { + agent_id: "no_match".to_string(), + meta: SubAgentMeta { agent_type: None }, + messages: vec![], + parent_tool_call_id: None, + }]; + + let exchanges = vec![ToolExchange { + call: ToolCall { + id: "toolu_99".to_string(), + name: ToolName::Agent, + input: serde_json::json!({}), + source_message_uuid: "msg-1".to_string(), + }, + result: Some(ToolResultData { + tool_use_id: "toolu_99".to_string(), + content: Some("agentId: different_id\ntokens: 500".to_string()), + is_error: false, + source_message_uuid: "msg-2".to_string(), + }), + }]; + + link_subagents_to_calls(&mut subagents, &exchanges); + assert_eq!(subagents[0].parent_tool_call_id, None); + } +} diff --git a/src/tree.rs b/src/tree.rs new file mode 100644 index 0000000..ca8f37d --- /dev/null +++ b/src/tree.rs @@ -0,0 +1,171 @@ +//! Conversation tree module — builds a parent/child tree from `RawMessage`s. +//! +//! Claude Code sessions are not purely linear: the user can edit earlier +//! messages, producing branches. Each message carries a `uuid` and a +//! `parentUuid` that describe the relationship. This module reconstructs +//! the tree so callers can walk threads, detect branches, and select the +//! main thread. + +use std::collections::HashMap; + +use crate::types::RawMessage; + +// --------------------------------------------------------------------------- +// Node & tree types +// --------------------------------------------------------------------------- + +/// A single node in the conversation tree. +#[derive(Debug)] +pub struct ConversationNode { + /// The UUID of this message. + pub uuid: String, + /// The raw message stored at this node. + pub message: RawMessage, + /// UUIDs of direct children, in insertion order. + pub children: Vec, +} + +/// The full conversation tree for a session. +/// +/// A session may have multiple roots when the first message has no +/// `parentUuid`, or when a message refers to a parent that is not present +/// in the slice provided to [`ConversationTree::build`]. +#[derive(Debug)] +pub struct ConversationTree { + /// Root node UUIDs (messages with no parent or with an unknown parent). + pub roots: Vec, + /// All nodes indexed by UUID. + pub nodes: HashMap, +} + +// --------------------------------------------------------------------------- +// UUID / parent-UUID helpers +// --------------------------------------------------------------------------- + +/// Extract the `uuid` from any `RawMessage` variant. +/// +/// Returns `None` for variants that carry no UUID (e.g. `QueueOperation`). +pub fn message_uuid(msg: &RawMessage) -> Option<&str> { + match msg { + RawMessage::User(m) => m.uuid.as_deref(), + RawMessage::Assistant(m) => m.uuid.as_deref(), + RawMessage::Progress(m) => m.uuid.as_deref(), + RawMessage::System(m) => m.uuid.as_deref(), + RawMessage::QueueOperation(_) + | RawMessage::FileHistorySnapshot(_) + | RawMessage::LastPrompt(_) => None, + } +} + +/// Extract the `parent_uuid` from any `RawMessage` variant. +/// +/// Returns `None` for variants that carry no parent UUID. +pub fn message_parent_uuid(msg: &RawMessage) -> Option<&str> { + match msg { + RawMessage::User(m) => m.parent_uuid.as_deref(), + RawMessage::Assistant(m) => m.parent_uuid.as_deref(), + RawMessage::Progress(m) => m.parent_uuid.as_deref(), + RawMessage::System(m) => m.parent_uuid.as_deref(), + RawMessage::QueueOperation(_) + | RawMessage::FileHistorySnapshot(_) + | RawMessage::LastPrompt(_) => None, + } +} + +// --------------------------------------------------------------------------- +// ConversationTree impl +// --------------------------------------------------------------------------- + +impl ConversationTree { + /// Build a conversation tree from a sequence of messages. + /// + /// Messages without a UUID (e.g. `QueueOperation`) are silently skipped. + /// If a message's `parentUuid` is present but not found in the set, + /// that message is treated as a root. + pub fn build(messages: &[RawMessage]) -> Self { + let mut nodes: HashMap = HashMap::new(); + let mut roots: Vec = Vec::new(); + + // First pass: insert every addressable message as a node. + for msg in messages { + if let Some(uuid) = message_uuid(msg) { + nodes.insert( + uuid.to_string(), + ConversationNode { + uuid: uuid.to_string(), + message: msg.clone(), + children: Vec::new(), + }, + ); + } + } + + // Second pass: collect (uuid, parent_uuid) pairs so we can wire up + // parent→child edges without a simultaneous mutable borrow. + let parent_links: Vec<(String, Option)> = messages + .iter() + .filter_map(|msg| { + let uuid = message_uuid(msg)?.to_string(); + let parent = message_parent_uuid(msg).map(|s| s.to_string()); + Some((uuid, parent)) + }) + .collect(); + + for (uuid, parent_uuid) in parent_links { + match parent_uuid { + Some(parent_id) if nodes.contains_key(&parent_id) => { + // Safe: parent_id != uuid (a message cannot be its own parent). + nodes + .get_mut(&parent_id) + .expect("parent key confirmed above") + .children + .push(uuid); + } + _ => { + // No parent, or parent not in the provided slice — treat as root. + roots.push(uuid); + } + } + } + + ConversationTree { roots, nodes } + } + + /// Walk the *main thread*: start from the first root and always follow + /// the first child at each step. + /// + /// In a linear session this is the complete conversation. In a branching + /// session this is the path taken before any edits. + pub fn main_thread(&self) -> Vec<&ConversationNode> { + let mut result = Vec::new(); + if let Some(root_id) = self.roots.first() { + let mut current = root_id.as_str(); + loop { + match self.nodes.get(current) { + Some(node) => { + result.push(node); + match node.children.first() { + Some(first_child) => current = first_child.as_str(), + None => break, + } + } + None => break, + } + } + } + result + } + + /// Returns `true` when every node has at most one child (no branches). + pub fn is_linear(&self) -> bool { + self.nodes.values().all(|n| n.children.len() <= 1) + } + + /// Returns all nodes that have more than one child (branch points). + pub fn branch_points(&self) -> Vec<&ConversationNode> { + self.nodes + .values() + .filter(|n| n.children.len() > 1) + .collect() + } +} diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..e4c92ac --- /dev/null +++ b/src/types.rs @@ -0,0 +1,847 @@ +//! Core types for parsing Claude Code JSONL session data. + +use camino::Utf8PathBuf; +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// Content types +// --------------------------------------------------------------------------- + +/// Content is either a plain string or an array of content blocks. +/// +/// Uses a custom deserializer so that `Blocks` variant applies lenient +/// deserialization — unknown content block types (e.g. `tool_reference`) +/// are silently skipped instead of failing the entire message. +#[derive(Debug, Clone, Serialize)] +#[serde(untagged)] +pub enum Content { + Text(String), + Blocks(Vec), +} + +impl<'de> serde::Deserialize<'de> for Content { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + let value = serde_json::Value::deserialize(deserializer)?; + match value { + serde_json::Value::String(s) => Ok(Content::Text(s)), + serde_json::Value::Array(arr) => { + let blocks = arr + .into_iter() + .filter_map(|v| { + serde_json::from_value::(v.clone()) + .ok() + .or_else(|| { + tracing::debug!( + "Skipping unknown content block: {:?}", + v.get("type") + ); + None + }) + }) + .collect(); + Ok(Content::Blocks(blocks)) + } + other => Err(serde::de::Error::custom(format!( + "expected string or array for Content, got {}", + match &other { + serde_json::Value::Null => "null", + serde_json::Value::Bool(_) => "bool", + serde_json::Value::Number(_) => "number", + serde_json::Value::Object(_) => "object", + _ => "unknown", + } + ))), + } + } +} + +/// Typed content block inside messages. +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ContentBlock { + Text { + text: String, + }, + ToolUse { + id: String, + name: String, + input: serde_json::Value, + #[serde(default)] + caller: Option, + }, + ToolResult { + tool_use_id: String, + #[serde(default)] + content: Option, + #[serde(default)] + is_error: bool, + }, + Thinking { + thinking: String, + }, + Image { + source: serde_json::Value, + }, +} + +// --------------------------------------------------------------------------- +// Lenient content block deserialization +// --------------------------------------------------------------------------- + +/// Deserializes a `Vec` leniently — unknown block types are +/// silently skipped instead of failing the entire message. +fn deserialize_content_blocks<'de, D>( + deserializer: D, +) -> std::result::Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + use serde::Deserialize as _; + let raw: Vec = Vec::deserialize(deserializer)?; + Ok(raw + .into_iter() + .filter_map(|v| { + serde_json::from_value::(v.clone()).ok().or_else(|| { + tracing::debug!("Skipping unknown content block: {:?}", v.get("type")); + None + }) + }) + .collect()) +} + +// --------------------------------------------------------------------------- +// Top-level JSONL line discriminator +// --------------------------------------------------------------------------- + +/// Top-level JSONL line discriminator. +#[derive(Debug, Clone, Deserialize)] +#[serde(tag = "type", rename_all = "kebab-case")] +pub enum RawMessage { + User(RawUserMessage), + Assistant(RawAssistantMessage), + Progress(RawProgressMessage), + System(RawSystemMessage), + QueueOperation(RawQueueOperation), + FileHistorySnapshot(RawFileHistorySnapshot), + LastPrompt(RawLastPrompt), +} + +// --------------------------------------------------------------------------- +// User message +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawUserMessage { + #[serde(default)] + pub uuid: Option, + #[serde(default)] + pub parent_uuid: Option, + #[serde(default)] + pub timestamp: Option, + #[serde(default)] + pub session_id: Option, + #[serde(default)] + pub cwd: Option, + #[serde(default)] + pub version: Option, + #[serde(default)] + pub git_branch: Option, + #[serde(default)] + pub is_sidechain: bool, + #[serde(default)] + pub is_meta: Option, + #[serde(default)] + pub user_type: Option, + pub message: UserMessageInner, +} + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct UserMessageInner { + pub role: String, + pub content: Content, +} + +// --------------------------------------------------------------------------- +// Assistant message +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawAssistantMessage { + #[serde(default)] + pub uuid: Option, + #[serde(default)] + pub parent_uuid: Option, + #[serde(default)] + pub timestamp: Option, + #[serde(default)] + pub session_id: Option, + #[serde(default)] + pub cwd: Option, + #[serde(default)] + pub version: Option, + #[serde(default)] + pub git_branch: Option, + #[serde(default)] + pub is_sidechain: bool, + #[serde(default)] + pub request_id: Option, + pub message: AssistantInner, +} + +// NOTE: AssistantInner is the Anthropic API response object nested inside +// the Claude Code JSONL wrapper. The API uses snake_case (stop_reason, etc.) +// unlike the outer JSONL wrapper which uses camelCase. +#[derive(Debug, Clone, Deserialize)] +pub struct AssistantInner { + #[serde(default)] + pub model: Option, + #[serde(default)] + pub id: Option, + #[serde(default, rename = "type")] + pub message_type: Option, + #[serde(default)] + pub role: Option, + #[serde(default, deserialize_with = "deserialize_content_blocks")] + pub content: Vec, + #[serde(default)] + pub stop_reason: Option, + #[serde(default)] + pub stop_sequence: Option, + #[serde(default)] + pub usage: Option, +} + +// --------------------------------------------------------------------------- +// Progress message +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawProgressMessage { + #[serde(default)] + pub uuid: Option, + #[serde(default)] + pub parent_uuid: Option, + #[serde(default)] + pub timestamp: Option, + #[serde(default)] + pub session_id: Option, + #[serde(default)] + pub cwd: Option, + #[serde(default)] + pub version: Option, + #[serde(default)] + pub git_branch: Option, + #[serde(default)] + pub is_sidechain: bool, + #[serde(default)] + pub data: Option, +} + +// --------------------------------------------------------------------------- +// System message +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawSystemMessage { + #[serde(default)] + pub uuid: Option, + #[serde(default)] + pub parent_uuid: Option, + #[serde(default)] + pub timestamp: Option, + #[serde(default)] + pub session_id: Option, + #[serde(default)] + pub cwd: Option, + #[serde(default)] + pub version: Option, + #[serde(default)] + pub git_branch: Option, + #[serde(default)] + pub is_sidechain: bool, + #[serde(default)] + pub data: Option, +} + +// --------------------------------------------------------------------------- +// Queue operation +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawQueueOperation { + pub operation: String, + #[serde(default)] + pub timestamp: Option, + #[serde(default)] + pub session_id: Option, +} + +// --------------------------------------------------------------------------- +// File history snapshot +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawFileHistorySnapshot { + #[serde(default)] + pub message_id: Option, + #[serde(default)] + pub is_snapshot_update: bool, + #[serde(default)] + pub snapshot: Option, +} + +// --------------------------------------------------------------------------- +// Last prompt +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawLastPrompt { + #[serde(default)] + pub last_prompt: Option, + #[serde(default)] + pub session_id: Option, +} + +// --------------------------------------------------------------------------- +// Tool types (for correlation module later) +// --------------------------------------------------------------------------- + +/// Known tool names used by Claude Code. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ToolName { + Bash, + Read, + Write, + Edit, + Grep, + Glob, + Agent, + Skill, + WebSearch, + WebFetch, + TodoWrite, + NotebookEdit, + Other(String), +} + +impl From for ToolName { + fn from(s: String) -> Self { + match s.as_str() { + "Bash" => ToolName::Bash, + "Read" => ToolName::Read, + "Write" => ToolName::Write, + "Edit" => ToolName::Edit, + "Grep" => ToolName::Grep, + "Glob" => ToolName::Glob, + "Agent" => ToolName::Agent, + "Skill" => ToolName::Skill, + "WebSearch" => ToolName::WebSearch, + "WebFetch" => ToolName::WebFetch, + "TodoWrite" => ToolName::TodoWrite, + "NotebookEdit" => ToolName::NotebookEdit, + other => ToolName::Other(other.to_string()), + } + } +} + +/// A tool call extracted from an assistant message. +#[derive(Debug, Clone)] +pub struct ToolCall { + pub id: String, + pub name: ToolName, + pub input: serde_json::Value, + pub source_message_uuid: String, +} + +/// A tool result extracted from a user message. +#[derive(Debug, Clone)] +pub struct ToolResultData { + pub tool_use_id: String, + pub content: Option, + pub is_error: bool, + pub source_message_uuid: String, +} + +/// A correlated tool call + result pair. +#[derive(Debug, Clone)] +pub struct ToolExchange { + pub call: ToolCall, + pub result: Option, +} + +// --------------------------------------------------------------------------- +// Discovery types (for discovery module later) +// --------------------------------------------------------------------------- + +/// A discovered Claude Code project directory. +#[derive(Debug, Clone)] +pub struct ClaudeProject { + pub path: Utf8PathBuf, + pub original_path: String, + pub sessions: Vec, +} + +/// Reference to a session (not yet parsed). +#[derive(Debug, Clone)] +pub struct SessionRef { + pub id: String, + pub jsonl_path: Utf8PathBuf, + pub artifacts_dir: Option, + pub index_entry: Option, +} + +/// From sessions-index.json (when available). +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SessionIndexEntry { + #[serde(default)] + pub session_id: Option, + #[serde(default)] + pub first_prompt: Option, + #[serde(default)] + pub summary: Option, + #[serde(default)] + pub message_count: Option, + #[serde(default)] + pub created: Option, + #[serde(default)] + pub modified: Option, + #[serde(default)] + pub git_branch: Option, + #[serde(default)] + pub project_path: Option, +} + +// --------------------------------------------------------------------------- +// Sub-agent types +// --------------------------------------------------------------------------- + +/// Sub-agent metadata from .meta.json. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SubAgentMeta { + #[serde(default)] + pub agent_type: Option, +} + +/// A parsed sub-agent session. +#[derive(Debug, Clone)] +pub struct SubAgentSession { + pub agent_id: String, + pub meta: SubAgentMeta, + pub messages: Vec, + pub parent_tool_call_id: Option, +} + +// --------------------------------------------------------------------------- +// MessageMeta (convenience, future use) +// --------------------------------------------------------------------------- + +/// Common metadata extracted from any message. Defined for future consumers. +#[derive(Debug, Clone)] +pub struct MessageMeta { + pub uuid: String, + pub parent_uuid: Option, + pub timestamp: Option, + pub session_id: String, + pub cwd: Option, + pub version: Option, + pub git_branch: Option, + pub is_sidechain: bool, +} + +// --------------------------------------------------------------------------- +// ParsedSession +// --------------------------------------------------------------------------- + +/// A fully parsed session with all correlations built. +#[derive(Debug)] +pub struct ParsedSession { + pub messages: Vec, + pub tree: crate::tree::ConversationTree, + pub tool_exchanges: Vec, + pub subagents: Vec, +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_content_text_string() { + let json = r#""Hello world""#; + let content: Content = serde_json::from_str(json).unwrap(); + match content { + Content::Text(s) => assert_eq!(s, "Hello world"), + _ => panic!("Expected Content::Text"), + } + } + + #[test] + fn parse_content_blocks() { + let json = r#"[{"type": "text", "text": "Hello"}]"#; + let content: Content = serde_json::from_str(json).unwrap(); + match content { + Content::Blocks(blocks) => { + assert_eq!(blocks.len(), 1); + match &blocks[0] { + ContentBlock::Text { text } => assert_eq!(text, "Hello"), + _ => panic!("Expected ContentBlock::Text"), + } + } + _ => panic!("Expected Content::Blocks"), + } + } + + #[test] + fn parse_tool_use_block() { + let json = r#"{"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}}"#; + let block: ContentBlock = serde_json::from_str(json).unwrap(); + match block { + ContentBlock::ToolUse { id, name, .. } => { + assert_eq!(id, "toolu_123"); + assert_eq!(name, "Bash"); + } + _ => panic!("Expected ContentBlock::ToolUse"), + } + } + + #[test] + fn parse_tool_result_block() { + let json = r#"{"type": "tool_result", "tool_use_id": "toolu_123", "content": "output text", "is_error": false}"#; + let block: ContentBlock = serde_json::from_str(json).unwrap(); + match block { + ContentBlock::ToolResult { + tool_use_id, + is_error, + .. + } => { + assert_eq!(tool_use_id, "toolu_123"); + assert!(!is_error); + } + _ => panic!("Expected ContentBlock::ToolResult"), + } + } + + #[test] + fn parse_thinking_block() { + let json = r#"{"type": "thinking", "thinking": "Let me consider..."}"#; + let block: ContentBlock = serde_json::from_str(json).unwrap(); + match block { + ContentBlock::Thinking { thinking } => { + assert_eq!(thinking, "Let me consider..."); + } + _ => panic!("Expected ContentBlock::Thinking"), + } + } + + #[test] + fn parse_queue_operation() { + let json = r#"{"type": "queue-operation", "operation": "enqueue", "timestamp": "2026-03-14T21:15:17.531Z", "sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73"}"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + match msg { + RawMessage::QueueOperation(op) => { + assert_eq!(op.operation, "enqueue"); + assert_eq!( + op.session_id.as_deref(), + Some("00f72d8d-fc54-485c-a082-310ffcabdb73") + ); + } + _ => panic!("Expected RawMessage::QueueOperation"), + } + } + + #[test] + fn parse_user_message_with_string_content() { + let json = r#"{ + "parentUuid": "b1ab1ac7-fdb6-4e25-bc17-4c060b470b4a", + "isSidechain": false, + "userType": "external", + "cwd": "G:\\dev\\projects\\dirigent", + "sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73", + "version": "2.1.71", + "gitBranch": "main", + "type": "user", + "message": { + "role": "user", + "content": "Hello world" + }, + "isMeta": false, + "uuid": "1d843a4a-b99d-4c02-91a3-7cfe3dcac9f0", + "timestamp": "2026-03-14T21:08:58.586Z" + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + match msg { + RawMessage::User(u) => { + assert_eq!(u.uuid.as_deref(), Some("1d843a4a-b99d-4c02-91a3-7cfe3dcac9f0")); + assert_eq!(u.session_id.as_deref(), Some("00f72d8d-fc54-485c-a082-310ffcabdb73")); + assert_eq!(u.is_meta, Some(false)); + match &u.message.content { + Content::Text(s) => assert_eq!(s, "Hello world"), + _ => panic!("Expected Content::Text"), + } + } + _ => panic!("Expected RawMessage::User"), + } + } + + #[test] + fn parse_assistant_message_with_tool_use() { + let json = r#"{ + "parentUuid": "77793647-f957-4aec-8b04-a9c07e01e37b", + "isSidechain": false, + "userType": "external", + "cwd": "G:\\dev\\projects\\dirigent", + "sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73", + "version": "2.1.71", + "gitBranch": "main", + "message": { + "model": "claude-opus-4-6", + "id": "msg_01NcwYjEydGEyZCSCgwmcnYd", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "tool_use", + "id": "toolu_01DP5mkAQnAi2o54idq24cPn", + "name": "Agent", + "input": { + "description": "Investigate config sources of truth", + "subagent_type": "Explore", + "prompt": "test prompt" + }, + "caller": { "type": "direct" } + } + ], + "stop_reason": null, + "stop_sequence": null, + "usage": { + "input_tokens": 3, + "cache_creation_input_tokens": 20147, + "cache_read_input_tokens": 0, + "output_tokens": 9, + "service_tier": "standard" + } + }, + "requestId": "req_011CZ3fYWGjcQCgh5d58d2k8", + "type": "assistant", + "uuid": "6cad0d13-d0ae-47fa-a6b1-b7b45a2b5e0b", + "timestamp": "2026-03-14T21:15:27.916Z" + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + match msg { + RawMessage::Assistant(a) => { + assert_eq!(a.uuid.as_deref(), Some("6cad0d13-d0ae-47fa-a6b1-b7b45a2b5e0b")); + assert_eq!(a.message.model.as_deref(), Some("claude-opus-4-6")); + assert_eq!(a.message.content.len(), 1); + match &a.message.content[0] { + ContentBlock::ToolUse { name, id, .. } => { + assert_eq!(name, "Agent"); + assert_eq!(id, "toolu_01DP5mkAQnAi2o54idq24cPn"); + } + _ => panic!("Expected ContentBlock::ToolUse"), + } + assert!(a.message.stop_reason.is_none()); + assert!(a.message.usage.is_some()); + } + _ => panic!("Expected RawMessage::Assistant"), + } + } + + #[test] + fn unknown_content_block_type_skipped_in_assistant() { + let json = r#"{ + "parentUuid": null, + "isSidechain": false, + "sessionId": "test", + "message": { + "role": "assistant", + "content": [ + {"type": "text", "text": "known"}, + {"type": "future_type", "data": "something"} + ] + }, + "type": "assistant", + "uuid": "test-uuid", + "timestamp": "2026-01-01T00:00:00Z" + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + match msg { + RawMessage::Assistant(a) => { + assert_eq!(a.message.content.len(), 1); + match &a.message.content[0] { + ContentBlock::Text { text } => assert_eq!(text, "known"), + _ => panic!("Expected ContentBlock::Text"), + } + } + _ => panic!("Expected RawMessage::Assistant"), + } + } + + // ----------------------------------------------------------------------- + // Regression tests for parse failure audit (2026-04-04) + // ----------------------------------------------------------------------- + + #[test] + fn tool_reference_in_tool_result_content_does_not_fail() { + // Suggestion 1 & 3: tool_reference blocks inside tool_result.content + // should be silently skipped, not fail the entire message. + let json = r#"{ + "type": "user", + "uuid": "test-uuid", + "parentUuid": null, + "isSidechain": false, + "sessionId": "test-session", + "message": { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_abc123", + "content": [ + {"type": "text", "text": "File contents here"}, + {"type": "tool_reference", "tool_name": "TodoWrite"} + ], + "is_error": false + } + ] + } + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + match msg { + RawMessage::User(u) => { + match &u.message.content { + Content::Blocks(blocks) => { + assert_eq!(blocks.len(), 1); + match &blocks[0] { + ContentBlock::ToolResult { tool_use_id, content, .. } => { + assert_eq!(tool_use_id, "toolu_abc123"); + // The inner content should have 1 block (text), tool_reference skipped + match content.as_ref().unwrap() { + Content::Blocks(inner) => { + assert_eq!(inner.len(), 1); + match &inner[0] { + ContentBlock::Text { text } => { + assert_eq!(text, "File contents here"); + } + _ => panic!("Expected inner ContentBlock::Text"), + } + } + _ => panic!("Expected inner Content::Blocks"), + } + } + _ => panic!("Expected ContentBlock::ToolResult"), + } + } + _ => panic!("Expected Content::Blocks"), + } + } + _ => panic!("Expected RawMessage::User"), + } + } + + #[test] + fn file_history_snapshot_parses() { + // Suggestion 2: file-history-snapshot lines should parse, not fail. + let json = r#"{ + "type": "file-history-snapshot", + "messageId": "abc-123", + "isSnapshotUpdate": false, + "snapshot": { + "messageId": "abc-123", + "trackedFileBackups": { + "src/main.rs": {"backupFileName": "main.rs.bak", "backupTime": "2026-01-01T00:00:00Z", "version": "1"} + }, + "timestamp": "2026-01-01T00:00:00Z" + } + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + match msg { + RawMessage::FileHistorySnapshot(s) => { + assert_eq!(s.message_id.as_deref(), Some("abc-123")); + assert!(!s.is_snapshot_update); + assert!(s.snapshot.is_some()); + } + _ => panic!("Expected RawMessage::FileHistorySnapshot"), + } + } + + #[test] + fn last_prompt_parses() { + // Suggestion 2: last-prompt lines should parse, not fail. + let json = r#"{ + "type": "last-prompt", + "lastPrompt": "Fix the bug in auth middleware", + "sessionId": "session-uuid-123" + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + match msg { + RawMessage::LastPrompt(lp) => { + assert_eq!(lp.last_prompt.as_deref(), Some("Fix the bug in auth middleware")); + assert_eq!(lp.session_id.as_deref(), Some("session-uuid-123")); + } + _ => panic!("Expected RawMessage::LastPrompt"), + } + } + + #[test] + fn unknown_content_block_in_user_message_skipped() { + // Suggestion 3: Unknown block types in user message content + // should be silently skipped (lenient everywhere). + let json = r#"{ + "type": "user", + "uuid": "test-uuid", + "isSidechain": false, + "sessionId": "test", + "message": { + "role": "user", + "content": [ + {"type": "text", "text": "known"}, + {"type": "future_unknown_type", "data": "something"} + ] + } + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + match msg { + RawMessage::User(u) => { + match &u.message.content { + Content::Blocks(blocks) => { + assert_eq!(blocks.len(), 1); + match &blocks[0] { + ContentBlock::Text { text } => assert_eq!(text, "known"), + _ => panic!("Expected ContentBlock::Text"), + } + } + _ => panic!("Expected Content::Blocks"), + } + } + _ => panic!("Expected RawMessage::User"), + } + } + + #[test] + fn tool_name_from_string() { + assert_eq!(ToolName::from("Bash".to_string()), ToolName::Bash); + assert_eq!(ToolName::from("Read".to_string()), ToolName::Read); + assert_eq!(ToolName::from("Agent".to_string()), ToolName::Agent); + assert_eq!(ToolName::from("WebSearch".to_string()), ToolName::WebSearch); + assert_eq!( + ToolName::from("CustomTool".to_string()), + ToolName::Other("CustomTool".to_string()) + ); + } +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..2c010f4 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,70 @@ +use chrono::{DateTime, Utc}; + +/// Parse a timestamp from various formats found in Claude Code data. +/// +/// Supports: +/// - ISO 8601 string: "2026-03-22T17:00:13.192Z" +/// - Unix milliseconds (number > 1e12): 1769461914249 +/// - Unix seconds (number <= 1e12): 1769461914 +pub fn parse_timestamp(value: &serde_json::Value) -> Option> { + match value { + serde_json::Value::String(s) => { + DateTime::parse_from_rfc3339(s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + } + serde_json::Value::Number(n) => { + if let Some(ms) = n.as_i64() { + if ms > 1_000_000_000_000 { + DateTime::from_timestamp_millis(ms) + } else { + DateTime::from_timestamp(ms, 0) + } + } else { + None + } + } + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Datelike; + + #[test] + fn parse_timestamp_iso8601() { + let v = serde_json::json!("2026-03-22T17:00:13.192Z"); + let dt = parse_timestamp(&v).unwrap(); + assert_eq!(dt.year(), 2026); + assert_eq!(dt.month(), 3); + assert_eq!(dt.day(), 22); + } + + #[test] + fn parse_timestamp_unix_millis() { + let v = serde_json::json!(1769461914249_i64); + let dt = parse_timestamp(&v).unwrap(); + assert!(dt.year() >= 2025); + } + + #[test] + fn parse_timestamp_unix_seconds() { + let v = serde_json::json!(1769461914_i64); + let dt = parse_timestamp(&v).unwrap(); + assert!(dt.year() >= 2025); + } + + #[test] + fn parse_timestamp_null_returns_none() { + let v = serde_json::json!(null); + assert!(parse_timestamp(&v).is_none()); + } + + #[test] + fn parse_timestamp_invalid_string_returns_none() { + let v = serde_json::json!("not a date"); + assert!(parse_timestamp(&v).is_none()); + } +} diff --git a/tests/fixtures/branching_tree.jsonl b/tests/fixtures/branching_tree.jsonl new file mode 100644 index 0000000..93f440c --- /dev/null +++ b/tests/fixtures/branching_tree.jsonl @@ -0,0 +1,6 @@ +{"type":"user","uuid":"r-001","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Help me"}} +{"type":"assistant","uuid":"a-001","parentUuid":"r-001","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-001","message":{"model":"claude-opus-4-6","id":"msg-001","type":"message","role":"assistant","content":[{"type":"text","text":"Sure"}],"stop_reason":"end_turn","usage":{"input_tokens":10,"output_tokens":5}}} +{"type":"user","uuid":"u-002","parentUuid":"a-001","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Do option A"}} +{"type":"assistant","uuid":"a-003","parentUuid":"u-002","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-002","message":{"model":"claude-opus-4-6","id":"msg-003","type":"message","role":"assistant","content":[{"type":"text","text":"Doing A"}],"stop_reason":"end_turn","usage":{"input_tokens":15,"output_tokens":5}}} +{"type":"user","uuid":"u-002b","parentUuid":"a-001","timestamp":"2026-03-23T10:00:04.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Actually, do option B"}} +{"type":"assistant","uuid":"a-003b","parentUuid":"u-002b","timestamp":"2026-03-23T10:00:05.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-003","message":{"model":"claude-opus-4-6","id":"msg-003b","type":"message","role":"assistant","content":[{"type":"text","text":"Doing B"}],"stop_reason":"end_turn","usage":{"input_tokens":15,"output_tokens":5}}} diff --git a/tests/fixtures/minimal_session.jsonl b/tests/fixtures/minimal_session.jsonl new file mode 100644 index 0000000..6476f6f --- /dev/null +++ b/tests/fixtures/minimal_session.jsonl @@ -0,0 +1,6 @@ +{"type":"queue-operation","operation":"enqueue","timestamp":"2026-03-14T21:00:00.000Z","sessionId":"test-session-001"} +{"type":"queue-operation","operation":"dequeue","timestamp":"2026-03-14T21:00:00.001Z","sessionId":"test-session-001"} +{"type":"user","uuid":"u-001","parentUuid":null,"timestamp":"2026-03-14T21:00:01.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Hello, help me with this project"}} +{"type":"assistant","uuid":"a-001","parentUuid":"u-001","timestamp":"2026-03-14T21:00:02.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-001","message":{"model":"claude-opus-4-6","id":"msg-001","type":"message","role":"assistant","content":[{"type":"text","text":"I'll help you."},{"type":"tool_use","id":"toolu_01","name":"Bash","input":{"command":"ls","description":"List files"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":50}}} +{"type":"user","uuid":"u-002","parentUuid":"a-001","timestamp":"2026-03-14T21:00:03.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01","content":"file1.rs\nfile2.rs","is_error":false}]}} +{"type":"assistant","uuid":"a-002","parentUuid":"u-002","timestamp":"2026-03-14T21:00:04.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-002","message":{"model":"claude-opus-4-6","id":"msg-002","type":"message","role":"assistant","content":[{"type":"text","text":"I can see two Rust files in the directory."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":30}}} diff --git a/tests/fixtures/noise_patterns.jsonl b/tests/fixtures/noise_patterns.jsonl new file mode 100644 index 0000000..1369fe2 --- /dev/null +++ b/tests/fixtures/noise_patterns.jsonl @@ -0,0 +1,9 @@ +{"type":"queue-operation","operation":"enqueue","timestamp":"2026-03-14T21:00:00.000Z","sessionId":"test-session-noise"} +{"type":"user","uuid":"u-n-001","parentUuid":null,"timestamp":"2026-03-14T21:00:01.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"message":{"role":"user","content":"system injected stuff"}} +{"type":"user","uuid":"u-n-002","parentUuid":"u-n-001","timestamp":"2026-03-14T21:00:02.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Warmup"}} +{"type":"user","uuid":"u-n-003","parentUuid":"u-n-002","timestamp":"2026-03-14T21:00:03.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"[Request interrupted by user"}} +{"type":"user","uuid":"u-n-004","parentUuid":"u-n-003","timestamp":"2026-03-14T21:00:04.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"This session is being continued from a previous conversation"}} +{"type":"user","uuid":"u-n-005","parentUuid":"u-n-004","timestamp":"2026-03-14T21:00:05.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"API Error: rate limit exceeded"}} +{"type":"user","uuid":"u-n-006","parentUuid":"u-n-005","timestamp":"2026-03-14T21:00:06.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Caveat: The messages below were generated by the user"}} +{"type":"user","uuid":"u-n-007","parentUuid":"u-n-006","timestamp":"2026-03-14T21:00:07.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Please help me fix this bug"}} +{"type":"assistant","uuid":"a-n-001","parentUuid":"u-n-007","timestamp":"2026-03-14T21:00:08.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"message":{"id":"msg-n-001","role":"assistant","content":[{"type":"text","text":"Sure, let me help."}],"stop_reason":"end_turn"}} diff --git a/tests/fixtures/streaming_dedup.jsonl b/tests/fixtures/streaming_dedup.jsonl new file mode 100644 index 0000000..e787149 --- /dev/null +++ b/tests/fixtures/streaming_dedup.jsonl @@ -0,0 +1,6 @@ +{"type":"user","uuid":"u-100","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"What files are here?"}} +{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me"}],"stop_reason":null,"usage":{"input_tokens":50,"output_tokens":3}}} +{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.100Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me look"},{"type":"tool_use","id":"toolu_100","name":"Bash","input":{"command":""}}],"stop_reason":null,"usage":{"input_tokens":50,"output_tokens":12}}} +{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.200Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me look at this."},{"type":"tool_use","id":"toolu_100","name":"Bash","input":{"command":"ls"}}],"stop_reason":"tool_use","usage":{"input_tokens":50,"output_tokens":20}}} +{"type":"user","uuid":"u-101","parentUuid":"a-100","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_100","content":"main.rs\nlib.rs","is_error":false}]}} +{"type":"assistant","uuid":"a-101","parentUuid":"u-101","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-101","message":{"model":"claude-opus-4-6","id":"msg-101","type":"message","role":"assistant","content":[{"type":"text","text":"Done."}],"stop_reason":"end_turn","usage":{"input_tokens":100,"output_tokens":5}}} diff --git a/tests/fixtures/subagent/parent.jsonl b/tests/fixtures/subagent/parent.jsonl new file mode 100644 index 0000000..c391246 --- /dev/null +++ b/tests/fixtures/subagent/parent.jsonl @@ -0,0 +1,4 @@ +{"type":"user","uuid":"u-300","parentUuid":null,"timestamp":"2026-03-23T12:00:00.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Search the codebase"}} +{"type":"assistant","uuid":"a-300","parentUuid":"u-300","timestamp":"2026-03-23T12:00:01.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-300","message":{"model":"claude-opus-4-6","id":"msg-300","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_300","name":"Agent","input":{"description":"Search codebase","subagent_type":"Explore","prompt":"Find all config files"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":20}}} +{"type":"user","uuid":"u-301","parentUuid":"a-300","timestamp":"2026-03-23T12:00:30.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_300","content":"Found 3 config files","is_error":false}]}} +{"type":"assistant","uuid":"a-301","parentUuid":"u-301","timestamp":"2026-03-23T12:00:31.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-301","message":{"model":"claude-opus-4-6","id":"msg-301","type":"message","role":"assistant","content":[{"type":"text","text":"I found the config files."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":10}}} diff --git a/tests/fixtures/subagent/parent/subagents/agent-abc123.jsonl b/tests/fixtures/subagent/parent/subagents/agent-abc123.jsonl new file mode 100644 index 0000000..7ed323e --- /dev/null +++ b/tests/fixtures/subagent/parent/subagents/agent-abc123.jsonl @@ -0,0 +1,2 @@ +{"type":"user","uuid":"sa-u1","parentUuid":null,"timestamp":"2026-03-23T12:00:02.000Z","sessionId":"agent-abc123","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":true,"isMeta":false,"message":{"role":"user","content":"Find all config files"}} +{"type":"assistant","uuid":"sa-a1","parentUuid":"sa-u1","timestamp":"2026-03-23T12:00:03.000Z","sessionId":"agent-abc123","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":true,"requestId":"req-sa1","message":{"model":"claude-opus-4-6","id":"msg-sa1","type":"message","role":"assistant","content":[{"type":"text","text":"Found config.toml, settings.json, .env"}],"stop_reason":"end_turn","usage":{"input_tokens":50,"output_tokens":15}}} diff --git a/tests/fixtures/subagent/parent/subagents/agent-abc123.meta.json b/tests/fixtures/subagent/parent/subagents/agent-abc123.meta.json new file mode 100644 index 0000000..9fdf2d6 --- /dev/null +++ b/tests/fixtures/subagent/parent/subagents/agent-abc123.meta.json @@ -0,0 +1 @@ +{"agentType": "Explore"} diff --git a/tests/fixtures/tool_correlation.jsonl b/tests/fixtures/tool_correlation.jsonl new file mode 100644 index 0000000..0f4801d --- /dev/null +++ b/tests/fixtures/tool_correlation.jsonl @@ -0,0 +1,6 @@ +{"type":"user","uuid":"u-200","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Fix the bug"}} +{"type":"assistant","uuid":"a-200","parentUuid":"u-200","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-200","message":{"model":"claude-opus-4-6","id":"msg-200","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_200","name":"Bash","input":{"command":"cargo test"}},{"type":"tool_use","id":"toolu_201","name":"Read","input":{"file_path":"src/main.rs"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":50}}} +{"type":"user","uuid":"u-201","parentUuid":"a-200","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_200","content":"test result output","is_error":false},{"type":"tool_result","tool_use_id":"toolu_201","content":"fn main() {}","is_error":false}]}} +{"type":"assistant","uuid":"a-201","parentUuid":"u-201","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-201","message":{"model":"claude-opus-4-6","id":"msg-201","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_202","name":"Write","input":{"file_path":"src/fix.rs","content":"fixed"}}],"stop_reason":"tool_use","usage":{"input_tokens":150,"output_tokens":30}}} +{"type":"user","uuid":"u-202","parentUuid":"a-201","timestamp":"2026-03-23T10:00:04.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_202","content":"File written successfully","is_error":false}]}} +{"type":"assistant","uuid":"a-202","parentUuid":"u-202","timestamp":"2026-03-23T10:00:05.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-202","message":{"model":"claude-opus-4-6","id":"msg-202","type":"message","role":"assistant","content":[{"type":"text","text":"Bug is fixed."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":20}}} diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs new file mode 100644 index 0000000..cfd0134 --- /dev/null +++ b/tests/integration_tests.rs @@ -0,0 +1,294 @@ +use camino::{Utf8Path, Utf8PathBuf}; +use chrono::Datelike; +use dirigent_anth::{ + correlation::correlate_tools, + dedup::dedup_messages, + noise::{classify_noise, NoiseKind}, + parse_session, + tree::ConversationTree, + types::{ContentBlock, RawMessage}, + util::parse_timestamp, +}; + +#[test] +fn parse_minimal_session() { + let path = Utf8Path::new("tests/fixtures/minimal_session.jsonl"); + let messages = parse_session(path).unwrap(); + + assert_eq!(messages.len(), 6, "Expected 6 messages, got {}", messages.len()); + + let type_names: Vec<&str> = messages + .iter() + .map(|m| match m { + RawMessage::User(_) => "user", + RawMessage::Assistant(_) => "assistant", + RawMessage::Progress(_) => "progress", + RawMessage::System(_) => "system", + RawMessage::QueueOperation(_) => "queue-operation", + RawMessage::FileHistorySnapshot(_) => "file-history-snapshot", + RawMessage::LastPrompt(_) => "last-prompt", + }) + .collect(); + + assert_eq!( + type_names.iter().filter(|&&t| t == "queue-operation").count(), + 2 + ); + assert_eq!(type_names.iter().filter(|&&t| t == "user").count(), 2); + assert_eq!( + type_names.iter().filter(|&&t| t == "assistant").count(), + 2 + ); +} + +#[test] +fn parse_line_returns_none_for_invalid_json() { + assert!(dirigent_anth::parse_line("not valid json", 1).is_none()); + assert!(dirigent_anth::parse_line("{}", 1).is_none()); +} + +#[test] +fn dedup_streaming_session() { + let path = Utf8Path::new("tests/fixtures/streaming_dedup.jsonl"); + let messages = parse_session(path).unwrap(); + + // Raw should have 6 lines (including 3 versions of same assistant message) + assert_eq!(messages.len(), 6, "Raw messages: expected 6, got {}", messages.len()); + + let deduped = dedup_messages(messages); + + // After dedup: U1, A1(final), U2, A2 = 4 + assert_eq!(deduped.len(), 4, "Deduped messages: expected 4, got {}", deduped.len()); + + // The kept assistant message must be the final version + let first_assistant = deduped.iter().find(|m| matches!(m, RawMessage::Assistant(_))).unwrap(); + if let RawMessage::Assistant(a) = first_assistant { + assert!(a.message.stop_reason.is_some(), "Deduped assistant should have stop_reason set"); + assert_eq!(a.message.stop_reason.as_deref(), Some("tool_use")); + assert_eq!(a.message.content.len(), 2, "Final version should have 2 content blocks"); + } else { + unreachable!(); + } +} + +#[test] +fn dedup_preserves_non_streamed_messages() { + let path = Utf8Path::new("tests/fixtures/minimal_session.jsonl"); + let messages = parse_session(path).unwrap(); + let count_before = messages.len(); + let deduped = dedup_messages(messages); + // No streaming in minimal_session, so count should be same + assert_eq!(deduped.len(), count_before); +} + +#[test] +fn correlate_parallel_tools() { + let path = Utf8Path::new("tests/fixtures/tool_correlation.jsonl"); + let messages = dirigent_anth::parse_session_deduped(path).unwrap(); + let exchanges = correlate_tools(&messages); + + // 3 tool calls: 2 parallel (Bash+Read) + 1 sequential (Write) + assert_eq!(exchanges.len(), 3); + + // All should have results + assert!(exchanges.iter().all(|e| e.result.is_some())); + + // Verify correct pairing by ID + for ex in &exchanges { + assert_eq!(ex.call.id, ex.result.as_ref().unwrap().tool_use_id); + } +} + +#[test] +fn correlate_no_tools_returns_empty() { + // Test with just a plain user message — no tool calls or results + let messages = vec![ + serde_json::from_str::( + r#"{"type":"user","uuid":"x","timestamp":"2026-01-01T00:00:00Z","sessionId":"s","message":{"role":"user","content":"hello"}}"#, + ) + .unwrap(), + ]; + let exchanges = correlate_tools(&messages); + assert!(exchanges.is_empty()); +} + +#[test] +fn build_branching_tree() { + let path = Utf8Path::new("tests/fixtures/branching_tree.jsonl"); + let messages = dirigent_anth::parse_session(path).unwrap(); + let tree = ConversationTree::build(&messages); + + assert_eq!(tree.roots.len(), 1); + assert!(!tree.is_linear()); + assert_eq!(tree.branch_points().len(), 1); // A1 has 2 children + + let main = tree.main_thread(); + assert_eq!(main.len(), 4); // R → A1 → U2 → A3 (first branch) +} + +#[test] +fn linear_conversation_is_linear() { + let path = Utf8Path::new("tests/fixtures/minimal_session.jsonl"); + let messages = dirigent_anth::parse_session(path).unwrap(); + let tree = ConversationTree::build(&messages); + assert!(tree.is_linear()); +} + +#[test] +fn classify_noise_from_fixture() { + let path = Utf8Path::new("tests/fixtures/noise_patterns.jsonl"); + let messages = dirigent_anth::parse_session(path).unwrap(); + + assert_eq!(messages.len(), 9, "Expected 9 messages in noise fixture"); + + let classifications: Vec> = messages.iter() + .map(classify_noise) + .collect(); + + assert_eq!(classifications[0], Some(NoiseKind::QueueOp)); + assert_eq!(classifications[1], Some(NoiseKind::Meta)); + assert_eq!(classifications[2], Some(NoiseKind::Warmup)); + assert_eq!(classifications[3], Some(NoiseKind::Interrupted)); + assert_eq!(classifications[4], Some(NoiseKind::Continuation)); + assert_eq!(classifications[5], Some(NoiseKind::ApiError)); + assert_eq!(classifications[6], Some(NoiseKind::SystemCaveat)); + assert_eq!(classifications[7], None); // normal user + assert_eq!(classifications[8], None); // normal assistant +} + +#[test] +fn load_subagent_from_fixture() { + let artifacts_dir = Utf8Path::new("tests/fixtures/subagent/parent"); + let subagents = dirigent_anth::load_subagents(artifacts_dir).unwrap(); + + assert_eq!(subagents.len(), 1); + assert_eq!(subagents[0].agent_id, "abc123"); + assert_eq!(subagents[0].meta.agent_type.as_deref(), Some("Explore")); + assert_eq!(subagents[0].messages.len(), 2); +} + +#[test] +fn load_subagents_empty_dir() { + // Non-existent artifacts dir should return empty vec + let artifacts_dir = Utf8Path::new("tests/fixtures/nonexistent"); + let subagents = dirigent_anth::load_subagents(artifacts_dir).unwrap(); + assert!(subagents.is_empty()); +} + +#[test] +fn load_full_session_with_subagents() { + use dirigent_anth::types::SessionRef; + + let session_ref = SessionRef { + id: "parent".to_string(), + jsonl_path: Utf8PathBuf::from("tests/fixtures/subagent/parent.jsonl"), + artifacts_dir: Some(Utf8PathBuf::from("tests/fixtures/subagent/parent")), + index_entry: None, + }; + + let session = dirigent_anth::load_session(&session_ref).unwrap(); + assert!(!session.messages.is_empty()); + assert!(!session.subagents.is_empty()); + assert!(!session.tree.roots.is_empty()); + assert!(!session.tool_exchanges.is_empty()); +} + +#[test] +fn load_session_without_artifacts() { + use dirigent_anth::types::SessionRef; + + let session_ref = SessionRef { + id: "minimal".to_string(), + jsonl_path: Utf8PathBuf::from("tests/fixtures/minimal_session.jsonl"), + artifacts_dir: None, + index_entry: None, + }; + + let session = dirigent_anth::load_session(&session_ref).unwrap(); + assert_eq!(session.messages.len(), 6); // 2 queue-ops + 2 users + 2 assistants + assert!(session.subagents.is_empty()); + assert!(session.tree.is_linear()); +} + +#[test] +fn content_as_string_or_blocks() { + // String content + let s: dirigent_anth::types::Content = serde_json::from_str(r#""hello""#).unwrap(); + assert!(matches!(s, dirigent_anth::types::Content::Text(_))); + + // Block content + let b: dirigent_anth::types::Content = + serde_json::from_str(r#"[{"type":"text","text":"hi"}]"#).unwrap(); + assert!(matches!(b, dirigent_anth::types::Content::Blocks(_))); + + // Empty blocks + let empty: dirigent_anth::types::Content = serde_json::from_str(r#"[]"#).unwrap(); + assert!(matches!(empty, dirigent_anth::types::Content::Blocks(ref v) if v.is_empty())); +} + +#[test] +fn missing_optional_fields_dont_crash() { + // Minimal assistant message with many fields missing + let json = r#"{ + "type": "assistant", + "message": { + "content": [{"type": "text", "text": "hi"}] + } + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + assert!(matches!(msg, RawMessage::Assistant(_))); +} + +#[test] +fn tool_result_content_string_and_blocks() { + // tool_result with string content + let json = r#"{"type":"tool_result","tool_use_id":"t1","content":"output text","is_error":false}"#; + let block: ContentBlock = serde_json::from_str(json).unwrap(); + if let ContentBlock::ToolResult { content, is_error, .. } = block { + assert!(!is_error); + assert!(content.is_some()); + } else { + panic!("Expected ToolResult"); + } + + // tool_result with no content + let json2 = r#"{"type":"tool_result","tool_use_id":"t2"}"#; + let block2: ContentBlock = serde_json::from_str(json2).unwrap(); + if let ContentBlock::ToolResult { content, is_error, .. } = block2 { + assert!(!is_error); + assert!(content.is_none()); + } else { + panic!("Expected ToolResult"); + } +} + +#[test] +fn extra_unknown_fields_are_ignored() { + // Messages with extra fields not in our structs should parse fine + let json = r#"{ + "type": "user", + "uuid": "x", + "timestamp": "2026-01-01T00:00:00Z", + "sessionId": "s", + "unknownField": "should be ignored", + "anotherExtra": 42, + "message": {"role": "user", "content": "hello"} + }"#; + let msg: RawMessage = serde_json::from_str(json).unwrap(); + assert!(matches!(msg, RawMessage::User(_))); +} + +#[test] +fn timestamp_parsing_all_formats() { + // ISO 8601 + let iso = parse_timestamp(&serde_json::json!("2026-03-22T17:00:13.192Z")).unwrap(); + assert_eq!(iso.year(), 2026); + + // Unix millis + let ms = parse_timestamp(&serde_json::json!(1769461914249_i64)).unwrap(); + assert!(ms.year() >= 2025); + + // Unix seconds + let secs = parse_timestamp(&serde_json::json!(1769461914_i64)).unwrap(); + assert!(secs.year() >= 2025); +} diff --git a/tests/usage_parse.rs b/tests/usage_parse.rs new file mode 100644 index 0000000..fca1916 --- /dev/null +++ b/tests/usage_parse.rs @@ -0,0 +1,101 @@ +use dirigent_anth::anth_usage::process_usage_screen; + +const SAMPLE: &str = r#" +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + Status Config Usage Stats + + Session + Total cost: $0.0000 + Total duration (API): 0s + Total duration (wall): 4s + Total code changes: 0 lines added, 0 lines removed + Usage: 0 input, 0 output, 0 cache read, 0 cache write + + Current session + ███████ 14% used + Resets 12:30pm (Europe/Vienna) + + Current week (all models) + ██████ 12% used + Resets May 12, 9am (Europe/Vienna) + + Current week (Sonnet only) + 0% used + Resets May 12, 9am (Europe/Vienna) + + What's contributing to your limits usage? + Approximate, based on local sessions on this machine — does not include other devices or claude.ai + + Last 24h · these are independent characteristics of your usage, not a breakdown + + 97% of your usage came from subagent-heavy sessions + Each subagent runs its own requests. Be deliberate about spawning them — and + consider configuring a cheaper model for simpler subagents. + + 16% of your usage was at >150k context + Longer sessions are more expensive even when cached. /compact mid-task, /clear + when switches to new tasks. + + Subagents % of usage + Explore 3% + claude-code-guide 2% + + d to day · w to week + + Esc to cancel +"#; + +#[test] +fn parses_gauges() { + let result = process_usage_screen(SAMPLE); + assert_eq!(result.data.gauges.len(), 3); + + assert_eq!(result.data.gauges[0].name, "Current session"); + assert_eq!(result.data.gauges[0].percent_used, 14); + assert_eq!( + result.data.gauges[0].resets.as_deref(), + Some("12:30pm (Europe/Vienna)") + ); + + assert_eq!(result.data.gauges[1].name, "Current week (all models)"); + assert_eq!(result.data.gauges[1].percent_used, 12); + assert_eq!( + result.data.gauges[1].resets.as_deref(), + Some("May 12, 9am (Europe/Vienna)") + ); + + assert_eq!(result.data.gauges[2].name, "Current week (Sonnet only)"); + assert_eq!(result.data.gauges[2].percent_used, 0); + + // resets_iso should be present for all gauges with reset info + assert!(result.data.gauges[0].resets_iso.is_some()); + assert!(result.data.gauges[1].resets_iso.is_some()); + assert!(result.data.gauges[2].resets_iso.is_some()); + + // Week resets should contain the right date + let week_iso = result.data.gauges[1].resets_iso.as_ref().unwrap(); + assert!(week_iso.starts_with("2026-05-12") || week_iso.contains("05-12")); +} + +#[test] +fn parses_contributions() { + let result = process_usage_screen(SAMPLE); + let contrib = result.data.contributions.as_ref().unwrap(); + + assert_eq!(contrib.factors.len(), 2); + assert_eq!(contrib.factors[0].percent, 97); + assert!(contrib.factors[0].description.contains("subagent-heavy")); + assert_eq!(contrib.factors[1].percent, 16); + + assert_eq!(contrib.subagents.len(), 2); + assert_eq!(contrib.subagents[0].name, "Explore"); + assert_eq!(contrib.subagents[0].percent, 3); + assert_eq!(contrib.subagents[1].name, "claude-code-guide"); + assert_eq!(contrib.subagents[1].percent, 2); +} + +#[test] +fn raw_screen_starts_with_rule() { + let result = process_usage_screen(SAMPLE); + assert!(result.raw_screen.starts_with('─')); +}