From bf5a79d931d27b99f39d593d801ab1c24571774e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabor=20K=C3=B6rber?= Date: Sat, 9 May 2026 19:52:44 +0200 Subject: [PATCH] sync from monorepo @ ffee08f2 --- Cargo.toml | 20 +- README.md | 27 +- architecture.svg | 140 +- crates/dirigent_anth/CLAUDE.md | 148 - crates/dirigent_anth/Cargo.toml | 37 - crates/dirigent_anth/src/anth_usage.rs | 331 --- crates/dirigent_anth/src/bin/anth.rs | 252 -- crates/dirigent_anth/src/bin/anth_usage.rs | 192 -- crates/dirigent_anth/src/claude_grab.rs | 157 -- crates/dirigent_anth/src/correlation.rs | 107 - crates/dirigent_anth/src/dedup.rs | 116 - crates/dirigent_anth/src/discovery.rs | 342 --- crates/dirigent_anth/src/error.rs | 19 - crates/dirigent_anth/src/lib.rs | 52 - crates/dirigent_anth/src/noise.rs | 72 - crates/dirigent_anth/src/parser.rs | 50 - crates/dirigent_anth/src/subagent.rs | 215 -- crates/dirigent_anth/src/tree.rs | 171 -- crates/dirigent_anth/src/types.rs | 847 ------ crates/dirigent_anth/src/util.rs | 70 - .../tests/fixtures/branching_tree.jsonl | 6 - .../tests/fixtures/minimal_session.jsonl | 6 - .../tests/fixtures/noise_patterns.jsonl | 9 - .../tests/fixtures/streaming_dedup.jsonl | 6 - .../tests/fixtures/subagent/parent.jsonl | 4 - .../parent/subagents/agent-abc123.jsonl | 2 - .../parent/subagents/agent-abc123.meta.json | 1 - .../tests/fixtures/tool_correlation.jsonl | 6 - .../dirigent_anth/tests/integration_tests.rs | 294 -- crates/dirigent_anth/tests/usage_parse.rs | 101 - crates/dirigent_archivist/CLAUDE.md | 761 ------ crates/dirigent_archivist/Cargo.toml | 69 - crates/dirigent_archivist/README.md | 338 --- .../examples/basic_usage.rs | 198 -- .../dirigent_archivist/examples/demo_types.rs | 156 -- .../examples/event_handling.rs | 277 -- .../examples/file_storage.rs | 214 -- .../examples/multi_backend.rs | 199 -- crates/dirigent_archivist/src/accumulator.rs | 923 ------- .../src/backend/capability.rs | 18 - .../src/backend/contract.rs | 108 - 
.../dirigent_archivist/src/backend/health.rs | 10 - crates/dirigent_archivist/src/backend/mock.rs | 574 ---- crates/dirigent_archivist/src/backend/mod.rs | 20 - .../dirigent_archivist/src/backend/traits.rs | 167 -- .../src/backends/jsonl/backend.rs | 624 ----- .../src/backends/jsonl/connectors.rs | 161 -- .../src/backends/jsonl/dag.rs | 69 - .../src/backends/jsonl/mapping.rs | 179 -- .../src/backends/jsonl/meta.rs | 200 -- .../src/backends/jsonl/mod.rs | 12 - crates/dirigent_archivist/src/backends/mod.rs | 5 - crates/dirigent_archivist/src/backfill.rs | 558 ---- .../src/coordinator/admin.rs | 70 - .../src/coordinator/archives.rs | 77 - .../src/coordinator/boot.rs | 281 -- .../src/coordinator/connectors.rs | 285 -- .../src/coordinator/meta.rs | 526 ---- .../dirigent_archivist/src/coordinator/mod.rs | 231 -- .../src/coordinator/routing.rs | 136 - .../src/coordinator/sessions.rs | 1470 ---------- .../src/coordinator/tests.rs | 195 -- .../src/coordinator/types.rs | 60 - crates/dirigent_archivist/src/error.rs | 314 --- crates/dirigent_archivist/src/events.rs | 2162 --------------- crates/dirigent_archivist/src/import/mod.rs | 933 ------- .../dirigent_archivist/src/import/progress.rs | 117 - .../dirigent_archivist/src/import/registry.rs | 93 - .../src/import/sources/chatgpt.rs | 361 --- .../src/import/sources/claude.rs | 1356 --------- .../src/import/sources/codex.rs | 331 --- .../src/import/sources/mod.rs | 7 - .../src/import/trait_def.rs | 113 - crates/dirigent_archivist/src/lib.rs | 45 - .../dirigent_archivist/src/registry/cache.rs | 116 - .../dirigent_archivist/src/registry/config.rs | 253 -- .../src/registry/factory.rs | 192 -- .../dirigent_archivist/src/registry/filter.rs | 187 -- .../dirigent_archivist/src/registry/health.rs | 72 - crates/dirigent_archivist/src/registry/mod.rs | 22 - .../src/registry/registration.rs | 181 -- .../dirigent_archivist/src/registry/writer.rs | 256 -- crates/dirigent_archivist/src/session.rs | 24 - 
.../dirigent_archivist/src/storage/files.rs | 465 ---- crates/dirigent_archivist/src/storage/json.rs | 342 --- crates/dirigent_archivist/src/storage/mod.rs | 118 - .../dirigent_archivist/src/storage/ndjson.rs | 361 --- .../dirigent_archivist/src/storage/paths.rs | 436 --- crates/dirigent_archivist/src/storage/tsv.rs | 552 ---- crates/dirigent_archivist/src/types.rs | 1298 --------- .../tests/archive_filter_test.rs | 334 --- ...abc12345-1234-1234-1234-abcdef123456.jsonl | 2 - .../tests/import_claude_idempotency_test.rs | 153 -- .../tests/import_progress_test.rs | 89 - .../tests/integration_tests.rs | 2414 ----------------- .../tests/list_sessions_paged_test.rs | 364 --- .../tests/multi_backend_boot_test.rs | 130 - .../tests/multi_backend_capability_test.rs | 76 - .../tests/multi_backend_cross_test.rs | 121 - .../tests/multi_backend_fanout_test.rs | 124 - .../tests/multi_backend_health_test.rs | 129 - .../tests/multi_backend_routing_test.rs | 102 - .../tests/multi_backend_writer_test.rs | 252 -- .../tests/pagination_test.rs | 142 - crates/dirigent_chatgpt/CLAUDE.md | 32 - crates/dirigent_chatgpt/Cargo.toml | 13 - crates/dirigent_chatgpt/src/lib.rs | 7 - crates/dirigent_chatgpt/src/parser.rs | 349 --- crates/dirigent_chatgpt/src/types.rs | 31 - .../tests/fixtures/minimal.json | 31 - crates/dirigent_codex/CLAUDE.md | 30 - crates/dirigent_codex/Cargo.toml | 14 - crates/dirigent_codex/src/lib.rs | 14 - crates/dirigent_codex/src/parser.rs | 274 -- crates/dirigent_codex/src/types.rs | 32 - crates/dirigent_core/Cargo.toml | 19 +- crates/dirigent_core/src/config.rs | 27 +- crates/dirigent_core/src/hooks.rs | 56 + crates/dirigent_core/src/lib.rs | 8 +- crates/dirigent_core/src/runtime/mod.rs | 985 +------ .../src/runtime/zed_detection.rs | 589 ---- crates/dirigent_core/src/sharing/health.rs | 15 +- crates/dirigent_core/src/sharing/matrix.rs | 217 -- crates/dirigent_core/src/sharing/mod.rs | 10 +- crates/dirigent_core/src/sharing/replay.rs | 226 -- 
.../tests/matrix_migration_test.rs | 207 -- crates/dirigent_core/tests/replay_test.rs | 176 -- crates/dirigent_fermata/CLAUDE.md | 34 - crates/dirigent_fermata/Cargo.toml | 40 - crates/dirigent_fermata/LICENSE-APACHE | 201 -- crates/dirigent_fermata/LICENSE-MIT | 21 - crates/dirigent_fermata/README.md | 214 -- crates/dirigent_fermata/src/bin/fermata.rs | 205 -- crates/dirigent_fermata/src/core/botignore.rs | 91 - crates/dirigent_fermata/src/core/decision.rs | 30 - crates/dirigent_fermata/src/core/extract.rs | 50 - crates/dirigent_fermata/src/core/mod.rs | 14 - crates/dirigent_fermata/src/core/op.rs | 9 - crates/dirigent_fermata/src/core/policy.rs | 164 -- crates/dirigent_fermata/src/core/project.rs | 33 - .../dirigent_fermata/src/core/toml_config.rs | 47 - crates/dirigent_fermata/src/harness/claude.rs | 76 - crates/dirigent_fermata/src/harness/mod.rs | 67 - crates/dirigent_fermata/src/lib.rs | 7 - .../tests/cargo_publish_metadata.rs | 47 - crates/dirigent_fermata/tests/cli_check.rs | 52 - .../dirigent_fermata/tests/cli_hook_claude.rs | 69 - .../dirigent_fermata/tests/core_botignore.rs | 135 - crates/dirigent_fermata/tests/core_extract.rs | 39 - .../tests/core_op_decision.rs | 42 - .../tests/core_policy_command.rs | 52 - .../tests/core_policy_path.rs | 64 - crates/dirigent_fermata/tests/core_project.rs | 120 - .../tests/core_toml_config.rs | 47 - crates/dirigent_fermata/tests/fixtures_a4.rs | 112 - .../dirigent_fermata/tests/harness_claude.rs | 86 - crates/dirigent_matrix/CLAUDE.md | 96 - crates/dirigent_matrix/Cargo.toml | 47 - crates/dirigent_matrix/src/config.rs | 73 - crates/dirigent_matrix/src/error.rs | 39 - crates/dirigent_matrix/src/lib.rs | 17 - crates/dirigent_matrix/src/room.rs | 81 - crates/dirigent_matrix/src/service.rs | 436 --- crates/dirigent_matrix/src/share.rs | 723 ----- crates/dirigent_taskrunner/CLAUDE.md | 73 - crates/dirigent_taskrunner/Cargo.toml | 17 - crates/dirigent_taskrunner/src/lib.rs | 7 - crates/dirigent_taskrunner/src/output.rs | 84 
- crates/dirigent_taskrunner/src/runner.rs | 467 ---- crates/dirigent_taskrunner/src/types.rs | 71 - crates/dirigent_zed/CLAUDE.md | 64 - crates/dirigent_zed/Cargo.toml | 16 - crates/dirigent_zed/src/agents.rs | 1145 -------- crates/dirigent_zed/src/detection.rs | 158 -- crates/dirigent_zed/src/lib.rs | 31 - crates/dirigent_zed/src/paths.rs | 147 - crates/dirigent_zed/src/registry.rs | 546 ---- 177 files changed, 242 insertions(+), 37736 deletions(-) delete mode 100644 crates/dirigent_anth/CLAUDE.md delete mode 100644 crates/dirigent_anth/Cargo.toml delete mode 100644 crates/dirigent_anth/src/anth_usage.rs delete mode 100644 crates/dirigent_anth/src/bin/anth.rs delete mode 100644 crates/dirigent_anth/src/bin/anth_usage.rs delete mode 100644 crates/dirigent_anth/src/claude_grab.rs delete mode 100644 crates/dirigent_anth/src/correlation.rs delete mode 100644 crates/dirigent_anth/src/dedup.rs delete mode 100644 crates/dirigent_anth/src/discovery.rs delete mode 100644 crates/dirigent_anth/src/error.rs delete mode 100644 crates/dirigent_anth/src/lib.rs delete mode 100644 crates/dirigent_anth/src/noise.rs delete mode 100644 crates/dirigent_anth/src/parser.rs delete mode 100644 crates/dirigent_anth/src/subagent.rs delete mode 100644 crates/dirigent_anth/src/tree.rs delete mode 100644 crates/dirigent_anth/src/types.rs delete mode 100644 crates/dirigent_anth/src/util.rs delete mode 100644 crates/dirigent_anth/tests/fixtures/branching_tree.jsonl delete mode 100644 crates/dirigent_anth/tests/fixtures/minimal_session.jsonl delete mode 100644 crates/dirigent_anth/tests/fixtures/noise_patterns.jsonl delete mode 100644 crates/dirigent_anth/tests/fixtures/streaming_dedup.jsonl delete mode 100644 crates/dirigent_anth/tests/fixtures/subagent/parent.jsonl delete mode 100644 crates/dirigent_anth/tests/fixtures/subagent/parent/subagents/agent-abc123.jsonl delete mode 100644 crates/dirigent_anth/tests/fixtures/subagent/parent/subagents/agent-abc123.meta.json delete mode 100644 
crates/dirigent_anth/tests/fixtures/tool_correlation.jsonl delete mode 100644 crates/dirigent_anth/tests/integration_tests.rs delete mode 100644 crates/dirigent_anth/tests/usage_parse.rs delete mode 100644 crates/dirigent_archivist/CLAUDE.md delete mode 100644 crates/dirigent_archivist/Cargo.toml delete mode 100644 crates/dirigent_archivist/README.md delete mode 100644 crates/dirigent_archivist/examples/basic_usage.rs delete mode 100644 crates/dirigent_archivist/examples/demo_types.rs delete mode 100644 crates/dirigent_archivist/examples/event_handling.rs delete mode 100644 crates/dirigent_archivist/examples/file_storage.rs delete mode 100644 crates/dirigent_archivist/examples/multi_backend.rs delete mode 100644 crates/dirigent_archivist/src/accumulator.rs delete mode 100644 crates/dirigent_archivist/src/backend/capability.rs delete mode 100644 crates/dirigent_archivist/src/backend/contract.rs delete mode 100644 crates/dirigent_archivist/src/backend/health.rs delete mode 100644 crates/dirigent_archivist/src/backend/mock.rs delete mode 100644 crates/dirigent_archivist/src/backend/mod.rs delete mode 100644 crates/dirigent_archivist/src/backend/traits.rs delete mode 100644 crates/dirigent_archivist/src/backends/jsonl/backend.rs delete mode 100644 crates/dirigent_archivist/src/backends/jsonl/connectors.rs delete mode 100644 crates/dirigent_archivist/src/backends/jsonl/dag.rs delete mode 100644 crates/dirigent_archivist/src/backends/jsonl/mapping.rs delete mode 100644 crates/dirigent_archivist/src/backends/jsonl/meta.rs delete mode 100644 crates/dirigent_archivist/src/backends/jsonl/mod.rs delete mode 100644 crates/dirigent_archivist/src/backends/mod.rs delete mode 100644 crates/dirigent_archivist/src/backfill.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/admin.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/archives.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/boot.rs delete mode 100644 
crates/dirigent_archivist/src/coordinator/connectors.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/meta.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/mod.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/routing.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/sessions.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/tests.rs delete mode 100644 crates/dirigent_archivist/src/coordinator/types.rs delete mode 100644 crates/dirigent_archivist/src/error.rs delete mode 100644 crates/dirigent_archivist/src/events.rs delete mode 100644 crates/dirigent_archivist/src/import/mod.rs delete mode 100644 crates/dirigent_archivist/src/import/progress.rs delete mode 100644 crates/dirigent_archivist/src/import/registry.rs delete mode 100644 crates/dirigent_archivist/src/import/sources/chatgpt.rs delete mode 100644 crates/dirigent_archivist/src/import/sources/claude.rs delete mode 100644 crates/dirigent_archivist/src/import/sources/codex.rs delete mode 100644 crates/dirigent_archivist/src/import/sources/mod.rs delete mode 100644 crates/dirigent_archivist/src/import/trait_def.rs delete mode 100644 crates/dirigent_archivist/src/lib.rs delete mode 100644 crates/dirigent_archivist/src/registry/cache.rs delete mode 100644 crates/dirigent_archivist/src/registry/config.rs delete mode 100644 crates/dirigent_archivist/src/registry/factory.rs delete mode 100644 crates/dirigent_archivist/src/registry/filter.rs delete mode 100644 crates/dirigent_archivist/src/registry/health.rs delete mode 100644 crates/dirigent_archivist/src/registry/mod.rs delete mode 100644 crates/dirigent_archivist/src/registry/registration.rs delete mode 100644 crates/dirigent_archivist/src/registry/writer.rs delete mode 100644 crates/dirigent_archivist/src/session.rs delete mode 100644 crates/dirigent_archivist/src/storage/files.rs delete mode 100644 crates/dirigent_archivist/src/storage/json.rs delete mode 100644 
crates/dirigent_archivist/src/storage/mod.rs delete mode 100644 crates/dirigent_archivist/src/storage/ndjson.rs delete mode 100644 crates/dirigent_archivist/src/storage/paths.rs delete mode 100644 crates/dirigent_archivist/src/storage/tsv.rs delete mode 100644 crates/dirigent_archivist/src/types.rs delete mode 100644 crates/dirigent_archivist/tests/archive_filter_test.rs delete mode 100644 crates/dirigent_archivist/tests/fixtures/claude_minimal/projects/-home-user-myproj/abc12345-1234-1234-1234-abcdef123456.jsonl delete mode 100644 crates/dirigent_archivist/tests/import_claude_idempotency_test.rs delete mode 100644 crates/dirigent_archivist/tests/import_progress_test.rs delete mode 100644 crates/dirigent_archivist/tests/integration_tests.rs delete mode 100644 crates/dirigent_archivist/tests/list_sessions_paged_test.rs delete mode 100644 crates/dirigent_archivist/tests/multi_backend_boot_test.rs delete mode 100644 crates/dirigent_archivist/tests/multi_backend_capability_test.rs delete mode 100644 crates/dirigent_archivist/tests/multi_backend_cross_test.rs delete mode 100644 crates/dirigent_archivist/tests/multi_backend_fanout_test.rs delete mode 100644 crates/dirigent_archivist/tests/multi_backend_health_test.rs delete mode 100644 crates/dirigent_archivist/tests/multi_backend_routing_test.rs delete mode 100644 crates/dirigent_archivist/tests/multi_backend_writer_test.rs delete mode 100644 crates/dirigent_archivist/tests/pagination_test.rs delete mode 100644 crates/dirigent_chatgpt/CLAUDE.md delete mode 100644 crates/dirigent_chatgpt/Cargo.toml delete mode 100644 crates/dirigent_chatgpt/src/lib.rs delete mode 100644 crates/dirigent_chatgpt/src/parser.rs delete mode 100644 crates/dirigent_chatgpt/src/types.rs delete mode 100644 crates/dirigent_chatgpt/tests/fixtures/minimal.json delete mode 100644 crates/dirigent_codex/CLAUDE.md delete mode 100644 crates/dirigent_codex/Cargo.toml delete mode 100644 crates/dirigent_codex/src/lib.rs delete mode 100644 
crates/dirigent_codex/src/parser.rs delete mode 100644 crates/dirigent_codex/src/types.rs create mode 100644 crates/dirigent_core/src/hooks.rs delete mode 100644 crates/dirigent_core/src/runtime/zed_detection.rs delete mode 100644 crates/dirigent_core/src/sharing/matrix.rs delete mode 100644 crates/dirigent_core/src/sharing/replay.rs delete mode 100644 crates/dirigent_core/tests/matrix_migration_test.rs delete mode 100644 crates/dirigent_core/tests/replay_test.rs delete mode 100644 crates/dirigent_fermata/CLAUDE.md delete mode 100644 crates/dirigent_fermata/Cargo.toml delete mode 100644 crates/dirigent_fermata/LICENSE-APACHE delete mode 100644 crates/dirigent_fermata/LICENSE-MIT delete mode 100644 crates/dirigent_fermata/README.md delete mode 100644 crates/dirigent_fermata/src/bin/fermata.rs delete mode 100644 crates/dirigent_fermata/src/core/botignore.rs delete mode 100644 crates/dirigent_fermata/src/core/decision.rs delete mode 100644 crates/dirigent_fermata/src/core/extract.rs delete mode 100644 crates/dirigent_fermata/src/core/mod.rs delete mode 100644 crates/dirigent_fermata/src/core/op.rs delete mode 100644 crates/dirigent_fermata/src/core/policy.rs delete mode 100644 crates/dirigent_fermata/src/core/project.rs delete mode 100644 crates/dirigent_fermata/src/core/toml_config.rs delete mode 100644 crates/dirigent_fermata/src/harness/claude.rs delete mode 100644 crates/dirigent_fermata/src/harness/mod.rs delete mode 100644 crates/dirigent_fermata/src/lib.rs delete mode 100644 crates/dirigent_fermata/tests/cargo_publish_metadata.rs delete mode 100644 crates/dirigent_fermata/tests/cli_check.rs delete mode 100644 crates/dirigent_fermata/tests/cli_hook_claude.rs delete mode 100644 crates/dirigent_fermata/tests/core_botignore.rs delete mode 100644 crates/dirigent_fermata/tests/core_extract.rs delete mode 100644 crates/dirigent_fermata/tests/core_op_decision.rs delete mode 100644 crates/dirigent_fermata/tests/core_policy_command.rs delete mode 100644 
crates/dirigent_fermata/tests/core_policy_path.rs delete mode 100644 crates/dirigent_fermata/tests/core_project.rs delete mode 100644 crates/dirigent_fermata/tests/core_toml_config.rs delete mode 100644 crates/dirigent_fermata/tests/fixtures_a4.rs delete mode 100644 crates/dirigent_fermata/tests/harness_claude.rs delete mode 100644 crates/dirigent_matrix/CLAUDE.md delete mode 100644 crates/dirigent_matrix/Cargo.toml delete mode 100644 crates/dirigent_matrix/src/config.rs delete mode 100644 crates/dirigent_matrix/src/error.rs delete mode 100644 crates/dirigent_matrix/src/lib.rs delete mode 100644 crates/dirigent_matrix/src/room.rs delete mode 100644 crates/dirigent_matrix/src/service.rs delete mode 100644 crates/dirigent_matrix/src/share.rs delete mode 100644 crates/dirigent_taskrunner/CLAUDE.md delete mode 100644 crates/dirigent_taskrunner/Cargo.toml delete mode 100644 crates/dirigent_taskrunner/src/lib.rs delete mode 100644 crates/dirigent_taskrunner/src/output.rs delete mode 100644 crates/dirigent_taskrunner/src/runner.rs delete mode 100644 crates/dirigent_taskrunner/src/types.rs delete mode 100644 crates/dirigent_zed/CLAUDE.md delete mode 100644 crates/dirigent_zed/Cargo.toml delete mode 100644 crates/dirigent_zed/src/agents.rs delete mode 100644 crates/dirigent_zed/src/detection.rs delete mode 100644 crates/dirigent_zed/src/lib.rs delete mode 100644 crates/dirigent_zed/src/paths.rs delete mode 100644 crates/dirigent_zed/src/registry.rs diff --git a/Cargo.toml b/Cargo.toml index d38fbe3..2e48c65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,19 +4,11 @@ members = [ "crates/dirigent_protocol", "crates/dirigent_core", "crates/dirigent_tools", - "crates/dirigent_fermata", "crates/dirigent_auth", "crates/dirigent_config", "crates/dirigent_acp_api", - "crates/dirigent_archivist", - "crates/dirigent_process", - "crates/dirigent_taskrunner", - "crates/dirigent_anth", "crates/dirigent_inspector", - "crates/dirigent_matrix", - "crates/dirigent_zed", - 
"crates/dirigent_chatgpt", - "crates/dirigent_codex", + "crates/dirigent_process", "crates/opencode_client", ] @@ -31,17 +23,9 @@ unused_assignments = "allow" dirigent_protocol = { path = "crates/dirigent_protocol" } dirigent_core = { path = "crates/dirigent_core" } dirigent_tools = { path = "crates/dirigent_tools" } -dirigent_fermata = { path = "crates/dirigent_fermata" } dirigent_auth = { path = "crates/dirigent_auth" } dirigent_config = { path = "crates/dirigent_config" } dirigent_acp_api = { path = "crates/dirigent_acp_api" } -dirigent_archivist = { path = "crates/dirigent_archivist" } -dirigent_process = { path = "crates/dirigent_process" } -dirigent_taskrunner = { path = "crates/dirigent_taskrunner" } -dirigent_anth = { path = "crates/dirigent_anth" } dirigent_inspector = { path = "crates/dirigent_inspector" } -dirigent_matrix = { path = "crates/dirigent_matrix", default-features = true } -dirigent_zed = { path = "crates/dirigent_zed" } -dirigent_chatgpt = { path = "crates/dirigent_chatgpt" } -dirigent_codex = { path = "crates/dirigent_codex" } +dirigent_process = { path = "crates/dirigent_process" } opencode_client = { path = "crates/opencode_client" } diff --git a/README.md b/README.md index b4eb225..2c97445 100644 --- a/README.md +++ b/README.md @@ -29,11 +29,10 @@ These tools are developed in this monorepo but distributed as independent reposi

**Layers top-to-bottom:** -- **Standalone Tools** — installable from their own repositories; depend on foundation crates -- **Orchestration** — multi-connector runtime, ACP server, task management, archival -- **Foundation** — protocol types, tool sandbox, configuration, auth -- **Integrations** — Matrix, Zed, and other external system connectors -- **Parsers** — readers for third-party session formats (OpenCode, ChatGPT, Codex) +- **Consumers** *(shadow)* — server assembly, web app, integrations — not in this repo +- **Standalone Tools** — installable from their own repositories; depend on these crates +- **Orchestration** — connector runtime, ACP server, introspection +- **Foundation** — protocol types, tool sandbox, configuration, auth, process management --- @@ -41,22 +40,14 @@ These tools are developed in this monorepo but distributed as independent reposi | Crate | Maturity | Description | |-------|----------|-------------| -| `dirigent_protocol` | beta | ACP protocol types — messages, events, and RPC definitions | | `dirigent_core` | beta | Multi-connector orchestration runtime | -| `dirigent_tools` | concept | Tool sandbox and execution abstractions | -| `dirigent_fermata` | production | Policy gate for AI coding agents (`.botignore` / `botignore.toml`) | -| `dirigent_auth` | concept | User authorization model | -| `dirigent_config` | beta | Configuration management | +| `dirigent_protocol` | beta | ACP protocol types — messages, events, and RPC definitions | | `dirigent_acp_api` | beta | ACP server for incoming agent connections | -| `dirigent_archivist` | production | Event-driven session archival | +| `dirigent_inspector` | concept | Runtime introspection tree | +| `dirigent_config` | beta | Configuration management | +| `dirigent_auth` | concept | User authorization model | | `dirigent_process` | beta | Child process management | -| `dirigent_taskrunner` | beta | Background task runner | -| `dirigent_anth` | production | Claude Code JSONL session parser 
| -| `dirigent_inspector` | concept | Session inspection tools | -| `dirigent_matrix` | concept | Matrix integration for session sharing | -| `dirigent_zed` | concept | Zed editor integration | -| `dirigent_chatgpt` | beta | ChatGPT `conversations.json` parser | -| `dirigent_codex` | beta | OpenAI Codex session parser | +| `dirigent_tools` | concept | Tool sandbox and execution abstractions | | `opencode_client` | beta | OpenCode.ai HTTP client | --- diff --git a/architecture.svg b/architecture.svg index 55bbb32..8c48646 100644 --- a/architecture.svg +++ b/architecture.svg @@ -1,86 +1,82 @@ - + - + Dirigent package architecture - - - STANDALONE TOOLS - - fermata - - dirigate - - anth - ← own repos, installable + + + CONSUMERS (not in this repo) + + server assembly + + web application + + API layer + + integrations + + archival + + parsers + importers + + + + STANDALONE TOOLS + + fermata + + dirigate + + anth + ← own repos, installable - - ORCHESTRATION - - dirigent_core - multi-connector runtime - - dirigent_acp_api - ACP server - - dirigent_taskrunner - background tasks - - dirigent_archivist - session archival + + ORCHESTRATION + + dirigent_core + connector runtime + + dirigent_acp_api + ACP server + + dirigent_inspector + introspection tree - - FOUNDATION - - dirigent_protocol - ACP types + messages - - dirigent_tools - tool sandbox - - dirigent_config - configuration - - dirigent_auth - authorization - - - - INTEGRATIONS - - matrix - session sharing - - langfuse - observability - - zed - editor - - - - PARSERS (third-party format readers) - - opencode_client - - dirigent_chatgpt - - dirigent_codex - - dirigent_inspector + + FOUNDATION + + protocol + ACP types + + tools + sandbox + + config + paths + toml + + auth + accounts + + process + lifecycle + + opencode + HTTP client - - - - - - - + + + + + + + + Shadow boxes = downstream consumers not included in this repository + 9 crates — minimal set for dirigate and standalone tool dependencies diff --git 
a/crates/dirigent_anth/CLAUDE.md b/crates/dirigent_anth/CLAUDE.md deleted file mode 100644 index 051c429..0000000 --- a/crates/dirigent_anth/CLAUDE.md +++ /dev/null @@ -1,148 +0,0 @@ -# Package: dirigent_anth - -Claude Code JSONL session parser and toolkit. - -## Quick Facts -- **Type**: Library -- **Main Entry**: src/lib.rs -- **Dependencies**: serde, serde_json, chrono, uuid, camino, thiserror, tracing, dirs -- **Status**: Core parsing complete — ready for downstream consumers - -## Purpose - -Reads Claude Code's local JSONL session storage (`~/.claude/projects/`) and produces typed, deduplicated, correlated Rust data structures. The types are the product — downstream consumers (archivist import, shell usage analyzers, session browsers) depend on these structs. - -## Key Features - -- **Session Discovery**: Scan `~/.claude/projects/` for all Claude Code projects and sessions -- **JSONL Parsing**: Lenient line-by-line parser that handles unknown fields and message types -- **Streaming Dedup**: Collapse streamed assistant messages to their final version -- **Tool Correlation**: ID-based pairing of tool_use → tool_result across parallel calls -- **Conversation Tree**: Reconstruct uuid/parentUuid threading with branch detection -- **Noise Classification**: Identify meta messages, warmup, interruptions, API errors -- **Sub-Agent Loading**: Recursive parsing of sub-agent JSONL with metadata -- **Timestamp Parsing**: Handle ISO 8601, Unix seconds, and Unix milliseconds - -## Architecture - -### Design Principles - -1. **Types are the product** — Well-typed Rust structs that downstream consumers import -2. **Lenient parsing** — Unknown fields ignored, unknown message types logged and skipped -3. **Stream-oriented** — Line-by-line BufReader parsing, never loads entire files -4. **Sync-first** — File parsing is CPU-bound; no async overhead -5. 
**Cross-platform** — camino::Utf8PathBuf throughout for Windows/Unix compatibility - -### Module Organization - -- **`types.rs`** — All public data types (Content, ContentBlock, RawMessage variants, ToolCall, etc.) -- **`error.rs`** — AntError enum with I/O, JSON parse, home-not-found, invalid-path variants -- **`parser.rs`** — JSONL line parser and file parser with lenient error handling -- **`dedup.rs`** — Streaming deduplication of assistant messages by uuid -- **`correlation.rs`** — Tool call ↔ result pairing by tool_use_id -- **`tree.rs`** — Conversation tree from uuid/parentUuid relationships -- **`noise.rs`** — Noise pattern classification (meta, warmup, interruptions, etc.) -- **`discovery.rs`** — Filesystem scanning for Claude projects and sessions -- **`subagent.rs`** — Sub-agent JSONL and metadata loading -- **`util.rs`** — Timestamp parsing utilities - -## Public API - -### Quick Start - -```rust -use dirigent_anth::{discover_claude_home, discover_projects, load_session}; - -// Discover all projects -let home = discover_claude_home()?; -let projects = discover_projects(&home)?; - -// Load a session with full parsing -for project in &projects { - for session_ref in &project.sessions { - let session = load_session(session_ref)?; - println!("Messages: {}, Tools: {}, Subagents: {}", - session.messages.len(), - session.tool_exchanges.len(), - session.subagents.len()); - } -} -``` - -### Key Functions - -| Function | Purpose | -|----------|---------| -| `discover_claude_home()` | Find `~/.claude/` directory | -| `discover_projects(home)` | Scan for all project directories | -| `parse_session(path)` | Parse a JSONL file into messages | -| `parse_session_deduped(path)` | Parse with streaming dedup applied | -| `dedup_messages(msgs)` | Deduplicate streamed assistant messages | -| `correlate_tools(msgs)` | Pair tool calls with results by ID | -| `ConversationTree::build(msgs)` | Build conversation tree | -| `classify_noise(msg)` | Classify a message as noise | -| 
`load_subagents(dir)` | Load sub-agent sessions from artifacts | -| `load_session(ref)` | Full parse: dedup + correlate + tree + subagents | -| `parse_timestamp(value)` | Parse ISO/Unix timestamps | - -## Data Model - -### Claude Code JSONL Format - -Each line in `~/.claude/projects//.jsonl` is a JSON object with a `type` field discriminator. Five types: `user`, `assistant`, `progress`, `system`, `queue-operation`. - -- **Outer wrapper**: camelCase fields (sessionId, parentUuid, isSidechain, gitBranch) -- **Inner message body**: snake_case fields (stop_reason, tool_use_id, is_error) -- **Content**: Either a plain string or array of typed content blocks - -### Content Blocks - -| Type | Fields | -|------|--------| -| text | `text` | -| tool_use | `id`, `name`, `input` | -| tool_result | `tool_use_id`, `content`, `is_error` | -| thinking | `thinking` | -| image | `source` | - -Unknown content block types are silently dropped (lenient deserialization). - -## Testing - -```bash -cargo test --package dirigent_anth -``` - -Tests use synthetic JSONL fixtures in `tests/fixtures/`: -- `minimal_session.jsonl` — Basic session with all message types -- `streaming_dedup.jsonl` — Streaming dedup scenario -- `tool_correlation.jsonl` — Parallel and sequential tool calls -- `branching_tree.jsonl` — Conversation with branches -- `noise_patterns.jsonl` — All noise pattern types -- `subagent/` — Sub-agent session with parent and metadata - -## Error Handling - -- Individual unparseable JSONL lines are logged and skipped (lenient) -- I/O errors and missing directories are propagated as AntError -- Unknown message types are skipped via serde -- Unknown content blocks are silently filtered - -## Related Packages - -- **dirigent_archivist** — Future consumer for session import -- No current dependencies on other dirigent packages (standalone) - -## Future Enhancements - -- Bash command analysis module (shell usage analytics) -- Archivist event transform/import -- CLI tool with 
scan/analyze/import subcommands -- SQLite caching layer -- Watch mode for new session monitoring - -## Documentation - -- **Package README**: `./README.md` - User-facing overview -- **API Docs**: Run `cargo doc --package dirigent_anth --open` -- **Design Plan**: `docs/superpowers/plans/2026-03-23-dirigent-ant-design.md` diff --git a/crates/dirigent_anth/Cargo.toml b/crates/dirigent_anth/Cargo.toml deleted file mode 100644 index 082e4f0..0000000 --- a/crates/dirigent_anth/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -[package] -name = "dirigent_anth" -version = "0.1.0" -edition = "2021" - -[lib] -path = "src/lib.rs" - -[[bin]] -name = "anth_bear" -path = "src/bin/anth.rs" - -[[bin]] -name = "anth_usage" -path = "src/bin/anth_usage.rs" - -[features] -default = [] -dirigent-paths = ["dep:dirigent_config"] - -[dependencies] -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -chrono = { version = "0.4", features = ["serde"] } -chrono-tz = "0.10" -uuid = { version = "1.11", features = ["serde"] } -camino = { version = "1.1", features = ["serde1"] } -dirs = "6.0" -thiserror = "2.0" -tracing = "0.1" -regex = "1" -portable-pty = "0.8" -vt100 = "0.15" -dirigent_config = { path = "../dirigent_config", optional = true } - -[dev-dependencies] -tempfile = "3.0" diff --git a/crates/dirigent_anth/src/anth_usage.rs b/crates/dirigent_anth/src/anth_usage.rs deleted file mode 100644 index b9213e3..0000000 --- a/crates/dirigent_anth/src/anth_usage.rs +++ /dev/null @@ -1,331 +0,0 @@ -use chrono::{Datelike, NaiveDate, NaiveTime, Utc}; -use chrono_tz::Tz; -use serde::Serialize; - -#[derive(Debug, Serialize, Default)] -pub struct UsageData { - pub gauges: Vec, - #[serde(skip_serializing_if = "Option::is_none")] - pub contributions: Option, -} - -#[derive(Debug, Serialize)] -pub struct UsageGauge { - pub name: String, - pub percent_used: u32, - #[serde(skip_serializing_if = "Option::is_none")] - pub resets: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub 
resets_iso: Option, -} - -#[derive(Debug, Serialize, Default)] -pub struct ContributionInfo { - #[serde(skip_serializing_if = "Vec::is_empty")] - pub factors: Vec, - #[serde(skip_serializing_if = "Vec::is_empty")] - pub subagents: Vec, -} - -#[derive(Debug, Serialize)] -pub struct ContributionFactor { - pub description: String, - pub percent: u32, -} - -#[derive(Debug, Serialize)] -pub struct SubagentUsage { - pub name: String, - pub percent: u32, -} - -pub struct ProcessedOutput { - pub raw_screen: String, - pub data: UsageData, -} - -pub fn process_usage_screen(raw: &str) -> ProcessedOutput { - let lines: Vec<&str> = raw.lines().collect(); - - let start = lines - .iter() - .position(|l| { - let t = l.trim(); - t.starts_with('─') && t.chars().filter(|&c| c == '─').count() >= 6 - }) - .unwrap_or(0); - - let end = lines - .iter() - .rposition(|l| !l.trim().is_empty()) - .map(|i| i + 1) - .unwrap_or(lines.len()); - - let clean_lines = &lines[start..end]; - let raw_screen = clean_lines.join("\n"); - - let data = extract_usage_data(clean_lines); - - ProcessedOutput { raw_screen, data } -} - -fn extract_usage_data(lines: &[&str]) -> UsageData { - let mut data = UsageData::default(); - let mut i = 0; - - while i < lines.len() { - let trimmed = lines[i].trim(); - - if (trimmed.starts_with("Current session") || trimmed.starts_with("Current week")) - && !trimmed.contains('%') - { - let name = trimmed.to_string(); - if let Some(gauge) = find_gauge(&lines[i..], &name) { - data.gauges.push(gauge); - } - } - - if let Some(factor) = parse_contribution_factor(trimmed) { - data.contributions - .get_or_insert_with(ContributionInfo::default) - .factors - .push(factor); - } - - if trimmed.starts_with("Subagents") { - let subs = parse_subagent_table(&lines[i + 1..]); - if !subs.is_empty() { - data.contributions - .get_or_insert_with(ContributionInfo::default) - .subagents = subs; - } - } - - i += 1; - } - - data -} - -fn find_gauge(lines: &[&str], name: &str) -> Option { - let mut 
percent = None; - let mut resets_raw = None; - - for line in lines.iter().skip(1).take(4) { - let t = line.trim(); - if let Some(pct) = extract_percent_used(t) { - percent = Some(pct); - } - if t.starts_with("Resets ") { - resets_raw = Some(t.trim_start_matches("Resets ").to_string()); - } - } - - percent.map(|p| { - let resets_iso = resets_raw.as_deref().and_then(parse_reset_to_iso); - UsageGauge { - name: name.to_string(), - percent_used: p, - resets: resets_raw, - resets_iso, - } - }) -} - -/// Parse reset strings like: -/// "12:30pm (Europe/Vienna)" → today at 12:30 in that tz -/// "May 12, 9am (Europe/Vienna)" → May 12 at 09:00 -/// "May 12, 9:30am (Europe/Vienna)" → May 12 at 09:30 -/// "Jun 1, 12pm (America/New_York)" → Jun 1 at 12:00 -/// -/// Claude Code uses JS `Intl.DateTimeFormat` style output. -fn parse_reset_to_iso(s: &str) -> Option { - // Split off the timezone from parentheses - let (datetime_part, tz_str) = { - let open = s.rfind('(')?; - let close = s.rfind(')')?; - let tz = s[open + 1..close].trim(); - let dt = s[..open].trim(); - (dt, tz) - }; - - let tz: Tz = tz_str.parse().ok()?; - let now = Utc::now().with_timezone(&tz); - - let (date, time_str) = if datetime_part.contains(',') { - // "May 12, 9am" or "May 12, 9:30am" - let comma_pos = datetime_part.find(',')?; - let date_part = datetime_part[..comma_pos].trim(); - let time_part = datetime_part[comma_pos + 1..].trim(); - - let date = parse_month_day(date_part, now.year())?; - (date, time_part) - } else { - // "12:30pm" — today in the given timezone - (now.date_naive(), datetime_part) - }; - - let time = parse_12h_time(time_str)?; - let naive = date.and_time(time); - let local = naive.and_local_timezone(tz).earliest()?; - let utc = local.with_timezone(&Utc); - - Some(utc.to_rfc3339()) -} - -/// Parse "May 12", "Jun 1", "December 25", etc. 
-fn parse_month_day(s: &str, year: i32) -> Option { - let parts: Vec<&str> = s.split_whitespace().collect(); - if parts.len() != 2 { - return None; - } - let month = match parts[0].to_lowercase().as_str() { - "jan" | "january" => 1, - "feb" | "february" => 2, - "mar" | "march" => 3, - "apr" | "april" => 4, - "may" => 5, - "jun" | "june" => 6, - "jul" | "july" => 7, - "aug" | "august" => 8, - "sep" | "september" => 9, - "oct" | "october" => 10, - "nov" | "november" => 11, - "dec" | "december" => 12, - _ => return None, - }; - let day: u32 = parts[1].parse().ok()?; - NaiveDate::from_ymd_opt(year, month, day) -} - -/// Parse "9am", "12pm", "9:30am", "12:30pm" -fn parse_12h_time(s: &str) -> Option { - let s = s.trim().to_lowercase(); - let is_pm = s.ends_with("pm"); - let is_am = s.ends_with("am"); - if !is_pm && !is_am { - return None; - } - - let num_part = &s[..s.len() - 2]; - - let (hour, minute) = if let Some((h, m)) = num_part.split_once(':') { - (h.parse::().ok()?, m.parse::().ok()?) - } else { - (num_part.parse::().ok()?, 0) - }; - - let hour_24 = match (hour, is_pm) { - (12, true) => 12, - (12, false) => 0, - (h, true) => h + 12, - (h, false) => h, - }; - - NaiveTime::from_hms_opt(hour_24, minute, 0) -} - -fn extract_percent_used(line: &str) -> Option { - let line = line.trim(); - if !line.ends_with("% used") { - return None; - } - let before_pct = line.trim_end_matches("% used").trim(); - before_pct - .rsplit_once(char::is_whitespace) - .map(|(_, n)| n) - .unwrap_or(before_pct) - .parse() - .ok() -} - -fn parse_contribution_factor(line: &str) -> Option { - if !line.contains("% of your usage") { - return None; - } - let pct_str = line.split('%').next()?; - let percent: u32 = pct_str.trim().parse().ok()?; - let description = line.to_string(); - Some(ContributionFactor { - description, - percent, - }) -} - -fn parse_subagent_table(lines: &[&str]) -> Vec { - let mut subs = Vec::new(); - for line in lines { - let t = line.trim(); - if t.is_empty() || 
t.starts_with('─') || t.contains("to day") || t.contains("to cancel") { - break; - } - if let Some(pos) = t.rfind('%') { - let num_start = t[..pos] - .rfind(char::is_whitespace) - .map(|i| i + 1) - .unwrap_or(0); - if let Ok(pct) = t[num_start..pos].parse::() { - let name = t[..num_start].trim().to_string(); - if !name.is_empty() && !name.contains("% of") { - subs.push(SubagentUsage { - name, - percent: pct, - }); - } - } - } - } - subs -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_time_only() { - let t = parse_12h_time("12:30pm").unwrap(); - assert_eq!(t, NaiveTime::from_hms_opt(12, 30, 0).unwrap()); - } - - #[test] - fn parse_time_am() { - let t = parse_12h_time("9am").unwrap(); - assert_eq!(t, NaiveTime::from_hms_opt(9, 0, 0).unwrap()); - } - - #[test] - fn parse_time_12am() { - let t = parse_12h_time("12am").unwrap(); - assert_eq!(t, NaiveTime::from_hms_opt(0, 0, 0).unwrap()); - } - - #[test] - fn parse_time_with_minutes() { - let t = parse_12h_time("9:30am").unwrap(); - assert_eq!(t, NaiveTime::from_hms_opt(9, 30, 0).unwrap()); - } - - #[test] - fn parse_reset_time_only() { - let iso = parse_reset_to_iso("12:30pm (Europe/Vienna)"); - assert!(iso.is_some()); - let iso = iso.unwrap(); - assert!(iso.contains("T")); - // Should end in +00:00 (UTC via rfc3339) - assert!(iso.ends_with("+00:00")); - } - - #[test] - fn parse_reset_date_and_time() { - let iso = parse_reset_to_iso("May 12, 9am (Europe/Vienna)").unwrap(); - assert!(iso.contains("T07:00:00") || iso.contains("T08:00:00")); - // CEST is UTC+2, CET is UTC+1 — depends on whether May 12 is summer time - } - - #[test] - fn parse_month_day_basic() { - let d = parse_month_day("May 12", 2026).unwrap(); - assert_eq!(d, NaiveDate::from_ymd_opt(2026, 5, 12).unwrap()); - } -} diff --git a/crates/dirigent_anth/src/bin/anth.rs b/crates/dirigent_anth/src/bin/anth.rs deleted file mode 100644 index 5ecdefa..0000000 --- a/crates/dirigent_anth/src/bin/anth.rs +++ /dev/null @@ -1,252 +0,0 @@ -//! 
Minimal CLI for dirigent_anth — validate parsing and search sessions. -//! -//! Usage: -//! cargo run --package dirigent_anth --bin ant # validate all sessions -//! cargo run --package dirigent_anth --bin ant -- search "query" # search user messages -//! cargo run --package dirigent_anth --bin ant -- stats # show statistics - -use dirigent_anth::*; -use std::io::BufRead; - -fn main() { - let args: Vec = std::env::args().skip(1).collect(); - - let home = match discover_claude_home() { - Ok(h) => h, - Err(e) => { - eprintln!("Could not find Claude home: {e}"); - std::process::exit(1); - } - }; - - let projects = match discover_projects(&home) { - Ok(p) => p, - Err(e) => { - eprintln!("Could not discover projects: {e}"); - std::process::exit(1); - } - }; - - match args.first().map(|s| s.as_str()) { - Some("search") => { - let query = args.get(1).map(|s| s.as_str()).unwrap_or(""); - if query.is_empty() { - eprintln!("Usage: ant search "); - std::process::exit(1); - } - cmd_search(&projects, query); - } - Some("stats") => cmd_stats(&projects), - Some("validate") | None => cmd_validate(&projects), - Some(other) => { - eprintln!("Unknown command: {other}"); - eprintln!("Commands: validate (default), search , stats"); - std::process::exit(1); - } - } -} - -/// Validate that the parser can handle all sessions without errors. 
-fn cmd_validate(projects: &[ClaudeProject]) { - let mut total_sessions = 0; - let mut total_ok = 0; - let mut total_messages = 0; - let mut total_skipped_lines = 0; - let mut errors: Vec<(String, String)> = Vec::new(); - - for project in projects { - println!( - "Project: {} ({} sessions)", - project.original_path, - project.sessions.len() - ); - - for session in &project.sessions { - total_sessions += 1; - - // Raw line-level validation: count how many lines parse vs skip - let (_raw_ok, raw_skip) = validate_lines(&session.jsonl_path); - total_skipped_lines += raw_skip; - - // Full pipeline validation - match load_session(session) { - Ok(parsed) => { - total_ok += 1; - total_messages += parsed.messages.len(); - let tools = parsed.tool_exchanges.len(); - let subs = parsed.subagents.len(); - let branches = if parsed.tree.is_linear() { - "linear" - } else { - "branched" - }; - - if raw_skip > 0 { - println!( - " {} — {} msgs, {} tools, {} subagents, {} | {raw_skip} lines skipped", - &session.id[..8.min(session.id.len())], - parsed.messages.len(), - tools, - subs, - branches, - ); - } - } - Err(e) => { - errors.push((session.id.clone(), e.to_string())); - eprintln!(" {} — ERROR: {e}", &session.id[..8.min(session.id.len())]); - } - } - } - } - - println!("\n--- Validation Summary ---"); - println!("Projects: {}", projects.len()); - println!("Sessions: {total_sessions} ({total_ok} ok, {} errors)", errors.len()); - println!("Messages: {total_messages}"); - if total_skipped_lines > 0 { - println!("Skipped: {total_skipped_lines} unparseable lines"); - } - - if !errors.is_empty() { - println!("\nErrors:"); - for (id, err) in &errors { - println!(" {id}: {err}"); - } - std::process::exit(1); - } -} - -/// Count parseable vs skipped lines in a JSONL file. 
-fn validate_lines(path: &camino::Utf8Path) -> (usize, usize) { - let file = match std::fs::File::open(path.as_std_path()) { - Ok(f) => f, - Err(_) => return (0, 0), - }; - let reader = std::io::BufReader::new(file); - let mut ok = 0; - let mut skip = 0; - - for (i, line) in reader.lines().enumerate() { - let line = match line { - Ok(l) => l, - Err(_) => { - skip += 1; - continue; - } - }; - if line.trim().is_empty() { - continue; - } - if parse_line(&line, i + 1).is_some() { - ok += 1; - } else { - skip += 1; - } - } - - (ok, skip) -} - -/// Search user messages for a query string (case-insensitive). -fn cmd_search(projects: &[ClaudeProject], query: &str) { - let query_lower = query.to_lowercase(); - let mut hits = 0; - - for project in projects { - for session in &project.sessions { - let messages = match parse_session_deduped(&session.jsonl_path) { - Ok(m) => m, - Err(_) => continue, - }; - - for msg in &messages { - let text = match msg { - types::RawMessage::User(u) => match &u.message.content { - types::Content::Text(s) => s.clone(), - types::Content::Blocks(_) => continue, - }, - types::RawMessage::Assistant(a) => { - let mut parts = Vec::new(); - for block in &a.message.content { - if let types::ContentBlock::Text { text } = block { - parts.push(text.as_str()); - } - } - parts.join(" ") - } - _ => continue, - }; - - if text.to_lowercase().contains(&query_lower) { - let role = match msg { - types::RawMessage::User(_) => "user", - types::RawMessage::Assistant(_) => "assistant", - _ => "other", - }; - let preview = truncate(&text, 120); - println!( - "[{}] {} {} | {}", - &project.original_path, - &session.id[..8.min(session.id.len())], - role, - preview - ); - hits += 1; - } - } - } - } - - println!("\n{hits} matches for \"{query}\""); -} - -/// Show aggregate statistics across all sessions. 
-fn cmd_stats(projects: &[ClaudeProject]) { - let mut total_sessions = 0; - let mut total_messages = 0; - let mut total_tools = 0; - let mut total_subagents = 0; - let mut tool_counts: std::collections::HashMap = std::collections::HashMap::new(); - - for project in projects { - for session in &project.sessions { - total_sessions += 1; - if let Ok(parsed) = load_session(session) { - total_messages += parsed.messages.len(); - total_tools += parsed.tool_exchanges.len(); - total_subagents += parsed.subagents.len(); - - for ex in &parsed.tool_exchanges { - let name = format!("{:?}", ex.call.name); - *tool_counts.entry(name).or_default() += 1; - } - } - } - } - - println!("--- Statistics ---"); - println!("Projects: {}", projects.len()); - println!("Sessions: {total_sessions}"); - println!("Messages: {total_messages}"); - println!("Tool calls: {total_tools}"); - println!("Sub-agents: {total_subagents}"); - - if !tool_counts.is_empty() { - println!("\nTool usage:"); - let mut sorted: Vec<_> = tool_counts.into_iter().collect(); - sorted.sort_by(|a, b| b.1.cmp(&a.1)); - for (name, count) in sorted.iter().take(15) { - println!(" {name:20} {count}"); - } - } -} - -fn truncate(s: &str, max: usize) -> String { - let s = s.replace('\n', " ").replace('\r', ""); - if s.len() <= max { - s - } else { - format!("{}...", &s[..max]) - } -} diff --git a/crates/dirigent_anth/src/bin/anth_usage.rs b/crates/dirigent_anth/src/bin/anth_usage.rs deleted file mode 100644 index 58fd3e0..0000000 --- a/crates/dirigent_anth/src/bin/anth_usage.rs +++ /dev/null @@ -1,192 +0,0 @@ -use portable_pty::{CommandBuilder, NativePtySystem, PtySize, PtySystem}; -use std::io::{Read, Write}; -use std::path::PathBuf; -use std::time::Duration; - -const ROWS: u16 = 80; -const COLS: u16 = 120; - -struct Args { - debug: bool, - raw: bool, - no_trust: bool, - workdir: Option, - use_cwd: bool, -} - -fn parse_args() -> Args { - let mut args = Args { - debug: false, - raw: false, - no_trust: false, - workdir: None, - 
use_cwd: false, - }; - let mut iter = std::env::args().skip(1); - while let Some(arg) = iter.next() { - match arg.as_str() { - "--debug" => args.debug = true, - "--raw" => args.raw = true, - "--no-trust" => args.no_trust = true, - "--cwd" => args.use_cwd = true, - "--workdir" => { - args.workdir = Some(PathBuf::from( - iter.next().expect("--workdir requires a path argument"), - )); - } - other => { - eprintln!("Unknown argument: {other}"); - eprintln!( - "Usage: anth_usage [--debug] [--raw] [--no-trust] [--workdir ] [--cwd]" - ); - std::process::exit(2); - } - } - } - args -} - -fn resolve_workdir(args: &Args) -> PathBuf { - if let Some(ref dir) = args.workdir { - return dir.clone(); - } - if args.use_cwd { - return std::env::current_dir().expect("failed to get current directory"); - } - - #[cfg(feature = "dirigent-paths")] - { - if let Ok(paths) = dirigent_config::DirigentPaths::resolve() { - let noproject = paths.noproject_home_dir(); - if noproject.exists() { - return noproject; - } - } - } - - dirs::home_dir().expect("failed to resolve home directory") -} - -fn grab_screen(parser: &vt100::Parser) -> String { - let screen = parser.screen(); - let mut output = String::new(); - for line in screen.rows(0, COLS) { - output.push_str(&line); - output.push('\n'); - } - output -} - -macro_rules! 
debug { - ($args:expr, $($tt:tt)*) => { - if $args.debug { - eprintln!($($tt)*); - } - }; -} - -fn main() { - let args = parse_args(); - let workdir = resolve_workdir(&args); - - debug!(args, "Working directory: {}", workdir.display()); - - let pty_system = NativePtySystem::default(); - let pair = pty_system - .openpty(PtySize { - rows: ROWS, - cols: COLS, - pixel_width: 0, - pixel_height: 0, - }) - .expect("failed to open pty"); - - let mut cmd = CommandBuilder::new("claude"); - cmd.cwd(&workdir); - let mut child = pair.slave.spawn_command(cmd).expect("failed to spawn claude"); - drop(pair.slave); - - let mut writer = pair.master.take_writer().expect("failed to get writer"); - let reader = pair.master.try_clone_reader().expect("failed to get reader"); - - let (tx, rx) = std::sync::mpsc::channel(); - std::thread::spawn(move || { - let mut reader = reader; - let mut buf = [0u8; 4096]; - loop { - match reader.read(&mut buf) { - Ok(0) => break, - Ok(n) => { - let _ = tx.send(buf[..n].to_vec()); - } - Err(_) => break, - } - } - }); - - // Wait for claude to render - std::thread::sleep(Duration::from_secs(5)); - - debug!( - args, - "Child alive: {}", - matches!(child.try_wait(), Ok(None)) - ); - - // Grab screen - let mut parser = vt100::Parser::new(ROWS, COLS, 0); - while let Ok(data) = rx.try_recv() { - parser.process(&data); - } - let output = grab_screen(&parser); - debug!(args, "=== SCREEN ===\n{output}=== END ==="); - - // Handle trust prompt - if output.contains("Yes, I trust this folder") { - if args.no_trust { - eprintln!("Folder is not trusted: {}", workdir.display()); - eprintln!("Run claude in this folder manually to trust it, or omit --no-trust."); - let _ = child.kill(); - std::process::exit(1); - } - debug!(args, "Sending enter for trust..."); - writer.write_all(b"\r").expect("failed to confirm trust"); - - std::thread::sleep(Duration::from_secs(3)); - - while let Ok(data) = rx.try_recv() { - parser.process(&data); - } - debug!( - args, - "=== AFTER TRUST 
===\n{}=== END ===", - grab_screen(&parser) - ); - } - - // Send /usage - debug!(args, "Sending /usage..."); - writer - .write_all(b"/usage\r") - .expect("failed to send /usage"); - - std::thread::sleep(Duration::from_secs(3)); - - while let Ok(data) = rx.try_recv() { - parser.process(&data); - } - let raw_output = grab_screen(&parser); - - let processed = dirigent_anth::anth_usage::process_usage_screen(&raw_output); - - if args.raw { - println!("{}", processed.raw_screen); - } else { - println!( - "{}", - serde_json::to_string_pretty(&processed.data).expect("failed to serialize usage data") - ); - } - - let _ = child.kill(); -} diff --git a/crates/dirigent_anth/src/claude_grab.rs b/crates/dirigent_anth/src/claude_grab.rs deleted file mode 100644 index 1d47902..0000000 --- a/crates/dirigent_anth/src/claude_grab.rs +++ /dev/null @@ -1,157 +0,0 @@ -use portable_pty::{Child, CommandBuilder, NativePtySystem, PtySize, PtySystem}; -use std::io::{Read, Write}; -use std::sync::mpsc::{self, Receiver}; -use std::time::Duration; -use vt100::Parser; - -const DEFAULT_ROWS: u16 = 80; -const DEFAULT_COLS: u16 = 120; - -pub struct PtySession { - parser: Parser, - writer: Option>, - rx: Receiver>, - cols: u16, - #[allow(dead_code)] - child: Box, -} - -impl PtySession { - pub fn spawn_claude(args: &[&str]) -> Self { - Self::spawn_claude_with_size(args, DEFAULT_ROWS, DEFAULT_COLS) - } - - pub fn spawn_claude_with_size(args: &[&str], rows: u16, cols: u16) -> Self { - let pty_system = NativePtySystem::default(); - - let pair = pty_system - .openpty(PtySize { - rows, - cols, - pixel_width: 0, - pixel_height: 0, - }) - .expect("failed to open pty"); - - let mut cmd = CommandBuilder::new("claude"); - for arg in args { - cmd.arg(*arg); - } - if let Some(home) = dirs::home_dir() { - cmd.cwd(home); - } - let child = pair - .slave - .spawn_command(cmd) - .expect("failed to spawn claude"); - - drop(pair.slave); - - let writer = pair.master.take_writer().expect("failed to get writer"); - let 
reader = pair - .master - .try_clone_reader() - .expect("failed to get reader"); - - let (tx, rx) = mpsc::channel::>(); - std::thread::spawn(move || { - let mut reader = reader; - let mut chunk = [0u8; 4096]; - loop { - match reader.read(&mut chunk) { - Ok(0) => break, - Ok(n) => { - if tx.send(chunk[..n].to_vec()).is_err() { - break; - } - } - Err(_) => break, - } - } - }); - - Self { - parser: Parser::new(rows, cols, 0), - writer: Some(writer), - rx, - cols, - child, - } - } - - pub fn grab_screen(&mut self) -> String { - while let Ok(data) = self.rx.try_recv() { - self.parser.process(&data); - } - let deadline = std::time::Instant::now() + Duration::from_millis(200); - while std::time::Instant::now() < deadline { - match self.rx.recv_timeout(Duration::from_millis(50)) { - Ok(data) => self.parser.process(&data), - Err(_) => {} - } - } - - let screen = self.parser.screen(); - let mut output = String::new(); - for line in screen.rows(0, self.cols) { - output.push_str(&line); - output.push('\n'); - } - output - } - - pub fn wait_for(&mut self, needle: &str, timeout: Duration) -> bool { - self.wait_for_any(&[needle], timeout) - } - - pub fn wait_for_any(&mut self, needles: &[&str], timeout: Duration) -> bool { - let deadline = std::time::Instant::now() + timeout; - while std::time::Instant::now() < deadline { - match self.rx.recv_timeout(Duration::from_millis(100)) { - Ok(data) => self.parser.process(&data), - Err(_) => {} - } - let screen = self.parser.screen(); - let mut content = String::new(); - for line in screen.rows(0, self.cols) { - content.push_str(&line); - content.push('\n'); - } - for needle in needles { - if content.contains(needle) { - return true; - } - } - } - false - } - - pub fn is_alive(&mut self) -> bool { - matches!(self.child.try_wait(), Ok(None)) - } - - pub fn send(&mut self, input: &[u8]) { - self.writer.as_mut().expect("writer gone").write_all(input).expect("failed to write to pty"); - } - - pub fn try_send(&mut self, input: &[u8]) -> 
std::io::Result<()> { - match self.writer.as_mut() { - Some(w) => w.write_all(input), - None => Err(std::io::Error::new(std::io::ErrorKind::BrokenPipe, "writer gone")), - } - } - - pub fn try_send_line(&mut self, text: &str) -> std::io::Result<()> { - self.try_send(text.as_bytes())?; - self.try_send(b"\r") - } - - pub fn send_enter(&mut self) { - self.send(b"\r"); - } - - pub fn send_line(&mut self, text: &str) { - self.send(text.as_bytes()); - self.send_enter(); - } -} diff --git a/crates/dirigent_anth/src/correlation.rs b/crates/dirigent_anth/src/correlation.rs deleted file mode 100644 index 5e96bef..0000000 --- a/crates/dirigent_anth/src/correlation.rs +++ /dev/null @@ -1,107 +0,0 @@ -//! Tool call correlation — matches assistant ToolUse blocks with their -//! corresponding user ToolResult blocks by ID across a message sequence. - -use std::collections::HashMap; - -use crate::types::{ - Content, ContentBlock, RawAssistantMessage, RawMessage, RawUserMessage, ToolCall, - ToolExchange, ToolName, ToolResultData, -}; - -/// Extract tool calls from an assistant message's content blocks. -fn extract_tool_calls(msg: &RawAssistantMessage) -> Vec { - let source_uuid = msg.uuid.clone().unwrap_or_default(); - msg.message - .content - .iter() - .filter_map(|block| { - if let ContentBlock::ToolUse { id, name, input, .. } = block { - Some(ToolCall { - id: id.clone(), - name: ToolName::from(name.clone()), - input: input.clone(), - source_message_uuid: source_uuid.clone(), - }) - } else { - None - } - }) - .collect() -} - -/// Extract tool results from a user message's content blocks. 
-fn extract_tool_results(msg: &RawUserMessage) -> Vec { - let source_uuid = msg.uuid.clone().unwrap_or_default(); - match &msg.message.content { - Content::Blocks(blocks) => blocks - .iter() - .filter_map(|block| { - if let ContentBlock::ToolResult { tool_use_id, content, is_error } = block { - // Extract text content from the tool result - let text_content = content.as_ref().and_then(|c| match c { - Content::Text(s) => Some(s.clone()), - Content::Blocks(bs) => { - // Concatenate text blocks - let texts: Vec<&str> = bs - .iter() - .filter_map(|b| { - if let ContentBlock::Text { text } = b { - Some(text.as_str()) - } else { - None - } - }) - .collect(); - if texts.is_empty() { None } else { Some(texts.join("\n")) } - } - }); - Some(ToolResultData { - tool_use_id: tool_use_id.clone(), - content: text_content, - is_error: *is_error, - source_message_uuid: source_uuid.clone(), - }) - } else { - None - } - }) - .collect(), - Content::Text(_) => Vec::new(), - } -} - -/// Correlate tool calls with their results across a message sequence. -/// -/// Iterates messages in order, collecting ToolUse blocks from assistant -/// messages and matching them by ID to ToolResult blocks in subsequent user -/// messages. Any tool calls that never received a result are emitted with -/// `result: None`. 
-pub fn correlate_tools(messages: &[RawMessage]) -> Vec { - let mut pending: HashMap = HashMap::new(); - let mut exchanges: Vec = Vec::new(); - - for msg in messages { - match msg { - RawMessage::Assistant(asst) => { - for call in extract_tool_calls(asst) { - pending.insert(call.id.clone(), call); - } - } - RawMessage::User(user) => { - for result in extract_tool_results(user) { - if let Some(call) = pending.remove(&result.tool_use_id) { - exchanges.push(ToolExchange { call, result: Some(result) }); - } - } - } - _ => {} - } - } - - // Emit unmatched calls (no result found) - for (_id, call) in pending { - exchanges.push(ToolExchange { call, result: None }); - } - - exchanges -} diff --git a/crates/dirigent_anth/src/dedup.rs b/crates/dirigent_anth/src/dedup.rs deleted file mode 100644 index 27c2bd7..0000000 --- a/crates/dirigent_anth/src/dedup.rs +++ /dev/null @@ -1,116 +0,0 @@ -//! Streaming deduplication for assistant messages. - -use crate::types::{RawAssistantMessage, RawMessage}; - -/// Deduplicate streamed assistant messages. -/// -/// Claude Code writes multiple JSONL lines for the same assistant message -/// as it streams. Each shares the same `uuid` with progressively more -/// content blocks. We keep only the last entry per uuid. -/// -/// Non-assistant messages pass through unchanged. 
-pub fn dedup_messages(messages: Vec) -> Vec { - let mut result: Vec = Vec::new(); - let mut buffered_assistant: Option = None; - - for msg in messages { - match msg { - RawMessage::Assistant(ref asst) => { - let current_uuid = asst.uuid.as_deref(); - - if let Some(ref buffered) = buffered_assistant { - let buffered_uuid = buffered.uuid.as_deref(); - if current_uuid == buffered_uuid { - // Same uuid — replace buffer with newer (more complete) version - buffered_assistant = Some(asst.clone()); - } else { - // Different uuid — flush old buffer, start new - result.push(RawMessage::Assistant(buffered.clone())); - buffered_assistant = Some(asst.clone()); - } - } else { - // No buffer yet — start buffering - buffered_assistant = Some(asst.clone()); - } - } - _ => { - // Non-assistant: flush any buffered assistant first, then push this - if let Some(buffered) = buffered_assistant.take() { - result.push(RawMessage::Assistant(buffered)); - } - result.push(msg); - } - } - } - - // Flush remaining buffer - if let Some(buffered) = buffered_assistant { - result.push(RawMessage::Assistant(buffered)); - } - - result -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::types::{AssistantInner, ContentBlock}; - - fn make_assistant(uuid: &str, stop_reason: Option<&str>, text: &str) -> RawMessage { - RawMessage::Assistant(RawAssistantMessage { - uuid: Some(uuid.to_string()), - parent_uuid: None, - timestamp: None, - session_id: None, - cwd: None, - version: None, - git_branch: None, - is_sidechain: false, - request_id: None, - message: AssistantInner { - model: None, - id: None, - message_type: None, - role: None, - content: vec![ContentBlock::Text { text: text.to_string() }], - stop_reason: stop_reason.map(str::to_string), - stop_sequence: None, - usage: None, - }, - }) - } - - #[test] - fn dedup_single_streamed_message() { - let msgs = vec![ - make_assistant("a-1", None, "Part 1"), - make_assistant("a-1", None, "Part 1 more"), - make_assistant("a-1", Some("end_turn"), "Part 
1 final"), - ]; - let deduped = dedup_messages(msgs); - assert_eq!(deduped.len(), 1); - if let RawMessage::Assistant(a) = &deduped[0] { - assert_eq!(a.message.stop_reason.as_deref(), Some("end_turn")); - match &a.message.content[0] { - ContentBlock::Text { text } => assert_eq!(text, "Part 1 final"), - _ => panic!("Expected text block"), - } - } - } - - #[test] - fn dedup_two_distinct_assistants() { - let msgs = vec![ - make_assistant("a-1", Some("end_turn"), "First"), - make_assistant("a-2", Some("end_turn"), "Second"), - ]; - let deduped = dedup_messages(msgs); - assert_eq!(deduped.len(), 2); - } - - #[test] - fn dedup_empty_input() { - let deduped = dedup_messages(vec![]); - assert!(deduped.is_empty()); - } -} diff --git a/crates/dirigent_anth/src/discovery.rs b/crates/dirigent_anth/src/discovery.rs deleted file mode 100644 index 5e830f2..0000000 --- a/crates/dirigent_anth/src/discovery.rs +++ /dev/null @@ -1,342 +0,0 @@ -use std::collections::HashMap; -use camino::{Utf8Path, Utf8PathBuf}; -use crate::types::*; -use crate::error::{AntError, Result}; - -/// Discover the Claude Code home directory (~/.claude/). -pub fn discover_claude_home() -> Result { - let home = dirs::home_dir().ok_or(AntError::HomeNotFound)?; - let claude_dir = home.join(".claude"); - if !claude_dir.exists() { - return Err(AntError::HomeNotFound); - } - Utf8PathBuf::try_from(claude_dir.to_path_buf()) - .map_err(|e| AntError::InvalidPath(e.to_string())) -} - -/// Normalise a native path to forward slashes for consistent storage. -fn normalize_to_forward_slashes(path: &str) -> String { - path.replace('\\', "/") -} - -/// Resolve the original filesystem path for a Claude project directory. -/// -/// Priority: -/// 1. `projectPath` from `sessions-index.json` (authoritative, cheap) -/// 2. `cwd` from the first user message in any session JSONL (authoritative, costs one file parse) -/// 3. 
`decode_project_path` (lossy fallback for empty project directories) -pub fn resolve_original_path(dir_name: &str, sessions: &[SessionRef]) -> String { - // 1. Try sessions-index.json projectPath - for session in sessions { - if let Some(ref idx) = session.index_entry { - if let Some(ref path) = idx.project_path { - if !path.is_empty() { - return normalize_to_forward_slashes(path); - } - } - } - } - - // 2. Try cwd from first user message in any session - for session in sessions { - if let Ok(msgs) = crate::parser::parse_session(&session.jsonl_path) { - for msg in &msgs { - if let crate::types::RawMessage::User(user) = msg { - if let Some(ref cwd) = user.cwd { - if !cwd.is_empty() { - return normalize_to_forward_slashes(cwd); - } - } - } - } - } - } - - // 3. Lossy fallback - decode_project_path(dir_name) -} - -/// Discover all Claude Code project directories under the given home. -pub fn discover_projects(home: &Utf8Path) -> Result> { - let projects_dir = home.join("projects"); - if !projects_dir.as_std_path().exists() { - return Ok(Vec::new()); - } - - let mut projects = Vec::new(); - for entry in std::fs::read_dir(projects_dir.as_std_path())? { - let entry = entry?; - let path = entry.path(); - if !path.is_dir() { - continue; - } - let dir_name = match path.file_name().and_then(|n| n.to_str()) { - Some(name) => name.to_string(), - None => continue, - }; - - let utf8_path = match Utf8PathBuf::try_from(path.clone()) { - Ok(p) => p, - Err(_) => continue, - }; - - let sessions = discover_sessions(&utf8_path)?; - let original_path = resolve_original_path(&dir_name, &sessions); - - projects.push(ClaudeProject { - path: utf8_path, - original_path, - sessions, - }); - } - - Ok(projects) -} - -/// Decode an encoded project folder name back to the original path (lossy). -/// -/// **Warning**: Claude Code's encoding replaces `\`, `/`, AND `_` all with -/// `-`, making this decoding ambiguous. 
For example, `G--dev-projects-adk-rust` -/// could be `G:/dev/projects/adk-rust` or `G:/dev/projects/adk/rust`. Prefer -/// [`resolve_original_path`] which reads ground truth from `sessions-index.json` -/// or session JSONL files. This function is a last-resort fallback for empty -/// project directories with no sessions or index. -pub fn decode_project_path(encoded: &str) -> String { - // Split on "--" to recover path segments separated by the original separators. - let parts: Vec<&str> = encoded.split("--").collect(); - - if parts.is_empty() { - return encoded.to_string(); - } - - let mut result = String::new(); - - let first = parts[0]; - - if first.len() == 1 && first.chars().next().map_or(false, |c| c.is_ascii_uppercase()) { - // Windows drive letter: "G" → "G:" - result.push_str(first); - result.push(':'); - } else if first.starts_with('-') || first.is_empty() { - // Unix-style absolute path: the original path started with "/". - // The first segment has a leading "-" that encoded the root separator. - // Strip that leading "-" to recover the first directory component. - let component = first.trim_start_matches('-'); - result.push('/'); - if !component.is_empty() { - // Single dashes within the component are path separators. - result.push_str(&component.replace('-', "/")); - } - } else { - result.push_str(first); - } - - // Remaining "--"-separated parts are additional path components. - // Within each part, single "-" represent path separators. - for part in &parts[1..] { - result.push('/'); - result.push_str(&part.replace('-', "/")); - } - - result -} - -/// Discover all session JSONL files in a project directory. -pub fn discover_sessions(project_dir: &Utf8Path) -> Result> { - let index = load_session_index(project_dir); - let mut sessions = Vec::new(); - - for entry in std::fs::read_dir(project_dir.as_std_path())? 
{ - let entry = entry?; - let path = entry.path(); - - // Only .jsonl files - let extension = path.extension().and_then(|e| e.to_str()); - if extension != Some("jsonl") { - continue; - } - - let stem = match path.file_stem().and_then(|s| s.to_str()) { - Some(s) => s.to_string(), - None => continue, - }; - - let utf8_path = match Utf8PathBuf::try_from(path) { - Ok(p) => p, - Err(_) => continue, - }; - - // Check for artifacts directory (same name as the session stem). - let artifacts_dir = { - let dir = project_dir.join(&stem); - if dir.as_std_path().is_dir() { - Some(dir) - } else { - None - } - }; - - let index_entry = index.as_ref().and_then(|idx| idx.get(&stem).cloned()); - - sessions.push(SessionRef { - id: stem, - jsonl_path: utf8_path, - artifacts_dir, - index_entry, - }); - } - - Ok(sessions) -} - -/// Load `sessions-index.json` if it exists in the given project directory. -fn load_session_index(project_dir: &Utf8Path) -> Option> { - let index_path = project_dir.join("sessions-index.json"); - if !index_path.as_std_path().exists() { - return None; - } - - let content = std::fs::read_to_string(index_path.as_std_path()).ok()?; - serde_json::from_str::>(&content).ok() -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::TempDir; - - #[test] - fn decode_project_path_windows() { - assert_eq!( - decode_project_path("G--dev-projects-dirigent"), - "G:/dev/projects/dirigent" - ); - } - - #[test] - fn decode_project_path_windows_users() { - assert_eq!( - decode_project_path("C--Users-g4b-tmp"), - "C:/Users/g4b/tmp" - ); - } - - #[test] - fn decode_project_path_unix() { - assert_eq!( - decode_project_path("-home-user-projects-foo"), - "/home/user/projects/foo" - ); - } - - #[test] - fn discover_sessions_in_temp_dir() { - let tmp = TempDir::new().unwrap(); - let project_dir = 
Utf8Path::from_path(tmp.path()).unwrap(); - - // Create fake session files. - std::fs::write(project_dir.join("abc-def-123.jsonl").as_std_path(), "{}\n").unwrap(); - std::fs::write(project_dir.join("xyz-456-789.jsonl").as_std_path(), "{}\n").unwrap(); - // Create an artifacts directory for one session. - std::fs::create_dir(project_dir.join("abc-def-123").as_std_path()).unwrap(); - - let sessions = discover_sessions(project_dir).unwrap(); - assert_eq!(sessions.len(), 2); - - let with_artifacts = sessions.iter().find(|s| s.id == "abc-def-123").unwrap(); - assert!(with_artifacts.artifacts_dir.is_some()); - - let without_artifacts = sessions.iter().find(|s| s.id == "xyz-456-789").unwrap(); - assert!(without_artifacts.artifacts_dir.is_none()); - } - - #[test] - fn discover_sessions_ignores_non_jsonl() { - let tmp = TempDir::new().unwrap(); - let project_dir = Utf8Path::from_path(tmp.path()).unwrap(); - - std::fs::write(project_dir.join("session.jsonl").as_std_path(), "{}\n").unwrap(); - std::fs::write( - project_dir.join("sessions-index.json").as_std_path(), - "{}", - ) - .unwrap(); - std::fs::create_dir(project_dir.join("some-dir").as_std_path()).unwrap(); - - let sessions = discover_sessions(project_dir).unwrap(); - assert_eq!(sessions.len(), 1); - assert_eq!(sessions[0].id, "session"); - } - - #[test] - fn discover_sessions_loads_index_entry() { - let tmp = TempDir::new().unwrap(); - let project_dir = Utf8Path::from_path(tmp.path()).unwrap(); - - std::fs::write(project_dir.join("abc-123.jsonl").as_std_path(), "{}\n").unwrap(); - - let index_json = r#"{ - "abc-123": { - "sessionId": "abc-123", - "firstPrompt": "Hello", - "summary": "A test session", - "messageCount": 5 - } - }"#; - std::fs::write( - project_dir.join("sessions-index.json").as_std_path(), - index_json, - ) - .unwrap(); - - let sessions = discover_sessions(project_dir).unwrap(); - assert_eq!(sessions.len(), 1); - - let entry = sessions[0].index_entry.as_ref().unwrap(); - 
assert_eq!(entry.session_id.as_deref(), Some("abc-123")); - assert_eq!(entry.first_prompt.as_deref(), Some("Hello")); - assert_eq!(entry.message_count, Some(5)); - } - - #[test] - fn resolve_original_path_prefers_index_project_path() { - let sessions = vec![SessionRef { - id: "test-session".to_string(), - jsonl_path: Utf8PathBuf::from("/tmp/fake.jsonl"), - artifacts_dir: None, - index_entry: Some(SessionIndexEntry { - session_id: Some("test-session".to_string()), - first_prompt: None, - summary: None, - message_count: None, - created: None, - modified: None, - git_branch: None, - project_path: Some(r"G:\dev\projects\bevy_sprite3d".to_string()), - }), - }]; - let result = resolve_original_path("G--dev-projects-bevy-sprite3d", &sessions); - assert_eq!(result, "G:/dev/projects/bevy_sprite3d"); - } - - #[test] - fn resolve_original_path_falls_back_to_decode() { - let sessions: Vec = vec![]; - let result = resolve_original_path("G--dev-projects-dirigent", &sessions); - assert_eq!(result, "G:/dev/projects/dirigent"); - } - - #[test] - fn discover_projects_empty_when_no_projects_dir() { - let tmp = TempDir::new().unwrap(); - let home_dir = Utf8Path::from_path(tmp.path()).unwrap(); - - // No "projects" subdirectory — should return empty vec, not an error. 
- let projects = discover_projects(home_dir).unwrap(); - assert!(projects.is_empty()); - } -} diff --git a/crates/dirigent_anth/src/error.rs b/crates/dirigent_anth/src/error.rs deleted file mode 100644 index ef33e4e..0000000 --- a/crates/dirigent_anth/src/error.rs +++ /dev/null @@ -1,19 +0,0 @@ -#[derive(Debug, thiserror::Error)] -pub enum AntError { - #[error("I/O error: {0}")] - Io(#[from] std::io::Error), - - #[error("JSON parse error at line {line}: {source}")] - JsonParse { - line: usize, - source: serde_json::Error, - }, - - #[error("Claude home directory not found")] - HomeNotFound, - - #[error("Invalid path: {0}")] - InvalidPath(String), -} - -pub type Result = std::result::Result; diff --git a/crates/dirigent_anth/src/lib.rs b/crates/dirigent_anth/src/lib.rs deleted file mode 100644 index e0b6621..0000000 --- a/crates/dirigent_anth/src/lib.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! dirigent_anth — Claude Code Session Parser & Toolkit -//! -//! Reads Claude Code's local JSONL session storage and produces typed, -//! deduplicated, correlated Rust data structures. -//! -//! # Design -//! -//! See `docs/superpowers/plans/2026-03-23-dirigent-ant-design.md` - -pub mod claude_grab; -pub mod anth_usage; -pub mod correlation; -pub mod dedup; -pub mod discovery; -pub mod error; -pub mod noise; -pub mod parser; -pub mod subagent; -pub mod tree; -pub mod types; -pub mod util; - -/// Load and fully parse a session: dedup, correlate, tree, subagents. -pub fn load_session(session_ref: &types::SessionRef) -> error::Result { - let messages = parser::parse_session_deduped(&session_ref.jsonl_path)?; - let tree = tree::ConversationTree::build(&messages); - let tool_exchanges = correlation::correlate_tools(&messages); - let mut subagents = if let Some(ref dir) = session_ref.artifacts_dir { - subagent::load_subagents(dir)? 
- } else { - Vec::new() - }; - subagent::link_subagents_to_calls(&mut subagents, &tool_exchanges); - - Ok(types::ParsedSession { - messages, - tree, - tool_exchanges, - subagents, - }) -} - -pub use correlation::correlate_tools; -pub use dedup::dedup_messages; -pub use discovery::{decode_project_path, discover_claude_home, discover_projects, discover_sessions, resolve_original_path}; -pub use error::{AntError, Result}; -pub use noise::{classify_noise, NoiseKind}; -pub use parser::{parse_line, parse_session, parse_session_deduped}; -pub use subagent::{link_subagents_to_calls, load_subagents}; -pub use tree::{message_parent_uuid, message_uuid, ConversationNode, ConversationTree}; -pub use types::*; -pub use util::parse_timestamp; diff --git a/crates/dirigent_anth/src/noise.rs b/crates/dirigent_anth/src/noise.rs deleted file mode 100644 index 6e986aa..0000000 --- a/crates/dirigent_anth/src/noise.rs +++ /dev/null @@ -1,72 +0,0 @@ -use crate::types::*; - -/// Classification of noise patterns in Claude Code JSONL. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum NoiseKind { - Meta, - Warmup, - Interrupted, - Continuation, - ApiError, - SystemCaveat, - QueueOp, -} - -/// Classify a message as noise, if applicable. -/// Returns None for normal messages. 
-pub fn classify_noise(message: &RawMessage) -> Option { - match message { - RawMessage::QueueOperation(_) => Some(NoiseKind::QueueOp), - RawMessage::User(user) => { - if user.is_meta.unwrap_or(false) { - return Some(NoiseKind::Meta); - } - if let Some(text) = extract_user_text(user) { - if text == "Warmup" { - return Some(NoiseKind::Warmup); - } - if text.starts_with("[Request interrupted") { - return Some(NoiseKind::Interrupted); - } - if text.starts_with("This session is being continued") { - return Some(NoiseKind::Continuation); - } - if text.starts_with("API Error") { - return Some(NoiseKind::ApiError); - } - if text.starts_with("Caveat: The messages below") { - return Some(NoiseKind::SystemCaveat); - } - } - None - } - _ => None, - } -} - -/// Extract plain text from a user message's content. -fn extract_user_text(user: &RawUserMessage) -> Option<&str> { - match &user.message.content { - Content::Text(s) => Some(s.as_str()), - Content::Blocks(_) => None, // tool_result blocks, not plain text - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn normal_assistant_is_not_noise() { - let json = r#"{"type":"assistant","uuid":"x","timestamp":"2026-01-01T00:00:00Z","sessionId":"s","message":{"id":"m","role":"assistant","content":[{"type":"text","text":"Hello"}],"stop_reason":"end_turn"}}"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - assert_eq!(classify_noise(&msg), None); - } - - #[test] - fn queue_op_is_noise() { - let json = r#"{"type":"queue-operation","operation":"enqueue","timestamp":"2026-01-01T00:00:00Z","sessionId":"s"}"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - assert_eq!(classify_noise(&msg), Some(NoiseKind::QueueOp)); - } -} diff --git a/crates/dirigent_anth/src/parser.rs b/crates/dirigent_anth/src/parser.rs deleted file mode 100644 index e7db24d..0000000 --- a/crates/dirigent_anth/src/parser.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! JSONL line parser for Claude Code session files. 
- -use std::io::BufRead; - -use camino::Utf8Path; - -use crate::error::Result; -use crate::types::RawMessage; - -/// Parse a single JSONL line into a RawMessage. -/// Returns None for lines that cannot be parsed (logged via tracing). -pub fn parse_line(line: &str, line_number: usize) -> Option { - match serde_json::from_str::(line) { - Ok(msg) => Some(msg), - Err(e) => { - tracing::warn!(line = line_number, error = %e, "Skipping unparseable JSONL line"); - None - } - } -} - -/// Parse all messages from a JSONL file. -/// Skips unparseable lines (lenient). Returns I/O errors. -pub fn parse_session(path: &Utf8Path) -> Result> { - let file = std::fs::File::open(path.as_std_path())?; - let reader = std::io::BufReader::new(file); - let mut messages = Vec::new(); - - for (i, line) in reader.lines().enumerate() { - let line = line?; - if line.trim().is_empty() { - continue; - } - if let Some(msg) = parse_line(&line, i + 1) { - messages.push(msg); - } - } - - Ok(messages) -} - -/// Parse a session JSONL file with streaming deduplication applied. -/// -/// Claude Code writes multiple JSONL lines for the same assistant message as -/// it streams. This function collapses those into a single final version per -/// uuid. See [`crate::dedup::dedup_messages`] for details. -pub fn parse_session_deduped(path: &Utf8Path) -> Result> { - let messages = parse_session(path)?; - Ok(crate::dedup::dedup_messages(messages)) -} diff --git a/crates/dirigent_anth/src/subagent.rs b/crates/dirigent_anth/src/subagent.rs deleted file mode 100644 index 8fff571..0000000 --- a/crates/dirigent_anth/src/subagent.rs +++ /dev/null @@ -1,215 +0,0 @@ -//! Sub-agent session loading. -//! -//! Claude Code spawns sub-agents for Agent tool calls and stores their -//! conversations under `/subagents/`. Each sub-agent -//! has a JSONL file and an optional `.meta.json` with metadata such as the -//! agent type. 
- -use camino::Utf8Path; - -use crate::error::Result; -use crate::parser::parse_session; -use crate::types::{SubAgentMeta, SubAgentSession, ToolExchange}; - -/// Load all sub-agent sessions from a session's artifacts directory. -/// -/// Expects files at: `/subagents/agent-.jsonl` -/// with optional companion: `/subagents/agent-.meta.json` -/// -/// Returns an empty `Vec` if the `subagents/` subdirectory does not exist. -pub fn load_subagents(session_artifacts_dir: &Utf8Path) -> Result> { - let subagents_dir = session_artifacts_dir.join("subagents"); - if !subagents_dir.as_std_path().exists() { - return Ok(Vec::new()); - } - - let mut subagents = Vec::new(); - - for entry in std::fs::read_dir(subagents_dir.as_std_path())? { - let entry = entry?; - let path = entry.path(); - - // Only process agent-*.jsonl files - let file_name = match path.file_name().and_then(|n| n.to_str()) { - Some(name) => name.to_string(), - None => continue, - }; - - if !file_name.starts_with("agent-") || !file_name.ends_with(".jsonl") { - continue; - } - - // Extract agent ID: "agent-abc123.jsonl" → "abc123" - let agent_id = file_name - .strip_prefix("agent-") - .and_then(|s| s.strip_suffix(".jsonl")) - .unwrap_or(&file_name) - .to_string(); - - let jsonl_path = match camino::Utf8PathBuf::try_from(path.clone()) { - Ok(p) => p, - Err(_) => continue, - }; - - // Parse the JSONL session - let messages = parse_session(&jsonl_path)?; - - // Try to load companion metadata file - let meta_path = path.with_file_name(format!("agent-{}.meta.json", agent_id)); - let meta = if meta_path.exists() { - let content = std::fs::read_to_string(&meta_path)?; - serde_json::from_str::(&content) - .unwrap_or(SubAgentMeta { agent_type: None }) - } else { - SubAgentMeta { agent_type: None } - }; - - subagents.push(SubAgentSession { - agent_id, - meta, - messages, - parent_tool_call_id: None, - }); - } - - Ok(subagents) -} - -/// Try to link sub-agent sessions to their parent Agent tool calls. 
-/// -/// For each Agent tool call in `tool_exchanges`, parses the tool result text -/// for `agentId: ` and matches it against sub-agent sessions. On match, -/// sets `SubAgentSession.parent_tool_call_id` to the tool call's ID. -/// -/// This is best-effort: if the agentId text format changes or a result is -/// missing, the sub-agent is still usable but without tool_use linkage. -pub fn link_subagents_to_calls( - subagents: &mut [SubAgentSession], - tool_exchanges: &[ToolExchange], -) { - use regex::Regex; - - if subagents.is_empty() || tool_exchanges.is_empty() { - return; - } - - // Compile once, match many - let re = Regex::new(r"agentId:\s*(\S+)").expect("valid regex"); - - for exchange in tool_exchanges { - // Only look at Agent tool calls - if exchange.call.name != crate::types::ToolName::Agent { - continue; - } - - // Extract agentId from the tool result text - let agent_id = exchange - .result - .as_ref() - .and_then(|r| r.content.as_deref()) - .and_then(|text| re.captures(text)) - .and_then(|caps| caps.get(1)) - .map(|m| m.as_str()); - - let agent_id = match agent_id { - Some(id) => id, - None => continue, - }; - - // Find matching sub-agent and set the linkage - if let Some(subagent) = subagents.iter_mut().find(|s| s.agent_id == agent_id) { - subagent.parent_tool_call_id = Some(exchange.call.id.clone()); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::types::{ToolCall, ToolName, ToolResultData}; - - #[test] - fn test_link_subagents_to_calls_matches_agent_id() { - let mut subagents = vec![ - SubAgentSession { - agent_id: "abc123def".to_string(), - meta: SubAgentMeta { agent_type: Some("Explore".to_string()) }, - messages: vec![], - parent_tool_call_id: None, - }, - SubAgentSession { - agent_id: "xyz789".to_string(), - meta: SubAgentMeta { agent_type: None }, - messages: vec![], - parent_tool_call_id: None, - }, - ]; - - let exchanges = vec![ - ToolExchange { - call: ToolCall { - id: "toolu_01ABC".to_string(), - name: ToolName::Agent, 
- input: serde_json::json!({"description": "test"}), - source_message_uuid: "msg-1".to_string(), - }, - result: Some(ToolResultData { - tool_use_id: "toolu_01ABC".to_string(), - content: Some("agentId: abc123def (use SendMessage with to: 'abc123def' to continue)\ntotal_tokens: 1000".to_string()), - is_error: false, - source_message_uuid: "msg-2".to_string(), - }), - }, - ToolExchange { - call: ToolCall { - id: "toolu_02DEF".to_string(), - name: ToolName::Read, - input: serde_json::json!({}), - source_message_uuid: "msg-3".to_string(), - }, - result: None, - }, - ]; - - link_subagents_to_calls(&mut subagents, &exchanges); - - assert_eq!(subagents[0].parent_tool_call_id, Some("toolu_01ABC".to_string())); - assert_eq!(subagents[1].parent_tool_call_id, None); - } - - #[test] - fn test_link_subagents_empty_inputs() { - let mut empty_subagents: Vec = vec![]; - let empty_exchanges: Vec = vec![]; - link_subagents_to_calls(&mut empty_subagents, &empty_exchanges); - // No panic - } - - #[test] - fn test_link_subagents_no_match() { - let mut subagents = vec![SubAgentSession { - agent_id: "no_match".to_string(), - meta: SubAgentMeta { agent_type: None }, - messages: vec![], - parent_tool_call_id: None, - }]; - - let exchanges = vec![ToolExchange { - call: ToolCall { - id: "toolu_99".to_string(), - name: ToolName::Agent, - input: serde_json::json!({}), - source_message_uuid: "msg-1".to_string(), - }, - result: Some(ToolResultData { - tool_use_id: "toolu_99".to_string(), - content: Some("agentId: different_id\ntokens: 500".to_string()), - is_error: false, - source_message_uuid: "msg-2".to_string(), - }), - }]; - - link_subagents_to_calls(&mut subagents, &exchanges); - assert_eq!(subagents[0].parent_tool_call_id, None); - } -} diff --git a/crates/dirigent_anth/src/tree.rs b/crates/dirigent_anth/src/tree.rs deleted file mode 100644 index ca8f37d..0000000 --- a/crates/dirigent_anth/src/tree.rs +++ /dev/null @@ -1,171 +0,0 @@ -//! 
Conversation tree module — builds a parent/child tree from `RawMessage`s. -//! -//! Claude Code sessions are not purely linear: the user can edit earlier -//! messages, producing branches. Each message carries a `uuid` and a -//! `parentUuid` that describe the relationship. This module reconstructs -//! the tree so callers can walk threads, detect branches, and select the -//! main thread. - -use std::collections::HashMap; - -use crate::types::RawMessage; - -// --------------------------------------------------------------------------- -// Node & tree types -// --------------------------------------------------------------------------- - -/// A single node in the conversation tree. -#[derive(Debug)] -pub struct ConversationNode { - /// The UUID of this message. - pub uuid: String, - /// The raw message stored at this node. - pub message: RawMessage, - /// UUIDs of direct children, in insertion order. - pub children: Vec, -} - -/// The full conversation tree for a session. -/// -/// A session may have multiple roots when the first message has no -/// `parentUuid`, or when a message refers to a parent that is not present -/// in the slice provided to [`ConversationTree::build`]. -#[derive(Debug)] -pub struct ConversationTree { - /// Root node UUIDs (messages with no parent or with an unknown parent). - pub roots: Vec, - /// All nodes indexed by UUID. - pub nodes: HashMap, -} - -// --------------------------------------------------------------------------- -// UUID / parent-UUID helpers -// --------------------------------------------------------------------------- - -/// Extract the `uuid` from any `RawMessage` variant. -/// -/// Returns `None` for variants that carry no UUID (e.g. `QueueOperation`). 
-pub fn message_uuid(msg: &RawMessage) -> Option<&str> { - match msg { - RawMessage::User(m) => m.uuid.as_deref(), - RawMessage::Assistant(m) => m.uuid.as_deref(), - RawMessage::Progress(m) => m.uuid.as_deref(), - RawMessage::System(m) => m.uuid.as_deref(), - RawMessage::QueueOperation(_) - | RawMessage::FileHistorySnapshot(_) - | RawMessage::LastPrompt(_) => None, - } -} - -/// Extract the `parent_uuid` from any `RawMessage` variant. -/// -/// Returns `None` for variants that carry no parent UUID. -pub fn message_parent_uuid(msg: &RawMessage) -> Option<&str> { - match msg { - RawMessage::User(m) => m.parent_uuid.as_deref(), - RawMessage::Assistant(m) => m.parent_uuid.as_deref(), - RawMessage::Progress(m) => m.parent_uuid.as_deref(), - RawMessage::System(m) => m.parent_uuid.as_deref(), - RawMessage::QueueOperation(_) - | RawMessage::FileHistorySnapshot(_) - | RawMessage::LastPrompt(_) => None, - } -} - -// --------------------------------------------------------------------------- -// ConversationTree impl -// --------------------------------------------------------------------------- - -impl ConversationTree { - /// Build a conversation tree from a sequence of messages. - /// - /// Messages without a UUID (e.g. `QueueOperation`) are silently skipped. - /// If a message's `parentUuid` is present but not found in the set, - /// that message is treated as a root. - pub fn build(messages: &[RawMessage]) -> Self { - let mut nodes: HashMap = HashMap::new(); - let mut roots: Vec = Vec::new(); - - // First pass: insert every addressable message as a node. - for msg in messages { - if let Some(uuid) = message_uuid(msg) { - nodes.insert( - uuid.to_string(), - ConversationNode { - uuid: uuid.to_string(), - message: msg.clone(), - children: Vec::new(), - }, - ); - } - } - - // Second pass: collect (uuid, parent_uuid) pairs so we can wire up - // parent→child edges without a simultaneous mutable borrow. 
- let parent_links: Vec<(String, Option)> = messages - .iter() - .filter_map(|msg| { - let uuid = message_uuid(msg)?.to_string(); - let parent = message_parent_uuid(msg).map(|s| s.to_string()); - Some((uuid, parent)) - }) - .collect(); - - for (uuid, parent_uuid) in parent_links { - match parent_uuid { - Some(parent_id) if nodes.contains_key(&parent_id) => { - // Safe: parent_id != uuid (a message cannot be its own parent). - nodes - .get_mut(&parent_id) - .expect("parent key confirmed above") - .children - .push(uuid); - } - _ => { - // No parent, or parent not in the provided slice — treat as root. - roots.push(uuid); - } - } - } - - ConversationTree { roots, nodes } - } - - /// Walk the *main thread*: start from the first root and always follow - /// the first child at each step. - /// - /// In a linear session this is the complete conversation. In a branching - /// session this is the path taken before any edits. - pub fn main_thread(&self) -> Vec<&ConversationNode> { - let mut result = Vec::new(); - if let Some(root_id) = self.roots.first() { - let mut current = root_id.as_str(); - loop { - match self.nodes.get(current) { - Some(node) => { - result.push(node); - match node.children.first() { - Some(first_child) => current = first_child.as_str(), - None => break, - } - } - None => break, - } - } - } - result - } - - /// Returns `true` when every node has at most one child (no branches). - pub fn is_linear(&self) -> bool { - self.nodes.values().all(|n| n.children.len() <= 1) - } - - /// Returns all nodes that have more than one child (branch points). - pub fn branch_points(&self) -> Vec<&ConversationNode> { - self.nodes - .values() - .filter(|n| n.children.len() > 1) - .collect() - } -} diff --git a/crates/dirigent_anth/src/types.rs b/crates/dirigent_anth/src/types.rs deleted file mode 100644 index e4c92ac..0000000 --- a/crates/dirigent_anth/src/types.rs +++ /dev/null @@ -1,847 +0,0 @@ -//! Core types for parsing Claude Code JSONL session data. 
- -use camino::Utf8PathBuf; -use serde::{Deserialize, Serialize}; - -// --------------------------------------------------------------------------- -// Content types -// --------------------------------------------------------------------------- - -/// Content is either a plain string or an array of content blocks. -/// -/// Uses a custom deserializer so that `Blocks` variant applies lenient -/// deserialization — unknown content block types (e.g. `tool_reference`) -/// are silently skipped instead of failing the entire message. -#[derive(Debug, Clone, Serialize)] -#[serde(untagged)] -pub enum Content { - Text(String), - Blocks(Vec), -} - -impl<'de> serde::Deserialize<'de> for Content { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - let value = serde_json::Value::deserialize(deserializer)?; - match value { - serde_json::Value::String(s) => Ok(Content::Text(s)), - serde_json::Value::Array(arr) => { - let blocks = arr - .into_iter() - .filter_map(|v| { - serde_json::from_value::(v.clone()) - .ok() - .or_else(|| { - tracing::debug!( - "Skipping unknown content block: {:?}", - v.get("type") - ); - None - }) - }) - .collect(); - Ok(Content::Blocks(blocks)) - } - other => Err(serde::de::Error::custom(format!( - "expected string or array for Content, got {}", - match &other { - serde_json::Value::Null => "null", - serde_json::Value::Bool(_) => "bool", - serde_json::Value::Number(_) => "number", - serde_json::Value::Object(_) => "object", - _ => "unknown", - } - ))), - } - } -} - -/// Typed content block inside messages. 
-#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ContentBlock { - Text { - text: String, - }, - ToolUse { - id: String, - name: String, - input: serde_json::Value, - #[serde(default)] - caller: Option, - }, - ToolResult { - tool_use_id: String, - #[serde(default)] - content: Option, - #[serde(default)] - is_error: bool, - }, - Thinking { - thinking: String, - }, - Image { - source: serde_json::Value, - }, -} - -// --------------------------------------------------------------------------- -// Lenient content block deserialization -// --------------------------------------------------------------------------- - -/// Deserializes a `Vec` leniently — unknown block types are -/// silently skipped instead of failing the entire message. -fn deserialize_content_blocks<'de, D>( - deserializer: D, -) -> std::result::Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - use serde::Deserialize as _; - let raw: Vec = Vec::deserialize(deserializer)?; - Ok(raw - .into_iter() - .filter_map(|v| { - serde_json::from_value::(v.clone()).ok().or_else(|| { - tracing::debug!("Skipping unknown content block: {:?}", v.get("type")); - None - }) - }) - .collect()) -} - -// --------------------------------------------------------------------------- -// Top-level JSONL line discriminator -// --------------------------------------------------------------------------- - -/// Top-level JSONL line discriminator. 
-#[derive(Debug, Clone, Deserialize)] -#[serde(tag = "type", rename_all = "kebab-case")] -pub enum RawMessage { - User(RawUserMessage), - Assistant(RawAssistantMessage), - Progress(RawProgressMessage), - System(RawSystemMessage), - QueueOperation(RawQueueOperation), - FileHistorySnapshot(RawFileHistorySnapshot), - LastPrompt(RawLastPrompt), -} - -// --------------------------------------------------------------------------- -// User message -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct RawUserMessage { - #[serde(default)] - pub uuid: Option, - #[serde(default)] - pub parent_uuid: Option, - #[serde(default)] - pub timestamp: Option, - #[serde(default)] - pub session_id: Option, - #[serde(default)] - pub cwd: Option, - #[serde(default)] - pub version: Option, - #[serde(default)] - pub git_branch: Option, - #[serde(default)] - pub is_sidechain: bool, - #[serde(default)] - pub is_meta: Option, - #[serde(default)] - pub user_type: Option, - pub message: UserMessageInner, -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct UserMessageInner { - pub role: String, - pub content: Content, -} - -// --------------------------------------------------------------------------- -// Assistant message -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct RawAssistantMessage { - #[serde(default)] - pub uuid: Option, - #[serde(default)] - pub parent_uuid: Option, - #[serde(default)] - pub timestamp: Option, - #[serde(default)] - pub session_id: Option, - #[serde(default)] - pub cwd: Option, - #[serde(default)] - pub version: Option, - #[serde(default)] - pub git_branch: Option, - #[serde(default)] - pub is_sidechain: bool, - #[serde(default)] - pub request_id: Option, - pub message: AssistantInner, -} - -// NOTE: AssistantInner is the 
Anthropic API response object nested inside -// the Claude Code JSONL wrapper. The API uses snake_case (stop_reason, etc.) -// unlike the outer JSONL wrapper which uses camelCase. -#[derive(Debug, Clone, Deserialize)] -pub struct AssistantInner { - #[serde(default)] - pub model: Option, - #[serde(default)] - pub id: Option, - #[serde(default, rename = "type")] - pub message_type: Option, - #[serde(default)] - pub role: Option, - #[serde(default, deserialize_with = "deserialize_content_blocks")] - pub content: Vec, - #[serde(default)] - pub stop_reason: Option, - #[serde(default)] - pub stop_sequence: Option, - #[serde(default)] - pub usage: Option, -} - -// --------------------------------------------------------------------------- -// Progress message -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct RawProgressMessage { - #[serde(default)] - pub uuid: Option, - #[serde(default)] - pub parent_uuid: Option, - #[serde(default)] - pub timestamp: Option, - #[serde(default)] - pub session_id: Option, - #[serde(default)] - pub cwd: Option, - #[serde(default)] - pub version: Option, - #[serde(default)] - pub git_branch: Option, - #[serde(default)] - pub is_sidechain: bool, - #[serde(default)] - pub data: Option, -} - -// --------------------------------------------------------------------------- -// System message -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct RawSystemMessage { - #[serde(default)] - pub uuid: Option, - #[serde(default)] - pub parent_uuid: Option, - #[serde(default)] - pub timestamp: Option, - #[serde(default)] - pub session_id: Option, - #[serde(default)] - pub cwd: Option, - #[serde(default)] - pub version: Option, - #[serde(default)] - pub git_branch: Option, - #[serde(default)] - pub is_sidechain: bool, - 
#[serde(default)] - pub data: Option, -} - -// --------------------------------------------------------------------------- -// Queue operation -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct RawQueueOperation { - pub operation: String, - #[serde(default)] - pub timestamp: Option, - #[serde(default)] - pub session_id: Option, -} - -// --------------------------------------------------------------------------- -// File history snapshot -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct RawFileHistorySnapshot { - #[serde(default)] - pub message_id: Option, - #[serde(default)] - pub is_snapshot_update: bool, - #[serde(default)] - pub snapshot: Option, -} - -// --------------------------------------------------------------------------- -// Last prompt -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct RawLastPrompt { - #[serde(default)] - pub last_prompt: Option, - #[serde(default)] - pub session_id: Option, -} - -// --------------------------------------------------------------------------- -// Tool types (for correlation module later) -// --------------------------------------------------------------------------- - -/// Known tool names used by Claude Code. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum ToolName { - Bash, - Read, - Write, - Edit, - Grep, - Glob, - Agent, - Skill, - WebSearch, - WebFetch, - TodoWrite, - NotebookEdit, - Other(String), -} - -impl From for ToolName { - fn from(s: String) -> Self { - match s.as_str() { - "Bash" => ToolName::Bash, - "Read" => ToolName::Read, - "Write" => ToolName::Write, - "Edit" => ToolName::Edit, - "Grep" => ToolName::Grep, - "Glob" => ToolName::Glob, - "Agent" => ToolName::Agent, - "Skill" => ToolName::Skill, - "WebSearch" => ToolName::WebSearch, - "WebFetch" => ToolName::WebFetch, - "TodoWrite" => ToolName::TodoWrite, - "NotebookEdit" => ToolName::NotebookEdit, - other => ToolName::Other(other.to_string()), - } - } -} - -/// A tool call extracted from an assistant message. -#[derive(Debug, Clone)] -pub struct ToolCall { - pub id: String, - pub name: ToolName, - pub input: serde_json::Value, - pub source_message_uuid: String, -} - -/// A tool result extracted from a user message. -#[derive(Debug, Clone)] -pub struct ToolResultData { - pub tool_use_id: String, - pub content: Option, - pub is_error: bool, - pub source_message_uuid: String, -} - -/// A correlated tool call + result pair. -#[derive(Debug, Clone)] -pub struct ToolExchange { - pub call: ToolCall, - pub result: Option, -} - -// --------------------------------------------------------------------------- -// Discovery types (for discovery module later) -// --------------------------------------------------------------------------- - -/// A discovered Claude Code project directory. -#[derive(Debug, Clone)] -pub struct ClaudeProject { - pub path: Utf8PathBuf, - pub original_path: String, - pub sessions: Vec, -} - -/// Reference to a session (not yet parsed). -#[derive(Debug, Clone)] -pub struct SessionRef { - pub id: String, - pub jsonl_path: Utf8PathBuf, - pub artifacts_dir: Option, - pub index_entry: Option, -} - -/// From sessions-index.json (when available). 
-#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct SessionIndexEntry { - #[serde(default)] - pub session_id: Option, - #[serde(default)] - pub first_prompt: Option, - #[serde(default)] - pub summary: Option, - #[serde(default)] - pub message_count: Option, - #[serde(default)] - pub created: Option, - #[serde(default)] - pub modified: Option, - #[serde(default)] - pub git_branch: Option, - #[serde(default)] - pub project_path: Option, -} - -// --------------------------------------------------------------------------- -// Sub-agent types -// --------------------------------------------------------------------------- - -/// Sub-agent metadata from .meta.json. -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct SubAgentMeta { - #[serde(default)] - pub agent_type: Option, -} - -/// A parsed sub-agent session. -#[derive(Debug, Clone)] -pub struct SubAgentSession { - pub agent_id: String, - pub meta: SubAgentMeta, - pub messages: Vec, - pub parent_tool_call_id: Option, -} - -// --------------------------------------------------------------------------- -// MessageMeta (convenience, future use) -// --------------------------------------------------------------------------- - -/// Common metadata extracted from any message. Defined for future consumers. -#[derive(Debug, Clone)] -pub struct MessageMeta { - pub uuid: String, - pub parent_uuid: Option, - pub timestamp: Option, - pub session_id: String, - pub cwd: Option, - pub version: Option, - pub git_branch: Option, - pub is_sidechain: bool, -} - -// --------------------------------------------------------------------------- -// ParsedSession -// --------------------------------------------------------------------------- - -/// A fully parsed session with all correlations built. 
-#[derive(Debug)] -pub struct ParsedSession { - pub messages: Vec, - pub tree: crate::tree::ConversationTree, - pub tool_exchanges: Vec, - pub subagents: Vec, -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_content_text_string() { - let json = r#""Hello world""#; - let content: Content = serde_json::from_str(json).unwrap(); - match content { - Content::Text(s) => assert_eq!(s, "Hello world"), - _ => panic!("Expected Content::Text"), - } - } - - #[test] - fn parse_content_blocks() { - let json = r#"[{"type": "text", "text": "Hello"}]"#; - let content: Content = serde_json::from_str(json).unwrap(); - match content { - Content::Blocks(blocks) => { - assert_eq!(blocks.len(), 1); - match &blocks[0] { - ContentBlock::Text { text } => assert_eq!(text, "Hello"), - _ => panic!("Expected ContentBlock::Text"), - } - } - _ => panic!("Expected Content::Blocks"), - } - } - - #[test] - fn parse_tool_use_block() { - let json = r#"{"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}}"#; - let block: ContentBlock = serde_json::from_str(json).unwrap(); - match block { - ContentBlock::ToolUse { id, name, .. } => { - assert_eq!(id, "toolu_123"); - assert_eq!(name, "Bash"); - } - _ => panic!("Expected ContentBlock::ToolUse"), - } - } - - #[test] - fn parse_tool_result_block() { - let json = r#"{"type": "tool_result", "tool_use_id": "toolu_123", "content": "output text", "is_error": false}"#; - let block: ContentBlock = serde_json::from_str(json).unwrap(); - match block { - ContentBlock::ToolResult { - tool_use_id, - is_error, - .. 
- } => { - assert_eq!(tool_use_id, "toolu_123"); - assert!(!is_error); - } - _ => panic!("Expected ContentBlock::ToolResult"), - } - } - - #[test] - fn parse_thinking_block() { - let json = r#"{"type": "thinking", "thinking": "Let me consider..."}"#; - let block: ContentBlock = serde_json::from_str(json).unwrap(); - match block { - ContentBlock::Thinking { thinking } => { - assert_eq!(thinking, "Let me consider..."); - } - _ => panic!("Expected ContentBlock::Thinking"), - } - } - - #[test] - fn parse_queue_operation() { - let json = r#"{"type": "queue-operation", "operation": "enqueue", "timestamp": "2026-03-14T21:15:17.531Z", "sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73"}"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - match msg { - RawMessage::QueueOperation(op) => { - assert_eq!(op.operation, "enqueue"); - assert_eq!( - op.session_id.as_deref(), - Some("00f72d8d-fc54-485c-a082-310ffcabdb73") - ); - } - _ => panic!("Expected RawMessage::QueueOperation"), - } - } - - #[test] - fn parse_user_message_with_string_content() { - let json = r#"{ - "parentUuid": "b1ab1ac7-fdb6-4e25-bc17-4c060b470b4a", - "isSidechain": false, - "userType": "external", - "cwd": "G:\\dev\\projects\\dirigent", - "sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73", - "version": "2.1.71", - "gitBranch": "main", - "type": "user", - "message": { - "role": "user", - "content": "Hello world" - }, - "isMeta": false, - "uuid": "1d843a4a-b99d-4c02-91a3-7cfe3dcac9f0", - "timestamp": "2026-03-14T21:08:58.586Z" - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - match msg { - RawMessage::User(u) => { - assert_eq!(u.uuid.as_deref(), Some("1d843a4a-b99d-4c02-91a3-7cfe3dcac9f0")); - assert_eq!(u.session_id.as_deref(), Some("00f72d8d-fc54-485c-a082-310ffcabdb73")); - assert_eq!(u.is_meta, Some(false)); - match &u.message.content { - Content::Text(s) => assert_eq!(s, "Hello world"), - _ => panic!("Expected Content::Text"), - } - } - _ => panic!("Expected 
RawMessage::User"), - } - } - - #[test] - fn parse_assistant_message_with_tool_use() { - let json = r#"{ - "parentUuid": "77793647-f957-4aec-8b04-a9c07e01e37b", - "isSidechain": false, - "userType": "external", - "cwd": "G:\\dev\\projects\\dirigent", - "sessionId": "00f72d8d-fc54-485c-a082-310ffcabdb73", - "version": "2.1.71", - "gitBranch": "main", - "message": { - "model": "claude-opus-4-6", - "id": "msg_01NcwYjEydGEyZCSCgwmcnYd", - "type": "message", - "role": "assistant", - "content": [ - { - "type": "tool_use", - "id": "toolu_01DP5mkAQnAi2o54idq24cPn", - "name": "Agent", - "input": { - "description": "Investigate config sources of truth", - "subagent_type": "Explore", - "prompt": "test prompt" - }, - "caller": { "type": "direct" } - } - ], - "stop_reason": null, - "stop_sequence": null, - "usage": { - "input_tokens": 3, - "cache_creation_input_tokens": 20147, - "cache_read_input_tokens": 0, - "output_tokens": 9, - "service_tier": "standard" - } - }, - "requestId": "req_011CZ3fYWGjcQCgh5d58d2k8", - "type": "assistant", - "uuid": "6cad0d13-d0ae-47fa-a6b1-b7b45a2b5e0b", - "timestamp": "2026-03-14T21:15:27.916Z" - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - match msg { - RawMessage::Assistant(a) => { - assert_eq!(a.uuid.as_deref(), Some("6cad0d13-d0ae-47fa-a6b1-b7b45a2b5e0b")); - assert_eq!(a.message.model.as_deref(), Some("claude-opus-4-6")); - assert_eq!(a.message.content.len(), 1); - match &a.message.content[0] { - ContentBlock::ToolUse { name, id, .. 
} => { - assert_eq!(name, "Agent"); - assert_eq!(id, "toolu_01DP5mkAQnAi2o54idq24cPn"); - } - _ => panic!("Expected ContentBlock::ToolUse"), - } - assert!(a.message.stop_reason.is_none()); - assert!(a.message.usage.is_some()); - } - _ => panic!("Expected RawMessage::Assistant"), - } - } - - #[test] - fn unknown_content_block_type_skipped_in_assistant() { - let json = r#"{ - "parentUuid": null, - "isSidechain": false, - "sessionId": "test", - "message": { - "role": "assistant", - "content": [ - {"type": "text", "text": "known"}, - {"type": "future_type", "data": "something"} - ] - }, - "type": "assistant", - "uuid": "test-uuid", - "timestamp": "2026-01-01T00:00:00Z" - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - match msg { - RawMessage::Assistant(a) => { - assert_eq!(a.message.content.len(), 1); - match &a.message.content[0] { - ContentBlock::Text { text } => assert_eq!(text, "known"), - _ => panic!("Expected ContentBlock::Text"), - } - } - _ => panic!("Expected RawMessage::Assistant"), - } - } - - // ----------------------------------------------------------------------- - // Regression tests for parse failure audit (2026-04-04) - // ----------------------------------------------------------------------- - - #[test] - fn tool_reference_in_tool_result_content_does_not_fail() { - // Suggestion 1 & 3: tool_reference blocks inside tool_result.content - // should be silently skipped, not fail the entire message. 
- let json = r#"{ - "type": "user", - "uuid": "test-uuid", - "parentUuid": null, - "isSidechain": false, - "sessionId": "test-session", - "message": { - "role": "user", - "content": [ - { - "type": "tool_result", - "tool_use_id": "toolu_abc123", - "content": [ - {"type": "text", "text": "File contents here"}, - {"type": "tool_reference", "tool_name": "TodoWrite"} - ], - "is_error": false - } - ] - } - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - match msg { - RawMessage::User(u) => { - match &u.message.content { - Content::Blocks(blocks) => { - assert_eq!(blocks.len(), 1); - match &blocks[0] { - ContentBlock::ToolResult { tool_use_id, content, .. } => { - assert_eq!(tool_use_id, "toolu_abc123"); - // The inner content should have 1 block (text), tool_reference skipped - match content.as_ref().unwrap() { - Content::Blocks(inner) => { - assert_eq!(inner.len(), 1); - match &inner[0] { - ContentBlock::Text { text } => { - assert_eq!(text, "File contents here"); - } - _ => panic!("Expected inner ContentBlock::Text"), - } - } - _ => panic!("Expected inner Content::Blocks"), - } - } - _ => panic!("Expected ContentBlock::ToolResult"), - } - } - _ => panic!("Expected Content::Blocks"), - } - } - _ => panic!("Expected RawMessage::User"), - } - } - - #[test] - fn file_history_snapshot_parses() { - // Suggestion 2: file-history-snapshot lines should parse, not fail. 
- let json = r#"{ - "type": "file-history-snapshot", - "messageId": "abc-123", - "isSnapshotUpdate": false, - "snapshot": { - "messageId": "abc-123", - "trackedFileBackups": { - "src/main.rs": {"backupFileName": "main.rs.bak", "backupTime": "2026-01-01T00:00:00Z", "version": "1"} - }, - "timestamp": "2026-01-01T00:00:00Z" - } - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - match msg { - RawMessage::FileHistorySnapshot(s) => { - assert_eq!(s.message_id.as_deref(), Some("abc-123")); - assert!(!s.is_snapshot_update); - assert!(s.snapshot.is_some()); - } - _ => panic!("Expected RawMessage::FileHistorySnapshot"), - } - } - - #[test] - fn last_prompt_parses() { - // Suggestion 2: last-prompt lines should parse, not fail. - let json = r#"{ - "type": "last-prompt", - "lastPrompt": "Fix the bug in auth middleware", - "sessionId": "session-uuid-123" - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - match msg { - RawMessage::LastPrompt(lp) => { - assert_eq!(lp.last_prompt.as_deref(), Some("Fix the bug in auth middleware")); - assert_eq!(lp.session_id.as_deref(), Some("session-uuid-123")); - } - _ => panic!("Expected RawMessage::LastPrompt"), - } - } - - #[test] - fn unknown_content_block_in_user_message_skipped() { - // Suggestion 3: Unknown block types in user message content - // should be silently skipped (lenient everywhere). 
- let json = r#"{ - "type": "user", - "uuid": "test-uuid", - "isSidechain": false, - "sessionId": "test", - "message": { - "role": "user", - "content": [ - {"type": "text", "text": "known"}, - {"type": "future_unknown_type", "data": "something"} - ] - } - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - match msg { - RawMessage::User(u) => { - match &u.message.content { - Content::Blocks(blocks) => { - assert_eq!(blocks.len(), 1); - match &blocks[0] { - ContentBlock::Text { text } => assert_eq!(text, "known"), - _ => panic!("Expected ContentBlock::Text"), - } - } - _ => panic!("Expected Content::Blocks"), - } - } - _ => panic!("Expected RawMessage::User"), - } - } - - #[test] - fn tool_name_from_string() { - assert_eq!(ToolName::from("Bash".to_string()), ToolName::Bash); - assert_eq!(ToolName::from("Read".to_string()), ToolName::Read); - assert_eq!(ToolName::from("Agent".to_string()), ToolName::Agent); - assert_eq!(ToolName::from("WebSearch".to_string()), ToolName::WebSearch); - assert_eq!( - ToolName::from("CustomTool".to_string()), - ToolName::Other("CustomTool".to_string()) - ); - } -} diff --git a/crates/dirigent_anth/src/util.rs b/crates/dirigent_anth/src/util.rs deleted file mode 100644 index 2c010f4..0000000 --- a/crates/dirigent_anth/src/util.rs +++ /dev/null @@ -1,70 +0,0 @@ -use chrono::{DateTime, Utc}; - -/// Parse a timestamp from various formats found in Claude Code data. 
-/// -/// Supports: -/// - ISO 8601 string: "2026-03-22T17:00:13.192Z" -/// - Unix milliseconds (number > 1e12): 1769461914249 -/// - Unix seconds (number <= 1e12): 1769461914 -pub fn parse_timestamp(value: &serde_json::Value) -> Option> { - match value { - serde_json::Value::String(s) => { - DateTime::parse_from_rfc3339(s) - .ok() - .map(|dt| dt.with_timezone(&Utc)) - } - serde_json::Value::Number(n) => { - if let Some(ms) = n.as_i64() { - if ms > 1_000_000_000_000 { - DateTime::from_timestamp_millis(ms) - } else { - DateTime::from_timestamp(ms, 0) - } - } else { - None - } - } - _ => None, - } -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::Datelike; - - #[test] - fn parse_timestamp_iso8601() { - let v = serde_json::json!("2026-03-22T17:00:13.192Z"); - let dt = parse_timestamp(&v).unwrap(); - assert_eq!(dt.year(), 2026); - assert_eq!(dt.month(), 3); - assert_eq!(dt.day(), 22); - } - - #[test] - fn parse_timestamp_unix_millis() { - let v = serde_json::json!(1769461914249_i64); - let dt = parse_timestamp(&v).unwrap(); - assert!(dt.year() >= 2025); - } - - #[test] - fn parse_timestamp_unix_seconds() { - let v = serde_json::json!(1769461914_i64); - let dt = parse_timestamp(&v).unwrap(); - assert!(dt.year() >= 2025); - } - - #[test] - fn parse_timestamp_null_returns_none() { - let v = serde_json::json!(null); - assert!(parse_timestamp(&v).is_none()); - } - - #[test] - fn parse_timestamp_invalid_string_returns_none() { - let v = serde_json::json!("not a date"); - assert!(parse_timestamp(&v).is_none()); - } -} diff --git a/crates/dirigent_anth/tests/fixtures/branching_tree.jsonl b/crates/dirigent_anth/tests/fixtures/branching_tree.jsonl deleted file mode 100644 index 93f440c..0000000 --- a/crates/dirigent_anth/tests/fixtures/branching_tree.jsonl +++ /dev/null @@ -1,6 +0,0 @@ 
-{"type":"user","uuid":"r-001","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Help me"}} -{"type":"assistant","uuid":"a-001","parentUuid":"r-001","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-001","message":{"model":"claude-opus-4-6","id":"msg-001","type":"message","role":"assistant","content":[{"type":"text","text":"Sure"}],"stop_reason":"end_turn","usage":{"input_tokens":10,"output_tokens":5}}} -{"type":"user","uuid":"u-002","parentUuid":"a-001","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Do option A"}} -{"type":"assistant","uuid":"a-003","parentUuid":"u-002","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-002","message":{"model":"claude-opus-4-6","id":"msg-003","type":"message","role":"assistant","content":[{"type":"text","text":"Doing A"}],"stop_reason":"end_turn","usage":{"input_tokens":15,"output_tokens":5}}} -{"type":"user","uuid":"u-002b","parentUuid":"a-001","timestamp":"2026-03-23T10:00:04.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Actually, do option B"}} 
-{"type":"assistant","uuid":"a-003b","parentUuid":"u-002b","timestamp":"2026-03-23T10:00:05.000Z","sessionId":"test-session-tree","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-003","message":{"model":"claude-opus-4-6","id":"msg-003b","type":"message","role":"assistant","content":[{"type":"text","text":"Doing B"}],"stop_reason":"end_turn","usage":{"input_tokens":15,"output_tokens":5}}} diff --git a/crates/dirigent_anth/tests/fixtures/minimal_session.jsonl b/crates/dirigent_anth/tests/fixtures/minimal_session.jsonl deleted file mode 100644 index 6476f6f..0000000 --- a/crates/dirigent_anth/tests/fixtures/minimal_session.jsonl +++ /dev/null @@ -1,6 +0,0 @@ -{"type":"queue-operation","operation":"enqueue","timestamp":"2026-03-14T21:00:00.000Z","sessionId":"test-session-001"} -{"type":"queue-operation","operation":"dequeue","timestamp":"2026-03-14T21:00:00.001Z","sessionId":"test-session-001"} -{"type":"user","uuid":"u-001","parentUuid":null,"timestamp":"2026-03-14T21:00:01.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Hello, help me with this project"}} -{"type":"assistant","uuid":"a-001","parentUuid":"u-001","timestamp":"2026-03-14T21:00:02.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-001","message":{"model":"claude-opus-4-6","id":"msg-001","type":"message","role":"assistant","content":[{"type":"text","text":"I'll help you."},{"type":"tool_use","id":"toolu_01","name":"Bash","input":{"command":"ls","description":"List files"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":50}}} 
-{"type":"user","uuid":"u-002","parentUuid":"a-001","timestamp":"2026-03-14T21:00:03.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01","content":"file1.rs\nfile2.rs","is_error":false}]}} -{"type":"assistant","uuid":"a-002","parentUuid":"u-002","timestamp":"2026-03-14T21:00:04.000Z","sessionId":"test-session-001","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-002","message":{"model":"claude-opus-4-6","id":"msg-002","type":"message","role":"assistant","content":[{"type":"text","text":"I can see two Rust files in the directory."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":30}}} diff --git a/crates/dirigent_anth/tests/fixtures/noise_patterns.jsonl b/crates/dirigent_anth/tests/fixtures/noise_patterns.jsonl deleted file mode 100644 index 1369fe2..0000000 --- a/crates/dirigent_anth/tests/fixtures/noise_patterns.jsonl +++ /dev/null @@ -1,9 +0,0 @@ -{"type":"queue-operation","operation":"enqueue","timestamp":"2026-03-14T21:00:00.000Z","sessionId":"test-session-noise"} -{"type":"user","uuid":"u-n-001","parentUuid":null,"timestamp":"2026-03-14T21:00:01.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"message":{"role":"user","content":"system injected stuff"}} -{"type":"user","uuid":"u-n-002","parentUuid":"u-n-001","timestamp":"2026-03-14T21:00:02.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Warmup"}} 
-{"type":"user","uuid":"u-n-003","parentUuid":"u-n-002","timestamp":"2026-03-14T21:00:03.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"[Request interrupted by user"}} -{"type":"user","uuid":"u-n-004","parentUuid":"u-n-003","timestamp":"2026-03-14T21:00:04.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"This session is being continued from a previous conversation"}} -{"type":"user","uuid":"u-n-005","parentUuid":"u-n-004","timestamp":"2026-03-14T21:00:05.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"API Error: rate limit exceeded"}} -{"type":"user","uuid":"u-n-006","parentUuid":"u-n-005","timestamp":"2026-03-14T21:00:06.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Caveat: The messages below were generated by the user"}} -{"type":"user","uuid":"u-n-007","parentUuid":"u-n-006","timestamp":"2026-03-14T21:00:07.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Please help me fix this bug"}} -{"type":"assistant","uuid":"a-n-001","parentUuid":"u-n-007","timestamp":"2026-03-14T21:00:08.000Z","sessionId":"test-session-noise","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"message":{"id":"msg-n-001","role":"assistant","content":[{"type":"text","text":"Sure, let me help."}],"stop_reason":"end_turn"}} diff --git a/crates/dirigent_anth/tests/fixtures/streaming_dedup.jsonl b/crates/dirigent_anth/tests/fixtures/streaming_dedup.jsonl deleted file 
mode 100644 index e787149..0000000 --- a/crates/dirigent_anth/tests/fixtures/streaming_dedup.jsonl +++ /dev/null @@ -1,6 +0,0 @@ -{"type":"user","uuid":"u-100","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"What files are here?"}} -{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me"}],"stop_reason":null,"usage":{"input_tokens":50,"output_tokens":3}}} -{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.100Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me look"},{"type":"tool_use","id":"toolu_100","name":"Bash","input":{"command":""}}],"stop_reason":null,"usage":{"input_tokens":50,"output_tokens":12}}} -{"type":"assistant","uuid":"a-100","parentUuid":"u-100","timestamp":"2026-03-23T10:00:01.200Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-100","message":{"model":"claude-opus-4-6","id":"msg-100","type":"message","role":"assistant","content":[{"type":"text","text":"Let me look at this."},{"type":"tool_use","id":"toolu_100","name":"Bash","input":{"command":"ls"}}],"stop_reason":"tool_use","usage":{"input_tokens":50,"output_tokens":20}}} 
-{"type":"user","uuid":"u-101","parentUuid":"a-100","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_100","content":"main.rs\nlib.rs","is_error":false}]}} -{"type":"assistant","uuid":"a-101","parentUuid":"u-101","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-dedup","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-101","message":{"model":"claude-opus-4-6","id":"msg-101","type":"message","role":"assistant","content":[{"type":"text","text":"Done."}],"stop_reason":"end_turn","usage":{"input_tokens":100,"output_tokens":5}}} diff --git a/crates/dirigent_anth/tests/fixtures/subagent/parent.jsonl b/crates/dirigent_anth/tests/fixtures/subagent/parent.jsonl deleted file mode 100644 index c391246..0000000 --- a/crates/dirigent_anth/tests/fixtures/subagent/parent.jsonl +++ /dev/null @@ -1,4 +0,0 @@ -{"type":"user","uuid":"u-300","parentUuid":null,"timestamp":"2026-03-23T12:00:00.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"message":{"role":"user","content":"Search the codebase"}} -{"type":"assistant","uuid":"a-300","parentUuid":"u-300","timestamp":"2026-03-23T12:00:01.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-300","message":{"model":"claude-opus-4-6","id":"msg-300","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_300","name":"Agent","input":{"description":"Search codebase","subagent_type":"Explore","prompt":"Find all config files"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":20}}} 
-{"type":"user","uuid":"u-301","parentUuid":"a-300","timestamp":"2026-03-23T12:00:30.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_300","content":"Found 3 config files","is_error":false}]}} -{"type":"assistant","uuid":"a-301","parentUuid":"u-301","timestamp":"2026-03-23T12:00:31.000Z","sessionId":"test-session-sub","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-301","message":{"model":"claude-opus-4-6","id":"msg-301","type":"message","role":"assistant","content":[{"type":"text","text":"I found the config files."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":10}}} diff --git a/crates/dirigent_anth/tests/fixtures/subagent/parent/subagents/agent-abc123.jsonl b/crates/dirigent_anth/tests/fixtures/subagent/parent/subagents/agent-abc123.jsonl deleted file mode 100644 index 7ed323e..0000000 --- a/crates/dirigent_anth/tests/fixtures/subagent/parent/subagents/agent-abc123.jsonl +++ /dev/null @@ -1,2 +0,0 @@ -{"type":"user","uuid":"sa-u1","parentUuid":null,"timestamp":"2026-03-23T12:00:02.000Z","sessionId":"agent-abc123","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":true,"isMeta":false,"message":{"role":"user","content":"Find all config files"}} -{"type":"assistant","uuid":"sa-a1","parentUuid":"sa-u1","timestamp":"2026-03-23T12:00:03.000Z","sessionId":"agent-abc123","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":true,"requestId":"req-sa1","message":{"model":"claude-opus-4-6","id":"msg-sa1","type":"message","role":"assistant","content":[{"type":"text","text":"Found config.toml, settings.json, .env"}],"stop_reason":"end_turn","usage":{"input_tokens":50,"output_tokens":15}}} diff --git a/crates/dirigent_anth/tests/fixtures/subagent/parent/subagents/agent-abc123.meta.json 
b/crates/dirigent_anth/tests/fixtures/subagent/parent/subagents/agent-abc123.meta.json deleted file mode 100644 index 9fdf2d6..0000000 --- a/crates/dirigent_anth/tests/fixtures/subagent/parent/subagents/agent-abc123.meta.json +++ /dev/null @@ -1 +0,0 @@ -{"agentType": "Explore"} diff --git a/crates/dirigent_anth/tests/fixtures/tool_correlation.jsonl b/crates/dirigent_anth/tests/fixtures/tool_correlation.jsonl deleted file mode 100644 index 0f4801d..0000000 --- a/crates/dirigent_anth/tests/fixtures/tool_correlation.jsonl +++ /dev/null @@ -1,6 +0,0 @@ -{"type":"user","uuid":"u-200","parentUuid":null,"timestamp":"2026-03-23T10:00:00.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"Fix the bug"}} -{"type":"assistant","uuid":"a-200","parentUuid":"u-200","timestamp":"2026-03-23T10:00:01.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-200","message":{"model":"claude-opus-4-6","id":"msg-200","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_200","name":"Bash","input":{"command":"cargo test"}},{"type":"tool_use","id":"toolu_201","name":"Read","input":{"file_path":"src/main.rs"}}],"stop_reason":"tool_use","usage":{"input_tokens":100,"output_tokens":50}}} -{"type":"user","uuid":"u-201","parentUuid":"a-200","timestamp":"2026-03-23T10:00:02.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_200","content":"test result output","is_error":false},{"type":"tool_result","tool_use_id":"toolu_201","content":"fn main() {}","is_error":false}]}} 
-{"type":"assistant","uuid":"a-201","parentUuid":"u-201","timestamp":"2026-03-23T10:00:03.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-201","message":{"model":"claude-opus-4-6","id":"msg-201","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_202","name":"Write","input":{"file_path":"src/fix.rs","content":"fixed"}}],"stop_reason":"tool_use","usage":{"input_tokens":150,"output_tokens":30}}} -{"type":"user","uuid":"u-202","parentUuid":"a-201","timestamp":"2026-03-23T10:00:04.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":true,"userType":"external","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_202","content":"File written successfully","is_error":false}]}} -{"type":"assistant","uuid":"a-202","parentUuid":"u-202","timestamp":"2026-03-23T10:00:05.000Z","sessionId":"test-session-corr","cwd":"G:\\dev\\test","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-202","message":{"model":"claude-opus-4-6","id":"msg-202","type":"message","role":"assistant","content":[{"type":"text","text":"Bug is fixed."}],"stop_reason":"end_turn","usage":{"input_tokens":200,"output_tokens":20}}} diff --git a/crates/dirigent_anth/tests/integration_tests.rs b/crates/dirigent_anth/tests/integration_tests.rs deleted file mode 100644 index cfd0134..0000000 --- a/crates/dirigent_anth/tests/integration_tests.rs +++ /dev/null @@ -1,294 +0,0 @@ -use camino::{Utf8Path, Utf8PathBuf}; -use chrono::Datelike; -use dirigent_anth::{ - correlation::correlate_tools, - dedup::dedup_messages, - noise::{classify_noise, NoiseKind}, - parse_session, - tree::ConversationTree, - types::{ContentBlock, RawMessage}, - util::parse_timestamp, -}; - -#[test] -fn parse_minimal_session() { - let path = Utf8Path::new("tests/fixtures/minimal_session.jsonl"); - let messages = 
parse_session(path).unwrap(); - - assert_eq!(messages.len(), 6, "Expected 6 messages, got {}", messages.len()); - - let type_names: Vec<&str> = messages - .iter() - .map(|m| match m { - RawMessage::User(_) => "user", - RawMessage::Assistant(_) => "assistant", - RawMessage::Progress(_) => "progress", - RawMessage::System(_) => "system", - RawMessage::QueueOperation(_) => "queue-operation", - RawMessage::FileHistorySnapshot(_) => "file-history-snapshot", - RawMessage::LastPrompt(_) => "last-prompt", - }) - .collect(); - - assert_eq!( - type_names.iter().filter(|&&t| t == "queue-operation").count(), - 2 - ); - assert_eq!(type_names.iter().filter(|&&t| t == "user").count(), 2); - assert_eq!( - type_names.iter().filter(|&&t| t == "assistant").count(), - 2 - ); -} - -#[test] -fn parse_line_returns_none_for_invalid_json() { - assert!(dirigent_anth::parse_line("not valid json", 1).is_none()); - assert!(dirigent_anth::parse_line("{}", 1).is_none()); -} - -#[test] -fn dedup_streaming_session() { - let path = Utf8Path::new("tests/fixtures/streaming_dedup.jsonl"); - let messages = parse_session(path).unwrap(); - - // Raw should have 6 lines (including 3 versions of same assistant message) - assert_eq!(messages.len(), 6, "Raw messages: expected 6, got {}", messages.len()); - - let deduped = dedup_messages(messages); - - // After dedup: U1, A1(final), U2, A2 = 4 - assert_eq!(deduped.len(), 4, "Deduped messages: expected 4, got {}", deduped.len()); - - // The kept assistant message must be the final version - let first_assistant = deduped.iter().find(|m| matches!(m, RawMessage::Assistant(_))).unwrap(); - if let RawMessage::Assistant(a) = first_assistant { - assert!(a.message.stop_reason.is_some(), "Deduped assistant should have stop_reason set"); - assert_eq!(a.message.stop_reason.as_deref(), Some("tool_use")); - assert_eq!(a.message.content.len(), 2, "Final version should have 2 content blocks"); - } else { - unreachable!(); - } -} - -#[test] -fn 
dedup_preserves_non_streamed_messages() { - let path = Utf8Path::new("tests/fixtures/minimal_session.jsonl"); - let messages = parse_session(path).unwrap(); - let count_before = messages.len(); - let deduped = dedup_messages(messages); - // No streaming in minimal_session, so count should be same - assert_eq!(deduped.len(), count_before); -} - -#[test] -fn correlate_parallel_tools() { - let path = Utf8Path::new("tests/fixtures/tool_correlation.jsonl"); - let messages = dirigent_anth::parse_session_deduped(path).unwrap(); - let exchanges = correlate_tools(&messages); - - // 3 tool calls: 2 parallel (Bash+Read) + 1 sequential (Write) - assert_eq!(exchanges.len(), 3); - - // All should have results - assert!(exchanges.iter().all(|e| e.result.is_some())); - - // Verify correct pairing by ID - for ex in &exchanges { - assert_eq!(ex.call.id, ex.result.as_ref().unwrap().tool_use_id); - } -} - -#[test] -fn correlate_no_tools_returns_empty() { - // Test with just a plain user message — no tool calls or results - let messages = vec![ - serde_json::from_str::( - r#"{"type":"user","uuid":"x","timestamp":"2026-01-01T00:00:00Z","sessionId":"s","message":{"role":"user","content":"hello"}}"#, - ) - .unwrap(), - ]; - let exchanges = correlate_tools(&messages); - assert!(exchanges.is_empty()); -} - -#[test] -fn build_branching_tree() { - let path = Utf8Path::new("tests/fixtures/branching_tree.jsonl"); - let messages = dirigent_anth::parse_session(path).unwrap(); - let tree = ConversationTree::build(&messages); - - assert_eq!(tree.roots.len(), 1); - assert!(!tree.is_linear()); - assert_eq!(tree.branch_points().len(), 1); // A1 has 2 children - - let main = tree.main_thread(); - assert_eq!(main.len(), 4); // R → A1 → U2 → A3 (first branch) -} - -#[test] -fn linear_conversation_is_linear() { - let path = Utf8Path::new("tests/fixtures/minimal_session.jsonl"); - let messages = dirigent_anth::parse_session(path).unwrap(); - let tree = ConversationTree::build(&messages); - 
assert!(tree.is_linear()); -} - -#[test] -fn classify_noise_from_fixture() { - let path = Utf8Path::new("tests/fixtures/noise_patterns.jsonl"); - let messages = dirigent_anth::parse_session(path).unwrap(); - - assert_eq!(messages.len(), 9, "Expected 9 messages in noise fixture"); - - let classifications: Vec> = messages.iter() - .map(classify_noise) - .collect(); - - assert_eq!(classifications[0], Some(NoiseKind::QueueOp)); - assert_eq!(classifications[1], Some(NoiseKind::Meta)); - assert_eq!(classifications[2], Some(NoiseKind::Warmup)); - assert_eq!(classifications[3], Some(NoiseKind::Interrupted)); - assert_eq!(classifications[4], Some(NoiseKind::Continuation)); - assert_eq!(classifications[5], Some(NoiseKind::ApiError)); - assert_eq!(classifications[6], Some(NoiseKind::SystemCaveat)); - assert_eq!(classifications[7], None); // normal user - assert_eq!(classifications[8], None); // normal assistant -} - -#[test] -fn load_subagent_from_fixture() { - let artifacts_dir = Utf8Path::new("tests/fixtures/subagent/parent"); - let subagents = dirigent_anth::load_subagents(artifacts_dir).unwrap(); - - assert_eq!(subagents.len(), 1); - assert_eq!(subagents[0].agent_id, "abc123"); - assert_eq!(subagents[0].meta.agent_type.as_deref(), Some("Explore")); - assert_eq!(subagents[0].messages.len(), 2); -} - -#[test] -fn load_subagents_empty_dir() { - // Non-existent artifacts dir should return empty vec - let artifacts_dir = Utf8Path::new("tests/fixtures/nonexistent"); - let subagents = dirigent_anth::load_subagents(artifacts_dir).unwrap(); - assert!(subagents.is_empty()); -} - -#[test] -fn load_full_session_with_subagents() { - use dirigent_anth::types::SessionRef; - - let session_ref = SessionRef { - id: "parent".to_string(), - jsonl_path: Utf8PathBuf::from("tests/fixtures/subagent/parent.jsonl"), - artifacts_dir: Some(Utf8PathBuf::from("tests/fixtures/subagent/parent")), - index_entry: None, - }; - - let session = dirigent_anth::load_session(&session_ref).unwrap(); - 
assert!(!session.messages.is_empty()); - assert!(!session.subagents.is_empty()); - assert!(!session.tree.roots.is_empty()); - assert!(!session.tool_exchanges.is_empty()); -} - -#[test] -fn load_session_without_artifacts() { - use dirigent_anth::types::SessionRef; - - let session_ref = SessionRef { - id: "minimal".to_string(), - jsonl_path: Utf8PathBuf::from("tests/fixtures/minimal_session.jsonl"), - artifacts_dir: None, - index_entry: None, - }; - - let session = dirigent_anth::load_session(&session_ref).unwrap(); - assert_eq!(session.messages.len(), 6); // 2 queue-ops + 2 users + 2 assistants - assert!(session.subagents.is_empty()); - assert!(session.tree.is_linear()); -} - -#[test] -fn content_as_string_or_blocks() { - // String content - let s: dirigent_anth::types::Content = serde_json::from_str(r#""hello""#).unwrap(); - assert!(matches!(s, dirigent_anth::types::Content::Text(_))); - - // Block content - let b: dirigent_anth::types::Content = - serde_json::from_str(r#"[{"type":"text","text":"hi"}]"#).unwrap(); - assert!(matches!(b, dirigent_anth::types::Content::Blocks(_))); - - // Empty blocks - let empty: dirigent_anth::types::Content = serde_json::from_str(r#"[]"#).unwrap(); - assert!(matches!(empty, dirigent_anth::types::Content::Blocks(ref v) if v.is_empty())); -} - -#[test] -fn missing_optional_fields_dont_crash() { - // Minimal assistant message with many fields missing - let json = r#"{ - "type": "assistant", - "message": { - "content": [{"type": "text", "text": "hi"}] - } - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - assert!(matches!(msg, RawMessage::Assistant(_))); -} - -#[test] -fn tool_result_content_string_and_blocks() { - // tool_result with string content - let json = r#"{"type":"tool_result","tool_use_id":"t1","content":"output text","is_error":false}"#; - let block: ContentBlock = serde_json::from_str(json).unwrap(); - if let ContentBlock::ToolResult { content, is_error, .. 
} = block { - assert!(!is_error); - assert!(content.is_some()); - } else { - panic!("Expected ToolResult"); - } - - // tool_result with no content - let json2 = r#"{"type":"tool_result","tool_use_id":"t2"}"#; - let block2: ContentBlock = serde_json::from_str(json2).unwrap(); - if let ContentBlock::ToolResult { content, is_error, .. } = block2 { - assert!(!is_error); - assert!(content.is_none()); - } else { - panic!("Expected ToolResult"); - } -} - -#[test] -fn extra_unknown_fields_are_ignored() { - // Messages with extra fields not in our structs should parse fine - let json = r#"{ - "type": "user", - "uuid": "x", - "timestamp": "2026-01-01T00:00:00Z", - "sessionId": "s", - "unknownField": "should be ignored", - "anotherExtra": 42, - "message": {"role": "user", "content": "hello"} - }"#; - let msg: RawMessage = serde_json::from_str(json).unwrap(); - assert!(matches!(msg, RawMessage::User(_))); -} - -#[test] -fn timestamp_parsing_all_formats() { - // ISO 8601 - let iso = parse_timestamp(&serde_json::json!("2026-03-22T17:00:13.192Z")).unwrap(); - assert_eq!(iso.year(), 2026); - - // Unix millis - let ms = parse_timestamp(&serde_json::json!(1769461914249_i64)).unwrap(); - assert!(ms.year() >= 2025); - - // Unix seconds - let secs = parse_timestamp(&serde_json::json!(1769461914_i64)).unwrap(); - assert!(secs.year() >= 2025); -} diff --git a/crates/dirigent_anth/tests/usage_parse.rs b/crates/dirigent_anth/tests/usage_parse.rs deleted file mode 100644 index fca1916..0000000 --- a/crates/dirigent_anth/tests/usage_parse.rs +++ /dev/null @@ -1,101 +0,0 @@ -use dirigent_anth::anth_usage::process_usage_screen; - -const SAMPLE: &str = r#" -──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── - Status Config Usage Stats - - Session - Total cost: $0.0000 - Total duration (API): 0s - Total duration (wall): 4s - Total code changes: 0 lines added, 0 lines removed - Usage: 0 input, 0 output, 0 cache read, 0 cache 
write - - Current session - ███████ 14% used - Resets 12:30pm (Europe/Vienna) - - Current week (all models) - ██████ 12% used - Resets May 12, 9am (Europe/Vienna) - - Current week (Sonnet only) - 0% used - Resets May 12, 9am (Europe/Vienna) - - What's contributing to your limits usage? - Approximate, based on local sessions on this machine — does not include other devices or claude.ai - - Last 24h · these are independent characteristics of your usage, not a breakdown - - 97% of your usage came from subagent-heavy sessions - Each subagent runs its own requests. Be deliberate about spawning them — and - consider configuring a cheaper model for simpler subagents. - - 16% of your usage was at >150k context - Longer sessions are more expensive even when cached. /compact mid-task, /clear - when switches to new tasks. - - Subagents % of usage - Explore 3% - claude-code-guide 2% - - d to day · w to week - - Esc to cancel -"#; - -#[test] -fn parses_gauges() { - let result = process_usage_screen(SAMPLE); - assert_eq!(result.data.gauges.len(), 3); - - assert_eq!(result.data.gauges[0].name, "Current session"); - assert_eq!(result.data.gauges[0].percent_used, 14); - assert_eq!( - result.data.gauges[0].resets.as_deref(), - Some("12:30pm (Europe/Vienna)") - ); - - assert_eq!(result.data.gauges[1].name, "Current week (all models)"); - assert_eq!(result.data.gauges[1].percent_used, 12); - assert_eq!( - result.data.gauges[1].resets.as_deref(), - Some("May 12, 9am (Europe/Vienna)") - ); - - assert_eq!(result.data.gauges[2].name, "Current week (Sonnet only)"); - assert_eq!(result.data.gauges[2].percent_used, 0); - - // resets_iso should be present for all gauges with reset info - assert!(result.data.gauges[0].resets_iso.is_some()); - assert!(result.data.gauges[1].resets_iso.is_some()); - assert!(result.data.gauges[2].resets_iso.is_some()); - - // Week resets should contain the right date - let week_iso = result.data.gauges[1].resets_iso.as_ref().unwrap(); - 
assert!(week_iso.starts_with("2026-05-12") || week_iso.contains("05-12")); -} - -#[test] -fn parses_contributions() { - let result = process_usage_screen(SAMPLE); - let contrib = result.data.contributions.as_ref().unwrap(); - - assert_eq!(contrib.factors.len(), 2); - assert_eq!(contrib.factors[0].percent, 97); - assert!(contrib.factors[0].description.contains("subagent-heavy")); - assert_eq!(contrib.factors[1].percent, 16); - - assert_eq!(contrib.subagents.len(), 2); - assert_eq!(contrib.subagents[0].name, "Explore"); - assert_eq!(contrib.subagents[0].percent, 3); - assert_eq!(contrib.subagents[1].name, "claude-code-guide"); - assert_eq!(contrib.subagents[1].percent, 2); -} - -#[test] -fn raw_screen_starts_with_rule() { - let result = process_usage_screen(SAMPLE); - assert!(result.raw_screen.starts_with('─')); -} diff --git a/crates/dirigent_archivist/CLAUDE.md b/crates/dirigent_archivist/CLAUDE.md deleted file mode 100644 index d87b46b..0000000 --- a/crates/dirigent_archivist/CLAUDE.md +++ /dev/null @@ -1,761 +0,0 @@ -# Package: dirigent_archivist - -Persistent storage for all agentic interactions in Dirigent. - -## Quick Facts -- **Type**: Library -- **Main Entry**: src/lib.rs -- **Dependencies**: dirigent_protocol, uuid, chrono, serde, tokio, tracing, thiserror, sha2, hex, async-trait -- **Status**: Complete - Production ready with comprehensive tests - -## Purpose - -The Archivist provides file-based archival storage for all session data, messages, and attachments in Dirigent. It implements an archive-first architecture with connector API fallback, using NDJSON, JSON, and TSV formats for durability and human-readability. 
- -## Key Features - -- **File-based Storage**: NDJSON for messages, JSON for metadata, TSV for indices -- **Content-Addressable Files**: SHA-256 based storage for attachments with automatic deduplication -- **Session Lineage**: Track splits, continuations, and mutations with parent references -- **Connector Registry**: Coordinate UID assignment across connectors with collision detection -- **Event Streaming**: Real-time updates via EventHandler subscribing to dirigent_protocol events -- **Archive-First Design**: Read from archive first, fall back to connector API when needed -- **Caching**: In-memory caching of connector and session mappings for performance - -## Architecture - -The Archivist is built on three core architectural principles: - -### 1. Archive-First Read Strategy - -The Archivist is the primary source of truth for historical data: -- UI and APIs query the archive first -- Only fall back to connector APIs if data is not in archive -- This enables offline access and consistent history across restarts - -### 2. Write-Through Event Capture (Append-Only) - -The EventHandler subscribes to the global event stream from dirigent_core: -- Captures session creation, message streaming, and tool calls in real-time -- Uses MessageAccumulator to assemble streaming chunks into complete messages -- Writes complete messages to archive immediately upon finalization -- No polling required - fully event-driven -- **Append-only writes**: Messages are appended as events arrive, NOT in chronological order -- File order reflects event timing, not message timestamps - -### 3. 
File-Based Storage with Sort-on-Read - -All data is stored in human-readable, grep-able formats: -- **NDJSON** (Newline-Delimited JSON): Incremental append-only logs for messages and mappings -- **JSON**: Structured metadata for sessions and connectors -- **TSV** (Tab-Separated Values): Fast indices for cross-references -- **Content-Addressed Files**: Binary attachments stored by SHA-256 hash for deduplication -- **Sort-on-Read**: `get_messages()` sorts by timestamp and message_id to ensure chronological order despite append-only writes - -## Backend Trait Layer (Phase 2) - -The archivist uses a trait-based backend abstraction. `ArchiveBackend` -defines the mandatory session and message primitives every backend must -provide, plus `as_xxx()` accessors returning optional sub-traits: - -- `SearchBackend` — reserved for Phase 3+ indexed backends (not wired) -- `DagBackend` — session lineage DAG edges -- `MetaEventsBackend` — ACP connection lifecycle events -- `ConnectorRegistryBackend` — per-archive connector metadata -- `SessionMappingBackend` — native↔scroll session ID mapping - -`JsonlBackend` is the Phase 2 concrete implementation (file-based -NDJSON/JSON/TSV) and opts into every sub-trait except `SearchBackend` -(content search continues to be served by ripgrep via -`crates/api/src/archivist/search_task.rs`). - -The `Archivist` struct (in `src/coordinator/`) owns a registry of backends -keyed by archive name and performs orchestration (alias detection, session -lineage, move/copy, DAG walks, archive lifecycle). Consumers hold -`Arc<Archivist>` directly — the coordinator is concrete, not a trait. - -See `docs/plans/2026-04-18-archivist-phase2-design.md` for design rationale. - -## Multi-Backend Registry (Phase 3) - -The coordinator (`Archivist`) holds `Vec>` sorted -by `read_priority` instead of a flat `HashMap>`. 
-Each registration carries: - -- `backend: Arc` + its declared capabilities -- `failure_mode`: `Required` (must succeed) | `BestEffort` (errors log + drift health) -- `read_priority`: lower = tried first for reads; also selects the default - write target when no archive is named -- `write_active`: participates in fanout writes -- `enabled`: kill-switch without removing config -- `write_policy`: `Inline` (default; `await` per call) or `Queued` - (mpsc + batch_window + overflow policy) -- Runtime state: `last_health`, `last_error`, `consecutive_failures` - (all `Arc>`, shared with the writer task when queued) -- Optional `writer: Option` (Some iff `write_policy = Queued`) - -Backends are declared in `dirigent.toml` under `[[archives]]` and -constructed at boot via `Archivist::from_config(cfg, &BackendRegistry)`. -Add a new backend type by implementing `BackendFactory` and registering -it on the `BackendRegistry` before `from_config`. - -### Reads - -`get_session`, `get_messages_paged`, `count_messages`, `get_meta_events`, -`get_children`, etc. walk the registry in priority order via -`read_walk_per_session(scroll_id, predicate, op)`. The predicate -capability-filters; `Unavailable` backends are skipped. The first backend -that returns `Some(value)` wins and its name is cached against the -`scroll_id` in a positive LRU (capacity 10_000). Subsequent reads for the -same `scroll_id` short-circuit to the cached backend before falling back -to the full priority walk. - -Collection-shape reads (`list_sessions_paged`, `list_connectors`, -`list_meta_sessions`, `find_meta_session_by_client`) use -`read_walk_collection` — first enabled backend that can answer wins, no -cache, no aggregation across backends. Phase 3 explicitly defers -cross-backend merge/dedup to a later phase. 
- -### Writes - -Mutating methods (`append_messages`, `register_session`, `update_session_*`, -`append_meta_events`, `append_dag_edge`, `clear_session_messages`, -`update_connector_fingerprint`) resolve a primary (per-call `archive: -Some(name)` override or the default-write target) and fan out to every -other `enabled && write_active` backend that has the required capability. -Capability-mismatched backends are skipped with a debug `capability_skip` -log (never an error). `Required` failures propagate to the caller; -`BestEffort` failures log + drift health. - -`register_connector` currently does NOT fan out — alias detection + the -tri-state `Accepted`/`Aliased`/`Rejected` return shape make replication -non-trivial. Fanout for connectors is deferred; single-backend setups are -unaffected. - -For `write_policy = Queued` backends, the primary/secondary write paths -enqueue a `WriteOp` into the backend's writer task instead of awaiting. -Errors drift the backend's health but do not propagate to the caller. -Coalescing merges consecutive `AppendMessages`/`AppendMetaEvents` for the -same `scroll_id` within `batch_window_ms`. - -### Cross-backend operations - -- `delete_session(scroll_id, _)` fans out to every enabled backend that has - the session. Copies in `write_active=false` backends produce - `ArchivistError::DeleteOnReadOnlyBackend` (write-active copies are still - deleted); cache invalidated regardless of outcome. -- `copy_session(scroll_id, from, to)` reads from `from`, writes to `to`, - including DAG and meta-events when both sides have the capability. The - source remains canonical (the cache is NOT rewritten). -- `move_session(scroll_id, from, to)` is `copy + delete-from-source`. If - the source-side delete fails after the copy succeeded, - `ArchivistError::PartialMove { copied_to, delete_error }` is returned so - the caller knows the session now lives in both places. 
- -The Phase 2 connector-aware `move_session(scroll_id, target_connector_uid, _)` -and `copy_session(scroll_id, target_connector_uid, _)` survived the Phase -3 rename as `move_session_to_connector` / `copy_session_to_connector`. -Their bulk variant is `move_sessions_to_connector`. - -### Health - -`HealthStatus` drifts on every coordinator call that observes a backend: - -- Successful write → `Healthy`; `consecutive_failures` reset to 0. -- Successful read → `Healthy` (only rescues `Degraded`; does not reset the counter). -- Write failure → `Degraded { reason }`; `consecutive_failures += 1`; after - K = 5 consecutive failures drifts to `Unavailable { reason }`. Reads skip - `Unavailable` backends; writes against an `Unavailable` `Required` - backend fail, while writes against an `Unavailable` `BestEffort` backend - are still attempted. -- Read failure alone never drifts past `Degraded`; writes are the - authoritative health signal. - -`list_archives_with_health()` returns a `Vec` snapshot of -every registration: name, type, capabilities, health, last_error, and -queue_depth (for queued backends). - -### Lifecycle - -Phase 3 is **startup-only**. `add_archive` / `remove_archive` / -`set_default_archive` on the coordinator return -`ArchivistError::DynamicRegistryUnsupported`. To change the registry, -edit `dirigent.toml` and restart the server. `Archivist::shutdown()` -drains queued writer tasks (sends `WriteOp::Shutdown` on each writer's -mpsc and awaits ack); call it before process exit. - -Test-only constructors `Archivist::from_registrations(regs)` and -`SessionMetadata::stub(scroll_id)` live under `#[cfg(any(test, feature = -"test-utils"))]` for integration tests that bypass the factory. - -See `docs/plans/2026-04-19-archivist-phase3-design.md` for the full -design rationale, and `examples/multi_backend.rs` for a runnable -end-to-end example. 
- -## Module Organization - -### Core Modules - -- **`lib.rs`**: Public API surface and re-exports -- **`types.rs`**: Core data structures (session metadata, message records, connector info, API types) -- **`error.rs`**: Error types and Result alias for archivist operations - -### Backend Layer (`backend/`) - -- **`traits.rs`**: `ArchiveBackend` trait + 5 optional sub-traits -- **`capability.rs`**: `ArchiveCapability` enum + `CapabilitySet` type -- **`health.rs`**: `HealthStatus` enum returned by `health_check` -- **`contract.rs`**: Reusable behavioral tests for any `&dyn ArchiveBackend` (cfg-gated) -- **`mock.rs`**: In-memory `MockBackend` for coordinator unit tests (cfg-gated) - -### Concrete Backends (`backends/`) - -- **`jsonl/`**: The file-based `JsonlBackend` — the only Phase 2 backend. - Reuses `storage/` primitives for NDJSON/JSON/TSV operations. - -### Coordinator (`coordinator/`) - -- **`mod.rs`**: The `Archivist` struct + constructors -- **`archives.rs`**: Archive lifecycle (add/remove/list/default) -- **`connectors.rs`**: Connector registration + alias detection -- **`sessions.rs`**: Session registration, metadata updates, move/copy -- **`meta.rs`**: Meta events, DAG walks, cleanup - -### Storage Layer (`storage/`) - -Low-level file I/O primitives used by `JsonlBackend`. All storage operations are async and use tokio. 
- -- **`paths.rs`**: ArchivePaths utility for consistent directory structure and path resolution -- **`ndjson.rs`**: Newline-delimited JSON operations (read_ndjson, append_ndjson) -- **`json.rs`**: JSON operations (read_json, write_json) -- **`tsv.rs`**: Tab-separated value operations for connector index -- **`files.rs`**: Content-addressable file storage with SHA-256 hashing and deduplication - -### Supporting Modules - -- **`registry.rs`**: Archive registry persistence (multi-archive metadata) -- **`migration.rs`**: Single-archive → multi-archive migration path -- **`session.rs`**: Session lineage types shared across layers -- **`accumulator.rs`**: MessageAccumulator for assembling streaming message chunks -- **`backfill.rs`**: Backfill helpers for importing historical sessions -- **`import/`**: External conversation importers (e.g. Claude export) - -### Events - -- **`events.rs`**: EventHandler for subscribing to dirigent_protocol events and archiving them - -## Configuration - -The Archivist archive root is determined by `DirigentPaths` resolution: - -- Set `DIRIGENT_DATA_DIR` to override the data directory; archives will be stored at `$DIRIGENT_DATA_DIR/archives/` -- Defaults to `~/.local/share/dirigent/archives/` (or platform equivalent) - -```bash -DIRIGENT_DATA_DIR=/path/to/data dx serve -``` - -## Archive Structure - -``` -dirigent_archive/ -├── .contexts/ -│ └── {scroll_id:uuidv7}/ # One directory per session -│ ├── session.json # Session metadata -│ ├── messages.jsonl # Incremental message log (.ndjson also supported) -│ └── lineage.json # Session lineage info (optional) -├── .db/ -│ └── connectors/ -│ ├── index.tsv # Fast connector lookup (TSV) -│ └── {connector_uid}/ -│ ├── connector.json # Connector metadata -│ └── sessions.jsonl # Session mappings (.ndjson also supported) -└── .files/ - └── {sha256-hash} # Content-addressable file storage -``` - -### Why Hidden Directories? 
- -The `.contexts`, `.db`, and `.files` directories are hidden (prefixed with `.`) to keep the archive root clean for future rendered outputs (like `chat.md` exports). This is similar to how `.git` hides implementation details in a codebase. - -## File Formats - -### Session Metadata (`session.json`) - -```json -{ - "version": 1, - "scroll_id": "01936e8f-e5a7-7000-8000-000000000001", - "created_at": "2025-01-01T12:00:00Z", - "updated_at": "2025-01-01T12:30:00Z", - "title": "Implement user authentication", - "connector_uid": "01936e8f-e5a7-7000-8000-000000000002", - "native_session_id": "abc123", - "agent_id": null, - "parent_scroll_id": null, - "continuation": null, - "tags": ["backend", "auth"], - "metadata": { - "source": "OpenCode", - "model": "claude-3-5-sonnet" - } -} -``` - -### Messages Log (`messages.jsonl`) - -One JSON object per line, **append-only**: - -```jsonl -{"version":1,"message_id":"01936e8f-e5a7-7000-8000-000000000003","session":"01936e8f-e5a7-7000-8000-000000000001","parent_id":null,"ts":"2025-01-01T12:01:00Z","role":"user","author":"alice","content_md":"How do I implement JWT auth?","attachments":[],"metadata":{}} -{"version":1,"message_id":"01936e8f-e5a7-7000-8000-000000000004","session":"01936e8f-e5a7-7000-8000-000000000001","parent_id":"01936e8f-e5a7-7000-8000-000000000003","ts":"2025-01-01T12:01:10Z","role":"assistant","author":"claude","content_md":"Here's how to implement JWT authentication...","attachments":[],"metadata":{"model":"claude-3-5-sonnet"}} -``` - -**IMPORTANT - Ordering**: The order of lines in the message log file (`messages.jsonl` or `messages.ndjson`) reflects **event arrival order**, NOT chronological order. Assistant replies often arrive after subsequent user messages due to streaming latency, resulting in non-chronological file order. Always use the `Archivist::get_messages()` API to retrieve messages, which sorts by `ts` (timestamp) and `message_id` (UUIDv7) to guarantee chronological order. 
- -**File Format Compatibility**: The archivist supports both `.ndjson` and `.jsonl` file extensions for newline-delimited JSON files. When reading, `.jsonl` is preferred if present, with automatic fallback to `.ndjson` for backward compatibility. Write operations use `.jsonl` (canonical format). Both formats are identical in content - the difference is purely the file extension. - -### Connector Index (`index.tsv`) - -Tab-separated values with header row: - -```tsv -connector_uid type title client_native_id alias_of created_at -01936e8f-e5a7-7000-8000-000000000002 OpenCode OpenCode Local opencode@http://localhost:12225 2025-01-01T12:00:00Z -``` - -### Session Mappings (`sessions.jsonl`) - -Maps native session IDs from connectors to scroll IDs in the archive: - -```jsonl -{"version":1,"connector_uid":"01936e8f-e5a7-7000-8000-000000000002","native_session_id":"abc123","scroll_id":"01936e8f-e5a7-7000-8000-000000000001","created_at":"2025-01-01T12:00:00Z","alias_of":null} -``` - -## Message Ordering Guarantees - -### The Problem: Append Order ≠ Chronological Order - -In the event-driven architecture, messages are written to the message log file (`messages.jsonl`) as completion events arrive. Due to streaming latency: - -- User messages complete nearly instantly and are written immediately -- Assistant messages stream over time and complete later -- A second user message can be written before the first assistant reply completes - -Example scenario: -``` -T0: User sends "tell me a joke about snakes" (ts=18:23:36.947) -T1: Assistant starts streaming reply (ts=18:23:36.969) -T2: User sends "now one about tigers" (ts=18:23:49.429) <- completes and writes BEFORE assistant finishes -T3: Assistant finishes "snakes" reply <- writes AFTER "tigers" user message -``` - -File order in the message log file: -``` -1. user "snakes" (18:23:36.947) -2. user "tigers" (18:23:49.429) <- written second -3. assistant "snakes" (18:23:36.969) <- written third, but timestamp is earlier! 
-``` - -### The Solution: Sort-on-Read - -The `Archivist::get_messages()` implementation sorts messages before returning: - -1. **Primary sort**: `ts` (timestamp) ascending -2. **Secondary sort**: `message_id` (UUIDv7) ascending for stable tie-breaking - -This guarantees chronological order regardless of NDJSON append order: -``` -1. user "snakes" (18:23:36.947) -2. assistant "snakes" (18:23:36.969) -3. user "tigers" (18:23:49.429) -``` - -### Why This Approach? - -- **Maintains durability**: Append-only writes preserve crash safety -- **No migration needed**: Existing archives work without rewrites -- **Simple implementation**: No buffered writes or complex write-time ordering -- **Performance trade-off**: Small CPU cost on read (sorting) vs. complex write-time coordination - -### Consumer Guidance - -- **DO**: Use `Archivist::get_messages()` to retrieve messages -- **DON'T**: Read the message log file directly and assume file order = chronological order -- **UI/API**: Always sort by `ts` then `message_id` for defense in depth -- **Tie-breaking**: Use `message_id` (UUIDv7) as secondary sort for stable ordering when timestamps match - -## Key Types - -### SessionMetadata - -Stores all metadata about a session including: -- **scroll_id**: UUIDv7 identifier for the session -- **connector_uid**: Which connector owns this session -- **native_session_id**: Original session ID from the connector (optional) -- **title**: Optional human-readable session title (see Title Management below) -- **parent_scroll_id**: For session lineage (splits, continuations) -- **continuation**: Type of continuation (SPLIT, COMPACT, REFERENCE, EDIT) -- **tags**: User-defined categorization -- **metadata**: Free-form JSON for connector-specific fields - -#### Title Management - -Session titles are fully supported and persist across restarts. Titles are stored in the `SessionMetadata` struct and saved to the `session.json` file. 
- -**Setting Titles:** -```rust -// Update title for an existing session -archivist.update_session_metadata( - scroll_id, - Some("My Custom Session Title".to_string()), - None, // model - None // archive -).await?; -``` - -**Default Behavior:** -- New sessions can specify an initial title during registration -- If no title is provided, sessions default to `None` -- The UI typically displays "Untitled" for sessions without titles - -**Title Loading:** -- Titles are automatically loaded when retrieving session metadata via `get_session_metadata()` -- Session lists include titles via `list_sessions()` and `list_sessions_all()` -- Titles are part of the `SessionMetadata` struct returned by all session queries - -**UI Integration:** -- The web UI displays session titles in the session list and sidebar -- Users can rename sessions via the "Rename" button in the session list view -- Renaming calls `api::archivist::rename_session()` which uses `update_session_metadata()` -- Title changes are persisted immediately and survive application restarts - -### MessageRecord - -Represents a single message in the archive: -- **message_id**: UUIDv7 identifier -- **session**: scroll_id this message belongs to -- **role**: "user", "assistant", or "system" -- **content_md**: Message content in Markdown format -- **attachments**: References to attached files -- **metadata**: Free-form JSON for connector-specific fields - -### ConnectorRecord - -Metadata about a connector: -- **connector_uid**: UUIDv7 identifier -- **type**: "OpenCode", "ACP", or custom -- **client_native_id**: Unique identifier from client (e.g., "opencode@http://localhost:12225") -- **alias_of**: If this connector is an alias of another (for deduplication) - -## Archivist Public API - -The `Archivist` struct (in `coordinator/`) is the main public entry point -for archival operations. Consumers hold `Arc<Archivist>` and call inherent -methods — there is no `Archivist` trait anymore. 
The coordinator resolves -the target backend per call (via `archive: Option`) and delegates -to `ArchiveBackend` methods. - -Key method families (see `coordinator/*.rs` for full signatures): - -- **Archive lifecycle** (`archives.rs`): `add_archive`, `remove_archive`, - `list_archives`, `set_default_archive` -- **Connectors** (`connectors.rs`): `register_connector` with tri-state - result (Accepted / Aliased / Rejected), `list_connectors` -- **Sessions** (`sessions.rs`): `register_session`, `get_session_metadata`, - `update_session_metadata`, `list_sessions_paged`, `move_session`, - `copy_session`, `resolve_session` -- **Messages**: `append_messages`, `get_messages` (sorts by `ts` then - `message_id` for stable chronological order) -- **Meta / DAG** (`meta.rs`): meta-event recording, session lineage DAG - walks, cleanup routines - -## List Filter vs. Full-Text Search - -Two distinct query paths exist — do not conflate them. - -**List filter** — `Archivist::list_sessions_paged(SessionListQuery)` returns a -cursor-paged list of sessions, AND-filtered by `title_query` (substring on -title), `tags`, `model_filter` (substring on `metadata.model`), `project_id`, -`connector_uid`, and `include_hidden`. This is the right tool for "narrow the -list of visible sessions." - -**Full-text search** — `api::search_sessions` (in the `api` package, backed by -`api::archivist::search_task::SearchTask`) spawns `rg --json` over the -archive's `.contexts/` tree to find messages containing text. It streams -`SearchExcerpt`s with parsed NDJSON content and supports cancellation via -`CancellationToken`. This is the right tool for "find messages containing -text." - -**Do not extend `list_sessions_paged` to do content search.** Content search -belongs in the ripgrep pipeline. Future improvements to content search -(indexed backends, relevance scoring) are Phase 2d / Phase 3 concerns. 
- -## JsonlBackend Implementation - -The Phase 2 production backend — an implementation of `ArchiveBackend` plus -every sub-trait except `SearchBackend`: - -- **Thread-safe**: Uses RwLock for in-memory caches -- **Async**: All operations use tokio for non-blocking I/O -- **Caching**: In-memory caches for connector and session mappings -- **Collision Detection**: Tri-state registration for connectors and sessions - -Located under `src/backends/jsonl/` and split by concern (`backend.rs`, -`connectors.rs`, `dag.rs`, `mapping.rs`, `meta.rs`). - -### Caching Strategy - -`JsonlBackend` maintains two in-memory caches: - -1. **connector_cache**: HashMap - - Populated on registration - - Read from TSV index on startup (future enhancement) - -2. **session_cache**: HashMap<(Uuid, String), Uuid> - - Maps (connector_uid, native_session_id) to scroll_id - - Populated on registration and session resolution - - Enables fast session lookups without disk I/O - -## Event Handling - -The EventHandler subscribes to dirigent_protocol events and archives them in real-time: - -```rust -// Create archivist and event handler -let archivist = Archivist::new_with_single_archive(archive_path).await?; -let handler = EventHandler::new(Arc::new(archivist)); - -// Subscribe to event stream from dirigent_core -let events = event_stream.subscribe(); - -// Run event loop (blocking) -handler.run(events).await; -``` - -### Supported Events - -- **SessionCreated**: Registers new sessions with the archivist -- **MessageCompleted**: Writes finalized messages to the archive -- **SessionUpdate**: Accumulates streaming message chunks - - AgentMessageChunk - - UserMessageChunk - - AgentThoughtChunk - - ToolCall - -### MessageAccumulator - -Assembles streaming message chunks into complete messages: - -- Accumulates text chunks by message_id -- Tracks thinking blocks separately -- Stores tool calls with input/output -- Finalizes messages on MessageCompleted event -- Converts to MessageRecord for archival - -## 
Integration with dirigent_core - -The Archivist integrates with dirigent_core via the global event stream: - -1. **CoreRuntime** emits events for all connector operations -2. **EventHandler** subscribes to event stream -3. **MessageAccumulator** assembles streaming chunks -4. **Archivist** writes complete messages to archive - -This enables: -- Automatic archival of all sessions and messages -- No polling required - fully event-driven -- Consistent history across restarts -- Offline access to historical data - -## Testing - -The package has comprehensive test coverage across multiple dimensions: - -### Unit Tests - -Located in each module (`src/*.rs`, `src/storage/*.rs`): -- Type serialization/deserialization -- UUIDv7 generation and ordering -- Timestamp formatting (RFC 3339) -- Storage operations (NDJSON, JSON, TSV, files) -- Connector registration tri-state logic -- Session registration and alias detection - -### Integration Tests - -Located in `tests/`: -- `integration_tests.rs`: Full `Archivist` + `JsonlBackend` lifecycle, event - handler integration, multi-connector scenarios, session lineage, message - accumulation -- `list_sessions_paged_test.rs`, `pagination_test.rs`: List filter + cursor - pagination coverage -- `import_claude_idempotency_test.rs`: Claude export re-import idempotency - -### Backend Contract Tests - -`src/backend/contract.rs` holds reusable async assertions that any -`&dyn ArchiveBackend` must pass. `JsonlBackend` and `MockBackend` both -run the contract suite; new backends added in Phase 3+ should do the same. 
- -### Examples - -Located in `examples/`: -- `basic_usage.rs`: Core archivist operations -- `event_handling.rs`: EventHandler and MessageAccumulator -- `file_storage.rs`: Content-addressable file storage - -Run tests: -```bash -cargo test --package dirigent_archivist -``` - -Run examples: -```bash -cargo run --package dirigent_archivist --example basic_usage -cargo run --package dirigent_archivist --example event_handling -cargo run --package dirigent_archivist --example file_storage -``` - -## Performance Characteristics - -- **Append Operations**: O(1) with sequential file writes -- **Session Lookup**: O(1) with in-memory cache, O(n) cache miss -- **Message Retrieval**: O(n) where n = number of messages (NDJSON parsing) -- **File Storage**: O(1) content-addressable lookup with SHA-256 hashing -- **Connector Index**: O(n) TSV scan, suitable for hundreds of connectors - -### Scalability Considerations - -- **Large Sessions**: NDJSON is append-only, so reading large sessions requires parsing all lines -- **Many Sessions**: TSV indices are suitable for thousands of sessions per connector -- **File Deduplication**: SHA-256 hashing provides automatic deduplication across sessions -- **Concurrent Access**: RwLock allows multiple concurrent readers, single writer - -## Error Handling - -The Archivist uses thiserror for rich error types: - -```rust -pub enum ArchivistError { - IoError(std::io::Error), - SerdeError(serde_json::Error), - SessionUnknown(Uuid), - CollisionInconsistent(Uuid), - // ... etc -} -``` - -All public APIs return `Result` for explicit error handling. 
- -## Development Notes - -- All storage operations are async (using tokio) -- Content-addressable storage uses SHA-256 hashes (hex-encoded) -- Archive directory structure mirrors session/message hierarchy -- UUIDv7 provides time-ordered, sortable identifiers -- RFC 3339 UTC timestamps for all time-based fields -- Schema versioning via `version` field in all records - -## Related Packages - -- **dirigent_protocol**: Shared types and protocol definitions (dependency) -- **dirigent_core**: Runtime integration for SSE event capture (integration point) -- **api**: Server functions for archive queries (future) -- **web**: UI for archive browsing and search (future) - -## Phase 4: `ArchiveFilter` (2026-04-21) - -Every `ArchiveRegistration` carries a `filter: ArchiveFilter`. The filter -describes which sessions/writes the backend wants to receive. Fields: - -- `include_connectors: Option<HashSet<Uuid>>` — if Some, only these - connector UIDs pass. `None` means no connector gate. -- `exclude_connectors: HashSet<Uuid>` — always rejected. -- `include_tags: HashSet<String>` — if non-empty, the session must carry - at least one matching tag. -- `exclude_tags: HashSet<String>` — any matching tag rejects. -- `include_hidden: bool` — default `true`. If `false`, sessions whose - metadata has `"hidden": true` are skipped. - -### Primary-always-writes invariant - -The per-call primary (either the `archive: Some(name)` argument or the -default write-target) is **never** filtered. If a caller explicitly asks -to write to archive X, the filter on X is not consulted. Filters only -gate secondary fanout. - -### Boot validator - -At boot (`coordinator/boot.rs`), the validator rejects configurations -where: - -- No write-active + enabled registration has an **unrestricted** filter - (`ArchiveFilter::default()` is unrestricted). Prevents configurations - that silently drop all writes. 
-- An archive's filter has `include_connectors = Some(empty set)` — - equivalent to "reject everything", which is almost certainly a config - bug. - -See `docs/plans/2026-04-21-archivist-phase4-design.md` §4 for the full -design rationale. - -## Phase 5: Importers (2026-04-21) - -The `import::` module centres on an `Importer` trait with per-source -implementations under `import::sources::*`. Each source produces a -`ParsedConversation` (ChatGPT) / `ParsedSession` (Codex) / session -directory walk (Claude) and feeds the results through the common -`import_sessions` orchestrator, which fires `ImportProgressEvent`s on a -bounded `ImportProgressSink`. - -### `Importer` trait - -Every importer declares a `config_shape()` so UIs can render a dynamic -form; a `discover()` that returns an `ImportDiscovery` preview; and an -`import()` that does the actual work. All three methods are async. - -The trait lives in `import::trait_def`. Shape types (`ImportConfig`, -`ImportTarget`, `ConfigField`, `ConfigFieldKind`, `ImportError`) are -serialisable and safe to cross the WASM boundary. - -### Registry - -`ImporterRegistry::with_defaults()` registers every enabled -`importer-*` feature. Currently: `claude`, `chatgpt`, `codex`. The -registry is constructed at boot and stored on `AppState`. - -### Progress sink - -`ImportProgressSink::channel()` returns a bounded mpsc pair. -Non-terminal events use `try_send` (dropped on full); terminal events -use `send().await` so consumers always see the final state. - -### Source crates - -- `dirigent_chatgpt` — parses `conversations.json` from the OpenAI data - export. -- `dirigent_codex` — parses `*.jsonl` session files under - `~/.codex/sessions`. - -Both crates hold pure parser types with zero dirigent-specific types. - -See `docs/plans/2026-04-21-archivist-phase5-design.md`. 
- -## Future Enhancements - -- Indexed `SearchBackend` implementations (tantivy/sqlite) — currently - content search is ripgrep-based in the `api` package -- Session splitting and lineage management (mutations.ndjson) -- Knowledge overview generation (chat.md exports) -- Embedding storage and search (embeds/) -- Network RPC interface for remote archivist -- Compaction and pruning policies -- Additional concrete backends (e.g. SQLite, remote) - -## Documentation - -- **Package README**: `./README.md` - User-facing overview -- **Architecture Docs**: `../../docs/building/05_archivist/` - Design and planning -- **API Docs**: Run `cargo doc --package dirigent_archivist --open` -- **Examples**: See `examples/` directory for working code samples diff --git a/crates/dirigent_archivist/Cargo.toml b/crates/dirigent_archivist/Cargo.toml deleted file mode 100644 index 5f29eea..0000000 --- a/crates/dirigent_archivist/Cargo.toml +++ /dev/null @@ -1,69 +0,0 @@ -[package] -name = "dirigent_archivist" -version = "0.1.0" -edition = "2021" - -[lib] -path = "src/lib.rs" - -[features] -# All built-in importers are on by default. Turn the corresponding -# `importer-*` flag off (and opt out of `default`) to ship a slimmer build. -default = ["importer-claude", "importer-chatgpt", "importer-codex"] - -# Exposes the sub-trait contract test harness (`backend::contract`) to -# downstream crates so new backends can reuse the same behavioral checks. -test-utils = [] - -# Per-source importer feature gates. Each flag guards the corresponding -# `ImporterRegistry::with_defaults` registration and (where relevant) the -# source module itself. 
-importer-claude = [] -importer-chatgpt = ["dep:dirigent_chatgpt"] -importer-codex = ["dep:dirigent_codex"] - -[dependencies] -# Core dependencies -dirigent_protocol = { path = "../dirigent_protocol" } -dirigent_anth = { path = "../dirigent_anth" } -dirigent_chatgpt = { path = "../dirigent_chatgpt", optional = true } -dirigent_codex = { path = "../dirigent_codex", optional = true } -camino = "1.1" - -# UUID support with v7 and serde -uuid = { version = "1.11", features = ["v5", "v7", "serde"] } - -# Date/time handling -chrono = { version = "0.4", features = ["serde"] } - -# Serialization -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -toml = "0.8" - -# Async runtime and file operations -tokio = { version = "1.42", features = ["fs", "sync", "time", "io-util", "macros", "rt-multi-thread"] } - -# Logging -tracing = "0.1" - -# Error handling -thiserror = "2.0" -anyhow = "1" - -# Hashing for content-addressable storage -sha2 = "0.10" -hex = "0.4" - -# LRU read cache for registry backends -lru = "0.12" - -# Async traits -async-trait = "0.1" - -# Async futures -futures = "0.3" - -[dev-dependencies] -tempfile = "3.0" -walkdir = "2" diff --git a/crates/dirigent_archivist/README.md b/crates/dirigent_archivist/README.md deleted file mode 100644 index 5c860fc..0000000 --- a/crates/dirigent_archivist/README.md +++ /dev/null @@ -1,338 +0,0 @@ -# Dirigent Archivist - -Persistent storage for all agentic interactions in Dirigent. - -## Overview - -The Archivist automatically archives every conversation, message, and file from your AI sessions into a local, grep-able, human-readable archive. No cloud required - your data stays on your machine in formats you can read and search manually. - -## Why Archivist? 
- -- **Offline Access**: All conversations are saved locally, accessible even when connectors are offline -- **Manual Curation**: Files are in plain JSON/NDJSON/TSV - grep, edit, or analyze them with any tool -- **Knowledge Base**: Build a searchable archive of all your AI interactions across projects -- **Session Lineage**: Track conversation branches, splits, and continuations -- **File Deduplication**: Attachments are stored once, referenced multiple times (content-addressable) -- **Archive-First**: UI reads from local archive first, only falls back to remote connectors when needed - -## Quick Start - -The Archivist runs automatically when you start Dirigent. The archive location is determined by the `DIRIGENT_DATA_DIR` environment variable (archives are stored at `$DIRIGENT_DATA_DIR/archives/`): - -```bash -# Override data directory (archives at /path/to/data/archives/) -DIRIGENT_DATA_DIR=/path/to/data dx serve -``` - -That's it! Every session and message will be automatically archived. - -## Archive Structure - -Your archive is organized like this: - -``` -dirigent_archive/ -├── .contexts/ # Session data -│ └── 01936e8f-e5a7-7000-8000.../ -│ ├── session.json # Session metadata -│ └── messages.ndjson # All messages (one JSON per line) -├── .db/ -│ └── connectors/ # Connector registry -│ ├── index.tsv # Fast lookup table -│ └── 01936e8f-e5a7.../ -│ ├── connector.json # Connector info -│ └── sessions.ndjson # Session ID mappings -└── .files/ # Attachments (by SHA-256) - └── a1b2c3d4... # Content-addressable storage -``` - -### Why Hidden Directories? - -The `.contexts`, `.db`, and `.files` directories start with `.` to keep them internal (like `.git`). In the future, you'll be able to export rendered markdown files into the archive root for easy reading. 
- -## File Formats - -### Session Metadata (`.contexts/{id}/session.json`) - -```json -{ - "version": 1, - "scroll_id": "01936e8f-e5a7-7000-8000-000000000001", - "created_at": "2025-01-01T12:00:00Z", - "updated_at": "2025-01-01T12:30:00Z", - "title": "Implement user authentication", - "connector_uid": "01936e8f-e5a7-7000-8000-000000000002", - "tags": ["backend", "auth"], - "metadata": { - "source": "OpenCode", - "model": "claude-3-5-sonnet" - } -} -``` - -### Messages (`.contexts/{id}/messages.ndjson`) - -Newline-delimited JSON - one message per line, **append-only**: - -```jsonl -{"version":1,"message_id":"...","session":"...","role":"user","ts":"2025-01-01T12:01:00Z","content_md":"How do I implement JWT auth?","attachments":[],"metadata":{}} -{"version":1,"message_id":"...","session":"...","role":"assistant","ts":"2025-01-01T12:01:10Z","content_md":"Here's how to implement JWT authentication...","attachments":[],"metadata":{"model":"claude-3-5-sonnet"}} -``` - -**IMPORTANT**: Messages are written as events arrive, NOT in chronological order. Assistant replies often appear after subsequent user messages due to streaming latency. When reading programmatically, use the Archivist API which sorts by timestamp (`ts`) to ensure correct order. For manual inspection, sort by the `ts` field. - -### Connector Index (`.db/connectors/index.tsv`) - -Tab-separated values for fast scanning: - -```tsv -connector_uid type title client_native_id alias_of created_at -01936e8f... 
OpenCode OpenCode Local opencode@http://localhost:12225 2025-01-01T12:00:00Z -``` - -## Searching Your Archive - -Since everything is plain text, you can use standard Unix tools: - -```bash -# Find all sessions about "authentication" -grep -r "authentication" dirigent_archive/.contexts/*/session.json - -# Find messages mentioning a specific error -grep "ECONNREFUSED" dirigent_archive/.contexts/*/messages.ndjson - -# List all sessions for a connector -cat dirigent_archive/.db/connectors/*/sessions.ndjson | jq . - -# Get all user messages from a session (sorted by timestamp) -cat dirigent_archive/.contexts/01936e8f.../messages.ndjson | jq -s 'sort_by(.ts) | .[] | select(.role=="user")' - -# View messages in chronological order -cat dirigent_archive/.contexts/01936e8f.../messages.ndjson | jq -s 'sort_by(.ts)' -``` - -**Note on ordering**: Remember that the file order is append-only (event arrival order). Always sort by `ts` (timestamp) when reading manually to see messages in chronological order. - -## Integration with Dirigent - -The Archivist integrates seamlessly with Dirigent's core runtime: - -1. **Automatic Archiving**: Every session and message is archived in real-time as events arrive -2. **Event-Driven**: No polling - listens to dirigent_core's event stream -3. **Append-Only Writes**: Messages written as completion events arrive (preserves durability) -4. **Sort-on-Read**: API returns messages in chronological order despite append-only file order -5. **UI Integration**: Web UI reads from archive first, shows data even when connectors are offline -6. **Connector Coordination**: Assigns stable UUIDs to connectors with collision detection - -## Key Concepts - -### Scroll IDs - -Every session gets a unique `scroll_id` (UUIDv7) that's independent of the connector's native session ID. 
This allows: -- Sessions to move between connectors -- Stable references even if connector data is deleted -- Time-ordered sorting (UUIDv7 encodes timestamp) - -### Session Lineage - -Sessions can have parent sessions, creating a tree of related conversations: -- **Split**: Fork conversation at a specific message -- **Compact**: Summarized version of parent -- **Reference**: Points to parent without duplication -- **Edit**: Modified version of parent - -### Content-Addressable Storage - -Files are stored by their SHA-256 hash, so: -- Same file uploaded twice uses same storage -- Files can be shared across sessions without duplication -- You can verify file integrity by hash - -## Configuration - -### Environment Variables - -- `DIRIGENT_DATA_DIR`: Override data directory; archives are stored at `$DIRIGENT_DATA_DIR/archives/` - -### Example Configurations - -```bash -# Use custom data directory (archives at /home/user/mydata/archives/) -DIRIGENT_DATA_DIR=/home/user/mydata dx serve - -# Use global data directory -DIRIGENT_DATA_DIR=/home/user/.dirigent dx serve - -# Use temporary data directory (testing) -DIRIGENT_DATA_DIR=/tmp/dirigent_test dx serve -``` - -## Programmatic Access - -While the Archivist runs automatically, you can also use it programmatically: - -```rust -use dirigent_archivist::Archivist; -use std::path::PathBuf; - -#[tokio::main] -async fn main() -> Result<(), Box<dyn std::error::Error>> { - // Create an archivist over a single archive directory. - // Internally this wires up a `JsonlBackend` for the archive. - let archivist = Archivist::new_with_single_archive( - PathBuf::from("./dirigent_archive") - ).await?; - - // List sessions for a connector - let sessions = archivist.list_sessions(connector_uid).await?; - - for session in sessions { - println!("{}: {}", session.scroll_id, session.title.unwrap_or_default()); - } - - Ok(()) -} -``` - -`Archivist` is a concrete struct that owns a registry of `ArchiveBackend` -implementations keyed by archive name. 
In Phase 2 the only backend is -`JsonlBackend` (file-based NDJSON/JSON/TSV). See `examples/` for more -detailed usage. - -## Performance - -The Archivist is designed for human-scale workloads (thousands of sessions, millions of messages): - -- **Fast Writes**: Append-only NDJSON is O(1) -- **Cached Reads**: Common lookups cached in memory -- **Grep-able**: TSV indices can be scanned in milliseconds -- **Incremental**: Only new messages are written, no full re-writes - -### Scalability Notes - -- Large sessions (1000+ messages) may take a few seconds to load -- TSV indices are suitable for 100-1000 connectors -- File deduplication saves space for repeated attachments - -## Querying and Curation - -### Future: Knowledge Overviews - -The Archivist is designed to support knowledge curation workflows: -- Export sessions as clean markdown files -- Create summaries and overviews across sessions -- Tag and categorize conversations -- Build a personal knowledge base - -These features are planned for future releases. - -### Current: Manual Curation - -For now, you can manually curate your archive: -- Edit `session.json` to add tags -- Grep through messages for specific topics -- Copy/organize sessions into project folders -- Use jq/awk/sed to extract insights - -## Advanced Features - -### Session Splitting - -Create a new conversation branch from any point in history: - -```rust -// Future API (not yet implemented) -let new_session = archivist.split_session( - session_id, - at_message_id, - Continuation::Split -).await?; -``` - -### Attachment Storage - -Files are automatically deduplicated using SHA-256: - -```rust -// Store file (content-addressable) -let file_id = archivist.store_file( - &file_data, - "spec.pdf", - Some("application/pdf") -).await?; - -// Reference in message -let attachment = AttachmentRef { - file_id, // "sha256:abc123..." 
- name: "spec.pdf".to_string(), - mime_type: Some("application/pdf".to_string()), -}; -``` - -### Multi-Archive Support - -`Archivist` natively manages multiple named archives via an on-disk -registry. Each archive is backed by its own `ArchiveBackend` (currently -`JsonlBackend`) and selected per call via an optional `archive` argument. -This enables: -- Separate archives per project -- A default archive plus specialized side archives -- Moving or copying sessions between archives - -Future backends (e.g. SQLite, indexed, remote) will plug into the same -trait layer without changing the coordinator API. - -## Troubleshooting - -### Archive Not Created - -If the archive directory doesn't appear: -1. Check `DIRIGENT_DATA_DIR` is set correctly (or that the default data directory is writable) -2. Ensure write permissions on parent directory -3. Check logs for I/O errors - -### Missing Sessions - -If sessions don't appear in archive: -1. Verify EventHandler is running -2. Check for event subscription errors in logs -3. Ensure connector emits `SessionCreated` events - -### Large Archive Size - -If archive grows too large: -1. Check for duplicate files in `.files/` -2. Consider archiving old sessions separately -3. 
Future: Use compaction features (not yet implemented) - -## Development Status - -**Current** (Phase 2 complete): -- Automatic archival of sessions and messages -- Event-driven integration with dirigent_core -- File-based storage with NDJSON/JSON/TSV (`JsonlBackend`) -- Content-addressable file storage -- Multi-archive coordinator with per-archive backends -- Trait-based backend abstraction (`ArchiveBackend` + sub-traits) - -**Future**: -- Indexed `SearchBackend` implementations (full-text search) -- Additional concrete backends (SQLite, remote) -- Session splitting and lineage management -- Knowledge overview generation -- Network RPC interface - -## Documentation - -- **Developer Guide**: `CLAUDE.md` - Package architecture and implementation details -- **Architecture**: `docs/building/05_archivist/vision.md` - Design rationale -- **API Docs**: `cargo doc --package dirigent_archivist --open` -- **Examples**: See `examples/` for working code - -## Contributing - -The Archivist is part of the Dirigent project. See the main repository for contribution guidelines. - -## License - -Part of the Dirigent project. diff --git a/crates/dirigent_archivist/examples/basic_usage.rs b/crates/dirigent_archivist/examples/basic_usage.rs deleted file mode 100644 index 5eacea2..0000000 --- a/crates/dirigent_archivist/examples/basic_usage.rs +++ /dev/null @@ -1,198 +0,0 @@ -//! Basic usage example for dirigent_archivist -//! -//! This example demonstrates: -//! - Creating a Archivist -//! - Registering a connector -//! - Registering a session -//! - Appending messages to a session -//! - Listing sessions for a connector -//! 
- Retrieving messages for a session - -use chrono::Utc; -use dirigent_archivist::{ - Archivist, MessageRecord, RegisterConnectorRequest, RegisterSessionRequest, - Result, -}; -use std::path::PathBuf; -use uuid::Uuid; - -#[tokio::main] -async fn main() -> Result<()> { - // Create a temporary archive directory for this example - let temp_dir = std::env::temp_dir().join(format!("dirigent_example_{}", Uuid::now_v7())); - println!("Creating archive at: {}", temp_dir.display()); - - // Step 1: Create a Archivist - let archivist = Archivist::new_with_single_archive(temp_dir.clone()).await?; - println!("Archivist created successfully"); - - // Step 2: Register a connector - println!("\n--- Registering Connector ---"); - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "OpenCode Local".to_string(), - client_native_id: "opencode@http://localhost:12225".to_string(), - custom_uid: None, // Let archivist generate a UID - metadata: serde_json::json!({ - "version": "0.1.0", - "protocol": "OpenCode HTTP API" - }), - fingerprint: None, - }; - - let connector_resp = archivist.register_connector(connector_req, None).await?; - println!("Connector registered: {:?}", connector_resp); - let connector_uid = connector_resp.connector_uid; - - // Step 3: Register a session - println!("\n--- Registering Session ---"); - let session_req = RegisterSessionRequest { - connector_uid, - native_session_id: "session-abc123".to_string(), - title: Some("Example chat session".to_string()), - custom_scroll_id: None, // Let archivist generate a scroll ID - metadata: serde_json::json!({ - "project_path": "/home/user/projects/example", - "model": "claude-3-5-sonnet" - }), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_resp = archivist.register_session(session_req, None).await?; - println!("Session registered: {:?}", 
session_resp); - let scroll_id = session_resp.scroll_id; - - // Step 4: Append messages to the session - println!("\n--- Appending Messages ---"); - - // User message - let user_msg = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: scroll_id, - parent_id: None, - ts: Utc::now(), - role: "user".to_string(), - author: Some("alice".to_string()), - content_md: "Hello! Can you help me write a function to calculate fibonacci numbers?" - .to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - // Assistant message - let assistant_msg = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: scroll_id, - parent_id: Some(user_msg.message_id), - ts: Utc::now(), - role: "assistant".to_string(), - author: Some("claude".to_string()), - content_md: r#"Sure! Here's a recursive fibonacci function in Rust: - -```rust -fn fibonacci(n: u32) -> u64 { - match n { - 0 => 0, - 1 => 1, - _ => fibonacci(n - 1) + fibonacci(n - 2), - } -} -``` - -This is the classic recursive implementation, though it's not the most efficient for large values of n."# - .to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({ - "model": "claude-3-5-sonnet", - "latency_ms": 1245 - }), - }; - - archivist - .append_messages(scroll_id, vec![user_msg.clone(), assistant_msg.clone()], None) - .await?; - println!("Appended 2 messages to session"); - - // Step 5: List all sessions for the connector - println!("\n--- Listing Sessions ---"); - let page = archivist - .list_sessions_paged( - dirigent_archivist::SessionListQuery::default() - .with_connector(connector_uid) - .with_limit(100), - ) - .await?; - let sessions = page.items; - println!("Found {} session(s) for connector:", sessions.len()); - for session in &sessions { - println!( - " - {} ({}): {:?}", - session.scroll_id, - session.created_at.format("%Y-%m-%d %H:%M:%S"), - session.title - ); - } - - // Step 6: Retrieve all messages for the 
session - println!("\n--- Retrieving Messages ---"); - let messages = archivist.get_messages(scroll_id, None).await?; - println!("Retrieved {} message(s):", messages.len()); - for msg in &messages { - println!("\n[{}] {}", msg.role, msg.ts.format("%Y-%m-%d %H:%M:%S")); - println!("{}", msg.content_md); - } - - // Step 7: Demonstrate session resolution - println!("\n--- Resolving Session ---"); - let resolved_scroll_id = archivist - .resolve_session(connector_uid, "session-abc123", None) - .await?; - println!( - "Resolved native session 'session-abc123' to scroll_id: {}", - resolved_scroll_id - ); - assert_eq!(resolved_scroll_id, scroll_id); - - // Step 8: Show archive structure - println!("\n--- Archive Structure ---"); - println!("Archive root: {}", temp_dir.display()); - println!("\nDirectory structure:"); - show_directory_tree(&temp_dir, 0)?; - - // Cleanup - println!("\n--- Cleanup ---"); - std::fs::remove_dir_all(&temp_dir)?; - println!("Removed temporary archive"); - - Ok(()) -} - -/// Helper function to display directory tree -fn show_directory_tree(path: &PathBuf, depth: usize) -> Result<()> { - let indent = " ".repeat(depth); - - if path.is_dir() { - println!("{}{}/", indent, path.file_name().unwrap().to_string_lossy()); - - let mut entries: Vec<_> = std::fs::read_dir(path)?.filter_map(|e| e.ok()).collect(); - entries.sort_by_key(|e| e.path()); - - for entry in entries { - show_directory_tree(&entry.path(), depth + 1)?; - } - } else { - println!("{}{}", indent, path.file_name().unwrap().to_string_lossy()); - } - - Ok(()) -} diff --git a/crates/dirigent_archivist/examples/demo_types.rs b/crates/dirigent_archivist/examples/demo_types.rs deleted file mode 100644 index 3b3fa20..0000000 --- a/crates/dirigent_archivist/examples/demo_types.rs +++ /dev/null @@ -1,156 +0,0 @@ -// Demonstration of archivist types serialization -// Run with: cargo run --package dirigent_archivist --example demo_types - -use chrono::Utc; -use dirigent_archivist::*; -use uuid::Uuid; - 
-fn main() { - println!("=== ARCHIVIST TYPES DEMONSTRATION ===\n"); - - // Demo 1: SessionMetadata (matches session.json format) - println!("1. SessionMetadata (session.json):"); - let session_metadata = SessionMetadata { - version: 1, - scroll_id: Uuid::now_v7(), - created_at: Utc::now(), - updated_at: Utc::now(), - title: Some("Example Session".to_string()), - connector_uid: Uuid::now_v7(), - native_session_id: Some("abc123".to_string()), - agent_id: Some("claude-3-5".to_string()), - parent_scroll_id: None, - continuation: Some(Continuation::Split), - tags: vec!["example".to_string(), "test".to_string()], - metadata: serde_json::json!({ - "source": "OpenCode", - "project": "dirigent" - }), - no_update: false, - kind: SessionKind::Chat, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - completeness: SessionCompleteness::default(), - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - }; - println!( - "{}\n", - serde_json::to_string_pretty(&session_metadata).unwrap() - ); - - // Demo 2: MessageRecord (matches messages.ndjson format) - println!("2. 
MessageRecord (messages.ndjson line):"); - let message = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_metadata.scroll_id, - parent_id: None, - ts: Utc::now(), - role: "user".to_string(), - author: Some("alice".to_string()), - content_md: "How do I implement archivist types?".to_string(), - content_parts: None, - attachments: vec![AttachmentRef { - file_id: "sha256:abc123".to_string(), - name: "spec.pdf".to_string(), - mime_type: Some("application/pdf".to_string()), - }], - metadata: serde_json::json!({ - "connector_msg_id": "msg-456" - }), - }; - // NDJSON format (one line) - println!("{}\n", serde_json::to_string(&message).unwrap()); - - // Demo 3: ConnectorRecord (matches connector.json format) - println!("3. ConnectorRecord (connector.json):"); - let connector = ConnectorRecord { - version: 1, - connector_uid: session_metadata.connector_uid, - r#type: "OpenCode".to_string(), - title: "OpenCode Local".to_string(), - client_native_id: "opencode@http://localhost:12225".to_string(), - alias_of: None, - created_at: Utc::now(), - metadata: serde_json::json!({}), - fingerprint: None, - }; - println!("{}\n", serde_json::to_string_pretty(&connector).unwrap()); - - // Demo 4: SessionMapping (matches sessions.ndjson format) - println!("4. SessionMapping (sessions.ndjson line):"); - let mapping = SessionMapping { - version: 1, - connector_uid: connector.connector_uid, - native_session_id: "abc123".to_string(), - scroll_id: session_metadata.scroll_id, - created_at: Utc::now(), - alias_of: None, - }; - println!("{}\n", serde_json::to_string(&mapping).unwrap()); - - // Demo 5: FileRecord (matches file_index.jsonl format) - println!("5. 
FileRecord (file_index.jsonl line):"); - let file_record = FileRecord { - version: 1, - file_id: "sha256:abc123def456".to_string(), - path: ".files/ab/cd/abc123def456".to_string(), - size: 123456, - mime: Some("application/pdf".to_string()), - original_name: "spec.pdf".to_string(), - sessions: vec![session_metadata.scroll_id], - metadata: serde_json::json!({ - "source": "upload" - }), - }; - println!("{}\n", serde_json::to_string(&file_record).unwrap()); - - // Demo 6: Enum serialization - println!("6. Enum Serialization:"); - println!( - " Continuation::Split: {}", - serde_json::to_string(&Continuation::Split).unwrap() - ); - println!( - " Continuation::Compact: {}", - serde_json::to_string(&Continuation::Compact).unwrap() - ); - println!( - " RegisterStatus::Accepted: {}", - serde_json::to_string(&RegisterStatus::Accepted).unwrap() - ); - println!( - " RegisterStatus::Aliased: {}", - serde_json::to_string(&RegisterStatus::Aliased).unwrap() - ); - println!(); - - // Demo 7: API types - println!("7. RegisterConnectorResponse:"); - let response = RegisterConnectorResponse { - status: RegisterStatus::Accepted, - connector_uid: Uuid::now_v7(), - alias_of: None, - note: Some("Successfully registered".to_string()), - }; - println!("{}\n", serde_json::to_string_pretty(&response).unwrap()); - - println!("8. RegisterSessionResponse:"); - let response = RegisterSessionResponse { - status: RegisterStatus::Aliased, - scroll_id: Uuid::now_v7(), - alias_of: Some(Uuid::now_v7()), - }; - println!("{}\n", serde_json::to_string_pretty(&response).unwrap()); - - println!("=== ALL TYPES MATCH VISION.MD SPECIFICATION ==="); -} diff --git a/crates/dirigent_archivist/examples/event_handling.rs b/crates/dirigent_archivist/examples/event_handling.rs deleted file mode 100644 index 50b6acf..0000000 --- a/crates/dirigent_archivist/examples/event_handling.rs +++ /dev/null @@ -1,277 +0,0 @@ -//! Event handling example for dirigent_archivist -//! -//! This example demonstrates: -//! 
- Creating an EventHandler -//! - Subscribing to dirigent_protocol events -//! - Accumulating streaming message chunks -//! - Finalizing complete messages -//! - Automatic archival via event stream - -use chrono::Utc; -use dirigent_archivist::{Archivist, EventHandler, Result}; -use dirigent_protocol::streaming::{BusEvent, BusReceiver, EventOrigin, EventRouting}; -use dirigent_protocol::{ - ContentBlock, Event, Message, MessageMetadata, MessagePart, MessageRole, MessageStatus, - Session, SessionMetadata, SessionUpdate, ToolCall, ToolCallStatus, -}; -use std::sync::Arc; -use std::sync::atomic::AtomicU64; -use tokio::sync::mpsc; -use uuid::Uuid; - -/// Wrap a raw `Event` in a `BusEvent` with default routing. -fn wrap(event: Event) -> BusEvent { - BusEvent { - routing: EventRouting::default(), - origin: EventOrigin::Runtime, - event: Arc::new(event), - } -} - -#[tokio::main] -async fn main() -> Result<()> { - // Create a temporary archive directory for this example - let temp_dir = std::env::temp_dir().join(format!("dirigent_event_example_{}", Uuid::now_v7())); - println!("Creating archive at: {}", temp_dir.display()); - - // Step 1: Create archivist and event handler - let archivist = Archivist::new_with_single_archive(temp_dir.clone()).await?; - let archivist = Arc::new(archivist); - let handler = EventHandler::new(archivist.clone()); - - println!("EventHandler created successfully"); - - // Step 2: Create a mock event stream. In production this is built - // by `SharingBus::subscribe_all()`; here we fabricate a `BusReceiver` - // directly so the example stays self-contained. 
- let (tx, rx) = mpsc::channel::(100); - let bus_rx = BusReceiver { - id: 0, - rx, - lagged: Arc::new(AtomicU64::new(0)), - }; - - // Step 3: Spawn event handler task - let handler_task = tokio::spawn(async move { - handler.run(bus_rx).await; - }); - - // Step 4: Simulate event flow - println!("\n--- Simulating Event Stream ---"); - - // Generate connector and session IDs - let connector_id = Uuid::now_v7().to_string(); - let session_id = Uuid::now_v7().to_string(); - let message_id = Uuid::now_v7().to_string(); - - // Event 1: SessionCreated - println!("\n1. Sending SessionCreated event..."); - let session_created = Event::SessionCreated { - connector_id: connector_id.clone(), - session: Session { - id: session_id.clone(), - title: "Example streaming session".to_string(), - created_at: Utc::now(), - updated_at: Utc::now(), - metadata: SessionMetadata { - project_path: "/home/user/project".to_string(), - model: Some("claude-3-5-sonnet".to_string()), - total_messages: 0, - system_message: None, - current_mode_id: None, - _meta: None, - project_id: None, - }, - cwd: None, - models: None, - modes: None, - config_options: None, - acp_client_id: None, - }, - }; - tx.send(wrap(session_created)).await.unwrap(); - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - - // Event 2-5: Streaming message chunks (AgentMessageChunk) - println!("2. Sending streaming message chunks..."); - let chunks = vec!["Hello! 
", "I'm here to ", "help you with ", "your code."]; - - for (i, chunk) in chunks.iter().enumerate() { - let chunk_event = Event::SessionUpdate { - connector_id: connector_id.clone(), - session_id: session_id.clone(), - update: SessionUpdate::AgentMessageChunk { - message_id: message_id.clone(), - content: ContentBlock::Text { - text: chunk.to_string(), - }, - _meta: None, - }, - }; - tx.send(wrap(chunk_event)).await.unwrap(); - println!(" Chunk {}: {:?}", i + 1, chunk); - tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; - } - - // Event 6: Thinking chunk - println!("3. Sending thinking chunk..."); - let thinking_event = Event::SessionUpdate { - connector_id: connector_id.clone(), - session_id: session_id.clone(), - update: SessionUpdate::AgentThoughtChunk { - message_id: message_id.clone(), - content: ContentBlock::Text { - text: "Let me consider the best approach...".to_string(), - }, - _meta: None, - }, - }; - tx.send(wrap(thinking_event)).await.unwrap(); - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - - // Event 7: Tool call - println!("4. Sending tool call event..."); - let tool_call_event = Event::SessionUpdate { - connector_id: connector_id.clone(), - session_id: session_id.clone(), - update: SessionUpdate::ToolCall { - message_id: message_id.clone(), - tool_call: ToolCall { - id: "tool_call_123".to_string(), - tool_name: "read_file".to_string(), - status: ToolCallStatus::Completed, - content: vec![], - raw_input: Some(serde_json::json!({ - "path": "/home/user/project/main.rs" - })), - raw_output: Some(serde_json::json!({ - "content": "fn main() { println!(\"Hello\"); }" - })), - title: None, - error: None, - metadata: None, - origin: None, - }, - _meta: None, - }, - }; - tx.send(wrap(tool_call_event)).await.unwrap(); - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - - // Event 8: MessageCompleted (triggers finalization) - println!("5. 
Sending MessageCompleted event..."); - let message_completed = Event::MessageCompleted { - connector_id: connector_id.clone(), - message: Message { - id: message_id.clone(), - session_id: session_id.clone(), - role: MessageRole::Assistant, - created_at: Utc::now(), - content: vec![MessagePart::Text { - text: chunks.concat(), - }], - status: MessageStatus::Completed, - metadata: Some(MessageMetadata { - cost: None, - tokens_input: None, - tokens_output: None, - response_time_ms: None, - latency_ms: Some(1500), - model: Some("claude-3-5-sonnet".to_string()), - other: None, - }), - }, - }; - tx.send(wrap(message_completed)).await.unwrap(); - tokio::time::sleep(tokio::time::Duration::from_millis(200)).await; - - // Event 9: Second message (user response) - println!("6. Sending user message..."); - let user_message_id = Uuid::now_v7().to_string(); - let user_chunks = vec!["Thanks! ", "Can you explain ", "the code?"]; - - for (i, chunk) in user_chunks.iter().enumerate() { - let chunk_event = Event::SessionUpdate { - connector_id: connector_id.clone(), - session_id: session_id.clone(), - update: SessionUpdate::UserMessageChunk { - message_id: user_message_id.clone(), - content: ContentBlock::Text { - text: chunk.to_string(), - }, - _meta: None, - }, - }; - tx.send(wrap(chunk_event)).await.unwrap(); - println!(" User chunk {}: {:?}", i + 1, chunk); - tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; - } - - let user_message_completed = Event::MessageCompleted { - connector_id: connector_id.clone(), - message: Message { - id: user_message_id.clone(), - session_id: session_id.clone(), - role: MessageRole::User, - created_at: Utc::now(), - content: vec![MessagePart::Text { - text: user_chunks.concat(), - }], - status: MessageStatus::Completed, - metadata: None, - }, - }; - tx.send(wrap(user_message_completed)).await.unwrap(); - tokio::time::sleep(tokio::time::Duration::from_millis(200)).await; - - // Step 5: Verify archived data - println!("\n--- Verifying 
Archived Data ---"); - - // Parse connector_uid from connector_id string - let connector_uid = - Uuid::parse_str(&connector_id).expect("connector_id should be a valid UUID"); - - // List sessions - let page = archivist - .list_sessions_paged( - dirigent_archivist::SessionListQuery::default() - .with_connector(connector_uid) - .with_limit(100), - ) - .await?; - let sessions = page.items; - println!("Found {} session(s) in archive", sessions.len()); - for session in &sessions { - println!(" Session: {} - {:?}", session.scroll_id, session.title); - } - - // Get messages - if let Some(session) = sessions.first() { - let messages = archivist.get_messages(session.scroll_id, None).await?; - println!("\nFound {} message(s):", messages.len()); - for msg in &messages { - println!("\n[{}] {} chars", msg.role, msg.content_md.len()); - println!( - "Content preview: {}", - &msg.content_md.chars().take(100).collect::() - ); - } - } - - // Step 6: Cleanup - println!("\n--- Cleanup ---"); - - // Drop the event sender to close the channel - drop(tx); - - // Wait for handler to finish - handler_task.await.expect("Handler task failed"); - - // Remove temporary archive - std::fs::remove_dir_all(&temp_dir)?; - println!("Removed temporary archive"); - - println!("\nExample completed successfully!"); - - Ok(()) -} diff --git a/crates/dirigent_archivist/examples/file_storage.rs b/crates/dirigent_archivist/examples/file_storage.rs deleted file mode 100644 index 2c7737d..0000000 --- a/crates/dirigent_archivist/examples/file_storage.rs +++ /dev/null @@ -1,214 +0,0 @@ -//! File storage example for dirigent_archivist -//! -//! This example demonstrates: -//! - Storing files with content-addressing -//! - Retrieving files by file_id -//! - Automatic deduplication of identical content -//! 
- Session tracking for file references - -use dirigent_archivist::storage::{files, ndjson, paths::ArchivePaths}; -use dirigent_archivist::types::FileRecord; -use dirigent_archivist::Result; -use uuid::Uuid; - -#[tokio::main] -async fn main() -> Result<()> { - // Create a temporary archive directory for this example - let temp_dir = std::env::temp_dir().join(format!("dirigent_files_example_{}", Uuid::now_v7())); - println!("Creating archive at: {}", temp_dir.display()); - - let paths = ArchivePaths::new(temp_dir.clone()); - - // Example 1: Store a file - println!("\n--- Example 1: Store a File ---"); - let content1 = b"This is a sample document with some text content."; - let session1 = Uuid::now_v7(); - - let file_id1 = files::store_file( - &paths, - content1, - "document.txt".to_string(), - Some("text/plain".to_string()), - session1, - ) - .await?; - - println!("Stored file with ID: {}", file_id1); - println!("Session: {}", session1); - - // Example 2: Retrieve the file - println!("\n--- Example 2: Retrieve the File ---"); - let retrieved1 = files::get_file(&paths, &file_id1).await?; - println!("Retrieved {} bytes", retrieved1.len()); - println!("Content: {}", String::from_utf8_lossy(&retrieved1)); - - // Example 3: Store the same content from a different session (deduplication) - println!("\n--- Example 3: Deduplication Demo ---"); - let session2 = Uuid::now_v7(); - - let file_id2 = files::store_file( - &paths, - content1, // Same content as before - "duplicate.txt".to_string(), // Different name - Some("text/plain".to_string()), - session2, - ) - .await?; - - println!("Stored same content with different name"); - println!("File ID 1: {}", file_id1); - println!("File ID 2: {}", file_id2); - println!("Same file_id? 
{}", file_id1 == file_id2); - println!("\nDeduplication: Same content produces same file_id, stored only once!"); - - // Example 4: Check the file index - println!("\n--- Example 4: File Index ---"); - let index_path = paths.root().join(".files").join("file_index.jsonl"); - let records: Vec = ndjson::read_ndjson(&index_path).await?; - - println!("File index contains {} record(s)", records.len()); - for record in &records { - println!("\nFile: {}", record.file_id); - println!(" Original name: {}", record.original_name); - println!(" MIME type: {:?}", record.mime); - println!(" Size: {} bytes", record.size); - println!(" Referenced by {} session(s):", record.sessions.len()); - for session_id in &record.sessions { - println!(" - {}", session_id); - } - } - - // Example 5: Store different content - println!("\n--- Example 5: Store Different Content ---"); - let content2 = b"This is completely different content with more data!"; - let session3 = Uuid::now_v7(); - - let file_id3 = files::store_file( - &paths, - content2, - "different.txt".to_string(), - Some("text/plain".to_string()), - session3, - ) - .await?; - - println!("Stored different content"); - println!("File ID 3: {}", file_id3); - println!("Different from file_id1? 
{}", file_id1 != file_id3); - - // Example 6: Store binary content - println!("\n--- Example 6: Binary Content ---"); - let binary_content: Vec = (0..256).map(|i| i as u8).collect(); - let session4 = Uuid::now_v7(); - - let file_id4 = files::store_file( - &paths, - &binary_content, - "binary.dat".to_string(), - Some("application/octet-stream".to_string()), - session4, - ) - .await?; - - println!("Stored binary content (256 bytes)"); - println!("File ID: {}", file_id4); - - // Retrieve and verify - let retrieved_binary = files::get_file(&paths, &file_id4).await?; - println!("Retrieved {} bytes", retrieved_binary.len()); - println!( - "Binary content verified: {}", - retrieved_binary == binary_content - ); - - // Example 7: Show final archive structure - println!("\n--- Example 7: Archive Structure ---"); - println!("Archive root: {}", temp_dir.display()); - show_files_directory(&paths)?; - - // Example 8: Final statistics - println!("\n--- Final Statistics ---"); - let final_records: Vec = ndjson::read_ndjson(&index_path).await?; - println!("Total unique files stored: {}", final_records.len()); - - let total_sessions: usize = final_records.iter().map(|r| r.sessions.len()).sum(); - println!("Total session references: {}", total_sessions); - - let total_size: u64 = final_records.iter().map(|r| r.size).sum(); - println!("Total storage used: {} bytes", total_size); - - // Content-addressing means if we had stored content1 1000 times, - // we'd still only use storage for it once! 
- println!("\nContent-addressing benefit:"); - println!(" File '{}' is referenced by {} sessions", file_id1, 2); - println!(" But stored only once on disk!"); - - // Cleanup - println!("\n--- Cleanup ---"); - std::fs::remove_dir_all(&temp_dir)?; - println!("Removed temporary archive"); - - println!("\nExample completed successfully!"); - - Ok(()) -} - -/// Helper function to show .files directory structure -fn show_files_directory(paths: &ArchivePaths) -> Result<()> { - let files_dir = paths.root().join(".files"); - - if !files_dir.exists() { - println!("No files directory found"); - return Ok(()); - } - - println!("\n.files/ directory:"); - - // Show index file - let index_path = files_dir.join("file_index.jsonl"); - if index_path.exists() { - let metadata = std::fs::metadata(&index_path)?; - println!(" file_index.jsonl ({} bytes)", metadata.len()); - } - - // Show shard directories - for entry in std::fs::read_dir(&files_dir)? { - let entry = entry?; - let path = entry.path(); - - if path.is_dir() { - println!(" {}/", path.file_name().unwrap().to_string_lossy()); - - // Show files in shard - for sub_entry in std::fs::read_dir(&path)? { - let sub_entry = sub_entry?; - let sub_path = sub_entry.path(); - - if sub_path.is_dir() { - println!(" {}/", sub_path.file_name().unwrap().to_string_lossy()); - - // Show files in sub-shard - for file_entry in std::fs::read_dir(&sub_path)? 
{ - let file_entry = file_entry?; - let file_path = file_entry.path(); - let metadata = std::fs::metadata(&file_path)?; - println!( - " {} ({} bytes)", - file_path.file_name().unwrap().to_string_lossy(), - metadata.len() - ); - } - } else { - let metadata = std::fs::metadata(&sub_path)?; - println!( - " {} ({} bytes)", - sub_path.file_name().unwrap().to_string_lossy(), - metadata.len() - ); - } - } - } - } - - Ok(()) -} diff --git a/crates/dirigent_archivist/examples/multi_backend.rs b/crates/dirigent_archivist/examples/multi_backend.rs deleted file mode 100644 index 73773ea..0000000 --- a/crates/dirigent_archivist/examples/multi_backend.rs +++ /dev/null @@ -1,199 +0,0 @@ -//! Example: two `JsonlBackend`s side by side, demonstrating boot-from-config, -//! priority-ordered read routing, write fanout, and a health snapshot. -//! -//! Layout: -//! - `primary` → `read_priority = 0`, `failure_mode = required` (default) -//! - `mirror` → `read_priority = 10`, `failure_mode = best_effort` -//! -//! The primary is the default write target (lowest priority among -//! Required+write-active backends). `append_messages` fans out inline to the -//! mirror too. Reads walk the registrations in priority order, so the primary -//! answers first; if it is missing a session, the walk falls through to the -//! mirror. -//! -//! Run with: -//! -//! cargo run --package dirigent_archivist --example multi_backend - -use std::sync::Arc; - -use chrono::Utc; -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::registry::{ArchivesConfig, BackendRegistry}; -use dirigent_archivist::types::{ - MessageRecord, RegisterConnectorRequest, RegisterSessionRequest, -}; -use uuid::Uuid; - -#[tokio::main] -async fn main() -> anyhow::Result<()> { - let dir_a = tempfile::tempdir()?; - let dir_b = tempfile::tempdir()?; - - // Build a two-archive config entirely from TOML so the example doubles as - // a faithful demonstration of the config surface. 
- let cfg_src = format!( - r#" - [[archives]] - name = "primary" - type = "jsonl" - read_priority = 0 - [archives.params] - path = "{a}" - - [[archives]] - name = "mirror" - type = "jsonl" - failure_mode = "best_effort" - read_priority = 10 - [archives.params] - path = "{b}" - "#, - a = dir_a.path().to_string_lossy().replace('\\', "/"), - b = dir_b.path().to_string_lossy().replace('\\', "/"), - ); - let cfg: ArchivesConfig = toml::from_str(&cfg_src)?; - let registry = BackendRegistry::with_jsonl(); - let archivist = Arc::new(Archivist::from_config(cfg, ®istry, None).await?); - - println!("\n=== Multi-backend Archivist example ===\n"); - println!("Boot complete. Archives (ordered by read_priority):"); - for s in archivist.list_archives_with_health().await { - println!( - " - name={:<8} type={:<6} priority={:<3} enabled={} write_active={} failure_mode={:?} health={:?}", - s.name, - s.type_name, - s.read_priority, - s.enabled, - s.write_active, - s.failure_mode, - s.health, - ); - } - - // ------------------------------------------------------------------ - // Register a connector. The primary owns the canonical record; fanout - // mirrors it to the secondary. - // ------------------------------------------------------------------ - let connector_resp = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "Example".into(), - title: "multi-backend demo".into(), - client_native_id: "example://multi_backend".into(), - custom_uid: None, - metadata: serde_json::json!({ "demo": true }), - fingerprint: None, - }, - None, - ) - .await?; - let connector_uid = connector_resp.connector_uid; - println!( - "\nRegistered connector: uid={} status={:?}", - connector_uid, connector_resp.status - ); - - // ------------------------------------------------------------------ - // Register a session under that connector. `register_session` writes - // the mapping and `session.json` on the primary first, then fans out - // to any enabled secondaries. 
- // ------------------------------------------------------------------ - let session_resp = archivist - .register_session( - RegisterSessionRequest { - connector_uid, - native_session_id: "demo-session-1".into(), - title: Some("multi-backend demo session".into()), - custom_scroll_id: None, - metadata: serde_json::json!({ "model": "demo" }), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await?; - let scroll_id = session_resp.scroll_id; - println!( - "Registered session: scroll_id={} status={:?}", - scroll_id, session_resp.status - ); - - // ------------------------------------------------------------------ - // Append a couple of messages. `append_messages` writes to the primary - // and then fans out inline to the mirror. - // ------------------------------------------------------------------ - let user_msg = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: scroll_id, - parent_id: None, - ts: Utc::now(), - role: "user".into(), - author: Some("alice".into()), - content_md: "Hello from the multi-backend example!".into(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - let asst_msg = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: scroll_id, - parent_id: Some(user_msg.message_id), - ts: Utc::now(), - role: "assistant".into(), - author: Some("claude".into()), - content_md: "Greetings. I have been written to two archives.".into(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - archivist - .append_messages(scroll_id, vec![user_msg, asst_msg], None) - .await?; - println!("\nAppended 2 messages — fanned out to primary + mirror."); - - // ------------------------------------------------------------------ - // Read path: the priority walk tries the primary first (priority=0). 
- // It finds the session there and never consults the mirror. - // ------------------------------------------------------------------ - let meta = archivist.get_session_metadata(scroll_id, None).await?; - println!( - "\nRead session via priority walk: title={:?} completeness={:?}", - meta.title, meta.completeness - ); - println!( - "Read cache size after read: {}", - archivist.read_cache_size().await - ); - - let messages = archivist.get_messages(scroll_id, None).await?; - println!("Read {} message(s) from the archive:", messages.len()); - for m in &messages { - println!(" - [{}] {}", m.role, m.content_md); - } - - // ------------------------------------------------------------------ - // Final health snapshot. Both backends should still be Available and - // have no queued writes (both run Inline write policies by default). - // ------------------------------------------------------------------ - println!("\nFinal health snapshot:"); - for s in archivist.list_archives_with_health().await { - println!( - " - {:<8} health={:?} queue_depth={:?} last_error={:?}", - s.name, s.health, s.queue_depth, s.last_error - ); - } - - // Clean shutdown drains any queued writer tasks. Both backends here run - // Inline, so this is effectively a no-op but remains the correct API. - archivist.shutdown().await?; - println!("\nShutdown complete."); - Ok(()) -} diff --git a/crates/dirigent_archivist/src/accumulator.rs b/crates/dirigent_archivist/src/accumulator.rs deleted file mode 100644 index c6296b6..0000000 --- a/crates/dirigent_archivist/src/accumulator.rs +++ /dev/null @@ -1,923 +0,0 @@ -//! Message accumulator for incremental message assembly. -//! -//! This is a thin wrapper around [`dirigent_protocol::accumulator::MessageAccumulator`] -//! that delegates chunk/tool/thinking operations to the protocol accumulator and -//! converts [`AccumulatedMessage`] to [`MessageRecord`] on `finalize()`. -//! -//! 
The accumulator preserves the order of content parts (text, thinking, tool calls) -//! as they arrive in the event stream, enabling inline tool rendering in the UI. - -use chrono::{DateTime, Utc}; -use dirigent_protocol::accumulator::{ - AccumulatedMessage, AccumulatedPart, - MessageAccumulator as ProtocolAccumulator, -}; -#[cfg(test)] -use dirigent_protocol::MessagePart; -use dirigent_protocol::ContentBlock; -use serde_json::Value; -use std::collections::HashMap; -use uuid::Uuid; - -use crate::error::Result; -use crate::types::MessageRecord; - -// Re-export ToolCallData from the protocol for backward compatibility. -pub use dirigent_protocol::accumulator::ToolCallData; - -/// Accumulator for assembling streaming message deltas into [`MessageRecord`]s. -/// -/// Wraps the protocol-level [`ProtocolAccumulator`] and adds archivist-specific -/// concerns: per-message metadata, UUID parsing, and markdown generation. -#[derive(Debug, Default)] -pub struct MessageAccumulator { - inner: ProtocolAccumulator, - /// Per-message metadata not tracked by the protocol accumulator. - metadata: HashMap, -} - -impl MessageAccumulator { - /// Create a new message accumulator - pub fn new() -> Result { - Ok(Self { - inner: ProtocolAccumulator::new(), - metadata: HashMap::new(), - }) - } - - /// Add a content chunk to the message buffer - pub fn add_chunk( - &mut self, - message_id: String, - session_id: String, - connector_id: String, - role: String, - content: ContentBlock, - ) { - self.inner - .add_chunk(&message_id, &session_id, &connector_id, &role, content); - } - - /// Add thinking content to the message buffer - pub fn add_thinking( - &mut self, - message_id: String, - session_id: String, - connector_id: String, - content: String, - ) { - self.inner.add_thinking(&message_id, &session_id, &connector_id, &content); - } - - /// Add or update a tool call in the message buffer - /// - /// This method handles both initial ToolCall events and ToolCallUpdate events. 
- /// If a tool call with the given ID already exists, it updates the existing entry. - /// Otherwise, it adds a new entry. - /// - /// This ensures that each tool_call_id appears exactly ONCE in the final message, - /// with the most recent input/output data. - pub fn add_or_update_tool_call(&mut self, message_id: String, tool_call: ToolCallData) { - self.inner.add_or_update_tool_call(&message_id, tool_call); - } - - /// Add a tool call to the message buffer (DEPRECATED - use add_or_update_tool_call) - #[deprecated(note = "Use add_or_update_tool_call instead to avoid duplicates")] - pub fn add_tool_call(&mut self, message_id: String, tool_call: ToolCallData) { - self.add_or_update_tool_call(message_id, tool_call); - } - - /// Update an existing tool call in the message buffer - /// - /// Finds the tool call by ID and updates its input/output with non-empty values - /// from the update. If no matching tool call is found, this is a no-op (the - /// update arrived before the initial ToolCall). - pub fn update_tool_call( - &mut self, - message_id: String, - tool_call_id: &str, - input: Option, - output: Option, - ) { - // Construct a ToolCallData and delegate to add_or_update_tool_call. - // We need the tool_name but don't have it here; use an empty string - // since add_or_update_tool_call only updates existing entries when the - // id matches. However, if there's no existing entry, this would create - // a new one with empty tool_name - so we need to check first. - // - // Instead, we use the protocol accumulator's update semantics directly: - // build a ToolCallData with the values we have. - let tool_call = ToolCallData { - id: tool_call_id.to_string(), - tool_name: String::new(), // Will be overwritten by existing entry's name - input: input.unwrap_or(Value::Null), - output, - }; - - // Only delegate if a buffer exists for this message (matching original behavior). 
- if self.inner.has_buffer(&message_id) { - self.inner.add_or_update_tool_call(&message_id, tool_call); - } - } - - /// Get all message IDs for a given session that have active buffers - pub fn get_message_ids_for_session(&self, session_id: &str) -> Vec { - self.inner.message_ids_for_session(session_id) - } - - /// Get message IDs for buffers that have been inactive longer than the threshold - pub fn get_stale_message_ids( - &self, - _now: DateTime, - threshold: std::time::Duration, - ) -> Vec { - self.inner.stale_message_ids(threshold) - } - - /// Get all message IDs that have active buffers - pub fn get_all_message_ids(&self) -> Vec { - self.inner.active_message_ids() - } - - /// Finalize a message and produce a complete `(MessageRecord, connector_id, native_session_id)`. - /// - /// Returns `None` if no buffer exists for the given `message_id`. - /// The `connector_id` and `native_session_id` in the tuple are the raw values - /// that were passed into `add_chunk`/`add_thinking` — callers in Task 5 will use - /// these to resolve the canonical scroll_id. - pub fn finalize(&mut self, message_id: &str) -> Option<(MessageRecord, String, String)> { - let accumulated = self.inner.finalize(message_id)?; - - let connector_id = accumulated.connector_id.clone(); - let native_session_id = accumulated.session_id.clone(); - - // Take stored metadata for this message (if any). - let metadata = self - .metadata - .remove(message_id) - .unwrap_or(Value::Null); - - let record = accumulated_to_record(accumulated, metadata); - Some((record, connector_id, native_session_id)) - } -} - -// --------------------------------------------------------------------------- -// Conversion helpers -// --------------------------------------------------------------------------- - -/// Convert an [`AccumulatedMessage`] into a [`MessageRecord`] for archival. 
-fn accumulated_to_record(accumulated: AccumulatedMessage, metadata: Value) -> MessageRecord { - // Build content_md by iterating parts in order - let mut content_md = String::new(); - - for part in &accumulated.parts { - match part { - AccumulatedPart::Text { text } => { - content_md.push_str(text); - } - AccumulatedPart::Thinking { text } => { - content_md.push_str("\n\n\n"); - content_md.push_str(text); - content_md.push_str("\n"); - } - AccumulatedPart::Tool { data } => { - content_md.push_str(&format!( - "\n\n**Tool**: {}\n```json\n{}\n```", - data.tool_name, - serde_json::to_string_pretty(&data.input) - .unwrap_or_else(|_| "{}".to_string()) - )); - } - } - } - - // Convert accumulated parts to protocol MessageParts for rich rendering - let message_parts = accumulated.to_message_parts(); - - // Serialize content_parts for storage (None if empty to save space) - let content_parts = if message_parts.is_empty() { - None - } else { - serde_json::to_value(&message_parts).ok() - }; - - // Parse UUIDs from strings - // Strip "msg-" prefix if present (ACP connectors use this format) - let message_id_str = accumulated - .message_id - .strip_prefix("msg-") - .unwrap_or(&accumulated.message_id); - - if message_id_str != accumulated.message_id.as_str() { - tracing::debug!( - "Stripped 'msg-' prefix from message_id: {} -> {}", - accumulated.message_id, - message_id_str - ); - } - - let message_uuid = match Uuid::parse_str(message_id_str) { - Ok(uuid) => uuid, - Err(_) => { - tracing::warn!( - "Failed to parse message_id as UUID: {}", - accumulated.message_id - ); - Uuid::now_v7() - } - }; - - // Strip "msg-" prefix from session_id if present (for consistency) - let session_id_str = accumulated - .session_id - .strip_prefix("msg-") - .unwrap_or(&accumulated.session_id); - - if session_id_str != accumulated.session_id.as_str() { - tracing::debug!( - "Stripped 'msg-' prefix from session_id: {} -> {}", - accumulated.session_id, - session_id_str - ); - } - - let session_uuid = 
match Uuid::parse_str(session_id_str) { - Ok(uuid) => uuid, - Err(_) => { - tracing::warn!( - "Failed to parse session_id as UUID: {}", - accumulated.session_id - ); - Uuid::now_v7() - } - }; - - MessageRecord { - version: 1, - message_id: message_uuid, - session: session_uuid, - parent_id: None, - ts: accumulated.created_at.unwrap_or_else(Utc::now), - role: accumulated.role, - author: None, - content_md, - content_parts, - attachments: Vec::new(), - metadata, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_accumulator_creation() { - let acc = MessageAccumulator::new().unwrap(); - assert_eq!(acc.get_all_message_ids().len(), 0); - } - - #[test] - fn test_add_text_chunk() { - let mut acc = MessageAccumulator::new().unwrap(); - - acc.add_chunk( - "msg_1".to_string(), - "session_1".to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: "Hello, ".to_string(), - }, - ); - - acc.add_chunk( - "msg_1".to_string(), - "session_1".to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: "world!".to_string(), - }, - ); - - // Two consecutive text chunks should be coalesced. - // Finalize and check the result. - let (record, _, _) = acc.finalize("msg_1").unwrap(); - assert_eq!(record.content_md, "Hello, world!"); - } - - #[test] - fn test_add_thinking_chunk() { - let mut acc = MessageAccumulator::new().unwrap(); - - acc.add_thinking( - "msg_2".to_string(), - "session_2".to_string(), - "connector_1".to_string(), - "Let me think... ".to_string(), - ); - - acc.add_thinking( - "msg_2".to_string(), - "session_2".to_string(), - "connector_1".to_string(), - "I need to analyze this.".to_string(), - ); - - // Finalize and verify thinking was coalesced - let (record, _, _) = acc.finalize("msg_2").unwrap(); - assert!(record.content_md.contains("Let me think... 
I need to analyze this.")); - assert_eq!(record.role, "assistant"); - } - - #[test] - fn test_finalize_text_only() { - let mut acc = MessageAccumulator::new().unwrap(); - - acc.add_chunk( - "01936e8f-e5a7-7000-8000-000000000001".to_string(), - "01936e8f-e5a7-7000-8000-000000000002".to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: "Hello, ".to_string(), - }, - ); - - acc.add_chunk( - "01936e8f-e5a7-7000-8000-000000000001".to_string(), - "01936e8f-e5a7-7000-8000-000000000002".to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: "world!".to_string(), - }, - ); - - let (record, _, _) = acc - .finalize("01936e8f-e5a7-7000-8000-000000000001") - .unwrap(); - - assert_eq!(record.content_md, "Hello, world!"); - assert_eq!(record.role, "user"); - assert!(record.ts <= Utc::now()); - } - - #[test] - fn test_finalize_with_thinking() { - let mut acc = MessageAccumulator::new().unwrap(); - - acc.add_chunk( - "01936e8f-e5a7-7000-8000-000000000003".to_string(), - "01936e8f-e5a7-7000-8000-000000000004".to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Here's my response.".to_string(), - }, - ); - - acc.add_thinking( - "01936e8f-e5a7-7000-8000-000000000003".to_string(), - "01936e8f-e5a7-7000-8000-000000000004".to_string(), - "connector_1".to_string(), - "Let me analyze this carefully.".to_string(), - ); - - let (record, _, _) = acc - .finalize("01936e8f-e5a7-7000-8000-000000000003") - .unwrap(); - - assert!(record.content_md.contains("Here's my response.")); - assert!(record.content_md.contains("")); - assert!(record.content_md.contains("Let me analyze this carefully.")); - assert!(record.content_md.contains("")); - } - - #[test] - fn test_finalize_nonexistent_message() { - let mut acc = MessageAccumulator::new().unwrap(); - let result = acc.finalize("nonexistent"); - assert!(result.is_none()); - } - - #[test] - fn test_add_tool_call() { - let mut 
acc = MessageAccumulator::new().unwrap(); - - // First add a text chunk to create the buffer - acc.add_chunk( - "msg_tool".to_string(), - "session_tool".to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "I'll use a tool.".to_string(), - }, - ); - - // Add a tool call - let tool_call = ToolCallData { - id: "call_123".to_string(), - tool_name: "search".to_string(), - input: serde_json::json!({"query": "test"}), - output: Some(serde_json::json!({"results": ["a", "b"]})), - }; - - #[allow(deprecated)] - acc.add_tool_call("msg_tool".to_string(), tool_call); - - // Finalize and verify - let (record, _, _) = acc.finalize("msg_tool").unwrap(); - let parts = - serde_json::from_value::>(record.content_parts.unwrap()).unwrap(); - assert_eq!(parts.len(), 2); // One Text, one Tool - assert!(matches!(parts[1], MessagePart::Tool { .. })); - if let MessagePart::Tool { tool, .. } = &parts[1] { - assert_eq!(tool, "search"); - } - } - - #[test] - fn test_finalize_with_tool_calls() { - let mut acc = MessageAccumulator::new().unwrap(); - - acc.add_chunk( - "01936e8f-e5a7-7000-8000-000000000005".to_string(), - "01936e8f-e5a7-7000-8000-000000000006".to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Let me search for that.".to_string(), - }, - ); - - let tool_call = ToolCallData { - id: "call_456".to_string(), - tool_name: "web_search".to_string(), - input: serde_json::json!({"query": "Rust async"}), - output: None, - }; - - #[allow(deprecated)] - acc.add_tool_call( - "01936e8f-e5a7-7000-8000-000000000005".to_string(), - tool_call, - ); - - let (record, _, _) = acc - .finalize("01936e8f-e5a7-7000-8000-000000000005") - .unwrap(); - - assert!(record.content_md.contains("Let me search for that.")); - assert!(record.content_md.contains("**Tool**: web_search")); - assert!(record.content_md.contains("Rust async")); - } - - #[test] - fn test_concurrent_messages() { - let mut acc = 
MessageAccumulator::new().unwrap(); - - // Add chunks for two different messages - acc.add_chunk( - "msg_a".to_string(), - "session_1".to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: "Message A".to_string(), - }, - ); - - acc.add_chunk( - "msg_b".to_string(), - "session_1".to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Message B".to_string(), - }, - ); - - acc.add_chunk( - "msg_a".to_string(), - "session_1".to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: " continued".to_string(), - }, - ); - - // Both messages should be buffered - assert_eq!(acc.get_all_message_ids().len(), 2); - - // Finalize and check - let (record_a, _, _) = acc.finalize("msg_a").unwrap(); - assert_eq!(record_a.content_md, "Message A continued"); - - let (record_b, _, _) = acc.finalize("msg_b").unwrap(); - assert_eq!(record_b.content_md, "Message B"); - } - - #[test] - fn test_get_message_ids_for_session() { - let mut acc = MessageAccumulator::new().unwrap(); - - // Add messages to different sessions - acc.add_chunk( - "msg_1".to_string(), - "session_a".to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: "Message 1".to_string(), - }, - ); - - acc.add_chunk( - "msg_2".to_string(), - "session_a".to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Message 2".to_string(), - }, - ); - - acc.add_chunk( - "msg_3".to_string(), - "session_b".to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: "Message 3".to_string(), - }, - ); - - // Get message IDs for session_a - let mut session_a_ids = acc.get_message_ids_for_session("session_a"); - session_a_ids.sort(); - assert_eq!(session_a_ids, vec!["msg_1", "msg_2"]); - - // Get message IDs for session_b - let session_b_ids = acc.get_message_ids_for_session("session_b"); - 
assert_eq!(session_b_ids, vec!["msg_3"]); - - // Get message IDs for non-existent session - let empty_ids = acc.get_message_ids_for_session("session_c"); - assert!(empty_ids.is_empty()); - } - - #[test] - fn test_finalize_with_msg_prefix() { - let mut acc = MessageAccumulator::new().unwrap(); - - // Use message_id and session_id with "msg-" prefix (ACP format) - let uuid_str = "01936e8f-e5a7-7000-8000-000000000007"; - let session_uuid_str = "01936e8f-e5a7-7000-8000-000000000008"; - - acc.add_chunk( - format!("msg-{}", uuid_str), - format!("msg-{}", session_uuid_str), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Testing msg- prefix handling.".to_string(), - }, - ); - - let (record, _, _) = acc.finalize(&format!("msg-{}", uuid_str)).unwrap(); - - // Verify that the UUID was correctly parsed (not regenerated) - assert_eq!(record.message_id.to_string(), uuid_str); - assert_eq!(record.session.to_string(), session_uuid_str); - assert_eq!(record.content_md, "Testing msg- prefix handling."); - } - - #[test] - fn test_finalize_without_msg_prefix() { - let mut acc = MessageAccumulator::new().unwrap(); - - // Use message_id and session_id without "msg-" prefix - let uuid_str = "01936e8f-e5a7-7000-8000-000000000009"; - let session_uuid_str = "01936e8f-e5a7-7000-8000-00000000000a"; - - acc.add_chunk( - uuid_str.to_string(), - session_uuid_str.to_string(), - "connector_1".to_string(), - "user".to_string(), - ContentBlock::Text { - text: "Testing without prefix.".to_string(), - }, - ); - - let (record, _, _) = acc.finalize(uuid_str).unwrap(); - - // Verify that the UUID was correctly parsed - assert_eq!(record.message_id.to_string(), uuid_str); - assert_eq!(record.session.to_string(), session_uuid_str); - assert_eq!(record.content_md, "Testing without prefix."); - } - - #[test] - fn test_interleaved_tool_calls() { - let mut acc = MessageAccumulator::new().unwrap(); - let msg_id = "01936e8f-e5a7-7000-8000-000000000010"; - let session_id = 
"01936e8f-e5a7-7000-8000-000000000011"; - - // Text chunk 1 - acc.add_chunk( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Let me search for that. ".to_string(), - }, - ); - - // Tool call 1 - acc.add_or_update_tool_call( - msg_id.to_string(), - ToolCallData { - id: "call_1".to_string(), - tool_name: "search".to_string(), - input: serde_json::json!({"query": "rust"}), - output: None, - }, - ); - - // Text chunk 2 - acc.add_chunk( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Now let me check the documentation. ".to_string(), - }, - ); - - // Tool call 2 - acc.add_or_update_tool_call( - msg_id.to_string(), - ToolCallData { - id: "call_2".to_string(), - tool_name: "read_docs".to_string(), - input: serde_json::json!({"path": "README.md"}), - output: None, - }, - ); - - // Text chunk 3 - acc.add_chunk( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Based on my research...".to_string(), - }, - ); - - let (record, _, _) = acc.finalize(msg_id).unwrap(); - - // Verify content_md has correct order (text1, tool1, text2, tool2, text3) - let content = &record.content_md; - let search_pos = content.find("**Tool**: search").expect("search tool not found"); - let docs_pos = content - .find("**Tool**: read_docs") - .expect("read_docs tool not found"); - let text1_pos = content.find("Let me search").expect("text1 not found"); - let text2_pos = content.find("Now let me check").expect("text2 not found"); - let text3_pos = content.find("Based on my research").expect("text3 not found"); - - // Verify order: text1 < search < text2 < read_docs < text3 - assert!( - text1_pos < search_pos, - "text1 should come before search tool" - ); - assert!( - search_pos < text2_pos, - "search tool should come before text2" - ); - 
assert!( - text2_pos < docs_pos, - "text2 should come before read_docs tool" - ); - assert!( - docs_pos < text3_pos, - "read_docs tool should come before text3" - ); - - // Verify content_parts structure - let parts = - serde_json::from_value::>(record.content_parts.unwrap()).unwrap(); - assert_eq!( - parts.len(), - 5, - "Should have 5 parts: text, tool, text, tool, text" - ); - - // Verify each part type in order - assert!(matches!(parts[0], MessagePart::Text { .. })); - assert!(matches!(parts[1], MessagePart::Tool { .. })); - assert!(matches!(parts[2], MessagePart::Text { .. })); - assert!(matches!(parts[3], MessagePart::Tool { .. })); - assert!(matches!(parts[4], MessagePart::Text { .. })); - } - - #[test] - fn test_text_coalescing_with_tool_separation() { - let mut acc = MessageAccumulator::new().unwrap(); - let msg_id = "msg1"; - let session_id = "session1"; - - // Two consecutive text chunks (should coalesce) - acc.add_chunk( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Hello ".to_string(), - }, - ); - acc.add_chunk( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "world. 
".to_string(), - }, - ); - - // Tool call (separates text) - acc.add_or_update_tool_call( - msg_id.to_string(), - ToolCallData { - id: "call_1".to_string(), - tool_name: "search".to_string(), - input: serde_json::json!({}), - output: None, - }, - ); - - // Two more consecutive text chunks (should coalesce separately) - acc.add_chunk( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "More ".to_string(), - }, - ); - acc.add_chunk( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "text.".to_string(), - }, - ); - - let (record, _, _) = acc.finalize(msg_id).unwrap(); - - // Should have 3 parts: coalesced text1, tool, coalesced text2 - let parts = - serde_json::from_value::>(record.content_parts.unwrap()).unwrap(); - assert_eq!(parts.len(), 3); - - // Verify first text part is coalesced - if let MessagePart::Text { text } = &parts[0] { - assert_eq!(text, "Hello world. "); - } else { - panic!("Expected Text part"); - } - - // Verify tool part - assert!(matches!(parts[1], MessagePart::Tool { .. })); - - // Verify second text part is coalesced - if let MessagePart::Text { text } = &parts[2] { - assert_eq!(text, "More text."); - } else { - panic!("Expected Text part"); - } - } - - #[test] - fn test_tool_call_progressive_updates() { - let mut acc = MessageAccumulator::new().unwrap(); - let msg_id = "msg1"; - let session_id = "session1"; - - // Create buffer with initial text chunk - acc.add_chunk( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "assistant".to_string(), - ContentBlock::Text { - text: "Using grep... 
".to_string(), - }, - ); - - // Initial tool call (empty input, no output) - acc.add_or_update_tool_call( - msg_id.to_string(), - ToolCallData { - id: "call_1".to_string(), - tool_name: "grep".to_string(), - input: serde_json::json!({}), - output: None, - }, - ); - - // Update with actual input - acc.add_or_update_tool_call( - msg_id.to_string(), - ToolCallData { - id: "call_1".to_string(), - tool_name: "grep".to_string(), - input: serde_json::json!({"pattern": "rust"}), - output: None, - }, - ); - - // Update with output - acc.add_or_update_tool_call( - msg_id.to_string(), - ToolCallData { - id: "call_1".to_string(), - tool_name: "grep".to_string(), - input: serde_json::json!({}), // Empty, should not overwrite - output: Some(serde_json::json!({"results": ["match1", "match2"]})), - }, - ); - - let (record, _, _) = acc.finalize(msg_id).unwrap(); - - // Should have 2 parts: text and tool (tool merged from 3 updates) - let parts = - serde_json::from_value::>(record.content_parts.unwrap()).unwrap(); - assert_eq!(parts.len(), 2); - - // Verify first part is text - assert!(matches!(parts[0], MessagePart::Text { .. })); - - // Verify second part is tool with merged data - if let MessagePart::Tool { - tool, input, output, .. - } = &parts[1] - { - assert_eq!(tool, "grep"); - assert_eq!(input, &serde_json::json!({"pattern": "rust"})); // Input preserved - assert!(output.is_some()); // Output added - } else { - panic!("Expected Tool part"); - } - } - - #[test] - fn test_thinking_coalescing() { - let mut acc = MessageAccumulator::new().unwrap(); - let msg_id = "msg1"; - let session_id = "session1"; - - // Add multiple thinking chunks - acc.add_thinking( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "First thought. 
".to_string(), - ); - acc.add_thinking( - msg_id.to_string(), - session_id.to_string(), - "connector_1".to_string(), - "Second thought.".to_string(), - ); - - let (record, _, _) = acc.finalize(msg_id).unwrap(); - - // Should have 1 thinking part (coalesced) - let parts = - serde_json::from_value::>(record.content_parts.unwrap()).unwrap(); - assert_eq!(parts.len(), 1); - - // Verify it's coalesced thinking - if let MessagePart::Thinking { text } = &parts[0] { - assert_eq!(text, "First thought. Second thought."); - } else { - panic!("Expected Thinking part"); - } - } -} diff --git a/crates/dirigent_archivist/src/backend/capability.rs b/crates/dirigent_archivist/src/backend/capability.rs deleted file mode 100644 index c5a3669..0000000 --- a/crates/dirigent_archivist/src/backend/capability.rs +++ /dev/null @@ -1,18 +0,0 @@ -//! Archive backend capability enumeration. -//! -//! Mandatory session + message primitives are NOT listed here — every -//! backend has them. This enum represents the *optional* sub-traits a -//! backend opts into, surfaced through `ArchiveBackend::as_xxx()` accessors. - -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] -pub enum ArchiveCapability { - Search, - Dag, - MetaEvents, - ConnectorRegistry, - SessionMapping, -} - -pub type CapabilitySet = std::collections::HashSet; diff --git a/crates/dirigent_archivist/src/backend/contract.rs b/crates/dirigent_archivist/src/backend/contract.rs deleted file mode 100644 index 5599620..0000000 --- a/crates/dirigent_archivist/src/backend/contract.rs +++ /dev/null @@ -1,108 +0,0 @@ -//! Reusable sub-trait contract tests. -//! -//! Pass any `&dyn ArchiveBackend` to verify it honors the behavioral -//! contract of each sub-trait it exposes. Phase 2 runs this against -//! `JsonlBackend`; Phase 3+ reuses it for every new backend. 
- -#![cfg(any(test, feature = "test-utils"))] - -use uuid::Uuid; - -use crate::backend::ArchiveBackend; - -/// Exercises `ConnectorRegistryBackend` through `as_connector_registry()`. -/// Skips silently if the backend does not expose the sub-trait. -pub async fn verify_connector_registry_contract(backend: &dyn ArchiveBackend) { - let Some(registry) = backend.as_connector_registry() else { - return; - }; - - // Empty state — listing returns Vec::new(), not an error. - let list = registry.list_connectors().await.expect("list_connectors"); - assert!(list.is_empty(), "fresh backend should have no connectors"); - - // get_connector on missing UID returns Ok(None). - let missing = registry - .get_connector(Uuid::new_v4()) - .await - .expect("get_connector"); - assert!(missing.is_none()); - - // resolve_connector_uid on unknown id returns Ok(None). - let unresolved = registry - .resolve_connector_uid("nonexistent@host") - .await - .expect("resolve_connector_uid"); - assert!(unresolved.is_none()); -} - -/// Exercises `SessionMappingBackend`. -pub async fn verify_session_mapping_contract(backend: &dyn ArchiveBackend) { - let Some(mapping) = backend.as_session_mapping() else { - return; - }; - - let missing = mapping - .get_mapping(Uuid::new_v4(), "absent") - .await - .expect("get_mapping"); - assert!(missing.is_none()); - - let owner = mapping - .find_owner("absent") - .await - .expect("find_owner"); - assert!(owner.is_none()); -} - -/// Exercises `DagBackend`. -pub async fn verify_dag_contract(backend: &dyn ArchiveBackend) { - let Some(dag) = backend.as_dag() else { - return; - }; - - let children = dag - .get_children(Uuid::new_v4()) - .await - .expect("get_children"); - assert!(children.is_empty()); - - let edges = dag - .get_dag_edges(Uuid::new_v4()) - .await - .expect("get_dag_edges"); - assert!(edges.is_empty()); -} - -/// Exercises `MetaEventsBackend`. 
-pub async fn verify_meta_events_contract(backend: &dyn ArchiveBackend) { - let Some(meta) = backend.as_meta_events() else { - return; - }; - - let events = meta - .get_meta_events(Uuid::new_v4()) - .await - .expect("get_meta_events"); - assert!(events.is_empty()); - - let by_client = meta - .find_meta_session_by_client("absent") - .await - .expect("find_meta_session_by_client"); - assert!(by_client.is_none()); - - let all = meta - .list_meta_sessions() - .await - .expect("list_meta_sessions"); - assert!(all.is_empty()); -} - -/// One-shot helper: runs every sub-trait contract whose capability is present. -pub async fn verify_all_contracts(backend: &dyn ArchiveBackend) { - verify_connector_registry_contract(backend).await; - verify_session_mapping_contract(backend).await; - verify_dag_contract(backend).await; - verify_meta_events_contract(backend).await; -} diff --git a/crates/dirigent_archivist/src/backend/health.rs b/crates/dirigent_archivist/src/backend/health.rs deleted file mode 100644 index 6cc3882..0000000 --- a/crates/dirigent_archivist/src/backend/health.rs +++ /dev/null @@ -1,10 +0,0 @@ -//! Health status reported by `ArchiveBackend::health_check`. - -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub enum HealthStatus { - Healthy, - Degraded { reason: String }, - Unavailable { reason: String }, -} diff --git a/crates/dirigent_archivist/src/backend/mock.rs b/crates/dirigent_archivist/src/backend/mock.rs deleted file mode 100644 index e2215bc..0000000 --- a/crates/dirigent_archivist/src/backend/mock.rs +++ /dev/null @@ -1,574 +0,0 @@ -//! In-memory `ArchiveBackend` for coordinator unit tests. -//! -//! Fully supports every sub-trait. State lives in `Mutex>`. 
- -#![cfg(any(test, feature = "test-utils"))] - -use std::collections::HashMap; -use std::sync::Mutex; - -use async_trait::async_trait; -use uuid::Uuid; - -use crate::backend::{ - ArchiveBackend, ArchiveCapability, CapabilitySet, ConnectorRegistryBackend, - DagBackend, HealthStatus, MetaEventsBackend, SessionMappingBackend, -}; -use crate::error::{ArchivistError, Result}; -use crate::types::{ - ConnectorRecord, DagEdge, MessageCursor, MessagePage, MessageRecord, - MetaEventRecord, SessionListQuery, SessionMapping, SessionMetadata, SessionPage, -}; - -pub struct MockBackend { - capabilities: CapabilitySet, - sessions: Mutex>, - messages: Mutex>>, - connectors: Mutex>, - mappings: Mutex>, - meta_events: Mutex>>, - dag_edges: Mutex>, - fail_next_writes: std::sync::atomic::AtomicUsize, - fail_next_reads: std::sync::atomic::AtomicUsize, - permanent_error: std::sync::Mutex>, - append_calls: std::sync::Mutex>, - per_op_delay: std::sync::Mutex, -} - -impl MockBackend { - pub fn new() -> Self { - let mut capabilities = CapabilitySet::new(); - capabilities.insert(ArchiveCapability::Dag); - capabilities.insert(ArchiveCapability::MetaEvents); - capabilities.insert(ArchiveCapability::ConnectorRegistry); - capabilities.insert(ArchiveCapability::SessionMapping); - Self { - capabilities, - sessions: Mutex::new(HashMap::new()), - messages: Mutex::new(HashMap::new()), - connectors: Mutex::new(HashMap::new()), - mappings: Mutex::new(HashMap::new()), - meta_events: Mutex::new(HashMap::new()), - dag_edges: Mutex::new(Vec::new()), - fail_next_writes: std::sync::atomic::AtomicUsize::new(0), - fail_next_reads: std::sync::atomic::AtomicUsize::new(0), - permanent_error: std::sync::Mutex::new(None), - append_calls: std::sync::Mutex::new(std::collections::HashMap::new()), - per_op_delay: std::sync::Mutex::new(std::time::Duration::ZERO), - } - } -} - -impl MockBackend { - /// Build a mock with the exact capability set provided. All other state - /// starts empty (same as `new()`). 
- pub fn with_capabilities(capabilities: CapabilitySet) -> Self { - let mut m = Self::new(); - m.capabilities = capabilities; - m - } - - /// Test helper: does this mock have any meta events for the given session? - pub fn has_meta_events(&self, scroll_id: uuid::Uuid) -> bool { - self.meta_events - .lock() - .unwrap() - .get(&scroll_id) - .map(|v| !v.is_empty()) - .unwrap_or(false) - } - - /// Queue up `count` injected write failures. The next `count` calls to - /// any mutating API return `ArchivistError::Other("injected write failure")` - /// before touching state. - pub fn inject_write_failures(&self, count: usize) { - self.fail_next_writes - .store(count, std::sync::atomic::Ordering::SeqCst); - } - - /// Queue up `count` injected read failures for per-scroll_id reads. - pub fn inject_read_failures(&self, count: usize) { - self.fail_next_reads - .store(count, std::sync::atomic::Ordering::SeqCst); - } - - /// Simulate a permanently broken backend. - pub fn break_permanently(&self, reason: impl Into) { - *self.permanent_error.lock().unwrap() = Some(reason.into()); - } - - pub fn clear_failures(&self) { - self.fail_next_writes - .store(0, std::sync::atomic::Ordering::SeqCst); - self.fail_next_reads - .store(0, std::sync::atomic::Ordering::SeqCst); - *self.permanent_error.lock().unwrap() = None; - } - - /// Test helper: how many `MessageRecord`s this mock has for the given session. - pub fn appended_count(&self, scroll_id: uuid::Uuid) -> usize { - self.messages - .lock() - .unwrap() - .get(&scroll_id) - .map(|v| v.len()) - .unwrap_or(0) - } - - /// Test helper: how many times `append_messages` was invoked for the - /// given session (regardless of message count per invocation). - pub fn append_call_count(&self, scroll_id: uuid::Uuid) -> usize { - self.append_calls - .lock() - .unwrap() - .get(&scroll_id) - .copied() - .unwrap_or(0) - } - - /// Test helper: artificially slow every mutating backend operation by - /// sleeping `d` before it touches state. 
Used to simulate a slow backend - /// for backpressure tests. - pub fn set_per_op_delay(&self, d: std::time::Duration) { - *self.per_op_delay.lock().unwrap() = d; - } - - async fn maybe_delay(&self) { - let d = *self.per_op_delay.lock().unwrap(); - if !d.is_zero() { - tokio::time::sleep(d).await; - } - } - - pub(crate) fn check_write_failure(&self) -> Result<()> { - if let Some(reason) = self.permanent_error.lock().unwrap().clone() { - return Err(ArchivistError::Other(reason)); - } - let prev = self - .fail_next_writes - .fetch_update( - std::sync::atomic::Ordering::SeqCst, - std::sync::atomic::Ordering::SeqCst, - |n| if n > 0 { Some(n - 1) } else { None }, - ) - .ok(); - if prev.is_some() { - return Err(ArchivistError::Other("injected write failure".into())); - } - Ok(()) - } - - pub(crate) fn check_read_failure(&self) -> Result<()> { - if let Some(reason) = self.permanent_error.lock().unwrap().clone() { - return Err(ArchivistError::Other(reason)); - } - let prev = self - .fail_next_reads - .fetch_update( - std::sync::atomic::Ordering::SeqCst, - std::sync::atomic::Ordering::SeqCst, - |n| if n > 0 { Some(n - 1) } else { None }, - ) - .ok(); - if prev.is_some() { - return Err(ArchivistError::Other("injected read failure".into())); - } - Ok(()) - } -} - -impl Default for MockBackend { - fn default() -> Self { - Self::new() - } -} - -#[async_trait] -impl ArchiveBackend for MockBackend { - fn capabilities(&self) -> &CapabilitySet { - &self.capabilities - } - async fn health_check(&self) -> HealthStatus { - HealthStatus::Healthy - } - - async fn put_session(&self, meta: SessionMetadata) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - self.sessions.lock().unwrap().insert(meta.scroll_id, meta); - Ok(()) - } - async fn get_session(&self, scroll_id: Uuid) -> Result> { - self.check_read_failure()?; - Ok(self.sessions.lock().unwrap().get(&scroll_id).cloned()) - } - async fn list_sessions_paged(&self, query: SessionListQuery) -> Result { - let mut 
items: Vec = - self.sessions.lock().unwrap().values().cloned().collect(); - if !query.connector_uids.is_empty() { - items.retain(|s| query.connector_uids.contains(&s.connector_uid)); - } - items.sort_by(|a, b| { - b.updated_at - .cmp(&a.updated_at) - .then(b.scroll_id.cmp(&a.scroll_id)) - }); - let limit = query.limit.min(crate::types::MAX_PAGE_LIMIT).max(1); - let total_count = items.len(); - let items: Vec<_> = items.into_iter().take(limit).collect(); - Ok(SessionPage { - items, - next_cursor: None, - total_count: Some(total_count), - }) - } - async fn delete_session(&self, scroll_id: Uuid) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - if self.sessions.lock().unwrap().remove(&scroll_id).is_none() { - return Err(ArchivistError::SessionUnknown(scroll_id)); - } - self.messages.lock().unwrap().remove(&scroll_id); - Ok(()) - } - - async fn append_messages( - &self, - scroll_id: Uuid, - msgs: Vec, - ) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - *self - .append_calls - .lock() - .unwrap() - .entry(scroll_id) - .or_insert(0) += 1; - self.messages - .lock() - .unwrap() - .entry(scroll_id) - .or_default() - .extend(msgs); - Ok(()) - } - async fn get_messages_paged( - &self, - scroll_id: Uuid, - cursor: Option, - limit: usize, - ) -> Result { - self.check_read_failure()?; - let mut all = self - .messages - .lock() - .unwrap() - .get(&scroll_id) - .cloned() - .unwrap_or_default(); - all.sort_by(|a, b| a.ts.cmp(&b.ts).then(a.message_id.cmp(&b.message_id))); - if let Some(c) = cursor.as_ref() { - all.retain(|m| (m.ts, m.message_id) > (c.ts, c.message_id)); - } - let total = all.len(); - let taken: Vec<_> = all.into_iter().take(limit.max(1)).collect(); - let next_cursor = if total > taken.len() { - taken.last().map(|m| MessageCursor { - ts: m.ts, - message_id: m.message_id, - }) - } else { - None - }; - Ok(MessagePage { - items: taken, - next_cursor, - }) - } - async fn count_messages(&self, scroll_id: Uuid) -> 
Result { - self.check_read_failure()?; - Ok(self - .messages - .lock() - .unwrap() - .get(&scroll_id) - .map(|v| v.len()) - .unwrap_or(0)) - } - async fn clear_session_messages(&self, scroll_id: Uuid) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - self.messages.lock().unwrap().remove(&scroll_id); - Ok(()) - } - - fn as_dag(&self) -> Option<&dyn DagBackend> { - if self.capabilities.contains(&ArchiveCapability::Dag) { - Some(self) - } else { - None - } - } - fn as_meta_events(&self) -> Option<&dyn MetaEventsBackend> { - if self.capabilities.contains(&ArchiveCapability::MetaEvents) { - Some(self) - } else { - None - } - } - fn as_connector_registry(&self) -> Option<&dyn ConnectorRegistryBackend> { - if self - .capabilities - .contains(&ArchiveCapability::ConnectorRegistry) - { - Some(self) - } else { - None - } - } - fn as_session_mapping(&self) -> Option<&dyn SessionMappingBackend> { - if self - .capabilities - .contains(&ArchiveCapability::SessionMapping) - { - Some(self) - } else { - None - } - } -} - -#[async_trait] -impl ConnectorRegistryBackend for MockBackend { - async fn put_connector(&self, record: ConnectorRecord) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - self.connectors - .lock() - .unwrap() - .insert(record.connector_uid, record); - Ok(()) - } - async fn get_connector(&self, connector_uid: Uuid) -> Result> { - Ok(self - .connectors - .lock() - .unwrap() - .get(&connector_uid) - .cloned()) - } - async fn list_connectors(&self) -> Result> { - Ok(self.connectors.lock().unwrap().values().cloned().collect()) - } - async fn resolve_connector_uid(&self, client_native_id: &str) -> Result> { - Ok(self - .connectors - .lock() - .unwrap() - .values() - .find(|c| c.client_native_id == client_native_id) - .map(|c| c.connector_uid)) - } - async fn update_connector_fingerprint( - &self, - connector_uid: Uuid, - fingerprint: String, - ) -> Result<()> { - self.check_write_failure()?; - 
self.maybe_delay().await; - if let Some(r) = self.connectors.lock().unwrap().get_mut(&connector_uid) { - r.fingerprint = Some(fingerprint); - Ok(()) - } else { - Err(ArchivistError::ConnectorUnknown(connector_uid)) - } - } -} - -#[async_trait] -impl SessionMappingBackend for MockBackend { - async fn put_mapping( - &self, - connector_uid: Uuid, - native_session_id: &str, - scroll_id: Uuid, - ) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - self.mappings - .lock() - .unwrap() - .insert((connector_uid, native_session_id.to_string()), scroll_id); - Ok(()) - } - async fn get_mapping( - &self, - connector_uid: Uuid, - native_session_id: &str, - ) -> Result> { - Ok(self - .mappings - .lock() - .unwrap() - .get(&(connector_uid, native_session_id.to_string())) - .copied()) - } - async fn list_mappings_for_connector( - &self, - connector_uid: Uuid, - ) -> Result> { - Ok(self - .mappings - .lock() - .unwrap() - .iter() - .filter(|((c, _), _)| *c == connector_uid) - .map(|((c, n), s)| SessionMapping { - version: 1, - connector_uid: *c, - native_session_id: n.clone(), - scroll_id: *s, - created_at: chrono::Utc::now(), - alias_of: None, - }) - .collect()) - } - async fn find_owner(&self, native_session_id: &str) -> Result> { - Ok(self - .mappings - .lock() - .unwrap() - .iter() - .find(|((_, n), _)| n == native_session_id) - .map(|((c, _), s)| (*c, *s))) - } - - async fn rewrite_connector_mappings( - &self, - connector_uid: Uuid, - mappings: Vec, - ) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - let mut map = self.mappings.lock().unwrap(); - map.retain(|(c, _), _| *c != connector_uid); - for m in mappings { - map.insert((connector_uid, m.native_session_id), m.scroll_id); - } - Ok(()) - } -} - -#[async_trait] -impl DagBackend for MockBackend { - async fn append_dag_edge(&self, edge: DagEdge) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - self.dag_edges.lock().unwrap().push(edge); - Ok(()) - 
} - async fn get_children(&self, parent: Uuid) -> Result> { - self.check_read_failure()?; - let edges = self.dag_edges.lock().unwrap(); - let sessions = self.sessions.lock().unwrap(); - Ok(edges - .iter() - .filter(|e| e.parent == parent) - .filter_map(|e| sessions.get(&e.child).cloned()) - .collect()) - } - async fn get_dag_edges(&self, root: Uuid) -> Result> { - self.check_read_failure()?; - Ok(self - .dag_edges - .lock() - .unwrap() - .iter() - .filter(|e| e.parent == root) - .cloned() - .collect()) - } -} - -#[async_trait] -impl MetaEventsBackend for MockBackend { - async fn append_meta_events( - &self, - scroll_id: Uuid, - events: Vec, - ) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - self.meta_events - .lock() - .unwrap() - .entry(scroll_id) - .or_default() - .extend(events); - Ok(()) - } - async fn get_meta_events(&self, scroll_id: Uuid) -> Result> { - self.check_read_failure()?; - Ok(self - .meta_events - .lock() - .unwrap() - .get(&scroll_id) - .cloned() - .unwrap_or_default()) - } - async fn update_meta_session_status( - &self, - scroll_id: Uuid, - is_connected: bool, - current_session_id: Option, - ) -> Result<()> { - self.check_write_failure()?; - self.maybe_delay().await; - if let Some(s) = self.sessions.lock().unwrap().get_mut(&scroll_id) { - s.is_connected = Some(is_connected); - s.current_session_id = current_session_id; - Ok(()) - } else { - Err(ArchivistError::SessionUnknown(scroll_id)) - } - } - async fn list_meta_sessions(&self) -> Result> { - Ok(self - .sessions - .lock() - .unwrap() - .values() - .filter(|s| matches!(s.kind, crate::types::SessionKind::AcpConnection)) - .cloned() - .collect()) - } - async fn find_meta_session_by_client( - &self, - client_id: &str, - ) -> Result> { - Ok(self - .sessions - .lock() - .unwrap() - .values() - .find(|s| s.acp_client_id.as_deref() == Some(client_id)) - .cloned()) - } -} - -#[cfg(test)] -mod failure_injection_tests { - use super::*; - - #[tokio::test] - async fn 
injected_write_failure_returns_error_then_recovers() { - let m = MockBackend::new(); - m.inject_write_failures(2); - let scroll = uuid::Uuid::nil(); - assert!(m.append_messages(scroll, vec![]).await.is_err()); - assert!(m.append_messages(scroll, vec![]).await.is_err()); - assert!(m.append_messages(scroll, vec![]).await.is_ok()); // back to normal - } -} diff --git a/crates/dirigent_archivist/src/backend/mod.rs b/crates/dirigent_archivist/src/backend/mod.rs deleted file mode 100644 index 24fa7d3..0000000 --- a/crates/dirigent_archivist/src/backend/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Archive backend trait layer. -//! -//! See `docs/plans/2026-04-18-archivist-phase2-design.md` for the design. - -pub mod capability; -pub mod health; -pub mod traits; - -#[cfg(any(test, feature = "test-utils"))] -pub mod contract; - -#[cfg(any(test, feature = "test-utils"))] -pub mod mock; - -pub use capability::{ArchiveCapability, CapabilitySet}; -pub use health::HealthStatus; -pub use traits::{ - ArchiveBackend, ConnectorRegistryBackend, DagBackend, MetaEventsBackend, - SearchBackend, SessionMappingBackend, -}; diff --git a/crates/dirigent_archivist/src/backend/traits.rs b/crates/dirigent_archivist/src/backend/traits.rs deleted file mode 100644 index 62b49aa..0000000 --- a/crates/dirigent_archivist/src/backend/traits.rs +++ /dev/null @@ -1,167 +0,0 @@ -//! Archive backend trait definitions. -//! -//! `ArchiveBackend` is mandatory for every backend: session + message -//! primitives plus self-description (capabilities, health). Optional -//! sub-traits (`SearchBackend`, `DagBackend`, `MetaEventsBackend`, -//! `ConnectorRegistryBackend`, `SessionMappingBackend`) are surfaced -//! via `as_xxx() -> Option<&dyn SubTrait>` accessors returning a -//! borrow from `self`. -//! -//! See `docs/plans/2026-04-18-archivist-phase2-design.md` §Trait Definitions. 
- -use async_trait::async_trait; -use uuid::Uuid; - -use crate::backend::capability::CapabilitySet; -use crate::backend::health::HealthStatus; -use crate::error::Result; -use crate::types::{ - ConnectorRecord, DagEdge, MessageCursor, MessagePage, MessageRecord, - MetaEventRecord, SessionListQuery, SessionMapping, SessionMetadata, - SessionPage, -}; - -// --------------------------------------------------------------------------- -// Mandatory backend surface -// --------------------------------------------------------------------------- - -/// An archive storage backend. -/// -/// All backends must implement session metadata and message primitives; -/// optional capabilities are exposed through `as_xxx()` accessors that -/// return `None` when unsupported. `JsonlBackend` implements every -/// sub-trait except `SearchBackend`. -#[async_trait] -pub trait ArchiveBackend: Send + Sync { - // --- Self-description --- - fn capabilities(&self) -> &CapabilitySet; - async fn health_check(&self) -> HealthStatus; - - // --- Session metadata --- - async fn put_session(&self, meta: SessionMetadata) -> Result<()>; - async fn get_session(&self, scroll_id: Uuid) -> Result>; - async fn list_sessions_paged(&self, query: SessionListQuery) -> Result; - async fn delete_session(&self, scroll_id: Uuid) -> Result<()>; - - // --- Messages --- - async fn append_messages( - &self, - scroll_id: Uuid, - messages: Vec, - ) -> Result<()>; - async fn get_messages_paged( - &self, - scroll_id: Uuid, - cursor: Option, - limit: usize, - ) -> Result; - async fn count_messages(&self, scroll_id: Uuid) -> Result; - async fn clear_session_messages(&self, scroll_id: Uuid) -> Result<()>; - - // --- Optional capability accessors --- - fn as_search(&self) -> Option<&dyn SearchBackend> { - None - } - fn as_dag(&self) -> Option<&dyn DagBackend> { - None - } - fn as_meta_events(&self) -> Option<&dyn MetaEventsBackend> { - None - } - fn as_connector_registry(&self) -> Option<&dyn ConnectorRegistryBackend> { - None 
- } - fn as_session_mapping(&self) -> Option<&dyn SessionMappingBackend> { - None - } -} - -// --------------------------------------------------------------------------- -// Optional sub-traits -// --------------------------------------------------------------------------- - -/// Content search. Reserved in Phase 2; not wired to `JsonlBackend`. -/// -/// `packages/api/src/archivist/search_task.rs` continues to serve content -/// search via ripgrep — this trait exists as a forward-compatible hook for -/// indexed backends (ChromaDB, tantivy, …) arriving in Phase 3+. -#[async_trait] -pub trait SearchBackend: Send + Sync { - // Deliberately left without methods; Phase 3 adds the concrete - // query/result shapes when a real indexed backend lands. -} - -#[async_trait] -pub trait DagBackend: Send + Sync { - async fn append_dag_edge(&self, edge: DagEdge) -> Result<()>; - async fn get_children(&self, parent: Uuid) -> Result>; - async fn get_dag_edges(&self, root: Uuid) -> Result>; -} - -#[async_trait] -pub trait MetaEventsBackend: Send + Sync { - async fn append_meta_events( - &self, - scroll_id: Uuid, - events: Vec, - ) -> Result<()>; - async fn get_meta_events(&self, scroll_id: Uuid) -> Result>; - async fn update_meta_session_status( - &self, - scroll_id: Uuid, - is_connected: bool, - current_session_id: Option, - ) -> Result<()>; - async fn list_meta_sessions(&self) -> Result>; - async fn find_meta_session_by_client( - &self, - client_id: &str, - ) -> Result>; -} - -#[async_trait] -pub trait ConnectorRegistryBackend: Send + Sync { - async fn put_connector(&self, record: ConnectorRecord) -> Result<()>; - async fn get_connector(&self, connector_uid: Uuid) -> Result>; - async fn list_connectors(&self) -> Result>; - async fn resolve_connector_uid(&self, client_native_id: &str) -> Result>; - async fn update_connector_fingerprint( - &self, - connector_uid: Uuid, - fingerprint: String, - ) -> Result<()>; -} - -#[async_trait] -pub trait SessionMappingBackend: Send + Sync { - 
async fn put_mapping( - &self, - connector_uid: Uuid, - native_session_id: &str, - scroll_id: Uuid, - ) -> Result<()>; - async fn get_mapping( - &self, - connector_uid: Uuid, - native_session_id: &str, - ) -> Result>; - async fn list_mappings_for_connector( - &self, - connector_uid: Uuid, - ) -> Result>; - async fn find_owner(&self, native_session_id: &str) -> Result>; - - /// Replace the entire mapping table for `connector_uid` with `mappings`. - /// - /// Phase 2 uses this to remove an individual mapping — callers read the - /// current table via `list_mappings_for_connector`, filter out the - /// unwanted row, and call this method with the remainder. Implementations - /// must also invalidate any in-memory cache entries that reference the - /// removed rows so subsequent `get_mapping` / `find_owner` calls don't - /// return stale hits. - async fn rewrite_connector_mappings( - &self, - connector_uid: Uuid, - mappings: Vec, - ) -> Result<()>; -} diff --git a/crates/dirigent_archivist/src/backends/jsonl/backend.rs b/crates/dirigent_archivist/src/backends/jsonl/backend.rs deleted file mode 100644 index 4e5ae54..0000000 --- a/crates/dirigent_archivist/src/backends/jsonl/backend.rs +++ /dev/null @@ -1,624 +0,0 @@ -//! `JsonlBackend` — the Phase 2 concrete backend. 
- -use std::collections::{HashMap, HashSet}; -use std::path::PathBuf; - -use async_trait::async_trait; -use chrono::Utc; -use tokio::sync::RwLock; -use uuid::Uuid; - -use crate::backend::{ - ArchiveBackend, ArchiveCapability, CapabilitySet, ConnectorRegistryBackend, - DagBackend, HealthStatus, MetaEventsBackend, SessionMappingBackend, -}; -use crate::error::{ArchivistError, Result}; -use crate::storage::{ - append_ndjson, read_connector_index, read_json, read_ndjson, write_json, ArchivePaths, -}; -use crate::types::{ - ConnectorRecord, MessageCursor, MessagePage, MessageRecord, SessionCompleteness, - SessionKind, SessionListQuery, SessionMapping, SessionMetadata, SessionPage, -}; - -/// NDJSON/JSON/TSV file-based `ArchiveBackend`. -pub struct JsonlBackend { - pub(crate) paths: ArchivePaths, - pub(crate) connector_cache: RwLock>, - pub(crate) session_cache: RwLock>, - pub(crate) capabilities: CapabilitySet, -} - -impl JsonlBackend { - /// Create a new backend rooted at `archive_root`. - /// - /// Creates the required directories (`.contexts`, `.db/connectors`, `.files`) - /// and initializes empty caches. Matches `FileBasedArchivist::new`. - pub async fn new(archive_root: PathBuf) -> Result { - let paths = ArchivePaths::new(archive_root); - - tokio::fs::create_dir_all(paths.root().join(".contexts")).await?; - tokio::fs::create_dir_all(paths.root().join(".db").join("connectors")).await?; - tokio::fs::create_dir_all(paths.root().join(".files")).await?; - - let mut capabilities = HashSet::new(); - capabilities.insert(ArchiveCapability::Dag); - capabilities.insert(ArchiveCapability::MetaEvents); - capabilities.insert(ArchiveCapability::ConnectorRegistry); - capabilities.insert(ArchiveCapability::SessionMapping); - - Ok(Self { - paths, - connector_cache: RwLock::new(HashMap::new()), - session_cache: RwLock::new(HashMap::new()), - capabilities, - }) - } - - /// Filesystem path utilities for this backend. 
- pub fn paths(&self) -> &ArchivePaths { - &self.paths - } - - /// Read and chronologically sort all messages for a session. - /// - /// See module docs for the append-order vs. chronological-order rationale. - pub(crate) async fn read_messages_sorted( - &self, - scroll_id: Uuid, - ) -> Result> { - let path = self.paths.messages_path_for_read(scroll_id); - let mut msgs: Vec = - read_ndjson(&path).await.unwrap_or_default(); - msgs.sort_by(|a, b| { - a.ts.cmp(&b.ts).then(a.message_id.cmp(&b.message_id)) - }); - Ok(msgs) - } - - /// Locate the (connector_uid, native_session_id) owning `scroll_id` by - /// scanning the session cache first, then each connector's session - /// mapping files on disk. - async fn find_mapping_for_scroll_id(&self, scroll_id: Uuid) -> Option<(Uuid, String)> { - // Check cache first - { - let cache = self.session_cache.read().await; - for ((connector_uid, native_id), cached_scroll_id) in cache.iter() { - if *cached_scroll_id == scroll_id { - return Some((*connector_uid, native_id.clone())); - } - } - } - - // Cache miss: scan connector index and each connector's sessions file - let index_path = self.paths.connector_index_tsv(); - let rows = match read_connector_index(&index_path).await { - Ok(rows) => rows, - Err(_) => return None, - }; - - for row in &rows { - let sessions_path = self.paths.sessions_path_for_read(row.connector_uid); - let mappings: Vec = match read_ndjson(&sessions_path).await { - Ok(m) => m, - Err(_) => continue, - }; - for mapping in mappings { - if mapping.scroll_id == scroll_id { - return Some((row.connector_uid, mapping.native_session_id)); - } - } - } - - None - } - - /// Load every session for a connector, including hidden ones. Used by - /// `list_sessions_paged` — it applies visibility filters itself. 
- async fn load_sessions_for_connector( - &self, - connector_uid: Uuid, - ) -> Result> { - let sessions_path = self.paths.sessions_path_for_read(connector_uid); - let mappings: Vec = read_ndjson(&sessions_path).await?; - - let mut sessions = Vec::new(); - for mapping in mappings { - let session_json_path = self.paths.session_json(mapping.scroll_id); - match read_json::(&session_json_path).await { - Ok(metadata) => sessions.push(metadata), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - tracing::debug!( - scroll_id = %mapping.scroll_id, - "session.json missing, surfacing as Discovered stub" - ); - sessions.push(SessionMetadata { - version: 1, - scroll_id: mapping.scroll_id, - created_at: mapping.created_at, - updated_at: mapping.created_at, - title: None, - connector_uid, - native_session_id: Some(mapping.native_session_id.clone()), - agent_id: None, - parent_scroll_id: None, - continuation: None, - tags: Vec::new(), - metadata: serde_json::json!({}), - no_update: false, - kind: SessionKind::Chat, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - completeness: SessionCompleteness::Discovered, - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - }); - } - Err(e) => return Err(e.into()), - } - } - Ok(sessions) - } -} - -/// Returns true if `session` satisfies every filter in `query`. -/// -/// `connector_uid` is already honored by the caller (it picks which connector -/// directories to scan), so we do not re-check it here. 
-fn matches_query( - session: &SessionMetadata, - query: &crate::types::SessionListQuery, -) -> bool { - // Visibility - if !query.include_hidden && (session.no_update || session.is_subagent) { - return false; - } - - // Project scope — project_ids lives in metadata.project_id - if !query.project_ids.is_empty() { - let session_project_id = session - .metadata - .get("project_id") - .and_then(|v| v.as_str()); - match session_project_id { - Some(pid) => { - if !query.project_ids.iter().any(|q| q.as_str() == pid) { - return false; - } - } - None => return false, - } - } - - // Project path filter — exact match on metadata.project_path - if let Some(ref path) = query.project_path { - let session_path = session - .metadata - .get("project_path") - .and_then(|v| v.as_str()); - if session_path != Some(path.as_str()) { - return false; - } - } - - // Title filter — case-insensitive substring. - if let Some(q) = query.title_query.as_ref() { - let needle = q.to_lowercase(); - let haystack = match session.title.as_ref() { - Some(t) => t.to_lowercase(), - None => return false, - }; - if !haystack.contains(&needle) { - return false; - } - } - - // Tag filter — all requested tags must be present on the session. - if !query.tags.is_empty() { - for required in &query.tags { - if !session.tags.iter().any(|t| t == required) { - return false; - } - } - } - - // Model filter — case-insensitive substring on metadata.model. 
- if let Some(q) = query.model_filter.as_ref() { - let needle = q.to_lowercase(); - let haystack = session - .metadata - .get("model") - .and_then(|v| v.as_str()) - .map(|s| s.to_lowercase()); - match haystack { - Some(h) if h.contains(&needle) => {} - _ => return false, - } - } - - true -} - -#[async_trait] -impl ArchiveBackend for JsonlBackend { - fn capabilities(&self) -> &CapabilitySet { - &self.capabilities - } - - async fn health_check(&self) -> HealthStatus { - match tokio::fs::metadata(self.paths.root()).await { - Ok(m) if m.is_dir() => HealthStatus::Healthy, - Ok(_) => HealthStatus::Unavailable { - reason: "archive root is not a directory".into(), - }, - Err(e) => HealthStatus::Unavailable { - reason: format!("stat archive root failed: {e}"), - }, - } - } - - async fn put_session(&self, meta: SessionMetadata) -> Result<()> { - tokio::fs::create_dir_all(&self.paths.session_dir(meta.scroll_id)).await?; - write_json(&self.paths.session_json(meta.scroll_id), &meta).await?; - Ok(()) - } - - async fn get_session(&self, scroll_id: Uuid) -> Result> { - // FileBasedArchivist ignores archive parameter (single-archive only) - let session_json_path = self.paths.session_json(scroll_id); - - match read_json(&session_json_path).await { - Ok(metadata) => Ok(Some(metadata)), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), - Err(e) => Err(e.into()), - } - } - - async fn list_sessions_paged(&self, query: SessionListQuery) -> Result { - use crate::types::{SessionCursor, SessionPage, MAX_PAGE_LIMIT}; - - // Determine which connectors to scan. - let connector_uids: Vec = if !query.connector_uids.is_empty() { - query.connector_uids.clone() - } else { - // Iterate every primary (non-alias) connector. 
- let index_path = self.paths.connector_index_tsv(); - let connectors = read_connector_index(&index_path).await?; - connectors - .into_iter() - .filter(|c| c.alias_of.is_none()) - .map(|c| c.connector_uid) - .collect() - }; - - // Stream matching sessions from every selected connector. - let mut matched: Vec = Vec::new(); - - for connector_uid in connector_uids { - let sessions = match self.load_sessions_for_connector(connector_uid).await { - Ok(s) => s, - Err(e) => { - tracing::warn!( - connector_uid = %connector_uid, - error = %e, - "Failed to list sessions for connector during paged scan, skipping" - ); - continue; - } - }; - - for session in sessions { - if !matches_query(&session, &query) { - continue; - } - matched.push(session); - } - } - - // Sort by (updated_at DESC, scroll_id DESC). - matched.sort_by(|a, b| { - b.updated_at - .cmp(&a.updated_at) - .then_with(|| b.scroll_id.cmp(&a.scroll_id)) - }); - - // Skip entries at-or-before the cursor. - if let Some(cursor) = query.cursor.as_ref() { - matched.retain(|s| { - (s.updated_at, s.scroll_id) < (cursor.updated_at, cursor.scroll_id) - }); - } - - // Capture total count before slicing. - let total_count = matched.len(); - - // Clamp limit and paginate. 
- let effective_limit = query.limit.min(MAX_PAGE_LIMIT).max(1); - let has_more = matched.len() > effective_limit; - matched.truncate(effective_limit); - - let next_cursor = if has_more { - matched.last().map(|s| SessionCursor { - updated_at: s.updated_at, - scroll_id: s.scroll_id, - }) - } else { - None - }; - - Ok(SessionPage { - items: matched, - next_cursor, - total_count: Some(total_count), - }) - } - - async fn delete_session(&self, scroll_id: Uuid) -> Result<()> { - // FileBasedArchivist ignores archive parameter (single-archive only) - - // First, read session metadata to get connector_uid and native_session_id - let session_dir = self.paths.session_dir(scroll_id); - let session_json_path = self.paths.session_json(scroll_id); - - if !session_dir.exists() { - return Err(ArchivistError::SessionUnknown(scroll_id)); - } - - // Read session metadata to get connector info - let metadata: SessionMetadata = read_json(&session_json_path).await?; - let connector_uid = metadata.connector_uid; - let native_session_id = metadata.native_session_id.clone(); - - // Delete the session directory and all its contents - tokio::fs::remove_dir_all(&session_dir).await.map_err(|e| { - tracing::error!("Failed to delete session directory {:?}: {}", session_dir, e); - ArchivistError::Io(e) - })?; - - tracing::info!( - "Deleted session directory for scroll_id: {}", - scroll_id - ); - - // Remove from session cache - if let Some(native_id) = &native_session_id { - let mut cache = self.session_cache.write().await; - cache.remove(&(connector_uid, native_id.clone())); - } - - // Note: We're not removing from sessions.ndjson because it's append-only. - // The session simply won't have a directory anymore, so list_sessions will skip it. - // A future enhancement could add a "deleted" flag or periodic compaction. 
- - tracing::info!( - "Successfully deleted session {} (connector: {})", - scroll_id, - connector_uid - ); - - Ok(()) - } - - async fn append_messages( - &self, - scroll_id: Uuid, - messages: Vec, - ) -> Result<()> { - // Ensure session directory exists (handles resync case where directory was deleted) - self.paths.ensure_dirs(scroll_id).await?; - - // Append each message to messages.jsonl - let messages_path = self.paths.messages_path_for_write(scroll_id); - for message in &messages { - append_ndjson(&messages_path, message).await?; - } - - // Update session.json timestamp (or create if missing) - let session_json_path = self.paths.session_json(scroll_id); - let now = Utc::now(); - - let session_metadata = match read_json::(&session_json_path).await { - Ok(mut metadata) => { - metadata.updated_at = now; - metadata - } - Err(_) => { - // session.json doesn't exist, create minimal metadata - // This handles resync case where directory was deleted but mapping still exists - tracing::info!( - scroll_id = %scroll_id, - "Creating minimal session.json during append (was missing)" - ); - - // Look up the correct connector_uid and native_session_id via session mappings - let (connector_uid, native_session_id) = match self.find_mapping_for_scroll_id(scroll_id).await { - Some(mapping) => mapping, - None => { - tracing::error!( - scroll_id = %scroll_id, - "Cannot reconstruct session.json: no connector mapping found. \ - Messages written but session metadata will remain missing." 
- ); - return Ok(()); - } - }; - - SessionMetadata { - version: 1, - scroll_id, - created_at: now, - updated_at: now, - title: None, - connector_uid, - native_session_id: Some(native_session_id), - agent_id: None, - parent_scroll_id: None, - continuation: None, - tags: Vec::new(), - metadata: serde_json::json!({}), - no_update: false, - kind: SessionKind::Chat, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - completeness: SessionCompleteness::default(), - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - } - } - }; - - write_json(&session_json_path, &session_metadata).await?; - - Ok(()) - } - - async fn get_messages_paged( - &self, - scroll_id: Uuid, - cursor: Option, - limit: usize, - ) -> Result { - use crate::types::MAX_PAGE_LIMIT; - - // Hard-clamp limit — same policy as sessions. - let effective_limit = limit.min(MAX_PAGE_LIMIT).max(1); - - // Read NDJSON, sort, apply cursor. - let mut all = self.read_messages_sorted(scroll_id).await?; - - if let Some(c) = cursor.as_ref() { - // Keep strictly-after the cursor point in (ts, message_id) order. 
- all.retain(|m| (m.ts, m.message_id) > (c.ts, c.message_id)); - } - - let total = all.len(); - let taken: Vec<_> = all.into_iter().take(effective_limit).collect(); - - let next_cursor = if total > taken.len() { - taken.last().map(|m| MessageCursor { - ts: m.ts, - message_id: m.message_id, - }) - } else { - None - }; - - Ok(MessagePage { - items: taken, - next_cursor, - }) - } - - async fn count_messages(&self, scroll_id: Uuid) -> Result { - let messages_path = self.paths.messages_path_for_read(scroll_id); - - // Read file and count lines (each line = one message) - // If file doesn't exist, return 0 (empty session) - match tokio::fs::read_to_string(&messages_path).await { - Ok(content) => { - // Count non-empty lines - let count = content.lines().filter(|line| !line.trim().is_empty()).count(); - Ok(count) - } - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - // File doesn't exist yet - empty session - Ok(0) - } - Err(e) => Err(e.into()), - } - } - - async fn clear_session_messages(&self, scroll_id: Uuid) -> Result<()> { - // First, verify the session exists by reading its metadata - let session_json_path = self.paths.session_json(scroll_id); - let mut session_metadata: SessionMetadata = match read_json(&session_json_path).await { - Ok(metadata) => metadata, - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - return Err(ArchivistError::SessionUnknown(scroll_id)); - } - Err(e) => return Err(e.into()), - }; - - // Truncate the messages file (clear all messages) - // First try to clear .jsonl (new format), then fall back to .ndjson (legacy) - let jsonl_path = self.paths.messages_path_for_write(scroll_id); - #[allow(deprecated)] - let ndjson_path = self.paths.messages_ndjson(scroll_id); - - let mut cleared = false; - - // Clear .jsonl if it exists - if jsonl_path.exists() { - tokio::fs::write(&jsonl_path, "").await?; - cleared = true; - } - - // Also clear .ndjson if it exists (in case both are present) - if ndjson_path.exists() { - 
tokio::fs::write(&ndjson_path, "").await?; - cleared = true; - } - - if cleared { - tracing::info!( - scroll_id = %scroll_id, - "Cleared all messages from session" - ); - } - - // Update the session's updated_at timestamp - session_metadata.updated_at = Utc::now(); - write_json(&session_json_path, &session_metadata).await?; - - tracing::info!( - scroll_id = %scroll_id, - "Updated session metadata after clearing messages" - ); - - Ok(()) - } - - fn as_dag(&self) -> Option<&dyn DagBackend> { - Some(self) - } - fn as_meta_events(&self) -> Option<&dyn MetaEventsBackend> { - Some(self) - } - fn as_connector_registry(&self) -> Option<&dyn ConnectorRegistryBackend> { - Some(self) - } - fn as_session_mapping(&self) -> Option<&dyn SessionMappingBackend> { - Some(self) - } -} - -#[cfg(test)] -mod contract_tests { - use super::*; - use tempfile::tempdir; - - #[tokio::test] - async fn jsonl_backend_honors_all_contracts() { - let dir = tempdir().expect("tempdir"); - let backend = JsonlBackend::new(dir.path().to_path_buf()) - .await - .expect("new"); - crate::backend::contract::verify_all_contracts(&backend).await; - } -} diff --git a/crates/dirigent_archivist/src/backends/jsonl/connectors.rs b/crates/dirigent_archivist/src/backends/jsonl/connectors.rs deleted file mode 100644 index 2373e5b..0000000 --- a/crates/dirigent_archivist/src/backends/jsonl/connectors.rs +++ /dev/null @@ -1,161 +0,0 @@ -//! `ConnectorRegistryBackend` impl for `JsonlBackend`. 
- -use async_trait::async_trait; -use uuid::Uuid; - -use crate::backend::ConnectorRegistryBackend; -use crate::backends::jsonl::backend::JsonlBackend; -use crate::error::{ArchivistError, Result}; -use crate::storage::{ - read_connector_index, read_json, write_connector_index, write_json, -}; -use crate::types::{ConnectorIndexRow, ConnectorRecord}; - -#[async_trait] -impl ConnectorRegistryBackend for JsonlBackend { - async fn put_connector(&self, record: ConnectorRecord) -> Result<()> { - // Write connector.json - let connector_dir = self.paths.connector_dir(record.connector_uid); - tokio::fs::create_dir_all(&connector_dir).await?; - write_json(&connector_dir.join("connector.json"), &record).await?; - - // Append row to index.tsv (read-modify-write). - let index_path = self.paths.connector_index_tsv(); - let mut rows = read_connector_index(&index_path).await?; - rows.push(ConnectorIndexRow { - connector_uid: record.connector_uid, - r#type: record.r#type.clone(), - title: record.title.clone(), - client_native_id: record.client_native_id.clone(), - alias_of: record.alias_of, - created_at: record.created_at, - fingerprint: record.fingerprint.clone(), - }); - write_connector_index(&index_path, &rows).await?; - - // Update cache - self.connector_cache - .write() - .await - .insert(record.connector_uid, record); - - Ok(()) - } - - async fn get_connector(&self, connector_uid: Uuid) -> Result> { - // Fast path: consult the in-memory cache. - { - let cache = self.connector_cache.read().await; - if let Some(record) = cache.get(&connector_uid) { - return Ok(Some(record.clone())); - } - } - - // Disk fallback. 
- let connector_json = self - .paths - .connector_dir(connector_uid) - .join("connector.json"); - match read_json::(&connector_json).await { - Ok(record) => Ok(Some(record)), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), - Err(e) => Err(e.into()), - } - } - - async fn list_connectors(&self) -> Result> { - let index_path = self.paths.connector_index_tsv(); - let rows = read_connector_index(&index_path).await?; - let mut connectors = Vec::new(); - for row in rows { - if row.alias_of.is_some() { - continue; - } - let connector_json = self - .paths - .connector_dir(row.connector_uid) - .join("connector.json"); - match read_json::(&connector_json).await { - Ok(record) => connectors.push(record), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, - Err(e) => return Err(e.into()), - } - } - Ok(connectors) - } - - async fn resolve_connector_uid( - &self, - client_native_id: &str, - ) -> Result> { - // First, try parsing client_native_id as a UUID directly - // This handles the common case where the connector_id IS the UUID - if let Ok(uuid) = Uuid::parse_str(client_native_id) { - // Check if this UUID is a registered connector_uid in cache - let cache = self.connector_cache.read().await; - if cache.contains_key(&uuid) { - return Ok(Some(uuid)); - } - drop(cache); - - // Check on disk if not in cache - let connector_json = self.paths.connector_dir(uuid).join("connector.json"); - if connector_json.exists() { - return Ok(Some(uuid)); - } - } - - // Not a UUID or not registered as a connector_uid - search by client_native_id - // Load connector index and find by client_native_id - let index_path = self.paths.connector_index_tsv(); - let connectors = read_connector_index(&index_path).await?; - - if let Some(connector) = connectors - .iter() - .find(|c| c.client_native_id == client_native_id) - { - return Ok(Some(connector.connector_uid)); - } - - // Not found - return Ok(None). Error wrapping is a coordinator concern. 
- tracing::warn!( - "Failed to resolve connector_uid for client_native_id '{}'. \ - This connector may not be registered with the archivist.", - client_native_id - ); - - Ok(None) - } - - async fn update_connector_fingerprint( - &self, - connector_uid: Uuid, - fingerprint: String, - ) -> Result<()> { - // 1. Read and update connector.json - let connector_dir = self.paths.connector_dir(connector_uid); - let connector_json = connector_dir.join("connector.json"); - let mut record: ConnectorRecord = read_json(&connector_json) - .await - .map_err(|_| ArchivistError::ConnectorUnknown(connector_uid))?; - - record.fingerprint = Some(fingerprint.clone()); - write_json(&connector_json, &record).await?; - - // 2. Update in-memory cache - self.connector_cache - .write() - .await - .insert(connector_uid, record); - - // 3. Update index.tsv - let index_path = self.paths.connector_index_tsv(); - let mut rows = read_connector_index(&index_path).await?; - if let Some(row) = rows.iter_mut().find(|r| r.connector_uid == connector_uid) { - row.fingerprint = Some(fingerprint); - } - write_connector_index(&index_path, &rows).await?; - - Ok(()) - } -} diff --git a/crates/dirigent_archivist/src/backends/jsonl/dag.rs b/crates/dirigent_archivist/src/backends/jsonl/dag.rs deleted file mode 100644 index 4c70fb2..0000000 --- a/crates/dirigent_archivist/src/backends/jsonl/dag.rs +++ /dev/null @@ -1,69 +0,0 @@ -//! `DagBackend` impl for `JsonlBackend`. 
- -use async_trait::async_trait; -use uuid::Uuid; - -use crate::backend::DagBackend; -use crate::backends::jsonl::backend::JsonlBackend; -use crate::error::Result; -use crate::storage::{append_ndjson, read_ndjson}; -use crate::types::{DagEdge, SessionMetadata}; - -#[async_trait] -impl DagBackend for JsonlBackend { - async fn append_dag_edge(&self, edge: DagEdge) -> Result<()> { - let dag_path = self.paths.dag_path(); - if let Some(parent) = dag_path.parent() { - tokio::fs::create_dir_all(parent).await?; - } - append_ndjson(&dag_path, &edge).await?; - Ok(()) - } - - async fn get_children(&self, parent: Uuid) -> Result> { - let dag_path = self.paths.dag_path(); - let edges: Vec = read_ndjson(&dag_path).await.unwrap_or_default(); - - let child_ids: Vec = edges - .iter() - .filter(|e| e.parent == parent) - .map(|e| e.child) - .collect(); - - let mut children = Vec::new(); - for child_id in child_ids { - match crate::backend::ArchiveBackend::get_session(self, child_id).await { - Ok(Some(meta)) => children.push(meta), - Ok(None) => { - tracing::warn!( - child_scroll_id = %child_id, - "DAG child session not found" - ); - } - Err(e) => { - tracing::warn!( - child_scroll_id = %child_id, - error = %e, - "DAG child session not found" - ); - } - } - } - - Ok(children) - } - - async fn get_dag_edges(&self, root: Uuid) -> Result> { - // Single-level read: return edges whose parent == root. - // The recursive DAG walk is coordinator-level orchestration. - let dag_path = self.paths.dag_path(); - let all_edges: Vec = read_ndjson(&dag_path).await.unwrap_or_default(); - - let edges = all_edges - .into_iter() - .filter(|e| e.parent == root) - .collect(); - - Ok(edges) - } -} diff --git a/crates/dirigent_archivist/src/backends/jsonl/mapping.rs b/crates/dirigent_archivist/src/backends/jsonl/mapping.rs deleted file mode 100644 index b9c8b00..0000000 --- a/crates/dirigent_archivist/src/backends/jsonl/mapping.rs +++ /dev/null @@ -1,179 +0,0 @@ -//! 
`SessionMappingBackend` impl for `JsonlBackend`. - -use async_trait::async_trait; -use chrono::Utc; -use uuid::Uuid; - -use crate::backend::SessionMappingBackend; -use crate::backends::jsonl::backend::JsonlBackend; -use crate::error::Result; -use crate::storage::{append_ndjson, read_connector_index, read_ndjson, write_ndjson}; -use crate::types::SessionMapping; - -#[async_trait] -impl SessionMappingBackend for JsonlBackend { - async fn put_mapping( - &self, - connector_uid: Uuid, - native_session_id: &str, - scroll_id: Uuid, - ) -> Result<()> { - // Ported from the mapping-persistence tail of - // `FileBasedArchivist::register_session`: ensure the connector - // directory exists, append a `SessionMapping` row to - // `.db/connectors/{uid}/sessions.jsonl`, and prime `session_cache`. - // - // No alias detection — the caller has already chosen `scroll_id`. - let now = Utc::now(); - - // Ensure connector directory exists before appending. - self.paths.ensure_connector_dir(connector_uid).await?; - - let session_mapping = SessionMapping { - version: 1, - connector_uid, - native_session_id: native_session_id.to_string(), - scroll_id, - created_at: now, - alias_of: None, - }; - - let sessions_write_path = self.paths.sessions_path_for_write(connector_uid); - append_ndjson(&sessions_write_path, &session_mapping).await?; - - // Prime the in-memory cache for fast resolution. - self.session_cache - .write() - .await - .insert((connector_uid, native_session_id.to_string()), scroll_id); - - Ok(()) - } - - async fn get_mapping( - &self, - connector_uid: Uuid, - native_session_id: &str, - ) -> Result> { - // Ported from `FileBasedArchivist::resolve_session`. Cache-first - // lookup; on miss, scan the connector's sessions file and populate - // the cache on hit. Unlike the archivist trait, a miss returns - // `Ok(None)` instead of `Err(SessionUnknown)`. 
- - // Check cache first - let cache_key = (connector_uid, native_session_id.to_string()); - { - let cache = self.session_cache.read().await; - if let Some(&scroll_id) = cache.get(&cache_key) { - return Ok(Some(scroll_id)); - } - } - - // Cache miss - load from disk - let sessions_path = self.paths.sessions_path_for_read(connector_uid); - let mappings: Vec = read_ndjson(&sessions_path).await?; - - // Find mapping by native_session_id - if let Some(mapping) = mappings - .iter() - .find(|m| m.native_session_id == native_session_id) - { - // Update cache - self.session_cache - .write() - .await - .insert(cache_key, mapping.scroll_id); - Ok(Some(mapping.scroll_id)) - } else { - Ok(None) - } - } - - async fn list_mappings_for_connector( - &self, - connector_uid: Uuid, - ) -> Result> { - // Read `.db/connectors/{uid}/sessions.jsonl` (with `.ndjson` - // fallback handled by `sessions_path_for_read` + `read_ndjson`). - let sessions_path = self.paths.sessions_path_for_read(connector_uid); - let mappings: Vec = - read_ndjson(&sessions_path).await.unwrap_or_default(); - Ok(mappings) - } - - async fn find_owner( - &self, - native_session_id: &str, - ) -> Result> { - // Ported verbatim from `FileBasedArchivist::find_session_owner`. 
- - // Fast path: scan in-memory session_cache - { - let cache = self.session_cache.read().await; - for ((connector_uid, cached_native_id), scroll_id) in cache.iter() { - if cached_native_id == native_session_id { - return Ok(Some((*connector_uid, *scroll_id))); - } - } - } - - // Slow path: read connector index and scan each connector's sessions file - let index_path = self.paths.connector_index_tsv(); - let rows = read_connector_index(&index_path).await?; - - for row in &rows { - // Skip alias connectors - only search primary connectors - if row.alias_of.is_some() { - continue; - } - - let sessions_path = self.paths.sessions_path_for_read(row.connector_uid); - let mappings: Vec = read_ndjson(&sessions_path).await?; - - if let Some(mapping) = mappings - .iter() - .find(|m| m.native_session_id == native_session_id) - { - // Cache the found mapping for future lookups - let cache_key = (row.connector_uid, native_session_id.to_string()); - self.session_cache - .write() - .await - .insert(cache_key, mapping.scroll_id); - - return Ok(Some((row.connector_uid, mapping.scroll_id))); - } - } - - Ok(None) - } - - async fn rewrite_connector_mappings( - &self, - connector_uid: Uuid, - mappings: Vec, - ) -> Result<()> { - // Ensure the connector directory exists before we write. - self.paths.ensure_connector_dir(connector_uid).await?; - - // Invalidate cache entries for this connector first, then re-prime - // from the new mapping set. Any (connector_uid, native_id) entry not - // present in `mappings` is dropped. - { - let mut cache = self.session_cache.write().await; - cache.retain(|(cu, _), _| *cu != connector_uid); - for m in &mappings { - cache.insert( - (connector_uid, m.native_session_id.clone()), - m.scroll_id, - ); - } - } - - // Truncate + re-write the canonical `.jsonl` table. 
- let write_path = self.paths.sessions_path_for_write(connector_uid); - write_ndjson(&write_path, &mappings).await?; - - Ok(()) - } -} diff --git a/crates/dirigent_archivist/src/backends/jsonl/meta.rs b/crates/dirigent_archivist/src/backends/jsonl/meta.rs deleted file mode 100644 index 90321fe..0000000 --- a/crates/dirigent_archivist/src/backends/jsonl/meta.rs +++ /dev/null @@ -1,200 +0,0 @@ -//! `MetaEventsBackend` impl for `JsonlBackend`. - -use async_trait::async_trait; -use chrono::Utc; -use uuid::Uuid; - -use crate::backend::MetaEventsBackend; -use crate::backends::jsonl::backend::JsonlBackend; -use crate::error::{ArchivistError, Result}; -use crate::storage::{append_ndjson, read_json, read_ndjson, write_json}; -use crate::types::{ - MetaEventRecord, SessionCompleteness, SessionKind, SessionMetadata, -}; - -#[async_trait] -impl MetaEventsBackend for JsonlBackend { - async fn append_meta_events( - &self, - scroll_id: Uuid, - events: Vec, - ) -> Result<()> { - // Ensure session directory exists - self.paths.ensure_dirs(scroll_id).await?; - - // Append each event to events.jsonl - let events_path = self.paths.events_path(scroll_id); - for event in &events { - append_ndjson(&events_path, event).await?; - } - - // Update session.json timestamp - let session_json_path = self.paths.session_json(scroll_id); - let now = Utc::now(); - - let session_metadata = match read_json::(&session_json_path).await { - Ok(mut metadata) => { - metadata.updated_at = now; - metadata - } - Err(_) => { - // session.json doesn't exist, this shouldn't happen for meta sessions - // but we'll handle it gracefully - tracing::warn!( - scroll_id = %scroll_id, - "session.json missing when appending meta events, creating minimal metadata" - ); - - SessionMetadata { - version: 1, - scroll_id, - created_at: now, - updated_at: now, - title: None, - connector_uid: scroll_id, // Use scroll_id as placeholder - native_session_id: None, - agent_id: None, - parent_scroll_id: None, - continuation: None, - 
tags: Vec::new(), - metadata: serde_json::json!({}), - no_update: false, - kind: SessionKind::AcpConnection, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - completeness: SessionCompleteness::default(), - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - } - } - }; - - write_json(&session_json_path, &session_metadata).await?; - - Ok(()) - } - - async fn get_meta_events(&self, scroll_id: Uuid) -> Result> { - let events_path = self.paths.events_path(scroll_id); - - // Read events from events.jsonl - let mut events: Vec = read_ndjson(&events_path) - .await - .unwrap_or_else(|_| Vec::new()); - - // Sort by timestamp then event_id for stable ordering - events.sort_by(|a, b| { - a.ts.cmp(&b.ts).then_with(|| a.event_id.cmp(&b.event_id)) - }); - - Ok(events) - } - - async fn update_meta_session_status( - &self, - scroll_id: Uuid, - is_connected: bool, - current_session_id: Option, - ) -> Result<()> { - // Load existing session metadata - let session_json_path = self.paths.session_json(scroll_id); - - let mut session_metadata: SessionMetadata = match read_json(&session_json_path).await { - Ok(metadata) => metadata, - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - return Err(ArchivistError::SessionUnknown(scroll_id)); - } - Err(e) => return Err(e.into()), - }; - - // Update connection status fields - session_metadata.is_connected = Some(is_connected); - session_metadata.current_session_id = current_session_id; - session_metadata.updated_at = Utc::now(); - - // Write updated metadata back to disk - write_json(&session_json_path, &session_metadata).await?; - - tracing::info!( - scroll_id = %scroll_id, - is_connected = %is_connected, - current_session_id = ?current_session_id, - "Updated meta session status" - ); - - Ok(()) - } - - async fn list_meta_sessions(&self) -> Result> { - 
// Scan .contexts/ directory for all session.json files - let contexts_dir = self.paths.root().join(".contexts"); - - if !contexts_dir.exists() { - return Ok(Vec::new()); - } - - let mut meta_sessions = Vec::new(); - - // Read all session directories - let mut entries = tokio::fs::read_dir(&contexts_dir).await?; - - while let Some(entry) = entries.next_entry().await? { - if !entry.file_type().await?.is_dir() { - continue; - } - - let session_json_path = entry.path().join("session.json"); - - // Try to read session.json - match read_json::(&session_json_path).await { - Ok(metadata) => { - // Filter to only AcpConnection sessions - if metadata.kind == SessionKind::AcpConnection { - meta_sessions.push(metadata); - } - } - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - // Skip missing session files - continue; - } - Err(e) => { - tracing::warn!( - path = ?session_json_path, - error = %e, - "Failed to read session.json while listing meta sessions" - ); - continue; - } - } - } - - // Sort by updated_at descending (newest first) - meta_sessions.sort_by(|a, b| b.updated_at.cmp(&a.updated_at)); - - Ok(meta_sessions) - } - - async fn find_meta_session_by_client( - &self, - client_id: &str, - ) -> Result> { - // Use list_meta_sessions and filter by acp_client_id - let meta_sessions = self.list_meta_sessions().await?; - - let result = meta_sessions - .into_iter() - .find(|session| { - session.acp_client_id.as_deref() == Some(client_id) - }); - - Ok(result) - } -} diff --git a/crates/dirigent_archivist/src/backends/jsonl/mod.rs b/crates/dirigent_archivist/src/backends/jsonl/mod.rs deleted file mode 100644 index 01f9a00..0000000 --- a/crates/dirigent_archivist/src/backends/jsonl/mod.rs +++ /dev/null @@ -1,12 +0,0 @@ -//! NDJSON/JSON/TSV file-based backend. -//! -//! Ports the body of the former `FileBasedArchivist`. Uses the existing -//! `crate::storage` free-function primitives unchanged. 
- -mod backend; -mod connectors; -mod dag; -mod mapping; -mod meta; - -pub use backend::JsonlBackend; diff --git a/crates/dirigent_archivist/src/backends/mod.rs b/crates/dirigent_archivist/src/backends/mod.rs deleted file mode 100644 index 1866265..0000000 --- a/crates/dirigent_archivist/src/backends/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Concrete backend implementations for `ArchiveBackend`. - -pub mod jsonl; - -pub use jsonl::JsonlBackend; diff --git a/crates/dirigent_archivist/src/backfill.rs b/crates/dirigent_archivist/src/backfill.rs deleted file mode 100644 index 27c6cbe..0000000 --- a/crates/dirigent_archivist/src/backfill.rs +++ /dev/null @@ -1,558 +0,0 @@ -//! Backfill functionality for importing existing sessions from connectors. -//! -//! This module provides utilities to import sessions and messages from connectors -//! that support listing operations (like OpenCode connectors) into the Archivist. - -use futures::future::BoxFuture; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::types::{MessageRecord, RegisterSessionRequest, RegisterStatus}; -use dirigent_protocol::{Message, Session}; - -/// Statistics collected during a backfill operation. -/// -/// This provides a summary of what was imported and any errors encountered. 
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -pub struct BackfillStats { - /// Total number of sessions found in the connector - pub sessions_found: usize, - /// Number of sessions successfully imported (new registrations) - pub sessions_imported: usize, - /// Number of sessions skipped (already archived) - pub sessions_skipped: usize, - /// Total number of messages imported across all sessions - pub messages_imported: usize, - /// Error messages for sessions that failed to import - pub errors: Vec, -} - -impl BackfillStats { - /// Create a new BackfillStats with all counts at zero - pub fn new() -> Self { - Self { - sessions_found: 0, - sessions_imported: 0, - sessions_skipped: 0, - messages_imported: 0, - errors: Vec::new(), - } - } -} - -impl Default for BackfillStats { - fn default() -> Self { - Self::new() - } -} - -/// Backfill sessions from a connector into the archive. -/// -/// This function imports existing sessions from a connector by: -/// 1. Attempting to register each session with the archivist -/// 2. For newly registered sessions, fetching messages via the provided closure -/// 3. Appending fetched messages to the archive -/// 4. Collecting statistics on successes, failures, and skips -/// -/// # Arguments -/// -/// * `archivist` - The archivist to backfill into -/// * `connector_uid` - The UID of the connector being backfilled -/// * `sessions` - List of sessions to import (from connector's list_sessions()) -/// * `fetch_messages` - Async closure to fetch messages for a given native session ID -/// -/// # Returns -/// -/// Statistics about the backfill operation including counts and errors -/// -/// # Error Handling -/// -/// This function continues processing all sessions even if individual sessions fail. -/// Errors are collected in `BackfillStats.errors` rather than aborting the operation. 
-/// -/// # Example -/// -/// ```no_run -/// use dirigent_archivist::{Archivist, backfill_from_sessions}; -/// use dirigent_protocol::{Session, Message}; -/// use uuid::Uuid; -/// -/// # async fn example(archivist: &Archivist, sessions: Vec) { -/// let connector_uid = Uuid::now_v7(); -/// -/// let stats = backfill_from_sessions( -/// archivist, -/// connector_uid, -/// sessions, -/// |session_id| { -/// Box::pin(async move { -/// // Fetch messages from connector -/// // Return Vec -/// Ok(vec![]) -/// }) -/// } -/// ).await.unwrap(); -/// -/// println!("Imported {} sessions, {} messages", -/// stats.sessions_imported, -/// stats.messages_imported); -/// # } -/// ``` -pub async fn backfill_from_sessions( - archivist: &Archivist, - connector_uid: Uuid, - sessions: Vec, - fetch_messages: F, -) -> Result -where - F: Fn(&str) -> BoxFuture<'static, Result>> + Send + Sync, -{ - let mut stats = BackfillStats::new(); - stats.sessions_found = sessions.len(); - - for session in sessions { - let native_session_id = session.id.clone(); - - // Try to resolve the session - if it exists, skip it - match archivist - .resolve_session(connector_uid, &native_session_id, None) - .await - { - Ok(_scroll_id) => { - // Session already archived, skip - stats.sessions_skipped += 1; - continue; - } - Err(ArchivistError::SessionUnknown(_)) => { - // Session not found, proceed with import - } - Err(e) => { - // Unexpected error during resolution - stats.errors.push(format!( - "Failed to resolve session {}: {}", - native_session_id, e - )); - continue; - } - } - - // Register the session - let register_req = RegisterSessionRequest { - connector_uid, - native_session_id: native_session_id.clone(), - title: Some(session.title.clone()), - custom_scroll_id: None, // Let archivist generate - metadata: serde_json::to_value(&session.metadata) - .unwrap_or_else(|_| serde_json::json!({})), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - 
agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let scroll_id = match archivist.register_session(register_req, None).await { - Ok(response) => { - match response.status { - RegisterStatus::Accepted => { - stats.sessions_imported += 1; - response.scroll_id - } - RegisterStatus::Aliased => { - // Already exists (shouldn't happen since we checked, but handle gracefully) - stats.sessions_skipped += 1; - continue; - } - RegisterStatus::Rejected => { - // Registration rejected (collision inconsistency) - stats.errors.push(format!( - "Session registration rejected for {}: UID collision", - native_session_id - )); - continue; - } - } - } - Err(e) => { - stats.errors.push(format!( - "Failed to register session {}: {}", - native_session_id, e - )); - continue; - } - }; - - // Fetch messages for this session - let messages = match fetch_messages(&native_session_id).await { - Ok(msgs) => msgs, - Err(e) => { - stats.errors.push(format!( - "Failed to fetch messages for session {}: {}", - native_session_id, e - )); - continue; - } - }; - - // Convert protocol messages to message records - let message_records: Vec = messages - .into_iter() - .map(|msg| convert_message_to_record(msg, scroll_id)) - .collect(); - - let message_count = message_records.len(); - - // Append messages to the archive - if let Err(e) = archivist - .append_messages(scroll_id, message_records, None) - .await - { - stats.errors.push(format!( - "Failed to append messages for session {}: {}", - native_session_id, e - )); - continue; - } - - stats.messages_imported += message_count; - } - - Ok(stats) -} - -/// Convert a dirigent_protocol::Message to a MessageRecord for archival. -/// -/// This function translates the protocol message format into the archivist's -/// internal storage format, extracting markdown content and metadata. 
-pub fn convert_message_to_record(msg: Message, scroll_id: Uuid) -> MessageRecord { - // Extract text content from message parts and convert to markdown - let mut md_parts = Vec::new(); - for part in &msg.content { - match part { - dirigent_protocol::MessagePart::Text { text } => { - md_parts.push(text.clone()); - } - dirigent_protocol::MessagePart::Thinking { text } => { - md_parts.push(format!("\n{}\n", text)); - } - dirigent_protocol::MessagePart::Code { language, code } => { - md_parts.push(format!("```{}\n{}\n```", language, code)); - } - dirigent_protocol::MessagePart::Tool { - tool, - tool_call_id: _, - input, - output, - } => { - let mut tool_text = - format!("**Tool: {}**\n\nInput:\n```json\n{}\n```", tool, input); - if let Some(out) = output { - tool_text.push_str(&format!("\n\nOutput:\n```json\n{}\n```", out)); - } - md_parts.push(tool_text); - } - dirigent_protocol::MessagePart::File { path, content } => { - md_parts.push(format!("**File: {}**\n\n```\n{}\n```", path, content)); - } - } - } - let content_md = md_parts.join("\n\n"); - - // Serialize original content parts for rich UI rendering - let content_parts = serde_json::to_value(&msg.content).ok(); - - // Convert role - let role = match msg.role { - dirigent_protocol::MessageRole::User => "user", - dirigent_protocol::MessageRole::Assistant => "assistant", - } - .to_string(); - - // Generate message ID from the protocol message ID or create new one - let message_id = Uuid::now_v7(); - - MessageRecord { - version: 1, - message_id, - session: scroll_id, - parent_id: None, - ts: msg.created_at, - role, - author: None, // Protocol messages don't have author field - content_md, - content_parts, - attachments: Vec::new(), // Would need to extract from message parts if supported - metadata: msg - .metadata - .and_then(|m| serde_json::to_value(m).ok()) - .unwrap_or_else(|| serde_json::json!({})), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::coordinator::Archivist; - use chrono::Utc; - 
use dirigent_protocol::{MessageRole, MessageStatus, SessionMetadata}; - use tempfile::TempDir; - - async fn setup_test_archivist() -> (Archivist, TempDir) { - let temp_dir = TempDir::new().unwrap(); - // Use `from_single_backend` so each test is isolated (no shared - // registry file in the tempdir's parent racing against siblings). - let backend = std::sync::Arc::new( - crate::backends::JsonlBackend::new(temp_dir.path().to_path_buf()) - .await - .unwrap(), - ); - let archivist = Archivist::from_single_backend("main".into(), backend) - .await - .unwrap(); - (archivist, temp_dir) - } - - fn create_test_session(id: &str, title: &str) -> Session { - Session { - id: id.to_string(), - title: title.to_string(), - created_at: Utc::now(), - updated_at: Utc::now(), - metadata: SessionMetadata { - project_path: "/test".to_string(), - model: Some("test-model".to_string()), - total_messages: 0, - system_message: None, - current_mode_id: None, - _meta: None, - project_id: None, - }, - cwd: None, - config_options: None, - acp_client_id: None, - models: None, - modes: None, - } - } - - fn create_test_message(id: &str, session_id: &str, role: MessageRole, text: &str) -> Message { - Message { - id: id.to_string(), - session_id: session_id.to_string(), - role, - created_at: Utc::now(), - content: vec![dirigent_protocol::MessagePart::Text { - text: text.to_string(), - }], - status: MessageStatus::Completed, - metadata: None, - } - } - - #[tokio::test] - async fn test_backfill_new_sessions() { - let (archivist, _temp) = setup_test_archivist().await; - - // Register connector first - let connector_uid = Uuid::now_v7(); - let connector_req = crate::types::RegisterConnectorRequest { - custom_uid: Some(connector_uid), - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "test-connector".to_string(), - metadata: serde_json::json!({}), - fingerprint: None, - }; - archivist - .register_connector(connector_req, None) - .await - .unwrap(); - - // Create 
test sessions - let sessions = vec![ - create_test_session("session-1", "Session 1"), - create_test_session("session-2", "Session 2"), - ]; - - // Mock message fetcher - let fetch_messages = |session_id: &str| { - let sid = session_id.to_string(); - Box::pin(async move { - Ok(vec![ - create_test_message("msg-1", &sid, MessageRole::User, "Hello"), - create_test_message("msg-2", &sid, MessageRole::Assistant, "Hi there"), - ]) - }) as BoxFuture<'static, Result>> - }; - - // Backfill - let stats = backfill_from_sessions(&archivist, connector_uid, sessions, fetch_messages) - .await - .unwrap(); - - // Verify stats - assert_eq!(stats.sessions_found, 2); - assert_eq!(stats.sessions_imported, 2); - assert_eq!(stats.sessions_skipped, 0); - assert_eq!(stats.messages_imported, 4); // 2 messages per session - assert_eq!(stats.errors.len(), 0); - } - - #[tokio::test] - async fn test_backfill_skips_existing_sessions() { - let (archivist, _temp) = setup_test_archivist().await; - - // Register connector first - let connector_uid = Uuid::now_v7(); - let connector_req = crate::types::RegisterConnectorRequest { - custom_uid: Some(connector_uid), - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "test-connector".to_string(), - metadata: serde_json::json!({}), - fingerprint: None, - }; - archivist - .register_connector(connector_req, None) - .await - .unwrap(); - - // Pre-register one session - let session1 = create_test_session("session-1", "Session 1"); - let req = RegisterSessionRequest { - connector_uid, - native_session_id: session1.id.clone(), - title: Some(session1.title.clone()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - archivist.register_session(req, None).await.unwrap(); - - // Create sessions including the pre-registered one - let 
sessions = vec![session1, create_test_session("session-2", "Session 2")]; - - // Mock message fetcher - let fetch_messages = |session_id: &str| { - let sid = session_id.to_string(); - Box::pin(async move { - Ok(vec![create_test_message( - "msg-1", - &sid, - MessageRole::User, - "Test", - )]) - }) as BoxFuture<'static, Result>> - }; - - // Backfill - let stats = backfill_from_sessions(&archivist, connector_uid, sessions, fetch_messages) - .await - .unwrap(); - - // Verify stats - session-1 should be skipped - assert_eq!(stats.sessions_found, 2); - assert_eq!(stats.sessions_imported, 1); // Only session-2 - assert_eq!(stats.sessions_skipped, 1); // session-1 already exists - assert_eq!(stats.messages_imported, 1); // Only messages from session-2 - assert_eq!(stats.errors.len(), 0); - } - - #[tokio::test] - async fn test_backfill_handles_fetch_errors() { - let (archivist, _temp) = setup_test_archivist().await; - - // Register connector first - let connector_uid = Uuid::now_v7(); - let connector_req = crate::types::RegisterConnectorRequest { - custom_uid: Some(connector_uid), - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "test-connector".to_string(), - metadata: serde_json::json!({}), - fingerprint: None, - }; - archivist - .register_connector(connector_req, None) - .await - .unwrap(); - - let sessions = vec![create_test_session("session-1", "Session 1")]; - - // Mock message fetcher that fails - let fetch_messages = |_session_id: &str| { - Box::pin(async move { - Err(ArchivistError::InvalidRequest( - "Failed to fetch messages".to_string(), - )) - }) as BoxFuture<'static, Result>> - }; - - // Backfill - let stats = backfill_from_sessions(&archivist, connector_uid, sessions, fetch_messages) - .await - .unwrap(); - - // Verify stats - session registered but messages failed - assert_eq!(stats.sessions_found, 1); - assert_eq!(stats.sessions_imported, 1); // Session was registered - assert_eq!(stats.messages_imported, 0); // 
But no messages imported - assert_eq!(stats.errors.len(), 1); // Error recorded - assert!(stats.errors[0].contains("Failed to fetch messages")); - } - - #[test] - fn test_backfill_stats_default() { - let stats = BackfillStats::default(); - assert_eq!(stats.sessions_found, 0); - assert_eq!(stats.sessions_imported, 0); - assert_eq!(stats.sessions_skipped, 0); - assert_eq!(stats.messages_imported, 0); - assert_eq!(stats.errors.len(), 0); - } - - #[test] - fn test_convert_message_to_record() { - let scroll_id = Uuid::now_v7(); - let msg = create_test_message("msg-1", "session-1", MessageRole::User, "Hello world"); - - let record = convert_message_to_record(msg, scroll_id); - - assert_eq!(record.session, scroll_id); - assert_eq!(record.role, "user"); - assert_eq!(record.content_md, "Hello world"); - assert_eq!(record.version, 1); - } - - #[test] - fn test_convert_message_with_thinking() { - let scroll_id = Uuid::now_v7(); - let msg = Message { - id: "msg-1".to_string(), - session_id: "session-1".to_string(), - role: MessageRole::Assistant, - created_at: Utc::now(), - content: vec![dirigent_protocol::MessagePart::Thinking { - text: "Let me think...".to_string(), - }], - status: MessageStatus::Completed, - metadata: None, - }; - - let record = convert_message_to_record(msg, scroll_id); - - assert!(record.content_md.contains("")); - assert!(record.content_md.contains("Let me think...")); - assert!(record.content_md.contains("")); - } -} diff --git a/crates/dirigent_archivist/src/coordinator/admin.rs b/crates/dirigent_archivist/src/coordinator/admin.rs deleted file mode 100644 index 39cf36f..0000000 --- a/crates/dirigent_archivist/src/coordinator/admin.rs +++ /dev/null @@ -1,70 +0,0 @@ -//! Admin / inspection methods on `Archivist`. -//! -//! Split out because they aren't part of the hot-path coordinator API: -//! `shutdown` drains queued writer tasks, `list_archives_with_health` -//! snapshots every registration's health + queue depth, and the cache -//! 
admin methods delegate to `ReadCache`. - -use std::sync::Arc; - -use tokio::sync::oneshot; - -use crate::error::Result; -use crate::registry::writer::WriteOp; -use crate::registry::{ArchiveRegistration, ArchiveStatus}; - -use super::Archivist; - -impl Archivist { - /// Drain every queued writer task. Inline backends are no-ops. - /// Call before process exit to ensure in-flight batches land. - pub async fn shutdown(&self) -> Result<()> { - let regs: Vec> = self.registrations.read().await.clone(); - for reg in regs.iter() { - if let Some(writer) = reg.writer.as_ref() { - let (tx, rx) = oneshot::channel(); - // If the send fails, the writer task has already exited — skip the wait. - if writer.sender.send(WriteOp::Shutdown(tx)).await.is_ok() { - let _ = rx.await; - } - // Join the task, if it's still attached. - if let Some(handle) = writer.join.lock().await.take() { - let _ = handle.await; - } - } - } - Ok(()) - } - - /// Snapshot every registered archive's current status. - pub async fn list_archives_with_health(&self) -> Vec { - let regs: Vec> = self.registrations.read().await.clone(); - let mut out = Vec::with_capacity(regs.len()); - for reg in regs.iter() { - let health = reg.last_health.read().await.clone(); - let last_error = reg.last_error.read().await.clone(); - let queue_depth = reg.writer.as_ref().map(|w| w.queue_depth_now()); - out.push(ArchiveStatus { - name: reg.name.clone(), - type_name: reg.type_name.to_string(), - enabled: reg.enabled, - write_active: reg.write_active, - failure_mode: reg.failure_mode, - read_priority: reg.read_priority, - capabilities: reg.capabilities().clone(), - health, - last_error, - queue_depth, - }); - } - out - } - - pub async fn clear_read_cache(&self) { - self.read_cache.clear().await; - } - - pub async fn read_cache_size(&self) -> usize { - self.read_cache.len().await - } -} diff --git a/crates/dirigent_archivist/src/coordinator/archives.rs b/crates/dirigent_archivist/src/coordinator/archives.rs deleted file mode 100644 
index 3004d02..0000000 --- a/crates/dirigent_archivist/src/coordinator/archives.rs +++ /dev/null @@ -1,77 +0,0 @@ -//! Archive lifecycle methods for `Archivist`. -//! -//! Phase 3 is **startup-only**: the archive registry is constructed from -//! `dirigent.toml` at boot and not mutated at runtime. Accordingly, -//! `add_archive`, `remove_archive`, and `set_default_archive` all return -//! [`ArchivistError::DynamicRegistryUnsupported`]. The `list_archives` -//! and `get_default_archive` read-paths continue to operate against the -//! new `Vec>` storage. - -use std::path::PathBuf; - -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::registry::FailureMode; - -impl Archivist { - /// **Deprecated in Phase 3.** Archive registry is configured at boot - /// via `dirigent.toml`; runtime mutation is not supported. - pub async fn add_archive(&self, _name: String, _path: PathBuf) -> Result<()> { - Err(ArchivistError::DynamicRegistryUnsupported) - } - - /// **Deprecated in Phase 3.** Archive registry is configured at boot - /// via `dirigent.toml`; runtime mutation is not supported. - pub async fn remove_archive(&self, _name: String, _force: bool) -> Result<()> { - Err(ArchivistError::DynamicRegistryUnsupported) - } - - /// List all configured archives. Session counts are reported as `0` - /// because the Phase 3 multi-backend coordinator does not persist a - /// per-archive connector index; counts will be reintroduced by the - /// admin-status query in Task 23. 
- pub async fn list_archives(&self) -> Result> { - let regs = self.registrations.read().await; - let primary_name = regs - .iter() - .filter(|r| { - r.enabled && r.write_active && r.failure_mode == FailureMode::Required - }) - .min_by_key(|r| r.read_priority) - .map(|r| r.name.clone()); - - Ok(regs - .iter() - .map(|r| super::types::ArchiveInfo { - name: r.name.clone(), - path: PathBuf::new(), - created_at: chrono::Utc::now(), - session_count: 0, - is_default: primary_name.as_deref() == Some(r.name.as_str()), - }) - .collect()) - } - - /// Get the name of the "default" archive — interpreted in Phase 3 as - /// the enabled, write-active, `Required` backend with the lowest - /// `read_priority`. - pub async fn get_default_archive(&self) -> Result { - let regs = self.registrations.read().await; - regs.iter() - .filter(|r| { - r.enabled && r.write_active && r.failure_mode == FailureMode::Required - }) - .min_by_key(|r| r.read_priority) - .map(|r| r.name.clone()) - .ok_or_else(|| ArchivistError::PrimaryUnavailable { - name: "".into(), - reason: "no required write-active backend".into(), - }) - } - - /// **Deprecated in Phase 3.** Archive registry is configured at boot - /// via `dirigent.toml`; runtime mutation is not supported. - pub async fn set_default_archive(&self, _name: String) -> Result<()> { - Err(ArchivistError::DynamicRegistryUnsupported) - } -} diff --git a/crates/dirigent_archivist/src/coordinator/boot.rs b/crates/dirigent_archivist/src/coordinator/boot.rs deleted file mode 100644 index ce890cb..0000000 --- a/crates/dirigent_archivist/src/coordinator/boot.rs +++ /dev/null @@ -1,281 +0,0 @@ -//! Boot-time construction of the `Archivist` coordinator from a parsed -//! `ArchivesConfig` and a `BackendRegistry` of factories. 
- -use std::sync::Arc; - -use tokio::sync::RwLock; - -use crate::backend::HealthStatus; -use crate::error::ArchivistBootError; -use crate::registry::{ - cache::ReadCache, ArchiveRegistration, ArchivesConfig, BackendRegistry, FailureMode, - WritePolicy, -}; - -use super::Archivist; - -impl Archivist { - /// Construct the coordinator from a parsed `[[archives]]` config block - /// and a registry of backend factories. - /// - /// - Validates the config (duplicate-name / no-primary rules). - /// - Instantiates every enabled backend via the factory. - /// - Runs a startup `health_check` per backend. - /// - Sorts registrations by `read_priority` (ties by declaration order). - /// - Writer tasks for `WritePolicy::Queued` backends are wired in Task 17; - /// for now every backend boots with `writer = None`. - pub async fn from_config( - mut config: ArchivesConfig, - registry: &BackendRegistry, - base_dir: Option<&std::path::Path>, - ) -> Result { - config.validate()?; - - // Filter-level validation (Phase 4, Task 19). - // - // 1. At least one enabled write-active archive must have an - // unrestricted filter. Otherwise there is no default home for - // a session that does not match any filter, and the primary - // target would silently exclude sessions despite being the - // "write-always" backend. - // 2. No archive may declare a filter whose `include_connectors` - // set is `Some(empty)` — that form rejects every session - // unconditionally and is almost always a config typo. 
- let mut has_unrestricted_write_active = false; - for entry in &config.entries { - if let Some(inc) = &entry.filter.include_connectors { - if inc.is_empty() { - return Err(ArchivistBootError::FilterRejectsEverything { - archive: entry.name.clone(), - }); - } - } - if entry.enabled && entry.write_active && entry.filter.is_unrestricted() { - has_unrestricted_write_active = true; - } - } - if !config.entries.is_empty() && !has_unrestricted_write_active { - return Err(ArchivistBootError::NoUnrestrictedPrimary); - } - - // Resolve relative `params.path` values against `base_dir` so that - // archives declared with relative paths land under the data directory - // rather than the binary's CWD. - if let Some(base) = base_dir { - for entry in &mut config.entries { - if let toml::Value::Table(ref mut table) = entry.params { - if let Some(toml::Value::String(ref mut path_str)) = table.get_mut("path") { - let p = std::path::Path::new(path_str.as_str()); - if p.is_relative() { - *path_str = base.join(&*path_str).to_string_lossy().into_owned(); - } - } - } - } - } - - let mut registrations: Vec> = Vec::new(); - - for entry in config.entries.into_iter() { - let backend = registry - .build(&entry.name, &entry.type_name, entry.params) - .await - .map_err(|e| match e { - crate::registry::BackendBuildError::UnknownType(t) => { - ArchivistBootError::UnknownType { - name: entry.name.clone(), - type_name: t, - } - } - other => ArchivistBootError::BackendBuild { - name: entry.name.clone(), - source: other, - }, - })?; - - let initial_health = backend.health_check().await; - - if entry.failure_mode == FailureMode::Required { - if let HealthStatus::Unavailable { reason } = &initial_health { - return Err(ArchivistBootError::UnavailableRequiredBackend { - name: entry.name.clone(), - reason: reason.clone(), - }); - } - } - - let runtime_policy: WritePolicy = entry.write_policy.into_runtime(); - - // Build shared drift state up-front so the writer task (if any) - // and the registration's 
health-drift helpers mutate the SAME - // `Arc>` cells. This keeps Task 22's drift semantics - // coherent across the inline and queued paths. - let health_state: Arc> = - Arc::new(RwLock::new(initial_health.clone())); - let error_state: Arc< - RwLock, String)>>, - > = Arc::new(RwLock::new(None)); - let failure_counter: Arc> = Arc::new(RwLock::new(0u32)); - - let writer = match &runtime_policy { - WritePolicy::Inline => None, - WritePolicy::Queued { - batch_window_ms, - capacity, - overflow, - } => Some(crate::registry::writer::spawn_writer( - backend.clone(), - entry.name.clone(), - *capacity, - std::time::Duration::from_millis(*batch_window_ms), - *overflow, - health_state.clone(), - error_state.clone(), - failure_counter.clone(), - )), - }; - - // Leak `type_name` to satisfy &'static str on the registration; safe at boot, - // and a constant number of entries (O(archives in config)). - let type_name_static: &'static str = Box::leak(entry.type_name.into_boxed_str()); - - let registration = ArchiveRegistration::new_with_shared_state( - entry.name, - type_name_static, - backend, - entry.write_active, - entry.failure_mode, - entry.read_priority, - entry.enabled, - runtime_policy, - writer, - health_state, - error_state, - failure_counter, - ) - .with_filter(entry.filter); - - registrations.push(Arc::new(registration)); - } - - // Sort by `read_priority`. Rust's sort is stable, so ties keep declaration order. 
- registrations.sort_by_key(|r| r.read_priority); - - Ok(Self { - registrations: RwLock::new(registrations), - read_cache: Arc::new(ReadCache::new()), - registry_path: std::path::PathBuf::new(), - }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::registry::{ArchivesConfig, BackendRegistry}; - - fn parse(toml_src: &str) -> ArchivesConfig { - toml::from_str(toml_src).unwrap() - } - - #[tokio::test] - async fn relative_archive_path_resolved_against_base_dir() { - let base = tempfile::tempdir().unwrap(); - let cfg = parse( - r#" - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "my_archive" - "#, - ); - let registry = BackendRegistry::with_jsonl(); - let archivist = Archivist::from_config(cfg, ®istry, Some(base.path())) - .await - .unwrap(); - - // The archive should have been created under base_dir/my_archive. - // Verify by checking the .contexts directory exists. - let expected = base.path().join("my_archive").join(".contexts"); - assert!( - expected.exists(), - "expected {expected:?} to exist after boot with relative path" - ); - - // Also verify the archivist is functional (has one registration). - let archives = archivist.list_archives().await.unwrap(); - assert_eq!(archives.len(), 1); - } - - #[tokio::test] - async fn absolute_archive_path_not_affected_by_base_dir() { - let base = tempfile::tempdir().unwrap(); - let archive_dir = tempfile::tempdir().unwrap(); - let abs_path = archive_dir.path().to_string_lossy().replace('\\', "/"); - - let cfg = parse(&format!( - r#" - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "{abs_path}" - "#, - )); - let registry = BackendRegistry::with_jsonl(); - let archivist = Archivist::from_config(cfg, ®istry, Some(base.path())) - .await - .unwrap(); - - // The archive should be at the absolute path, NOT under base_dir. 
- let expected = archive_dir.path().join(".contexts"); - assert!( - expected.exists(), - "expected {expected:?} to exist (absolute path should be used as-is)" - ); - - // Verify nothing was created under base_dir with the archive name. - // If base_dir resolution incorrectly touched the absolute path, we'd - // see stray directories under base_dir. - let base_entries: Vec<_> = std::fs::read_dir(base.path()) - .unwrap() - .collect(); - assert!( - base_entries.is_empty(), - "base_dir should be untouched when archive path is absolute, found: {base_entries:?}" - ); - - let archives = archivist.list_archives().await.unwrap(); - assert_eq!(archives.len(), 1); - } - - #[tokio::test] - async fn none_base_dir_preserves_existing_behavior() { - let archive_dir = tempfile::tempdir().unwrap(); - let abs_path = archive_dir.path().to_string_lossy().replace('\\', "/"); - - let cfg = parse(&format!( - r#" - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "{abs_path}" - "#, - )); - let registry = BackendRegistry::with_jsonl(); - let archivist = Archivist::from_config(cfg, ®istry, None) - .await - .unwrap(); - - let expected = archive_dir.path().join(".contexts"); - assert!( - expected.exists(), - "expected {expected:?} to exist with None base_dir and absolute path" - ); - - let archives = archivist.list_archives().await.unwrap(); - assert_eq!(archives.len(), 1); - } -} diff --git a/crates/dirigent_archivist/src/coordinator/connectors.rs b/crates/dirigent_archivist/src/coordinator/connectors.rs deleted file mode 100644 index 1195afc..0000000 --- a/crates/dirigent_archivist/src/coordinator/connectors.rs +++ /dev/null @@ -1,285 +0,0 @@ -//! Connector orchestration for `Archivist`. -//! -//! Alias detection and tri-state registration logic live here; persistence is -//! delegated to each backend's `ConnectorRegistryBackend` sub-trait. Ported -//! from `FileBasedArchivist::register_connector` and -//! `MultiArchiveArchivist::resolve_connector_uid`. 
- -use chrono::Utc; -use uuid::Uuid; - -use crate::backend::ArchiveCapability; -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::types::{ - ConnectorRecord, RegisterConnectorRequest, RegisterConnectorResponse, RegisterStatus, -}; - -impl Archivist { - /// Register a connector with alias detection. - /// - /// Ported from `FileBasedArchivist::register_connector`. Decision order: - /// - /// 1. If `custom_uid` collides with an existing connector: - /// - same `client_native_id` → `Aliased` (idempotent re-registration). - /// - different `client_native_id` → `CollisionInconsistent` error. - /// 2. If the `client_native_id` is already registered under a different - /// UID → `Aliased` to that pre-existing UID. - /// 3. If a `fingerprint` matches a pre-existing connector → `Aliased` to - /// that UID. (Identity persistence across connector re-adds.) - /// 4. Otherwise → `Accepted`; a new `ConnectorRecord` is persisted via - /// `ConnectorRegistryBackend::put_connector`. - // TODO(phase3 task 16): register_connector fanout requires replicating the - // ConnectorRecord to secondaries. Since connectors are identity-shaped (UIDs - // must match across backends), the tri-state alias detection must stay - // canonical on the primary, but the accepted record should be mirrored to - // secondaries. Deferred to a follow-up within Phase 3 — the core Task 16 - // plan covers append_messages and the session mutators which are the hot - // paths. Current behaviour: single-primary via `resolve_backend`. 
- pub async fn register_connector( - &self, - req: RegisterConnectorRequest, - archive: Option, - ) -> Result { - let backend = self.resolve_backend(archive).await?; - let registry = backend.as_connector_registry().ok_or_else(|| { - ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::ConnectorRegistry, - backend: "selected".into(), - } - })?; - - // Generate connector UID (use custom_uid or generate new) - let connector_uid = req.custom_uid.unwrap_or_else(Uuid::now_v7); - - // Load existing (non-alias) connectors for collision detection. - let existing_connectors = registry.list_connectors().await?; - - // 1. Check for UID collision. - if let Some(existing) = existing_connectors - .iter() - .find(|c| c.connector_uid == connector_uid) - { - if existing.client_native_id == req.client_native_id { - // Same UID with same client_native_id -> ALIASED (idempotent). - return Ok(RegisterConnectorResponse { - status: RegisterStatus::Aliased, - connector_uid, - alias_of: Some(connector_uid), - note: Some("Connector already registered with this UID".to_string()), - }); - } else { - // Same UID with different client_native_id -> REJECTED. - return Err(ArchivistError::CollisionInconsistent(connector_uid)); - } - } - - // 2. Check for existing client_native_id (different UID collision). - if let Some(existing) = existing_connectors - .iter() - .find(|c| c.client_native_id == req.client_native_id) - { - return Ok(RegisterConnectorResponse { - status: RegisterStatus::Aliased, - connector_uid: existing.connector_uid, - alias_of: Some(existing.connector_uid), - note: Some("Connector already registered with different UID".to_string()), - }); - } - - // 3. Check for fingerprint match (identity persistence across re-adds). - // - // Note: the original `FileBasedArchivist` additionally refreshed the - // matched connector's `title`/`metadata` on disk and in cache here. 
- // That refresh bypassed both the TSV index and any backend abstraction - // (direct `read_json`/`write_json` against `connector.json`). The - // `ConnectorRegistryBackend` trait does not yet expose an - // "update metadata" method, and `put_connector` would append a - // duplicate row to the index rather than mutate in place. The refresh - // was best-effort (`let _ = write_json(...)`) and is not exercised by - // existing tests; deliberately skipped here. Re-introduce via a - // dedicated backend method if a consumer relies on it. - if let Some(ref fp) = req.fingerprint { - if let Some(existing) = existing_connectors - .iter() - .find(|c| c.fingerprint.as_deref() == Some(fp.as_str())) - { - let matched_uid = existing.connector_uid; - return Ok(RegisterConnectorResponse { - status: RegisterStatus::Aliased, - connector_uid: matched_uid, - alias_of: Some(matched_uid), - note: Some(format!("Matched by fingerprint: {}", fp)), - }); - } - } - - // 4. No collision -> ACCEPTED, create and persist new connector. - let now = Utc::now(); - let connector_record = ConnectorRecord { - version: 1, - connector_uid, - r#type: req.r#type, - title: req.title, - client_native_id: req.client_native_id, - alias_of: None, - created_at: now, - metadata: req.metadata, - fingerprint: req.fingerprint, - }; - - registry.put_connector(connector_record).await?; - - Ok(RegisterConnectorResponse { - status: RegisterStatus::Accepted, - connector_uid, - alias_of: None, - note: None, - }) - } - - /// Resolve a connector UID by scanning every registered backend. - /// - /// Ported from `MultiArchiveArchivist::resolve_connector_uid`: each - /// backend is tried in turn; the first backend that recognises the - /// `client_native_id` wins. As a secondary path, if `client_native_id` - /// parses as a UUID, checks whether a backend already has a connector - /// record at that UID. Returns `ConnectorUnknown(Uuid::nil())` if no - /// backend can resolve it. 
- pub async fn resolve_connector_uid(&self, client_native_id: &str) -> Result { - // Hand-rolled walk rather than `read_walk_collection`: we want - // "try every backend" semantics — a backend that returns `Ok(None)` - // should NOT win the walk. `read_walk_collection` treats any `Ok(_)` - // as a hit, so it would stop at the first backend that answered at - // all. Health drift is still wired through `record_read_*`. - let regs: Vec<_> = self.registrations.read().await.clone(); - for reg in regs.iter() { - if !reg.enabled { - continue; - } - let Some(registry) = reg.backend.as_connector_registry() else { - continue; - }; - match registry.resolve_connector_uid(client_native_id).await { - Ok(Some(uid)) => { - self.record_read_success(reg).await; - return Ok(uid); - } - Ok(None) => { - self.record_read_success(reg).await; - if let Ok(parsed) = Uuid::parse_str(client_native_id) { - match registry.get_connector(parsed).await { - Ok(Some(_)) => return Ok(parsed), - Ok(None) => {} - Err(_) => { - self.record_read_failure(reg).await; - } - } - } - } - Err(_) => { - self.record_read_failure(reg).await; - } - } - } - Err(ArchivistError::ConnectorUnknown(Uuid::nil())) - } - - /// List connectors in the selected archive (non-aliases only). - /// - /// When `archive` is `Some`, the explicit override still resolves directly - /// against that named backend (returning `ArchiveNameUnknown` / - /// `CapabilityNotSupported` as appropriate). When `None`, routing walks - /// enabled backends in `read_priority` order and returns the first - /// `ConnectorRegistry`-capable answer. 
- pub async fn list_connectors( - &self, - archive: Option, - ) -> Result> { - if let Some(name) = archive { - let reg = self - .find_registration(&name) - .await - .ok_or(ArchivistError::ArchiveNameUnknown(name))?; - let registry = reg.backend.as_connector_registry().ok_or_else(|| { - ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::ConnectorRegistry, - backend: reg.name.clone(), - } - })?; - return registry.list_connectors().await; - } - Ok(self - .read_walk_collection( - |reg| reg.backend.as_connector_registry().is_some(), - |backend| async move { - let cr = backend - .as_connector_registry() - .expect("predicate ensured"); - cr.list_connectors().await - }, - ) - .await? - .unwrap_or_default()) - } - - /// Update the stable fingerprint of an existing connector. - /// - /// NOTE: read-mutate-write on the backend side; falls through to inline - /// under `WritePolicy::Queued` (no `WriteOp` variant). - pub async fn update_connector_fingerprint( - &self, - connector_uid: Uuid, - fingerprint: String, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - let primary_reg = primary.backend.as_connector_registry().ok_or_else(|| { - ArchivistError::PrimaryUnavailable { - name: primary.name.clone(), - reason: "backend lacks ConnectorRegistry capability".into(), - } - })?; - if let Err(e) = primary_reg - .update_connector_fingerprint(connector_uid, fingerprint.clone()) - .await - { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - let Some(sec_reg) = reg.backend.as_connector_registry() else { - tracing::debug!( - backend = reg.name.as_str(), - type_name = reg.type_name, - op = "update_connector_fingerprint", - "capability_skip" 
- ); - continue; - }; - if let Err(e) = sec_reg - .update_connector_fingerprint(connector_uid, fingerprint.clone()) - .await - { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - Ok(()) - } -} diff --git a/crates/dirigent_archivist/src/coordinator/meta.rs b/crates/dirigent_archivist/src/coordinator/meta.rs deleted file mode 100644 index 94c8c6f..0000000 --- a/crates/dirigent_archivist/src/coordinator/meta.rs +++ /dev/null @@ -1,526 +0,0 @@ -//! Meta events, DAG, and cleanup orchestration for `Archivist`. -//! -//! Ported from `FileBasedArchivist` in `archivist.rs`. Meta events and DAG -//! methods are thin delegates over `as_meta_events()` / `as_dag()`; -//! `get_session_tree` performs a recursive DAG walk; `cleanup_empty_sessions` -//! pages through all sessions and deletes those with zero messages (skipping -//! `SessionKind::AcpConnection` meta sessions, which track events rather than -//! messages). - -use uuid::Uuid; - -use crate::backend::ArchiveCapability; -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::types::{ - DagEdge, MetaEventRecord, SessionKind, SessionListQuery, SessionMetadata, MAX_PAGE_LIMIT, -}; - -impl Archivist { - // ------------------------------------------------------------------ - // Meta events - // ------------------------------------------------------------------ - - pub async fn append_meta_events( - &self, - scroll_id: Uuid, - events: Vec, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - // Primary must have MetaEvents capability even in the queued path — - // the writer task dispatches to `as_meta_events()`, so we'd silently - // drop events on an incapable primary. Fail fast here. 
- let _ = primary.backend.as_meta_events().ok_or_else(|| { - ArchivistError::PrimaryUnavailable { - name: primary.name.clone(), - reason: "backend lacks MetaEvents capability".into(), - } - })?; - - match &primary.write_policy { - crate::registry::WritePolicy::Inline => { - let primary_meta = primary - .backend - .as_meta_events() - .expect("capability checked above"); - if let Err(e) = primary_meta - .append_meta_events(scroll_id, events.clone()) - .await - { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - } - crate::registry::WritePolicy::Queued { .. } => { - let writer = primary - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - writer - .enqueue(crate::registry::writer::WriteOp::AppendMetaEvents { - scroll_id, - events: events.clone(), - }) - .await?; - } - } - - let session_metadata_for_filter = self - .load_metadata_for_filter(scroll_id, ®s, &primary.name) - .await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) { - tracing::debug!( - archive = %reg.name, - scroll_id = %scroll_id, - op = "append_meta_events", - "filter_skip" - ); - continue; - } - if reg.backend.as_meta_events().is_none() { - tracing::debug!( - backend = reg.name.as_str(), - type_name = reg.type_name, - op = "append_meta_events", - "capability_skip" - ); - continue; - } - match ®.write_policy { - crate::registry::WritePolicy::Inline => { - let me = reg - .backend - .as_meta_events() - .expect("capability checked above"); - if let Err(e) = me.append_meta_events(scroll_id, events.clone()).await { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - 
crate::registry::WritePolicy::Queued { .. } => { - let writer = reg - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - if let Err(e) = writer - .enqueue(crate::registry::writer::WriteOp::AppendMetaEvents { - scroll_id, - events: events.clone(), - }) - .await - { - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } - } - } - } - Ok(()) - } - - pub async fn get_meta_events( - &self, - scroll_id: Uuid, - _archive: Option, - ) -> Result> { - // `archive` is now ignored for reads; routing picks the highest-priority - // backend that has the session and supports `MetaEvents`. - Ok(self - .read_walk_per_session( - scroll_id, - |reg| reg.backend.as_meta_events().is_some(), - |backend| async move { - let me = backend.as_meta_events().expect("predicate ensured"); - me.get_meta_events(scroll_id).await.map(Some) - }, - ) - .await? - .unwrap_or_default()) - } - - /// Update the connection status of an ACP meta-session. - /// - /// NOTE: read-mutate-write on the backend side (the impl rewrites fields - /// on the stored session); falls through to inline under - /// `WritePolicy::Queued` (no `WriteOp` variant). 
- pub async fn update_meta_session_status( - &self, - scroll_id: Uuid, - is_connected: bool, - current_session_id: Option, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - let primary_meta = primary.backend.as_meta_events().ok_or_else(|| { - ArchivistError::PrimaryUnavailable { - name: primary.name.clone(), - reason: "backend lacks MetaEvents capability".into(), - } - })?; - if let Err(e) = primary_meta - .update_meta_session_status(scroll_id, is_connected, current_session_id) - .await - { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - let Some(me) = reg.backend.as_meta_events() else { - tracing::debug!( - backend = reg.name.as_str(), - type_name = reg.type_name, - op = "update_meta_session_status", - "capability_skip" - ); - continue; - }; - if let Err(e) = me - .update_meta_session_status(scroll_id, is_connected, current_session_id) - .await - { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - Ok(()) - } - - pub async fn list_meta_sessions( - &self, - _archive: Option, - ) -> Result> { - // Collection-shape read: first enabled/healthy backend that supports - // `MetaEvents` wins. `archive` override is no longer honoured here — - // routing decides. - Ok(self - .read_walk_collection( - |reg| reg.backend.as_meta_events().is_some(), - |backend| async move { - let me = backend.as_meta_events().expect("predicate ensured"); - me.list_meta_sessions().await - }, - ) - .await? 
- .unwrap_or_default()) - } - - pub async fn find_meta_session_by_client( - &self, - client_id: &str, - _archive: Option, - ) -> Result> { - // Collection-shape read: first enabled/healthy backend that supports - // `MetaEvents` wins. The inner op returns `Result>`, so the - // walker's outer `Option` flattens to the inner one — "no backend - // answered" and "backend answered None" collapse the same way. - let client_id = client_id.to_string(); - let result = self - .read_walk_collection( - |reg| reg.backend.as_meta_events().is_some(), - |backend| { - let client_id = client_id.clone(); - async move { - let me = backend.as_meta_events().expect("predicate ensured"); - me.find_meta_session_by_client(&client_id).await - } - }, - ) - .await?; - Ok(result.flatten()) - } - - // ------------------------------------------------------------------ - // DAG - // ------------------------------------------------------------------ - - pub async fn append_dag_edge( - &self, - edge: DagEdge, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - // Primary must have DAG capability — even in the queued path the - // writer task dispatches to `as_dag()`, so silently accepting a - // non-DAG primary would lose the edge. - let _ = primary.backend.as_dag().ok_or_else(|| { - ArchivistError::PrimaryUnavailable { - name: primary.name.clone(), - reason: "backend lacks Dag capability".into(), - } - })?; - - match &primary.write_policy { - crate::registry::WritePolicy::Inline => { - let primary_dag = primary - .backend - .as_dag() - .expect("capability checked above"); - if let Err(e) = primary_dag.append_dag_edge(edge.clone()).await { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - } - crate::registry::WritePolicy::Queued { .. 
} => { - let writer = primary - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - writer - .enqueue(crate::registry::writer::WriteOp::AppendDagEdge(edge.clone())) - .await?; - } - } - - // DAG edges are indexed under the parent scroll_id, so use that for - // filter evaluation (the session whose DAG is being extended). - let parent_scroll_id = edge.parent; - let session_metadata_for_filter = self - .load_metadata_for_filter(parent_scroll_id, ®s, &primary.name) - .await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) { - tracing::debug!( - archive = %reg.name, - scroll_id = %parent_scroll_id, - op = "append_dag_edge", - "filter_skip" - ); - continue; - } - if reg.backend.as_dag().is_none() { - tracing::debug!( - backend = reg.name.as_str(), - type_name = reg.type_name, - op = "append_dag_edge", - "capability_skip" - ); - continue; - } - match ®.write_policy { - crate::registry::WritePolicy::Inline => { - let d = reg.backend.as_dag().expect("capability checked above"); - if let Err(e) = d.append_dag_edge(edge.clone()).await { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - crate::registry::WritePolicy::Queued { .. 
} => { - let writer = reg - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - if let Err(e) = writer - .enqueue(crate::registry::writer::WriteOp::AppendDagEdge(edge.clone())) - .await - { - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } - } - } - } - Ok(()) - } - - pub async fn get_children( - &self, - scroll_id: Uuid, - _archive: Option, - ) -> Result> { - // `archive` is now ignored for reads; routing picks the highest-priority - // backend that has the session and supports `Dag`. - Ok(self - .read_walk_per_session( - scroll_id, - |reg| reg.backend.as_dag().is_some(), - |backend| async move { - let d = backend.as_dag().expect("predicate ensured"); - d.get_children(scroll_id).await.map(Some) - }, - ) - .await? - .unwrap_or_default()) - } - - /// Recursive DAG walk rooted at `root_scroll_id`. - /// - /// Matches the shape of `FileBasedArchivist::get_session_tree`: returns - /// every edge reachable from `root_scroll_id` (children, grandchildren, - /// …). Uses `DagBackend::get_dag_edges` per-parent plus a `seen` set to - /// guard against cycles. - pub async fn get_session_tree( - &self, - root_scroll_id: Uuid, - archive: Option, - ) -> Result> { - // TODO(phase3): consider multi-backend DAG walk in a future phase — - // current impl uses the default backend only. Consistent BFS across - // a tree requires all `get_dag_edges` calls to target the SAME - // backend as the root, which the walker API does not yet expose. 
- let backend = self.resolve_backend(archive).await?; - let dag = backend - .as_dag() - .ok_or_else(|| ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::Dag, - backend: "selected".into(), - })?; - - let mut out = Vec::new(); - let mut stack = vec![root_scroll_id]; - let mut seen = std::collections::HashSet::new(); - while let Some(parent) = stack.pop() { - if !seen.insert(parent) { - continue; - } - let edges = dag.get_dag_edges(parent).await?; - for e in &edges { - stack.push(e.child); - } - out.extend(edges); - } - Ok(out) - } - - // ------------------------------------------------------------------ - // Cleanup - // ------------------------------------------------------------------ - - /// Delete sessions that have zero messages. - /// - /// Ported from `FileBasedArchivist::cleanup_empty_sessions`. Pages through - /// every session (including hidden ones) via `list_sessions_paged`, counts - /// messages per session, and deletes those with zero. Meta sessions - /// (`SessionKind::AcpConnection`) are skipped — they track connection - /// events in `events.jsonl`, not messages, so an empty message log is - /// expected. - /// - /// Returns `(deleted, total_scanned)`. - pub async fn cleanup_empty_sessions( - &self, - archive: Option, - ) -> Result<(usize, usize)> { - let backend = self.resolve_backend(archive).await?; - - let mut total: usize = 0; - let mut deleted: usize = 0; - let mut q = SessionListQuery { - include_hidden: true, - limit: MAX_PAGE_LIMIT, - ..SessionListQuery::default() - }; - - loop { - let page = backend.list_sessions_paged(q.clone()).await?; - - for session in page.items.iter() { - total += 1; - - // Skip meta sessions - they track events, not messages, so a - // zero message count is expected and not a signal of emptiness. 
- if session.kind == SessionKind::AcpConnection { - tracing::debug!( - scroll_id = %session.scroll_id, - "Skipping meta session (AcpConnection) during cleanup" - ); - continue; - } - - let count = match backend.count_messages(session.scroll_id).await { - Ok(c) => c, - Err(e) => { - // Match legacy semantics: if we can't count messages, - // skip this session rather than risk deleting a - // non-empty one. - tracing::warn!( - scroll_id = %session.scroll_id, - error = %e, - "Failed to count messages for session, skipping cleanup" - ); - continue; - } - }; - - if count == 0 { - match backend.delete_session(session.scroll_id).await { - Ok(()) => { - tracing::info!( - scroll_id = %session.scroll_id, - "Deleted empty session during cleanup" - ); - deleted += 1; - } - Err(e) => { - tracing::warn!( - scroll_id = %session.scroll_id, - error = %e, - "Failed to delete empty session during cleanup" - ); - } - } - } - } - - match page.next_cursor { - Some(cursor) => q.cursor = Some(cursor), - None => break, - } - } - - tracing::info!( - deleted = deleted, - total = total, - "Completed empty session cleanup" - ); - - Ok((deleted, total)) - } -} diff --git a/crates/dirigent_archivist/src/coordinator/mod.rs b/crates/dirigent_archivist/src/coordinator/mod.rs deleted file mode 100644 index 61e4654..0000000 --- a/crates/dirigent_archivist/src/coordinator/mod.rs +++ /dev/null @@ -1,231 +0,0 @@ -//! Concrete archivist coordinator. -//! -//! Owns a `Vec>` sorted by `read_priority`, plus a -//! positive `scroll_id → backend` cache. The registry is constructed from -//! `dirigent.toml` at boot (Task 12). `Archivist::new` remains a legacy -//! convenience for the dev-instance migration path; later tasks migrate -//! consumers to `Archivist::from_config`. 
- -mod admin; -mod archives; -mod boot; -mod connectors; -mod meta; -mod routing; -mod sessions; -pub mod types; - -pub use types::{ArchiveInfo, ArchiveMetadata}; - -use std::path::PathBuf; -use std::sync::Arc; - -use tokio::sync::RwLock; - -use crate::backend::ArchiveBackend; -use crate::error::{ArchivistError, Result}; -use crate::registry::{ - cache::ReadCache, ArchiveRegistration, FailureMode, WritePolicy, -}; - -pub struct Archivist { - pub(crate) registrations: RwLock>>, - #[allow(dead_code)] // wired up in later tasks (cache-backed reads) - pub(crate) read_cache: Arc, - #[allow(dead_code)] // retained for future admin endpoints / diagnostics - pub(crate) registry_path: PathBuf, -} - -impl Archivist { - /// Legacy constructor: builds a single JsonlBackend rooted at - /// `registry_path.parent()`. Kept so dev-instance migration still - /// succeeds before Task 28 migrates consumers to `from_config`. - pub async fn new(registry_path: PathBuf) -> Result { - use crate::backends::JsonlBackend; - - let mut registrations: Vec> = Vec::new(); - if !registry_path.as_os_str().is_empty() { - let archive_root = registry_path - .parent() - .map(|p| p.to_path_buf()) - .unwrap_or_else(|| registry_path.clone()); - let backend = Arc::new(JsonlBackend::new(archive_root).await?) - as Arc; - let initial_health = backend.health_check().await; - registrations.push(Arc::new(ArchiveRegistration::new( - "main".into(), - "jsonl", - backend, - /* write_active */ true, - FailureMode::Required, - /* read_priority */ 0, - /* enabled */ true, - WritePolicy::Inline, - /* writer */ None, - initial_health, - ))); - } - - Ok(Self { - registrations: RwLock::new(registrations), - read_cache: Arc::new(ReadCache::new()), - registry_path, - }) - } - - /// Construct a coordinator with a single `JsonlBackend` archive named - /// "main" rooted at `archive_root`. 
- pub async fn new_with_single_archive(archive_root: PathBuf) -> Result { - use crate::backends::JsonlBackend; - - let backend = Arc::new(JsonlBackend::new(archive_root).await?) - as Arc; - let initial_health = backend.health_check().await; - let reg = Arc::new(ArchiveRegistration::new( - "main".into(), - "jsonl", - backend, - true, - FailureMode::Required, - 0, - true, - WritePolicy::Inline, - None, - initial_health, - )); - Ok(Self { - registrations: RwLock::new(vec![reg]), - read_cache: Arc::new(ReadCache::new()), - registry_path: PathBuf::new(), - }) - } - - /// Construct a coordinator with a pre-built single backend (for tests - /// that need to hold the backend directly alongside the coordinator). - pub async fn from_single_backend( - name: String, - backend: Arc, - ) -> Result { - let initial_health = backend.health_check().await; - let reg = Arc::new(ArchiveRegistration::new( - name, - "external", - backend, - true, - FailureMode::Required, - 0, - true, - WritePolicy::Inline, - None, - initial_health, - )); - Ok(Self { - registrations: RwLock::new(vec![reg]), - read_cache: Arc::new(ReadCache::new()), - registry_path: PathBuf::new(), - }) - } - - /// Resolve a single backend by optional name. - /// - /// `None` → lowest-`read_priority` enabled write-active `Required` - /// backend. `Some(name)` → the backend with that name (must exist). 
- #[allow(dead_code)] // wired up in later tasks - pub(crate) async fn resolve_backend( - &self, - archive: Option, - ) -> Result> { - let regs = self.registrations.read().await; - - if regs.is_empty() { - return Err(ArchivistError::NoArchiveConfigured); - } - - let chosen = match archive { - Some(name) => match regs.iter().find(|r| r.name == name) { - Some(r) => r, - None => return Err(ArchivistError::ArchiveNameUnknown(name)), - }, - None => regs - .iter() - .filter(|r| { - r.enabled && r.write_active && r.failure_mode == FailureMode::Required - }) - .min_by_key(|r| r.read_priority) - .ok_or_else(|| ArchivistError::PrimaryUnavailable { - name: "".into(), - reason: "no required write-active backend".into(), - })?, - }; - - Ok(chosen.backend.clone()) - } - - /// Resolve the primary `ArchiveRegistration` for a write. - /// - /// `None` → default-write target (lowest `read_priority` among enabled - /// write-active `Required` backends). `Some(name)` → the backend with that - /// name; errors if disabled or not write-active. 
- #[allow(dead_code)] // wired up in Task 16 - pub(crate) async fn resolve_primary( - &self, - archive: Option, - ) -> Result> { - let regs = self.registrations.read().await; - if regs.is_empty() { - return Err(ArchivistError::NoArchiveConfigured); - } - let chosen = match archive { - Some(name) => { - let r = regs - .iter() - .find(|r| r.name == name) - .ok_or_else(|| ArchivistError::ArchiveNameUnknown(name.clone()))?; - if !r.enabled { - return Err(ArchivistError::PrimaryUnavailable { - name: r.name.clone(), - reason: "backend is disabled".into(), - }); - } - if !r.write_active { - return Err(ArchivistError::PrimaryUnavailable { - name: r.name.clone(), - reason: "backend is not write-active".into(), - }); - } - r.clone() - } - None => regs - .iter() - .filter(|r| { - r.enabled - && r.write_active - && r.failure_mode == crate::registry::FailureMode::Required - }) - .min_by_key(|r| r.read_priority) - .cloned() - .ok_or_else(|| ArchivistError::PrimaryUnavailable { - name: "".into(), - reason: "no required write-active backend".into(), - })?, - }; - Ok(chosen) - } -} - -#[cfg(any(test, feature = "test-utils"))] -impl Archivist { - /// Test-only: construct directly from pre-built registrations. - pub fn from_registrations( - regs: Vec>, - ) -> Self { - Self { - registrations: tokio::sync::RwLock::new(regs), - read_cache: std::sync::Arc::new(crate::registry::cache::ReadCache::new()), - registry_path: std::path::PathBuf::new(), - } - } -} - -#[cfg(test)] -mod tests; diff --git a/crates/dirigent_archivist/src/coordinator/routing.rs b/crates/dirigent_archivist/src/coordinator/routing.rs deleted file mode 100644 index 47b0eed..0000000 --- a/crates/dirigent_archivist/src/coordinator/routing.rs +++ /dev/null @@ -1,136 +0,0 @@ -//! Read priority walk shared by every per-scroll_id and collection-shape -//! coordinator method. -//! -//! The walk honours per-backend `enabled`, caller-supplied capability -//! predicates, and current health. 
Per-scroll_id reads populate a positive -//! LRU cache keyed on `scroll_id`, so the second read for the same session -//! can short-circuit the priority walk. - -use std::sync::Arc; - -use uuid::Uuid; - -use crate::backend::ArchiveBackend; -use crate::error::Result; -use crate::registry::ArchiveRegistration; - -use super::Archivist; - -impl Archivist { - /// Walk enabled + healthy registrations in `read_priority` order. - /// - /// `predicate` decides whether a backend can serve the read (typically a - /// capability check). `op` is invoked on the first matching backend: - /// - `Ok(Some(value))` — wins the walk; per-scroll_id cache is updated; returned. - /// - `Ok(None)` — backend doesn't have it; continue. - /// - `Err(_)` — drift the backend's health and continue. - pub(crate) async fn read_walk_per_session( - &self, - scroll_id: Uuid, - predicate: P, - op: F, - ) -> Result> - where - T: Send, - P: Fn(&ArchiveRegistration) -> bool + Send + Sync, - F: Fn(Arc) -> Fut + Send + Sync, - Fut: std::future::Future>> + Send, - { - // Cache hit: try the cached backend first. - if let Some(cached_name) = self.read_cache.get(scroll_id).await { - if let Some(reg) = self.find_registration(&cached_name).await { - if predicate(®) && reg.enabled && !self.is_unavailable(®).await { - match op(reg.backend.clone()).await { - Ok(Some(value)) => return Ok(Some(value)), - Ok(None) => { - // Cached entry no longer holds — invalidate and fall through. - self.read_cache.invalidate(scroll_id).await; - } - Err(_) => { - self.record_read_failure(®).await; - self.read_cache.invalidate(scroll_id).await; - } - } - } - } - } - - // Priority walk. 
- let regs: Vec> = self.registrations.read().await.clone(); - for reg in regs.iter() { - if !reg.enabled || !predicate(reg) || self.is_unavailable(reg).await { - continue; - } - match op(reg.backend.clone()).await { - Ok(Some(value)) => { - self.record_read_success(reg).await; - self.read_cache.put(scroll_id, reg.name.clone()).await; - return Ok(Some(value)); - } - Ok(None) => { - self.record_read_success(reg).await; - continue; - } - Err(_) => { - self.record_read_failure(reg).await; - continue; - } - } - } - - Ok(None) - } - - /// Collection-shape read variant: returns the first enabled/healthy backend's - /// result, no cache. `op`'s return type is `Result` (no `Option`): - /// an error is treated as "backend couldn't serve this" and drifted; `Ok(T)` - /// is the answer. - pub(crate) async fn read_walk_collection( - &self, - predicate: P, - op: F, - ) -> Result> - where - T: Send, - P: Fn(&ArchiveRegistration) -> bool + Send + Sync, - F: Fn(Arc) -> Fut + Send + Sync, - Fut: std::future::Future> + Send, - { - let regs: Vec> = self.registrations.read().await.clone(); - for reg in regs.iter() { - if !reg.enabled || !predicate(reg) || self.is_unavailable(reg).await { - continue; - } - match op(reg.backend.clone()).await { - Ok(value) => { - self.record_read_success(reg).await; - return Ok(Some(value)); - } - Err(_) => { - self.record_read_failure(reg).await; - continue; - } - } - } - Ok(None) - } - - pub(crate) async fn find_registration( - &self, - name: &str, - ) -> Option> { - self.registrations - .read() - .await - .iter() - .find(|r| r.name == name) - .cloned() - } - - async fn is_unavailable(&self, reg: &ArchiveRegistration) -> bool { - matches!( - *reg.last_health.read().await, - crate::backend::HealthStatus::Unavailable { .. 
} - ) - } -} diff --git a/crates/dirigent_archivist/src/coordinator/sessions.rs b/crates/dirigent_archivist/src/coordinator/sessions.rs deleted file mode 100644 index f0c8c5e..0000000 --- a/crates/dirigent_archivist/src/coordinator/sessions.rs +++ /dev/null @@ -1,1470 +0,0 @@ -//! Session orchestration for `Archivist`. -//! -//! Covers registration (with alias detection), resolution, mandatory-primitive -//! dispatch, read-modify-write metadata wrappers, and move/copy semantics. -//! Ported from the `FileBasedArchivist` bodies in `archivist.rs`; persistence -//! routes through backend sub-traits (`SessionMappingBackend`, -//! `ConnectorRegistryBackend`) plus the mandatory `ArchiveBackend` surface. - -use chrono::Utc; -use uuid::Uuid; - -use crate::backend::ArchiveCapability; -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::types::{ - MessageCursor, MessagePage, MessageRecord, MoveReport, RegisterSessionRequest, - RegisterSessionResponse, RegisterStatus, SessionCompleteness, SessionListQuery, - SessionMetadata, SessionPage, MAX_PAGE_LIMIT, -}; - -impl Archivist { - // ------------------------------------------------------------------ - // Filter helpers (Task 20) - // ------------------------------------------------------------------ - - /// Evaluate a registration's filter against optional session metadata. - /// - /// - If the filter is unrestricted, always allows. - /// - If the filter is restricted and metadata is present, delegates to - /// `ArchiveFilter::allows`. - /// - If the filter is restricted but metadata is unavailable, rejects - /// (the safe default: without metadata we cannot prove the session - /// should be replicated). 
- pub(crate) fn filter_allows( - reg: &crate::registry::ArchiveRegistration, - session: Option<&SessionMetadata>, - ) -> bool { - if reg.filter.is_unrestricted() { - return true; - } - match session { - Some(s) => reg.filter.allows(s, &s.connector_uid), - None => false, - } - } - - /// Lazily load session metadata for filter evaluation. - /// - /// Returns `None` when none of the non-primary registrations carry a - /// non-unrestricted filter (no load needed) or when the load itself - /// failed (e.g. the session isn't present anywhere yet). - pub(crate) async fn load_metadata_for_filter( - &self, - scroll_id: Uuid, - regs: &[std::sync::Arc], - primary_name: &str, - ) -> Option { - let any_restricted = regs.iter().any(|r| { - r.name != primary_name - && r.enabled - && r.write_active - && !r.filter.is_unrestricted() - }); - if !any_restricted { - return None; - } - self.get_session_metadata(scroll_id, None).await.ok() - } - - // ------------------------------------------------------------------ - // Registration & alias detection - // ------------------------------------------------------------------ - - /// Register a session with alias detection. - /// - /// Ported from `FileBasedArchivist::register_session`. Persistence routes - /// through the backend's `SessionMappingBackend::put_mapping` and - /// `ArchiveBackend::put_session` methods; alias detection - /// (Accepted vs. Aliased) stays at the coordinator. 
- pub async fn register_session( - &self, - req: RegisterSessionRequest, - archive: Option, - ) -> Result { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - let primary_mapping = primary.backend.as_session_mapping().ok_or_else(|| { - ArchivistError::PrimaryUnavailable { - name: primary.name.clone(), - reason: "backend lacks SessionMapping capability".into(), - } - })?; - let primary_registry = primary.backend.as_connector_registry().ok_or_else(|| { - ArchivistError::PrimaryUnavailable { - name: primary.name.clone(), - reason: "backend lacks ConnectorRegistry capability".into(), - } - })?; - - // Generate scroll_id (use custom_scroll_id only if it's UUID7, otherwise generate new) - // This validation prevents UUID4 leaks into folder names. - let scroll_id = match req.custom_scroll_id { - Some(uuid) if uuid.get_version_num() == 7 => { - tracing::debug!("Using provided UUID7 as scroll_id: {}", uuid); - uuid - } - Some(uuid) => { - tracing::warn!( - "Rejected non-UUID7 custom_scroll_id: {} (version {}), generating fresh UUID7", - uuid, - uuid.get_version_num() - ); - Uuid::now_v7() - } - None => Uuid::now_v7(), - }; - - // Validate that the connector exists on the PRIMARY. - if primary_registry - .get_connector(req.connector_uid) - .await? - .is_none() - { - return Err(ArchivistError::ConnectorUnknown(req.connector_uid)); - } - - // Alias detection: if a mapping already exists for this - // (connector_uid, native_session_id) on the PRIMARY, return ALIASED. - // Alias detection stays on the primary only — the answer is canonical - // and must not be per-backend. - if let Some(existing_scroll) = primary_mapping - .get_mapping(req.connector_uid, &req.native_session_id) - .await? - { - return Ok(RegisterSessionResponse { - status: RegisterStatus::Aliased, - scroll_id: existing_scroll, - alias_of: Some(existing_scroll), - }); - } - - // ACCEPTED: write mapping first, then the session metadata. 
This order - // matches the original `FileBasedArchivist` sequence — if metadata - // creation fails, the mapping still lets `resolve_session` work, and - // the next `append_messages` reconstructs a minimal session.json. - let now = Utc::now(); - - // Write mapping on PRIMARY first. Any failure here propagates — the - // canonical mapping must succeed for the operation to be meaningful. - if let Err(e) = primary_mapping - .put_mapping(req.connector_uid, &req.native_session_id, scroll_id) - .await - { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - - let session_metadata = SessionMetadata { - version: 1, - scroll_id, - created_at: now, - updated_at: now, - title: req.title.clone(), - connector_uid: req.connector_uid, - native_session_id: Some(req.native_session_id.clone()), - agent_id: req.agent_id.clone(), - parent_scroll_id: req.parent_scroll_id, - continuation: req.continuation, - tags: Vec::new(), - metadata: req.metadata.clone(), - no_update: false, - kind: crate::types::SessionKind::Chat, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - completeness: req.completeness, - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: req.is_subagent, - subagent_type: req.subagent_type.clone(), - spawning_tool_use_id: req.spawning_tool_use_id.clone(), - }; - - // Mirror the original best-effort semantics: if writing session.json - // fails, log and proceed — the mapping is already durable. - match &primary.write_policy { - crate::registry::WritePolicy::Inline => { - if let Err(e) = primary.backend.put_session(session_metadata.clone()).await { - tracing::warn!( - scroll_id = %scroll_id, - native_session_id = %req.native_session_id, - connector_uid = %req.connector_uid, - error = %e, - "Failed to write session metadata after mapping write. 
\ - Session is registered but metadata will be created on first message write." - ); - self.record_write_failure(&primary, &format!("{e}")).await; - } else { - self.record_write_success(&primary).await; - } - } - crate::registry::WritePolicy::Queued { .. } => { - let writer = primary - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - // Queued put_session is fire-and-forget; errors drift health - // on the writer task side. - let _ = writer - .enqueue(crate::registry::writer::WriteOp::PutSession( - session_metadata.clone(), - )) - .await; - } - } - - // Secondaries: fan out mapping + session with capability filter. - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - - // Per-archive include/exclude filter. Checked before capability so - // cheaper checks come first. - if !reg.filter.allows(&session_metadata, &session_metadata.connector_uid) { - tracing::debug!( - archive = %reg.name, - scroll_id = %scroll_id, - op = "register_session", - "filter_skip" - ); - continue; - } - - // Mapping on secondary (capability check). Mapping writes have no - // `WriteOp` variant (SessionMapping is a sub-trait, not part of the - // queued dispatch surface), so they remain inline in both policies. - let Some(sec_mapping) = reg.backend.as_session_mapping() else { - tracing::debug!( - backend = reg.name.as_str(), - type_name = reg.type_name, - op = "register_session:mapping", - "capability_skip" - ); - continue; - }; - if let Err(e) = sec_mapping - .put_mapping(req.connector_uid, &req.native_session_id, scroll_id) - .await - { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - continue; - } - - // put_session on secondary — honours the secondary's write_policy. 
- match ®.write_policy { - crate::registry::WritePolicy::Inline => { - if let Err(e) = reg.backend.put_session(session_metadata.clone()).await { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - crate::registry::WritePolicy::Queued { .. } => { - let writer = reg - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - if let Err(e) = writer - .enqueue(crate::registry::writer::WriteOp::PutSession( - session_metadata.clone(), - )) - .await - { - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } - } - } - } - - Ok(RegisterSessionResponse { - status: RegisterStatus::Accepted, - scroll_id, - alias_of: None, - }) - } - - /// Resolve a native session ID into a scroll_id via the mapping table. - /// - /// A missing mapping is surfaced as `SessionUnknown(Uuid::nil())` to keep - /// the legacy `Archivist::resolve_session` contract (caller expects an - /// error, not `Ok(None)`). - pub async fn resolve_session( - &self, - connector_uid: Uuid, - native_session_id: &str, - archive: Option, - ) -> Result { - let backend = self.resolve_backend(archive).await?; - let mapping = backend.as_session_mapping().ok_or_else(|| { - ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::SessionMapping, - backend: "selected".into(), - } - })?; - - match mapping.get_mapping(connector_uid, native_session_id).await? { - Some(scroll_id) => Ok(scroll_id), - None => Err(ArchivistError::SessionUnknown(Uuid::nil())), - } - } - - /// Locate the `(connector_uid, scroll_id)` that owns a native session id. 
- pub async fn find_session_owner( - &self, - native_session_id: &str, - archive: Option, - ) -> Result> { - let backend = self.resolve_backend(archive).await?; - let mapping = backend.as_session_mapping().ok_or_else(|| { - ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::SessionMapping, - backend: "selected".into(), - } - })?; - mapping.find_owner(native_session_id).await - } - - // ------------------------------------------------------------------ - // Mandatory primitive dispatch - // ------------------------------------------------------------------ - - /// Fetch session metadata. `None` from the backend becomes - /// `SessionUnknown` so callers can treat a missing session as an error. - pub async fn get_session_metadata( - &self, - scroll_id: Uuid, - _archive: Option, - ) -> Result { - // `archive` is now ignored for reads; routing picks the highest-priority - // backend that has the session. - self.read_walk_per_session( - scroll_id, - |_reg| true, - |backend| async move { backend.get_session(scroll_id).await }, - ) - .await? - .ok_or(ArchivistError::SessionUnknown(scroll_id)) - } - - /// List sessions with cursor pagination. The `archive` field on the query - /// selects the backend; it is consumed by the coordinator and not passed - /// on to the backend implementation. 
- pub async fn list_sessions_paged( - &self, - mut query: SessionListQuery, - ) -> Result { - let archive = query.archive.take(); - let backend = self.resolve_backend(archive).await?; - backend.list_sessions_paged(query).await - } - - pub async fn append_messages( - &self, - scroll_id: Uuid, - messages: Vec, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - match &primary.write_policy { - crate::registry::WritePolicy::Inline => { - if let Err(e) = primary - .backend - .append_messages(scroll_id, messages.clone()) - .await - { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - } - crate::registry::WritePolicy::Queued { .. } => { - let writer = primary - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - writer - .enqueue(crate::registry::writer::WriteOp::AppendMessages { - scroll_id, - msgs: messages.clone(), - }) - .await?; - } - } - - // Load metadata once for filter checks. Only needed if at least one - // non-primary secondary has a non-unrestricted filter. 
- let session_metadata_for_filter = self - .load_metadata_for_filter(scroll_id, ®s, &primary.name) - .await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) { - tracing::debug!( - archive = %reg.name, - scroll_id = %scroll_id, - op = "append_messages", - "filter_skip" - ); - continue; - } - match ®.write_policy { - crate::registry::WritePolicy::Inline => { - if let Err(e) = reg.backend.append_messages(scroll_id, messages.clone()).await - { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - crate::registry::WritePolicy::Queued { .. } => { - let writer = reg - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - if let Err(e) = writer - .enqueue(crate::registry::writer::WriteOp::AppendMessages { - scroll_id, - msgs: messages.clone(), - }) - .await - { - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } - } - } - } - Ok(()) - } - - pub async fn get_messages_paged( - &self, - scroll_id: Uuid, - cursor: Option, - limit: usize, - _archive: Option, - ) -> Result { - // `archive` is now ignored for reads; routing picks the highest-priority - // backend that has the session. - Ok(self - .read_walk_per_session( - scroll_id, - |_reg| true, - |backend| { - let cursor = cursor.clone(); - async move { - backend - .get_messages_paged(scroll_id, cursor, limit) - .await - .map(Some) - } - }, - ) - .await? - .unwrap_or(MessagePage { - items: Vec::new(), - next_cursor: None, - })) - } - - /// Fetch all messages for a session by draining every cursor page. 
- /// - /// This is a convenience wrapper over `get_messages_paged` that reconstructs - /// the flat `Vec` shape the pre-Phase-2 `Archivist::get_messages` trait - /// method promised. Consumers that know they want to stream large histories - /// should prefer `get_messages_paged` directly. - pub async fn get_messages( - &self, - scroll_id: Uuid, - archive: Option, - ) -> Result> { - let mut out = Vec::new(); - let mut cursor = None; - loop { - let page = self - .get_messages_paged( - scroll_id, - cursor, - crate::types::MAX_PAGE_LIMIT, - archive.clone(), - ) - .await?; - out.extend(page.items); - match page.next_cursor { - Some(c) => cursor = Some(c), - None => return Ok(out), - } - } - } - - /// Return a slice of messages by offset/limit (sorted chronologically). - /// - /// Ported from `FileBasedArchivist::get_messages_range`. This loads the - /// full message set (via `get_messages`) and slices it — it is NOT cursor- - /// based and should only be used for small offsets / tests. For anything - /// production-sized, use `get_messages_paged`. - pub async fn get_messages_range( - &self, - scroll_id: Uuid, - offset: usize, - limit: usize, - archive: Option, - ) -> Result> { - let all = self.get_messages(scroll_id, archive).await?; - Ok(all.into_iter().skip(offset).take(limit).collect()) - } - - pub async fn count_messages( - &self, - scroll_id: Uuid, - _archive: Option, - ) -> Result { - // `archive` is now ignored for reads; routing picks the highest-priority - // backend that has the session. - Ok(self - .read_walk_per_session( - scroll_id, - |_reg| true, - |backend| async move { backend.count_messages(scroll_id).await.map(Some) }, - ) - .await? 
- .unwrap_or(0)) - } - - pub async fn delete_session( - &self, - scroll_id: Uuid, - _archive: Option, // ignored: delete spans every backend that has the session - ) -> Result<()> { - let regs: Vec> = - self.registrations.read().await.clone(); - - let mut read_only_violations: Vec = Vec::new(); - let mut last_required_error: Option = None; - - for reg in regs.iter() { - if !reg.enabled { - continue; - } - - // Cheap existence check before attempting delete. We treat read failures - // here as "backend doesn't have it" to avoid cascading errors. - let exists = reg - .backend - .get_session(scroll_id) - .await - .ok() - .flatten() - .is_some(); - if !exists { - continue; - } - - if !reg.write_active { - read_only_violations.push(reg.name.clone()); - continue; - } - - match ®.write_policy { - crate::registry::WritePolicy::Inline => { - match reg.backend.delete_session(scroll_id).await { - Ok(()) => self.record_write_success(reg).await, - Err(e) => { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required - && last_required_error.is_none() - { - last_required_error = Some(e); - } - } - } - } - crate::registry::WritePolicy::Queued { .. } => { - if let Some(writer) = reg.writer.as_ref() { - let _ = writer - .enqueue(crate::registry::writer::WriteOp::DeleteSession { scroll_id }) - .await; - } - } - } - } - - // Cache invalidation regardless of outcome — the session no longer has a stable home. 
- self.read_cache.invalidate(scroll_id).await; - - if let Some(e) = last_required_error { - return Err(e); - } - if let Some(name) = read_only_violations.into_iter().next() { - return Err(ArchivistError::DeleteOnReadOnlyBackend { - backend: name, - scroll_id, - }); - } - Ok(()) - } - - pub async fn clear_session_messages( - &self, - scroll_id: Uuid, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - match &primary.write_policy { - crate::registry::WritePolicy::Inline => { - if let Err(e) = primary.backend.clear_session_messages(scroll_id).await { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - } - crate::registry::WritePolicy::Queued { .. } => { - let writer = primary - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - writer - .enqueue(crate::registry::writer::WriteOp::ClearSessionMessages { - scroll_id, - }) - .await?; - } - } - - let session_metadata_for_filter = self - .load_metadata_for_filter(scroll_id, ®s, &primary.name) - .await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) { - tracing::debug!( - archive = %reg.name, - scroll_id = %scroll_id, - op = "clear_session_messages", - "filter_skip" - ); - continue; - } - match ®.write_policy { - crate::registry::WritePolicy::Inline => { - if let Err(e) = reg.backend.clear_session_messages(scroll_id).await { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - crate::registry::WritePolicy::Queued { .. 
} => { - let writer = reg - .writer - .as_ref() - .expect("queued policy implies writer handle present"); - if let Err(e) = writer - .enqueue(crate::registry::writer::WriteOp::ClearSessionMessages { - scroll_id, - }) - .await - { - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } - } - } - } - Ok(()) - } - - // ------------------------------------------------------------------ - // Read-modify-write metadata wrappers - // ------------------------------------------------------------------ - - /// Update `title` and/or `model` on an existing session. - /// - /// Ported from `FileBasedArchivist::update_session_metadata`. `title` is - /// set directly; `model` is written into `metadata.model` (JSON object). - /// `updated_at` is always bumped. - /// - /// NOTE: Read-mutate-write operations silently fall through to inline - /// even under `WritePolicy::Queued`. There is no `WriteOp::UpdateSessionMetadata` - /// variant because RMW doesn't compose with batching — the read-side of the - /// RMW must see a consistent, already-persisted session. - pub async fn update_session_metadata( - &self, - scroll_id: Uuid, - title: Option, - model: Option, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - let apply = |backend: std::sync::Arc, - title: Option, - model: Option| - -> std::pin::Pin>> + Send>> { - Box::pin(async move { - let mut session_metadata = backend - .get_session(scroll_id) - .await? 
- .ok_or(ArchivistError::SessionUnknown(scroll_id))?; - - if let Some(new_title) = title { - session_metadata.title = Some(new_title); - } - if let Some(new_model) = model { - if let Some(obj) = session_metadata.metadata.as_object_mut() { - obj.insert("model".to_string(), serde_json::Value::String(new_model)); - } - } - - session_metadata.updated_at = Utc::now(); - let title_dbg = session_metadata.title.clone(); - backend.put_session(session_metadata).await?; - Ok(title_dbg) - }) - }; - - let title_dbg = match apply(primary.backend.clone(), title.clone(), model.clone()).await { - Ok(t) => { - self.record_write_success(&primary).await; - t - } - Err(e) => { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - }; - - let session_metadata_for_filter = self - .load_metadata_for_filter(scroll_id, ®s, &primary.name) - .await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) { - tracing::debug!( - archive = %reg.name, - scroll_id = %scroll_id, - op = "update_session_metadata", - "filter_skip" - ); - continue; - } - if let Err(e) = apply(reg.backend.clone(), title.clone(), model.clone()).await { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - - tracing::info!( - scroll_id = %scroll_id, - title = ?title_dbg, - "Updated session metadata" - ); - - Ok(()) - } - - /// Update ACP-specific metadata (`models`, `modes`, `config_options`). - /// - /// NOTE: read-mutate-write; falls through to inline under - /// `WritePolicy::Queued` (no `WriteOp` variant). 
- pub async fn update_session_acp_metadata( - &self, - scroll_id: Uuid, - models: Option, - modes: Option, - config_options: Option, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - let apply = |backend: std::sync::Arc, - models: Option, - modes: Option, - config_options: Option| - -> std::pin::Pin> + Send>> { - Box::pin(async move { - let mut session_metadata = backend - .get_session(scroll_id) - .await? - .ok_or(ArchivistError::SessionUnknown(scroll_id))?; - - if let Some(new_models) = models { - session_metadata.models = Some(new_models); - } - if let Some(new_modes) = modes { - session_metadata.modes = Some(new_modes); - } - if let Some(new_config_options) = config_options { - session_metadata.config_options = Some(new_config_options); - } - - session_metadata.updated_at = Utc::now(); - - let has_models = session_metadata.models.is_some(); - let has_modes = session_metadata.modes.is_some(); - let has_config_options = session_metadata.config_options.is_some(); - - backend.put_session(session_metadata).await?; - Ok((has_models, has_modes, has_config_options)) - }) - }; - - let (has_models, has_modes, has_config_options) = match apply( - primary.backend.clone(), - models.clone(), - modes.clone(), - config_options.clone(), - ) - .await - { - Ok(flags) => { - self.record_write_success(&primary).await; - flags - } - Err(e) => { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - }; - - let session_metadata_for_filter = self - .load_metadata_for_filter(scroll_id, ®s, &primary.name) - .await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) { - tracing::debug!( - archive = %reg.name, - scroll_id = %scroll_id, - op = "update_session_acp_metadata", - "filter_skip" - ); - 
continue; - } - if let Err(e) = apply( - reg.backend.clone(), - models.clone(), - modes.clone(), - config_options.clone(), - ) - .await - { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - - tracing::info!( - scroll_id = %scroll_id, - has_models = has_models, - has_modes = has_modes, - has_config_options = has_config_options, - "Updated session agent metadata" - ); - - Ok(()) - } - - /// Update the Matrix sharing flags on a session. `matrix_shared_at` is - /// stamped on the first transition to `sharing_active=true`. - /// - /// NOTE: read-mutate-write; falls through to inline under - /// `WritePolicy::Queued` (no `WriteOp` variant). - pub async fn update_session_sharing( - &self, - scroll_id: Uuid, - matrix_room_id: Option, - sharing_active: bool, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - let apply = |backend: std::sync::Arc, - matrix_room_id: Option, - sharing_active: bool| - -> std::pin::Pin> + Send>> { - Box::pin(async move { - let mut session_metadata = backend - .get_session(scroll_id) - .await? 
- .ok_or(ArchivistError::SessionUnknown(scroll_id))?; - - session_metadata.matrix_room_id = matrix_room_id; - session_metadata.matrix_sharing_active = sharing_active; - if sharing_active && session_metadata.matrix_shared_at.is_none() { - session_metadata.matrix_shared_at = Some(Utc::now()); - } - session_metadata.updated_at = Utc::now(); - - backend.put_session(session_metadata).await - }) - }; - - if let Err(e) = apply(primary.backend.clone(), matrix_room_id.clone(), sharing_active).await - { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - - let session_metadata_for_filter = self - .load_metadata_for_filter(scroll_id, ®s, &primary.name) - .await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) { - tracing::debug!( - archive = %reg.name, - scroll_id = %scroll_id, - op = "update_session_sharing", - "filter_skip" - ); - continue; - } - if let Err(e) = - apply(reg.backend.clone(), matrix_room_id.clone(), sharing_active).await - { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - - tracing::debug!( - scroll_id = %scroll_id, - sharing_active = sharing_active, - "Updated session sharing metadata" - ); - - Ok(()) - } - - /// Set the `completeness` level on a session. - /// - /// NOTE: read-mutate-write; falls through to inline under - /// `WritePolicy::Queued` (no `WriteOp` variant). 
- pub async fn update_session_completeness( - &self, - scroll_id: Uuid, - completeness: SessionCompleteness, - archive: Option, - ) -> Result<()> { - let primary = self.resolve_primary(archive.clone()).await?; - let regs: Vec> = - self.registrations.read().await.clone(); - - let apply = |backend: std::sync::Arc, - completeness: SessionCompleteness| - -> std::pin::Pin> + Send>> { - Box::pin(async move { - let mut session_metadata = backend - .get_session(scroll_id) - .await? - .ok_or(ArchivistError::SessionUnknown(scroll_id))?; - - session_metadata.completeness = completeness; - session_metadata.updated_at = Utc::now(); - - backend.put_session(session_metadata).await - }) - }; - - if let Err(e) = apply(primary.backend.clone(), completeness).await { - self.record_write_failure(&primary, &format!("{e}")).await; - return Err(e); - } - self.record_write_success(&primary).await; - - let session_metadata_for_filter = self - .load_metadata_for_filter(scroll_id, ®s, &primary.name) - .await; - - for reg in regs.iter() { - if reg.name == primary.name { - continue; - } - if !reg.enabled || !reg.write_active { - continue; - } - if !Self::filter_allows(reg, session_metadata_for_filter.as_ref()) { - tracing::debug!( - archive = %reg.name, - scroll_id = %scroll_id, - op = "update_session_completeness", - "filter_skip" - ); - continue; - } - if let Err(e) = apply(reg.backend.clone(), completeness).await { - self.record_write_failure(reg, &format!("{e}")).await; - if reg.failure_mode == crate::registry::FailureMode::Required { - return Err(e); - } - } else { - self.record_write_success(reg).await; - } - } - - tracing::debug!( - scroll_id = %scroll_id, - completeness = ?completeness, - "Updated session completeness" - ); - - Ok(()) - } - - // ------------------------------------------------------------------ - // Listing helpers - // ------------------------------------------------------------------ - - /// Return every session in the selected archive whose - /// 
`matrix_sharing_active` flag is set. - /// - /// `SessionListQuery` does not yet expose a `matrix_sharing_active` - /// filter, so we page through every session (including hidden ones) and - /// filter client-side. Included-hidden matches the original - /// `FileBasedArchivist::list_sessions_with_active_sharing`, which scanned - /// `.contexts/` without respecting `no_update` / `is_subagent`. - pub async fn list_sessions_with_active_sharing( - &self, - archive: Option, - ) -> Result> { - let backend = self.resolve_backend(archive).await?; - - let mut results = Vec::new(); - let mut q = SessionListQuery { - include_hidden: true, - limit: MAX_PAGE_LIMIT, - ..SessionListQuery::default() - }; - - loop { - let page = backend.list_sessions_paged(q.clone()).await?; - for session in page.items { - if session.matrix_sharing_active { - results.push(session); - } - } - match page.next_cursor { - Some(c) => q.cursor = Some(c), - None => break, - } - } - - Ok(results) - } - - // ------------------------------------------------------------------ - // Move & copy - // ------------------------------------------------------------------ - - /// Move a session to a different connector. - /// - /// Ported from `FileBasedArchivist::move_session`: updates the session's - /// `connector_uid`, removes the mapping from the source connector's - /// table (via `rewrite_connector_mappings`), and appends a new mapping - /// to the target. 
- pub async fn move_session_to_connector( - &self, - scroll_id: Uuid, - target_connector_uid: Uuid, - archive: Option, - ) -> Result<()> { - let backend = self.resolve_backend(archive).await?; - let mapping = backend.as_session_mapping().ok_or_else(|| { - ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::SessionMapping, - backend: "selected".into(), - } - })?; - let registry = backend.as_connector_registry().ok_or_else(|| { - ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::ConnectorRegistry, - backend: "selected".into(), - } - })?; - - // 1. Read session metadata. - let mut metadata = backend - .get_session(scroll_id) - .await? - .ok_or(ArchivistError::SessionUnknown(scroll_id))?; - let old_connector_uid = metadata.connector_uid; - if old_connector_uid == target_connector_uid { - return Ok(()); - } - - // 2. Verify target connector exists. - if registry.get_connector(target_connector_uid).await?.is_none() { - return Err(ArchivistError::ConnectorUnknown(target_connector_uid)); - } - - // 3. Update session metadata with new connector_uid. - metadata.connector_uid = target_connector_uid; - let native_session_id = metadata - .native_session_id - .clone() - .unwrap_or_else(|| scroll_id.to_string()); - backend.put_session(metadata).await?; - - // 4. Remove mapping from old connector by rewriting its sessions - // table without the moved row. The trait's - // `rewrite_connector_mappings` also handles cache invalidation. - let old_mappings = mapping.list_mappings_for_connector(old_connector_uid).await?; - let filtered: Vec<_> = old_mappings - .into_iter() - .filter(|m| m.scroll_id != scroll_id) - .collect(); - mapping - .rewrite_connector_mappings(old_connector_uid, filtered) - .await?; - - // 5. Append mapping to target connector. 
- mapping - .put_mapping(target_connector_uid, &native_session_id, scroll_id) - .await?; - - tracing::info!( - scroll_id = %scroll_id, - from = %old_connector_uid, - to = %target_connector_uid, - "Moved session to new connector" - ); - - Ok(()) - } - - /// Bulk-move many sessions. Collects per-session errors into a - /// `MoveReport` without aborting the whole batch. - pub async fn move_sessions_to_connector( - &self, - scroll_ids: Vec, - target_connector_uid: Uuid, - archive: Option, - ) -> Result { - let mut report = MoveReport::default(); - - for scroll_id in scroll_ids { - match self - .move_session_to_connector(scroll_id, target_connector_uid, archive.clone()) - .await - { - Ok(()) => { - report.moved += 1; - } - Err(e) => { - report.failed += 1; - report - .errors - .push(format!("Failed to move session {}: {}", scroll_id, e)); - tracing::warn!( - scroll_id = %scroll_id, - error = %e, - "Failed to move session during bulk move" - ); - } - } - } - - tracing::info!( - moved = report.moved, - failed = report.failed, - target = %target_connector_uid, - "Completed bulk session move" - ); - - Ok(report) - } - - /// Copy a session to a new connector under a fresh scroll_id. - /// - /// Copies the session metadata (with updated `scroll_id`, `connector_uid`, - /// `created_at`, `updated_at`) and all messages, page by page. The target - /// connector gets a new mapping entry. - pub async fn copy_session_to_connector( - &self, - scroll_id: Uuid, - target_connector_uid: Uuid, - archive: Option, - ) -> Result { - let backend = self.resolve_backend(archive).await?; - let mapping = backend.as_session_mapping().ok_or_else(|| { - ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::SessionMapping, - backend: "selected".into(), - } - })?; - let registry = backend.as_connector_registry().ok_or_else(|| { - ArchivistError::CapabilityNotSupported { - capability: ArchiveCapability::ConnectorRegistry, - backend: "selected".into(), - } - })?; - - // 1. 
Read source session metadata. - let source_metadata = backend - .get_session(scroll_id) - .await? - .ok_or(ArchivistError::SessionUnknown(scroll_id))?; - - // 2. Verify target connector exists. - if registry.get_connector(target_connector_uid).await?.is_none() { - return Err(ArchivistError::ConnectorUnknown(target_connector_uid)); - } - - // 3. Create new scroll_id + timestamp. - let new_scroll_id = Uuid::now_v7(); - let now = Utc::now(); - - // 4. Persist new session metadata with updated identity. - let new_metadata = SessionMetadata { - scroll_id: new_scroll_id, - connector_uid: target_connector_uid, - created_at: now, - updated_at: now, - ..source_metadata - }; - let native_session_id = new_metadata - .native_session_id - .clone() - .unwrap_or_else(|| new_scroll_id.to_string()); - backend.put_session(new_metadata).await?; - - // 5. Copy messages via the paged primitive. Uses MAX_PAGE_LIMIT - // per batch to stream large sessions without loading everything. - let mut cursor: Option = None; - loop { - let page = backend - .get_messages_paged(scroll_id, cursor.clone(), MAX_PAGE_LIMIT) - .await?; - if page.items.is_empty() { - break; - } - - // Rewrite each message's session reference to the new scroll_id - // so the copied session owns its own records. - let rewritten: Vec = page - .items - .into_iter() - .map(|mut m| { - m.session = new_scroll_id; - m - }) - .collect(); - - backend.append_messages(new_scroll_id, rewritten).await?; - - match page.next_cursor { - Some(c) => cursor = Some(c), - None => break, - } - } - - // 6. Register mapping for the new (connector, native_id) → new_scroll_id. 
- mapping - .put_mapping(target_connector_uid, &native_session_id, new_scroll_id) - .await?; - - tracing::info!( - source_scroll_id = %scroll_id, - new_scroll_id = %new_scroll_id, - target_connector = %target_connector_uid, - "Copied session to new connector" - ); - - Ok(new_scroll_id) - } - - // ------------------------------------------------------------------ - // Cross-archive move/copy (Phase 3) - // ------------------------------------------------------------------ - - /// Copy a session from one archive to another, preserving `scroll_id`. - /// - /// Copies session metadata, message pages, DAG edges, and meta events - /// where both sides have the required capability. Leaves the source - /// intact. - pub async fn copy_session( - &self, - scroll_id: Uuid, - from: &str, - to: &str, - ) -> Result<()> { - let from_reg = self - .find_registration(from) - .await - .ok_or_else(|| ArchivistError::ArchiveNameUnknown(from.into()))?; - let to_reg = self - .find_registration(to) - .await - .ok_or_else(|| ArchivistError::ArchiveNameUnknown(to.into()))?; - - if !to_reg.enabled || !to_reg.write_active { - return Err(ArchivistError::PrimaryUnavailable { - name: to_reg.name.clone(), - reason: "target backend is disabled or not write-active".into(), - }); - } - - // 1. Session metadata - let meta = from_reg - .backend - .get_session(scroll_id) - .await? - .ok_or(ArchivistError::SessionUnknown(scroll_id))?; - to_reg.backend.put_session(meta).await?; - - // 2. Messages — page through. - let mut cursor: Option = None; - loop { - let page = from_reg - .backend - .get_messages_paged(scroll_id, cursor.clone(), MAX_PAGE_LIMIT) - .await?; - if page.items.is_empty() { - break; - } - to_reg.backend.append_messages(scroll_id, page.items).await?; - match page.next_cursor { - Some(c) => cursor = Some(c), - None => break, - } - } - - // 3. DAG edges (both sides must support Dag). 
- if let (Some(src_dag), Some(dst_dag)) = - (from_reg.backend.as_dag(), to_reg.backend.as_dag()) - { - for edge in src_dag.get_dag_edges(scroll_id).await? { - dst_dag.append_dag_edge(edge).await?; - } - } - - // 4. Meta events (both sides must support MetaEvents). - if let (Some(src_me), Some(dst_me)) = - (from_reg.backend.as_meta_events(), to_reg.backend.as_meta_events()) - { - let events = src_me.get_meta_events(scroll_id).await?; - if !events.is_empty() { - dst_me.append_meta_events(scroll_id, events).await?; - } - } - - // Cache: leave pointing at `from` (source remains canonical). - Ok(()) - } - - /// Move a session from one archive to another: `copy_session` followed by - /// source-side delete. If the copy fails, the source is intact. If the - /// source-side delete fails AFTER a successful copy, returns - /// `ArchivistError::PartialMove`. - pub async fn move_session( - &self, - scroll_id: Uuid, - from: &str, - to: &str, - ) -> Result<()> { - // 1. Copy. - self.copy_session(scroll_id, from, to).await?; - - // 2. Delete from source only. - let from_reg = self - .find_registration(from) - .await - .ok_or_else(|| ArchivistError::ArchiveNameUnknown(from.into()))?; - - if let Err(e) = from_reg.backend.delete_session(scroll_id).await { - self.record_write_failure(&from_reg, &format!("{e}")).await; - return Err(ArchivistError::PartialMove { - copied_to: to.into(), - delete_error: Box::new(e), - }); - } - - // 3. Cache: rewrite to `to`. - self.read_cache.rewrite(scroll_id, to.into()).await; - - Ok(()) - } -} diff --git a/crates/dirigent_archivist/src/coordinator/tests.rs b/crates/dirigent_archivist/src/coordinator/tests.rs deleted file mode 100644 index 02ccb65..0000000 --- a/crates/dirigent_archivist/src/coordinator/tests.rs +++ /dev/null @@ -1,195 +0,0 @@ -//! Coordinator orchestration unit tests using `MockBackend`. -//! -//! These tests exercise alias detection, move/copy semantics, DAG walks, -//! and cleanup policies without any disk I/O. 
- -#![cfg(test)] - -use std::sync::Arc; - -use tokio::sync::RwLock; -use uuid::Uuid; - -use crate::backend::mock::MockBackend; -use crate::backend::ArchiveBackend; -use crate::coordinator::Archivist; -use crate::registry::{ - cache::ReadCache, ArchiveRegistration, FailureMode, WritePolicy, -}; -use crate::types::{ - DagEdge, MessageRecord, RegisterConnectorRequest, RegisterStatus, SessionCompleteness, - SessionKind, SessionMetadata, -}; - -/// Construct a blank `SessionMetadata` with the given `scroll_id` and -/// `connector_uid`. Sensible defaults for every other field. -fn blank_session(scroll_id: Uuid, connector_uid: Uuid) -> SessionMetadata { - let now = chrono::Utc::now(); - SessionMetadata { - version: 1, - scroll_id, - created_at: now, - updated_at: now, - title: None, - connector_uid, - native_session_id: None, - agent_id: None, - parent_scroll_id: None, - continuation: None, - tags: Vec::new(), - metadata: serde_json::Value::Null, - no_update: false, - kind: SessionKind::Chat, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - completeness: SessionCompleteness::Complete, - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - } -} - -/// Construct a blank `MessageRecord` scoped to the given session with a -/// freshly generated `message_id` and current timestamp. 
-fn blank_message(session: Uuid) -> MessageRecord { - MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session, - parent_id: None, - ts: chrono::Utc::now(), - role: "user".into(), - author: None, - content_md: String::new(), - content_parts: None, - attachments: Vec::new(), - metadata: serde_json::Value::Null, - } -} - -async fn make_coordinator_with_single_mock() -> Archivist { - let backend: Arc = Arc::new(MockBackend::new()); - let initial_health = backend.health_check().await; - let reg = Arc::new(ArchiveRegistration::new( - "main".into(), - "mock", - backend, - /* write_active */ true, - FailureMode::Required, - /* read_priority */ 0, - /* enabled */ true, - WritePolicy::Inline, - /* writer */ None, - initial_health, - )); - Archivist { - registrations: RwLock::new(vec![reg]), - read_cache: Arc::new(ReadCache::new()), - registry_path: std::path::PathBuf::from("/mock/.archives.json"), - } -} - -#[tokio::test] -async fn register_connector_assigns_uid_and_returns_accepted() { - let coord = make_coordinator_with_single_mock().await; - let req = RegisterConnectorRequest { - r#type: "OpenCode".into(), - title: "test".into(), - client_native_id: "opencode@localhost".into(), - custom_uid: None, - metadata: serde_json::Value::Null, - fingerprint: None, - }; - let resp = coord.register_connector(req, None).await.expect("register"); - assert!(matches!(resp.status, RegisterStatus::Accepted)); - assert_ne!(resp.connector_uid, Uuid::nil()); -} - -#[tokio::test] -async fn register_connector_aliases_on_duplicate_native_id() { - let coord = make_coordinator_with_single_mock().await; - let mk_req = || RegisterConnectorRequest { - r#type: "OpenCode".into(), - title: "test".into(), - client_native_id: "opencode@localhost".into(), - custom_uid: None, - metadata: serde_json::Value::Null, - fingerprint: None, - }; - let first = coord.register_connector(mk_req(), None).await.unwrap(); - let second = coord.register_connector(mk_req(), None).await.unwrap(); - 
assert_eq!(second.connector_uid, first.connector_uid); - assert!(matches!(second.status, RegisterStatus::Aliased)); -} - -#[tokio::test] -async fn get_session_tree_walks_full_dag() { - let coord = make_coordinator_with_single_mock().await; - let connector_uid = Uuid::now_v7(); - let root = Uuid::now_v7(); - let child_a = Uuid::now_v7(); - let child_b = Uuid::now_v7(); - let grand = Uuid::now_v7(); - - let backend = coord.registrations.read().await[0].backend.clone(); - for id in [root, child_a, child_b, grand] { - backend - .put_session(blank_session(id, connector_uid)) - .await - .unwrap(); - } - - for (p, c) in [(root, child_a), (root, child_b), (child_a, grand)] { - coord - .append_dag_edge( - DagEdge { - parent: p, - child: c, - agent_id: String::new(), - subagent_type: None, - tool_use_id: None, - ts: Some(chrono::Utc::now()), - }, - None, - ) - .await - .unwrap(); - } - - let edges = coord.get_session_tree(root, None).await.unwrap(); - assert_eq!(edges.len(), 3, "expected 3 edges, got {}", edges.len()); -} - -#[tokio::test] -async fn cleanup_empty_sessions_deletes_only_message_less_sessions() { - let coord = make_coordinator_with_single_mock().await; - - let connector_uid = Uuid::now_v7(); - let empty = Uuid::now_v7(); - let populated = Uuid::now_v7(); - - let backend = coord.registrations.read().await[0].backend.clone(); - for scroll_id in [empty, populated] { - backend - .put_session(blank_session(scroll_id, connector_uid)) - .await - .unwrap(); - } - backend - .append_messages(populated, vec![blank_message(populated)]) - .await - .unwrap(); - - let (deleted, total) = coord.cleanup_empty_sessions(None).await.unwrap(); - assert_eq!(deleted, 1); - assert_eq!(total, 2); - assert!(backend.get_session(empty).await.unwrap().is_none()); - assert!(backend.get_session(populated).await.unwrap().is_some()); -} diff --git a/crates/dirigent_archivist/src/coordinator/types.rs b/crates/dirigent_archivist/src/coordinator/types.rs deleted file mode 100644 index 
71c1588..0000000 --- a/crates/dirigent_archivist/src/coordinator/types.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! Shared data types used by the archivist coordinator. -//! -//! `ArchiveMetadata` is persisted per-archive in the registry file and -//! tracks creation time, path, and the set of connectors registered in -//! the archive. `ArchiveInfo` is the display-friendly projection returned -//! from listing APIs; it extends the metadata with computed fields like -//! session count and default-archive status. - -use std::path::PathBuf; - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Metadata about a single archive. -/// -/// This structure contains all the information needed to track and display -/// an archive without loading its full backend instance. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ArchiveMetadata { - /// Unique name for this archive (e.g., "personal", "work", "experiments") - pub name: String, - - /// Filesystem path to the archive root directory - pub path: PathBuf, - - /// When this archive was first registered with the coordinator - pub created_at: DateTime, - - /// List of connector UIDs registered in this archive - /// - /// This is updated as connectors are registered/unregistered and provides - /// a quick way to see which connectors belong to which archive. - pub connector_uids: Vec, -} - -/// Display-friendly information about an archive. -/// -/// This struct is returned by listing operations and includes computed -/// fields like session count and default status. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ArchiveInfo { - /// Unique name for this archive - pub name: String, - - /// Filesystem path to the archive root directory - pub path: PathBuf, - - /// When this archive was first registered - pub created_at: DateTime, - - /// Total number of sessions across all connectors in this archive - /// - /// This is computed by counting sessions across all connectors and - /// may be expensive for large archives. - pub session_count: usize, - - /// Whether this is the current default archive - pub is_default: bool, -} diff --git a/crates/dirigent_archivist/src/error.rs b/crates/dirigent_archivist/src/error.rs deleted file mode 100644 index 0d42863..0000000 --- a/crates/dirigent_archivist/src/error.rs +++ /dev/null @@ -1,314 +0,0 @@ -//! Error types for the Archivist. -//! -//! This module defines all error types that can occur during archival operations, -//! including I/O errors, JSON errors, and domain-specific errors for connectors -//! and sessions. - -use std::path::PathBuf; -use thiserror::Error; -use uuid::Uuid; - -/// Result type alias for Archivist operations -pub type Result = std::result::Result; - -/// Errors that can occur during archival operations -#[derive(Debug, Error)] -pub enum ArchivistError { - /// Connector with the given UID was not found - #[error("Connector not found: {0}")] - ConnectorUnknown(Uuid), - - /// Session with the given scroll ID was not found - #[error("Session not found: {0}")] - SessionUnknown(Uuid), - - /// UUID collision detected with inconsistent data - /// - /// This occurs when a custom UUID is provided that matches an existing - /// entity but with different attributes (e.g., different connector type). 
- #[error("UUID collision: {0}")] - CollisionInconsistent(Uuid), - - /// Invalid request (e.g., missing required fields, invalid format) - #[error("Invalid request: {0}")] - InvalidRequest(String), - - /// I/O error during file operations - #[error("IO error: {0}")] - Io(#[from] std::io::Error), - - /// JSON serialization/deserialization error - #[error("JSON error: {0}")] - Json(#[from] serde_json::Error), - - // Multi-archive errors - /// Invalid archive name (empty or contains invalid characters) - #[error("Invalid archive name: {0}")] - InvalidArchiveName(String), - - /// Archive already exists with the given name - #[error("Archive already exists: {0}")] - ArchiveAlreadyExists(String), - - /// Archive not found with the given name - #[error("Archive not found: {0}")] - ArchiveNotFound(String), - - /// Archive path conflict (path is already used by another archive) - #[error("Archive path conflict: {0}")] - ArchivePathConflict(PathBuf), - - /// Cannot remove default archive without force flag - #[error("Cannot remove default archive without force flag")] - CannotRemoveDefaultArchive, - - /// Archive is not empty (has sessions) - #[error("Archive '{name}' is not empty ({session_count} sessions)")] - ArchiveNotEmpty { - name: String, - session_count: usize, - }, - - /// No archives configured - #[error("No archives configured")] - NoArchivesConfigured, - - /// Failed to load registry file - #[error("Failed to load registry: {0}")] - RegistryLoadError(String), - - /// Failed to parse registry JSON - #[error("Failed to parse registry: {0}")] - RegistryParseError(String), - - /// Failed to serialize registry to JSON - #[error("Failed to serialize registry: {0}")] - RegistrySerializeError(String), - - /// Failed to write registry file - #[error("Failed to write registry: {0}")] - RegistryWriteError(String), - - /// Backend is unavailable (e.g., disk full, connection lost, degraded state) - #[error("Backend {name} is unavailable")] - BackendUnavailable { name: String 
}, - - /// Backend does not support the requested capability - #[error("Backend {backend} does not support capability {capability:?}")] - CapabilityNotSupported { - capability: crate::backend::ArchiveCapability, - backend: String, - }, - - /// Health check for a backend failed - #[error("Health check for backend {name} failed: {reason}")] - BackendHealthCheckFailed { name: String, reason: String }, - - /// Primary write backend is unavailable or misconfigured. - #[error("primary write backend `{name}` is unavailable: {reason}")] - PrimaryUnavailable { name: String, reason: String }, - - /// Session exists on a read-only (not write_active) backend; deletion impossible. - #[error("session {scroll_id} exists in read-only backend `{backend}`; cannot delete")] - DeleteOnReadOnlyBackend { backend: String, scroll_id: uuid::Uuid }, - - /// Move succeeded at the destination but source-side delete failed. - #[error("partial move: copy to `{copied_to}` succeeded but source-side delete failed: {delete_error}")] - PartialMove { - copied_to: String, - delete_error: Box, - }, - - /// Queued-write backend's queue is full. - #[error("write queue full for backend `{backend}` (op `{op}`)")] - WriteQueueFull { - backend: String, - op: &'static str, - }, - - /// The coordinator has no archive configured (ephemeral mode). - #[error("no archive is configured (ephemeral mode)")] - NoArchiveConfigured, - - /// A requested archive name does not exist in the registry. - #[error("archive name `{0}` is unknown")] - ArchiveNameUnknown(String), - - /// Runtime mutation of the archive registry is not supported in Phase 3. - #[error("dynamic registry mutation is not supported (Phase 3 is startup-only)")] - DynamicRegistryUnsupported, - - /// Catch-all for injected failures / legacy call sites. Prefer a typed variant when possible. - #[error("{0}")] - Other(String), -} - -/// Errors raised exclusively at boot, by `Archivist::from_config`. 
-#[derive(Debug, thiserror::Error)] -pub enum ArchivistBootError { - #[error("duplicate archive name `{0}` in config")] - DuplicateName(String), - - #[error("archive `{name}` declares unknown type `{type_name}`")] - UnknownType { name: String, type_name: String }, - - #[error("no `required` write-active backend configured (need at least one primary)")] - NoPrimary, - - #[error("backend `{name}` failed to build: {source}")] - BackendBuild { - name: String, - #[source] - source: crate::registry::BackendBuildError, - }, - - #[error("required backend `{name}` is unavailable at boot: {reason}")] - UnavailableRequiredBackend { name: String, reason: String }, - - #[error("no unrestricted write-active archive — at least one enabled, write_active backend must have an empty filter")] - NoUnrestrictedPrimary, - - #[error("filter for archive `{archive}` rejects all sessions (include_connectors is empty)")] - FilterRejectsEverything { archive: String }, - - #[error("config validation failed: {0}")] - Validation(#[from] crate::registry::ConfigValidationError), -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io; - - #[test] - fn test_error_display() { - let uuid = Uuid::now_v7(); - - // Test ConnectorUnknown - let err = ArchivistError::ConnectorUnknown(uuid); - assert_eq!(err.to_string(), format!("Connector not found: {}", uuid)); - - // Test SessionUnknown - let err = ArchivistError::SessionUnknown(uuid); - assert_eq!(err.to_string(), format!("Session not found: {}", uuid)); - - // Test CollisionInconsistent - let err = ArchivistError::CollisionInconsistent(uuid); - assert_eq!(err.to_string(), format!("UUID collision: {}", uuid)); - - // Test InvalidRequest - let err = ArchivistError::InvalidRequest("missing field".to_string()); - assert_eq!(err.to_string(), "Invalid request: missing field"); - } - - #[test] - fn test_io_error_conversion() { - // Create an I/O error - let io_err = io::Error::new(io::ErrorKind::NotFound, "file not found"); - - // Convert to 
ArchivistError using From trait - let archivist_err: ArchivistError = io_err.into(); - - // Verify it's the right variant - match archivist_err { - ArchivistError::Io(e) => { - assert_eq!(e.kind(), io::ErrorKind::NotFound); - assert_eq!(e.to_string(), "file not found"); - } - _ => panic!("Expected Io variant"), - } - } - - #[test] - fn test_json_error_conversion() { - // Create a JSON error by trying to parse invalid JSON - let json_err = serde_json::from_str::("invalid json").unwrap_err(); - - // Convert to ArchivistError using From trait - let archivist_err: ArchivistError = json_err.into(); - - // Verify it's the right variant - match archivist_err { - ArchivistError::Json(_) => { - // Success - it's a JSON error - } - _ => panic!("Expected Json variant"), - } - } - - #[test] - fn test_result_type_with_question_mark() { - // Test that Result works with the ? operator - fn test_function() -> Result { - // This should compile and work with ? - let _data: serde_json::Value = serde_json::from_str(r#"{"key": "value"}"#)?; - Ok("success".to_string()) - } - - let result = test_function(); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), "success"); - } - - #[test] - fn test_error_chain() { - // Test that errors can be chained properly - fn inner_function() -> std::io::Result { - Err(std::io::Error::new( - std::io::ErrorKind::NotFound, - "inner error", - )) - } - - fn outer_function() -> Result { - // The ? 
operator should automatically convert io::Error to ArchivistError - let _result = inner_function()?; - Ok("success".to_string()) - } - - let result = outer_function(); - assert!(result.is_err()); - - match result { - Err(ArchivistError::Io(e)) => { - assert_eq!(e.kind(), std::io::ErrorKind::NotFound); - } - _ => panic!("Expected Io error"), - } - } - - #[test] - fn test_error_debug() { - let uuid = Uuid::now_v7(); - let err = ArchivistError::ConnectorUnknown(uuid); - - // Verify Debug implementation works - let debug_str = format!("{:?}", err); - assert!(debug_str.contains("ConnectorUnknown")); - assert!(debug_str.contains(&uuid.to_string())); - } - - #[test] - fn test_all_error_variants() { - let uuid = Uuid::now_v7(); - - // Test all variants can be created - let errors = vec![ - ArchivistError::ConnectorUnknown(uuid), - ArchivistError::SessionUnknown(uuid), - ArchivistError::CollisionInconsistent(uuid), - ArchivistError::InvalidRequest("test".to_string()), - ArchivistError::Io(io::Error::new(io::ErrorKind::Other, "test")), - ArchivistError::Json(serde_json::from_str::("bad").unwrap_err()), - ]; - - // Verify each error has a non-empty display string - for err in errors { - let display = err.to_string(); - assert!(!display.is_empty(), "Error display should not be empty"); - - let debug = format!("{:?}", err); - assert!(!debug.is_empty(), "Error debug should not be empty"); - } - } -} diff --git a/crates/dirigent_archivist/src/events.rs b/crates/dirigent_archivist/src/events.rs deleted file mode 100644 index 92adbc2..0000000 --- a/crates/dirigent_archivist/src/events.rs +++ /dev/null @@ -1,2162 +0,0 @@ -//! Event handling for dirigent_core event stream. -//! -//! The EventHandler subscribes to dirigent_core's global event stream and writes -//! to the archive in real-time, accumulating streaming message chunks into complete -//! messages. 
- -use std::collections::HashSet; -use std::sync::Arc; -use std::time::Duration; -use tokio::sync::Mutex; -use uuid::Uuid; -use chrono::Utc; - -use dirigent_protocol::{ - ContentBlock, Event, Message, MessagePart, MessageRole, Session, SessionUpdate, - TurnCompleteTrigger, -}; -use dirigent_protocol::streaming::{BusEvent, BusReceiver}; - -/// Closure that publishes a `BusEvent` to the shared event bus. -/// -/// The archivist cannot depend on `dirigent_core::SharingBus` directly -/// (that would introduce a dependency cycle). Callers in `api::archivist` -/// and `web::server` wrap the core's `SharingBus::publish` call in a -/// closure that matches this signature and install it via -/// [`EventHandler::set_bus_publisher`]. Implementations must spawn the -/// publish onto a tokio task (the call site here is synchronous). -pub type BusPublishFn = Arc; - -use crate::accumulator::MessageAccumulator; -use crate::coordinator::Archivist; -use crate::error::Result; -use crate::types::{ - MessageRecord, MetaEventRecord, MetaEventType, RegisterSessionRequest, - RegisterSessionResponse, RegisterStatus, SessionCompleteness, -}; - -/// Event handler for subscribing to dirigent_core events and archiving them -pub struct EventHandler { - archivist: Arc, - accumulator: Mutex, - /// Track which message IDs have been archived to prevent duplicates - archived_messages: Mutex>, - /// Sessions excluded from archiving (per-session toggle support) - /// Key is "connector_id:session_id" composite key - excluded_sessions: Mutex>, - /// Connectors whose sessions should be excluded from archiving by default - /// This is used for Gateway connectors where users can opt-in to archiving - excluded_connector_ids: Mutex>, - /// Sessions currently being replayed via session/load. - /// SessionUpdate events for these sessions are suppressed to avoid - /// re-archiving messages that already exist in the archive. 
- replaying_sessions: Mutex>, - /// Bus publisher used to emit `SessionRegistered` back onto the shared - /// event bus (`dirigent_core::SharingBus`). Installed via - /// [`Self::set_bus_publisher`] from the boot wiring in the server - /// crate. Without it, downstream consumers (e.g. the web UI's session - /// list refresh) won't receive the signal that a session is durably - /// registered — but archival still functions. - bus_publisher: Option, -} - -impl EventHandler { - /// Create a new event handler - pub fn new(archivist: Arc) -> Self { - Self { - archivist, - accumulator: Mutex::new( - MessageAccumulator::new().expect("Failed to create accumulator"), - ), - archived_messages: Mutex::new(HashSet::new()), - excluded_sessions: Mutex::new(HashSet::new()), - excluded_connector_ids: Mutex::new(HashSet::new()), - replaying_sessions: Mutex::new(HashSet::new()), - bus_publisher: None, - } - } - - /// Install a bus publisher so the archivist can emit its - /// `SessionRegistered` events onto the shared event bus. - /// - /// The callback receives a [`BusEvent`] and is expected to forward it - /// to `dirigent_core::SharingBus::publish` (likely by spawning a tokio - /// task). - pub fn set_bus_publisher(&mut self, publisher: BusPublishFn) { - self.bus_publisher = Some(publisher); - } - - /// Add a connector ID to the list of connectors whose sessions should be excluded by default - /// - /// Sessions from these connectors will start with archiving disabled. Users can still - /// enable archiving for individual sessions using the toggle. - /// - /// This is useful for Gateway connectors where most sessions are transient - /// and don't need to be archived. 
- pub async fn add_excluded_connector(&self, connector_id: &str) { - let mut excluded = self.excluded_connector_ids.lock().await; - excluded.insert(connector_id.to_string()); - tracing::info!( - "Connector '{}' added to excluded list - sessions will not archive by default", - connector_id - ); - } - - /// Remove a connector ID from the list of excluded connectors - /// - /// New sessions from this connector will archive by default (existing exclusions are not removed). - pub async fn remove_excluded_connector(&self, connector_id: &str) { - let mut excluded = self.excluded_connector_ids.lock().await; - excluded.remove(connector_id); - tracing::info!( - "Connector '{}' removed from excluded list - sessions will archive by default", - connector_id - ); - } - - /// Check if a connector is in the excluded list - pub async fn is_connector_excluded(&self, connector_id: &str) -> bool { - let excluded = self.excluded_connector_ids.lock().await; - excluded.contains(connector_id) - } - - /// Check if a session is excluded from archiving - /// - /// A session is excluded if: - /// 1. It's explicitly in the excluded_sessions set, OR - /// 2. Its connector is in excluded_connector_ids (e.g., Gateway) and the session - /// hasn't been explicitly enabled (which would have removed it from excluded_sessions) - /// - /// For Gateway connectors, sessions default to excluded until explicitly enabled. 
- pub async fn is_session_excluded(&self, connector_id: &str, session_id: &str) -> bool { - let key = format!("{}:{}", connector_id, session_id); - let excluded = self.excluded_sessions.lock().await; - - // If session is explicitly in excluded set, it's excluded - if excluded.contains(&key) { - return true; - } - - // Check if connector defaults to excluded (e.g., Gateway) - // For these connectors, sessions are excluded by default - let connector_excluded = { - let excluded_connectors = self.excluded_connector_ids.lock().await; - excluded_connectors.contains(connector_id) - }; - - if connector_excluded { - // Connector defaults to excluded. Session is excluded UNLESS it has been - // explicitly enabled. We track enabled sessions by checking if they were - // previously in excluded_sessions and removed (toggle enabled them). - // Since they're not in excluded_sessions now, check included_sessions. - // For simplicity, we use a marker: if connector is excluded but session - // is not in excluded_sessions, check if we have an "enabled" marker. - // Since we don't have a separate set, we'll treat "not in excluded_sessions" - // for an excluded connector as still excluded (the default state). - // The toggle will add an "enabled:key" marker to excluded_sessions to indicate - // the session was explicitly enabled. - let enabled_key = format!("enabled:{}", key); - if excluded.contains(&enabled_key) { - return false; // Explicitly enabled - } - return true; // Default excluded for Gateway - } - - false - } - - /// Exclude a session from archiving (disable archiving for this session) - /// - /// Returns true if the session was newly excluded, false if already excluded. 
- pub async fn exclude_session(&self, connector_id: &str, session_id: &str) -> bool { - let key = format!("{}:{}", connector_id, session_id); - let mut excluded = self.excluded_sessions.lock().await; - let newly_excluded = excluded.insert(key.clone()); - if newly_excluded { - tracing::info!( - "Archiving disabled for session {} (connector: {})", - session_id, - connector_id - ); - } - newly_excluded - } - - /// Include a session in archiving (enable archiving for this session) - /// - /// Returns true if the session was previously excluded, false if it wasn't excluded. - pub async fn include_session(&self, connector_id: &str, session_id: &str) -> bool { - let key = format!("{}:{}", connector_id, session_id); - let mut excluded = self.excluded_sessions.lock().await; - let was_excluded = excluded.remove(&key); - if was_excluded { - tracing::info!( - "Archiving enabled for session {} (connector: {})", - session_id, - connector_id - ); - } - was_excluded - } - - /// Toggle session archiving status - /// - /// Returns the new status: true = archiving enabled, false = archiving disabled - /// - /// For Gateway connectors (in excluded_connector_ids), sessions default to excluded. - /// Toggling uses an "enabled:" marker to track explicitly enabled sessions. 
- pub async fn toggle_session_archiving(&self, connector_id: &str, session_id: &str) -> bool { - let key = format!("{}:{}", connector_id, session_id); - let enabled_key = format!("enabled:{}", key); - - // Check if connector defaults to excluded (e.g., Gateway) - let connector_excluded = { - let excluded_connectors = self.excluded_connector_ids.lock().await; - excluded_connectors.contains(connector_id) - }; - - let mut excluded = self.excluded_sessions.lock().await; - - if connector_excluded { - // Gateway connector: sessions default to excluded - if excluded.contains(&enabled_key) { - // Currently enabled, disable by removing the enabled marker - excluded.remove(&enabled_key); - tracing::info!( - "Archiving disabled for session {} (connector: {}, Gateway default)", - session_id, - connector_id - ); - false // archiving now disabled - } else { - // Currently excluded (default), enable by adding the enabled marker - // Also remove from explicit excluded set if present - excluded.remove(&key); - excluded.insert(enabled_key); - tracing::info!( - "Archiving enabled for session {} (connector: {}, Gateway override)", - session_id, - connector_id - ); - true // archiving now enabled - } - } else { - // Non-Gateway connector: normal toggle logic - if excluded.contains(&key) { - excluded.remove(&key); - tracing::info!( - "Archiving enabled for session {} (connector: {})", - session_id, - connector_id - ); - true // archiving now enabled - } else { - excluded.insert(key); - tracing::info!( - "Archiving disabled for session {} (connector: {})", - session_id, - connector_id - ); - false // archiving now disabled - } - } - } - - /// Canonicalize a finalized message record and write it to the archive. - /// - /// This is the SINGLE write path for all finalization triggers (TurnComplete, - /// SessionIdle, stale timeout, shutdown flush). Resolves connector_id to - /// connector_uid, then to the canonical scroll_id, rewrites the record's - /// session field, and appends. 
- async fn canonicalize_and_write( - &self, - record: &mut MessageRecord, - connector_id: &str, - native_session_id: &str, - message_id: &str, - finalized_via: &str, - ) -> Result<()> { - // 1. Resolve connector_id to connector_uid - let connector_uid = match self.archivist.resolve_connector_uid(connector_id).await { - Ok(uid) => uid, - Err(e) => { - tracing::warn!( - "canonicalize_and_write: Failed to resolve connector_uid for '{}': {}. \ - Message {} will not be archived.", - connector_id, - e, - message_id - ); - return Ok(()); - } - }; - - // 2. Resolve or lazy-register session to get canonical scroll_id - let scroll_id = self - .resolve_or_register_session( - connector_uid, - native_session_id, - &format!( - "{}: Session mapping missing during message write", - finalized_via - ), - ) - .await?; - - // 3. Rewrite record to canonical identity - record.session = scroll_id; - - tracing::info!( - "canonicalize_and_write: Writing message {} ({} bytes) for session {} -> scroll_id {} via {}", - message_id, - record.content_md.len(), - native_session_id, - scroll_id, - finalized_via, - ); - - // 4. Write to archive - self.archivist - .append_messages(scroll_id, vec![record.clone()], None) - .await?; - - Ok(()) - } - - /// Resolve session with lazy registration fallback - /// - /// Attempts to resolve the session mapping. If resolution fails, performs - /// lazy registration with placeholder metadata to ensure messages can be written. - async fn resolve_or_register_session( - &self, - connector_uid: Uuid, - native_session_id: &str, - reason: &str, - ) -> Result { - match self - .archivist - .resolve_session(connector_uid, native_session_id, None) - .await - { - Ok(scroll_id) => Ok(scroll_id), - Err(e) => { - tracing::warn!( - "Failed to resolve session {} for connector {}: {}. 
Attempting lazy registration.", - native_session_id, - connector_uid, - e - ); - - // Attempt lazy registration using available metadata - let register_req = RegisterSessionRequest { - connector_uid, - native_session_id: native_session_id.to_string(), - title: Some("Lazy-registered session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({ - "lazy_registered": true, - "reason": reason - }), - completeness: SessionCompleteness::Discovered, - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - match self.archivist.register_session(register_req, None).await { - Ok(response) => { - tracing::info!( - "Lazy registration successful for session {} (scroll_id: {})", - native_session_id, - response.scroll_id - ); - Ok(response.scroll_id) - } - Err(reg_error) => { - tracing::error!( - "Lazy registration failed for session {}: {}. Message will be lost.", - native_session_id, - reg_error - ); - Err(reg_error) - } - } - } - } - } - - /// Create or get an existing meta session for an ACP client. - /// - /// If a meta session already exists for this client_id, returns its scroll_id. - /// Otherwise creates a new meta session with SessionKind::AcpConnection. - async fn get_or_create_meta_session( - &self, - client_id: &str, - connector_uid: Uuid, - connected_at: &str, - ) -> Result { - // First, try to find existing meta session - if let Some(existing) = self.archivist.find_meta_session_by_client(client_id, None).await? 
{ - tracing::debug!( - "Found existing meta session {} for client {}", - existing.scroll_id, - client_id - ); - return Ok(existing.scroll_id); - } - - // No existing session, create new one - let req = RegisterSessionRequest { - connector_uid, - native_session_id: format!("acp-meta-{}", client_id), - title: Some(format!("ACP Connection: {}", client_id)), - custom_scroll_id: None, - completeness: SessionCompleteness::Complete, - metadata: serde_json::json!({ - "kind": "ACP_CONNECTION", - "acp_client_id": client_id, - "is_connected": true, - "connected_at": connected_at, - }), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let response = self.archivist.register_session(req, None).await?; - - tracing::info!( - "Created meta session {} for ACP client {}", - response.scroll_id, - client_id - ); - - Ok(response.scroll_id) - } - - /// Finalize stale message buffers that have been inactive for too long - /// - /// This prevents data loss when MessageCompleted events are missed or delayed. - /// Buffers inactive for longer than the threshold are finalized and written to archive. 
- async fn finalize_stale_buffers(&self, threshold: Duration) -> Result<()> { - let now = Utc::now(); - - // Collect stale message IDs - let stale_ids = { - let acc = self.accumulator.lock().await; - acc.get_stale_message_ids(now, threshold) - }; - - if stale_ids.is_empty() { - return Ok(()); - } - - tracing::warn!( - "Finalizing {} stale message buffer(s) (inactive > {:?})", - stale_ids.len(), - threshold - ); - - // Finalize each stale buffer - for message_id in stale_ids { - // Finalize buffer - let finalized_record = { - let mut acc = self.accumulator.lock().await; - acc.finalize(&message_id) - }; - - if let Some((mut record, connector_id, native_session_id)) = finalized_record { - // Check if already archived - let already_archived = { - let mut archived = self.archived_messages.lock().await; - if archived.contains(&message_id) { - true - } else { - archived.insert(message_id.clone()); - false - } - }; - - if already_archived { - tracing::debug!( - "Stale message {} already archived, skipping", - message_id - ); - continue; - } - - // Tag with finalization method - if let Some(metadata_obj) = record.metadata.as_object_mut() { - metadata_obj.insert( - "finalized_via".to_string(), - serde_json::Value::String("stale_timeout".to_string()), - ); - } - - // Canonicalize identity and write to archive - if let Err(e) = self.canonicalize_and_write( - &mut record, - &connector_id, - &native_session_id, - &message_id, - "stale_timeout", - ).await { - tracing::error!( - "Failed to write stale message {} to archive: {}", - message_id, - e - ); - // Continue processing other stale buffers - } - } - } - - Ok(()) - } - - /// Gracefully shut down the event handler, flushing all active buffers - /// - /// This ensures no messages are lost when the server is shut down. - /// All active message buffers are finalized and written to archive. 
- pub async fn shutdown_and_flush(&self) -> Result<()> { - tracing::info!("Shutting down EventHandler, flushing active buffers"); - - // Get all active buffer message IDs - let message_ids = { - let acc = self.accumulator.lock().await; - acc.get_all_message_ids() - }; - - if message_ids.is_empty() { - tracing::info!("No active buffers to flush on shutdown"); - return Ok(()); - } - - tracing::warn!( - "Flushing {} active message buffer(s) on shutdown", - message_ids.len() - ); - - let mut flushed_count = 0; - let mut already_archived_count = 0; - - // Finalize each buffer - for message_id in message_ids { - // Finalize buffer - let finalized_record = { - let mut acc = self.accumulator.lock().await; - acc.finalize(&message_id) - }; - - if let Some((mut record, connector_id, native_session_id)) = finalized_record { - // Check if already archived (deduplication) - let already_archived = { - let mut archived = self.archived_messages.lock().await; - if archived.contains(&message_id) { - true - } else { - archived.insert(message_id.clone()); - false - } - }; - - if already_archived { - tracing::debug!("Shutdown: Message {} already archived, skipping", message_id); - already_archived_count += 1; - continue; - } - - // Tag with finalization method - if let Some(metadata_obj) = record.metadata.as_object_mut() { - metadata_obj.insert( - "finalized_via".to_string(), - serde_json::Value::String("shutdown_flush".to_string()), - ); - } - - // Canonicalize identity and write to archive - if let Err(e) = self.canonicalize_and_write( - &mut record, - &connector_id, - &native_session_id, - &message_id, - "shutdown_flush", - ).await { - tracing::error!( - "Shutdown: Failed to write message {} to archive: {}", - message_id, - e - ); - // Continue processing other buffers despite error - } else { - flushed_count += 1; - } - } - } - - tracing::info!( - "Shutdown complete: {} messages flushed, {} already archived", - flushed_count, - already_archived_count - ); - - Ok(()) - } - - /// Run 
the event loop, consuming events from the bus receiver. - /// - /// Accepts a [`BusReceiver`] from `dirigent_core::SharingBus`. Each - /// received `BusEvent` is unwrapped to its inner `Event` and dispatched - /// to [`Self::handle_event`]. The `routing` metadata is currently - /// ignored by the archivist dispatcher — it continues to operate on the - /// pre-existing `Event` enum — but the scroll_id hints are already - /// attached at publish time for future consumers. - pub async fn run(&self, mut bus_rx: BusReceiver) { - let mut interval = tokio::time::interval(Duration::from_secs(1)); - // Use a long stale threshold (300s / 5 minutes) to match the general tool execution - // limit. Task agents and other slow tools can run for minutes. TurnComplete is the - // authoritative finalization signal; this is just a safety net for missed events. - let stale_threshold = Duration::from_secs(300); - - // Sample the lagged counter periodically so operators see drops in logs. - let lagged = Arc::clone(&bus_rx.lagged); - let mut last_lagged: u64 = 0; - - loop { - tokio::select! { - maybe_bus_event = bus_rx.rx.recv() => { - match maybe_bus_event { - Some(bus_event) => { - // Unwrap the Arc; clone the inner only when - // we're the last holder so the common path is a - // move rather than a deep clone. - let event = Arc::try_unwrap(bus_event.event) - .unwrap_or_else(|shared| (*shared).clone()); - if let Err(e) = self.handle_event(event).await { - tracing::error!("Failed to archive event: {}", e); - } - } - None => { - tracing::info!("Bus event stream closed, stopping event handler"); - break; - } - } - } - _ = interval.tick() => { - // Log any newly-dropped events since last tick. 
- let current = lagged.load(std::sync::atomic::Ordering::Relaxed); - if current > last_lagged { - tracing::warn!( - dropped = current - last_lagged, - total = current, - "archivist bus subscriber is lagging; events were dropped" - ); - last_lagged = current; - } - if let Err(e) = self.finalize_stale_buffers(stale_threshold).await { - tracing::error!("Failed to finalize stale buffers: {}", e); - } - } - } - } - } - - /// Dispatch event to appropriate handler - async fn handle_event(&self, event: Event) -> Result<()> { - match event { - Event::SessionCreated { - connector_id, - session, - } => { - // Always register sessions - we want to track them even if archiving is disabled - // This allows enabling archiving later - self.handle_session_created(connector_id, session).await?; - } - Event::SessionsListed { - connector_id, - sessions, - } => { - // Register all discovered sessions — archivist deduplicates via ALIASED status - self.handle_sessions_listed(connector_id, sessions).await?; - } - Event::TurnComplete { - connector_id, - session_id, - message_id, - trigger, - } => { - // Check if session is excluded from archiving - if self.is_session_excluded(&connector_id, &session_id).await { - tracing::debug!( - "Skipping TurnComplete for excluded session {} (connector: {})", - session_id, - connector_id - ); - return Ok(()); - } - tracing::debug!( - "TurnComplete received for message {} in session {} (trigger: {:?})", - message_id, - session_id, - trigger - ); - self.handle_turn_complete(connector_id, session_id, message_id, trigger).await?; - } - Event::MessageCompleted { - connector_id, - message, - } => { - // MessageCompleted is now informational only - metadata is ready - // Finalization happens on TurnComplete - tracing::debug!( - "MessageCompleted received for message {} in session {} (connector: {}) - metadata ready, awaiting TurnComplete for finalization", - message.id, - message.session_id, - connector_id - ); - // We could update message metadata in 
accumulator here if needed, - // but for now we keep this as a no-op for backward compatibility - } - Event::SessionUpdate { - connector_id, - session_id, - update, - } => { - // Skip re-archiving for sessions being replayed (already Complete) - if self.replaying_sessions.lock().await.contains(&session_id) { - tracing::debug!( - "Suppressing SessionUpdate for replaying session {} (already archived)", - session_id - ); - return Ok(()); - } - // Check if session is excluded from archiving - if self.is_session_excluded(&connector_id, &session_id).await { - tracing::debug!( - "Skipping SessionUpdate for excluded session {} (connector: {})", - session_id, - connector_id - ); - return Ok(()); - } - self.handle_session_update(connector_id, session_id, update) - .await?; - } - Event::SessionIdle { connector_id, session_id } => { - // Clear replay state if this session was being replayed - if self.replaying_sessions.lock().await.remove(&session_id) { - tracing::debug!( - "Session {} replay complete, resuming normal archiving", - session_id - ); - } - self.handle_session_idle(connector_id, session_id).await?; - } - Event::SessionMetadataUpdated { - connector_id, - session_id, - title, - total_messages: _, - model, - } => { - // Check if session is excluded from archiving - if self.is_session_excluded(&connector_id, &session_id).await { - tracing::debug!( - "Skipping SessionMetadataUpdated for excluded session {} (connector: {})", - session_id, - connector_id - ); - return Ok(()); - } - self.handle_session_metadata_updated(connector_id, session_id, title, model) - .await?; - } - Event::AcpClientConnected { - client_id, - connected_at, - capabilities, - connector_uid, - } => { - self.handle_acp_client_connected(client_id, connected_at, capabilities, connector_uid).await?; - } - Event::AcpClientDisconnected { - client_id, - disconnected_at, - reason, - } => { - self.handle_acp_client_disconnected(client_id, disconnected_at, reason).await?; - } - Event::AcpClientSessionOpened { 
- client_id, - gateway_session_id, - client_session_id: _, - timestamp: _, - } => { - self.handle_acp_session_opened(client_id, gateway_session_id).await?; - } - Event::AcpClientSessionRouted { - client_id, - from_session_id, - to_session_id, - connector_id, - connector_title: _, - connector_kind: _, - model: _, - agent_info: _, - timestamp: _, - } => { - self.handle_acp_session_routed(client_id, from_session_id, to_session_id, connector_id).await?; - } - Event::SessionMetadataReceived { - connector_id, - session_id, - models, - modes, - config_options, - } => { - // Check if session is excluded from archiving - if self.is_session_excluded(&connector_id, &session_id).await { - tracing::debug!( - "Skipping SessionMetadataReceived for excluded session {} (connector: {})", - session_id, - connector_id - ); - return Ok(()); - } - self.handle_session_acp_metadata_received(connector_id, session_id, models, modes, config_options).await?; - } - Event::SessionClosed { connector_id, session_id } => { - tracing::debug!("SessionClosed: session '{}' from connector '{}'", session_id, connector_id); - // No action needed — session remains in archive, just not actively connected - } - _ => { - // Ignore other event types for MVP - } - } - Ok(()) - } - - /// Handle SessionCreated event - /// Register a single session with the archivist, given an already-resolved connector_uid. - /// - /// Returns the registration response (Accepted/Aliased/Rejected). - /// Callers that process batches should resolve connector_uid once and reuse it. - async fn register_single_session( - &self, - connector_uid: Uuid, - session: &Session, - completeness: SessionCompleteness, - ) -> Result { - // Never use external session ID as scroll_id, even if it's a valid UUID. - // The archivist MUST generate a fresh UUID7 for storage consistency. - // External IDs (including UUID4s from connectors like claude-code-acp) - // are stored in session metadata for reverse lookup via sessions.jsonl. 
- let mut metadata = serde_json::json!({ - "project_path": session.metadata.project_path, - "model": session.metadata.model, - "project_id": session.metadata.project_id.map(|u| u.to_string()), - }); - - // Propagate tool_configuration from _meta.extra into session metadata. - // This preserves the connector's tool configuration in the archived session - // so it can be restored when the session is loaded later. - if let Some(ref meta) = session.metadata._meta { - if let Some(tool_config) = meta.extra.get("tool_configuration") { - if let Some(obj) = metadata.as_object_mut() { - obj.insert("tool_configuration".to_string(), tool_config.clone()); - } - } - } - - let req = RegisterSessionRequest { - connector_uid, - native_session_id: session.id.clone(), - title: Some(session.title.clone()), - custom_scroll_id: None, - metadata, - completeness, - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - self.archivist.register_session(req, None).await - } - - async fn handle_session_created(&self, connector_id: String, session: Session) -> Result<()> { - if session.id.is_empty() { - tracing::warn!( - "SessionCreated event has empty session ID — session will not be resumable" - ); - } - - // Resolve connector_id to connector_uid using the archivist - // This handles both UUID connector IDs and human-readable IDs like "gateway-1" - let connector_uid = match self.archivist.resolve_connector_uid(&connector_id).await { - Ok(uid) => uid, - Err(e) => { - tracing::warn!( - "Failed to resolve connector_uid for connector_id '{}': {}. 
\ - This connector may not be registered with the archivist.", - connector_id, - e - ); - return Ok(()); // Skip this event, don't fail the handler - } - }; - - let response = self.register_single_session(connector_uid, &session, SessionCompleteness::Complete).await?; - - // If the session was already known (Aliased), check if it's a replay of a Complete session. - if response.status == RegisterStatus::Aliased { - // Check if the existing session is Complete — if so, this is a replay - // and we should suppress re-archiving of replayed messages. - if let Ok(meta) = self.archivist.get_session_metadata(response.scroll_id, None).await { - if meta.completeness == SessionCompleteness::Complete { - self.replaying_sessions.lock().await.insert(session.id.clone()); - tracing::debug!( - "Session {} marked as replaying (already Complete)", - session.id - ); - } - } - // Upgrade completeness to Complete (idempotent for already-Complete sessions) - if let Err(e) = self.archivist.update_session_completeness( - response.scroll_id, - SessionCompleteness::Complete, - None, - ).await { - tracing::debug!( - "Failed to upgrade completeness for session {}: {}", - session.id, e - ); - } - } - - tracing::info!( - "Registered session {} for connector {}", - session.id, - connector_id - ); - - // Emit SessionRegistered to signal that this session is durably - // registered and the frontend can refresh the session list with - // confidence. Published onto the SharingBus. 
- let registered_event = Event::SessionRegistered { - connector_id: connector_id.clone(), - session_id: session.id.clone(), - scroll_id: response.scroll_id.to_string(), - }; - if let Some(ref publisher) = self.bus_publisher { - let bus_event = BusEvent::from_archivist_event( - registered_event, - &connector_id, - &session.id, - Some(response.scroll_id), - ); - publisher(bus_event); - } - - // T015: Check if this connector is in the excluded list - // If so, automatically exclude the session from archiving - if self.is_connector_excluded(&connector_id).await { - self.exclude_session(&connector_id, &session.id).await; - tracing::info!( - "Session {} auto-excluded from archiving (connector '{}' is excluded by default)", - session.id, - connector_id - ); - } - - Ok(()) - } - - /// Handle SessionsListed event — register all discovered sessions. - /// - /// Sessions already known return ALIASED (fast, from cache) with metadata refresh. - /// Newly discovered sessions return ACCEPTED and get a fresh scroll_id. - /// Errors on individual sessions are logged and skipped. - async fn handle_sessions_listed( - &self, - connector_id: String, - sessions: Vec, - ) -> Result<()> { - if sessions.is_empty() { - return Ok(()); - } - - let connector_uid = match self.archivist.resolve_connector_uid(&connector_id).await { - Ok(uid) => uid, - Err(e) => { - tracing::warn!( - "SessionsListed: Failed to resolve connector '{}': {}. 
Skipping {} sessions.", - connector_id, e, sessions.len() - ); - return Ok(()); - } - }; - - let is_excluded = self.is_connector_excluded(&connector_id).await; - let mut accepted = 0u32; - let mut aliased = 0u32; - let mut errors = 0u32; - - for session in &sessions { - match self.register_single_session(connector_uid, session, SessionCompleteness::Discovered).await { - Ok(response) => match response.status { - RegisterStatus::Accepted => { - accepted += 1; - tracing::info!( - "SessionsListed: New session '{}' registered (scroll: {}) for connector '{}'", - session.id, response.scroll_id, connector_id - ); - // Emit SessionRegistered to signal durable registration - // (published onto the SharingBus). - let registered_event = Event::SessionRegistered { - connector_id: connector_id.clone(), - session_id: session.id.clone(), - scroll_id: response.scroll_id.to_string(), - }; - if let Some(ref publisher) = self.bus_publisher { - let bus_event = BusEvent::from_archivist_event( - registered_event, - &connector_id, - &session.id, - Some(response.scroll_id), - ); - publisher(bus_event); - } - if is_excluded { - self.exclude_session(&connector_id, &session.id).await; - } - // Refresh metadata — title/model from session/list may differ from - // what was stored at registration (e.g., session was created via - // SessionCreated with empty title, now session/list has the real title) - let title = if session.title.is_empty() { None } else { Some(session.title.clone()) }; - let model = session.metadata.model.clone(); - if title.is_some() || model.is_some() { - if let Err(e) = self.archivist.update_session_metadata( - response.scroll_id, title, model, None, - ).await { - tracing::debug!( - "SessionsListed: metadata refresh after accept failed for '{}': {}", - session.id, e - ); - } - } - } - RegisterStatus::Aliased => { - aliased += 1; - // Refresh metadata — title/model may have changed upstream - let title = if session.title.is_empty() { None } else { Some(session.title.clone()) 
}; - let model = session.metadata.model.clone(); - if title.is_some() || model.is_some() { - if let Err(e) = self.archivist.update_session_metadata( - response.scroll_id, title, model, None, - ).await { - tracing::debug!( - "SessionsListed: metadata refresh failed for '{}': {}", - session.id, e - ); - } - } - } - RegisterStatus::Rejected => { - errors += 1; - tracing::warn!( - "SessionsListed: Registration rejected for session '{}' (connector '{}')", - session.id, connector_id - ); - } - }, - Err(e) => { - errors += 1; - tracing::warn!( - "SessionsListed: Failed to register '{}': {}. Continuing.", - session.id, e - ); - } - } - } - - tracing::info!( - "SessionsListed: {} sessions for '{}': {} new, {} known, {} errors", - sessions.len(), connector_id, accepted, aliased, errors - ); - - Ok(()) - } - - /// Handle TurnComplete event - triggers finalization and archiving - /// - /// This is THE primary signal that all content for a turn has been received - /// and the message is ready to be finalized and archived. - async fn handle_turn_complete( - &self, - connector_id: String, - session_id: String, - message_id: String, - trigger: TurnCompleteTrigger, - ) -> Result<()> { - tracing::info!( - "TurnComplete: Finalizing message {} in session {} (connector: {}, trigger: {:?})", - message_id, - session_id, - connector_id, - trigger - ); - - // ALWAYS finalize to remove buffer, even if already archived - let finalized_record = { - let mut acc = self.accumulator.lock().await; - acc.finalize(&message_id) - }; - - // Check if we have a buffer for this message - let Some((mut record, _acc_connector_id, _acc_native_session_id)) = finalized_record else { - tracing::warn!( - "TurnComplete: No accumulated content for message {} in session {} - message may not have been streamed", - message_id, - session_id - ); - // This could happen if: - // 1. Message was sent without streaming (but MessageCompleted should have handled it) - // 2. 
TurnComplete arrived before any chunks (timing issue) - // 3. Message was already finalized by SessionIdle or stale timeout - // For now, we log and skip - the message may already be archived - return Ok(()); - }; - - // Check if we've already archived this message AFTER finalization - // This prevents duplicate writes while ensuring buffer cleanup - { - let mut archived = self.archived_messages.lock().await; - if archived.contains(&message_id) { - tracing::debug!( - "TurnComplete: Message {} already archived, buffer cleaned but not re-written", - message_id - ); - return Ok(()); - } - // Mark as archived before processing to prevent race conditions - archived.insert(message_id.clone()); - } - - // Tag with TurnComplete-specific metadata - if let Some(metadata_obj) = record.metadata.as_object_mut() { - metadata_obj.insert( - "finalized_via".to_string(), - serde_json::Value::String("turn_complete".to_string()), - ); - metadata_obj.insert( - "turn_complete_trigger".to_string(), - serde_json::to_value(&trigger).unwrap_or(serde_json::json!(null)), - ); - } - - // Canonicalize identity and write to archive - self.canonicalize_and_write( - &mut record, - &connector_id, - &session_id, - &message_id, - "turn_complete", - ) - .await?; - - Ok(()) - } - - /// Handle SessionIdle event - /// Finalizes and writes any buffered messages for the session that haven't been completed yet. - /// This provides a safety net for the race condition where MessageCompleted arrives before - /// chunks have been accumulated, or when MessageCompleted is missed entirely. 
- async fn handle_session_idle(&self, connector_id: String, session_id: String) -> Result<()> { - tracing::debug!("SessionIdle received for session {} (connector: {})", session_id, connector_id); - - // Get all message IDs that have buffers for this session - let message_ids_for_session = { - let acc = self.accumulator.lock().await; - acc.get_message_ids_for_session(&session_id) - }; - - if message_ids_for_session.is_empty() { - tracing::debug!( - "SessionIdle: No active buffers for session {}, nothing to finalize", - session_id - ); - return Ok(()); - } - - tracing::info!( - "SessionIdle: Finalizing {} buffered message(s) for session {}", - message_ids_for_session.len(), - session_id - ); - - // Finalize each message and write to archive - for message_id in message_ids_for_session { - // ALWAYS finalize to remove buffer, even if already archived - let finalized_record = { - let mut acc = self.accumulator.lock().await; - acc.finalize(&message_id) - }; - - // If no buffer existed, skip (already cleaned up) - let Some((mut record, _acc_connector_id, _acc_native_session_id)) = finalized_record else { - tracing::debug!("SessionIdle: No buffer for message {}", message_id); - continue; - }; - - // Check if already archived to prevent duplicate writes - let already_archived = { - let archived = self.archived_messages.lock().await; - archived.contains(&message_id) - }; - - if already_archived { - tracing::debug!( - "SessionIdle: Message {} already archived, buffer cleaned but not re-written", - message_id - ); - continue; - } - - // Mark as archived - { - let mut archived = self.archived_messages.lock().await; - archived.insert(message_id.clone()); - } - - // Tag with finalization method - if let Some(metadata_obj) = record.metadata.as_object_mut() { - metadata_obj.insert( - "finalized_via".to_string(), - serde_json::Value::String("session_idle".to_string()), - ); - } - - // Canonicalize identity and write to archive - self.canonicalize_and_write( - &mut record, - 
&connector_id, - &session_id, - &message_id, - "session_idle", - ) - .await?; - } - - Ok(()) - } - - /// Write message to archive using connector_uid to resolve scroll_id - /// - /// This method is kept for potential future use with non-streaming messages, - /// though currently unused since TurnComplete handles all finalization. - #[allow(dead_code)] - async fn write_message_to_archive(&self, connector_uid: Uuid, message: Message) -> Result<()> { - // Convert dirigent_protocol::Message to markdown - let mut content_md = String::new(); - - for part in &message.content { - match part { - MessagePart::Text { text } => { - content_md.push_str(text); - } - MessagePart::Thinking { text } => { - content_md.push_str(&format!("{}", text)); - } - MessagePart::Code { language, code } => { - content_md.push_str(&format!("\n```{}\n{}\n```\n", language, code)); - } - MessagePart::Tool { - tool, - tool_call_id: _, - input, - output, - } => { - content_md.push_str(&format!("\n**Tool**: {}\n", tool)); - content_md.push_str(&format!( - "```json\n{}\n```\n", - serde_json::to_string_pretty(input).unwrap_or_else(|_| "{}".to_string()) - )); - if let Some(output_val) = output { - content_md.push_str(&format!( - "\n**Output**:\n```json\n{}\n```\n", - serde_json::to_string_pretty(output_val) - .unwrap_or_else(|_| "{}".to_string()) - )); - } - } - MessagePart::File { path, content } => { - content_md.push_str(&format!("\n**File**: {}\n```\n{}\n```\n", path, content)); - } - } - } - - // Parse message_id - let message_uuid = Uuid::parse_str(&message.id).unwrap_or_else(|_| { - tracing::warn!("Failed to parse message_id as UUID: {}", message.id); - Uuid::now_v7() - }); - - // Resolve scroll_id from native session_id BEFORE creating MessageRecord - // This ensures we use the canonical session identifier - // If resolution fails, attempt lazy registration as a fallback - let scroll_id = self - .resolve_or_register_session( - connector_uid, - &message.session_id, - "Session mapping missing 
during message write (non-streaming path)", - ) - .await?; - - // Convert MessageRole to string - let role = match message.role { - MessageRole::User => "user".to_string(), - MessageRole::Assistant => "assistant".to_string(), - }; - - // Serialize metadata - let metadata = message - .metadata - .map(|m| serde_json::to_value(m).unwrap_or(serde_json::json!({}))) - .unwrap_or(serde_json::json!({})); - - // Serialize original content parts for rich UI rendering - let content_parts = serde_json::to_value(&message.content).ok(); - - let record = MessageRecord { - version: 1, - message_id: message_uuid, - session: scroll_id, // Use canonical scroll_id instead of native session UUID - parent_id: None, - ts: message.created_at, - role, - author: None, - content_md, - content_parts, - attachments: Vec::new(), - metadata, - }; - - // Write message to archive - self.archivist - .append_messages(scroll_id, vec![record], None) - .await?; - - tracing::info!( - "Wrote message {} to archive for session {} (scroll_id: {})", - message.id, - message.session_id, - scroll_id - ); - - Ok(()) - } - - /// Handle SessionUpdate event - async fn handle_session_update( - &self, - connector_id: String, - session_id: String, - update: SessionUpdate, - ) -> Result<()> { - let mut acc = self.accumulator.lock().await; - - match update { - SessionUpdate::AgentMessageChunk { - message_id, - content, - .. - } => { - acc.add_chunk(message_id, session_id, connector_id, "assistant".to_string(), content); - } - SessionUpdate::UserMessageChunk { - message_id, - content, - .. - } => { - acc.add_chunk(message_id, session_id, connector_id, "user".to_string(), content); - } - SessionUpdate::AgentThoughtChunk { - message_id, - content, - .. - } => { - // Extract text from ContentBlock - if let ContentBlock::Text { text } = content { - acc.add_thinking(message_id, session_id, connector_id, text); - } - } - SessionUpdate::ToolCall { - message_id, - tool_call, - .. 
- } => { - // Convert to internal ToolCallData and add/update - // This handles both initial tool call and any subsequent updates - let tool_call_data = crate::accumulator::ToolCallData { - id: tool_call.id.clone(), - tool_name: tool_call.tool_name.clone(), - input: tool_call.raw_input.unwrap_or_else(|| serde_json::json!({})), - output: tool_call.raw_output, - }; - - acc.add_or_update_tool_call(message_id, tool_call_data); - } - SessionUpdate::ToolCallUpdate { - message_id, - tool_call_id, - tool_call, - .. - } => { - // Convert to ToolCallData and merge with existing tool call - let tool_call_data = crate::accumulator::ToolCallData { - id: tool_call_id, - tool_name: tool_call.tool_name.clone(), - input: tool_call.raw_input.unwrap_or_else(|| serde_json::json!({})), - output: tool_call.raw_output, - }; - - acc.add_or_update_tool_call(message_id, tool_call_data); - } - SessionUpdate::Unknown { .. } => { - // Ignore unknown update types - forward compatibility - } - } - - Ok(()) - } - - /// Handle SessionMetadataUpdated event - async fn handle_session_metadata_updated( - &self, - connector_id: String, - session_id: String, - title: Option, - model: Option, - ) -> Result<()> { - tracing::debug!( - "SessionMetadataUpdated event: connector={}, session={}, title={:?}, model={:?}", - connector_id, - session_id, - title, - model - ); - - // Resolve connector_id to connector_uid using the archivist - let connector_uid = match self.archivist.resolve_connector_uid(&connector_id).await { - Ok(uid) => uid, - Err(e) => { - tracing::warn!( - "Failed to resolve connector_uid for connector_id '{}': {}. \ - Skipping metadata update.", - connector_id, - e - ); - return Ok(()); // Skip this event, don't fail the handler - } - }; - - // Resolve session to scroll_id - let scroll_id = match self - .archivist - .resolve_session(connector_uid, &session_id, None) - .await - { - Ok(scroll_id) => scroll_id, - Err(e) => { - tracing::warn!( - "Failed to resolve session {} for connector {}: {}. 
Skipping metadata update.", - session_id, - connector_uid, - e - ); - return Ok(()); // Skip this event if session not found - } - }; - - // Update session metadata in archive - self.archivist - .update_session_metadata(scroll_id, title.clone(), model.clone(), None) - .await?; - - tracing::info!( - "Updated session metadata: scroll_id={}, title={:?}, model={:?}", - scroll_id, - title, - model - ); - - Ok(()) - } - - /// Handle SessionMetadataReceived event (ACP-specific models/modes/config_options metadata) - async fn handle_session_acp_metadata_received( - &self, - connector_id: String, - session_id: String, - models: Option, - modes: Option, - config_options: Option>, - ) -> Result<()> { - tracing::debug!( - "SessionMetadataReceived event: connector={}, session={}, has_models={}, has_modes={}, has_config_options={}", - connector_id, - session_id, - models.is_some(), - modes.is_some(), - config_options.is_some() - ); - - // Resolve connector_id to connector_uid using the archivist - let connector_uid = match self.archivist.resolve_connector_uid(&connector_id).await { - Ok(uid) => uid, - Err(e) => { - tracing::warn!( - "Failed to resolve connector_uid for connector_id '{}': {}. \ - Skipping ACP metadata update.", - connector_id, - e - ); - return Ok(()); // Skip this event, don't fail the handler - } - }; - - // Resolve session to scroll_id - let scroll_id = match self - .archivist - .resolve_session(connector_uid, &session_id, None) - .await - { - Ok(scroll_id) => scroll_id, - Err(e) => { - tracing::warn!( - "Failed to resolve session {} for connector {}: {}. 
Skipping ACP metadata update.", - session_id, - connector_uid, - e - ); - return Ok(()); // Skip this event if session not found - } - }; - - // Convert protocol types to JSON for storage - let models_json = models.and_then(|m| serde_json::to_value(m).ok()); - let modes_json = modes.and_then(|m| serde_json::to_value(m).ok()); - let config_options_json = config_options.and_then(|co| serde_json::to_value(co).ok()); - - // Update session ACP metadata in archive - self.archivist - .update_session_acp_metadata(scroll_id, models_json.clone(), modes_json.clone(), config_options_json.clone(), None) - .await?; - - tracing::info!( - "Updated session ACP metadata: scroll_id={}, has_models={}, has_modes={}, has_config_options={}", - scroll_id, - models_json.is_some(), - modes_json.is_some(), - config_options_json.is_some() - ); - - Ok(()) - } - - /// Handle AcpClientConnected event - async fn handle_acp_client_connected( - &self, - client_id: String, - connected_at: String, - _capabilities: Option, - connector_uid_str: String, - ) -> Result<()> { - tracing::info!("ACP client connected: {}", client_id); - - // Parse the connector_uid from the event - // This is the Acceptor connector's UID, used to create meta sessions under the right connector - let connector_uid = match Uuid::parse_str(&connector_uid_str) { - Ok(uid) if uid != Uuid::nil() => uid, - Ok(_) | Err(_) => { - // If parsing fails or we get a nil UUID, log a warning and skip meta session creation - tracing::warn!( - "Invalid connector_uid '{}' for ACP client {}. 
Meta session will not be created.", - connector_uid_str, - client_id - ); - return Ok(()); - } - }; - - // Create or get meta session - let scroll_id = self.get_or_create_meta_session(&client_id, connector_uid, &connected_at).await?; - - // Append ClientConnected event - let event = MetaEventRecord { - version: 1, - event_id: Uuid::now_v7(), - session: scroll_id, - ts: chrono::DateTime::parse_from_rfc3339(&connected_at) - .map(|dt| dt.with_timezone(&Utc)) - .unwrap_or_else(|_| Utc::now()), - event_type: MetaEventType::ClientConnected, - description: format!("Client {} connected", client_id), - linked_session_id: None, - linked_connector_id: None, - linked_connector_title: None, - metadata: serde_json::json!({}), - }; - - self.archivist.append_meta_events(scroll_id, vec![event], None).await?; - - // Update connection status - self.archivist.update_meta_session_status(scroll_id, true, None, None).await?; - - Ok(()) - } - - /// Handle AcpClientDisconnected event - async fn handle_acp_client_disconnected( - &self, - client_id: String, - disconnected_at: String, - reason: Option, - ) -> Result<()> { - tracing::info!("ACP client disconnected: {} (reason: {:?})", client_id, reason); - - // Find the meta session for this client - let Some(meta_session) = self.archivist.find_meta_session_by_client(&client_id, None).await? 
else { - tracing::warn!("No meta session found for disconnecting client {}", client_id); - return Ok(()); - }; - - let scroll_id = meta_session.scroll_id; - - // Append ClientDisconnected event - let event = MetaEventRecord { - version: 1, - event_id: Uuid::now_v7(), - session: scroll_id, - ts: chrono::DateTime::parse_from_rfc3339(&disconnected_at) - .map(|dt| dt.with_timezone(&Utc)) - .unwrap_or_else(|_| Utc::now()), - event_type: MetaEventType::ClientDisconnected, - description: format!( - "Client {} disconnected{}", - client_id, - reason.as_ref().map(|r| format!(" ({})", r)).unwrap_or_default() - ), - linked_session_id: None, - linked_connector_id: None, - linked_connector_title: None, - metadata: serde_json::json!({ - "reason": reason - }), - }; - - self.archivist.append_meta_events(scroll_id, vec![event], None).await?; - - // Update connection status to disconnected - self.archivist.update_meta_session_status(scroll_id, false, None, None).await?; - - Ok(()) - } - - /// Handle AcpClientSessionOpened event - async fn handle_acp_session_opened( - &self, - client_id: String, - gateway_session_id: String, - ) -> Result<()> { - tracing::info!("ACP client {} opened session {}", client_id, gateway_session_id); - - // Find the meta session for this client - let Some(meta_session) = self.archivist.find_meta_session_by_client(&client_id, None).await? 
else { - tracing::warn!("No meta session found for client {} opening session", client_id); - return Ok(()); - }; - - let scroll_id = meta_session.scroll_id; - - // Try to parse gateway_session_id as UUID for linking - let linked_session_id = Uuid::parse_str(&gateway_session_id).ok(); - - // Append SessionOpened event - let event = MetaEventRecord { - version: 1, - event_id: Uuid::now_v7(), - session: scroll_id, - ts: Utc::now(), - event_type: MetaEventType::SessionOpened, - description: format!("Opened session {}", gateway_session_id), - linked_session_id, - linked_connector_id: None, - linked_connector_title: None, - metadata: serde_json::json!({ - "gateway_session_id": gateway_session_id - }), - }; - - self.archivist.append_meta_events(scroll_id, vec![event], None).await?; - - // Update current session - self.archivist.update_meta_session_status(scroll_id, true, linked_session_id, None).await?; - - Ok(()) - } - - /// Handle AcpClientSessionRouted event - async fn handle_acp_session_routed( - &self, - client_id: String, - from_session_id: String, - to_session_id: String, - to_connector_id: String, - ) -> Result<()> { - tracing::info!( - "ACP client {} session routed: {} -> {} (connector: {})", - client_id, from_session_id, to_session_id, to_connector_id - ); - - // Find the meta session for this client - let Some(meta_session) = self.archivist.find_meta_session_by_client(&client_id, None).await? 
else { - tracing::warn!("No meta session found for client {} during routing", client_id); - return Ok(()); - }; - - let scroll_id = meta_session.scroll_id; - - // Try to parse to_session_id as UUID for linking - let linked_session_id = Uuid::parse_str(&to_session_id).ok(); - - // Append SessionSwitched event - let event = MetaEventRecord { - version: 1, - event_id: Uuid::now_v7(), - session: scroll_id, - ts: Utc::now(), - event_type: MetaEventType::SessionSwitched, - description: format!("Switched to {} via {}", to_session_id, to_connector_id), - linked_session_id, - linked_connector_id: Some(to_connector_id.clone()), - linked_connector_title: None, // Could be resolved later - metadata: serde_json::json!({ - "from_session_id": from_session_id, - "to_session_id": to_session_id, - "to_connector_id": to_connector_id - }), - }; - - self.archivist.append_meta_events(scroll_id, vec![event], None).await?; - - // Update current session - self.archivist.update_meta_session_status(scroll_id, true, linked_session_id, None).await?; - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::Utc; - use dirigent_protocol::MessageStatus; - - /// Create a real `Archivist` backed by a unique temp directory. - /// Previously these tests used a hand-rolled `MockArchivist`, but the - /// trait-object interface was removed in Phase 2 and EventHandler now - /// requires a concrete coordinator. - /// - /// Uses `from_single_backend` (not `new_with_single_archive`) so each test - /// is fully isolated — otherwise tests running in parallel race on the - /// shared `.archives.json` that `new_with_single_archive` writes into the - /// tempdir's parent. 
- async fn mk_test_archivist() -> Arc { - let tmp = std::env::temp_dir() - .join(format!("events_test_{}", Uuid::now_v7())); - let backend = Arc::new( - crate::backends::JsonlBackend::new(tmp) - .await - .expect("create test backend"), - ); - Arc::new( - Archivist::from_single_backend("main".into(), backend) - .await - .expect("create test archivist"), - ) - } - - - #[tokio::test] - async fn test_event_handler_creation() { - let archivist = mk_test_archivist().await; - let _handler = EventHandler::new(archivist); - } - - #[tokio::test] - async fn test_handle_session_update_agent_chunk() { - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let update = SessionUpdate::AgentMessageChunk { - message_id: "msg_123".to_string(), - content: ContentBlock::Text { - text: "Hello".to_string(), - }, - _meta: None, - }; - - let result = handler - .handle_session_update( - "connector_123".to_string(), - "session_456".to_string(), - update, - ) - .await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn test_handle_session_update_thinking_chunk() { - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let update = SessionUpdate::AgentThoughtChunk { - message_id: "msg_789".to_string(), - content: ContentBlock::Text { - text: "Thinking...".to_string(), - }, - _meta: None, - }; - - let result = handler - .handle_session_update( - "connector_456".to_string(), - "session_abc".to_string(), - update, - ) - .await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn test_write_message_to_archive() { - use uuid::Uuid; - - let archivist = mk_test_archivist().await; - - // Pre-register the connector so lazy session registration can attach to it. - // (The old MockArchivist answered every call unconditionally; the real - // coordinator requires the connector exist first.) 
- let connector_uid = Uuid::now_v7(); - archivist - .register_connector( - crate::types::RegisterConnectorRequest { - r#type: "Test".into(), - title: "t".into(), - client_native_id: format!("t@{}", Uuid::now_v7()), - custom_uid: Some(connector_uid), - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await - .unwrap(); - - let handler = EventHandler::new(archivist); - - let message = Message { - id: "01936e8f-e5a7-7000-8000-000000000001".to_string(), - session_id: "01936e8f-e5a7-7000-8000-000000000002".to_string(), - role: MessageRole::User, - created_at: Utc::now(), - content: vec![MessagePart::Text { - text: "Hello, world!".to_string(), - }], - status: MessageStatus::Completed, - metadata: None, - }; - - let result = handler - .write_message_to_archive(connector_uid, message) - .await; - assert!(result.is_ok(), "write_message_to_archive failed: {:?}", result); - } - - #[tokio::test] - async fn test_user_message_turn_complete_event() { - use uuid::Uuid; - - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let connector_id = Uuid::now_v7().to_string(); - let session_id = "01936e8f-e5a7-7000-8000-000000000102".to_string(); - let message_id = "01936e8f-e5a7-7000-8000-000000000101".to_string(); - - // First, add some chunks to accumulator (simulating streaming) - let update = SessionUpdate::UserMessageChunk { - message_id: message_id.clone(), - content: ContentBlock::Text { - text: "What is the capital of France?".to_string(), - }, - _meta: None, - }; - - let _ = handler - .handle_session_update( - connector_id.clone(), - session_id.clone(), - update, - ) - .await; - - // Now handle TurnComplete event for user message - let result = handler - .handle_turn_complete( - connector_id, - session_id, - message_id, - TurnCompleteTrigger::ExplicitSignal, - ) - .await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn test_assistant_message_turn_complete_event() { - use uuid::Uuid; - - let archivist 
= mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let connector_id = Uuid::now_v7().to_string(); - let session_id = "01936e8f-e5a7-7000-8000-000000000202".to_string(); - let message_id = "01936e8f-e5a7-7000-8000-000000000201".to_string(); - - // First, add some chunks to accumulator (simulating streaming) - let update = SessionUpdate::AgentMessageChunk { - message_id: message_id.clone(), - content: ContentBlock::Text { - text: "The capital of France is Paris.".to_string(), - }, - _meta: None, - }; - - let _ = handler - .handle_session_update( - connector_id.clone(), - session_id.clone(), - update, - ) - .await; - - // Now handle TurnComplete event for assistant message - let result = handler - .handle_turn_complete( - connector_id, - session_id, - message_id, - TurnCompleteTrigger::ExplicitSignal, - ) - .await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn test_session_idle_finalizes_buffered_messages() { - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let session_id = "01936e8f-e5a7-7000-8000-000000000010".to_string(); - let message_id = "01936e8f-e5a7-7000-8000-000000000011".to_string(); - - // Add chunks to accumulator (simulating streaming) - let update = SessionUpdate::AgentMessageChunk { - message_id: message_id.clone(), - content: ContentBlock::Text { - text: "Hello from stream".to_string(), - }, - _meta: None, - }; - - let result = handler - .handle_session_update( - "connector_123".to_string(), - session_id.clone(), - update, - ) - .await; - assert!(result.is_ok()); - - // Verify buffer exists - let has_buffer = { - let acc = handler.accumulator.lock().await; - !acc.get_message_ids_for_session(&session_id).is_empty() - }; - assert!(has_buffer, "Buffer should exist before SessionIdle"); - - // Handle SessionIdle - let result = handler.handle_session_idle("connector_123".to_string(), session_id.clone()).await; - assert!(result.is_ok()); - - // Verify buffer was 
finalized and cleared - let has_buffer_after = { - let acc = handler.accumulator.lock().await; - !acc.get_message_ids_for_session(&session_id).is_empty() - }; - assert!(!has_buffer_after, "Buffer should be cleared after SessionIdle"); - - // Verify message was marked as archived - let is_archived = { - let archived = handler.archived_messages.lock().await; - archived.contains(&message_id) - }; - assert!(is_archived, "Message should be marked as archived"); - } - - #[tokio::test] - async fn test_session_idle_with_no_buffers() { - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let session_id = "01936e8f-e5a7-7000-8000-000000000020".to_string(); - - // Handle SessionIdle with no buffers (should be no-op) - let result = handler.handle_session_idle("test-connector".to_string(), session_id).await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn test_session_idle_skips_already_archived() { - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let session_id = "01936e8f-e5a7-7000-8000-000000000030".to_string(); - let message_id = "01936e8f-e5a7-7000-8000-000000000031".to_string(); - - // Add chunks to accumulator - let update = SessionUpdate::AgentMessageChunk { - message_id: message_id.clone(), - content: ContentBlock::Text { - text: "Test message".to_string(), - }, - _meta: None, - }; - - handler - .handle_session_update( - "connector_456".to_string(), - session_id.clone(), - update, - ) - .await - .unwrap(); - - // Verify buffer exists before marking as archived - let has_buffer_before = { - let acc = handler.accumulator.lock().await; - !acc.get_message_ids_for_session(&session_id).is_empty() - }; - assert!(has_buffer_before, "Buffer should exist before marking as archived"); - - // Mark message as already archived - { - let mut archived = handler.archived_messages.lock().await; - archived.insert(message_id.clone()); - } - - // Handle SessionIdle - should clean buffer but 
skip writing already archived message - let result = handler.handle_session_idle("connector_456".to_string(), session_id.clone()).await; - assert!(result.is_ok()); - - // Buffer SHOULD be cleared even though message was already archived - // This is the fix: finalize() is called before checking archived_messages - let has_buffer = { - let acc = handler.accumulator.lock().await; - !acc.get_message_ids_for_session(&session_id).is_empty() - }; - assert!(!has_buffer, "Buffer should be cleared even when message is already archived (prevents leak)"); - } - - fn make_test_session(id: &str, title: &str) -> Session { - Session { - id: id.to_string(), - title: title.to_string(), - created_at: Utc::now(), - updated_at: Utc::now(), - metadata: dirigent_protocol::SessionMetadata { - project_path: "/test/project".to_string(), - model: Some("claude-4".to_string()), - total_messages: 0, - system_message: None, - current_mode_id: None, - _meta: None, - project_id: None, - }, - cwd: None, - models: None, - modes: None, - config_options: None, - acp_client_id: None, - } - } - - #[tokio::test] - async fn test_handle_sessions_listed_registers_sessions() { - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let connector_id = Uuid::now_v7().to_string(); - let sessions = vec![ - make_test_session("session-1", "First Session"), - make_test_session("session-2", "Second Session"), - ]; - - let result = handler - .handle_sessions_listed(connector_id, sessions) - .await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn test_handle_sessions_listed_empty_list() { - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - let result = handler - .handle_sessions_listed("some-connector".to_string(), vec![]) - .await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn test_handle_event_sessions_listed() { - let archivist = mk_test_archivist().await; - let handler = EventHandler::new(archivist); - - 
let connector_id = Uuid::now_v7().to_string(); - let event = Event::SessionsListed { - connector_id, - sessions: vec![ - make_test_session("sess-a", "Session A"), - make_test_session("sess-b", "Session B"), - make_test_session("sess-c", "Session C"), - ], - }; - - let result = handler.handle_event(event).await; - assert!(result.is_ok()); - } -} diff --git a/crates/dirigent_archivist/src/import/mod.rs b/crates/dirigent_archivist/src/import/mod.rs deleted file mode 100644 index e63dce9..0000000 --- a/crates/dirigent_archivist/src/import/mod.rs +++ /dev/null @@ -1,933 +0,0 @@ -//! Generic import infrastructure for bringing external sessions into the archive. -//! -//! This module provides the shared types and orchestration logic that all importers -//! (Claude, ChatGPT, etc.) reuse. Each importer implements discovery and message -//! conversion, then delegates to [`import_sessions`] for the actual import. - -pub mod progress; -pub mod registry; -pub mod sources; -pub mod trait_def; - -/// Backwards-compatible re-export — external callers (e.g. `api`) import -/// `dirigent_archivist::import::claude::{discover_claude_import, -/// import_claude_sessions}`. Keep the path stable until those callsites -/// migrate to the `Importer` trait. 
-pub use sources::claude; -#[cfg(feature = "importer-claude")] -pub use sources::claude::ClaudeImporter; - -pub use progress::{ImportProgressEvent, ImportProgressSink, SessionOutcome, StatsDelta}; -pub use registry::ImporterRegistry; -pub use trait_def::{ConfigField, ConfigFieldKind, ImportConfig, ImportConfigShape, ImportError, ImportTarget, Importer, ImporterInfo}; - -use std::collections::HashMap; - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::types::{ - MessageRecord, RegisterConnectorRequest, RegisterSessionRequest, RegisterStatus, - SessionCompleteness, -}; - -/// Statistics collected during an import operation. -#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)] -pub struct ImportStats { - /// Number of sessions found by the importer's discovery phase. - pub sessions_discovered: usize, - /// Number of sessions successfully imported as new. - pub sessions_imported: usize, - /// Number of sessions skipped (already present with same or more messages). - pub sessions_skipped: usize, - /// Number of sessions that were updated with new messages. - pub sessions_updated: usize, - /// Total number of message records written to the archive. - pub messages_written: usize, - /// Number of messages that were already present (from existing sessions). - pub messages_already_present: usize, - /// Number of sessions skipped because the fingerprint matched (no source changes). - #[serde(default)] - pub sessions_fingerprint_skipped: usize, - /// Errors encountered during import (non-fatal; import continues). - pub errors: Vec, -} - -impl ImportStats { - /// Total sessions processed (imported + skipped + updated + errored). - pub fn total_sessions_processed(&self) -> usize { - self.sessions_imported + self.sessions_skipped + self.sessions_updated + self.errors.len() - } - - /// Whether any errors were encountered during import. 
- pub fn has_errors(&self) -> bool { - !self.errors.is_empty() - } -} - -/// Intermediate representation for a session discovered by any importer. -/// -/// This is source-agnostic: each importer converts its native session format -/// into `DiscoveredSession` before handing it to [`import_sessions`]. -#[derive(Debug, Clone)] -pub struct DiscoveredSession { - /// The session ID from the original source (e.g., Claude's JSONL filename). - pub native_session_id: String, - /// Human-readable session title, if available. - pub title: Option, - /// When the session was created in the source system. - pub created_at: Option>, - /// When the session was last updated in the source system. - pub updated_at: Option>, - /// Number of messages in the source session (used for skip/update decisions). - pub message_count: usize, - /// Arbitrary source-specific metadata preserved for provenance. - pub metadata: serde_json::Value, - /// Project path associated with the session, if known. - pub project_path: Option, - /// Size of the source file in bytes, if available. Used for fingerprint-based - /// change detection to skip unchanged sessions on re-import. - pub file_size: Option, -} - -/// Snapshot of source-side signals captured after a successful import. -/// -/// Stored in the session's `metadata` JSON under the `"_import_snapshot"` key. -/// On re-import, comparing the current `DiscoveredSession` against the stored -/// snapshot lets us skip expensive full-parse when nothing has changed (O(1) gate). -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -pub struct ImportSnapshot { - /// Number of messages in the source at the time of import. - pub source_message_count: usize, - /// Source-side `updated_at` timestamp at the time of import. - pub source_updated_at: Option>, - /// Source file size in bytes at the time of import. - pub source_file_size: Option, - /// When this snapshot was recorded. 
- pub imported_at: DateTime, -} - -/// Key used to store [`ImportSnapshot`] in session metadata JSON. -const IMPORT_SNAPSHOT_KEY: &str = "_import_snapshot"; - -impl ImportSnapshot { - /// Check whether the source signals in `discovered` match this snapshot. - /// - /// Returns `true` if all present signals match, meaning the session has not - /// changed since this snapshot was taken and a full re-parse can be skipped. - pub fn matches(&self, discovered: &DiscoveredSession) -> bool { - if self.source_message_count != discovered.message_count { - return false; - } - if self.source_updated_at != discovered.updated_at { - return false; - } - // file_size: only compare when both sides have a value. - if let (Some(snap_size), Some(disc_size)) = (self.source_file_size, discovered.file_size) { - if snap_size != disc_size { - return false; - } - } - true - } - - /// Build a snapshot from a discovered session (captures current source signals). - pub fn from_discovered(discovered: &DiscoveredSession) -> Self { - Self { - source_message_count: discovered.message_count, - source_updated_at: discovered.updated_at, - source_file_size: discovered.file_size, - imported_at: Utc::now(), - } - } - - /// Try to deserialize a snapshot from a session's metadata JSON. - pub fn from_metadata(metadata: &serde_json::Value) -> Option { - metadata - .get(IMPORT_SNAPSHOT_KEY) - .and_then(|v| serde_json::from_value(v.clone()).ok()) - } - - /// Serialize this snapshot into the session's metadata JSON under the - /// `_import_snapshot` key. - pub fn write_to_metadata(&self, metadata: &mut serde_json::Value) { - if let Some(obj) = metadata.as_object_mut() { - if let Ok(val) = serde_json::to_value(self) { - obj.insert(IMPORT_SNAPSHOT_KEY.to_string(), val); - } - } else { - tracing::warn!("cannot write import snapshot: metadata is not a JSON object"); - } - } -} - -/// Summary returned by the discovery phase before actual import begins. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ImportDiscovery { - /// Human-readable name of the import source (e.g., "Claude Code"). - pub source_name: String, - /// Filesystem path or URI that was scanned. - pub source_path: String, - /// Projects discovered, grouped by name. - pub projects: Vec, - /// Total number of sessions found across all projects. - pub total_sessions: usize, - /// Estimated total messages across all discovered sessions. - pub total_estimated_messages: usize, -} - -/// A project grouping within an import discovery result. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ImportProject { - /// Project name (typically derived from the directory path). - pub name: String, - /// Number of sessions belonging to this project. - pub session_count: usize, -} - -/// Resolves the `updated_at` timestamp for an imported session. -/// -/// Prefers the source-provided timestamp from `discovered.updated_at`; falls -/// back to `Utc::now()` only when the source does not supply one. -fn resolve_updated_at(discovered: &DiscoveredSession) -> DateTime { - discovered.updated_at.unwrap_or_else(chrono::Utc::now) -} - -/// Generic async orchestrator that imports discovered sessions into the archive. -/// -/// This function handles the full import lifecycle: -/// 1. Registers the connector (idempotent via fingerprint). -/// 2. For each discovered session, checks whether it already exists in the archive. -/// 3. New sessions are registered and their messages are converted and appended. -/// 4. Existing sessions with fewer archived messages are logged and skipped (v1). -/// 5. Existing sessions with the same or more archived messages are skipped. -/// -/// The `convert_messages` closure receives a `native_session_id` and returns -/// `MessageRecord`s with `Uuid::nil()` in the `session` field. This function -/// patches each record's `session` to the real `scroll_id` before appending. 
-/// -/// # Arguments -/// -/// * `archivist` - The archivist to import into. -/// * `connector_req` - Registration request for the import connector. -/// * `sessions` - Sessions discovered by the importer. -/// * `convert_messages` - Closure that converts a native session into `MessageRecord`s. -/// * `archive` - Optional archive name (`None` for default archive). -/// * `progress` - Sink for per-session progress events (use -/// [`ImportProgressSink::noop`] when progress reporting is not needed). -pub async fn import_sessions( - archivist: &Archivist, - connector_req: RegisterConnectorRequest, - sessions: Vec, - convert_messages: F, - archive: Option, - progress: &ImportProgressSink, - force_deep_scan: bool, - project_map: &HashMap, -) -> Result -where - F: Fn(&str) -> Result> + Send + Sync, -{ - let mut stats = ImportStats::default(); - stats.sessions_discovered = sessions.len(); - - // Step 1: Register the connector (idempotent). - let connector_resp = archivist - .register_connector(connector_req, archive.clone()) - .await?; - let connector_uid = connector_resp.connector_uid; - - tracing::info!( - connector_uid = %connector_uid, - status = ?connector_resp.status, - "Import connector registered" - ); - - // Step 2: Process each discovered session. - let total = sessions.len(); - for (index, session) in sessions.iter().enumerate() { - let native_id = &session.native_session_id; - - progress - .send(ImportProgressEvent::SessionStarted { - native_id: native_id.clone(), - index, - total, - }) - .await; - - // Per-session outcome + stats delta. Updated as we go; on the early - // `continue` paths we emit Failed/Skipped before moving on. - let mut messages_written_delta: u64 = 0; - let mut messages_already_present_delta: u64 = 0; - let mut session_changed = false; - - // Helper: emit SessionFinished and fall out of the iteration. - macro_rules! 
emit_finished { - ($outcome:expr) => {{ - progress - .send(ImportProgressEvent::SessionFinished { - native_id: native_id.clone(), - outcome: $outcome, - stats_delta: StatsDelta { - messages_written: messages_written_delta, - messages_already_present: messages_already_present_delta, - }, - }) - .await; - }}; - } - - // --- Step 1: Resolve or create scroll_id BEFORE convert_messages --- - let (scroll_id, session_is_new) = match archivist - .resolve_session(connector_uid, native_id, archive.clone()) - .await - { - Ok(id) => (id, false), - Err(ArchivistError::SessionUnknown(_)) => { - // Inject project_id from project_map if the session has a - // project_path that maps to a known project. - let mut metadata = session.metadata.clone(); - if let Some(project_path) = session.project_path.as_deref() { - if let Some(pid) = project_map.get(project_path) { - if let Some(obj) = metadata.as_object_mut() { - obj.insert( - "project_id".to_string(), - serde_json::Value::String(pid.clone()), - ); - } - } - } - - let register_req = RegisterSessionRequest { - connector_uid, - native_session_id: native_id.clone(), - title: session.title.clone(), - custom_scroll_id: None, - metadata, - completeness: SessionCompleteness::Complete, - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - match archivist - .register_session(register_req, archive.clone()) - .await - { - Ok(resp) => match resp.status { - RegisterStatus::Accepted => (resp.scroll_id, true), - RegisterStatus::Aliased => { - stats.sessions_skipped += 1; - emit_finished!(SessionOutcome::Skipped); - continue; - } - RegisterStatus::Rejected => { - stats.errors.push(format!( - "Session registration rejected for {native_id}" - )); - emit_finished!(SessionOutcome::Failed); - continue; - } - }, - Err(e) => { - stats.errors.push(format!( - "Failed to register session {native_id}: {e}" - )); - emit_finished!(SessionOutcome::Failed); - continue; - 
} - } - } - Err(e) => { - stats.errors.push(format!( - "Failed to resolve session {native_id}: {e}" - )); - emit_finished!(SessionOutcome::Failed); - continue; - } - }; - - // --- Step 2: Hoist metadata read for existing sessions --- - // Load metadata once; reused for fingerprint check AND title/model diff. - let existing_meta = if !session_is_new { - match archivist - .get_session_metadata(scroll_id, archive.clone()) - .await - { - Ok(m) => Some(m), - Err(e) => { - stats.errors.push(format!( - "Failed to read session metadata for {native_id}: {e}" - )); - emit_finished!(SessionOutcome::Failed); - continue; - } - } - } else { - None - }; - - // --- Step 2b: Retroactive project_id linking for existing sessions --- - // Sessions imported before project detection (or before the project was - // created) have project_path but no project_id. Patch it now if the - // project_map has a match — this runs even for fingerprint-skipped - // sessions so re-import can link them without any source-side changes. 
- if !session_is_new { - if let Some(ref meta) = existing_meta { - let has_project_path = meta - .metadata - .get("project_path") - .and_then(|v| v.as_str()) - .is_some(); - let has_project_id = meta - .metadata - .get("project_id") - .and_then(|v| v.as_str()) - .filter(|s| !s.is_empty()) - .is_some(); - - if has_project_path && !has_project_id { - let stored_path = meta - .metadata - .get("project_path") - .and_then(|v| v.as_str()) - .unwrap(); - if let Some(pid) = project_map.get(stored_path) { - if let Ok(primary) = - archivist.resolve_primary(archive.clone()).await - { - let mut patched = meta.clone(); - if let Some(obj) = patched.metadata.as_object_mut() { - obj.insert( - "project_id".to_string(), - serde_json::Value::String(pid.clone()), - ); - } - patched.updated_at = resolve_updated_at(session); - match primary.backend.put_session(patched).await { - Ok(_) => { - tracing::info!( - scroll_id = %scroll_id, - project_id = %pid, - "Retroactively linked session to project" - ); - session_changed = true; - } - Err(e) => { - tracing::warn!( - scroll_id = %scroll_id, - error = %e, - "Failed to retroactively link session to project" - ); - } - } - } - } - } - } - } - - // --- Step 3: Fingerprint gate — skip unchanged sessions --- - if !session_is_new && !force_deep_scan { - if let Some(ref meta) = existing_meta { - if let Some(snapshot) = ImportSnapshot::from_metadata(&meta.metadata) { - if snapshot.matches(session) { - stats.sessions_fingerprint_skipped += 1; - if session_changed { - tracing::debug!( - native_id = %native_id, - "Fingerprint match — skipping message scan (metadata was updated)" - ); - stats.sessions_updated += 1; - emit_finished!(SessionOutcome::Updated); - } else { - tracing::debug!( - native_id = %native_id, - "Fingerprint match — skipping unchanged session" - ); - stats.sessions_skipped += 1; - emit_finished!(SessionOutcome::Skipped); - } - continue; - } - } - } - } - - // --- Step 4: Convert messages (EXPENSIVE — after fingerprint gate) --- - let 
source_records = match convert_messages(native_id) { - Ok(r) => r, - Err(e) => { - stats.errors.push(format!( - "Failed to convert messages for session {native_id}: {e}" - )); - emit_finished!(SessionOutcome::Failed); - continue; - } - }; - - // Build existing_ids set — empty for brand-new sessions. - let existing_ids: std::collections::HashSet = if session_is_new { - std::collections::HashSet::new() - } else { - match archivist.get_messages(scroll_id, archive.clone()).await { - Ok(msgs) => msgs.into_iter().map(|m| m.message_id).collect(), - Err(e) => { - stats.errors.push(format!( - "Failed to read existing messages for session {native_id}: {e}" - )); - emit_finished!(SessionOutcome::Failed); - continue; - } - } - }; - - // Patch placeholder session field and partition. - let mut new_messages: Vec = Vec::new(); - let mut already_present_count: usize = 0; - for mut record in source_records { - if record.session == Uuid::nil() { - record.session = scroll_id; - } - if existing_ids.contains(&record.message_id) { - already_present_count += 1; - } else { - new_messages.push(record); - } - } - - let new_count = new_messages.len(); - if new_count > 0 { - if let Err(e) = archivist - .append_messages(scroll_id, new_messages, archive.clone()) - .await - { - stats.errors.push(format!( - "Failed to append messages for session {native_id}: {e}" - )); - emit_finished!(SessionOutcome::Failed); - continue; - } - stats.messages_written += new_count; - messages_written_delta = new_count as u64; - session_changed = true; - } - stats.messages_already_present += already_present_count; - messages_already_present_delta = already_present_count as u64; - - // --- Step 5: Metadata diff (reuse hoisted metadata) --- - if !session_is_new { - // SAFETY: existing_meta is Some when !session_is_new (guarded above). 
- let current_meta = existing_meta.unwrap(); - - let new_title = session.title.as_ref(); - let title_differs = new_title.is_some() && new_title != current_meta.title.as_ref(); - - let new_model = session - .metadata - .get("model") - .and_then(|v| v.as_str()) - .map(String::from); - let current_model = current_meta - .metadata - .get("model") - .and_then(|v| v.as_str()) - .map(String::from); - let model_differs = new_model.is_some() && new_model != current_model; - - if title_differs || model_differs { - if let Err(e) = archivist - .update_session_metadata( - scroll_id, - if title_differs { new_title.cloned() } else { None }, - if model_differs { new_model } else { None }, - archive.clone(), - ) - .await - { - stats.errors.push(format!( - "Failed to update session metadata for {native_id}: {e}" - )); - emit_finished!(SessionOutcome::Failed); - continue; - } - session_changed = true; - } - - let new_project_path = session - .metadata - .get("project_path") - .and_then(|v| v.as_str()) - .map(String::from); - let current_project_path = current_meta - .metadata - .get("project_path") - .and_then(|v| v.as_str()) - .map(String::from); - let project_path_differs = - new_project_path.is_some() && new_project_path != current_project_path; - - if project_path_differs { - // project_path lives in the free-form metadata JSON. - // Re-read to pick up any title/model changes applied above. 
- let mut patched_meta = archivist - .get_session_metadata(scroll_id, archive.clone()) - .await - .unwrap_or(current_meta); - if let Some(obj) = patched_meta.metadata.as_object_mut() { - let path_val = new_project_path.clone().unwrap_or_default(); - obj.insert( - "project_path".to_string(), - serde_json::Value::String(path_val.clone()), - ); - if let Some(pid) = project_map.get(&path_val) { - obj.insert( - "project_id".to_string(), - serde_json::Value::String(pid.clone()), - ); - } - } - patched_meta.updated_at = resolve_updated_at(session); - if let Ok(primary) = archivist.resolve_primary(archive.clone()).await { - if let Err(e) = primary.backend.put_session(patched_meta).await { - tracing::warn!( - scroll_id = %scroll_id, - error = %e, - "Failed to update project_path in session metadata" - ); - } - } - session_changed = true; - } - } - - // --- Step 6: Write import snapshot after successful import/update --- - { - let snapshot = ImportSnapshot::from_discovered(session); - // Re-read metadata to get the latest state (may have been updated above). - let write_result = async { - let mut meta = archivist - .get_session_metadata(scroll_id, archive.clone()) - .await?; - snapshot.write_to_metadata(&mut meta.metadata); - meta.updated_at = resolve_updated_at(session); - let primary = archivist.resolve_primary(archive.clone()).await?; - primary.backend.put_session(meta).await.map_err(|e| { - ArchivistError::InvalidRequest(format!( - "Failed to write import snapshot: {e}" - )) - }) - } - .await; - if let Err(e) = write_result { - tracing::warn!( - scroll_id = %scroll_id, - error = %e, - "Failed to write import snapshot (session still imported)" - ); - } - } - - // Accounting: exactly one of {imported, updated, skipped} per session. 
- let outcome = if session_is_new { - stats.sessions_imported += 1; - SessionOutcome::Imported - } else if session_changed { - stats.sessions_updated += 1; - SessionOutcome::Updated - } else { - stats.sessions_skipped += 1; - SessionOutcome::Skipped - }; - - emit_finished!(outcome); - } - - Ok(stats) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_import_stats_default() { - let stats = ImportStats::default(); - assert_eq!(stats.sessions_discovered, 0); - assert_eq!(stats.sessions_imported, 0); - assert_eq!(stats.sessions_skipped, 0); - assert_eq!(stats.sessions_updated, 0); - assert_eq!(stats.messages_written, 0); - assert_eq!(stats.messages_already_present, 0); - assert!(stats.errors.is_empty()); - } - - #[test] - fn test_import_stats_total_sessions_processed() { - let mut stats = ImportStats::default(); - stats.sessions_imported = 3; - stats.sessions_skipped = 2; - stats.sessions_updated = 1; - stats.errors.push("oops".to_string()); - assert_eq!(stats.total_sessions_processed(), 7); - } - - #[test] - fn test_import_stats_has_errors() { - let mut stats = ImportStats::default(); - assert!(!stats.has_errors()); - stats.errors.push("something went wrong".to_string()); - assert!(stats.has_errors()); - } -} - -#[cfg(test)] -mod idempotency_tests { - use super::*; - use crate::Archivist; - use chrono::Utc; - use uuid::Uuid; - - async fn mk() -> (Archivist, std::path::PathBuf) { - let tmp = std::env::temp_dir().join(format!("import_idem_{}", Uuid::now_v7())); - // Use `from_single_backend` rather than `new_with_single_archive` so - // each test's archive is fully self-contained (no shared `.archives.json` - // in the parent tempdir racing against sibling tests). 
- let backend = std::sync::Arc::new( - crate::backends::JsonlBackend::new(tmp.clone()).await.unwrap(), - ); - let a = Archivist::from_single_backend("main".into(), backend) - .await - .unwrap(); - (a, tmp) - } - - fn connector() -> RegisterConnectorRequest { - // Stable client_native_id so that re-registering within the same test - // (which uses an isolated temp dir per test) aliases onto the same - // connector_uid — otherwise each call would produce a fresh connector - // and defeat idempotency. - RegisterConnectorRequest { - r#type: "Fake".into(), - title: "fake".into(), - client_native_id: "fake@local:stable".into(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - } - } - - fn record(session: Uuid, id: Uuid, role: &str, content: &str) -> MessageRecord { - MessageRecord { - version: 1, - message_id: id, - session, - parent_id: None, - ts: Utc::now(), - role: role.to_string(), - author: None, - content_md: content.to_string(), - content_parts: None, - attachments: Vec::new(), - metadata: serde_json::json!({}), - } - } - - #[tokio::test] - async fn import_skips_already_present_messages() { - let (archivist, tmp) = mk().await; - - let a = Uuid::now_v7(); - let b = Uuid::now_v7(); - let c = Uuid::now_v7(); - - let discovered = vec![DiscoveredSession { - native_session_id: "s1".into(), - title: Some("t".into()), - created_at: None, - updated_at: None, - message_count: 3, - metadata: serde_json::json!({}), - project_path: None, - file_size: None, - }]; - let convert = |_: &str| -> Result> { - Ok(vec![ - record(Uuid::nil(), a, "user", "hi-a"), - record(Uuid::nil(), b, "user", "hi-b"), - record(Uuid::nil(), c, "user", "hi-c"), - ]) - }; - let stats = import_sessions(&archivist, connector(), discovered.clone(), convert, None, &ImportProgressSink::noop(), true, &HashMap::new()) - .await - .unwrap(); - assert_eq!(stats.sessions_imported, 1); - assert_eq!(stats.messages_written, 3); - - // Re-import with IDENTICAL records — nothing should be 
written. - let convert2 = |_: &str| -> Result> { - Ok(vec![ - record(Uuid::nil(), a, "user", "hi-a"), - record(Uuid::nil(), b, "user", "hi-b"), - record(Uuid::nil(), c, "user", "hi-c"), - ]) - }; - let stats2 = import_sessions(&archivist, connector(), discovered, convert2, None, &ImportProgressSink::noop(), true, &HashMap::new()) - .await - .unwrap(); - assert_eq!(stats2.messages_written, 0); - assert_eq!(stats2.messages_already_present, 3); - assert_eq!(stats2.sessions_skipped, 1); - assert_eq!(stats2.sessions_imported, 0); - assert_eq!(stats2.sessions_updated, 0); - - let _ = tokio::fs::remove_dir_all(tmp).await; - } - - #[tokio::test] - async fn import_appends_new_messages_only() { - let (archivist, tmp) = mk().await; - - let a = Uuid::now_v7(); - let b = Uuid::now_v7(); - let c = Uuid::now_v7(); - let d = Uuid::now_v7(); - - let discovered = vec![DiscoveredSession { - native_session_id: "s1".into(), - title: Some("t".into()), - created_at: None, - updated_at: None, - message_count: 2, - metadata: serde_json::json!({}), - project_path: None, - file_size: None, - }]; - let convert1 = |_: &str| -> Result> { - Ok(vec![ - record(Uuid::nil(), a, "user", "hi-a"), - record(Uuid::nil(), b, "user", "hi-b"), - ]) - }; - let _ = import_sessions(&archivist, connector(), discovered.clone(), convert1, None, &ImportProgressSink::noop(), true, &HashMap::new()) - .await - .unwrap(); - - // Second run: source has grown to 4 messages. 
- let convert2 = |_: &str| -> Result> { - Ok(vec![ - record(Uuid::nil(), a, "user", "hi-a"), - record(Uuid::nil(), b, "user", "hi-b"), - record(Uuid::nil(), c, "user", "hi-c"), - record(Uuid::nil(), d, "user", "hi-d"), - ]) - }; - let stats = import_sessions(&archivist, connector(), discovered, convert2, None, &ImportProgressSink::noop(), true, &HashMap::new()) - .await - .unwrap(); - assert_eq!(stats.messages_written, 2); - assert_eq!(stats.messages_already_present, 2); - assert_eq!(stats.sessions_updated, 1); - assert_eq!(stats.sessions_skipped, 0); - assert_eq!(stats.sessions_imported, 0); - - let _ = tokio::fs::remove_dir_all(tmp).await; - } - - #[tokio::test] - async fn import_updates_metadata_only() { - let (archivist, tmp) = mk().await; - - let a = Uuid::now_v7(); - let convert = |_: &str| -> Result> { - Ok(vec![record(Uuid::nil(), a, "user", "hi")]) - }; - - let first = vec![DiscoveredSession { - native_session_id: "s1".into(), - title: Some("old title".into()), - created_at: None, - updated_at: None, - message_count: 1, - metadata: serde_json::json!({}), - project_path: None, - file_size: None, - }]; - let _ = import_sessions(&archivist, connector(), first, convert, None, &ImportProgressSink::noop(), true, &HashMap::new()) - .await - .unwrap(); - - // Re-import with same messages but new title. - let second = vec![DiscoveredSession { - native_session_id: "s1".into(), - title: Some("new title".into()), - created_at: None, - updated_at: None, - message_count: 1, - metadata: serde_json::json!({}), - project_path: None, - file_size: None, - }]; - let convert2 = |_: &str| -> Result> { - Ok(vec![record(Uuid::nil(), a, "user", "hi")]) - }; - let stats = import_sessions(&archivist, connector(), second, convert2, None, &ImportProgressSink::noop(), true, &HashMap::new()) - .await - .unwrap(); - assert_eq!(stats.messages_written, 0); - assert_eq!(stats.sessions_updated, 1); - assert_eq!(stats.sessions_skipped, 0); - - // Verify title landed on disk. 
- let meta_list = archivist - .list_sessions_paged( - crate::types::SessionListQuery::default().with_limit(50), - ) - .await - .unwrap(); - assert!(meta_list.items.iter().any(|m| m.title.as_deref() == Some("new title"))); - - let _ = tokio::fs::remove_dir_all(tmp).await; - } - - #[tokio::test] - async fn import_handles_metadata_unchanged() { - let (archivist, tmp) = mk().await; - - let a = Uuid::now_v7(); - let discovered = vec![DiscoveredSession { - native_session_id: "s1".into(), - title: Some("t".into()), - created_at: None, - updated_at: None, - message_count: 1, - metadata: serde_json::json!({"model": "claude"}), - project_path: None, - file_size: None, - }]; - let convert = |_: &str| -> Result> { - Ok(vec![record(Uuid::nil(), a, "user", "hi")]) - }; - let _ = import_sessions(&archivist, connector(), discovered.clone(), convert, None, &ImportProgressSink::noop(), true, &HashMap::new()) - .await - .unwrap(); - - let convert2 = |_: &str| -> Result> { - Ok(vec![record(Uuid::nil(), a, "user", "hi")]) - }; - let stats = import_sessions(&archivist, connector(), discovered, convert2, None, &ImportProgressSink::noop(), true, &HashMap::new()) - .await - .unwrap(); - assert_eq!(stats.sessions_skipped, 1); - assert_eq!(stats.sessions_updated, 0); - assert_eq!(stats.messages_written, 0); - - let _ = tokio::fs::remove_dir_all(tmp).await; - } -} diff --git a/crates/dirigent_archivist/src/import/progress.rs b/crates/dirigent_archivist/src/import/progress.rs deleted file mode 100644 index 2a9569b..0000000 --- a/crates/dirigent_archivist/src/import/progress.rs +++ /dev/null @@ -1,117 +0,0 @@ -//! ImportProgressSink: bounded mpsc with drop-oldest-non-terminal overflow. -//! Terminal events (ImportDone / ImportFailed) are never dropped — on full -//! channel they evict oldest non-terminal events until they fit. The import -//! thread never backpressures on a slow consumer. 
- -use serde::{Deserialize, Serialize}; -use tokio::sync::mpsc; - -use super::ImportDiscovery; -use super::ImportStats; - -const DEFAULT_CAPACITY: usize = 64; - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "kind")] -pub enum ImportProgressEvent { - DiscoveryStarted { source: String }, - DiscoveryProgress { scanned: usize, estimated_total: Option }, - DiscoveryDone { discovered: ImportDiscovery }, - SessionStarted { native_id: String, index: usize, total: usize }, - SessionFinished { native_id: String, outcome: SessionOutcome, stats_delta: StatsDelta }, - ImportDone { stats: ImportStats }, - ImportFailed { error: String }, -} - -impl ImportProgressEvent { - pub fn is_terminal(&self) -> bool { - matches!(self, ImportProgressEvent::ImportDone { .. } | ImportProgressEvent::ImportFailed { .. }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SessionOutcome { Imported, Skipped, Updated, Failed } - -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct StatsDelta { - pub messages_written: u64, - pub messages_already_present: u64, -} - -pub struct ImportProgressSink { - inner: SinkInner, -} - -enum SinkInner { - Live { tx: mpsc::Sender }, - Noop, -} - -impl ImportProgressSink { - pub fn channel() -> (Self, mpsc::Receiver) { - let (tx, rx) = mpsc::channel(DEFAULT_CAPACITY); - (Self { inner: SinkInner::Live { tx } }, rx) - } - - pub fn noop() -> Self { Self { inner: SinkInner::Noop } } - - pub async fn send(&self, evt: ImportProgressEvent) { - match &self.inner { - SinkInner::Noop => {} - SinkInner::Live { tx } => { - if evt.is_terminal() { - // Force-send: guaranteed delivery of terminal events. - let _ = tx.send(evt).await; - } else { - // Best-effort: drop non-terminal events when the channel is full. 
- match tx.try_send(evt) { - Ok(()) => {} - Err(mpsc::error::TrySendError::Full(_)) => { - tracing::debug!("import progress: dropped non-terminal event (queue full)"); - } - Err(mpsc::error::TrySendError::Closed(_)) => { - tracing::warn!("import progress: consumer gone"); - } - } - } - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn terminal_events_always_delivered() { - let (sink, mut rx) = ImportProgressSink::channel(); - // Fill the channel with non-terminal events (mostly drop). - for i in 0..1000 { - sink.send(ImportProgressEvent::SessionStarted { - native_id: format!("s{i}"), index: i, total: 1000, - }).await; - } - // Consumer drains in background. - let handle = tokio::spawn(async move { - let mut saw_done = false; - while let Some(e) = rx.recv().await { - if matches!(e, ImportProgressEvent::ImportDone { .. }) { - saw_done = true; - break; - } - } - saw_done - }); - sink.send(ImportProgressEvent::ImportDone { stats: ImportStats::default() }).await; - let saw_done = tokio::time::timeout(std::time::Duration::from_secs(2), handle).await.unwrap().unwrap(); - assert!(saw_done); - } - - #[tokio::test] - async fn noop_sink_never_fails() { - let sink = ImportProgressSink::noop(); - sink.send(ImportProgressEvent::ImportDone { stats: ImportStats::default() }).await; - } -} diff --git a/crates/dirigent_archivist/src/import/registry.rs b/crates/dirigent_archivist/src/import/registry.rs deleted file mode 100644 index 3902247..0000000 --- a/crates/dirigent_archivist/src/import/registry.rs +++ /dev/null @@ -1,93 +0,0 @@ -//! Dynamic registry of Importer implementations. Populated at boot. - -use std::collections::HashMap; -use std::sync::Arc; - -use super::trait_def::{Importer, ImporterInfo}; - -pub struct ImporterRegistry { - importers: HashMap<&'static str, Arc>, -} - -impl ImporterRegistry { - pub fn new() -> Self { - Self { - importers: HashMap::new(), - } - } - - /// Populate with all built-in importers. 
Feature flags select which ship. - pub fn with_defaults() -> Self { - let mut r = Self::new(); - #[cfg(feature = "importer-claude")] - r.register(Arc::new(super::sources::claude::ClaudeImporter)); - #[cfg(feature = "importer-chatgpt")] - r.register(Arc::new(super::sources::chatgpt::ChatGptImporter)); - #[cfg(feature = "importer-codex")] - r.register(Arc::new(super::sources::codex::CodexImporter)); - r - } - - pub fn register(&mut self, importer: Arc) { - self.importers.insert(importer.source_name(), importer); - } - - pub fn get(&self, name: &str) -> Option> { - self.importers.get(name).cloned() - } - - pub fn list(&self) -> Vec { - self.importers - .values() - .map(|i| ImporterInfo { - source_name: i.source_name().to_string(), - display_name: pretty_name(i.source_name()), - config_shape: i.config_shape(), - }) - .collect() - } -} - -fn pretty_name(source: &str) -> String { - match source { - "claude" => "Claude Code".into(), - "chatgpt" => "ChatGPT (OpenAI)".into(), - "codex" => "OpenAI Codex".into(), - other => other.to_string(), - } -} - -impl Default for ImporterRegistry { - fn default() -> Self { - Self::with_defaults() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn defaults_include_claude_when_feature_enabled() { - let reg = ImporterRegistry::with_defaults(); - let list = reg.list(); - #[cfg(feature = "importer-claude")] - { - assert!(list.iter().any(|i| i.source_name == "claude")); - assert!(reg.get("claude").is_some()); - } - #[cfg(not(feature = "importer-claude"))] - { - let _ = list; - assert!(reg.get("claude").is_none()); - } - } - - #[test] - fn pretty_name_known_sources() { - assert_eq!(pretty_name("claude"), "Claude Code"); - assert_eq!(pretty_name("chatgpt"), "ChatGPT (OpenAI)"); - assert_eq!(pretty_name("codex"), "OpenAI Codex"); - assert_eq!(pretty_name("custom"), "custom"); - } -} diff --git a/crates/dirigent_archivist/src/import/sources/chatgpt.rs b/crates/dirigent_archivist/src/import/sources/chatgpt.rs deleted file mode 
100644 index 7af054d..0000000 --- a/crates/dirigent_archivist/src/import/sources/chatgpt.rs +++ /dev/null @@ -1,361 +0,0 @@ -//! ChatGPT importer: takes a path to a conversations.json file. - -use std::path::PathBuf; - -use async_trait::async_trait; -use chrono::Utc; -use uuid::Uuid; - -use dirigent_chatgpt::{ContentPart, ParsedConversation, ParsedMessage}; - -use super::super::progress::ImportProgressSink; -use super::super::trait_def::{ - ConfigField, ConfigFieldKind, ImportConfig, ImportConfigShape, ImportError, ImportTarget, - Importer, -}; -use super::super::{ - import_sessions, DiscoveredSession, ImportDiscovery, ImportProject, ImportStats, -}; -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::types::{MessageRecord, RegisterConnectorRequest}; - -/// Connector type string used for imported ChatGPT sessions. -pub const CHATGPT_CONNECTOR_TYPE: &str = "ChatGPT"; - -/// Fingerprint prefix for locally-imported ChatGPT exports. -pub const CHATGPT_FINGERPRINT_PREFIX: &str = "import/local:chatgpt"; - -/// Namespace UUID for deterministic UUIDv5 derivations on ChatGPT message ids -/// that are not already valid UUIDs. 
-const CHATGPT_MESSAGE_NS: Uuid = Uuid::from_u128(0x4e58_a7cb_bf1c_4de2_b7c9_8c31_11b3_1112); - -pub struct ChatGptImporter; - -#[async_trait] -impl Importer for ChatGptImporter { - fn source_name(&self) -> &'static str { - "chatgpt" - } - - fn config_shape(&self) -> ImportConfigShape { - ImportConfigShape { - fields: vec![ConfigField { - key: "path".into(), - label: "conversations.json path".into(), - kind: ConfigFieldKind::File { - extension: Some("json".into()), - }, - required: true, - help: Some( - "Unzipped OpenAI data export \u{2192} conversations.json".into(), - ), - }], - example: ImportConfig { - source: "chatgpt".into(), - params: { - let mut m = std::collections::BTreeMap::new(); - m.insert( - "path".into(), - serde_json::json!("~/Downloads/chatgpt-export/conversations.json"), - ); - m - }, - }, - } - } - - async fn discover( - &self, - cfg: &ImportConfig, - ) -> std::result::Result { - let path = require_path(cfg)?; - let convs = dirigent_chatgpt::parse_export(&path) - .map_err(|e| ImportError::Discovery(e.to_string()))?; - - let total_sessions = convs.len(); - let total_estimated_messages: usize = convs.iter().map(|c| c.messages.len()).sum(); - - // ChatGPT exports don't carry per-project information, so we bucket - // everything into a single synthetic project named after the file. 
- let project_name = path - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or("ChatGPT export") - .to_string(); - - Ok(ImportDiscovery { - source_name: "ChatGPT".to_string(), - source_path: path.display().to_string(), - projects: vec![ImportProject { - name: project_name, - session_count: total_sessions, - }], - total_sessions, - total_estimated_messages, - }) - } - - async fn import( - &self, - cfg: &ImportConfig, - archivist: &Archivist, - target: ImportTarget, - progress: ImportProgressSink, - ) -> std::result::Result { - let path = require_path(cfg)?; - let convs = dirigent_chatgpt::parse_export(&path) - .map_err(|e| ImportError::Parser(e.to_string()))?; - - // Build discovered-session list + keep the parsed convs handy for - // message conversion inside the closure. - let mut discovered: Vec = Vec::with_capacity(convs.len()); - for c in &convs { - let metadata = serde_json::json!({ - "source": "chatgpt", - "conversation_id": c.id, - "parser_metadata": c.metadata.clone(), - }); - discovered.push(DiscoveredSession { - native_session_id: c.id.clone(), - title: c.title.clone(), - created_at: c.created_at, - updated_at: c.updated_at, - message_count: c.messages.len(), - metadata, - project_path: None, - file_size: None, - }); - } - - // Map native_id -> parsed conversation for O(1) lookup in `convert`. - let conv_lookup: std::collections::HashMap = convs - .into_iter() - .map(|c| (c.id.clone(), c)) - .collect(); - - // Fingerprint the import by the canonical path. Re-running against the - // same file aliases onto the same connector. 
- let canonical_path = path.canonicalize().unwrap_or_else(|_| path.clone()); - let fingerprint = format!("{}:{}", CHATGPT_FINGERPRINT_PREFIX, canonical_path.display()); - - let connector_req = RegisterConnectorRequest { - r#type: CHATGPT_CONNECTOR_TYPE.to_string(), - title: format!("ChatGPT ({})", canonical_path.display()), - client_native_id: fingerprint.clone(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some(fingerprint), - }; - - let convert = |native_id: &str| -> Result> { - let conv = conv_lookup.get(native_id).ok_or_else(|| { - ArchivistError::InvalidRequest(format!( - "Parsed conversation not found for native_id: {}", - native_id - )) - })?; - Ok(convert_conversation_to_records(conv)) - }; - - import_sessions( - archivist, - connector_req, - discovered, - convert, - target.archive, - &progress, - false, - &target.project_map, - ) - .await - .map_err(|e| ImportError::Archivist(e.to_string())) - } -} - -// --------------------------------------------------------------------------- -// Conversion helpers -// --------------------------------------------------------------------------- - -fn require_path(cfg: &ImportConfig) -> std::result::Result { - cfg.params - .get("path") - .and_then(|v| v.as_str()) - .map(PathBuf::from) - .ok_or_else(|| ImportError::Config("missing `path`".into())) -} - -/// Prefer to parse the native id as a UUID if possible; otherwise derive a -/// stable UUIDv5 under [`CHATGPT_MESSAGE_NS`]. -fn parse_or_derive_uuid(native_id: &str) -> Uuid { - Uuid::parse_str(native_id) - .unwrap_or_else(|_| Uuid::new_v5(&CHATGPT_MESSAGE_NS, native_id.as_bytes())) -} - -/// Convert parsed `ContentPart`s into `dirigent_protocol::MessagePart`s. 
-fn parts_to_message_parts(parts: &[ContentPart]) -> Vec { - parts - .iter() - .map(|p| match p { - ContentPart::Text { text } => dirigent_protocol::MessagePart::Text { - text: text.clone(), - }, - ContentPart::Code { language, text } => dirigent_protocol::MessagePart::Code { - language: language.clone().unwrap_or_default(), - code: text.clone(), - }, - ContentPart::Tool { name, input, output } => dirigent_protocol::MessagePart::Tool { - tool: name.clone(), - tool_call_id: None, - input: input.clone(), - output: output.clone(), - }, - }) - .collect() -} - -/// Flatten a list of parsed content parts into a markdown-y string for the -/// `content_md` fallback surface. -fn parts_to_markdown(parts: &[ContentPart]) -> String { - parts - .iter() - .map(|p| match p { - ContentPart::Text { text } => text.clone(), - ContentPart::Code { language, text } => { - let lang = language.clone().unwrap_or_default(); - format!("```{}\n{}\n```", lang, text) - } - ContentPart::Tool { name, .. } => format!("[Tool: {}]", name), - }) - .collect::>() - .join("\n\n") -} - -/// Convert a parsed ChatGPT conversation into a vector of `MessageRecord`s. -/// -/// Each message's `session` field is left as `Uuid::nil()`; the generic -/// `import_sessions` orchestrator patches it to the real scroll id. -fn convert_conversation_to_records(conv: &ParsedConversation) -> Vec { - conv.messages - .iter() - .filter_map(convert_parsed_message) - .collect() -} - -fn convert_parsed_message(msg: &ParsedMessage) -> Option { - // Skip messages with entirely empty text payloads (nothing to archive). - let content_md = parts_to_markdown(&msg.content); - if content_md.trim().is_empty() && msg.content.iter().all(is_part_empty) { - return None; - } - - let parts = parts_to_message_parts(&msg.content); - let content_parts = serde_json::to_value(&parts).ok(); - - let ts = msg.ts.unwrap_or_else(Utc::now); - let message_id = if msg.id.is_empty() { - // Fallback: derive from role + timestamp + a hash of content. 
- let key = format!("{}:{}:{}", msg.role, ts.to_rfc3339(), content_md); - Uuid::new_v5(&CHATGPT_MESSAGE_NS, key.as_bytes()) - } else { - parse_or_derive_uuid(&msg.id) - }; - - Some(MessageRecord { - version: 1, - message_id, - session: Uuid::nil(), - parent_id: None, - ts, - role: msg.role.clone(), - author: None, - content_md, - content_parts, - attachments: Vec::new(), - metadata: msg.metadata.clone(), - }) -} - -fn is_part_empty(p: &ContentPart) -> bool { - match p { - ContentPart::Text { text } => text.trim().is_empty(), - ContentPart::Code { text, .. } => text.trim().is_empty(), - ContentPart::Tool { .. } => false, - } -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_or_derive_uuid_parses_real_uuid() { - let real = "12345678-1234-5678-1234-567812345678"; - let u = parse_or_derive_uuid(real); - assert_eq!(u.to_string(), real); - } - - #[test] - fn parse_or_derive_uuid_falls_back_to_v5() { - let a = parse_or_derive_uuid("not-a-uuid"); - let b = parse_or_derive_uuid("not-a-uuid"); - assert_eq!(a, b, "deterministic UUIDv5 derivation"); - let c = parse_or_derive_uuid("different"); - assert_ne!(a, c); - } - - #[test] - fn parts_to_message_parts_covers_all_variants() { - let parts = vec![ - ContentPart::Text { text: "hi".into() }, - ContentPart::Code { - language: Some("rust".into()), - text: "fn main() {}".into(), - }, - ContentPart::Tool { - name: "browser".into(), - input: serde_json::json!({"url": "https://example.com"}), - output: Some(serde_json::json!({"status": 200})), - }, - ]; - let mp = parts_to_message_parts(&parts); - assert_eq!(mp.len(), 3); - assert!(matches!(&mp[0], dirigent_protocol::MessagePart::Text { .. })); - assert!(matches!(&mp[1], dirigent_protocol::MessagePart::Code { .. })); - assert!(matches!(&mp[2], dirigent_protocol::MessagePart::Tool { .. 
})); - } - - #[test] - fn empty_parsed_message_is_skipped() { - let msg = ParsedMessage { - id: "m1".into(), - role: "system".into(), - ts: None, - content: vec![ContentPart::Text { text: " ".into() }], - metadata: serde_json::Value::Null, - }; - assert!(convert_parsed_message(&msg).is_none()); - } - - #[test] - fn non_empty_parsed_message_round_trips() { - let msg = ParsedMessage { - id: "m1".into(), - role: "user".into(), - ts: None, - content: vec![ContentPart::Text { - text: "hello".into(), - }], - metadata: serde_json::Value::Null, - }; - let record = convert_parsed_message(&msg).expect("should convert"); - assert_eq!(record.role, "user"); - assert_eq!(record.content_md, "hello"); - assert_eq!(record.session, Uuid::nil()); - assert!(record.content_parts.is_some()); - } -} diff --git a/crates/dirigent_archivist/src/import/sources/claude.rs b/crates/dirigent_archivist/src/import/sources/claude.rs deleted file mode 100644 index c72d464..0000000 --- a/crates/dirigent_archivist/src/import/sources/claude.rs +++ /dev/null @@ -1,1356 +0,0 @@ -//! Claude Code session importer. -//! -//! Uses dirigent_anth to discover and parse Claude Code's local JSONL sessions, -//! then converts them into archivist MessageRecords for import. 
- -use std::collections::HashMap; - -use camino::Utf8PathBuf; -use chrono::{DateTime, Utc}; -use uuid::Uuid; - -use dirigent_anth::types::{ - Content, ContentBlock, RawAssistantMessage, RawMessage, RawUserMessage, SessionRef, -}; -use dirigent_anth::{classify_noise, discover_projects, load_session, parse_timestamp}; - -use async_trait::async_trait; - -use super::super::progress::ImportProgressSink; -use super::super::trait_def::{ - ConfigField, ConfigFieldKind, ImportConfig, ImportConfigShape, ImportError, ImportTarget, - Importer, -}; -use super::super::{ - import_sessions, DiscoveredSession, ImportDiscovery, ImportProject, ImportStats, -}; -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::types::{MessageRecord, RegisterConnectorRequest}; - -/// Connector type string used for imported Claude Code sessions. -pub const CLAUDE_CONNECTOR_TYPE: &str = "ClaudeCode"; - -/// Fingerprint prefix for locally-imported Claude Code sessions. -pub const CLAUDE_FINGERPRINT_PREFIX: &str = "import/local:claude-code"; - -/// Namespace UUID for deterministic fallback message_ids when Claude's source -/// `uuid` field is missing. Arbitrary, stable constant. -const CLAUDE_MESSAGE_NS: uuid::Uuid = uuid::Uuid::from_bytes([ - 0x43, 0x4c, 0x41, 0x55, 0x44, 0x45, 0x2d, 0x4d, 0x53, 0x47, 0x2d, 0x4e, 0x53, 0x2d, 0x56, 0x35, -]); - -/// Derive a stable `message_id` for a Claude message record. -/// -/// Priority: -/// 1. Parse the `source_uuid` as a UUID (Claude's JSONL carries per-message -/// UUIDs natively). -/// 2. Fall back to `Uuid::new_v5(CLAUDE_MESSAGE_NS, ":::")`. 
-fn derive_message_id( - source_uuid: Option<&str>, - native_session_id: &str, - ts: &chrono::DateTime, - role: &str, - content_md: &str, -) -> uuid::Uuid { - if let Some(raw) = source_uuid { - if let Ok(parsed) = uuid::Uuid::parse_str(raw) { - return parsed; - } - } - let key = format!("{}:{}:{}:{}", native_session_id, ts.to_rfc3339(), role, content_md); - uuid::Uuid::new_v5(&CLAUDE_MESSAGE_NS, key.as_bytes()) -} - -// --------------------------------------------------------------------------- -// Discovery -// --------------------------------------------------------------------------- - -/// Discover Claude Code sessions available for import. -/// -/// If `claude_home` is `Some`, uses it as the Claude home directory; -/// otherwise auto-detects via `dirigent_anth::discover_claude_home()`. -/// -/// Returns the resolved home path and a summary of discovered projects/sessions. -pub fn discover_claude_import( - claude_home: Option<&str>, -) -> std::result::Result<(Utf8PathBuf, ImportDiscovery), String> { - let home = match claude_home { - Some(p) => Utf8PathBuf::from(p), - None => dirigent_anth::discover_claude_home().map_err(|e| e.to_string())?, - }; - - let projects = discover_projects(&home).map_err(|e| e.to_string())?; - - let mut import_projects = Vec::new(); - let mut total_sessions: usize = 0; - let mut total_estimated_messages: usize = 0; - - for project in &projects { - let session_count = project.sessions.len(); - total_sessions += session_count; - - for session in &project.sessions { - if let Some(ref idx) = session.index_entry { - total_estimated_messages += idx.message_count.unwrap_or(0) as usize; - } - } - - import_projects.push(ImportProject { - name: project.original_path.clone(), - session_count, - }); - } - - let discovery = ImportDiscovery { - source_name: "Claude Code".to_string(), - source_path: home.to_string(), - projects: import_projects, - total_sessions, - total_estimated_messages, - }; - - Ok((home, discovery)) -} - -// 
--------------------------------------------------------------------------- -// Import orchestration -// --------------------------------------------------------------------------- - -/// Import all Claude Code sessions from `claude_home` into the archivist. -/// -/// This discovers all projects and sessions, registers a connector, and imports -/// each session by parsing its JSONL and converting messages to `MessageRecord`s. -/// -/// Pass `&ImportProgressSink::noop()` when progress reporting is not needed. -pub async fn import_claude_sessions( - archivist: &crate::coordinator::Archivist, - claude_home: &Utf8PathBuf, - archive: Option, - progress: &ImportProgressSink, - project_map: &HashMap, -) -> Result { - let projects = - discover_projects(claude_home).map_err(|e| ArchivistError::InvalidRequest(e.to_string()))?; - - // Build discovered sessions and a lookup map. - let mut discovered: Vec = Vec::new(); - let mut session_lookup: HashMap = HashMap::new(); - - for project in &projects { - for session_ref in &project.sessions { - let idx = session_ref.index_entry.as_ref(); - - let mut title = idx - .and_then(|i| i.summary.clone()) - .or_else(|| idx.and_then(|i| i.first_prompt.clone())); - - // Fallback: derive title from first user message content. - // NOTE: This parses the JSONL file, adding I/O cost during discovery. - // The same session will be parsed again during message conversion. - // Acceptable for a background import task; could be optimized later - // by restructuring to parse once and extract title + messages together. - if title.is_none() { - if let Ok(parsed) = load_session(session_ref) { - for msg in &parsed.messages { - // Skip noise (queue ops, meta, warmup, etc.) 
- if classify_noise(msg).is_some() { - continue; - } - if let RawMessage::User(user) = msg { - let text = match &user.message.content { - Content::Text(s) => s.clone(), - Content::Blocks(blocks) => blocks - .iter() - .filter_map(|b| match b { - ContentBlock::Text { text } => Some(text.as_str()), - _ => None, - }) - .collect::>() - .join(" "), - }; - let trimmed = text.trim(); - // Skip system-injected XML content (commands, caveats, etc.) - if trimmed.is_empty() - || trimmed.starts_with('<') - || is_pure_tool_result_message(user) - { - continue; - } - title = Some(derive_title_from_content(trimmed)); - break; - } - } - } - } - - let created_at = idx - .and_then(|i| i.created.as_ref()) - .and_then(parse_timestamp); - - let updated_at = idx - .and_then(|i| i.modified.as_ref()) - .and_then(parse_timestamp); - - let message_count = idx.and_then(|i| i.message_count).unwrap_or(0) as usize; - - let metadata = serde_json::json!({ - "project_path": project.original_path, - "git_branch": idx.and_then(|i| i.git_branch.clone()), - }); - - // Get file size from the JSONL file for fingerprint-based change detection. 
- let file_size = std::fs::metadata(session_ref.jsonl_path.as_std_path()) - .ok() - .map(|m| m.len()); - - discovered.push(DiscoveredSession { - native_session_id: session_ref.id.clone(), - title, - created_at, - updated_at, - message_count, - metadata, - project_path: Some(project.original_path.clone()), - file_size, - }); - - session_lookup.insert(session_ref.id.clone(), session_ref.clone()); - } - } - - let fingerprint = format!("{}:{}", CLAUDE_FINGERPRINT_PREFIX, claude_home); - - let connector_req = RegisterConnectorRequest { - r#type: CLAUDE_CONNECTOR_TYPE.to_string(), - title: format!("Claude Code ({})", claude_home), - client_native_id: fingerprint.clone(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some(fingerprint), - }; - - let convert = |native_id: &str| -> Result> { - let session_ref = session_lookup.get(native_id).ok_or_else(|| { - ArchivistError::InvalidRequest(format!( - "Session ref not found for native_id: {}", - native_id - )) - })?; - convert_session_to_records(session_ref) - }; - - let mut stats = import_sessions( - archivist, - connector_req, - discovered, - convert, - archive.clone(), - progress, - false, - project_map, - ) - .await?; - - // Phase 2: Import subagent sessions - import_subagents(archivist, &session_lookup, archive, &mut stats).await?; - - Ok(stats) -} - -/// Import subagent sessions for all parent sessions that have them. -/// -/// For each session in the lookup, loads and parses it, then for each subagent: -/// 1. Registers a new session with `is_subagent=true` and `parent_scroll_id` -/// 2. Converts subagent messages to MessageRecords -/// 3. 
Writes a DAG edge linking parent to child -async fn import_subagents( - archivist: &crate::coordinator::Archivist, - session_lookup: &HashMap, - archive: Option, - stats: &mut ImportStats, -) -> Result<()> { - use crate::types::{ - Continuation, DagEdge, RegisterSessionRequest, RegisterStatus, SessionCompleteness, - }; - - for (native_id, session_ref) in session_lookup { - // Load the full parsed session (includes subagents with linkage) - let parsed = match load_session(session_ref) { - Ok(p) => p, - Err(e) => { - tracing::debug!(native_id = %native_id, error = %e, "Failed to load session for subagent import"); - continue; - } - }; - - if parsed.subagents.is_empty() { - continue; - } - - // Resolve the parent's scroll_id from the archive - let parent_scroll_id = match archivist - .find_session_owner(native_id, archive.clone()) - .await - { - Ok(Some((_connector_uid, scroll_id))) => scroll_id, - _ => { - tracing::debug!(native_id = %native_id, "Parent session not found in archive, skipping subagent import"); - continue; - } - }; - - // Get parent's connector_uid from metadata - let parent_meta = match archivist.get_session_metadata(parent_scroll_id, archive.clone()).await { - Ok(m) => m, - Err(_) => continue, - }; - - for subagent in &parsed.subagents { - // Use composite native ID for idempotent dedup - let subagent_native_id = format!("{}:agent:{}", native_id, subagent.agent_id); - - // Check if already imported (idempotent) - if archivist - .resolve_session(parent_meta.connector_uid, &subagent_native_id, archive.clone()) - .await - .is_ok() - { - tracing::debug!( - agent_id = %subagent.agent_id, - parent = %native_id, - "Subagent already imported, skipping" - ); - continue; - } - - // Derive a title from the Agent tool call description if available - let title = parsed.tool_exchanges.iter() - .find(|ex| { - ex.call.name == dirigent_anth::types::ToolName::Agent - && subagent.parent_tool_call_id.as_deref() == Some(&ex.call.id) - }) - .and_then(|ex| 
ex.call.input.get("description").and_then(|v| v.as_str())) - .map(|s| format!("[{}] {}", subagent.meta.agent_type.as_deref().unwrap_or("Agent"), s)); - - let register_req = RegisterSessionRequest { - connector_uid: parent_meta.connector_uid, - native_session_id: subagent_native_id.clone(), - title, - custom_scroll_id: None, - metadata: serde_json::json!({ - "parent_native_session_id": native_id, - }), - completeness: SessionCompleteness::Complete, - parent_scroll_id: Some(parent_scroll_id), - is_subagent: true, - continuation: Some(Continuation::Subagent), - agent_id: Some(subagent.agent_id.clone()), - subagent_type: subagent.meta.agent_type.clone(), - spawning_tool_use_id: subagent.parent_tool_call_id.clone(), - }; - - let child_scroll_id = match archivist - .register_session(register_req, archive.clone()) - .await - { - Ok(resp) => match resp.status { - RegisterStatus::Accepted => resp.scroll_id, - RegisterStatus::Aliased => { - tracing::debug!(agent_id = %subagent.agent_id, "Subagent session aliased"); - continue; - } - RegisterStatus::Rejected => { - stats.errors.push(format!( - "Subagent registration rejected for agent_id {}", - subagent.agent_id - )); - continue; - } - }, - Err(e) => { - stats.errors.push(format!( - "Failed to register subagent {}: {}", - subagent.agent_id, e - )); - continue; - } - }; - - // Convert subagent messages to records - let mut records: Vec = subagent - .messages - .iter() - .filter_map(|msg| { - convert_ant_message_with_exchanges( - msg, - child_scroll_id, - &[], - &subagent_native_id, - ) - }) - .collect(); - - // Patch session field - for record in &mut records { - if record.session == Uuid::nil() { - record.session = child_scroll_id; - } - } - - let msg_count = records.len(); - - if let Err(e) = archivist - .append_messages(child_scroll_id, records, archive.clone()) - .await - { - stats.errors.push(format!( - "Failed to append subagent messages for {}: {}", - subagent.agent_id, e - )); - continue; - } - - // Write DAG edge - 
let edge = DagEdge { - parent: parent_scroll_id, - child: child_scroll_id, - agent_id: subagent.agent_id.clone(), - subagent_type: subagent.meta.agent_type.clone(), - tool_use_id: subagent.parent_tool_call_id.clone(), - ts: Some(chrono::Utc::now()), - }; - - if let Err(e) = archivist.append_dag_edge(edge, archive.clone()).await { - tracing::warn!( - agent_id = %subagent.agent_id, - error = %e, - "Failed to write DAG edge (subagent still imported)" - ); - } - - stats.sessions_imported += 1; - stats.messages_written += msg_count; - - tracing::info!( - agent_id = %subagent.agent_id, - parent_scroll_id = %parent_scroll_id, - child_scroll_id = %child_scroll_id, - messages = msg_count, - "Subagent session imported" - ); - } - } - - Ok(()) -} - -// --------------------------------------------------------------------------- -// Session → MessageRecord conversion -// --------------------------------------------------------------------------- - -/// Convert a single Claude Code session (referenced by `SessionRef`) into a -/// vector of `MessageRecord`s. -/// -/// Uses `dirigent_anth::load_session` to parse, dedup, and correlate the JSONL, -/// then converts each non-noise message. Tool calls are correlated with their -/// results using `ParsedSession.tool_exchanges`, and user messages that contain -/// only tool results are suppressed (their content is merged into the assistant's -/// `MessagePart::Tool` parts). The `session` field on each record is set to -/// `Uuid::nil()` — the import orchestrator patches it to the real scroll_id. 
-pub fn convert_session_to_records(session_ref: &SessionRef) -> Result> { - let parsed = load_session(session_ref) - .map_err(|e| ArchivistError::InvalidRequest(format!("Failed to load session: {}", e)))?; - - let placeholder = Uuid::nil(); - let native_session_id = session_ref.id.as_str(); - let records: Vec = parsed - .messages - .iter() - .filter_map(|msg| { - convert_ant_message_with_exchanges( - msg, - placeholder, - &parsed.tool_exchanges, - native_session_id, - ) - }) - .collect(); - - Ok(records) -} - -/// Convert a single `RawMessage` from dirigent_anth into a `MessageRecord`. -/// -/// Returns `None` for noise messages (queue operations, meta, warmup, etc.) -/// and for message types that don't carry user-visible content (Progress, System). -/// -/// This is a convenience wrapper that delegates to [`convert_ant_message_with_exchanges`] -/// with an empty exchange list (tool results won't be correlated). -/// -/// Note: callers using this public API do not benefit from stable fallback -/// message_ids (native_session_id is empty). Prefer -/// [`convert_ant_message_with_exchanges`] with a real `native_session_id` for -/// idempotent imports. -pub fn convert_ant_message(msg: &RawMessage, scroll_id: Uuid) -> Option { - convert_ant_message_with_exchanges(msg, scroll_id, &[], "") -} - -/// Convert a single `RawMessage` with tool exchange correlation. -/// -/// User messages that contain ONLY tool results are suppressed (their content -/// is merged into the assistant's `MessagePart::Tool` parts via exchanges). -/// -/// `native_session_id` is used as part of the deterministic fallback -/// `message_id` when Claude's native per-message `uuid` field is absent or -/// unparseable. Pass the native session id (`session_ref.id`) for idempotent -/// imports; an empty string is acceptable for one-off conversions that do not -/// need stable ids. 
-pub fn convert_ant_message_with_exchanges( - msg: &RawMessage, - scroll_id: Uuid, - tool_exchanges: &[dirigent_anth::types::ToolExchange], - native_session_id: &str, -) -> Option { - if classify_noise(msg).is_some() { - return None; - } - - match msg { - RawMessage::User(user) => { - // Suppress user messages that are purely tool results - if is_pure_tool_result_message(user) { - return None; - } - convert_user_message(user, scroll_id, native_session_id) - } - RawMessage::Assistant(assistant) => convert_assistant_message_with_exchanges( - assistant, - scroll_id, - tool_exchanges, - native_session_id, - ), - RawMessage::Progress(_) - | RawMessage::System(_) - | RawMessage::QueueOperation(_) - | RawMessage::FileHistorySnapshot(_) - | RawMessage::LastPrompt(_) => None, - } -} - -/// Check if a user message contains only ToolResult blocks (no actual user text). -fn is_pure_tool_result_message(user: &RawUserMessage) -> bool { - match &user.message.content { - Content::Text(s) => s.trim().is_empty(), - Content::Blocks(blocks) => { - blocks - .iter() - .all(|b| matches!(b, ContentBlock::ToolResult { .. 
})) - } - } -} - -// --------------------------------------------------------------------------- -// User message conversion -// --------------------------------------------------------------------------- - -fn convert_user_message( - user: &RawUserMessage, - scroll_id: Uuid, - native_session_id: &str, -) -> Option { - let ts = user - .timestamp - .as_ref() - .and_then(|s| parse_timestamp_value(&serde_json::Value::String(s.clone()))) - .unwrap_or_else(Utc::now); - - let content_md = match &user.message.content { - Content::Text(s) => s.clone(), - Content::Blocks(blocks) => blocks_to_markdown_user(blocks), - }; - - if content_md.trim().is_empty() { - return None; - } - - // Build content_parts as Vec for proper UI rendering - let content_parts = serde_json::to_value(vec![dirigent_protocol::MessagePart::Text { - text: content_md.clone(), - }]) - .ok(); - - let mut meta = serde_json::Map::new(); - if let Some(ref cwd) = user.cwd { - meta.insert("cwd".to_string(), serde_json::Value::String(cwd.clone())); - } - if let Some(ref branch) = user.git_branch { - meta.insert( - "git_branch".to_string(), - serde_json::Value::String(branch.clone()), - ); - } - if let Some(ref version) = user.version { - meta.insert( - "claude_version".to_string(), - serde_json::Value::String(version.clone()), - ); - } - - Some(MessageRecord { - version: 1, - message_id: derive_message_id( - user.uuid.as_deref(), - native_session_id, - &ts, - "user", - &content_md, - ), - session: scroll_id, - parent_id: None, - ts, - role: "user".to_string(), - author: None, - content_md, - content_parts, - attachments: Vec::new(), - metadata: serde_json::Value::Object(meta), - }) -} - -// --------------------------------------------------------------------------- -// Assistant message conversion -// --------------------------------------------------------------------------- - -fn convert_assistant_message_with_exchanges( - assistant: &RawAssistantMessage, - scroll_id: Uuid, - tool_exchanges: 
&[dirigent_anth::types::ToolExchange], - native_session_id: &str, -) -> Option { - let ts = assistant - .timestamp - .as_ref() - .and_then(|s| parse_timestamp_value(&serde_json::Value::String(s.clone()))) - .unwrap_or_else(Utc::now); - - let parts = assistant_blocks_to_message_parts(&assistant.message.content, tool_exchanges); - - if parts.is_empty() { - return None; - } - - // Build content_md as fallback from parts - let content_md = parts - .iter() - .map(|p| match p { - dirigent_protocol::MessagePart::Text { text } => text.clone(), - dirigent_protocol::MessagePart::Thinking { text } => { - format!("\n{}\n", text) - } - dirigent_protocol::MessagePart::Tool { tool, .. } => format!("[Tool: {}]", tool), - dirigent_protocol::MessagePart::Code { language, code } => { - format!("```{}\n{}\n```", language, code) - } - dirigent_protocol::MessagePart::File { path, .. } => format!("[File: {}]", path), - }) - .collect::>() - .join("\n\n"); - - let content_parts = serde_json::to_value(&parts).ok(); - - let model = assistant.message.model.clone(); - - let mut meta = serde_json::Map::new(); - if let Some(ref m) = model { - meta.insert("model".to_string(), serde_json::Value::String(m.clone())); - } - if let Some(ref usage) = assistant.message.usage { - meta.insert("usage".to_string(), usage.clone()); - } - if let Some(ref stop_reason) = assistant.message.stop_reason { - meta.insert( - "stop_reason".to_string(), - serde_json::Value::String(stop_reason.clone()), - ); - } - - Some(MessageRecord { - version: 1, - message_id: derive_message_id( - assistant.uuid.as_deref(), - native_session_id, - &ts, - "assistant", - &content_md, - ), - session: scroll_id, - parent_id: None, - ts, - role: "assistant".to_string(), - author: model, - content_md, - content_parts, - attachments: Vec::new(), - metadata: serde_json::Value::Object(meta), - }) -} - -// --------------------------------------------------------------------------- -// MessagePart conversion (structured content for UI 
rendering) -// --------------------------------------------------------------------------- - -/// Convert assistant content blocks to `Vec`, using tool exchanges -/// for correlating tool_use with their results. -/// -/// Each `ToolUse` block becomes a `MessagePart::Tool` with the correlated result -/// (if found in `tool_exchanges`). `Thinking` and `Text` blocks map directly. -fn assistant_blocks_to_message_parts( - blocks: &[ContentBlock], - tool_exchanges: &[dirigent_anth::types::ToolExchange], -) -> Vec { - // Build lookup: tool_use_id → &ToolExchange - let exchange_map: HashMap<&str, &dirigent_anth::types::ToolExchange> = tool_exchanges - .iter() - .map(|e| (e.call.id.as_str(), e)) - .collect(); - - let mut parts = Vec::new(); - - for block in blocks { - match block { - ContentBlock::Text { text } if !text.is_empty() => { - parts.push(dirigent_protocol::MessagePart::Text { text: text.clone() }); - } - ContentBlock::Thinking { thinking } if !thinking.is_empty() => { - parts.push(dirigent_protocol::MessagePart::Thinking { - text: thinking.clone(), - }); - } - ContentBlock::ToolUse { id, name, input, .. } => { - let output = exchange_map.get(id.as_str()).and_then(|ex| { - ex.result.as_ref().map(|r| { - if r.is_error { - serde_json::json!({ "error": r.content.as_deref().unwrap_or("Unknown error") }) - } else { - serde_json::json!({ "result": r.content.as_deref().unwrap_or("") }) - } - }) - }); - - parts.push(dirigent_protocol::MessagePart::Tool { - tool: name.clone(), - tool_call_id: Some(id.clone()), - input: input.clone(), - output, - }); - } - ContentBlock::Image { .. 
} => { - parts.push(dirigent_protocol::MessagePart::Text { - text: "[Image]".to_string(), - }); - } - _ => {} // Empty text/thinking, ToolResult on assistant side (shouldn't happen) - } - } - - parts -} - -// --------------------------------------------------------------------------- -// Markdown conversion helpers (legacy, used for markdown-only fallback) -// --------------------------------------------------------------------------- - -/// Convert assistant content blocks to a single markdown string. -/// Retained for tests; production path uses `assistant_blocks_to_message_parts`. -#[cfg(test)] -fn assistant_blocks_to_markdown(blocks: &[ContentBlock]) -> String { - let parts: Vec = blocks - .iter() - .filter_map(|block| match block { - ContentBlock::Text { text } => { - if text.is_empty() { - None - } else { - Some(text.clone()) - } - } - ContentBlock::Thinking { thinking } => { - if thinking.is_empty() { - None - } else { - Some(format!("\n{}\n", thinking)) - } - } - ContentBlock::ToolUse { name, input, .. } => { - let input_pretty = - serde_json::to_string_pretty(input).unwrap_or_else(|_| input.to_string()); - Some(format!("**Tool: {}**\n\n```json\n{}\n```", name, input_pretty)) - } - ContentBlock::ToolResult { - content, is_error, .. - } => { - let label = if *is_error { - "**Tool Error:**" - } else { - "**Tool Result:**" - }; - let body = match content { - Some(Content::Text(s)) => format!("\n\n```\n{}\n```", s), - Some(Content::Blocks(inner)) => { - let text = inner - .iter() - .filter_map(|b| match b { - ContentBlock::Text { text } => Some(text.as_str()), - _ => None, - }) - .collect::>() - .join("\n"); - if text.is_empty() { - String::new() - } else { - format!("\n\n```\n{}\n```", text) - } - } - None => String::new(), - }; - Some(format!("{}{}", label, body)) - } - ContentBlock::Image { .. } => Some("[Image]".to_string()), - }) - .collect(); - - parts.join("\n\n") -} - -/// Convert user content blocks (typically tool results) to markdown. 
-fn blocks_to_markdown_user(blocks: &[ContentBlock]) -> String { - let parts: Vec = blocks - .iter() - .filter_map(|block| match block { - ContentBlock::Text { text } => { - if text.is_empty() { - None - } else { - Some(text.clone()) - } - } - ContentBlock::ToolResult { - content, is_error, .. - } => { - let label = if *is_error { - "**Tool Error:**" - } else { - "**Tool Result:**" - }; - let body = match content { - Some(Content::Text(s)) => format!("\n\n```\n{}\n```", s), - Some(Content::Blocks(inner)) => { - let text = inner - .iter() - .filter_map(|b| match b { - ContentBlock::Text { text } => Some(text.as_str()), - _ => None, - }) - .collect::>() - .join("\n"); - if text.is_empty() { - String::new() - } else { - format!("\n\n```\n{}\n```", text) - } - } - None => String::new(), - }; - Some(format!("{}{}", label, body)) - } - _ => None, - }) - .collect(); - - parts.join("\n\n") -} - -// --------------------------------------------------------------------------- -// Title helpers -// --------------------------------------------------------------------------- - -/// Derive a session title from the first line of user content. -/// Truncates to ~100 characters at a char boundary if needed. -fn derive_title_from_content(content: &str) -> String { - let first_line = content.lines().next().unwrap_or(content).trim(); - if first_line.len() > 100 { - // Find the last char boundary at or before byte 100 - let end = first_line - .char_indices() - .take_while(|&(i, _)| i <= 100) - .last() - .map(|(i, _)| i) - .unwrap_or(100); - format!("{}...", &first_line[..end]) - } else { - first_line.to_string() - } -} - -// --------------------------------------------------------------------------- -// Timestamp helpers -// --------------------------------------------------------------------------- - -/// Parse a timestamp from a serde_json::Value. -/// -/// Handles both string (ISO 8601) and numeric (Unix seconds/millis) values. 
-/// This wraps `dirigent_anth::parse_timestamp` for convenience. -pub fn parse_timestamp_value(v: &serde_json::Value) -> Option> { - parse_timestamp(v) -} - -// --------------------------------------------------------------------------- -// Importer trait wrapper -// --------------------------------------------------------------------------- - -/// `Importer` adapter around the Claude Code session importer. -/// -/// Wraps the free functions [`discover_claude_import`] and -/// [`import_claude_sessions`] so the generic import registry can drive Claude -/// imports the same way as any other source. -pub struct ClaudeImporter; - -#[async_trait] -impl Importer for ClaudeImporter { - fn source_name(&self) -> &'static str { - "claude" - } - - fn config_shape(&self) -> ImportConfigShape { - // Expose one field: path to ~/.claude directory (directory picker). - ImportConfigShape { - fields: vec![ConfigField { - key: "path".into(), - label: "Claude directory".into(), - kind: ConfigFieldKind::Path { directory: true }, - required: true, - help: Some("Usually ~/.claude".into()), - }], - example: ImportConfig { - source: "claude".into(), - params: { - let mut m = std::collections::BTreeMap::new(); - m.insert("path".into(), serde_json::json!("~/.claude")); - m - }, - }, - } - } - - fn detect_defaults(&self) -> Option { - let home = dirigent_anth::discover_claude_home().ok()?; - let mut params = std::collections::BTreeMap::new(); - params.insert("path".into(), serde_json::json!(home.to_string())); - Some(ImportConfig { - source: "claude".into(), - params, - }) - } - - async fn discover(&self, cfg: &ImportConfig) -> std::result::Result { - let path = cfg - .params - .get("path") - .and_then(|v| v.as_str()) - .ok_or_else(|| ImportError::Config("missing `path`".into()))?; - let (_home, discovery) = discover_claude_import(Some(path)) - .map_err(ImportError::Discovery)?; - Ok(discovery) - } - - async fn import( - &self, - cfg: &ImportConfig, - archivist: &Archivist, - target: 
ImportTarget, - progress: ImportProgressSink, - ) -> std::result::Result { - let path = cfg - .params - .get("path") - .and_then(|v| v.as_str()) - .ok_or_else(|| ImportError::Config("missing `path`".into()))?; - let home = Utf8PathBuf::from(path); - import_claude_sessions(archivist, &home, target.archive, &progress, &target.project_map) - .await - .map_err(|e| ImportError::Archivist(e.to_string())) - } -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - fn make_user_text(text: &str, timestamp: &str) -> RawMessage { - let json = format!( - r#"{{ - "type": "user", - "uuid": "test-uuid-001", - "parentUuid": null, - "timestamp": "{}", - "sessionId": "test-session", - "cwd": "/home/user/project", - "version": "2.1.71", - "gitBranch": "main", - "isSidechain": false, - "message": {{ - "role": "user", - "content": "{}" - }} - }}"#, - timestamp, text - ); - serde_json::from_str(&json).unwrap() - } - - fn make_assistant_text(text: &str, timestamp: &str) -> RawMessage { - let json = format!( - r#"{{ - "type": "assistant", - "uuid": "test-uuid-002", - "parentUuid": "test-uuid-001", - "timestamp": "{}", - "sessionId": "test-session", - "message": {{ - "model": "claude-opus-4-6", - "id": "msg_test", - "role": "assistant", - "content": [{{"type": "text", "text": "{}"}}], - "stop_reason": "end_turn", - "usage": {{"input_tokens": 100, "output_tokens": 50}} - }} - }}"#, - timestamp, text - ); - serde_json::from_str(&json).unwrap() - } - - fn make_assistant_with_tool(tool_name: &str, timestamp: &str) -> RawMessage { - let json = format!( - r#"{{ - "type": "assistant", - "uuid": "test-uuid-003", - "parentUuid": "test-uuid-001", - "timestamp": "{}", - "sessionId": "test-session", - "message": {{ - "model": "claude-opus-4-6", - "id": "msg_test2", - "role": "assistant", - "content": [ - {{"type": "text", "text": "Let 
me check that."}}, - {{"type": "tool_use", "id": "toolu_abc", "name": "{}", "input": {{"command": "ls"}}}} - ], - "stop_reason": "tool_use" - }} - }}"#, - timestamp, tool_name - ); - serde_json::from_str(&json).unwrap() - } - - fn make_queue_operation() -> RawMessage { - let json = r#"{ - "type": "queue-operation", - "operation": "enqueue", - "timestamp": "2026-03-14T21:15:17.531Z", - "sessionId": "test-session" - }"#; - serde_json::from_str(json).unwrap() - } - - #[test] - fn convert_user_text_message() { - let msg = make_user_text("Hello world", "2026-04-01T12:00:00Z"); - let record = convert_ant_message(&msg, Uuid::nil()).expect("should produce a record"); - - assert_eq!(record.role, "user"); - assert_eq!(record.content_md, "Hello world"); - assert_eq!(record.session, Uuid::nil()); - assert!(record.author.is_none()); - - // Check metadata - let meta = record.metadata.as_object().unwrap(); - assert_eq!(meta.get("cwd").unwrap(), "/home/user/project"); - assert_eq!(meta.get("git_branch").unwrap(), "main"); - assert_eq!(meta.get("claude_version").unwrap(), "2.1.71"); - - // Check content_parts is present - assert!(record.content_parts.is_some()); - } - - #[test] - fn convert_assistant_text_message() { - let msg = make_assistant_text("Here is your answer.", "2026-04-01T12:00:05Z"); - let record = convert_ant_message(&msg, Uuid::nil()).expect("should produce a record"); - - assert_eq!(record.role, "assistant"); - assert_eq!(record.content_md, "Here is your answer."); - assert_eq!(record.author.as_deref(), Some("claude-opus-4-6")); - - // Check metadata - let meta = record.metadata.as_object().unwrap(); - assert_eq!(meta.get("model").unwrap(), "claude-opus-4-6"); - assert_eq!(meta.get("stop_reason").unwrap(), "end_turn"); - assert!(meta.get("usage").is_some()); - } - - #[test] - fn convert_queue_operation_returns_none() { - let msg = make_queue_operation(); - let result = convert_ant_message(&msg, Uuid::nil()); - assert!(result.is_none(), "QueueOperation should be 
skipped as noise"); - } - - #[test] - fn convert_assistant_with_tool_use_contains_tool_name() { - let msg = make_assistant_with_tool("Bash", "2026-04-01T12:00:10Z"); - let record = convert_ant_message(&msg, Uuid::nil()).expect("should produce a record"); - - assert_eq!(record.role, "assistant"); - assert!( - record.content_md.contains("[Tool: Bash]"), - "markdown should contain tool reference, got: {}", - record.content_md - ); - // content_parts should contain proper MessagePart::Tool - assert!(record.content_parts.is_some()); - let parts: Vec = - serde_json::from_value(record.content_parts.unwrap()).unwrap(); - assert_eq!(parts.len(), 2); - assert!(matches!(&parts[0], dirigent_protocol::MessagePart::Text { text } if text == "Let me check that.")); - assert!(matches!(&parts[1], dirigent_protocol::MessagePart::Tool { tool, .. } if tool == "Bash")); - } - - #[test] - fn parse_timestamp_value_string() { - let v = serde_json::json!("2026-04-01T12:00:00Z"); - let dt = parse_timestamp_value(&v).unwrap(); - assert_eq!(dt.year(), 2026); - assert_eq!(dt.month(), 4); - assert_eq!(dt.day(), 1); - } - - #[test] - fn parse_timestamp_value_numeric_millis() { - let v = serde_json::json!(1769461914249_i64); - let dt = parse_timestamp_value(&v).unwrap(); - assert!(dt.year() >= 2025); - } - - #[test] - fn parse_timestamp_value_null_returns_none() { - let v = serde_json::json!(null); - assert!(parse_timestamp_value(&v).is_none()); - } - - use chrono::Datelike; - - #[test] - fn convert_empty_user_message_returns_none() { - let msg = make_user_text("", "2026-04-01T12:00:00Z"); - let result = convert_ant_message(&msg, Uuid::nil()); - assert!(result.is_none(), "Empty user message should be skipped"); - } - - #[test] - fn assistant_blocks_to_markdown_thinking() { - let blocks = vec![ContentBlock::Thinking { - thinking: "Let me think about this...".to_string(), - }]; - let md = assistant_blocks_to_markdown(&blocks); - assert!(md.contains("")); - assert!(md.contains("Let me think about 
this...")); - assert!(md.contains("")); - } - - #[test] - fn assistant_blocks_to_markdown_image() { - let blocks = vec![ContentBlock::Image { - source: serde_json::json!({"type": "base64"}), - }]; - let md = assistant_blocks_to_markdown(&blocks); - assert_eq!(md, "[Image]"); - } - - #[test] - fn convert_assistant_with_tools_produces_message_parts() { - use dirigent_protocol::MessagePart; - - let blocks = vec![ - ContentBlock::Text { - text: "Let me check that.".to_string(), - }, - ContentBlock::ToolUse { - id: "toolu_abc".to_string(), - name: "Bash".to_string(), - input: serde_json::json!({"command": "ls"}), - caller: None, - }, - ]; - - let exchanges: Vec = vec![]; - let parts = assistant_blocks_to_message_parts(&blocks, &exchanges); - - assert_eq!(parts.len(), 2); - assert!( - matches!(&parts[0], MessagePart::Text { text } if text == "Let me check that.") - ); - assert!( - matches!(&parts[1], MessagePart::Tool { tool, output, .. } if tool == "Bash" && output.is_none()) - ); - } - - #[test] - fn convert_assistant_with_correlated_tool_result() { - use dirigent_protocol::MessagePart; - - let blocks = vec![ - ContentBlock::Text { - text: "Let me read that file.".to_string(), - }, - ContentBlock::ToolUse { - id: "toolu_xyz".to_string(), - name: "Read".to_string(), - input: serde_json::json!({"file_path": "/tmp/test.rs"}), - caller: None, - }, - ]; - - let exchanges = vec![dirigent_anth::types::ToolExchange { - call: dirigent_anth::types::ToolCall { - id: "toolu_xyz".to_string(), - name: dirigent_anth::types::ToolName::Read, - input: serde_json::json!({"file_path": "/tmp/test.rs"}), - source_message_uuid: "msg1".to_string(), - }, - result: Some(dirigent_anth::types::ToolResultData { - tool_use_id: "toolu_xyz".to_string(), - content: Some("fn main() {}".to_string()), - is_error: false, - source_message_uuid: "msg2".to_string(), - }), - }]; - - let parts = assistant_blocks_to_message_parts(&blocks, &exchanges); - assert_eq!(parts.len(), 2); - if let MessagePart::Tool { - 
tool, - tool_call_id, - output, - .. - } = &parts[1] - { - assert_eq!(tool, "Read"); - assert_eq!(tool_call_id.as_deref(), Some("toolu_xyz")); - assert!(output.is_some()); - } else { - panic!("Expected Tool part"); - } - } - - #[test] - fn user_message_with_only_tool_results_is_suppressed() { - let user_json = r#"{ - "type": "user", - "uuid": "test-uuid-005", - "timestamp": "2026-04-01T12:00:15Z", - "sessionId": "test-session", - "message": { - "role": "user", - "content": [ - {"type": "tool_result", "tool_use_id": "toolu_abc", "content": "output text"} - ] - } - }"#; - let msg: RawMessage = serde_json::from_str(user_json).unwrap(); - let record = convert_ant_message_with_exchanges(&msg, Uuid::nil(), &[], ""); - assert!( - record.is_none(), - "Pure tool-result user messages should be suppressed" - ); - } - - #[test] - fn derive_title_from_content_short() { - let title = derive_title_from_content("Hello, can you help me refactor this module?"); - assert_eq!(title, "Hello, can you help me refactor this module?"); - } - - #[test] - fn derive_title_from_content_truncates_long() { - let long = "a".repeat(200); - let title = derive_title_from_content(&long); - assert!(title.len() <= 104); // ~100 + "..." 
- assert!(title.ends_with("...")); - } - - #[test] - fn derive_title_from_content_handles_multibyte_chars() { - // 'x' repeated with a multi-byte char near the boundary - let mut s = "a".repeat(98); - s.push('\u{2019}'); // right single quote (3 bytes) at byte 98..101 - s.push_str(&"b".repeat(100)); - let title = derive_title_from_content(&s); - assert!(title.ends_with("...")); - // Should NOT panic on multi-byte boundary - assert!(title.is_char_boundary(title.len())); - } - - #[test] - fn derive_title_from_content_uses_first_line() { - let title = derive_title_from_content("First line\nSecond line\nThird line"); - assert_eq!(title, "First line"); - } -} - -#[cfg(test)] -mod id_stability_tests { - use super::*; - - #[test] - fn derive_message_id_stable_when_source_uuid_present() { - let ts = chrono::Utc::now(); - let a = derive_message_id( - Some("12345678-1234-5678-1234-567812345678"), - "session-1", - &ts, - "user", - "hello", - ); - let b = derive_message_id( - Some("12345678-1234-5678-1234-567812345678"), - "session-1", - &ts, - "user", - "hello", - ); - assert_eq!(a, b, "same source uuid must produce same message_id"); - } - - #[test] - fn derive_message_id_stable_fallback_when_uuid_absent() { - let ts = chrono::Utc::now(); - let a = derive_message_id(None, "session-1", &ts, "user", "hello"); - let b = derive_message_id(None, "session-1", &ts, "user", "hello"); - assert_eq!(a, b); - } - - #[test] - fn derive_message_id_different_content_different_id() { - let ts = chrono::Utc::now(); - let a = derive_message_id(None, "session-1", &ts, "user", "hello"); - let b = derive_message_id(None, "session-1", &ts, "user", "world"); - assert_ne!(a, b); - } -} diff --git a/crates/dirigent_archivist/src/import/sources/codex.rs b/crates/dirigent_archivist/src/import/sources/codex.rs deleted file mode 100644 index a9cc422..0000000 --- a/crates/dirigent_archivist/src/import/sources/codex.rs +++ /dev/null @@ -1,331 +0,0 @@ -//! 
OpenAI Codex CLI importer: takes a path to a directory of JSONL session files. - -use std::path::PathBuf; - -use async_trait::async_trait; -use chrono::Utc; -use uuid::Uuid; - -use dirigent_codex::{ParsedMessage, ParsedSession}; - -use super::super::progress::ImportProgressSink; -use super::super::trait_def::{ - ConfigField, ConfigFieldKind, ImportConfig, ImportConfigShape, ImportError, ImportTarget, - Importer, -}; -use super::super::{ - import_sessions, DiscoveredSession, ImportDiscovery, ImportProject, ImportStats, -}; -use crate::coordinator::Archivist; -use crate::error::{ArchivistError, Result}; -use crate::types::{MessageRecord, RegisterConnectorRequest}; - -/// Connector type string used for imported Codex sessions. -pub const CODEX_CONNECTOR_TYPE: &str = "Codex"; - -/// Fingerprint prefix for locally-imported Codex sessions. -pub const CODEX_FINGERPRINT_PREFIX: &str = "import/local:codex"; - -/// Namespace UUID for deterministic UUIDv5 derivations of message ids that -/// Codex does not expose natively. 
-const CODEX_MESSAGE_NS: Uuid = Uuid::from_u128(0x9e28_b7d4_af9c_4fe2_a8d1_8c41_21b3_2222); - -pub struct CodexImporter; - -#[async_trait] -impl Importer for CodexImporter { - fn source_name(&self) -> &'static str { - "codex" - } - - fn config_shape(&self) -> ImportConfigShape { - ImportConfigShape { - fields: vec![ConfigField { - key: "path".into(), - label: "Codex sessions directory".into(), - kind: ConfigFieldKind::Path { directory: true }, - required: true, - help: Some("Usually ~/.codex/sessions".into()), - }], - example: ImportConfig { - source: "codex".into(), - params: { - let mut m = std::collections::BTreeMap::new(); - m.insert("path".into(), serde_json::json!("~/.codex/sessions")); - m - }, - }, - } - } - - async fn discover( - &self, - cfg: &ImportConfig, - ) -> std::result::Result { - let path = require_path(cfg)?; - let files = dirigent_codex::discover_sessions(&path) - .map_err(|e| ImportError::Discovery(e.to_string()))?; - - // Parse each file to count messages. This is a best-effort estimate — - // malformed lines are skipped by the parser, so counts reflect what - // the importer would actually write. - let mut total_estimated_messages: usize = 0; - for file in &files { - if let Ok(session) = dirigent_codex::parse_file(file) { - total_estimated_messages += session.messages.len(); - } - } - let total_sessions = files.len(); - - // Codex sessions live flat in one directory; bucket them into a - // single synthetic project named after the directory. 
- let project_name = path - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or("Codex sessions") - .to_string(); - - Ok(ImportDiscovery { - source_name: "Codex".to_string(), - source_path: path.display().to_string(), - projects: vec![ImportProject { - name: project_name, - session_count: total_sessions, - }], - total_sessions, - total_estimated_messages, - }) - } - - async fn import( - &self, - cfg: &ImportConfig, - archivist: &Archivist, - target: ImportTarget, - progress: ImportProgressSink, - ) -> std::result::Result { - let path = require_path(cfg)?; - let files = dirigent_codex::discover_sessions(&path) - .map_err(|e| ImportError::Discovery(e.to_string()))?; - - // Parse every session file up front so that `convert_messages` - // (called by `import_sessions`) can do O(1) lookups. - let mut parsed: Vec = Vec::with_capacity(files.len()); - for file in &files { - match dirigent_codex::parse_file(file) { - Ok(session) => parsed.push(session), - Err(e) => { - tracing::warn!( - path = %file.display(), - error = %e, - "Skipping unreadable Codex session file" - ); - } - } - } - - let mut discovered: Vec = Vec::with_capacity(parsed.len()); - for s in &parsed { - let metadata = serde_json::json!({ - "source": "codex", - "source_path": s.source_path.display().to_string(), - "native_id": s.native_id, - }); - let file_size = std::fs::metadata(&s.source_path).ok().map(|m| m.len()); - - discovered.push(DiscoveredSession { - native_session_id: s.native_id.clone(), - title: None, - created_at: s.created_at, - updated_at: s.updated_at, - message_count: s.messages.len(), - metadata, - project_path: None, - file_size, - }); - } - - // Map native_id -> parsed session for O(1) lookup in `convert`. - let session_lookup: std::collections::HashMap = parsed - .into_iter() - .map(|s| (s.native_id.clone(), s)) - .collect(); - - // Fingerprint the import by the canonical directory path. Re-running - // against the same directory aliases onto the same connector. 
- let canonical_path = path.canonicalize().unwrap_or_else(|_| path.clone()); - let fingerprint = format!("{}:{}", CODEX_FINGERPRINT_PREFIX, canonical_path.display()); - - let connector_req = RegisterConnectorRequest { - r#type: CODEX_CONNECTOR_TYPE.to_string(), - title: format!("Codex ({})", canonical_path.display()), - client_native_id: fingerprint.clone(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some(fingerprint), - }; - - let convert = |native_id: &str| -> Result> { - let session = session_lookup.get(native_id).ok_or_else(|| { - ArchivistError::InvalidRequest(format!( - "Parsed session not found for native_id: {}", - native_id - )) - })?; - Ok(convert_session_to_records(session)) - }; - - import_sessions( - archivist, - connector_req, - discovered, - convert, - target.archive, - &progress, - false, - &target.project_map, - ) - .await - .map_err(|e| ImportError::Archivist(e.to_string())) - } -} - -// --------------------------------------------------------------------------- -// Conversion helpers -// --------------------------------------------------------------------------- - -fn require_path(cfg: &ImportConfig) -> std::result::Result { - cfg.params - .get("path") - .and_then(|v| v.as_str()) - .map(PathBuf::from) - .ok_or_else(|| ImportError::Config("missing `path`".into())) -} - -/// Convert every [`ParsedMessage`] in a session into a [`MessageRecord`], -/// leaving `session = Uuid::nil()` for the generic orchestrator to patch. -fn convert_session_to_records(session: &ParsedSession) -> Vec { - session - .messages - .iter() - .enumerate() - .filter_map(|(idx, m)| convert_parsed_message(&session.native_id, idx, m)) - .collect() -} - -fn convert_parsed_message( - native_session_id: &str, - index: usize, - msg: &ParsedMessage, -) -> Option { - // Skip purely empty messages — nothing to archive. 
- if msg.content.trim().is_empty() { - return None; - } - - let ts = msg.ts.unwrap_or_else(Utc::now); - - // Codex events don't carry per-message UUIDs, so always derive a stable - // UUIDv5 from (native_session, index, role, ts). Index disambiguates - // otherwise-identical back-to-back messages. - let key = format!( - "{}:{}:{}:{}", - native_session_id, - index, - msg.role, - ts.to_rfc3339(), - ); - let message_id = Uuid::new_v5(&CODEX_MESSAGE_NS, key.as_bytes()); - - let parts = vec![dirigent_protocol::MessagePart::Text { - text: msg.content.clone(), - }]; - let content_parts = serde_json::to_value(&parts).ok(); - - Some(MessageRecord { - version: 1, - message_id, - session: Uuid::nil(), - parent_id: None, - ts, - role: msg.role.clone(), - author: None, - content_md: msg.content.clone(), - content_parts, - attachments: Vec::new(), - metadata: msg.metadata.clone(), - }) -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - fn sample_message(role: &str, content: &str) -> ParsedMessage { - ParsedMessage { - ts: None, - role: role.into(), - content: content.into(), - metadata: serde_json::Value::Null, - } - } - - fn sample_message_at(role: &str, content: &str, ts: chrono::DateTime) -> ParsedMessage { - ParsedMessage { - ts: Some(ts), - role: role.into(), - content: content.into(), - metadata: serde_json::Value::Null, - } - } - - #[test] - fn empty_content_is_skipped() { - let m = sample_message("user", " "); - assert!(convert_parsed_message("s", 0, &m).is_none()); - } - - #[test] - fn non_empty_message_converts() { - let m = sample_message("user", "hello"); - let r = convert_parsed_message("s", 0, &m).expect("converts"); - assert_eq!(r.role, "user"); - assert_eq!(r.content_md, "hello"); - assert_eq!(r.session, Uuid::nil()); - assert!(r.content_parts.is_some()); - } - - #[test] - fn 
message_id_is_deterministic_per_session_index() { - // Fix ts so we don't accidentally hash Utc::now() into the id key. - let ts = chrono::TimeZone::timestamp_opt(&Utc, 1_735_732_800, 0) - .single() - .unwrap(); - let m = sample_message_at("user", "hello", ts); - let a = convert_parsed_message("session-a", 0, &m).unwrap(); - let b = convert_parsed_message("session-a", 0, &m).unwrap(); - assert_eq!(a.message_id, b.message_id); - - // Different index → different id. - let c = convert_parsed_message("session-a", 1, &m).unwrap(); - assert_ne!(a.message_id, c.message_id); - - // Different session → different id. - let d = convert_parsed_message("session-b", 0, &m).unwrap(); - assert_ne!(a.message_id, d.message_id); - } - - #[test] - fn require_path_reports_missing_config() { - let cfg = ImportConfig { - source: "codex".into(), - params: Default::default(), - }; - let err = require_path(&cfg).expect_err("should fail"); - assert!(matches!(err, ImportError::Config(_))); - } -} diff --git a/crates/dirigent_archivist/src/import/sources/mod.rs b/crates/dirigent_archivist/src/import/sources/mod.rs deleted file mode 100644 index f3cbc31..0000000 --- a/crates/dirigent_archivist/src/import/sources/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Per-source importer implementations. - -pub mod claude; -#[cfg(feature = "importer-chatgpt")] -pub mod chatgpt; -#[cfg(feature = "importer-codex")] -pub mod codex; diff --git a/crates/dirigent_archivist/src/import/trait_def.rs b/crates/dirigent_archivist/src/import/trait_def.rs deleted file mode 100644 index 294400b..0000000 --- a/crates/dirigent_archivist/src/import/trait_def.rs +++ /dev/null @@ -1,113 +0,0 @@ -//! Importer trait and config-shape types consumed by the UI (dynamic form -//! rendering) and the CLI (future). Scripts can serialise ImportConfig as JSON. 
- -use async_trait::async_trait; -use serde::{Deserialize, Serialize}; -use std::collections::{BTreeMap, HashMap}; -use thiserror::Error; -use uuid::Uuid; - -use crate::coordinator::Archivist; -use super::progress::ImportProgressSink; - -#[async_trait] -pub trait Importer: Send + Sync { - fn source_name(&self) -> &'static str; - fn config_shape(&self) -> ImportConfigShape; - - async fn discover( - &self, - cfg: &ImportConfig, - ) -> Result; - - async fn import( - &self, - cfg: &ImportConfig, - archivist: &Archivist, - target: ImportTarget, - progress: ImportProgressSink, - ) -> Result; - - /// Attempt to auto-detect default configuration values. - /// - /// Importers that can discover their source location automatically - /// (e.g., Claude Code's `~/.claude` directory) should override this. - /// Returns `None` when auto-detection is not supported or fails. - fn detect_defaults(&self) -> Option { - None - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ImporterInfo { - pub source_name: String, - pub display_name: String, - pub config_shape: ImportConfigShape, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ImportConfigShape { - pub fields: Vec, - pub example: ImportConfig, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ConfigField { - pub key: String, - pub label: String, - pub kind: ConfigFieldKind, - pub required: bool, - pub help: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "type")] -pub enum ConfigFieldKind { - Path { directory: bool }, - File { extension: Option }, - String, - Bool, - Enum { variants: Vec }, -} - -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct ImportConfig { - pub source: String, - #[serde(default)] - pub params: BTreeMap, -} - -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct ImportTarget { - pub archive: Option, - pub connector_alias: Option, - pub project_id: Option, - /// Maps 
normalized project_path -> project_id (as string UUID). - /// When a session's project_path is found in this map, the corresponding - /// project_id is injected into the session metadata during import. - #[serde(default)] - pub project_map: HashMap, -} - -#[derive(Debug, Error)] -pub enum ImportError { - #[error("source not found: {0}")] SourceNotFound(String), - #[error("config: {0}")] Config(String), - #[error("discovery: {0}")] Discovery(String), - #[error("I/O: {0}")] Io(#[from] std::io::Error), - #[error("archivist: {0}")] Archivist(String), - #[error("parser: {0}")] Parser(String), - #[error("cancelled")] Cancelled, -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn config_round_trips() { - let cfg = ImportConfig { source: "claude".into(), params: BTreeMap::new() }; - let json = serde_json::to_string(&cfg).unwrap(); - let back: ImportConfig = serde_json::from_str(&json).unwrap(); - assert_eq!(back.source, "claude"); - } -} diff --git a/crates/dirigent_archivist/src/lib.rs b/crates/dirigent_archivist/src/lib.rs deleted file mode 100644 index b3c7449..0000000 --- a/crates/dirigent_archivist/src/lib.rs +++ /dev/null @@ -1,45 +0,0 @@ -//! Dirigent Archivist -//! -//! Persistent storage for all agentic interactions in Dirigent. -//! -//! The Archivist provides file-based archival storage using NDJSON, JSON, and TSV -//! formats for durability and human-readability. It implements an archive-first -//! architecture with connector API fallback for session data. -//! -//! # Key Features -//! -//! - File-based storage for easy curation and grep-ability -//! - Content-addressable file storage for attachments -//! - Session lineage tracking (splits, continuations, mutations) -//! - Connector registry with UID coordination -//! - Real-time event streaming for archive updates -//! -//! # Architecture -//! -//! See `docs/building/05_archivist/vision.md` for detailed design. 
- -pub mod accumulator; -pub mod backend; -pub mod backends; -pub mod backfill; -pub mod coordinator; -pub mod error; -pub mod events; -pub mod import; -pub mod registry; -pub mod session; -pub mod storage; -pub mod types; - -// Re-export commonly used types -pub use accumulator::{MessageAccumulator, ToolCallData}; -pub use backend::{ - ArchiveBackend, ArchiveCapability, CapabilitySet, ConnectorRegistryBackend, - DagBackend, HealthStatus, MetaEventsBackend, SearchBackend, SessionMappingBackend, -}; -pub use backends::JsonlBackend; -pub use backfill::{backfill_from_sessions, convert_message_to_record, BackfillStats}; -pub use coordinator::{ArchiveInfo, ArchiveMetadata, Archivist}; -pub use error::{ArchivistError, Result}; -pub use events::EventHandler; -pub use types::*; diff --git a/crates/dirigent_archivist/src/registry/cache.rs b/crates/dirigent_archivist/src/registry/cache.rs deleted file mode 100644 index d25698d..0000000 --- a/crates/dirigent_archivist/src/registry/cache.rs +++ /dev/null @@ -1,116 +0,0 @@ -//! Positive LRU cache mapping `scroll_id` to the backend that holds the -//! authoritative session metadata, populated on the first successful read. 
- -use std::num::NonZeroUsize; - -use lru::LruCache; -use tokio::sync::Mutex; -use uuid::Uuid; - -const DEFAULT_CAPACITY: usize = 10_000; - -pub struct ReadCache { - inner: Mutex>, -} - -impl ReadCache { - pub fn new() -> Self { - Self::with_capacity(DEFAULT_CAPACITY) - } - - pub fn with_capacity(capacity: usize) -> Self { - let cap = NonZeroUsize::new(capacity.max(1)).unwrap(); - Self { - inner: Mutex::new(LruCache::new(cap)), - } - } - - pub async fn get(&self, scroll_id: Uuid) -> Option { - let mut guard = self.inner.lock().await; - guard.get(&scroll_id).cloned() - } - - pub async fn put(&self, scroll_id: Uuid, backend_name: String) { - let mut guard = self.inner.lock().await; - guard.put(scroll_id, backend_name); - } - - pub async fn invalidate(&self, scroll_id: Uuid) { - let mut guard = self.inner.lock().await; - guard.pop(&scroll_id); - } - - pub async fn rewrite(&self, scroll_id: Uuid, new_backend: String) { - let mut guard = self.inner.lock().await; - guard.put(scroll_id, new_backend); - } - - pub async fn clear(&self) { - let mut guard = self.inner.lock().await; - guard.clear(); - } - - pub async fn len(&self) -> usize { - let guard = self.inner.lock().await; - guard.len() - } -} - -impl Default for ReadCache { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn id(b: u8) -> Uuid { - Uuid::from_bytes([b; 16]) - } - - #[tokio::test] - async fn put_then_get() { - let c = ReadCache::new(); - c.put(id(1), "main".into()).await; - assert_eq!(c.get(id(1)).await.as_deref(), Some("main")); - assert!(c.get(id(2)).await.is_none()); - } - - #[tokio::test] - async fn invalidate_removes_entry() { - let c = ReadCache::new(); - c.put(id(1), "main".into()).await; - c.invalidate(id(1)).await; - assert!(c.get(id(1)).await.is_none()); - } - - #[tokio::test] - async fn rewrite_changes_backend() { - let c = ReadCache::new(); - c.put(id(1), "a".into()).await; - c.rewrite(id(1), "b".into()).await; - 
assert_eq!(c.get(id(1)).await.as_deref(), Some("b")); - } - - #[tokio::test] - async fn lru_evicts_oldest() { - let c = ReadCache::with_capacity(2); - c.put(id(1), "a".into()).await; - c.put(id(2), "b".into()).await; - c.put(id(3), "c".into()).await; // evicts id(1) - assert!(c.get(id(1)).await.is_none()); - assert_eq!(c.get(id(2)).await.as_deref(), Some("b")); - assert_eq!(c.get(id(3)).await.as_deref(), Some("c")); - } - - #[tokio::test] - async fn clear_empties() { - let c = ReadCache::new(); - c.put(id(1), "a".into()).await; - c.put(id(2), "b".into()).await; - c.clear().await; - assert_eq!(c.len().await, 0); - } -} diff --git a/crates/dirigent_archivist/src/registry/config.rs b/crates/dirigent_archivist/src/registry/config.rs deleted file mode 100644 index 7c13d9c..0000000 --- a/crates/dirigent_archivist/src/registry/config.rs +++ /dev/null @@ -1,253 +0,0 @@ -//! Declarative `[[archives]]` config block parsed from `dirigent.toml`. -//! -//! The TOML schema is documented in `docs/plans/2026-04-19-archivist-phase3-design.md`. - -use serde::{Deserialize, Serialize}; - -use super::filter::ArchiveFilter; -use super::registration::{FailureMode, OverflowPolicy}; - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct ArchivesConfig { - #[serde(default, rename = "archives")] - pub entries: Vec, -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct ArchiveConfig { - pub name: String, - #[serde(rename = "type")] - pub type_name: String, - #[serde(default = "default_write_active")] - pub write_active: bool, - #[serde(default)] - pub failure_mode: FailureMode, - #[serde(default)] - pub read_priority: u32, - #[serde(default = "default_enabled")] - pub enabled: bool, - #[serde(default)] - pub write_policy: WritePolicyConfig, - /// Per-archive include/exclude filter applied during non-primary write - /// fanout. Absent or `{}` means unrestricted. 
- #[serde(default)] - pub filter: ArchiveFilter, - #[serde(default = "default_params")] - pub params: toml::Value, -} - -fn default_params() -> toml::Value { - toml::Value::Table(toml::value::Table::new()) -} - -fn default_write_active() -> bool { - true -} -fn default_enabled() -> bool { - true -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(untagged)] -pub enum WritePolicyConfig { - Tag(WritePolicyTag), - Detailed(WritePolicyDetailed), -} - -#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)] -#[serde(rename_all = "snake_case")] -pub enum WritePolicyTag { - Inline, -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum WritePolicyDetailed { - Inline, - Queued { - #[serde(default = "default_batch_window_ms")] - batch_window_ms: u64, - #[serde(default = "default_capacity")] - capacity: usize, - #[serde(default)] - overflow: OverflowPolicy, - }, -} - -fn default_batch_window_ms() -> u64 { - 50 -} -fn default_capacity() -> usize { - 1024 -} - -impl Default for WritePolicyConfig { - fn default() -> Self { - WritePolicyConfig::Tag(WritePolicyTag::Inline) - } -} - -impl WritePolicyConfig { - pub fn into_runtime(self) -> super::registration::WritePolicy { - use super::registration::WritePolicy; - match self { - WritePolicyConfig::Tag(WritePolicyTag::Inline) => WritePolicy::Inline, - WritePolicyConfig::Detailed(WritePolicyDetailed::Inline) => WritePolicy::Inline, - WritePolicyConfig::Detailed(WritePolicyDetailed::Queued { - batch_window_ms, - capacity, - overflow, - }) => WritePolicy::Queued { - batch_window_ms, - capacity, - overflow, - }, - } - } -} - -use std::collections::BTreeSet; - -#[derive(Debug, thiserror::Error, PartialEq)] -pub enum ConfigValidationError { - #[error("duplicate archive name `{0}`")] - DuplicateName(String), - #[error("no `required` write-active backend configured (need at least one)")] - NoPrimary, -} - -impl ArchivesConfig { - pub fn validate(&self) -> 
Result<(), ConfigValidationError> { - let mut seen: BTreeSet<&str> = BTreeSet::new(); - for entry in &self.entries { - if !seen.insert(entry.name.as_str()) { - return Err(ConfigValidationError::DuplicateName(entry.name.clone())); - } - } - - // Empty config is allowed (ephemeral mode). - if self.entries.is_empty() { - return Ok(()); - } - - let has_primary = self - .entries - .iter() - .any(|e| e.enabled && e.write_active && e.failure_mode == FailureMode::Required); - - if !has_primary { - return Err(ConfigValidationError::NoPrimary); - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn parse(toml_src: &str) -> ArchivesConfig { - toml::from_str(toml_src).expect("parse") - } - - #[test] - fn empty_config_is_ephemeral() { - let cfg: ArchivesConfig = toml::from_str("").unwrap(); - assert!(cfg.entries.is_empty()); - assert!(cfg.validate().is_ok()); - } - - #[test] - fn minimal_single_archive() { - let cfg = parse( - r#" - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "dirigent_archive" - "#, - ); - assert_eq!(cfg.entries.len(), 1); - let e = &cfg.entries[0]; - assert_eq!(e.name, "main"); - assert_eq!(e.type_name, "jsonl"); - assert!(e.write_active); - assert_eq!(e.failure_mode, FailureMode::Required); - assert_eq!(e.read_priority, 0); - assert!(e.enabled); - assert!(matches!(e.write_policy, WritePolicyConfig::Tag(WritePolicyTag::Inline))); - cfg.validate().unwrap(); - } - - #[test] - fn duplicate_name_rejected() { - let cfg = parse( - r#" - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "a" - - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "b" - "#, - ); - assert_eq!( - cfg.validate(), - Err(ConfigValidationError::DuplicateName("main".into())) - ); - } - - #[test] - fn no_primary_rejected() { - let cfg = parse( - r#" - [[archives]] - name = "mirror" - type = "jsonl" - failure_mode = "best_effort" - [archives.params] - path = "a" - "#, - ); - 
assert_eq!(cfg.validate(), Err(ConfigValidationError::NoPrimary)); - } - - #[test] - fn queued_write_policy_parses() { - let cfg = parse( - r#" - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "a" - - [archives.write_policy] - type = "queued" - batch_window_ms = 100 - capacity = 4096 - overflow = "drop_oldest" - "#, - ); - let entry = &cfg.entries[0]; - match &entry.write_policy { - WritePolicyConfig::Detailed(WritePolicyDetailed::Queued { - batch_window_ms, - capacity, - overflow, - }) => { - assert_eq!(*batch_window_ms, 100); - assert_eq!(*capacity, 4096); - assert_eq!(*overflow, OverflowPolicy::DropOldest); - } - other => panic!("unexpected write_policy: {:?}", other), - } - } -} diff --git a/crates/dirigent_archivist/src/registry/factory.rs b/crates/dirigent_archivist/src/registry/factory.rs deleted file mode 100644 index dbb5a8a..0000000 --- a/crates/dirigent_archivist/src/registry/factory.rs +++ /dev/null @@ -1,192 +0,0 @@ -//! Pluggable backend instantiation: type-string → factory → backend. 
- -use std::collections::HashMap; -use std::sync::Arc; - -use async_trait::async_trait; - -use crate::backend::ArchiveBackend; - -#[derive(Debug, thiserror::Error)] -pub enum BackendBuildError { - #[error("unknown backend type `{0}`")] - UnknownType(String), - #[error("invalid params for backend `{name}` (type `{type_name}`): {source}")] - InvalidParams { - name: String, - type_name: String, - #[source] - source: anyhow::Error, - }, - #[error("backend `{name}` (type `{type_name}`) failed to initialise: {source}")] - BackendInit { - name: String, - type_name: String, - #[source] - source: anyhow::Error, - }, -} - -#[async_trait] -pub trait BackendFactory: Send + Sync { - fn type_name(&self) -> &'static str; - - async fn build( - &self, - archive_name: &str, - params: toml::Value, - ) -> Result, BackendBuildError>; -} - -pub struct BackendRegistry { - factories: HashMap<&'static str, Arc>, -} - -impl BackendRegistry { - pub fn new() -> Self { - Self { - factories: HashMap::new(), - } - } - - pub fn register(&mut self, factory: Arc) { - self.factories.insert(factory.type_name(), factory); - } - - pub fn get(&self, type_name: &str) -> Option<&Arc> { - self.factories.get(type_name) - } - - pub async fn build( - &self, - archive_name: &str, - type_name: &str, - params: toml::Value, - ) -> Result, BackendBuildError> { - let factory = self - .get(type_name) - .ok_or_else(|| BackendBuildError::UnknownType(type_name.into()))?; - factory.build(archive_name, params).await - } -} - -impl Default for BackendRegistry { - fn default() -> Self { - Self::new() - } -} - -use std::path::PathBuf; - -use crate::backends::JsonlBackend; - -#[derive(Debug, serde::Deserialize)] -struct JsonlParams { - path: PathBuf, -} - -pub struct JsonlFactory; - -#[async_trait] -impl BackendFactory for JsonlFactory { - fn type_name(&self) -> &'static str { - "jsonl" - } - - async fn build( - &self, - archive_name: &str, - params: toml::Value, - ) -> Result, BackendBuildError> { - let parsed: JsonlParams 
= - params - .try_into() - .map_err(|e: toml::de::Error| BackendBuildError::InvalidParams { - name: archive_name.into(), - type_name: "jsonl".into(), - source: anyhow::Error::new(e), - })?; - - let backend = JsonlBackend::new(parsed.path).await.map_err(|e| { - BackendBuildError::BackendInit { - name: archive_name.into(), - type_name: "jsonl".into(), - source: anyhow::Error::new(e), - } - })?; - - Ok(Arc::new(backend) as Arc) - } -} - -impl BackendRegistry { - /// Convenience: a registry with `jsonl` pre-registered. - pub fn with_jsonl() -> Self { - let mut r = Self::new(); - r.register(Arc::new(JsonlFactory)); - r - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::backend::mock::MockBackend; - - struct MockFactory; - #[async_trait] - impl BackendFactory for MockFactory { - fn type_name(&self) -> &'static str { - "mock" - } - async fn build( - &self, - _archive_name: &str, - _params: toml::Value, - ) -> Result, BackendBuildError> { - Ok(Arc::new(MockBackend::new()) as Arc) - } - } - - #[tokio::test] - async fn unknown_type_rejected() { - let r = BackendRegistry::new(); - let err = r - .build("a", "nope", toml::Value::Table(Default::default())) - .await - .map(|_| ()) - .unwrap_err(); - assert!(matches!(err, BackendBuildError::UnknownType(s) if s == "nope")); - } - - #[tokio::test] - async fn registered_factory_builds() { - let mut r = BackendRegistry::new(); - r.register(Arc::new(MockFactory)); - let backend = r - .build("a", "mock", toml::Value::Table(Default::default())) - .await - .unwrap(); - let _: &dyn ArchiveBackend = &*backend; - } - - #[tokio::test] - async fn jsonl_factory_builds_under_tempdir() { - let dir = tempfile::tempdir().unwrap(); - let r = BackendRegistry::with_jsonl(); - let mut params = toml::value::Table::new(); - params.insert( - "path".into(), - toml::Value::String(dir.path().to_string_lossy().into_owned()), - ); - let backend = r - .build("main", "jsonl", toml::Value::Table(params)) - .await - .unwrap(); - let health = 
backend.health_check().await; - assert!(matches!( - health, - crate::backend::HealthStatus::Healthy | crate::backend::HealthStatus::Degraded { .. } - )); - } -} diff --git a/crates/dirigent_archivist/src/registry/filter.rs b/crates/dirigent_archivist/src/registry/filter.rs deleted file mode 100644 index c2eaba8..0000000 --- a/crates/dirigent_archivist/src/registry/filter.rs +++ /dev/null @@ -1,187 +0,0 @@ -//! Per-archive include/exclude filter. Consulted during non-primary write -//! fanout (Task 20). Primary always writes regardless of filter. - -use std::collections::HashSet; - -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use crate::types::SessionMetadata; - -/// Declarative filter applied to non-primary fanout writes. -/// -/// A registration's filter decides whether a given session should be -/// replicated to that archive. The primary write target ignores the filter -/// and always writes. A default filter (`ArchiveFilter::default()`) is -/// unrestricted and allows every session. -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] -pub struct ArchiveFilter { - /// If `Some`, only sessions whose `connector_uid` is in the set are - /// accepted. If `None`, any connector is allowed (subject to other rules). - #[serde(default)] - pub include_connectors: Option>, - /// Connector UIDs that are explicitly rejected. Takes precedence over - /// `include_connectors`. - #[serde(default)] - pub exclude_connectors: HashSet, - /// If non-empty, the session must carry at least one of these tags. - #[serde(default)] - pub include_tags: HashSet, - /// Tags that cause the session to be rejected. - #[serde(default)] - pub exclude_tags: HashSet, - /// When `false`, sessions whose `metadata.hidden == true` are rejected. 
- #[serde(default = "default_include_hidden")] - pub include_hidden: bool, -} - -fn default_include_hidden() -> bool { - true -} - -impl Default for ArchiveFilter { - fn default() -> Self { - Self { - include_connectors: None, - exclude_connectors: HashSet::new(), - include_tags: HashSet::new(), - exclude_tags: HashSet::new(), - include_hidden: true, - } - } -} - -impl ArchiveFilter { - /// Returns true when this session should be written to the archive. - pub fn allows(&self, session: &SessionMetadata, connector_uid: &Uuid) -> bool { - // Exclude rules win. - if self.exclude_connectors.contains(connector_uid) { - return false; - } - if let Some(inc) = &self.include_connectors { - if !inc.contains(connector_uid) { - return false; - } - } - if session.tags.iter().any(|t| self.exclude_tags.contains(t)) { - return false; - } - if !self.include_tags.is_empty() - && !session.tags.iter().any(|t| self.include_tags.contains(t)) - { - return false; - } - if !self.include_hidden - && session.metadata.get("hidden") == Some(&serde_json::Value::Bool(true)) - { - return false; - } - true - } - - /// A filter that allows everything is equivalent to no filter. 
- pub fn is_unrestricted(&self) -> bool { - self.include_connectors.is_none() - && self.exclude_connectors.is_empty() - && self.include_tags.is_empty() - && self.exclude_tags.is_empty() - && self.include_hidden - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::types::SessionMetadata; - - fn make_session(tags: Vec, hidden: bool) -> SessionMetadata { - let mut s = SessionMetadata::stub(Uuid::now_v7()); - s.tags = tags; - s.metadata = if hidden { - serde_json::json!({ "hidden": true }) - } else { - serde_json::Value::Null - }; - s - } - - #[test] - fn default_allows_all() { - let f = ArchiveFilter::default(); - let s = make_session(vec![], false); - let uid = Uuid::new_v4(); - assert!(f.allows(&s, &uid)); - assert!(f.is_unrestricted()); - } - - #[test] - fn exclude_connector_rejects() { - let excluded = Uuid::new_v4(); - let mut f = ArchiveFilter::default(); - f.exclude_connectors.insert(excluded); - let s = make_session(vec![], false); - assert!(!f.allows(&s, &excluded)); - assert!(f.allows(&s, &Uuid::new_v4())); - assert!(!f.is_unrestricted()); - } - - #[test] - fn include_connector_only_allows_listed() { - let allowed = Uuid::new_v4(); - let mut f = ArchiveFilter::default(); - f.include_connectors = Some(HashSet::from_iter([allowed])); - let s = make_session(vec![], false); - assert!(f.allows(&s, &allowed)); - assert!(!f.allows(&s, &Uuid::new_v4())); - } - - #[test] - fn tag_intersection_semantics() { - let mut f = ArchiveFilter::default(); - f.include_tags = HashSet::from_iter(["prod".into()]); - let s_prod = make_session(vec!["prod".into()], false); - let s_dev = make_session(vec!["dev".into()], false); - let uid = Uuid::new_v4(); - assert!(f.allows(&s_prod, &uid)); - assert!(!f.allows(&s_dev, &uid)); - } - - #[test] - fn exclude_tag_wins_over_include() { - let mut f = ArchiveFilter::default(); - f.include_tags = HashSet::from_iter(["prod".into()]); - f.exclude_tags = HashSet::from_iter(["sensitive".into()]); - let s = 
make_session(vec!["prod".into(), "sensitive".into()], false); - let uid = Uuid::new_v4(); - assert!(!f.allows(&s, &uid)); - } - - #[test] - fn include_hidden_false_rejects_hidden_sessions() { - let mut f = ArchiveFilter::default(); - f.include_hidden = false; - let s_hidden = make_session(vec![], true); - let s_visible = make_session(vec![], false); - let uid = Uuid::new_v4(); - assert!(!f.allows(&s_hidden, &uid)); - assert!(f.allows(&s_visible, &uid)); - } - - #[test] - fn default_include_hidden_accepts_hidden_sessions() { - let f = ArchiveFilter::default(); - let s = make_session(vec![], true); - let uid = Uuid::new_v4(); - assert!(f.allows(&s, &uid)); - } - - #[test] - fn toml_roundtrip_default() { - // Serializing via TOML is exercised through ArchiveConfig, but a - // plain JSON roundtrip on the struct catches serde attribute typos. - let f = ArchiveFilter::default(); - let json = serde_json::to_string(&f).unwrap(); - let back: ArchiveFilter = serde_json::from_str(&json).unwrap(); - assert_eq!(f, back); - } -} diff --git a/crates/dirigent_archivist/src/registry/health.rs b/crates/dirigent_archivist/src/registry/health.rs deleted file mode 100644 index 24fd4d6..0000000 --- a/crates/dirigent_archivist/src/registry/health.rs +++ /dev/null @@ -1,72 +0,0 @@ -//! Health drift helpers used by both read and write paths. -//! -//! The drift model: successful writes reset `consecutive_failures` to 0 and -//! promote `Degraded` → `Healthy`. Write failures bump the counter and drift -//! to `Degraded { reason }`; after K consecutive failures, the registration -//! drifts to `Unavailable { reason }`, which causes read walks to skip it. -//! -//! Read successes rescue `Degraded` → `Healthy` but don't touch the failure -//! counter (writes are the authoritative health signal). Read failures drift -//! `Healthy` → `Degraded` but never to `Unavailable` by themselves (a truly -//! broken backend will be caught on the next write attempt). 
- -use chrono::Utc; - -use crate::backend::HealthStatus; -use crate::registry::ArchiveRegistration; - -const FAILURE_THRESHOLD: u32 = 5; - -impl crate::coordinator::Archivist { - pub(crate) async fn record_write_success(&self, reg: &ArchiveRegistration) { - *reg.consecutive_failures.write().await = 0; - let mut h = reg.last_health.write().await; - if !matches!(*h, HealthStatus::Healthy) { - *h = HealthStatus::Healthy; - } - } - - pub(crate) async fn record_read_success(&self, reg: &ArchiveRegistration) { - // Reads don't reset the failure counter — writes are the authoritative - // health signal. But reads DO recover from `Degraded` to `Healthy`. - let mut h = reg.last_health.write().await; - if matches!(*h, HealthStatus::Degraded { .. }) { - *h = HealthStatus::Healthy; - } - } - - pub(crate) async fn record_write_failure( - &self, - reg: &ArchiveRegistration, - reason: &str, - ) { - let mut n = reg.consecutive_failures.write().await; - *n = n.saturating_add(1); - *reg.last_error.write().await = Some((Utc::now(), reason.to_string())); - let mut h = reg.last_health.write().await; - if *n >= FAILURE_THRESHOLD { - *h = HealthStatus::Unavailable { - reason: format!("{} consecutive failures: {reason}", *n), - }; - } else { - *h = HealthStatus::Degraded { - reason: reason.to_string(), - }; - } - } - - pub(crate) async fn record_read_failure(&self, reg: &ArchiveRegistration) { - // Reads alone do not drift to Unavailable. Only drift to Degraded. 
- let mut h = reg.last_health.write().await; - if matches!(*h, HealthStatus::Healthy) { - *h = HealthStatus::Degraded { - reason: "read failure".into(), - }; - } - } - - #[allow(dead_code)] - pub(crate) async fn current_health(&self, reg: &ArchiveRegistration) -> HealthStatus { - reg.last_health.read().await.clone() - } -} diff --git a/crates/dirigent_archivist/src/registry/mod.rs b/crates/dirigent_archivist/src/registry/mod.rs deleted file mode 100644 index ed0e284..0000000 --- a/crates/dirigent_archivist/src/registry/mod.rs +++ /dev/null @@ -1,22 +0,0 @@ -//! Multi-backend registry: configuration, factory, registration entries, -//! read cache, queued writer tasks, and health drift helpers. -//! -//! The single `registry.rs` file from Phase 2 (on-disk archive metadata -//! persistence) has been replaced; archive declaration moves to -//! `dirigent.toml` and is consumed at boot via -//! `coordinator::boot::Archivist::from_config` in later Phase 3 tasks. - -pub mod cache; -pub mod config; -pub mod factory; -pub mod filter; -pub mod health; -pub mod registration; -pub mod writer; - -pub use config::{ArchiveConfig, ArchivesConfig, ConfigValidationError}; -pub use factory::{BackendBuildError, BackendFactory, BackendRegistry, JsonlFactory}; -pub use filter::ArchiveFilter; -pub use registration::{ - ArchiveRegistration, ArchiveStatus, FailureMode, OverflowPolicy, WritePolicy, -}; diff --git a/crates/dirigent_archivist/src/registry/registration.rs b/crates/dirigent_archivist/src/registry/registration.rs deleted file mode 100644 index 6e6568e..0000000 --- a/crates/dirigent_archivist/src/registry/registration.rs +++ /dev/null @@ -1,181 +0,0 @@ -//! Per-backend configuration value types used by `ArchiveRegistration`. 
- -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum FailureMode { - Required, - BestEffort, -} - -impl Default for FailureMode { - fn default() -> Self { - FailureMode::Required - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum WritePolicy { - Inline, - Queued { - batch_window_ms: u64, - capacity: usize, - overflow: OverflowPolicy, - }, -} - -impl Default for WritePolicy { - fn default() -> Self { - WritePolicy::Inline - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum OverflowPolicy { - Block, - DropOldest, - Error, -} - -impl Default for OverflowPolicy { - fn default() -> Self { - OverflowPolicy::Block - } -} - -use std::sync::Arc; - -use chrono::{DateTime, Utc}; -use tokio::sync::RwLock; - -use crate::backend::{ArchiveBackend, CapabilitySet, HealthStatus}; -use crate::registry::filter::ArchiveFilter; - -use super::writer::WriterHandle; - -pub struct ArchiveRegistration { - pub name: String, - pub type_name: &'static str, - pub backend: Arc, - pub write_active: bool, - pub failure_mode: FailureMode, - pub read_priority: u32, - pub enabled: bool, - pub write_policy: WritePolicy, - /// Per-archive include/exclude filter consulted during non-primary - /// write fanout. Default (unrestricted) accepts every session; the - /// primary target always writes regardless of its filter. - pub filter: ArchiveFilter, - pub last_health: Arc>, - pub last_error: Arc, String)>>>, - pub consecutive_failures: Arc>, - pub writer: Option, -} - -impl ArchiveRegistration { - /// Convenience constructor: builds new `Arc>` instances for the - /// drift trio. Use this for single-process, single-owner registrations - /// (tests and the simple single-archive constructors). 
- #[allow(clippy::too_many_arguments)] - pub fn new( - name: String, - type_name: &'static str, - backend: Arc, - write_active: bool, - failure_mode: FailureMode, - read_priority: u32, - enabled: bool, - write_policy: WritePolicy, - writer: Option, - initial_health: HealthStatus, - ) -> Self { - Self::new_with_shared_state( - name, - type_name, - backend, - write_active, - failure_mode, - read_priority, - enabled, - write_policy, - writer, - Arc::new(RwLock::new(initial_health)), - Arc::new(RwLock::new(None)), - Arc::new(RwLock::new(0)), - ) - } - - /// Constructor used by `from_config` so the writer task and the - /// registration share the same drift state (both mutate it). - #[allow(clippy::too_many_arguments)] - pub fn new_with_shared_state( - name: String, - type_name: &'static str, - backend: Arc, - write_active: bool, - failure_mode: FailureMode, - read_priority: u32, - enabled: bool, - write_policy: WritePolicy, - writer: Option, - last_health: Arc>, - last_error: Arc, String)>>>, - consecutive_failures: Arc>, - ) -> Self { - Self { - name, - type_name, - backend, - write_active, - failure_mode, - read_priority, - enabled, - write_policy, - filter: ArchiveFilter::default(), - last_health, - last_error, - consecutive_failures, - writer, - } - } - - /// Override the registration's filter. Intended for boot-time wiring - /// (`from_config`) and tests; the field itself is public for other - /// direct consumers. 
- pub fn with_filter(mut self, filter: ArchiveFilter) -> Self { - self.filter = filter; - self - } - - pub fn capabilities(&self) -> &CapabilitySet { - self.backend.capabilities() - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ArchiveStatus { - pub name: String, - pub type_name: String, - pub enabled: bool, - pub write_active: bool, - pub failure_mode: FailureMode, - pub read_priority: u32, - pub capabilities: CapabilitySet, - pub health: HealthStatus, - pub last_error: Option<(DateTime, String)>, - pub queue_depth: Option, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn defaults_are_safe() { - assert_eq!(FailureMode::default(), FailureMode::Required); - assert_eq!(WritePolicy::default(), WritePolicy::Inline); - assert_eq!(OverflowPolicy::default(), OverflowPolicy::Block); - } -} diff --git a/crates/dirigent_archivist/src/registry/writer.rs b/crates/dirigent_archivist/src/registry/writer.rs deleted file mode 100644 index 4f4a146..0000000 --- a/crates/dirigent_archivist/src/registry/writer.rs +++ /dev/null @@ -1,256 +0,0 @@ -//! Per-backend writer task for `WritePolicy::Queued` backends. -//! -//! The task drains a per-backend mpsc, optionally batching/coalescing within -//! a configured window, and invokes `ArchiveBackend` methods directly. Errors -//! drift health on the parent registration; they do not propagate to the -//! caller. 
- -use std::sync::Arc; -use std::time::Duration; - -use chrono::Utc; -use tokio::sync::{mpsc, oneshot, watch, RwLock}; -use tokio::task::JoinHandle; -use tracing::{debug, warn}; -use uuid::Uuid; - -use crate::backend::{ArchiveBackend, HealthStatus}; - -use super::OverflowPolicy; - -#[derive(Debug)] -pub enum WriteOp { - PutSession(crate::types::SessionMetadata), - AppendMessages { - scroll_id: Uuid, - msgs: Vec, - }, - DeleteSession { - scroll_id: Uuid, - }, - ClearSessionMessages { - scroll_id: Uuid, - }, - AppendDagEdge(crate::types::DagEdge), - AppendMetaEvents { - scroll_id: Uuid, - events: Vec, - }, - Shutdown(oneshot::Sender<()>), -} - -impl WriteOp { - pub fn op_label(&self) -> &'static str { - match self { - WriteOp::PutSession(_) => "put_session", - WriteOp::AppendMessages { .. } => "append_messages", - WriteOp::DeleteSession { .. } => "delete_session", - WriteOp::ClearSessionMessages { .. } => "clear_session_messages", - WriteOp::AppendDagEdge(_) => "append_dag_edge", - WriteOp::AppendMetaEvents { .. 
} => "append_meta_events", - WriteOp::Shutdown(_) => "shutdown", - } - } -} - -#[derive(Debug)] -pub struct WriterHandle { - pub sender: mpsc::Sender, - pub overflow: OverflowPolicy, - pub queue_depth: watch::Receiver, - pub join: tokio::sync::Mutex>>, - pub backend_name: String, -} - -impl WriterHandle { - pub async fn enqueue(&self, op: WriteOp) -> Result<(), crate::error::ArchivistError> { - match self.overflow { - OverflowPolicy::Block => self.sender.send(op).await.map_err(|_| { - crate::error::ArchivistError::Other(format!( - "writer task for `{}` has closed", - self.backend_name - )) - }), - OverflowPolicy::Error => self.sender.try_send(op).map_err(|e| match e { - mpsc::error::TrySendError::Full(op) => { - crate::error::ArchivistError::WriteQueueFull { - backend: self.backend_name.clone(), - op: op.op_label(), - } - } - mpsc::error::TrySendError::Closed(_) => { - crate::error::ArchivistError::Other(format!( - "writer task for `{}` has closed", - self.backend_name - )) - } - }), - OverflowPolicy::DropOldest => { - // Tokio mpsc can't truly "drop oldest" without draining from the - // other side; we approximate with "drop newest when full". For - // observability sinks this is acceptable — the contract is - // "never block, may lose data". 
- let _ = self.sender.try_send(op); - Ok(()) - } - } - } - - pub fn queue_depth_now(&self) -> usize { - *self.queue_depth.borrow() - } -} - -#[allow(clippy::too_many_arguments)] -pub fn spawn_writer( - backend: Arc, - backend_name: String, - capacity: usize, - batch_window: Duration, - overflow: OverflowPolicy, - health: Arc>, - last_error: Arc, String)>>>, - consecutive_failures: Arc>, -) -> WriterHandle { - let (tx, mut rx) = mpsc::channel::(capacity); - let (depth_tx, depth_rx) = watch::channel(0usize); - - let join = tokio::spawn({ - let backend_name = backend_name.clone(); - async move { - const FAILURE_THRESHOLD: u32 = 5; - - loop { - let Some(first) = rx.recv().await else { break }; - let mut batch: Vec = vec![first]; - - let deadline = tokio::time::Instant::now() + batch_window; - while tokio::time::Instant::now() < deadline { - match tokio::time::timeout_at(deadline, rx.recv()).await { - Ok(Some(op)) => batch.push(op), - Ok(None) => break, - Err(_) => break, - } - } - - let _ = depth_tx.send(rx.len()); - - let coalesced = coalesce(batch); - - let mut shutdown_ack: Option> = None; - for op in coalesced { - if let WriteOp::Shutdown(ack) = op { - shutdown_ack = Some(ack); - break; - } - match dispatch_op(&*backend, op).await { - Ok(()) => { - *consecutive_failures.write().await = 0; - let mut h = health.write().await; - if matches!(*h, HealthStatus::Degraded { .. 
}) { - *h = HealthStatus::Healthy; - } - } - Err(e) => { - warn!( - backend = backend_name.as_str(), - error = %e, - "queued write failed; drifting health" - ); - let mut n = consecutive_failures.write().await; - *n = n.saturating_add(1); - *last_error.write().await = Some((Utc::now(), format!("{e}"))); - let mut h = health.write().await; - if *n >= FAILURE_THRESHOLD { - *h = HealthStatus::Unavailable { - reason: format!("{} consecutive failures", *n), - }; - } else { - *h = HealthStatus::Degraded { reason: format!("{e}") }; - } - } - } - } - - if let Some(ack) = shutdown_ack { - debug!(backend = backend_name.as_str(), "writer task shutting down"); - let _ = ack.send(()); - break; - } - } - } - }); - - WriterHandle { - sender: tx, - overflow, - queue_depth: depth_rx, - join: tokio::sync::Mutex::new(Some(join)), - backend_name, - } -} - -fn coalesce(batch: Vec) -> Vec { - let mut out: Vec = Vec::with_capacity(batch.len()); - for op in batch { - let merged = match (out.last_mut(), &op) { - ( - Some(WriteOp::AppendMessages { scroll_id: a, .. }), - WriteOp::AppendMessages { scroll_id: b, .. }, - ) if a == b => true, - ( - Some(WriteOp::AppendMetaEvents { scroll_id: a, .. }), - WriteOp::AppendMetaEvents { scroll_id: b, .. }, - ) if a == b => true, - _ => false, - }; - - if merged { - match out.last_mut().unwrap() { - WriteOp::AppendMessages { msgs: m1, .. } => { - if let WriteOp::AppendMessages { msgs: m2, .. } = op { - m1.extend(m2); - continue; - } - } - WriteOp::AppendMetaEvents { events: e1, .. } => { - if let WriteOp::AppendMetaEvents { events: e2, .. 
} = op { - e1.extend(e2); - continue; - } - } - _ => {} - } - } - out.push(op); - } - out -} - -async fn dispatch_op(backend: &dyn ArchiveBackend, op: WriteOp) -> crate::error::Result<()> { - match op { - WriteOp::PutSession(meta) => backend.put_session(meta).await, - WriteOp::AppendMessages { scroll_id, msgs } => { - backend.append_messages(scroll_id, msgs).await - } - WriteOp::DeleteSession { scroll_id } => backend.delete_session(scroll_id).await, - WriteOp::ClearSessionMessages { scroll_id } => { - backend.clear_session_messages(scroll_id).await - } - WriteOp::AppendDagEdge(edge) => { - if let Some(d) = backend.as_dag() { - d.append_dag_edge(edge).await - } else { - Ok(()) - } - } - WriteOp::AppendMetaEvents { scroll_id, events } => { - if let Some(m) = backend.as_meta_events() { - m.append_meta_events(scroll_id, events).await - } else { - Ok(()) - } - } - WriteOp::Shutdown(_) => Ok(()), - } -} diff --git a/crates/dirigent_archivist/src/session.rs b/crates/dirigent_archivist/src/session.rs deleted file mode 100644 index 87118b2..0000000 --- a/crates/dirigent_archivist/src/session.rs +++ /dev/null @@ -1,24 +0,0 @@ -//! Session management and lineage tracking. -//! -//! Handles session metadata, lineage relationships (splits, continuations), -//! and session lifecycle operations. - -use crate::error::Result; - -/// Session manager for tracking session metadata and lineage -pub struct SessionManager { - // Placeholder - will be populated in implementation phases -} - -impl SessionManager { - /// Create a new session manager - pub fn new() -> Result { - Ok(Self {}) - } -} - -impl Default for SessionManager { - fn default() -> Self { - Self::new().expect("Failed to create default SessionManager") - } -} diff --git a/crates/dirigent_archivist/src/storage/files.rs b/crates/dirigent_archivist/src/storage/files.rs deleted file mode 100644 index 90f54b4..0000000 --- a/crates/dirigent_archivist/src/storage/files.rs +++ /dev/null @@ -1,465 +0,0 @@ -//! 
Content-addressable file storage. -//! -//! Handles storage and retrieval of binary files (images, documents, etc.) -//! using content-addressable naming based on SHA-256 hashes. -//! -//! Files are stored with deduplication: -//! - Same content = same file_id = stored once -//! - Multiple sessions can reference the same file -//! - File index tracks all referencing sessions - -use crate::storage::{ndjson, paths::ArchivePaths}; -use crate::types::FileRecord; -use sha2::{Digest, Sha256}; -use uuid::Uuid; - -/// Store a file in the archive -/// -/// This function: -/// 1. Computes SHA-256 hash of content -/// 2. Generates file_id: `sha256:{hex_digest}` -/// 3. Stores blob in sharded directory (if not already exists - deduplication) -/// 4. Updates file index to track the session referencing this file -/// 5. Returns the file_id -/// -/// # Arguments -/// * `paths` - Archive paths helper -/// * `content` - File content bytes -/// * `original_name` - Original filename -/// * `mime` - Optional MIME type -/// * `session` - Session UUID that references this file -/// -/// # Returns -/// The file_id (e.g., "sha256:abc123...") -pub async fn store_file( - paths: &ArchivePaths, - content: &[u8], - original_name: String, - mime: Option, - session: Uuid, -) -> std::io::Result { - // Compute SHA-256 hash - let hash = Sha256::digest(content); - let hex_digest = hex::encode(hash); - let file_id = format!("sha256:{}", hex_digest); - - // Get blob path - let blob_path = paths.file_blob_path(&file_id); - - // Create parent directories for blob - if let Some(parent) = blob_path.parent() { - tokio::fs::create_dir_all(parent).await?; - } - - // Write blob if it doesn't exist (deduplication) - if !blob_path.exists() { - tokio::fs::write(&blob_path, content).await?; - } - - // Update file index - let index_path = paths.root().join(".files").join("file_index.jsonl"); - - // Create .files directory if it doesn't exist - if let Some(parent) = index_path.parent() { - 
tokio::fs::create_dir_all(parent).await?; - } - - // Serialize the read-modify-rewrite below. Concurrent callers against - // the same archive would otherwise lose records (both read the same - // snapshot) and race on `rename(.tmp → .ndjson)` (second call hits - // ENOENT because the first already consumed the shared temp path). - let index_lock = paths.file_index_lock(); - let _index_guard = index_lock.lock().await; - - // Read existing index - let mut records: Vec = ndjson::read_ndjson(&index_path).await?; - - // Find or create FileRecord - if let Some(existing) = records.iter_mut().find(|r| r.file_id == file_id) { - // File already exists - add session if not already present - if !existing.sessions.contains(&session) { - existing.sessions.push(session); - } - } else { - // New file - create record - let relative_path = blob_path - .strip_prefix(paths.root()) - .unwrap_or(&blob_path) - .to_string_lossy() - .to_string(); - - let new_record = FileRecord { - version: 1, - file_id: file_id.clone(), - path: relative_path, - size: content.len() as u64, - mime: mime.clone(), - original_name: original_name.clone(), - sessions: vec![session], - metadata: serde_json::json!({}), - }; - - records.push(new_record); - } - - // Rewrite entire index atomically - // Use temp file + rename pattern - let temp_index_path = index_path.with_extension("tmp"); - - // Clear temp file and write all records - if temp_index_path.exists() { - tokio::fs::remove_file(&temp_index_path).await?; - } - - for rec in &records { - ndjson::append_ndjson(&temp_index_path, rec).await?; - } - - // Rename to final location - tokio::fs::rename(&temp_index_path, &index_path).await?; - - Ok(file_id) -} - -/// Retrieve a file from the archive -/// -/// # Arguments -/// * `paths` - Archive paths helper -/// * `file_id` - File identifier (e.g., "sha256:abc123...") -/// -/// # Returns -/// File content bytes -pub async fn get_file(paths: &ArchivePaths, file_id: &str) -> std::io::Result> { - let blob_path = 
paths.file_blob_path(file_id); - tokio::fs::read(&blob_path).await -} - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn test_content_deduplication() { - let temp_dir = - std::env::temp_dir().join(format!("archivist_files_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - - let content = b"Hello, world! This is test content."; - let session1 = Uuid::now_v7(); - let session2 = Uuid::now_v7(); - - // Store same content from two different sessions - let file_id1 = store_file( - &paths, - content, - "test1.txt".to_string(), - Some("text/plain".to_string()), - session1, - ) - .await - .unwrap(); - - let file_id2 = store_file( - &paths, - content, - "test2.txt".to_string(), // Different name - Some("text/plain".to_string()), - session2, - ) - .await - .unwrap(); - - // Same content should produce same file_id - assert_eq!(file_id1, file_id2); - - // Verify blob was only written once - let blob_path = paths.file_blob_path(&file_id1); - assert!(blob_path.exists()); - - // Verify index tracks both sessions - let index_path = paths.root().join(".files").join("file_index.jsonl"); - let records: Vec = ndjson::read_ndjson(&index_path).await.unwrap(); - - let record = records.iter().find(|r| r.file_id == file_id1).unwrap(); - assert_eq!(record.sessions.len(), 2); - assert!(record.sessions.contains(&session1)); - assert!(record.sessions.contains(&session2)); - - // Clean up - tokio::fs::remove_dir_all(&temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_sharding_distributes_files() { - let temp_dir = - std::env::temp_dir().join(format!("archivist_files_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - - let session = Uuid::now_v7(); - - // Store files with different content - let content1 = b"Content A"; - let content2 = b"Content B"; - let content3 = b"Content C"; - - let file_id1 = store_file(&paths, content1, "file1.txt".to_string(), None, session) - .await - .unwrap(); - - let 
file_id2 = store_file(&paths, content2, "file2.txt".to_string(), None, session) - .await - .unwrap(); - - let file_id3 = store_file(&paths, content3, "file3.txt".to_string(), None, session) - .await - .unwrap(); - - // Verify different content produces different file_ids - assert_ne!(file_id1, file_id2); - assert_ne!(file_id2, file_id3); - - // Verify files are distributed across sharded directories - let blob_path1 = paths.file_blob_path(&file_id1); - let blob_path2 = paths.file_blob_path(&file_id2); - let blob_path3 = paths.file_blob_path(&file_id3); - - assert!(blob_path1.exists()); - assert!(blob_path2.exists()); - assert!(blob_path3.exists()); - - // Verify sharding creates subdirectories - let files_dir = paths.root().join(".files"); - let mut shard_dirs = Vec::new(); - for entry in std::fs::read_dir(&files_dir).unwrap() { - let entry = entry.unwrap(); - if entry.file_type().unwrap().is_dir() { - shard_dirs.push(entry.path()); - } - } - - // Should have at least one shard directory - assert!(!shard_dirs.is_empty()); - - // Clean up - tokio::fs::remove_dir_all(&temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_index_tracks_sessions() { - let temp_dir = - std::env::temp_dir().join(format!("archivist_files_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - - let content = b"Shared content"; - let session1 = Uuid::now_v7(); - let session2 = Uuid::now_v7(); - let session3 = Uuid::now_v7(); - - // Store from session1 - let file_id = store_file( - &paths, - content, - "file.txt".to_string(), - Some("text/plain".to_string()), - session1, - ) - .await - .unwrap(); - - // Verify index has 1 session - let index_path = paths.root().join(".files").join("file_index.jsonl"); - let records: Vec = ndjson::read_ndjson(&index_path).await.unwrap(); - let record = records.iter().find(|r| r.file_id == file_id).unwrap(); - assert_eq!(record.sessions.len(), 1); - assert_eq!(record.sessions[0], session1); - - // Store same content from 
session2 - store_file( - &paths, - content, - "file2.txt".to_string(), - Some("text/plain".to_string()), - session2, - ) - .await - .unwrap(); - - // Verify index now has 2 sessions - let records: Vec = ndjson::read_ndjson(&index_path).await.unwrap(); - let record = records.iter().find(|r| r.file_id == file_id).unwrap(); - assert_eq!(record.sessions.len(), 2); - assert!(record.sessions.contains(&session1)); - assert!(record.sessions.contains(&session2)); - - // Store same content from session3 - store_file(&paths, content, "file3.txt".to_string(), None, session3) - .await - .unwrap(); - - // Verify index now has 3 sessions - let records: Vec = ndjson::read_ndjson(&index_path).await.unwrap(); - let record = records.iter().find(|r| r.file_id == file_id).unwrap(); - assert_eq!(record.sessions.len(), 3); - assert!(record.sessions.contains(&session1)); - assert!(record.sessions.contains(&session2)); - assert!(record.sessions.contains(&session3)); - - // Clean up - tokio::fs::remove_dir_all(&temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_concurrent_writes_different_files() { - let temp_dir = - std::env::temp_dir().join(format!("archivist_files_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - - let content1 = b"Content 1"; - let content2 = b"Content 2"; - let session = Uuid::now_v7(); - - // Store concurrently - let (file_id1, file_id2) = tokio::join!( - store_file(&paths, content1, "file1.txt".to_string(), None, session,), - store_file(&paths, content2, "file2.txt".to_string(), None, session,) - ); - - let file_id1 = file_id1.unwrap(); - let file_id2 = file_id2.unwrap(); - - // Verify both files exist - assert_ne!(file_id1, file_id2); - - let retrieved1 = get_file(&paths, &file_id1).await.unwrap(); - let retrieved2 = get_file(&paths, &file_id2).await.unwrap(); - - assert_eq!(retrieved1, content1); - assert_eq!(retrieved2, content2); - - // Clean up - tokio::fs::remove_dir_all(&temp_dir).await.ok(); - } - - #[tokio::test] - 
async fn test_get_file_missing() { - let temp_dir = - std::env::temp_dir().join(format!("archivist_files_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - - // Try to get non-existent file - let result = get_file(&paths, "sha256:nonexistent").await; - assert!(result.is_err()); - - match result { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::NotFound); - } - Ok(_) => panic!("Expected NotFound error"), - } - - // Clean up - tokio::fs::remove_dir_all(&temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_roundtrip_binary_content() { - let temp_dir = - std::env::temp_dir().join(format!("archivist_files_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - - // Binary content (not UTF-8) - let content: Vec = (0..256).map(|i| i as u8).collect(); - let session = Uuid::now_v7(); - - // Store - let file_id = store_file( - &paths, - &content, - "binary.dat".to_string(), - Some("application/octet-stream".to_string()), - session, - ) - .await - .unwrap(); - - // Retrieve - let retrieved = get_file(&paths, &file_id).await.unwrap(); - - // Verify exact match - assert_eq!(retrieved, content); - - // Clean up - tokio::fs::remove_dir_all(&temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_file_metadata_preserved() { - let temp_dir = - std::env::temp_dir().join(format!("archivist_files_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - - let content = b"Test content"; - let session = Uuid::now_v7(); - let original_name = "document.pdf".to_string(); - let mime = Some("application/pdf".to_string()); - - // Store - let file_id = store_file( - &paths, - content, - original_name.clone(), - mime.clone(), - session, - ) - .await - .unwrap(); - - // Read index - let index_path = paths.root().join(".files").join("file_index.jsonl"); - let records: Vec = ndjson::read_ndjson(&index_path).await.unwrap(); - - let record = records.iter().find(|r| r.file_id == file_id).unwrap(); - - 
// Verify metadata - assert_eq!(record.original_name, original_name); - assert_eq!(record.mime, mime); - assert_eq!(record.size, content.len() as u64); - assert!(record.path.contains(".files")); - - // Clean up - tokio::fs::remove_dir_all(&temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_deduplicate_same_session() { - let temp_dir = - std::env::temp_dir().join(format!("archivist_files_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - - let content = b"Duplicate content"; - let session = Uuid::now_v7(); - - // Store same content twice from same session - let file_id1 = store_file(&paths, content, "file1.txt".to_string(), None, session) - .await - .unwrap(); - - let file_id2 = store_file(&paths, content, "file2.txt".to_string(), None, session) - .await - .unwrap(); - - // Same file_id - assert_eq!(file_id1, file_id2); - - // Session should only appear once in the index - let index_path = paths.root().join(".files").join("file_index.jsonl"); - let records: Vec = ndjson::read_ndjson(&index_path).await.unwrap(); - let record = records.iter().find(|r| r.file_id == file_id1).unwrap(); - - assert_eq!(record.sessions.len(), 1); - assert_eq!(record.sessions[0], session); - - // Clean up - tokio::fs::remove_dir_all(&temp_dir).await.ok(); - } -} diff --git a/crates/dirigent_archivist/src/storage/json.rs b/crates/dirigent_archivist/src/storage/json.rs deleted file mode 100644 index ea117d8..0000000 --- a/crates/dirigent_archivist/src/storage/json.rs +++ /dev/null @@ -1,342 +0,0 @@ -//! JSON storage utilities for session metadata. -//! -//! Handles reading and writing JSON files for session and connector metadata. -//! Uses atomic write operations (write-to-temp + rename) to ensure consistency. - -use serde::{Deserialize, Serialize}; -use std::path::Path; -use tokio::io::AsyncWriteExt; - -/// Write a value to a JSON file atomically -/// -/// This function: -/// 1. Serializes the value to pretty-printed JSON -/// 2. 
Writes to a temporary file (`{path}.tmp`) -/// 3. Renames the temp file to the target path (atomic operation) -/// -/// The rename operation is atomic on most filesystems, ensuring that -/// readers will either see the old complete file or the new complete file, -/// never a partially written file. -/// -/// # Arguments -/// * `path` - Path to the JSON file -/// * `value` - Value to serialize and write -/// -/// # Example -/// ```no_run -/// use dirigent_archivist::storage::json::write_json; -/// use serde::{Serialize, Deserialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct Config { -/// setting: String, -/// } -/// -/// # async fn example() -> std::io::Result<()> { -/// let config = Config { setting: "value".to_string() }; -/// write_json(std::path::Path::new("config.json"), &config).await?; -/// # Ok(()) -/// # } -/// ``` -pub async fn write_json(path: &Path, value: &T) -> std::io::Result<()> { - // Serialize to pretty-printed JSON - let json = serde_json::to_string_pretty(value) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - - // Create temporary file path (same directory for atomic rename) - let temp_path = path.with_extension("tmp"); - - // Write to temporary file - let mut file = tokio::fs::File::create(&temp_path).await?; - file.write_all(json.as_bytes()).await?; - file.sync_all().await?; - drop(file); // Close the file before rename - - // Atomically rename temp file to target path - tokio::fs::rename(&temp_path, path).await?; - - Ok(()) -} - -/// Read a value from a JSON file -/// -/// If the file doesn't exist, returns a NotFound error. 
-/// -/// # Arguments -/// * `path` - Path to the JSON file -/// -/// # Returns -/// Deserialized value -/// -/// # Example -/// ```no_run -/// use dirigent_archivist::storage::json::read_json; -/// use serde::{Serialize, Deserialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct Config { -/// setting: String, -/// } -/// -/// # async fn example() -> std::io::Result<()> { -/// let config: Config = read_json(std::path::Path::new("config.json")).await?; -/// # Ok(()) -/// # } -/// ``` -pub async fn read_json Deserialize<'de>>(path: &Path) -> std::io::Result { - // Read file to string - let content = tokio::fs::read_to_string(path).await?; - - // Deserialize from JSON - let value: T = serde_json::from_str(&content) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - - Ok(value) -} - -#[cfg(test)] -mod tests { - use super::*; - use serde::{Deserialize, Serialize}; - use uuid::Uuid; - - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] - struct TestData { - id: String, - value: i32, - nested: NestedData, - } - - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] - struct NestedData { - flag: bool, - items: Vec, - } - - #[tokio::test] - async fn test_write_and_read_roundtrip() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_json_{}.json", Uuid::now_v7())); - - let data = TestData { - id: "test-123".to_string(), - value: 42, - nested: NestedData { - flag: true, - items: vec!["a".to_string(), "b".to_string(), "c".to_string()], - }, - }; - - // Write - write_json(&file_path, &data).await.unwrap(); - - // Read back - let read_data: TestData = read_json(&file_path).await.unwrap(); - - // Verify - assert_eq!(read_data, data); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_pretty_printed_output() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_pretty_{}.json", Uuid::now_v7())); - - let data = 
TestData { - id: "test".to_string(), - value: 100, - nested: NestedData { - flag: false, - items: vec!["x".to_string()], - }, - }; - - // Write - write_json(&file_path, &data).await.unwrap(); - - // Read as raw string - let content = tokio::fs::read_to_string(&file_path).await.unwrap(); - - // Verify it's pretty-printed (contains newlines and indentation) - assert!(content.contains('\n')); - assert!(content.contains(" ")); // Indentation - assert!(content.contains(r#""id""#)); - assert!(content.contains(r#""value""#)); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_read_missing_file() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("nonexistent_{}.json", Uuid::now_v7())); - - // Should return NotFound error - let result: std::io::Result = read_json(&file_path).await; - assert!(result.is_err()); - - match result { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::NotFound); - } - Ok(_) => panic!("Expected NotFound error"), - } - } - - #[tokio::test] - async fn test_atomic_write() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_atomic_{}.json", Uuid::now_v7())); - - let data1 = TestData { - id: "first".to_string(), - value: 1, - nested: NestedData { - flag: true, - items: vec![], - }, - }; - - let data2 = TestData { - id: "second".to_string(), - value: 2, - nested: NestedData { - flag: false, - items: vec!["updated".to_string()], - }, - }; - - // Write first version - write_json(&file_path, &data1).await.unwrap(); - - // Verify first version - let read1: TestData = read_json(&file_path).await.unwrap(); - assert_eq!(read1.id, "first"); - - // Overwrite with second version - write_json(&file_path, &data2).await.unwrap(); - - // Verify second version - let read2: TestData = read_json(&file_path).await.unwrap(); - assert_eq!(read2.id, "second"); - assert_eq!(read2.value, 2); - - // Temp file should not exist after rename - let temp_path 
= file_path.with_extension("tmp"); - assert!(!temp_path.exists()); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_invalid_json_error() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_invalid_{}.json", Uuid::now_v7())); - - // Write invalid JSON manually - tokio::fs::write(&file_path, "{ invalid json }") - .await - .unwrap(); - - // Reading should fail with InvalidData error - let result: std::io::Result = read_json(&file_path).await; - assert!(result.is_err()); - - match result { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::InvalidData); - } - Ok(_) => panic!("Expected InvalidData error"), - } - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_concurrent_writes_different_files() { - let temp_dir = std::env::temp_dir(); - let file1 = temp_dir.join(format!("test_concurrent_1_{}.json", Uuid::now_v7())); - let file2 = temp_dir.join(format!("test_concurrent_2_{}.json", Uuid::now_v7())); - - let data1 = TestData { - id: "file1".to_string(), - value: 1, - nested: NestedData { - flag: true, - items: vec![], - }, - }; - - let data2 = TestData { - id: "file2".to_string(), - value: 2, - nested: NestedData { - flag: false, - items: vec![], - }, - }; - - // Write concurrently - let (r1, r2) = tokio::join!(write_json(&file1, &data1), write_json(&file2, &data2)); - - r1.unwrap(); - r2.unwrap(); - - // Verify both files - let read1: TestData = read_json(&file1).await.unwrap(); - let read2: TestData = read_json(&file2).await.unwrap(); - - assert_eq!(read1, data1); - assert_eq!(read2, data2); - - // Clean up - tokio::fs::remove_file(&file1).await.ok(); - tokio::fs::remove_file(&file2).await.ok(); - } - - #[tokio::test] - async fn test_write_creates_parent_directory() { - let temp_dir = std::env::temp_dir(); - let base_dir = temp_dir.join(format!("test_parent_{}", Uuid::now_v7())); - - // Note: Parent directory does NOT exist 
yet - // This test verifies that write_json does NOT auto-create parent dirs - // (Caller is responsible for creating parent directories) - - let file_path = base_dir.join("subdir").join("test.json"); - - let data = TestData { - id: "test".to_string(), - value: 42, - nested: NestedData { - flag: true, - items: vec![], - }, - }; - - // This should fail because parent directory doesn't exist - let result = write_json(&file_path, &data).await; - assert!(result.is_err()); - - // Now create parent directory - tokio::fs::create_dir_all(file_path.parent().unwrap()) - .await - .unwrap(); - - // Now write should succeed - write_json(&file_path, &data).await.unwrap(); - - // Verify - let read_data: TestData = read_json(&file_path).await.unwrap(); - assert_eq!(read_data, data); - - // Clean up - tokio::fs::remove_dir_all(&base_dir).await.ok(); - } -} diff --git a/crates/dirigent_archivist/src/storage/mod.rs b/crates/dirigent_archivist/src/storage/mod.rs deleted file mode 100644 index 0f49474..0000000 --- a/crates/dirigent_archivist/src/storage/mod.rs +++ /dev/null @@ -1,118 +0,0 @@ -//! Storage layer for the Archivist. -//! -//! Provides file-based storage using NDJSON, JSON, and TSV formats, -//! along with content-addressable file storage for attachments. - -use uuid::Uuid; - -pub mod files; -pub mod json; -pub mod ndjson; -pub mod paths; -pub mod tsv; - -// Re-export commonly used types and functions -pub use files::{get_file, store_file}; -pub use json::{read_json, write_json}; -pub use ndjson::{append_ndjson, read_ndjson, write_ndjson}; -pub use paths::ArchivePaths; -pub use tsv::{read_connector_index, write_connector_index}; - -/// Check if a UUID is version 7 (time-ordered). -/// -/// UUID version 7 is used throughout the archivist for scroll_ids and other -/// identifiers that need to be time-ordered and sortable. 
-/// -/// # Examples -/// -/// ``` -/// use uuid::Uuid; -/// use dirigent_archivist::storage::is_uuid7; -/// -/// let uuid7 = Uuid::now_v7(); -/// assert!(is_uuid7(&uuid7)); -/// -/// let uuid4 = Uuid::new_v4(); -/// assert!(!is_uuid7(&uuid4)); -/// ``` -pub fn is_uuid7(uuid: &Uuid) -> bool { - uuid.get_version_num() == 7 -} - -/// Parse a string as UUID7, returning None for other versions. -/// -/// This function ensures that only UUID version 7 identifiers are accepted, -/// rejecting other UUID versions (v1, v4, v5, etc.) that may be valid UUIDs -/// but don't meet the archivist's time-ordering requirements. -/// -/// # Examples -/// -/// ``` -/// use uuid::Uuid; -/// use dirigent_archivist::storage::parse_uuid7; -/// -/// // UUID7 string parses successfully -/// let uuid7_str = Uuid::now_v7().to_string(); -/// assert!(parse_uuid7(&uuid7_str).is_some()); -/// -/// // UUID4 string is rejected -/// let uuid4_str = Uuid::new_v4().to_string(); -/// assert!(parse_uuid7(&uuid4_str).is_none()); -/// -/// // Invalid UUID string is rejected -/// assert!(parse_uuid7("not-a-uuid").is_none()); -/// ``` -pub fn parse_uuid7(s: &str) -> Option { - Uuid::parse_str(s).ok().filter(|u| is_uuid7(u)) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_is_uuid7_accepts_uuid7() { - let uuid7 = Uuid::now_v7(); - assert!(is_uuid7(&uuid7), "UUID7 should be recognized as version 7"); - } - - #[test] - fn test_is_uuid7_rejects_uuid4() { - let uuid4 = Uuid::new_v4(); - assert!(!is_uuid7(&uuid4), "UUID4 should not be recognized as version 7"); - } - - #[test] - fn test_parse_uuid7_accepts_valid_uuid7_string() { - let uuid7 = Uuid::now_v7(); - let uuid7_str = uuid7.to_string(); - let parsed = parse_uuid7(&uuid7_str); - - assert!(parsed.is_some(), "Valid UUID7 string should parse"); - assert_eq!(parsed.unwrap(), uuid7, "Parsed UUID should match original"); - } - - #[test] - fn test_parse_uuid7_rejects_uuid4_string() { - let uuid4 = Uuid::new_v4(); - let uuid4_str = 
uuid4.to_string(); - let parsed = parse_uuid7(&uuid4_str); - - assert!(parsed.is_none(), "UUID4 string should be rejected"); - } - - #[test] - fn test_parse_uuid7_rejects_invalid_uuid_string() { - let invalid_strings = vec![ - "not-a-uuid", - "12345678-1234-1234-1234-123456789012-extra", - "", - "invalid", - ]; - - for invalid in invalid_strings { - let parsed = parse_uuid7(invalid); - assert!(parsed.is_none(), "Invalid UUID string '{}' should be rejected", invalid); - } - } -} diff --git a/crates/dirigent_archivist/src/storage/ndjson.rs b/crates/dirigent_archivist/src/storage/ndjson.rs deleted file mode 100644 index 34f0676..0000000 --- a/crates/dirigent_archivist/src/storage/ndjson.rs +++ /dev/null @@ -1,361 +0,0 @@ -//! NDJSON (Newline Delimited JSON) storage utilities. -//! -//! Handles reading and writing NDJSON files for incremental message logs. -//! NDJSON format stores one JSON object per line, making it ideal for -//! append-only logs that can be read incrementally. - -use serde::{Deserialize, Serialize}; -use std::path::Path; -use tokio::fs::OpenOptions; -use tokio::io::AsyncWriteExt; - -/// Append a record to an NDJSON file -/// -/// This function: -/// 1. Serializes the record to JSON -/// 2. Opens the file in append mode (creates if not exists) -/// 3. Writes the JSON followed by a newline -/// 4. 
Calls fsync to ensure durability -/// -/// # Arguments -/// * `path` - Path to the NDJSON file -/// * `record` - Record to append (must be serializable) -/// -/// # Example -/// ```no_run -/// use dirigent_archivist::storage::ndjson::append_ndjson; -/// use serde::{Serialize, Deserialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct LogEntry { -/// message: String, -/// } -/// -/// # async fn example() -> std::io::Result<()> { -/// let entry = LogEntry { message: "Hello".to_string() }; -/// append_ndjson(std::path::Path::new("log.ndjson"), &entry).await?; -/// # Ok(()) -/// # } -/// ``` -pub async fn append_ndjson(path: &Path, record: &T) -> std::io::Result<()> { - // Serialize to JSON string - let json = serde_json::to_string(record) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - - // Open file in append mode (create if not exists) - let mut file = OpenOptions::new() - .create(true) - .append(true) - .open(path) - .await?; - - // Write JSON + newline - file.write_all(json.as_bytes()).await?; - file.write_all(b"\n").await?; - - // Fsync for durability - file.sync_all().await?; - - Ok(()) -} - -/// Atomically rewrite an NDJSON file with the given records. -/// -/// Uses a temp file + rename for crash safety. If the process crashes during -/// the write, the original file remains untouched. Only after the new content -/// is fully written and fsynced is the old file replaced. 
-/// -/// # Arguments -/// * `path` - Path to the NDJSON file (will be created or overwritten) -/// * `records` - Records to write (one per line) -/// -/// # Example -/// ```no_run -/// use dirigent_archivist::storage::ndjson::write_ndjson; -/// use serde::{Serialize, Deserialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct LogEntry { -/// message: String, -/// } -/// -/// # async fn example() -> std::io::Result<()> { -/// let entries = vec![ -/// LogEntry { message: "First".to_string() }, -/// LogEntry { message: "Second".to_string() }, -/// ]; -/// write_ndjson(std::path::Path::new("log.ndjson"), &entries).await?; -/// # Ok(()) -/// # } -/// ``` -pub async fn write_ndjson(path: &Path, records: &[T]) -> std::io::Result<()> { - let temp_path = path.with_extension("jsonl.tmp"); - let mut file = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(&temp_path) - .await?; - for record in records { - let json = serde_json::to_string(record) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - file.write_all(json.as_bytes()).await?; - file.write_all(b"\n").await?; - } - file.flush().await?; - file.sync_all().await?; - drop(file); - tokio::fs::rename(&temp_path, path).await?; - Ok(()) -} - -/// Read all records from an NDJSON file -/// -/// This function: -/// 1. Reads the entire file to a string -/// 2. Splits by newlines -/// 3. Deserializes each non-empty line -/// -/// If the file doesn't exist, returns an empty vector. 
-/// -/// # Arguments -/// * `path` - Path to the NDJSON file -/// -/// # Returns -/// Vector of deserialized records -/// -/// # Example -/// ```no_run -/// use dirigent_archivist::storage::ndjson::read_ndjson; -/// use serde::{Serialize, Deserialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct LogEntry { -/// message: String, -/// } -/// -/// # async fn example() -> std::io::Result<()> { -/// let entries: Vec = read_ndjson(std::path::Path::new("log.ndjson")).await?; -/// # Ok(()) -/// # } -/// ``` -pub async fn read_ndjson Deserialize<'de>>(path: &Path) -> std::io::Result> { - // Check if file exists - if !path.exists() { - return Ok(Vec::new()); - } - - // Read entire file to string - let content = tokio::fs::read_to_string(path).await?; - - // Parse line by line - let mut records = Vec::new(); - for (line_num, line) in content.lines().enumerate() { - // Skip empty lines - if line.trim().is_empty() { - continue; - } - - // Deserialize the line - let record: T = serde_json::from_str(line).map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Failed to parse line {}: {}", line_num + 1, e), - ) - })?; - - records.push(record); - } - - Ok(records) -} - -#[cfg(test)] -mod tests { - use super::*; - use serde::{Deserialize, Serialize}; - use uuid::Uuid; - - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] - struct TestRecord { - id: String, - value: i32, - } - - #[tokio::test] - async fn test_append_and_read_roundtrip() { - // Create a temporary file - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_ndjson_{}.ndjson", Uuid::now_v7())); - - // Append multiple records - let record1 = TestRecord { - id: "rec1".to_string(), - value: 42, - }; - let record2 = TestRecord { - id: "rec2".to_string(), - value: 100, - }; - let record3 = TestRecord { - id: "rec3".to_string(), - value: -5, - }; - - append_ndjson(&file_path, &record1).await.unwrap(); - append_ndjson(&file_path, 
&record2).await.unwrap(); - append_ndjson(&file_path, &record3).await.unwrap(); - - // Read back - let records: Vec = read_ndjson(&file_path).await.unwrap(); - - // Verify - assert_eq!(records.len(), 3); - assert_eq!(records[0], record1); - assert_eq!(records[1], record2); - assert_eq!(records[2], record3); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_read_empty_file() { - // Create a temporary empty file - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_empty_{}.ndjson", Uuid::now_v7())); - - tokio::fs::write(&file_path, "").await.unwrap(); - - // Read should return empty vector - let records: Vec = read_ndjson(&file_path).await.unwrap(); - assert_eq!(records.len(), 0); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_read_missing_file() { - // Read from a non-existent file - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("nonexistent_{}.ndjson", Uuid::now_v7())); - - // Should return empty vector, not error - let records: Vec = read_ndjson(&file_path).await.unwrap(); - assert_eq!(records.len(), 0); - } - - #[tokio::test] - async fn test_trailing_newlines() { - // Create a file with trailing newlines - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_trailing_{}.ndjson", Uuid::now_v7())); - - // Write manually with extra newlines - let content = r#"{"id":"rec1","value":42} - -{"id":"rec2","value":100} - - -"#; - tokio::fs::write(&file_path, content).await.unwrap(); - - // Read should skip empty lines - let records: Vec = read_ndjson(&file_path).await.unwrap(); - assert_eq!(records.len(), 2); - assert_eq!(records[0].id, "rec1"); - assert_eq!(records[1].id, "rec2"); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_concurrent_appends() { - // Test appending to different files concurrently - let temp_dir 
= std::env::temp_dir(); - let file1 = temp_dir.join(format!("test_concurrent_1_{}.ndjson", Uuid::now_v7())); - let file2 = temp_dir.join(format!("test_concurrent_2_{}.ndjson", Uuid::now_v7())); - - let record1 = TestRecord { - id: "file1".to_string(), - value: 1, - }; - let record2 = TestRecord { - id: "file2".to_string(), - value: 2, - }; - - // Append concurrently - let (r1, r2) = tokio::join!( - append_ndjson(&file1, &record1), - append_ndjson(&file2, &record2) - ); - - r1.unwrap(); - r2.unwrap(); - - // Verify both files have correct content - let records1: Vec = read_ndjson(&file1).await.unwrap(); - let records2: Vec = read_ndjson(&file2).await.unwrap(); - - assert_eq!(records1.len(), 1); - assert_eq!(records1[0], record1); - assert_eq!(records2.len(), 1); - assert_eq!(records2[0], record2); - - // Clean up - tokio::fs::remove_file(&file1).await.ok(); - tokio::fs::remove_file(&file2).await.ok(); - } - - #[tokio::test] - async fn test_invalid_json_error() { - // Create a file with invalid JSON - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_invalid_{}.ndjson", Uuid::now_v7())); - - let content = r#"{"id":"rec1","value":42} -invalid json here -{"id":"rec2","value":100}"#; - tokio::fs::write(&file_path, content).await.unwrap(); - - // Reading should fail with InvalidData error - let result: std::io::Result> = read_ndjson(&file_path).await; - assert!(result.is_err()); - - match result { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::InvalidData); - assert!(e.to_string().contains("line 2")); - } - Ok(_) => panic!("Expected error"), - } - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_fsync_called() { - // This test verifies that append_ndjson completes without error, - // which implies fsync was called successfully - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_fsync_{}.ndjson", Uuid::now_v7())); - - let record = TestRecord { - 
id: "test".to_string(), - value: 42, - }; - - // Should complete without error (including fsync) - append_ndjson(&file_path, &record).await.unwrap(); - - // Verify file was written - assert!(file_path.exists()); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } -} diff --git a/crates/dirigent_archivist/src/storage/paths.rs b/crates/dirigent_archivist/src/storage/paths.rs deleted file mode 100644 index b8adce9..0000000 --- a/crates/dirigent_archivist/src/storage/paths.rs +++ /dev/null @@ -1,436 +0,0 @@ -//! Path management for archive directory structure. -//! -//! Defines the archive directory layout and provides utilities for -//! constructing paths to various archive components. - -use std::path::PathBuf; -use std::sync::Arc; - -use tokio::sync::Mutex; -use uuid::Uuid; - -/// Archive path utilities -/// -/// Provides methods to generate paths for all archive components: -/// - Sessions: `.contexts/{scroll_id}/` -/// - Connectors: `.db/connectors/{connector_uid}/` -/// - Files: `.files/{ab}/{cd}/{ef}/{...}` (sharded by SHA-256) -/// -/// Also carries the per-archive mutex that serialises `store_file`'s -/// read-modify-rewrite of `.files/file_index.ndjson` — the shared -/// `file_index.tmp` path made concurrent calls race on `rename`. -pub struct ArchivePaths { - root: PathBuf, - /// Guards the critical section in `storage::files::store_file` that - /// rewrites the per-archive file index. Cloneable `Arc` so callers - /// that share the same `ArchivePaths` instance serialise correctly. - file_index_lock: Arc>, -} - -impl ArchivePaths { - /// Create a new ArchivePaths instance - pub fn new(root: PathBuf) -> Self { - Self { - root, - file_index_lock: Arc::new(Mutex::new(())), - } - } - - /// Get the archive root directory - pub fn root(&self) -> &PathBuf { - &self.root - } - - /// Acquire the per-archive file-index lock. Held across the - /// `read → modify → append-temp → rename` sequence in - /// `storage::files::store_file`. 
- pub(crate) fn file_index_lock(&self) -> Arc> { - Arc::clone(&self.file_index_lock) - } - - // ======================================================================== - // Session Paths - // ======================================================================== - - /// Get the directory for a specific session - /// - /// Returns: `{root}/.contexts/{scroll_id}` - pub fn session_dir(&self, scroll_id: Uuid) -> PathBuf { - self.root.join(".contexts").join(scroll_id.to_string()) - } - - /// Get the session metadata JSON file path - /// - /// Returns: `{root}/.contexts/{scroll_id}/session.json` - pub fn session_json(&self, scroll_id: Uuid) -> PathBuf { - self.session_dir(scroll_id).join("session.json") - } - - /// Get the messages NDJSON file path for WRITE operations - /// - /// For read operations, use `messages_path_for_read()` which supports both .jsonl and .ndjson - /// - /// Returns: `{root}/.contexts/{scroll_id}/messages.ndjson` - #[deprecated(since = "0.2.0", note = "Use messages_path_for_write() instead")] - pub fn messages_ndjson(&self, scroll_id: Uuid) -> PathBuf { - self.session_dir(scroll_id).join("messages.ndjson") - } - - /// Resolve messages file path for reading. - /// Checks for .jsonl first, falls back to .ndjson - pub fn messages_path_for_read(&self, scroll_id: Uuid) -> PathBuf { - let session_dir = self.session_dir(scroll_id); - self.resolve_ndjson_or_jsonl(&session_dir, "messages") - } - - /// Get the messages file path for WRITE operations. - /// Always returns .jsonl path (new canonical format). - pub fn messages_path_for_write(&self, scroll_id: Uuid) -> PathBuf { - self.session_dir(scroll_id).join("messages.jsonl") - } - - /// Get the events file path for meta sessions (.jsonl format) - /// - /// Meta sessions (AcpConnection) store connection events in events.jsonl - /// instead of messages. These events track connection lifecycle and session navigation. 
- /// - /// Returns: `{root}/.contexts/{scroll_id}/events.jsonl` - pub fn events_path(&self, scroll_id: Uuid) -> PathBuf { - self.session_dir(scroll_id).join("events.jsonl") - } - - /// Get the DAG index file path. - /// - /// Returns: `{root}/.db/dag.jsonl` - pub fn dag_path(&self) -> PathBuf { - self.root.join(".db").join("dag.jsonl") - } - - /// Resolve sessions mapping file path for reading. - /// Checks for .jsonl first, falls back to .ndjson - pub fn sessions_path_for_read(&self, connector_uid: Uuid) -> PathBuf { - let connector_dir = self.connector_dir(connector_uid); - self.resolve_ndjson_or_jsonl(&connector_dir, "sessions") - } - - /// Get the sessions file path for WRITE operations. - /// Always returns .jsonl path (new canonical format). - pub fn sessions_path_for_write(&self, connector_uid: Uuid) -> PathBuf { - self.connector_dir(connector_uid).join("sessions.jsonl") - } - - // ======================================================================== - // Connector Paths - // ======================================================================== - - /// Get the directory for a specific connector - /// - /// Returns: `{root}/.db/connectors/{connector_uid}` - pub fn connector_dir(&self, connector_uid: Uuid) -> PathBuf { - self.root - .join(".db") - .join("connectors") - .join(connector_uid.to_string()) - } - - /// Get the connector index TSV file path - /// - /// Returns: `{root}/.db/connectors/index.tsv` - pub fn connector_index_tsv(&self) -> PathBuf { - self.root.join(".db").join("connectors").join("index.tsv") - } - - // ======================================================================== - // File Storage Paths - // ======================================================================== - - /// Get the blob path for a file using sharded storage - /// - /// Sharding strategy: - /// - Input: `sha256:abcdef0123456789...` - /// - Strip `sha256:` prefix - /// - Shard by first 6 characters (2-char segments) - /// - Returns: 
`{root}/.files/ab/cd/ef/0123456789...` - /// - /// # Arguments - /// * `file_id` - File identifier (e.g., "sha256:abcdef...") - pub fn file_blob_path(&self, file_id: &str) -> PathBuf { - // Strip "sha256:" prefix if present - let hash = file_id.strip_prefix("sha256:").unwrap_or(file_id); - - // Extract first 6 chars for sharding (3 levels of 2 chars each) - // If hash is shorter, we'll just use what we have - let (shard1, remainder) = if hash.len() >= 2 { - hash.split_at(2) - } else { - (hash, "") - }; - - let (shard2, remainder) = if remainder.len() >= 2 { - remainder.split_at(2) - } else { - (remainder, "") - }; - - let (shard3, _) = if remainder.len() >= 2 { - remainder.split_at(2) - } else { - (remainder, remainder) - }; - - // Build the sharded path - let mut path = self.root.join(".files"); - - if !shard1.is_empty() { - path = path.join(shard1); - } - if !shard2.is_empty() { - path = path.join(shard2); - } - if !shard3.is_empty() { - path = path.join(shard3); - } - - // Use the full hash (without prefix) as filename - path.join(hash) - } - - // ======================================================================== - // Directory Creation - // ======================================================================== - - /// Ensure all required directories exist for a session - /// - /// Creates the session directory if it doesn't exist. - pub async fn ensure_dirs(&self, scroll_id: Uuid) -> std::io::Result<()> { - let session_dir = self.session_dir(scroll_id); - tokio::fs::create_dir_all(session_dir).await - } - - /// Ensure the connector directory exists - /// - /// Creates `.db/connectors/{connector_uid}/` if it doesn't exist. - /// This should be called before any operations that write to connector-specific files. 
- pub async fn ensure_connector_dir(&self, connector_uid: Uuid) -> std::io::Result<()> { - let connector_dir = self.connector_dir(connector_uid); - tokio::fs::create_dir_all(&connector_dir).await - } - - /// Generic resolution: prefer .jsonl, fall back to .ndjson - /// - /// This enables backward compatibility with existing .ndjson archives - /// while supporting the more widely-recognized .jsonl extension. - fn resolve_ndjson_or_jsonl(&self, dir: &std::path::Path, base_name: &str) -> PathBuf { - // Check for .jsonl first (newer, more prominent extension) - let jsonl_path = dir.join(format!("{}.jsonl", base_name)); - if jsonl_path.exists() { - return jsonl_path; - } - - // Fall back to .ndjson (legacy format, still canonical for writes in Phase 1) - dir.join(format!("{}.ndjson", base_name)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::path::Path; - - #[test] - fn test_session_dir() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - let scroll_id = Uuid::parse_str("018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f").unwrap(); - - let session_dir = paths.session_dir(scroll_id); - assert_eq!( - session_dir, - Path::new("/archive/.contexts/018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f") - ); - } - - #[test] - fn test_session_json() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - let scroll_id = Uuid::parse_str("018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f").unwrap(); - - let json_path = paths.session_json(scroll_id); - assert_eq!( - json_path, - Path::new("/archive/.contexts/018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f/session.json") - ); - } - - #[test] - fn test_messages_ndjson() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - let scroll_id = Uuid::parse_str("018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f").unwrap(); - - let messages_path = paths.messages_ndjson(scroll_id); - assert_eq!( - messages_path, - Path::new("/archive/.contexts/018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f/messages.ndjson") - ); - } - - #[test] - fn test_connector_dir() { - let paths = 
ArchivePaths::new(PathBuf::from("/archive")); - let connector_uid = Uuid::parse_str("018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f").unwrap(); - - let connector_dir = paths.connector_dir(connector_uid); - assert_eq!( - connector_dir, - Path::new("/archive/.db/connectors/018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f") - ); - } - - #[test] - fn test_connector_index_tsv() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - - let index_path = paths.connector_index_tsv(); - assert_eq!(index_path, Path::new("/archive/.db/connectors/index.tsv")); - } - - #[test] - fn test_file_blob_path_with_prefix() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - let file_id = "sha256:abcdef0123456789"; - - let blob_path = paths.file_blob_path(file_id); - assert_eq!( - blob_path, - Path::new("/archive/.files/ab/cd/ef/abcdef0123456789") - ); - } - - #[test] - fn test_file_blob_path_without_prefix() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - let file_id = "abcdef0123456789"; - - let blob_path = paths.file_blob_path(file_id); - assert_eq!( - blob_path, - Path::new("/archive/.files/ab/cd/ef/abcdef0123456789") - ); - } - - #[test] - fn test_file_blob_path_short_hash() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - - // Very short hash (less than 6 chars) - let file_id = "sha256:abc"; - let blob_path = paths.file_blob_path(file_id); - assert_eq!(blob_path, Path::new("/archive/.files/ab/c/abc")); - - // 4 char hash - let file_id = "sha256:abcd"; - let blob_path = paths.file_blob_path(file_id); - assert_eq!(blob_path, Path::new("/archive/.files/ab/cd/abcd")); - - // 5 char hash - let file_id = "sha256:abcde"; - let blob_path = paths.file_blob_path(file_id); - assert_eq!(blob_path, Path::new("/archive/.files/ab/cd/e/abcde")); - } - - #[test] - fn test_file_blob_path_long_hash() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - let file_id = "sha256:abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789"; - - let blob_path = 
paths.file_blob_path(file_id); - assert_eq!( - blob_path, - Path::new("/archive/.files/ab/cd/ef/abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789") - ); - } - - #[test] - fn test_paths_use_correct_separators() { - let paths = ArchivePaths::new(PathBuf::from("/archive")); - let scroll_id = Uuid::parse_str("018c8f7e-7b6a-7e3c-9f2d-1a2b3c4d5e6f").unwrap(); - - // All paths should use PathBuf which handles platform separators - let session_dir = paths.session_dir(scroll_id); - let session_json = paths.session_json(scroll_id); - let messages_ndjson = paths.messages_ndjson(scroll_id); - - // On Windows, these should contain backslashes; on Unix, forward slashes - // PathBuf handles this automatically, so we just verify the components - assert!(session_dir.to_string_lossy().contains(".contexts")); - assert!(session_json.to_string_lossy().contains("session.json")); - assert!(messages_ndjson - .to_string_lossy() - .contains("messages.ndjson")); - } - - #[tokio::test] - async fn test_ensure_dirs() { - // Create a temporary directory for testing - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - let scroll_id = Uuid::now_v7(); - - // Directory should not exist yet - assert!(!paths.session_dir(scroll_id).exists()); - - // Create the directory - paths.ensure_dirs(scroll_id).await.unwrap(); - - // Directory should now exist - assert!(paths.session_dir(scroll_id).exists()); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_messages_path_for_read_ndjson_only() { - // Create a temporary directory with only .ndjson file - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - let scroll_id = Uuid::now_v7(); - - // Create session directory and .ndjson file - paths.ensure_dirs(scroll_id).await.unwrap(); - let ndjson_path = 
paths.messages_ndjson(scroll_id); - tokio::fs::write(&ndjson_path, "test content").await.unwrap(); - - // messages_path_for_read should return the .ndjson path - let resolved_path = paths.messages_path_for_read(scroll_id); - assert_eq!(resolved_path, ndjson_path); - assert!(resolved_path.to_string_lossy().ends_with("messages.ndjson")); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_messages_path_for_read_jsonl_preferred() { - // Create a temporary directory with both .ndjson and .jsonl files - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let paths = ArchivePaths::new(temp_dir.clone()); - let scroll_id = Uuid::now_v7(); - - // Create session directory and both files - paths.ensure_dirs(scroll_id).await.unwrap(); - let session_dir = paths.session_dir(scroll_id); - let ndjson_path = session_dir.join("messages.ndjson"); - let jsonl_path = session_dir.join("messages.jsonl"); - - tokio::fs::write(&ndjson_path, "old content").await.unwrap(); - tokio::fs::write(&jsonl_path, "new content").await.unwrap(); - - // messages_path_for_read should prefer .jsonl - let resolved_path = paths.messages_path_for_read(scroll_id); - assert_eq!(resolved_path, jsonl_path); - assert!(resolved_path.to_string_lossy().ends_with("messages.jsonl")); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - } -} diff --git a/crates/dirigent_archivist/src/storage/tsv.rs b/crates/dirigent_archivist/src/storage/tsv.rs deleted file mode 100644 index dc273a2..0000000 --- a/crates/dirigent_archivist/src/storage/tsv.rs +++ /dev/null @@ -1,552 +0,0 @@ -//! TSV (Tab-Separated Values) storage utilities. -//! -//! Handles reading and writing TSV files for session listings and indices. -//! TSV format is human-readable and grep-able, making it ideal for manual -//! inspection and command-line processing. 
- -use crate::types::ConnectorIndexRow; -use std::path::Path; -use tokio::io::AsyncWriteExt; -use uuid::Uuid; - -/// Write connector index to a TSV file atomically -/// -/// This function: -/// 1. Generates the header line -/// 2. Formats each row as tab-separated values -/// 3. Writes to a temporary file -/// 4. Renames to the target path (atomic operation) -/// -/// TSV format: -/// ```text -/// connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at -/// 018c8f7e-...\tOpenCode\tLocal Dev\topencode@...\t\t2025-01-15T12:34:56Z -/// ``` -/// -/// # Arguments -/// * `path` - Path to the TSV file -/// * `rows` - Rows to write -pub async fn write_connector_index(path: &Path, rows: &[ConnectorIndexRow]) -> std::io::Result<()> { - // Generate header - let header = "connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\tfingerprint\n"; - - // Format rows - let mut content = String::from(header); - for row in rows { - let alias_of_str = row - .alias_of - .as_ref() - .map(|u| u.to_string()) - .unwrap_or_default(); - - let fingerprint_str = row.fingerprint.as_deref().unwrap_or(""); - - let line = format!( - "{}\t{}\t{}\t{}\t{}\t{}\t{}\n", - row.connector_uid, - row.r#type, - row.title, - row.client_native_id, - alias_of_str, - row.created_at.to_rfc3339(), - fingerprint_str, - ); - content.push_str(&line); - } - - // Write to temp file - let temp_path = path.with_extension("tmp"); - let mut file = tokio::fs::File::create(&temp_path).await?; - file.write_all(content.as_bytes()).await?; - file.sync_all().await?; - drop(file); - - // Atomically rename - tokio::fs::rename(&temp_path, path).await?; - - Ok(()) -} - -/// Read connector index from a TSV file -/// -/// If the file doesn't exist, returns an empty vector. 
-/// -/// # Arguments -/// * `path` - Path to the TSV file -/// -/// # Returns -/// Vector of connector index rows -pub async fn read_connector_index(path: &Path) -> std::io::Result> { - // Check if file exists - if !path.exists() { - return Ok(Vec::new()); - } - - // Read file to string - let content = tokio::fs::read_to_string(path).await?; - - // Parse line by line - let mut rows = Vec::new(); - for (line_num, line) in content.lines().enumerate() { - // Skip header (line 0) - if line_num == 0 { - continue; - } - - // Skip empty lines - if line.trim().is_empty() { - continue; - } - - // Split by tab - let parts: Vec<&str> = line.split('\t').collect(); - // Accept 6 columns (legacy, no fingerprint) or 7 columns (with fingerprint) - if parts.len() != 6 && parts.len() != 7 { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!( - "Invalid TSV format at line {}: expected 6 or 7 fields, got {}", - line_num + 1, - parts.len() - ), - )); - } - - // Parse fields - let connector_uid = Uuid::parse_str(parts[0]).map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Invalid UUID at line {}: {}", line_num + 1, e), - ) - })?; - - let r#type = parts[1].to_string(); - let title = parts[2].to_string(); - let client_native_id = parts[3].to_string(); - - let alias_of = if parts[4].is_empty() { - None - } else { - Some(Uuid::parse_str(parts[4]).map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Invalid alias_of UUID at line {}: {}", line_num + 1, e), - ) - })?) - }; - - let created_at = chrono::DateTime::parse_from_rfc3339(parts[5]) - .map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Invalid timestamp at line {}: {}", line_num + 1, e), - ) - })? 
- .with_timezone(&chrono::Utc); - - // Parse optional fingerprint (7th column, may be absent in legacy files) - let fingerprint = if parts.len() >= 7 && !parts[6].is_empty() { - Some(parts[6].to_string()) - } else { - None - }; - - rows.push(ConnectorIndexRow { - connector_uid, - r#type, - title, - client_native_id, - alias_of, - created_at, - fingerprint, - }); - } - - Ok(rows) -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::{DateTime, Utc}; - use std::time::SystemTime; - - #[tokio::test] - async fn test_write_and_read_roundtrip() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_tsv_{}.tsv", Uuid::now_v7())); - - let uid1 = Uuid::now_v7(); - let uid2 = Uuid::now_v7(); - let uid3 = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let rows = vec![ - ConnectorIndexRow { - connector_uid: uid1, - r#type: "OpenCode".to_string(), - title: "Local Dev".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - alias_of: None, - created_at: now, - fingerprint: None, - }, - ConnectorIndexRow { - connector_uid: uid2, - r#type: "ACP".to_string(), - title: "Remote Agent".to_string(), - client_native_id: "acp@http://localhost:3000".to_string(), - alias_of: Some(uid3), - created_at: now, - fingerprint: None, - }, - ]; - - // Write - write_connector_index(&file_path, &rows).await.unwrap(); - - // Read back - let read_rows = read_connector_index(&file_path).await.unwrap(); - - // Verify - assert_eq!(read_rows.len(), 2); - assert_eq!(read_rows[0].connector_uid, uid1); - assert_eq!(read_rows[0].r#type, "OpenCode"); - assert_eq!(read_rows[0].title, "Local Dev"); - assert_eq!(read_rows[0].alias_of, None); - - assert_eq!(read_rows[1].connector_uid, uid2); - assert_eq!(read_rows[1].r#type, "ACP"); - assert_eq!(read_rows[1].alias_of, Some(uid3)); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_optional_field_handling() { - let temp_dir = 
std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_optional_{}.tsv", Uuid::now_v7())); - - let uid1 = Uuid::now_v7(); - let uid2 = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let rows = vec![ - ConnectorIndexRow { - connector_uid: uid1, - r#type: "Type1".to_string(), - title: "Title1".to_string(), - client_native_id: "client1".to_string(), - alias_of: None, // Empty alias_of - created_at: now, - fingerprint: None, - }, - ConnectorIndexRow { - connector_uid: uid2, - r#type: "Type2".to_string(), - title: "Title2".to_string(), - client_native_id: "client2".to_string(), - alias_of: Some(uid1), // Non-empty alias_of - created_at: now, - fingerprint: None, - }, - ]; - - // Write - write_connector_index(&file_path, &rows).await.unwrap(); - - // Verify raw content has empty string for None - let content = tokio::fs::read_to_string(&file_path).await.unwrap(); - let lines: Vec<&str> = content.lines().collect(); - - // First data line should have empty alias_of (two consecutive tabs) - assert!(lines[1].contains("\t\t")); - - // Second data line should have a UUID for alias_of - assert!(lines[2].contains(&uid1.to_string())); - - // Read back - let read_rows = read_connector_index(&file_path).await.unwrap(); - - // Verify optional field handling - assert_eq!(read_rows[0].alias_of, None); - assert_eq!(read_rows[1].alias_of, Some(uid1)); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_header_generation() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_header_{}.tsv", Uuid::now_v7())); - - // Write empty index - write_connector_index(&file_path, &[]).await.unwrap(); - - // Read raw content - let content = tokio::fs::read_to_string(&file_path).await.unwrap(); - - // Verify header - assert_eq!( - content.trim(), - "connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\tfingerprint" - ); - - // Clean up - 
tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_rfc3339_timestamp_formatting() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_timestamp_{}.tsv", Uuid::now_v7())); - - let uid = Uuid::now_v7(); - let timestamp = DateTime::::from(SystemTime::now()); - - let rows = vec![ConnectorIndexRow { - connector_uid: uid, - r#type: "Test".to_string(), - title: "Title".to_string(), - client_native_id: "client".to_string(), - alias_of: None, - created_at: timestamp, - fingerprint: None, - }]; - - // Write - write_connector_index(&file_path, &rows).await.unwrap(); - - // Read raw content - let content = tokio::fs::read_to_string(&file_path).await.unwrap(); - - // Verify RFC 3339 format in content - assert!(content.contains('T')); - assert!(content.contains('Z') || content.contains('+')); - - // Read back and verify timestamp is preserved - let read_rows = read_connector_index(&file_path).await.unwrap(); - let diff = - (timestamp.timestamp_millis() - read_rows[0].created_at.timestamp_millis()).abs(); - assert!(diff < 1000, "Timestamp difference too large"); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_missing_file_returns_empty_vec() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("nonexistent_{}.tsv", Uuid::now_v7())); - - // Should return empty vec, not error - let rows = read_connector_index(&file_path).await.unwrap(); - assert_eq!(rows.len(), 0); - } - - #[tokio::test] - async fn test_malformed_tsv_error() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_malformed_{}.tsv", Uuid::now_v7())); - - // Write malformed TSV (missing fields) - let content = - "connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\nuid1\ttype1\n"; - tokio::fs::write(&file_path, content).await.unwrap(); - - // Should fail with InvalidData - let result = 
read_connector_index(&file_path).await; - assert!(result.is_err()); - - match result { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::InvalidData); - assert!(e.to_string().contains("expected 6 or 7 fields")); - } - Ok(_) => panic!("Expected error"), - } - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_invalid_uuid_error() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_invalid_uuid_{}.tsv", Uuid::now_v7())); - - // Write TSV with invalid UUID - let content = "connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\ninvalid-uuid\tType\tTitle\tClient\t\t2025-01-15T12:34:56Z\n"; - tokio::fs::write(&file_path, content).await.unwrap(); - - // Should fail with InvalidData - let result = read_connector_index(&file_path).await; - assert!(result.is_err()); - - match result { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::InvalidData); - assert!(e.to_string().contains("Invalid UUID")); - } - Ok(_) => panic!("Expected error"), - } - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_invalid_timestamp_error() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_invalid_timestamp_{}.tsv", Uuid::now_v7())); - - let uid = Uuid::now_v7(); - - // Write TSV with invalid timestamp - let content = format!("connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\n{}\tType\tTitle\tClient\t\tinvalid-timestamp\n", uid); - tokio::fs::write(&file_path, content).await.unwrap(); - - // Should fail with InvalidData - let result = read_connector_index(&file_path).await; - assert!(result.is_err()); - - match result { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::InvalidData); - assert!(e.to_string().contains("Invalid timestamp")); - } - Ok(_) => panic!("Expected error"), - } - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - 
async fn test_atomic_write() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_atomic_{}.tsv", Uuid::now_v7())); - - let uid1 = Uuid::now_v7(); - let uid2 = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let rows1 = vec![ConnectorIndexRow { - connector_uid: uid1, - r#type: "First".to_string(), - title: "First Write".to_string(), - client_native_id: "client1".to_string(), - alias_of: None, - created_at: now, - fingerprint: None, - }]; - - let rows2 = vec![ConnectorIndexRow { - connector_uid: uid2, - r#type: "Second".to_string(), - title: "Second Write".to_string(), - client_native_id: "client2".to_string(), - alias_of: None, - created_at: now, - fingerprint: None, - }]; - - // Write first version - write_connector_index(&file_path, &rows1).await.unwrap(); - - // Verify first version - let read1 = read_connector_index(&file_path).await.unwrap(); - assert_eq!(read1.len(), 1); - assert_eq!(read1[0].title, "First Write"); - - // Overwrite with second version - write_connector_index(&file_path, &rows2).await.unwrap(); - - // Verify second version - let read2 = read_connector_index(&file_path).await.unwrap(); - assert_eq!(read2.len(), 1); - assert_eq!(read2[0].title, "Second Write"); - - // Temp file should not exist - let temp_path = file_path.with_extension("tmp"); - assert!(!temp_path.exists()); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_legacy_six_column_tsv_compatibility() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_legacy_tsv_{}.tsv", Uuid::now_v7())); - - let uid = Uuid::now_v7(); - - // Write a legacy 6-column TSV (no fingerprint column) - let content = format!( - "connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\n{}\tOpenCode\tLegacy\tclient-legacy\t\t2025-01-15T12:34:56Z\n", - uid - ); - tokio::fs::write(&file_path, content).await.unwrap(); - - // Should parse successfully with 
fingerprint = None - let rows = read_connector_index(&file_path).await.unwrap(); - assert_eq!(rows.len(), 1); - assert_eq!(rows[0].connector_uid, uid); - assert_eq!(rows[0].r#type, "OpenCode"); - assert_eq!(rows[0].title, "Legacy"); - assert_eq!(rows[0].fingerprint, None); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } - - #[tokio::test] - async fn test_fingerprint_roundtrip() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join(format!("test_fingerprint_{}.tsv", Uuid::now_v7())); - - let uid1 = Uuid::now_v7(); - let uid2 = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let rows = vec![ - ConnectorIndexRow { - connector_uid: uid1, - r#type: "ACP".to_string(), - title: "Claude CLI".to_string(), - client_native_id: "acp-claude-1".to_string(), - alias_of: None, - created_at: now, - fingerprint: Some("acp/stdio:/usr/bin/claude".to_string()), - }, - ConnectorIndexRow { - connector_uid: uid2, - r#type: "OpenCode".to_string(), - title: "No Fingerprint".to_string(), - client_native_id: "opencode@localhost".to_string(), - alias_of: None, - created_at: now, - fingerprint: None, - }, - ]; - - // Write - write_connector_index(&file_path, &rows).await.unwrap(); - - // Read back - let read_rows = read_connector_index(&file_path).await.unwrap(); - - assert_eq!(read_rows.len(), 2); - assert_eq!( - read_rows[0].fingerprint, - Some("acp/stdio:/usr/bin/claude".to_string()) - ); - assert_eq!(read_rows[1].fingerprint, None); - - // Clean up - tokio::fs::remove_file(&file_path).await.ok(); - } -} diff --git a/crates/dirigent_archivist/src/types.rs b/crates/dirigent_archivist/src/types.rs deleted file mode 100644 index d3f79d4..0000000 --- a/crates/dirigent_archivist/src/types.rs +++ /dev/null @@ -1,1298 +0,0 @@ -//! Core types for the Archivist. -//! -//! This module defines the fundamental data structures used throughout the archivist, -//! including session metadata, message records, and connector information. -//! -//! 
All types follow these conventions: -//! - IDs: UUIDv7 for time-ordered identifiers -//! - Timestamps: RFC 3339 UTC -//! - Versioning: Every record carries a "version" field for schema evolution -//! - Metadata: Free-form JSON object reserved for caller-specific fields - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Continuation type indicating how a session relates to its parent. -/// -/// Defines the relationship when a session is derived from another session. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub enum Continuation { - /// Session splits from parent at a specific message - Split, - /// Session is a compacted version of parent - Compact, - /// Session references parent without duplication - Reference, - /// Session is an edited version of parent - Edit, - /// Session is a subagent spawned by parent via Agent tool - Subagent, - /// Unknown continuation type (for forward compatibility) - Unknown, -} - -/// How complete the session data in the archive is. -/// -/// Tracks whether the archivist has full message history or only -/// discovery-level metadata for this session. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub enum SessionCompleteness { - /// Full message history is available (or session is empty-but-known-complete). - /// - /// Set when: - /// - User creates a new session (session/new) - /// - Session history is fully loaded via session/load replay - /// - /// This is the default for backward compatibility: existing session.json - /// files that lack this field will deserialize as Complete. - #[default] - Complete, - /// Session was discovered via connector session/list. - /// Only ID, title, and metadata are known. No messages in the archive. - Discovered, - /// Messages were loaded but may be incomplete (e.g., agent compacted history). 
- /// Reserved for future refresh/sync functionality. - Partial, -} - -/// Session kind indicating the type of session storage. -/// -/// Distinguishes between regular chat sessions and special meta sessions -/// that track connection events rather than messages. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub enum SessionKind { - /// Regular chat session with messages stored in messages.jsonl - #[default] - Chat, - /// Meta session tracking ACP client connection history. - /// Stores connection events in events.jsonl rather than messages. - /// Linked session content is fetched on-demand, not duplicated. - AcpConnection, -} - -/// Event types for ACP meta sessions. -/// -/// These events track client connection lifecycle and session navigation. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub enum MetaEventType { - /// Client connected to the ACP server - ClientConnected, - /// Client disconnected from the ACP server - ClientDisconnected, - /// Client opened a new session - SessionOpened, - /// Client switched to a different session - SessionSwitched, - /// Client closed a session - SessionClosed, -} - -/// A single event record in an ACP meta session. -/// -/// Stored in `events.jsonl` (one per line) for meta sessions. -/// These events track connection lifecycle and session navigation, -/// NOT the actual message content of sessions. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MetaEventRecord { - /// Schema version for forward compatibility - pub version: u32, - /// Unique event ID (UUIDv7) - pub event_id: Uuid, - /// Meta session this event belongs to (scroll_id) - pub session: Uuid, - /// Timestamp when the event occurred - pub ts: DateTime, - /// Type of event - pub event_type: MetaEventType, - /// Human-readable description of the event - pub description: String, - /// Linked session ID (for SessionOpened, SessionSwitched, SessionClosed events) - #[serde(skip_serializing_if = "Option::is_none")] - pub linked_session_id: Option, - /// Linked connector ID (for session-related events) - #[serde(skip_serializing_if = "Option::is_none")] - pub linked_connector_id: Option, - /// Linked connector title (for display purposes) - #[serde(skip_serializing_if = "Option::is_none")] - pub linked_connector_title: Option, - /// Free-form metadata for additional event data - #[serde(default)] - pub metadata: serde_json::Value, -} - -/// Status returned when registering a connector or session. -/// -/// Indicates whether the registration was accepted, aliased to an existing -/// entity, or rejected. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub enum RegisterStatus { - /// Registration accepted with the provided or generated UID - Accepted, - /// Registration accepted but UID was aliased to an existing entity - Aliased, - /// Registration rejected due to collision or inconsistency - Rejected, -} - -/// Session metadata stored in `session.json`. -/// -/// Contains all metadata about a session including its lineage, connector -/// association, and custom metadata fields. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SessionMetadata { - /// Schema version for forward compatibility - pub version: u32, - /// Unique scroll ID for this session (UUIDv7) - pub scroll_id: Uuid, - /// When the session was created - pub created_at: DateTime, - /// When the session was last updated - pub updated_at: DateTime, - /// Optional human-readable title - #[serde(skip_serializing_if = "Option::is_none")] - pub title: Option, - /// Connector that owns this session - pub connector_uid: Uuid, - /// Native session ID from the connector (if applicable) - #[serde(skip_serializing_if = "Option::is_none")] - pub native_session_id: Option, - /// Agent ID associated with this session (if applicable) - #[serde(skip_serializing_if = "Option::is_none")] - pub agent_id: Option, - /// Parent session this was derived from (if applicable) - #[serde(skip_serializing_if = "Option::is_none")] - pub parent_scroll_id: Option, - /// How this session continues from its parent (if applicable) - #[serde(skip_serializing_if = "Option::is_none")] - pub continuation: Option, - /// Tags for categorization - #[serde(default)] - pub tags: Vec, - /// Free-form metadata for caller-specific fields - #[serde(default)] - pub metadata: serde_json::Value, - /// If true, this session should not appear in default session listings. - /// - /// Sessions with `no_update=true` exist but are hidden from the main archive list. - /// This is useful for sessions that are: - /// - Archived/inactive and shouldn't clutter the UI - /// - System sessions that users don't need to see - /// - Sessions marked for cleanup but not yet deleted - /// - /// Use `list_sessions_all()` or pass `include_no_update=true` to include these sessions. - #[serde(default, skip_serializing_if = "std::ops::Not::not")] - pub no_update: bool, - /// Session kind (Chat or AcpConnection). Defaults to Chat. 
- #[serde(default)] - pub kind: SessionKind, - /// ACP client ID (only for AcpConnection sessions) - #[serde(skip_serializing_if = "Option::is_none")] - pub acp_client_id: Option, - /// Whether the ACP client is currently connected (only for AcpConnection sessions) - #[serde(skip_serializing_if = "Option::is_none")] - pub is_connected: Option, - /// Currently active linked session ID (only for AcpConnection sessions) - #[serde(skip_serializing_if = "Option::is_none")] - pub current_session_id: Option, - /// Agent models metadata (e.g., available models and current selection). - /// Stored as JSON for forward compatibility with protocol changes. - /// Contains `availableModels` and `currentModelId`. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub models: Option, - /// Agent modes metadata (e.g., permission modes, plan mode). - /// Stored as JSON for forward compatibility with protocol changes. - /// Contains `availableModes` and `currentModeId`. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub modes: Option, - /// ACP config options (JSON-serialized Vec). - /// Stored as JSON to avoid coupling the archivist to protocol types. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub config_options: Option, - /// How complete the archived session data is. - /// Defaults to Complete for backward compatibility with existing archives. - #[serde(default)] - pub completeness: SessionCompleteness, - /// Matrix room ID this session is shared to (e.g. "!abc:matrix.org"). - /// Retained even when sharing is disabled so re-enabling can reconnect. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub matrix_room_id: Option, - /// Whether Matrix sharing is currently active for this session. - #[serde(default)] - pub matrix_sharing_active: bool, - /// ISO 8601 timestamp of when sharing was first enabled. 
- #[serde(default, skip_serializing_if = "Option::is_none")] - pub matrix_shared_at: Option>, - /// Whether this session is a subagent (non-loadable, hidden from default lists). - #[serde(default, skip_serializing_if = "std::ops::Not::not")] - pub is_subagent: bool, - /// Subagent type (e.g., "Explore", "rust-task-implementer"). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub subagent_type: Option, - /// The Agent tool_use block ID in the parent that spawned this subagent. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub spawning_tool_use_id: Option, -} - -/// Reference to an attached file in a message. -/// -/// Links to a file stored in the archive's file storage. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AttachmentRef { - /// File identifier (e.g., "sha256:...") - pub file_id: String, - /// Original filename - pub name: String, - /// MIME type of the file (if known) - #[serde(skip_serializing_if = "Option::is_none")] - pub mime_type: Option, -} - -/// A single message record stored in `messages.ndjson`. -/// -/// Each line in the NDJSON file is one message record. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MessageRecord { - /// Schema version for forward compatibility - pub version: u32, - /// Unique message ID (UUIDv7) - pub message_id: Uuid, - /// Session this message belongs to (scroll_id) - pub session: Uuid, - /// Parent message ID (if this is a response or continuation) - #[serde(skip_serializing_if = "Option::is_none")] - pub parent_id: Option, - /// Timestamp when the message was created - pub ts: DateTime, - /// Message role (e.g., "system", "user", "assistant") - pub role: String, - /// Optional author identifier - #[serde(skip_serializing_if = "Option::is_none")] - pub author: Option, - /// Message content in Markdown format (for search and fallback display) - pub content_md: String, - /// Original content parts for rich rendering (tool calls, code blocks, etc.) 
- /// This field preserves the structured MessagePart data for proper UI rendering. - /// If None, the UI should fall back to rendering content_md as plain text. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub content_parts: Option, - /// Attached files - #[serde(default)] - pub attachments: Vec, - /// Free-form metadata for connector-specific fields - #[serde(default)] - pub metadata: serde_json::Value, -} - -/// Connector metadata stored in `connector.json`. -/// -/// Contains information about a connector including its type, title, and -/// native client identifier. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ConnectorRecord { - /// Schema version for forward compatibility - pub version: u32, - /// Unique connector UID (UUIDv7) - pub connector_uid: Uuid, - /// Connector type (e.g., "OpenCode", "ACP", "Other") - #[serde(rename = "type")] - pub r#type: String, - /// Human-readable title - pub title: String, - /// Native client identifier (e.g., "opencode@http://localhost:12225") - pub client_native_id: String, - /// If this connector is an alias of another (for deduplication) - #[serde(skip_serializing_if = "Option::is_none")] - pub alias_of: Option, - /// When the connector was registered - pub created_at: DateTime, - /// Free-form metadata - #[serde(default)] - pub metadata: serde_json::Value, - /// Stable fingerprint for identity matching across connector re-registrations. - /// Format: "{transport}:{command_or_url}" e.g. "acp/stdio:/usr/bin/claude" - #[serde(default, skip_serializing_if = "Option::is_none")] - pub fingerprint: Option, -} - -/// Session mapping entry stored in `sessions.ndjson`. -/// -/// Maps native session IDs from connectors to scroll IDs in the archive. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SessionMapping { - /// Schema version for forward compatibility - pub version: u32, - /// Connector this mapping belongs to - pub connector_uid: Uuid, - /// Native session ID from the connector - pub native_session_id: String, - /// Scroll ID in the archive - pub scroll_id: Uuid, - /// When this mapping was created - pub created_at: DateTime, - /// If this session mapping is an alias of another - #[serde(skip_serializing_if = "Option::is_none")] - pub alias_of: Option, -} - -/// Row in the connector index TSV file. -/// -/// Note: TSV serialization is custom, not derived from serde. -#[derive(Debug, Clone)] -pub struct ConnectorIndexRow { - /// Connector UID - pub connector_uid: Uuid, - /// Connector type - pub r#type: String, - /// Connector title - pub title: String, - /// Native client identifier - pub client_native_id: String, - /// Alias of another connector (if applicable) - pub alias_of: Option, - /// Creation timestamp - pub created_at: DateTime, - /// Stable fingerprint for identity matching across connector re-registrations. - pub fingerprint: Option, -} - -/// File record stored in `file_index.ndjson`. -/// -/// Tracks files stored in the archive's content-addressable storage. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FileRecord { - /// Schema version for forward compatibility - pub version: u32, - /// File identifier (e.g., "sha256:...") - pub file_id: String, - /// Relative path in archive storage - pub path: String, - /// File size in bytes - pub size: u64, - /// MIME type (if known) - #[serde(skip_serializing_if = "Option::is_none")] - pub mime: Option, - /// Original filename - pub original_name: String, - /// Sessions that reference this file - #[serde(default)] - pub sessions: Vec, - /// Free-form metadata - #[serde(default)] - pub metadata: serde_json::Value, -} - -/// Report from a bulk session move operation. 
-/// -/// Tracks success/failure counts and collects error messages for any -/// sessions that could not be moved. -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub struct MoveReport { - /// Number of sessions successfully moved - pub moved: usize, - /// Number of sessions that failed to move - pub failed: usize, - /// Error messages for failed moves (one per failure) - pub errors: Vec, -} - -// ============================================================================ -// API Request/Response Types -// ============================================================================ - -/// Request to register a new connector. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RegisterConnectorRequest { - /// Connector type (e.g., "OpenCode", "ACP") - #[serde(rename = "type")] - pub r#type: String, - /// Human-readable title - pub title: String, - /// Native client identifier - pub client_native_id: String, - /// Optional custom UID (if not provided, one will be generated) - #[serde(skip_serializing_if = "Option::is_none")] - pub custom_uid: Option, - /// Free-form metadata - #[serde(default)] - pub metadata: serde_json::Value, - /// Stable fingerprint for identity matching across connector re-registrations. - /// Format: "{transport}:{command_or_url}" e.g. "acp/stdio:/usr/bin/claude" - #[serde(default, skip_serializing_if = "Option::is_none")] - pub fingerprint: Option, -} - -/// Response from registering a connector. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RegisterConnectorResponse { - /// Registration status - pub status: RegisterStatus, - /// Assigned or existing connector UID - pub connector_uid: Uuid, - /// If aliased, the UID this was aliased to - #[serde(skip_serializing_if = "Option::is_none")] - pub alias_of: Option, - /// Optional note explaining the result - #[serde(skip_serializing_if = "Option::is_none")] - pub note: Option, -} - -/// Request to register a new session. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RegisterSessionRequest { - /// Connector that owns this session - pub connector_uid: Uuid, - /// Native session ID from the connector - pub native_session_id: String, - /// Optional human-readable title - #[serde(skip_serializing_if = "Option::is_none")] - pub title: Option, - /// Optional custom scroll ID (if not provided, one will be generated) - #[serde(skip_serializing_if = "Option::is_none")] - pub custom_scroll_id: Option, - /// Free-form metadata - #[serde(default)] - pub metadata: serde_json::Value, - /// Completeness level for this session. Defaults to Complete. - #[serde(default)] - pub completeness: SessionCompleteness, - /// Parent session scroll ID (for subagent linkage). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub parent_scroll_id: Option, - /// Whether this is a subagent session. - #[serde(default)] - pub is_subagent: bool, - /// How this session continues from parent. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub continuation: Option, - /// Agent ID for this session (subagent identifier). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub agent_id: Option, - /// Subagent type (e.g., "Explore"). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub subagent_type: Option, - /// The Agent tool_use block ID in the parent that spawned this subagent. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub spawning_tool_use_id: Option, -} - -/// Response from registering a session. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RegisterSessionResponse { - /// Registration status - pub status: RegisterStatus, - /// Assigned or existing scroll ID - pub scroll_id: Uuid, - /// If aliased, the scroll ID this was aliased to - #[serde(skip_serializing_if = "Option::is_none")] - pub alias_of: Option, -} - -/// An edge in the session DAG — links a parent session to a child subagent session. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DagEdge { - /// Parent session scroll_id - pub parent: Uuid, - /// Child (subagent) session scroll_id - pub child: Uuid, - /// Subagent's agent_id from Claude Code - pub agent_id: String, - /// Subagent type (e.g., "Explore", "rust-task-implementer") - #[serde(default, skip_serializing_if = "Option::is_none")] - pub subagent_type: Option, - /// The tool_use_id of the Agent call that spawned this subagent - #[serde(default, skip_serializing_if = "Option::is_none")] - pub tool_use_id: Option, - /// Timestamp when the subagent was spawned - #[serde(default, skip_serializing_if = "Option::is_none")] - pub ts: Option>, -} - -// --------------------------------------------------------------------------- -// Session listing — cursor-paged query types -// --------------------------------------------------------------------------- - -/// Maximum number of items a single `list_sessions_paged` call may return. -/// The server clamps `SessionListQuery::limit` to this value. -pub const MAX_PAGE_LIMIT: usize = 200; - -/// Cursor into a sorted session listing. -/// -/// Sessions are ordered by `(updated_at DESC, scroll_id DESC)`. A cursor -/// means "items strictly after this `(updated_at, scroll_id)` point in the -/// ordering". -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub struct SessionCursor { - pub updated_at: chrono::DateTime, - pub scroll_id: uuid::Uuid, -} - -/// Query shape for [`Archivist::list_sessions_paged`]. -/// -/// All filters are AND-combined. `connector_uids` and `project_ids` scope the -/// search; when both are empty, every session in the archive is considered. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SessionListQuery { - pub archive: Option, - - // Scoping - #[serde(default)] - pub connector_uids: Vec, - #[serde(default)] - pub project_ids: Vec, - /// Filter by `metadata.project_path` (exact match on filesystem path). 
- /// Useful for imported sessions that have a working directory but no - /// bound `Project` entity. - pub project_path: Option, - - // Visibility - /// `false` = hide `no_update=true` and `is_subagent=true` sessions (matches - /// legacy `list_sessions` default). `true` = include them (matches legacy - /// `list_sessions_all`). - pub include_hidden: bool, - - // Filters - /// Case-insensitive substring on `SessionMetadata.title`. Sessions with - /// `title=None` never match when this is `Some(_)`. - pub title_query: Option, - /// AND across tags; empty = no filter. - #[serde(default)] - pub tags: Vec, - /// Case-insensitive substring on `metadata.model`. Sessions without a - /// `metadata.model` string never match when this is `Some(_)`. - pub model_filter: Option, - - // Pagination - pub cursor: Option, - /// Requested page size. The implementation clamps to [`MAX_PAGE_LIMIT`]. - pub limit: usize, -} - -impl Default for SessionListQuery { - fn default() -> Self { - Self { - archive: None, - connector_uids: Vec::new(), - project_ids: Vec::new(), - project_path: None, - include_hidden: false, - title_query: None, - tags: Vec::new(), - model_filter: None, - cursor: None, - limit: 20, - } - } -} - -impl SessionListQuery { - /// Builder-style helper for tests and UI code. - /// - /// Adds a single connector UID to the filter. Can be called multiple - /// times to add more connectors. - pub fn with_connector(mut self, connector_uid: uuid::Uuid) -> Self { - self.connector_uids.push(connector_uid); - self - } - - /// Adds a single project ID to the filter. Can be called multiple - /// times to add more projects. 
- pub fn with_project(mut self, project_id: impl Into) -> Self { - self.project_ids.push(project_id.into()); - self - } - - pub fn with_project_path(mut self, path: impl Into) -> Self { - self.project_path = Some(path.into()); - self - } - - pub fn with_archive(mut self, archive: impl Into) -> Self { - self.archive = Some(archive.into()); - self - } - - pub fn with_limit(mut self, limit: usize) -> Self { - self.limit = limit; - self - } - - pub fn with_cursor(mut self, cursor: Option) -> Self { - self.cursor = cursor; - self - } - - pub fn with_title_query(mut self, q: impl Into) -> Self { - self.title_query = Some(q.into()); - self - } - - pub fn with_include_hidden(mut self, include: bool) -> Self { - self.include_hidden = include; - self - } -} - -/// One page of results from [`Archivist::list_sessions_paged`]. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SessionPage { - pub items: Vec, - /// `None` when the scan reached the end of the ordering. - pub next_cursor: Option, - /// Total number of sessions matching the query (before pagination). - /// `None` when the backend does not compute it. - #[serde(default)] - pub total_count: Option, -} - -// --------------------------------------------------------------------------- -// Message listing — cursor-paged query types (Phase 2) -// --------------------------------------------------------------------------- - -/// Cursor into a chronologically sorted message listing. -/// -/// Messages are ordered by `(ts ASC, message_id ASC)`. A cursor means -/// "items strictly after this `(ts, message_id)` point in the ordering". -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub struct MessageCursor { - pub ts: chrono::DateTime, - pub message_id: uuid::Uuid, -} - -/// A single page of messages returned from cursor-paged reads. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MessagePage { - pub items: Vec, - pub next_cursor: Option, -} - -#[cfg(any(test, feature = "test-utils"))] -impl SessionMetadata { - /// Test-only: minimal valid session metadata with defaulted fields. - pub fn stub(scroll_id: Uuid) -> Self { - let now = chrono::Utc::now(); - Self { - version: 1, - scroll_id, - created_at: now, - updated_at: now, - title: None, - connector_uid: Uuid::nil(), - native_session_id: None, - agent_id: None, - parent_scroll_id: None, - continuation: None, - tags: vec![], - metadata: serde_json::Value::Null, - no_update: false, - kind: SessionKind::Chat, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - completeness: SessionCompleteness::default(), - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::time::SystemTime; - - #[test] - fn test_continuation_serialization() { - let continuation = Continuation::Split; - let json = serde_json::to_string(&continuation).unwrap(); - assert_eq!(json, r#""SPLIT""#); - - let deserialized: Continuation = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized, continuation); - - // Test all variants - let variants = vec![ - (Continuation::Split, r#""SPLIT""#), - (Continuation::Compact, r#""COMPACT""#), - (Continuation::Reference, r#""REFERENCE""#), - (Continuation::Edit, r#""EDIT""#), - (Continuation::Unknown, r#""UNKNOWN""#), - ]; - - for (variant, expected) in variants { - let json = serde_json::to_string(&variant).unwrap(); - assert_eq!(json, expected); - let deserialized: Continuation = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized, variant); - } - } - - #[test] - fn test_register_status_serialization() { - let status = RegisterStatus::Accepted; - let json = 
serde_json::to_string(&status).unwrap(); - assert_eq!(json, r#""ACCEPTED""#); - - let deserialized: RegisterStatus = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized, status); - - // Test all variants - let variants = vec![ - (RegisterStatus::Accepted, r#""ACCEPTED""#), - (RegisterStatus::Aliased, r#""ALIASED""#), - (RegisterStatus::Rejected, r#""REJECTED""#), - ]; - - for (variant, expected) in variants { - let json = serde_json::to_string(&variant).unwrap(); - assert_eq!(json, expected); - let deserialized: RegisterStatus = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized, variant); - } - } - - #[test] - fn test_session_metadata_roundtrip() { - let connector_uid = Uuid::now_v7(); - let scroll_id = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let metadata = SessionMetadata { - version: 1, - scroll_id, - created_at: now, - updated_at: now, - title: Some("Test Session".to_string()), - connector_uid, - native_session_id: Some("native-123".to_string()), - agent_id: Some("claude-3-5".to_string()), - parent_scroll_id: None, - continuation: None, - tags: vec!["test".to_string(), "example".to_string()], - metadata: serde_json::json!({ - "source": "OpenCode", - "model": "claude-3-5-sonnet" - }), - no_update: false, - kind: SessionKind::Chat, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - completeness: SessionCompleteness::Complete, - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - }; - - // Serialize to JSON - let json = serde_json::to_string_pretty(&metadata).unwrap(); - - // Deserialize back - let deserialized: SessionMetadata = serde_json::from_str(&json).unwrap(); - - // Verify fields - assert_eq!(deserialized.version, 1); - assert_eq!(deserialized.scroll_id, scroll_id); - assert_eq!(deserialized.connector_uid, connector_uid); 
- assert_eq!(deserialized.title, Some("Test Session".to_string())); - assert_eq!( - deserialized.native_session_id, - Some("native-123".to_string()) - ); - assert_eq!(deserialized.tags, vec!["test", "example"]); - } - - #[test] - fn test_message_record_roundtrip() { - let message_id = Uuid::now_v7(); - let session_id = Uuid::now_v7(); - let parent_id = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let message = MessageRecord { - version: 1, - message_id, - session: session_id, - parent_id: Some(parent_id), - ts: now, - role: "user".to_string(), - author: Some("alice".to_string()), - content_md: "Hello, world!".to_string(), - content_parts: None, - attachments: vec![AttachmentRef { - file_id: "sha256:abc123".to_string(), - name: "spec.pdf".to_string(), - mime_type: Some("application/pdf".to_string()), - }], - metadata: serde_json::json!({ - "connector_msg_id": "native-456" - }), - }; - - // Serialize to JSON - let json = serde_json::to_string(&message).unwrap(); - - // Deserialize back - let deserialized: MessageRecord = serde_json::from_str(&json).unwrap(); - - // Verify fields - assert_eq!(deserialized.version, 1); - assert_eq!(deserialized.message_id, message_id); - assert_eq!(deserialized.session, session_id); - assert_eq!(deserialized.parent_id, Some(parent_id)); - assert_eq!(deserialized.role, "user"); - assert_eq!(deserialized.author, Some("alice".to_string())); - assert_eq!(deserialized.content_md, "Hello, world!"); - assert_eq!(deserialized.attachments.len(), 1); - assert_eq!(deserialized.attachments[0].file_id, "sha256:abc123"); - } - - #[test] - fn test_connector_record_roundtrip() { - let connector_uid = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let connector = ConnectorRecord { - version: 1, - connector_uid, - r#type: "OpenCode".to_string(), - title: "OpenCode Local".to_string(), - client_native_id: "opencode@http://localhost:12225".to_string(), - alias_of: None, - created_at: now, - metadata: 
serde_json::json!({}), - fingerprint: None, - }; - - // Serialize to JSON - let json = serde_json::to_string_pretty(&connector).unwrap(); - - // Verify "type" field is used (not "r#type") - assert!(json.contains(r#""type""#)); - assert!(!json.contains(r#""r#type""#)); - - // Deserialize back - let deserialized: ConnectorRecord = serde_json::from_str(&json).unwrap(); - - // Verify fields - assert_eq!(deserialized.version, 1); - assert_eq!(deserialized.connector_uid, connector_uid); - assert_eq!(deserialized.r#type, "OpenCode"); - assert_eq!(deserialized.title, "OpenCode Local"); - assert_eq!( - deserialized.client_native_id, - "opencode@http://localhost:12225" - ); - assert_eq!(deserialized.alias_of, None); - } - - #[test] - fn test_session_mapping_roundtrip() { - let connector_uid = Uuid::now_v7(); - let scroll_id = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let mapping = SessionMapping { - version: 1, - connector_uid, - native_session_id: "abc123".to_string(), - scroll_id, - created_at: now, - alias_of: None, - }; - - // Serialize to JSON - let json = serde_json::to_string(&mapping).unwrap(); - - // Deserialize back - let deserialized: SessionMapping = serde_json::from_str(&json).unwrap(); - - // Verify fields - assert_eq!(deserialized.version, 1); - assert_eq!(deserialized.connector_uid, connector_uid); - assert_eq!(deserialized.native_session_id, "abc123"); - assert_eq!(deserialized.scroll_id, scroll_id); - } - - #[test] - fn test_file_record_roundtrip() { - let session1 = Uuid::now_v7(); - let session2 = Uuid::now_v7(); - - let file_record = FileRecord { - version: 1, - file_id: "sha256:abc123def456".to_string(), - path: ".files/ab/cd/abc123def456".to_string(), - size: 123456, - mime: Some("application/pdf".to_string()), - original_name: "spec.pdf".to_string(), - sessions: vec![session1, session2], - metadata: serde_json::json!({ - "source": "upload" - }), - }; - - // Serialize to JSON - let json = 
serde_json::to_string(&file_record).unwrap(); - - // Deserialize back - let deserialized: FileRecord = serde_json::from_str(&json).unwrap(); - - // Verify fields - assert_eq!(deserialized.version, 1); - assert_eq!(deserialized.file_id, "sha256:abc123def456"); - assert_eq!(deserialized.path, ".files/ab/cd/abc123def456"); - assert_eq!(deserialized.size, 123456); - assert_eq!(deserialized.mime, Some("application/pdf".to_string())); - assert_eq!(deserialized.original_name, "spec.pdf"); - assert_eq!(deserialized.sessions.len(), 2); - } - - #[test] - fn test_uuidv7_generation() { - // Generate several UUIDv7s - let uuid1 = Uuid::now_v7(); - std::thread::sleep(std::time::Duration::from_millis(2)); - let uuid2 = Uuid::now_v7(); - std::thread::sleep(std::time::Duration::from_millis(2)); - let uuid3 = Uuid::now_v7(); - - // Verify they're valid UUIDs - assert_eq!(uuid1.get_version_num(), 7); - assert_eq!(uuid2.get_version_num(), 7); - assert_eq!(uuid3.get_version_num(), 7); - - // Verify they're in time order (UUIDv7 is time-ordered) - // Convert to bytes for comparison - let bytes1 = uuid1.as_bytes(); - let bytes2 = uuid2.as_bytes(); - let bytes3 = uuid3.as_bytes(); - - // UUIDv7 should be sortable by bytes (time-ordered) - assert!(bytes1 < bytes2); - assert!(bytes2 < bytes3); - } - - #[test] - fn test_rfc3339_timestamps() { - let now = DateTime::::from(SystemTime::now()); - - // Serialize to RFC 3339 format - let json = serde_json::to_string(&now).unwrap(); - - // Should be in quotes and RFC 3339 format - assert!(json.starts_with('"')); - assert!(json.ends_with('"')); - assert!(json.contains('T')); - assert!(json.contains('Z') || json.contains('+')); - - // Deserialize back - let deserialized: DateTime = serde_json::from_str(&json).unwrap(); - - // Should be within a second of the original (allowing for microsecond precision loss) - let diff = (now.timestamp_millis() - deserialized.timestamp_millis()).abs(); - assert!(diff < 1000, "Timestamp difference too large: {} ms", 
diff); - } - - #[test] - fn test_api_request_types() { - // Test RegisterConnectorRequest - let request = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "test@localhost".to_string(), - custom_uid: Some(Uuid::now_v7()), - metadata: serde_json::json!({ "key": "value" }), - fingerprint: None, - }; - - let json = serde_json::to_string(&request).unwrap(); - let deserialized: RegisterConnectorRequest = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized.r#type, "OpenCode"); - assert_eq!(deserialized.title, "Test Connector"); - - // Test RegisterSessionRequest - let request = RegisterSessionRequest { - connector_uid: Uuid::now_v7(), - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: Some(Uuid::now_v7()), - metadata: serde_json::json!({}), - completeness: SessionCompleteness::Complete, - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let json = serde_json::to_string(&request).unwrap(); - let deserialized: RegisterSessionRequest = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized.native_session_id, "native-123"); - assert_eq!(deserialized.title, Some("Test Session".to_string())); - } - - #[test] - fn test_api_response_types() { - // Test RegisterConnectorResponse - let response = RegisterConnectorResponse { - status: RegisterStatus::Accepted, - connector_uid: Uuid::now_v7(), - alias_of: None, - note: Some("Successfully registered".to_string()), - }; - - let json = serde_json::to_string(&response).unwrap(); - let deserialized: RegisterConnectorResponse = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized.status, RegisterStatus::Accepted); - assert_eq!( - deserialized.note, - Some("Successfully registered".to_string()) - ); - - // Test RegisterSessionResponse - let response = RegisterSessionResponse { - 
status: RegisterStatus::Aliased, - scroll_id: Uuid::now_v7(), - alias_of: Some(Uuid::now_v7()), - }; - - let json = serde_json::to_string(&response).unwrap(); - let deserialized: RegisterSessionResponse = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized.status, RegisterStatus::Aliased); - assert!(deserialized.alias_of.is_some()); - } - - #[test] - fn test_session_kind_serialization() { - let kind_chat = SessionKind::Chat; - let json = serde_json::to_string(&kind_chat).unwrap(); - assert_eq!(json, r#""CHAT""#); - - let deserialized: SessionKind = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized, kind_chat); - - // Test all variants - let variants = vec![ - (SessionKind::Chat, r#""CHAT""#), - (SessionKind::AcpConnection, r#""ACP_CONNECTION""#), - ]; - - for (variant, expected) in variants { - let json = serde_json::to_string(&variant).unwrap(); - assert_eq!(json, expected); - let deserialized: SessionKind = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized, variant); - } - } - - #[test] - fn test_session_kind_default() { - let kind: SessionKind = Default::default(); - assert_eq!(kind, SessionKind::Chat); - } - - #[test] - fn test_meta_event_type_serialization() { - let event_type = MetaEventType::ClientConnected; - let json = serde_json::to_string(&event_type).unwrap(); - assert_eq!(json, r#""CLIENT_CONNECTED""#); - - let deserialized: MetaEventType = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized, event_type); - - // Test all variants - let variants = vec![ - (MetaEventType::ClientConnected, r#""CLIENT_CONNECTED""#), - (MetaEventType::ClientDisconnected, r#""CLIENT_DISCONNECTED""#), - (MetaEventType::SessionOpened, r#""SESSION_OPENED""#), - (MetaEventType::SessionSwitched, r#""SESSION_SWITCHED""#), - (MetaEventType::SessionClosed, r#""SESSION_CLOSED""#), - ]; - - for (variant, expected) in variants { - let json = serde_json::to_string(&variant).unwrap(); - assert_eq!(json, expected); - let deserialized: 
MetaEventType = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized, variant); - } - } - - #[test] - fn test_meta_event_record_roundtrip() { - let event_id = Uuid::now_v7(); - let session = Uuid::now_v7(); - let linked_session = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let event = MetaEventRecord { - version: 1, - event_id, - session, - ts: now, - event_type: MetaEventType::SessionOpened, - description: "Client opened session".to_string(), - linked_session_id: Some(linked_session), - linked_connector_id: Some("connector-123".to_string()), - linked_connector_title: Some("Claude Session".to_string()), - metadata: serde_json::json!({ - "client_version": "1.0.0" - }), - }; - - // Serialize to JSON - let json = serde_json::to_string(&event).unwrap(); - - // Deserialize back - let deserialized: MetaEventRecord = serde_json::from_str(&json).unwrap(); - - // Verify fields - assert_eq!(deserialized.version, 1); - assert_eq!(deserialized.event_id, event_id); - assert_eq!(deserialized.session, session); - assert_eq!(deserialized.event_type, MetaEventType::SessionOpened); - assert_eq!(deserialized.description, "Client opened session"); - assert_eq!(deserialized.linked_session_id, Some(linked_session)); - assert_eq!(deserialized.linked_connector_id, Some("connector-123".to_string())); - assert_eq!(deserialized.linked_connector_title, Some("Claude Session".to_string())); - } - - #[test] - fn test_meta_event_record_minimal() { - let event_id = Uuid::now_v7(); - let session = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let event = MetaEventRecord { - version: 1, - event_id, - session, - ts: now, - event_type: MetaEventType::ClientConnected, - description: "Client connected".to_string(), - linked_session_id: None, - linked_connector_id: None, - linked_connector_title: None, - metadata: serde_json::json!({}), - }; - - // Serialize to JSON - let json = serde_json::to_string(&event).unwrap(); - - // Verify optional 
fields are not serialized - assert!(!json.contains("linked_session_id")); - assert!(!json.contains("linked_connector_id")); - assert!(!json.contains("linked_connector_title")); - - // Deserialize back - let deserialized: MetaEventRecord = serde_json::from_str(&json).unwrap(); - - // Verify fields - assert_eq!(deserialized.version, 1); - assert_eq!(deserialized.event_id, event_id); - assert_eq!(deserialized.session, session); - assert_eq!(deserialized.event_type, MetaEventType::ClientConnected); - assert_eq!(deserialized.description, "Client connected"); - assert_eq!(deserialized.linked_session_id, None); - assert_eq!(deserialized.linked_connector_id, None); - assert_eq!(deserialized.linked_connector_title, None); - } - - #[test] - fn test_session_metadata_with_meta_fields() { - let connector_uid = Uuid::now_v7(); - let scroll_id = Uuid::now_v7(); - let current_session_id = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - let metadata = SessionMetadata { - version: 1, - scroll_id, - created_at: now, - updated_at: now, - title: Some("ACP Connection".to_string()), - connector_uid, - native_session_id: Some("acp-meta-client-123".to_string()), - agent_id: None, - parent_scroll_id: None, - continuation: None, - tags: vec![], - metadata: serde_json::json!({}), - no_update: false, - kind: SessionKind::AcpConnection, - acp_client_id: Some("client-123".to_string()), - is_connected: Some(true), - current_session_id: Some(current_session_id), - models: None, - modes: None, - config_options: None, - completeness: SessionCompleteness::Complete, - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - }; - - // Serialize to JSON - let json = serde_json::to_string_pretty(&metadata).unwrap(); - - // Deserialize back - let deserialized: SessionMetadata = serde_json::from_str(&json).unwrap(); - - // Verify fields - assert_eq!(deserialized.version, 1); - 
assert_eq!(deserialized.scroll_id, scroll_id); - assert_eq!(deserialized.kind, SessionKind::AcpConnection); - assert_eq!(deserialized.acp_client_id, Some("client-123".to_string())); - assert_eq!(deserialized.is_connected, Some(true)); - assert_eq!(deserialized.current_session_id, Some(current_session_id)); - } - - #[test] - fn test_session_metadata_chat_defaults() { - let connector_uid = Uuid::now_v7(); - let scroll_id = Uuid::now_v7(); - let now = DateTime::::from(SystemTime::now()); - - // Simulate deserializing old session metadata without new fields - let json = serde_json::json!({ - "version": 1, - "scroll_id": scroll_id, - "created_at": now, - "updated_at": now, - "title": "Old Session", - "connector_uid": connector_uid, - "tags": [], - "metadata": {} - }); - - let deserialized: SessionMetadata = serde_json::from_value(json).unwrap(); - - // Verify new fields use defaults - assert_eq!(deserialized.kind, SessionKind::Chat); - assert_eq!(deserialized.acp_client_id, None); - assert_eq!(deserialized.is_connected, None); - assert_eq!(deserialized.current_session_id, None); - } -} diff --git a/crates/dirigent_archivist/tests/archive_filter_test.rs b/crates/dirigent_archivist/tests/archive_filter_test.rs deleted file mode 100644 index 055db13..0000000 --- a/crates/dirigent_archivist/tests/archive_filter_test.rs +++ /dev/null @@ -1,334 +0,0 @@ -//! Two-archive fanout tests exercising `ArchiveFilter` semantics. -//! -//! The primary backend is unfiltered; the secondary backend carries a -//! restricted filter. Writes should always reach the primary but only -//! fan out to the secondary when the session passes the filter. 
- -#![cfg(feature = "test-utils")] - -use std::collections::HashSet; -use std::sync::Arc; - -use chrono::Utc; -use uuid::Uuid; - -use dirigent_archivist::backend::mock::MockBackend; -use dirigent_archivist::backend::{ArchiveBackend, HealthStatus}; -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::registry::{ - ArchiveFilter, ArchiveRegistration, FailureMode, WritePolicy, -}; -use dirigent_archivist::types::{ - ConnectorRecord, MessageRecord, RegisterSessionRequest, -}; - -fn reg( - name: &str, - backend: Arc, - priority: u32, - filter: ArchiveFilter, -) -> Arc { - Arc::new( - ArchiveRegistration::new( - name.into(), - "mock", - backend as Arc, - /* write_active */ true, - FailureMode::Required, - priority, - /* enabled */ true, - WritePolicy::Inline, - /* writer */ None, - HealthStatus::Healthy, - ) - .with_filter(filter), - ) -} - -/// Seed a connector into a MockBackend directly, bypassing the coordinator. -async fn seed_connector(backend: &MockBackend, connector_uid: Uuid, client_native_id: &str) { - use dirigent_archivist::backend::ConnectorRegistryBackend; - let rec = ConnectorRecord { - version: 1, - connector_uid, - r#type: "Mock".into(), - title: "Mock connector".into(), - client_native_id: client_native_id.into(), - alias_of: None, - created_at: Utc::now(), - metadata: serde_json::Value::Null, - fingerprint: None, - }; - backend - .put_connector(rec) - .await - .expect("put_connector succeeds"); -} - -fn make_msg(session: Uuid, n: u32) -> MessageRecord { - MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session, - parent_id: None, - ts: Utc::now(), - role: "user".into(), - author: None, - content_md: format!("msg {}", n), - content_parts: None, - attachments: vec![], - metadata: serde_json::Value::Null, - } -} - -#[tokio::test] -async fn secondary_archive_filters_by_exclude_connector() { - let primary_backend = Arc::new(MockBackend::new()); - let secondary_backend = Arc::new(MockBackend::new()); - - let connector_a = 
Uuid::now_v7(); - let connector_b = Uuid::now_v7(); - - // Connector A is excluded from the secondary. - let mut excluded = HashSet::new(); - excluded.insert(connector_a); - let secondary_filter = ArchiveFilter { - exclude_connectors: excluded, - ..Default::default() - }; - - // Seed connectors on primary (and on secondary so mapping writes that DO - // pass the filter don't fail for unrelated reasons). - seed_connector(&primary_backend, connector_a, "native/a").await; - seed_connector(&primary_backend, connector_b, "native/b").await; - - let archivist = Archivist::from_registrations(vec![ - reg("primary", primary_backend.clone(), 0, ArchiveFilter::default()), - reg("secondary", secondary_backend.clone(), 10, secondary_filter), - ]); - - // Register a session for each connector. - let resp_a = archivist - .register_session( - RegisterSessionRequest { - connector_uid: connector_a, - native_session_id: "sess-a".into(), - title: None, - custom_scroll_id: None, - metadata: serde_json::Value::Null, - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await - .expect("register session a"); - let scroll_a = resp_a.scroll_id; - - let resp_b = archivist - .register_session( - RegisterSessionRequest { - connector_uid: connector_b, - native_session_id: "sess-b".into(), - title: None, - custom_scroll_id: None, - metadata: serde_json::Value::Null, - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await - .expect("register session b"); - let scroll_b = resp_b.scroll_id; - - // Append 3 messages to each session. 
- archivist - .append_messages( - scroll_a, - vec![make_msg(scroll_a, 1), make_msg(scroll_a, 2), make_msg(scroll_a, 3)], - None, - ) - .await - .expect("append to a"); - archivist - .append_messages( - scroll_b, - vec![make_msg(scroll_b, 1), make_msg(scroll_b, 2), make_msg(scroll_b, 3)], - None, - ) - .await - .expect("append to b"); - - // Primary sees every message. - assert_eq!(primary_backend.appended_count(scroll_a), 3); - assert_eq!(primary_backend.appended_count(scroll_b), 3); - - // Secondary excludes connector_a: scroll_a is filtered out, - // scroll_b is replicated. - assert_eq!( - secondary_backend.appended_count(scroll_a), - 0, - "secondary should NOT receive messages for the excluded connector" - ); - assert_eq!( - secondary_backend.appended_count(scroll_b), - 3, - "secondary should receive messages for the allowed connector" - ); - - // Session metadata fanout follows the same rule. - assert!( - primary_backend - .get_session(scroll_a) - .await - .unwrap() - .is_some(), - "primary has scroll_a" - ); - assert!( - secondary_backend - .get_session(scroll_a) - .await - .unwrap() - .is_none(), - "secondary should NOT have scroll_a (excluded connector)" - ); - assert!( - secondary_backend - .get_session(scroll_b) - .await - .unwrap() - .is_some(), - "secondary should have scroll_b (allowed connector)" - ); -} - -#[tokio::test] -async fn secondary_archive_filters_by_include_tag() { - let primary_backend = Arc::new(MockBackend::new()); - let secondary_backend = Arc::new(MockBackend::new()); - - let connector = Uuid::now_v7(); - seed_connector(&primary_backend, connector, "native/tagged").await; - - let mut include = HashSet::new(); - include.insert("prod".to_string()); - let secondary_filter = ArchiveFilter { - include_tags: include, - ..Default::default() - }; - - let archivist = Archivist::from_registrations(vec![ - reg("primary", primary_backend.clone(), 0, ArchiveFilter::default()), - reg("secondary", secondary_backend.clone(), 10, secondary_filter), - 
]); - - // Register two sessions on the same connector. - let prod_resp = archivist - .register_session( - RegisterSessionRequest { - connector_uid: connector, - native_session_id: "sess-prod".into(), - title: None, - custom_scroll_id: None, - metadata: serde_json::Value::Null, - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await - .expect("register prod session"); - let scroll_prod = prod_resp.scroll_id; - - let dev_resp = archivist - .register_session( - RegisterSessionRequest { - connector_uid: connector, - native_session_id: "sess-dev".into(), - title: None, - custom_scroll_id: None, - metadata: serde_json::Value::Null, - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await - .expect("register dev session"); - let scroll_dev = dev_resp.scroll_id; - - // Tag the prod session directly on the primary so the coordinator can - // see it on the next fanout metadata lookup. We mutate via the primary - // backend to avoid going through update_session_metadata (which doesn't - // expose a tag API). - { - use dirigent_archivist::backend::ArchiveBackend as _; - let mut md = primary_backend - .get_session(scroll_prod) - .await - .unwrap() - .expect("prod session on primary"); - md.tags.push("prod".into()); - primary_backend.put_session(md).await.unwrap(); - } - - // Append messages AFTER tagging — now the filter consults the tagged metadata. 
- archivist - .append_messages( - scroll_prod, - vec![ - make_msg(scroll_prod, 1), - make_msg(scroll_prod, 2), - make_msg(scroll_prod, 3), - ], - None, - ) - .await - .expect("append prod"); - archivist - .append_messages( - scroll_dev, - vec![make_msg(scroll_dev, 1), make_msg(scroll_dev, 2), make_msg(scroll_dev, 3)], - None, - ) - .await - .expect("append dev"); - - // Primary keeps both. - assert_eq!(primary_backend.appended_count(scroll_prod), 3); - assert_eq!(primary_backend.appended_count(scroll_dev), 3); - - // Secondary only keeps the tagged session. - assert_eq!( - secondary_backend.appended_count(scroll_prod), - 3, - "secondary receives messages for the `prod`-tagged session" - ); - assert_eq!( - secondary_backend.appended_count(scroll_dev), - 0, - "secondary rejects the untagged session" - ); -} diff --git a/crates/dirigent_archivist/tests/fixtures/claude_minimal/projects/-home-user-myproj/abc12345-1234-1234-1234-abcdef123456.jsonl b/crates/dirigent_archivist/tests/fixtures/claude_minimal/projects/-home-user-myproj/abc12345-1234-1234-1234-abcdef123456.jsonl deleted file mode 100644 index 86e5b37..0000000 --- a/crates/dirigent_archivist/tests/fixtures/claude_minimal/projects/-home-user-myproj/abc12345-1234-1234-1234-abcdef123456.jsonl +++ /dev/null @@ -1,2 +0,0 @@ -{"type":"user","uuid":"11111111-1111-7111-8111-111111111111","parentUuid":null,"timestamp":"2024-01-01T00:00:00Z","sessionId":"abc12345-1234-1234-1234-abcdef123456","cwd":"/home/user/myproj","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"hello"}} 
-{"type":"assistant","uuid":"22222222-2222-7222-8222-222222222222","parentUuid":"11111111-1111-7111-8111-111111111111","timestamp":"2024-01-01T00:00:01Z","sessionId":"abc12345-1234-1234-1234-abcdef123456","cwd":"/home/user/myproj","version":"2.1.71","gitBranch":"main","isSidechain":false,"requestId":"req-001","message":{"model":"claude-3-5-sonnet","id":"msg-abc","type":"message","role":"assistant","content":[{"type":"text","text":"hi back"}],"stop_reason":"end_turn","usage":{"input_tokens":10,"output_tokens":5}}} diff --git a/crates/dirigent_archivist/tests/import_claude_idempotency_test.rs b/crates/dirigent_archivist/tests/import_claude_idempotency_test.rs deleted file mode 100644 index da3445c..0000000 --- a/crates/dirigent_archivist/tests/import_claude_idempotency_test.rs +++ /dev/null @@ -1,153 +0,0 @@ -//! End-to-end test: import a Claude fixture twice, expect no duplication; -//! then append a new message and re-import, expect exactly 1 new message. - -use camino::Utf8PathBuf; -use dirigent_archivist::{ - backends::JsonlBackend, - import::{claude::import_claude_sessions, ImportProgressSink}, - Archivist, SessionListQuery, -}; -use std::sync::Arc; -use uuid::Uuid; - -fn fixture_root() -> Utf8PathBuf { - Utf8PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()) - .join("tests/fixtures/claude_minimal") -} - -/// Build a self-contained coordinator for a given archive root. -/// -/// Uses `from_single_backend` so that parallel-test runs do not race on a -/// shared `.archives.json` in the tempdir's parent (which is what -/// `new_with_single_archive` would create). 
-async fn mk_archivist(root: std::path::PathBuf) -> dirigent_archivist::Result { - let backend = Arc::new(JsonlBackend::new(root).await?); - Archivist::from_single_backend("main".into(), backend).await -} - -#[tokio::test] -async fn claude_import_twice_is_idempotent() -> dirigent_archivist::Result<()> { - let tmp = std::env::temp_dir().join(format!("claude_idem_{}", Uuid::now_v7())); - let archivist = mk_archivist(tmp.clone()).await?; - - let fixture = fixture_root(); - - // First run — should import everything. - let stats1 = import_claude_sessions(&archivist, &fixture, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?; - assert!( - stats1.sessions_imported >= 1, - "expected at least one imported session, got stats {:?}", - stats1 - ); - assert!( - stats1.messages_written >= 2, - "expected >=2 messages written, got {:?}", - stats1 - ); - - // Second run — should write nothing (fingerprint gate skips unchanged sessions). - let stats2 = import_claude_sessions(&archivist, &fixture, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?; - assert_eq!( - stats2.messages_written, 0, - "expected no re-write on second import, got {:?}", - stats2 - ); - assert_eq!(stats2.sessions_imported, 0); - assert!( - stats2.sessions_skipped >= 1, - "expected at least one skipped session, got {:?}", - stats2 - ); - - // Verify on disk: no duplicate message_ids within any session. 
- let page = archivist - .list_sessions_paged(SessionListQuery::default().with_limit(200)) - .await?; - for session in &page.items { - let messages = archivist.get_messages(session.scroll_id, None).await?; - let mut seen = std::collections::HashSet::new(); - for m in &messages { - assert!( - seen.insert(m.message_id), - "duplicate message_id {} in session {}", - m.message_id, - session.scroll_id - ); - } - } - - let _ = tokio::fs::remove_dir_all(tmp).await; - Ok(()) -} - -#[tokio::test] -async fn claude_import_picks_up_additive_growth() -> dirigent_archivist::Result<()> { - // Copy the fixture to a mutable temp dir so we can append a message. - let tmp_src = std::env::temp_dir().join(format!("claude_grow_src_{}", Uuid::now_v7())); - let fixture = fixture_root(); - copy_dir_recursive(&fixture.as_std_path().to_path_buf(), &tmp_src).await; - - let tmp_arch = std::env::temp_dir().join(format!("claude_grow_arch_{}", Uuid::now_v7())); - let archivist = mk_archivist(tmp_arch.clone()).await?; - - let src = Utf8PathBuf::from_path_buf(tmp_src.clone()).unwrap(); - let _ = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?; - - // Append a new message to the existing JSONL. 
- let jsonl = find_jsonl(&tmp_src).expect("fixture jsonl not found"); - let extra = r#"{"type":"user","uuid":"33333333-3333-7333-8333-333333333333","parentUuid":"22222222-2222-7222-8222-222222222222","timestamp":"2024-01-01T00:00:02Z","sessionId":"abc12345-1234-1234-1234-abcdef123456","cwd":"/home/user/myproj","version":"2.1.71","gitBranch":"main","isSidechain":false,"isMeta":false,"userType":"external","message":{"role":"user","content":"follow up"}}"#; - use tokio::io::AsyncWriteExt; - let mut f = tokio::fs::OpenOptions::new() - .append(true) - .open(&jsonl) - .await - .unwrap(); - f.write_all(extra.as_bytes()).await.unwrap(); - f.write_all(b"\n").await.unwrap(); - drop(f); - - let stats = import_claude_sessions(&archivist, &src, None, &ImportProgressSink::noop(), &std::collections::HashMap::new()).await?; - assert_eq!( - stats.messages_written, 1, - "expected 1 new message to be imported, got {:?}", - stats - ); - assert_eq!( - stats.sessions_updated, 1, - "expected 1 session updated, got {:?}", - stats - ); - - let _ = tokio::fs::remove_dir_all(tmp_src).await; - let _ = tokio::fs::remove_dir_all(tmp_arch).await; - Ok(()) -} - -async fn copy_dir_recursive(src: &std::path::Path, dst: &std::path::Path) { - tokio::fs::create_dir_all(dst).await.unwrap(); - let mut stack = vec![(src.to_path_buf(), dst.to_path_buf())]; - while let Some((s, d)) = stack.pop() { - let mut entries = tokio::fs::read_dir(&s).await.unwrap(); - while let Some(entry) = entries.next_entry().await.unwrap() { - let from = entry.path(); - let to = d.join(entry.file_name()); - if entry.file_type().await.unwrap().is_dir() { - tokio::fs::create_dir_all(&to).await.unwrap(); - stack.push((from, to)); - } else { - tokio::fs::copy(&from, &to).await.unwrap(); - } - } - } -} - -fn find_jsonl(dir: &std::path::Path) -> Option { - for entry in walkdir::WalkDir::new(dir).into_iter().flatten() { - if entry.file_type().is_file() - && entry.path().extension().and_then(|s| s.to_str()) == Some("jsonl") - { - return 
Some(entry.path().to_path_buf()); - } - } - None -} diff --git a/crates/dirigent_archivist/tests/import_progress_test.rs b/crates/dirigent_archivist/tests/import_progress_test.rs deleted file mode 100644 index 15176bd..0000000 --- a/crates/dirigent_archivist/tests/import_progress_test.rs +++ /dev/null @@ -1,89 +0,0 @@ -//! Integration test: importer trait progress events fire in expected order. -//! -//! Drives a full `ChatGptImporter::import` against a fixture and asserts on -//! the `ImportProgressEvent` sequence observed on the paired receiver. - -use std::sync::Arc; -use tempfile::TempDir; - -use dirigent_archivist::{ - backends::JsonlBackend, - coordinator::Archivist, - import::{ - ImportConfig, ImportProgressEvent, ImportProgressSink, ImportTarget, ImporterRegistry, - }, -}; - -#[tokio::test] -async fn progress_event_sequence_is_well_formed() { - // 1. Setup an in-memory archivist (JsonlBackend in tempdir). - let dir = TempDir::new().unwrap(); - let backend = Arc::new(JsonlBackend::new(dir.path().to_path_buf()).await.unwrap()); - let archivist = Archivist::from_single_backend("main".into(), backend) - .await - .unwrap(); - let archivist = Arc::new(archivist); - - // 2. Use the chatgpt fixture — a minimal conversations.json with a - // user + assistant message pair. - let fixture = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("../dirigent_chatgpt/tests/fixtures/minimal.json"); - assert!( - fixture.exists(), - "chatgpt fixture missing at {}", - fixture.display() - ); - - let cfg = ImportConfig { - source: "chatgpt".into(), - params: { - let mut m = std::collections::BTreeMap::new(); - m.insert("path".into(), serde_json::json!(fixture.display().to_string())); - m - }, - }; - - // 3. Run the import with a channel sink. 
- let registry = ImporterRegistry::default(); - let importer = registry.get("chatgpt").expect("chatgpt registered"); - let (sink, mut rx) = ImportProgressSink::channel(); - - let archivist_for_job = archivist.clone(); - let job = tokio::spawn(async move { - importer - .import(&cfg, &*archivist_for_job, ImportTarget::default(), sink) - .await - }); - - // 4. Collect all events until the sender side is dropped. - let mut events = Vec::new(); - while let Some(evt) = rx.recv().await { - events.push(evt); - } - let stats = job.await.unwrap().expect("import"); - - // 5. Assertions on the event sequence. - // Must contain at least one SessionStarted before any SessionFinished. - let started_idx = events - .iter() - .position(|e| matches!(e, ImportProgressEvent::SessionStarted { .. })); - let finished_idx = events - .iter() - .position(|e| matches!(e, ImportProgressEvent::SessionFinished { .. })); - - assert!(started_idx.is_some(), "expected a SessionStarted event"); - assert!(finished_idx.is_some(), "expected a SessionFinished event"); - assert!( - started_idx.unwrap() < finished_idx.unwrap(), - "SessionStarted must precede SessionFinished" - ); - - // Stats shows at least 2 messages written (chatgpt fixture has a user - // + assistant pair). - assert!( - stats.messages_written >= 2, - "expected messages to be written, got stats {:?}", - stats - ); - assert_eq!(stats.sessions_imported, 1); -} diff --git a/crates/dirigent_archivist/tests/integration_tests.rs b/crates/dirigent_archivist/tests/integration_tests.rs deleted file mode 100644 index 266cf56..0000000 --- a/crates/dirigent_archivist/tests/integration_tests.rs +++ /dev/null @@ -1,2414 +0,0 @@ -//! Integration tests for dirigent_archivist -//! -//! These tests verify the end-to-end functionality of the archivist, -//! including storage, retrieval, and event streaming. 
- -#[cfg(test)] -mod tests { - use chrono::Utc; - use dirigent_archivist::{ - Archivist, MessageRecord, RegisterConnectorRequest, - RegisterSessionRequest, RegisterStatus, Result, SessionKind, SessionListQuery, - SessionMetadata, - }; - use dirigent_archivist::storage::ndjson::append_ndjson; - use dirigent_archivist::storage::json::write_json; - use uuid::Uuid; - - #[tokio::test] - async fn test_archivist_creation() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Verify coordinator construction succeeded (smoke test). - let _ = &archivist; - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_register_connector_acceptance() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - let req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let response = archivist.register_connector(req, None).await?; - assert_eq!(response.status, RegisterStatus::Accepted); - assert!(response.alias_of.is_none()); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_register_connector_aliasing() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - let req1 = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let response1 = archivist.register_connector(req1, None).await?; - assert_eq!(response1.status, RegisterStatus::Accepted); - - // Register again with same client_native_id - let req2 = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector 2".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let response2 = archivist.register_connector(req2, None).await?; - assert_eq!(response2.status, RegisterStatus::Aliased); - assert_eq!(response2.connector_uid, response1.connector_uid); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn 
test_register_session_acceptance() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector first - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - // Register session - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - assert_eq!(session_response.status, RegisterStatus::Accepted); - assert!(session_response.alias_of.is_none()); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_register_session_aliasing() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - // Register session - let session_req1 = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response1 = archivist.register_session(session_req1, None).await?; - - // Register again with same native_session_id - let session_req2 = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session 2".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response2 = archivist.register_session(session_req2, None).await?; - assert_eq!(session_response2.status, RegisterStatus::Aliased); - assert_eq!(session_response2.scroll_id, session_response1.scroll_id); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_append_and_get_messages() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - 
dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Create and append messages - let message1 = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: None, - ts: Utc::now(), - role: "user".to_string(), - author: Some("test".to_string()), - content_md: "Hello, world!".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - let message2 = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(message1.message_id), - ts: Utc::now(), - role: "assistant".to_string(), - author: Some("assistant".to_string()), - content_md: "Hi there!".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - archivist - .append_messages( - session_response.scroll_id, - vec![message1.clone(), message2.clone()], - None, - ) - .await?; - - // Retrieve messages - let messages 
= archivist.get_messages(session_response.scroll_id, None).await?; - assert_eq!(messages.len(), 2); - assert_eq!(messages[0].message_id, message1.message_id); - assert_eq!(messages[1].message_id, message2.message_id); - assert_eq!(messages[0].content_md, "Hello, world!"); - assert_eq!(messages[1].content_md, "Hi there!"); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_list_sessions() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - // Register multiple sessions - let session_req1 = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-1".to_string(), - title: Some("Session 1".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response1 = archivist.register_session(session_req1, None).await?; - - // Wait a moment to ensure different timestamps - tokio::time::sleep(tokio::time::Duration::from_millis(10)).await; - - let session_req2 = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-2".to_string(), - title: Some("Session 2".to_string()), - 
custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response2 = archivist.register_session(session_req2, None).await?; - - // List sessions - let sessions = archivist - .list_sessions_paged( - SessionListQuery::default() - .with_connector(connector_response.connector_uid) - .with_limit(100), - ) - .await? - .items; - assert_eq!(sessions.len(), 2); - - // Verify sessions are sorted by updated_at descending (newest first) - // Session 2 should be first because it was created later - assert_eq!(sessions[0].scroll_id, session_response2.scroll_id); - assert_eq!(sessions[1].scroll_id, session_response1.scroll_id); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_get_session_metadata() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Get session metadata - let metadata = archivist - .get_session_metadata(session_response.scroll_id, None) - .await?; - - assert_eq!(metadata.scroll_id, session_response.scroll_id); - assert_eq!(metadata.title, Some("Test Session".to_string())); - assert_eq!(metadata.connector_uid, connector_response.connector_uid); - assert_eq!(metadata.native_session_id, Some("native-123".to_string())); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_resolve_session() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Resolve native session ID to scroll ID - let scroll_id = archivist - .resolve_session(connector_response.connector_uid, "native-123", None) - .await?; - - assert_eq!(scroll_id, session_response.scroll_id); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_register_connector_custom_uid_collision() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - let custom_uid = Uuid::now_v7(); - - // Register connector with custom UID - let req1 = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector 1".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: Some(custom_uid), - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let response1 = archivist.register_connector(req1, None).await?; - assert_eq!(response1.status, RegisterStatus::Accepted); - assert_eq!(response1.connector_uid, custom_uid); - - // Try to register another connector with same custom UID - let req2 = RegisterConnectorRequest { - r#type: "ACP".to_string(), - title: "Test Connector 2".to_string(), - client_native_id: "acp@localhost:3000".to_string(), - custom_uid: Some(custom_uid), - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let result2 = archivist.register_connector(req2, None).await; - assert!(result2.is_err(), "Expected error for custom_uid collision"); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_register_session_with_unknown_connector() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Try to register session with unknown connector - let unknown_connector = Uuid::now_v7(); - let session_req = RegisterSessionRequest { - connector_uid: unknown_connector, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let result = archivist.register_session(session_req, None).await; - assert!(result.is_err()); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_get_messages_empty_session() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Get messages from empty session - let messages = archivist.get_messages(session_response.scroll_id, None).await?; - assert_eq!(messages.len(), 0); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_get_messages_unknown_scroll_id() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Try to get messages from unknown session - let unknown_scroll_id = Uuid::now_v7(); - let messages = archivist.get_messages(unknown_scroll_id, None).await?; - - // Should return empty vector for unknown session - assert_eq!(messages.len(), 0); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_multiple_message_appends() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Append messages in multiple batches - let message1 = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: None, - ts: Utc::now(), - role: "user".to_string(), - author: Some("test".to_string()), - content_md: "First 
message".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - archivist - .append_messages(session_response.scroll_id, vec![message1.clone()], None) - .await?; - - let message2 = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(message1.message_id), - ts: Utc::now(), - role: "assistant".to_string(), - author: Some("assistant".to_string()), - content_md: "Second message".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - archivist - .append_messages(session_response.scroll_id, vec![message2.clone()], None) - .await?; - - let message3 = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(message2.message_id), - ts: Utc::now(), - role: "user".to_string(), - author: Some("test".to_string()), - content_md: "Third message".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - archivist - .append_messages(session_response.scroll_id, vec![message3.clone()], None) - .await?; - - // Retrieve all messages - let messages = archivist.get_messages(session_response.scroll_id, None).await?; - assert_eq!(messages.len(), 3); - assert_eq!(messages[0].content_md, "First message"); - assert_eq!(messages[1].content_md, "Second message"); - assert_eq!(messages[2].content_md, "Third message"); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_messages_sorted_chronologically() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Create messages with specific timestamps in chronological order - use chrono::TimeZone; - let base_time = Utc.with_ymd_and_hms(2025, 11, 18, 18, 23, 36).unwrap(); - - let msg_snake_user = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: None, - ts: base_time + chrono::Duration::milliseconds(947), - role: "user".to_string(), - author: Some("user".to_string()), - content_md: "hello please tell me a joke about snakes".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - let msg_snake_assistant = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(msg_snake_user.message_id), - ts: base_time + chrono::Duration::milliseconds(969), - role: "assistant".to_string(), - author: Some("claude".to_string()), - content_md: "Why don't snakes need cutlery? 
They have forked tongues!".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - let msg_tiger_user = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(msg_snake_assistant.message_id), - ts: base_time + chrono::Duration::milliseconds(13429), - role: "user".to_string(), - author: Some("user".to_string()), - content_md: "now one about tigers".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - let msg_tiger_assistant = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(msg_tiger_user.message_id), - ts: base_time + chrono::Duration::milliseconds(13448), - role: "assistant".to_string(), - author: Some("claude".to_string()), - content_md: "What do tigers wear to bed? Striped pajamas!".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - let msg_hyena_user = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(msg_tiger_assistant.message_id), - ts: base_time + chrono::Duration::milliseconds(32623), - role: "user".to_string(), - author: Some("user".to_string()), - content_md: "and a third one about hyenas".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - // Append messages OUT OF ORDER to simulate real-world event arrival - // (assistant replies often arrive after subsequent user messages) - archivist - .append_messages( - session_response.scroll_id, - vec![msg_snake_user.clone()], - None, - ) - .await?; - - archivist - .append_messages( - session_response.scroll_id, - vec![msg_tiger_user.clone()], - None, - ) - .await?; - - archivist - .append_messages( - session_response.scroll_id, - vec![msg_snake_assistant.clone()], - None, - ) - .await?; - - archivist - .append_messages( - 
session_response.scroll_id, - vec![msg_hyena_user.clone()], - None, - ) - .await?; - - archivist - .append_messages( - session_response.scroll_id, - vec![msg_tiger_assistant.clone()], - None, - ) - .await?; - - // Retrieve messages - should be sorted chronologically despite out-of-order appends - let messages = archivist.get_messages(session_response.scroll_id, None).await?; - - assert_eq!(messages.len(), 5); - - // Verify chronological order by timestamp - assert_eq!(messages[0].message_id, msg_snake_user.message_id); - assert_eq!(messages[0].content_md, "hello please tell me a joke about snakes"); - - assert_eq!(messages[1].message_id, msg_snake_assistant.message_id); - assert_eq!(messages[1].content_md, "Why don't snakes need cutlery? They have forked tongues!"); - - assert_eq!(messages[2].message_id, msg_tiger_user.message_id); - assert_eq!(messages[2].content_md, "now one about tigers"); - - assert_eq!(messages[3].message_id, msg_tiger_assistant.message_id); - assert_eq!(messages[3].content_md, "What do tigers wear to bed? Striped pajamas!"); - - assert_eq!(messages[4].message_id, msg_hyena_user.message_id); - assert_eq!(messages[4].content_md, "and a third one about hyenas"); - - // Verify timestamps are strictly increasing - for i in 1..messages.len() { - assert!( - messages[i].ts >= messages[i - 1].ts, - "Messages not in chronological order: message {} has ts {} which is before message {} with ts {}", - i, - messages[i].ts, - i - 1, - messages[i - 1].ts - ); - } - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_messages_with_identical_timestamps() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Create multiple messages with the exact same timestamp - // This tests the secondary sorting by message_id - let same_timestamp = Utc::now(); - - // Create messages with explicitly ordered UUIDs (v7 includes timestamp) - // Sleep briefly between creations to ensure UUIDv7 ordering - let msg1 = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: None, - ts: same_timestamp, - role: "user".to_string(), - author: Some("user".to_string()), - content_md: "First message".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - tokio::time::sleep(tokio::time::Duration::from_micros(1)).await; - - let msg2 = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(msg1.message_id), - ts: same_timestamp, - role: "assistant".to_string(), - author: Some("assistant".to_string()), - content_md: "Second message".to_string(), - 
content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - tokio::time::sleep(tokio::time::Duration::from_micros(1)).await; - - let msg3 = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: Some(msg2.message_id), - ts: same_timestamp, - role: "user".to_string(), - author: Some("user".to_string()), - content_md: "Third message".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - // Append in reverse order to ensure sorting is working - archivist - .append_messages( - session_response.scroll_id, - vec![msg3.clone()], - None, - ) - .await?; - - archivist - .append_messages( - session_response.scroll_id, - vec![msg1.clone()], - None, - ) - .await?; - - archivist - .append_messages( - session_response.scroll_id, - vec![msg2.clone()], - None, - ) - .await?; - - // Retrieve messages - should be sorted by message_id since timestamps are identical - let messages = archivist.get_messages(session_response.scroll_id, None).await?; - - assert_eq!(messages.len(), 3); - - // All timestamps should be the same - assert_eq!(messages[0].ts, same_timestamp); - assert_eq!(messages[1].ts, same_timestamp); - assert_eq!(messages[2].ts, same_timestamp); - - // Messages should be ordered by message_id (UUIDv7 preserves creation order) - assert_eq!(messages[0].message_id, msg1.message_id); - assert_eq!(messages[1].message_id, msg2.message_id); - assert_eq!(messages[2].message_id, msg3.message_id); - - assert_eq!(messages[0].content_md, "First message"); - assert_eq!(messages[1].content_md, "Second message"); - assert_eq!(messages[2].content_md, "Third message"); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_append_messages_updates_timestamp() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - 
dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Get initial metadata - let metadata_before = archivist - .get_session_metadata(session_response.scroll_id, None) - .await?; - - // Wait a moment - tokio::time::sleep(tokio::time::Duration::from_millis(10)).await; - - // Append a message - let message = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: None, - ts: Utc::now(), - role: "user".to_string(), - author: Some("test".to_string()), - content_md: "Hello!".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - - archivist - .append_messages(session_response.scroll_id, vec![message], None) - .await?; - - // Get updated metadata - let metadata_after = archivist - .get_session_metadata(session_response.scroll_id, None) - .await?; - - // Verify updated_at changed - assert!(metadata_after.updated_at > metadata_before.updated_at); - - // Clean up - 
tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - // ======================================================================== - // Performance Benchmarks - // These tests are marked with #[ignore] to avoid slowing down regular test runs - // Run with: cargo test --package dirigent_archivist -- --ignored - // ======================================================================== - - #[tokio::test] - #[ignore] - async fn bench_append_1000_messages() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_bench_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Bench Connector".to_string(), - client_native_id: "bench@localhost".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "bench-session".to_string(), - title: Some("Bench Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Create 1000 messages - let messages: Vec = (0..1000) - .map(|i| MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: None, - ts: Utc::now(), - role: if i % 2 == 0 { "user" } else { "assistant" }.to_string(), - author: Some("bench".to_string()), - 
content_md: format!("Message number {} with some realistic content that might appear in a conversation. This helps simulate real-world usage patterns.", i), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({"index": i}), - }) - .collect(); - - // Benchmark appending messages - let start = std::time::Instant::now(); - archivist - .append_messages(session_response.scroll_id, messages, None) - .await?; - let elapsed = start.elapsed(); - - let messages_per_sec = 1000.0 / elapsed.as_secs_f64(); - println!("\nBenchmark: Append 1000 messages"); - println!(" Total time: {:?}", elapsed); - println!(" Messages/sec: {:.2}", messages_per_sec); - println!(" Avg time per message: {:?}", elapsed / 1000); - - // Target: >100 msg/s - assert!( - messages_per_sec > 100.0, - "Performance degraded: {:.2} msg/s < 100 msg/s", - messages_per_sec - ); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - #[ignore] - async fn bench_read_100_messages() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_bench_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Bench Connector".to_string(), - client_native_id: "bench@localhost".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "bench-session-100".to_string(), - title: Some("Bench Session 100".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Create and append 100 messages - let messages: Vec = (0..100) - .map(|i| MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: None, - ts: Utc::now(), - role: if i % 2 == 0 { "user" } else { "assistant" }.to_string(), - author: Some("bench".to_string()), - content_md: format!("Message {}", i), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }) - .collect(); - - archivist - .append_messages(session_response.scroll_id, messages, None) - .await?; - - // Benchmark reading messages - let start = std::time::Instant::now(); - let retrieved = archivist.get_messages(session_response.scroll_id, None).await?; - let elapsed = start.elapsed(); - - println!("\nBenchmark: Read 100 messages"); - println!(" Total time: {:?}", elapsed); - println!(" Messages retrieved: {}", retrieved.len()); - - // Target: sub-100ms for typical sessions - assert!( - elapsed.as_millis() < 100, - "Read performance 
degraded: {:?} > 100ms", - elapsed - ); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - #[ignore] - async fn bench_read_1000_messages() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_bench_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector and session - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Bench Connector".to_string(), - client_native_id: "bench@localhost".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "bench-session-1000".to_string(), - title: Some("Bench Session 1000".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - let session_response = archivist.register_session(session_req, None).await?; - - // Create and append 1000 messages - let messages: Vec = (0..1000) - .map(|i| MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: session_response.scroll_id, - parent_id: None, - ts: Utc::now(), - role: if i % 2 == 0 { "user" } else { "assistant" }.to_string(), - author: Some("bench".to_string()), - content_md: format!("Message {}", i), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }) - .collect(); - - archivist - .append_messages(session_response.scroll_id, messages, None) - .await?; - - // Benchmark reading messages - 
let start = std::time::Instant::now(); - let retrieved = archivist.get_messages(session_response.scroll_id, None).await?; - let elapsed = start.elapsed(); - - println!("\nBenchmark: Read 1000 messages"); - println!(" Total time: {:?}", elapsed); - println!(" Messages retrieved: {}", retrieved.len()); - - // Log for tracking (no strict requirement for large sessions) - println!(" Note: Performance acceptable for large session"); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - #[ignore] - async fn bench_list_100_sessions() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_bench_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Bench Connector".to_string(), - client_native_id: "bench@localhost".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let connector_response = archivist.register_connector(connector_req, None).await?; - - // Register 100 sessions - for i in 0..100 { - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: format!("bench-session-{}", i), - title: Some(format!("Session {}", i)), - custom_scroll_id: None, - metadata: serde_json::json!({"index": i}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - - archivist.register_session(session_req, None).await?; - } - - // Benchmark listing sessions - let start = std::time::Instant::now(); - let sessions = archivist - .list_sessions_paged( - SessionListQuery::default() - 
.with_connector(connector_response.connector_uid) - .with_limit(dirigent_archivist::MAX_PAGE_LIMIT), - ) - .await? - .items; - let elapsed = start.elapsed(); - - println!("\nBenchmark: List 100 sessions"); - println!(" Total time: {:?}", elapsed); - println!(" Sessions listed: {}", sessions.len()); - - // Target: sub-100ms for typical connector - assert!( - elapsed.as_millis() < 100, - "List performance degraded: {:?} > 100ms", - elapsed - ); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_mixed_format_compatibility() { - // Create archivist with temp directory - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await.unwrap() - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await.unwrap(); - - // Register connector - let connector_req = RegisterConnectorRequest { - r#type: "Test".to_string(), - title: "Test Connector".to_string(), - client_native_id: "test-connector".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - let connector_resp = archivist.register_connector(connector_req, None).await.unwrap(); - let connector_uid = connector_resp.connector_uid; - - // Manually create a session with .jsonl format for messages - let scroll_id = Uuid::now_v7(); - let session_metadata = SessionMetadata { - version: 1, - scroll_id, - created_at: Utc::now(), - updated_at: Utc::now(), - title: Some("Test Session".to_string()), - connector_uid, - native_session_id: Some("test-123".to_string()), - agent_id: None, - parent_scroll_id: None, - continuation: None, - tags: vec![], - metadata: serde_json::json!({}), - no_update: false, - kind: SessionKind::Chat, - acp_client_id: None, - is_connected: None, - current_session_id: None, - models: None, - modes: None, - config_options: None, - 
completeness: Default::default(), - matrix_room_id: None, - matrix_sharing_active: false, - matrix_shared_at: None, - is_subagent: false, - subagent_type: None, - spawning_tool_use_id: None, - }; - - backend.paths().ensure_dirs(scroll_id).await.unwrap(); - write_json(&backend.paths().session_json(scroll_id), &session_metadata).await.unwrap(); - - // Create messages.jsonl (not .ndjson) - let jsonl_path = backend.paths().session_dir(scroll_id).join("messages.jsonl"); - let message = MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: scroll_id, - parent_id: None, - ts: Utc::now(), - role: "user".to_string(), - author: None, - content_md: "Hello from .jsonl file".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - append_ndjson(&jsonl_path, &message).await.unwrap(); - - // Read messages using archivist API - let messages = archivist.get_messages(scroll_id, None).await.unwrap(); - assert_eq!(messages.len(), 1); - assert_eq!(messages[0].content_md, "Hello from .jsonl file"); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - } - - #[tokio::test] - async fn test_fingerprint_registration_and_matching() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register first connector with a fingerprint - let fingerprint = "acp/stdio:/usr/bin/claude".to_string(); - let req1 = RegisterConnectorRequest { - r#type: "ACP".to_string(), - title: "Claude CLI".to_string(), - client_native_id: "acp-session-abc123".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some(fingerprint.clone()), - }; - - let response1 = archivist.register_connector(req1, None).await?; - assert_eq!(response1.status, RegisterStatus::Accepted); - let original_uid = response1.connector_uid; - - // Register a second connector with a DIFFERENT client_native_id - // but the SAME fingerprint. Should be ALIASED to the original. - let req2 = RegisterConnectorRequest { - r#type: "ACP".to_string(), - title: "Claude CLI (re-added)".to_string(), - client_native_id: "acp-session-xyz789".to_string(), - custom_uid: None, - metadata: serde_json::json!({"version": 2}), - fingerprint: Some(fingerprint.clone()), - }; - - let response2 = archivist.register_connector(req2, None).await?; - assert_eq!( - response2.status, - RegisterStatus::Aliased, - "Same fingerprint should cause ALIASED status" - ); - assert_eq!( - response2.connector_uid, original_uid, - "Aliased connector should return the original UID" - ); - assert_eq!(response2.alias_of, Some(original_uid)); - assert!( - response2.note.as_deref().unwrap_or("").contains("fingerprint"), - "Note should mention fingerprint matching" - ); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_fingerprint_no_match_different_fingerprints() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register first connector with fingerprint A - let req1 = RegisterConnectorRequest { - r#type: "ACP".to_string(), - title: "Claude CLI".to_string(), - client_native_id: "acp-claude-1".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some("acp/stdio:/usr/bin/claude".to_string()), - }; - - let response1 = archivist.register_connector(req1, None).await?; - assert_eq!(response1.status, RegisterStatus::Accepted); - - // Register second connector with fingerprint B (different) - let req2 = RegisterConnectorRequest { - r#type: "ACP".to_string(), - title: "Codex Agent".to_string(), - client_native_id: "acp-codex-1".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some("acp/stdio:/usr/bin/codex".to_string()), - }; - - let response2 = archivist.register_connector(req2, None).await?; - assert_eq!( - response2.status, - RegisterStatus::Accepted, - "Different fingerprints should both be ACCEPTED" - ); - assert_ne!( - response2.connector_uid, response1.connector_uid, - "Different fingerprints should get different UIDs" - ); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_fingerprint_none_skips_matching() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register first connector WITH a fingerprint - let req1 = RegisterConnectorRequest { - r#type: "ACP".to_string(), - title: "Claude CLI".to_string(), - client_native_id: "acp-claude-1".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some("acp/stdio:/usr/bin/claude".to_string()), - }; - - let response1 = archivist.register_connector(req1, None).await?; - assert_eq!(response1.status, RegisterStatus::Accepted); - - // Register second connector WITHOUT a fingerprint (different native ID) - // Should NOT match the first connector even though one exists with a fingerprint - let req2 = RegisterConnectorRequest { - r#type: "ACP".to_string(), - title: "Unknown ACP Agent".to_string(), - client_native_id: "acp-unknown-1".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - - let response2 = archivist.register_connector(req2, None).await?; - assert_eq!( - response2.status, - RegisterStatus::Accepted, - "Connector with fingerprint=None should not match existing fingerprints" - ); - assert_ne!( - response2.connector_uid, response1.connector_uid, - "Should get a new UID when no fingerprint is provided" - ); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_connector_fingerprint_persistence() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // Register connector with a fingerprint - let fingerprint_value = "acp/stdio:/usr/bin/claude".to_string(); - let connector_req = RegisterConnectorRequest { - r#type: "ACP".to_string(), - title: "Claude CLI".to_string(), - client_native_id: "acp-claude-1".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some(fingerprint_value.clone()), - }; - - let response = archivist.register_connector(connector_req, None).await?; - assert_eq!(response.status, RegisterStatus::Accepted); - let connector_uid = response.connector_uid; - - // Verify fingerprint is persisted in connector.json - let connector_json_path = backend.paths() - .connector_dir(connector_uid) - .join("connector.json"); - let raw_json = tokio::fs::read_to_string(&connector_json_path).await.unwrap(); - let connector_record: serde_json::Value = serde_json::from_str(&raw_json).unwrap(); - assert_eq!( - connector_record["fingerprint"].as_str(), - Some(fingerprint_value.as_str()), - "Fingerprint should be persisted in connector.json" - ); - - // Verify fingerprint is persisted in TSV index - let index_path = backend.paths().connector_index_tsv(); - let tsv_content = tokio::fs::read_to_string(&index_path).await.unwrap(); - assert!( - tsv_content.contains(&fingerprint_value), - "Fingerprint should appear in TSV index" - ); - - // Verify fingerprint can be read back via TSV reader - let rows = dirigent_archivist::storage::tsv::read_connector_index(&index_path).await.unwrap(); - assert_eq!(rows.len(), 1); - assert_eq!(rows[0].fingerprint, Some(fingerprint_value.clone())); - - // Register a second connector WITHOUT a fingerprint (ensure None is handled) - let connector_req2 = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "OpenCode Local".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: 
serde_json::json!({}), - fingerprint: None, - }; - - let response2 = archivist.register_connector(connector_req2, None).await?; - assert_eq!(response2.status, RegisterStatus::Accepted); - - // Re-read TSV and verify both connectors - let rows = dirigent_archivist::storage::tsv::read_connector_index(&index_path).await.unwrap(); - assert_eq!(rows.len(), 2); - - // First connector should have fingerprint - let row_with_fp = rows.iter().find(|r| r.connector_uid == connector_uid).unwrap(); - assert_eq!(row_with_fp.fingerprint, Some(fingerprint_value)); - - // Second connector should have no fingerprint - let row_without_fp = rows.iter().find(|r| r.connector_uid == response2.connector_uid).unwrap(); - assert_eq!(row_without_fp.fingerprint, None); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_list_connectors() -> std::result::Result<(), Box> { - let temp_dir = std::env::temp_dir().join(format!("archivist_lc_{}", uuid::Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? 
- ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - let req1 = RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Claude".to_string(), - client_native_id: "c1".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some("acp/stdio:/usr/bin/claude".to_string()), - }; - archivist.register_connector(req1, None).await?; - - let req2 = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "OC".to_string(), - client_native_id: "c2".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - archivist.register_connector(req2, None).await?; - - let connectors = archivist.list_connectors(None).await?; - assert_eq!(connectors.len(), 2); - - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_move_session() -> std::result::Result<(), Box> { - let temp_dir = std::env::temp_dir().join(format!("archivist_mv_{}", uuid::Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - let c1 = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Source".to_string(), - client_native_id: "src".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await? - .connector_uid; - - let c2 = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Target".to_string(), - client_native_id: "tgt".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await? 
- .connector_uid; - - let session = archivist - .register_session( - RegisterSessionRequest { - connector_uid: c1, - native_session_id: "s1".to_string(), - title: Some("Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await?; - - // Verify under c1 - assert_eq!( - archivist - .list_sessions_paged( - SessionListQuery::default().with_connector(c1).with_limit(100), - ) - .await? - .items - .len(), - 1 - ); - - // Move to c2 - archivist - .move_session_to_connector(session.scroll_id, c2, None) - .await?; - - // c1 should be empty, c2 should have the session - assert_eq!( - archivist - .list_sessions_paged( - SessionListQuery::default().with_connector(c1).with_limit(100), - ) - .await? - .items - .len(), - 0 - ); - let c2_sessions = archivist - .list_sessions_paged( - SessionListQuery::default().with_connector(c2).with_limit(100), - ) - .await? - .items; - assert_eq!(c2_sessions.len(), 1); - assert_eq!(c2_sessions[0].connector_uid, c2); - - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_copy_session() -> std::result::Result<(), Box> { - let temp_dir = std::env::temp_dir().join(format!("archivist_cp_{}", uuid::Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - let c1 = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Source".to_string(), - client_native_id: "src".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await? 
- .connector_uid; - - let c2 = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Target".to_string(), - client_native_id: "tgt".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await? - .connector_uid; - - let session = archivist - .register_session( - RegisterSessionRequest { - connector_uid: c1, - native_session_id: "s1".to_string(), - title: Some("Original".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await?; - - // Add a message - let msg = MessageRecord { - version: 1, - message_id: uuid::Uuid::now_v7(), - session: session.scroll_id, - parent_id: None, - ts: chrono::Utc::now(), - role: "user".to_string(), - author: Some("test".to_string()), - content_md: "Hello".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - archivist - .append_messages(session.scroll_id, vec![msg], None) - .await?; - - // Copy - let new_scroll_id = archivist - .copy_session_to_connector(session.scroll_id, c2, None) - .await?; - assert_ne!(new_scroll_id, session.scroll_id); - - // Original still under c1 - assert_eq!( - archivist - .list_sessions_paged( - SessionListQuery::default().with_connector(c1).with_limit(100), - ) - .await? - .items - .len(), - 1 - ); - // Copy under c2 - let c2_sessions = archivist - .list_sessions_paged( - SessionListQuery::default().with_connector(c2).with_limit(100), - ) - .await? 
- .items; - assert_eq!(c2_sessions.len(), 1); - assert_eq!(c2_sessions[0].connector_uid, c2); - // Messages copied - let msgs = archivist.get_messages(new_scroll_id, None).await?; - assert_eq!(msgs.len(), 1); - assert_eq!(msgs[0].content_md, "Hello"); - - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_move_sessions_bulk() -> std::result::Result<(), Box> { - let temp_dir = - std::env::temp_dir().join(format!("archivist_mvb_{}", uuid::Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - let c1 = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Source".to_string(), - client_native_id: "src".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await? - .connector_uid; - - let c2 = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Target".to_string(), - client_native_id: "tgt".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await? 
- .connector_uid; - - let mut scroll_ids = Vec::new(); - for i in 0..3 { - let s = archivist - .register_session( - RegisterSessionRequest { - connector_uid: c1, - native_session_id: format!("s{}", i), - title: Some(format!("Session {}", i)), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await?; - scroll_ids.push(s.scroll_id); - } - - let report = archivist - .move_sessions_to_connector(scroll_ids, c2, None) - .await?; - assert_eq!(report.moved, 3); - assert_eq!(report.failed, 0); - assert!(report.errors.is_empty()); - assert_eq!( - archivist - .list_sessions_paged( - SessionListQuery::default().with_connector(c1).with_limit(100), - ) - .await? - .items - .len(), - 0 - ); - assert_eq!( - archivist - .list_sessions_paged( - SessionListQuery::default().with_connector(c2).with_limit(100), - ) - .await? - .items - .len(), - 3 - ); - - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_connector_identity_persistence_e2e() -> std::result::Result<(), Box> - { - let temp_dir = - std::env::temp_dir().join(format!("archivist_e2e_{}", uuid::Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - // 1. 
Register connector with fingerprint - let req1 = RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Claude v1".to_string(), - client_native_id: "first-run-id".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some("acp/stdio:/usr/bin/claude".to_string()), - }; - let resp1 = archivist.register_connector(req1, None).await?; - let original_uid = resp1.connector_uid; - assert_eq!(resp1.status, RegisterStatus::Accepted); - - // 2. Create sessions under this connector - let s1 = archivist - .register_session( - RegisterSessionRequest { - connector_uid: original_uid, - native_session_id: "session-1".to_string(), - title: Some("Important Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await?; - - // 3. Add messages - let msg = MessageRecord { - version: 1, - message_id: uuid::Uuid::now_v7(), - session: s1.scroll_id, - parent_id: None, - ts: chrono::Utc::now(), - role: "user".to_string(), - author: Some("test".to_string()), - content_md: "Don't lose me!".to_string(), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - }; - archivist - .append_messages(s1.scroll_id, vec![msg], None) - .await?; - - // 4. Simulate "remove and re-add" -- new connector_id, same fingerprint - let req2 = RegisterConnectorRequest { - r#type: "Acp".to_string(), - title: "Claude v2 (reinstalled)".to_string(), - client_native_id: "second-run-id".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: Some("acp/stdio:/usr/bin/claude".to_string()), - }; - let resp2 = archivist.register_connector(req2, None).await?; - - // 5. Verify: same UID, ALIASED status - assert_eq!(resp2.status, RegisterStatus::Aliased); - assert_eq!(resp2.connector_uid, original_uid); - - // 6. 
Verify: sessions still accessible under the same UID - let sessions = archivist - .list_sessions_paged( - SessionListQuery::default() - .with_connector(original_uid) - .with_limit(100), - ) - .await? - .items; - assert_eq!(sessions.len(), 1); - assert_eq!(sessions[0].title, Some("Important Session".to_string())); - - // 7. Verify: messages intact - let messages = archivist.get_messages(s1.scroll_id, None).await?; - assert_eq!(messages.len(), 1); - assert_eq!(messages[0].content_md, "Don't lose me!"); - - // 8. Verify: connector record is preserved under the original UID. - // - // NOTE: Pre-Phase-2 `FileBasedArchivist` ALSO refreshed the matched - // connector's `title`/`metadata` on fingerprint-based ALIASED - // registration. The `Archivist` deliberately drops that - // refresh (see `coordinator/connectors.rs` for the rationale — the - // `ConnectorRegistryBackend` trait has no "update metadata" method - // yet, and `put_connector` would append rather than mutate). The - // identity (UID) is stable; the title stays the original. - let connectors = archivist.list_connectors(None).await?; - let connector = connectors - .iter() - .find(|c| c.connector_uid == original_uid) - .unwrap(); - assert_eq!(connector.title, "Claude v1"); - - // 9. Test move_session works after fingerprint re-association - let c2 = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Secondary".to_string(), - client_native_id: "secondary".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await? - .connector_uid; - - archivist - .move_session_to_connector(s1.scroll_id, c2, None) - .await?; - assert_eq!( - archivist - .list_sessions_paged( - SessionListQuery::default() - .with_connector(original_uid) - .with_limit(100), - ) - .await? 
- .items - .len(), - 0 - ); - assert_eq!( - archivist - .list_sessions_paged( - SessionListQuery::default().with_connector(c2).with_limit(100), - ) - .await? - .items - .len(), - 1 - ); - - // Move it back - archivist - .move_session_to_connector(s1.scroll_id, original_uid, None) - .await?; - assert_eq!( - archivist - .list_sessions_paged( - SessionListQuery::default() - .with_connector(original_uid) - .with_limit(100), - ) - .await? - .items - .len(), - 1 - ); - - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_paged_walk_fifty_sessions() -> Result<()> { - use chrono::{Duration, Utc}; - use dirigent_archivist::SessionListQuery; - - let temp_dir = std::env::temp_dir().join(format!("paged_walk_{}", Uuid::now_v7())); - let backend = std::sync::Arc::new( - dirigent_archivist::backends::JsonlBackend::new(temp_dir.clone()).await? - ); - let archivist = Archivist::from_single_backend( - "main".into(), backend.clone() - ).await?; - - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "paged-walk".to_string(), - client_native_id: format!("paged-walk@{}", Uuid::now_v7()), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - let cresp = archivist.register_connector(connector_req, None).await?; - let uid = cresp.connector_uid; - - let base = Utc::now(); - for i in 0..50 { - let tag = if i % 2 == 0 { "even" } else { "odd" }; - - let req = RegisterSessionRequest { - connector_uid: uid, - native_session_id: format!("walk-{i}"), - title: Some(format!("title-{i}")), - custom_scroll_id: None, - metadata: serde_json::json!({"model": "claude-3-5-sonnet"}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - let r = archivist.register_session(req, None).await?; - - let mut meta = archivist.get_session_metadata(r.scroll_id, None).await?; - 
meta.updated_at = base - Duration::seconds(i); - meta.tags = vec![tag.to_string()]; - let path = backend.paths().session_json(r.scroll_id); - dirigent_archivist::storage::json::write_json(&path, &meta) - .await - .map_err(dirigent_archivist::ArchivistError::Io)?; - } - - // Walk in chunks of 10 — 5 pages, 50 items, no dupes. - let mut seen: std::collections::HashSet = std::collections::HashSet::new(); - let mut cursor = None; - let mut page_count = 0; - loop { - let page = archivist - .list_sessions_paged( - SessionListQuery::default() - .with_connector(uid) - .with_limit(10) - .with_cursor(cursor.clone()), - ) - .await?; - page_count += 1; - for s in &page.items { - assert!(seen.insert(s.scroll_id), "duplicate scroll_id across pages"); - } - if page.next_cursor.is_none() { - break; - } - cursor = page.next_cursor; - assert!(page_count <= 10, "runaway pagination"); - } - assert_eq!(seen.len(), 50); - assert_eq!(page_count, 5); - - // Compose filter: tag=even AND title contains "1" → titles 10, 12, 14, 16, 18. - let mut q = SessionListQuery::default().with_connector(uid).with_limit(50); - q.tags = vec!["even".into()]; - q.title_query = Some("1".into()); - let page = archivist.list_sessions_paged(q).await?; - assert_eq!( - page.items.len(), - 5, - "got titles {:?}", - page.items.iter().map(|s| s.title.clone()).collect::>() - ); - - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } -} diff --git a/crates/dirigent_archivist/tests/list_sessions_paged_test.rs b/crates/dirigent_archivist/tests/list_sessions_paged_test.rs deleted file mode 100644 index a89a5ca..0000000 --- a/crates/dirigent_archivist/tests/list_sessions_paged_test.rs +++ /dev/null @@ -1,364 +0,0 @@ -//! Tests for `Archivist::list_sessions_paged` — pagination, filters, cursor stability. 
- -use chrono::{Duration, Utc}; -use dirigent_archivist::{ - backends::JsonlBackend, Archivist, RegisterConnectorRequest, RegisterSessionRequest, - Result, SessionListQuery, -}; -use std::sync::Arc; -use uuid::Uuid; - -/// Scaffold: create a coordinator backed by a single `JsonlBackend` in a -/// unique temp dir, returning the backend alongside it so tests can probe -/// disk paths via `backend.paths()`. -async fn mk_archivist() -> Result<(Archivist, Arc, std::path::PathBuf)> { - let temp_dir = std::env::temp_dir().join(format!("paged_test_{}", Uuid::now_v7())); - let backend = Arc::new(JsonlBackend::new(temp_dir.clone()).await?); - let archivist = - Archivist::from_single_backend("main".into(), backend.clone()).await?; - Ok((archivist, backend, temp_dir)) -} - -/// Register a connector, return its UID. -async fn mk_connector(archivist: &Archivist, title: &str) -> Result { - let resp = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: title.to_string(), - client_native_id: format!("{title}@local:{}", Uuid::now_v7()), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await?; - Ok(resp.connector_uid) -} - -/// Register a session and patch fields that `register_session` does not expose. 
-#[allow(clippy::too_many_arguments)] -async fn mk_session( - archivist: &Archivist, - backend: &JsonlBackend, - connector_uid: Uuid, - native_id: &str, - title: Option<&str>, - tags: Vec, - model: Option<&str>, - project_id: Option<&str>, - no_update: bool, -) -> Result { - let mut metadata = serde_json::Map::new(); - if let Some(m) = model { - metadata.insert("model".to_string(), serde_json::Value::String(m.to_string())); - } - if let Some(p) = project_id { - metadata.insert( - "project_id".to_string(), - serde_json::Value::String(p.to_string()), - ); - } - - let resp = archivist - .register_session( - RegisterSessionRequest { - connector_uid, - native_session_id: native_id.to_string(), - title: title.map(String::from), - custom_scroll_id: None, - metadata: serde_json::Value::Object(metadata), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await?; - let scroll_id = resp.scroll_id; - - // Patch tags / no_update into session.json on disk. - if !tags.is_empty() || no_update { - let mut meta = archivist.get_session_metadata(scroll_id, None).await?; - meta.tags = tags; - meta.no_update = no_update; - let path = backend.paths().session_json(scroll_id); - dirigent_archivist::storage::json::write_json(&path, &meta) - .await - .map_err(dirigent_archivist::ArchivistError::Io)?; - } - - Ok(scroll_id) -} - -/// Overwrite a session's updated_at on disk for deterministic ordering. 
-async fn set_updated_at( - archivist: &Archivist, - backend: &JsonlBackend, - scroll_id: Uuid, - when: chrono::DateTime, -) -> Result<()> { - let mut meta = archivist.get_session_metadata(scroll_id, None).await?; - meta.updated_at = when; - let path = backend.paths().session_json(scroll_id); - dirigent_archivist::storage::json::write_json(&path, &meta) - .await - .map_err(dirigent_archivist::ArchivistError::Io)?; - Ok(()) -} - -fn cleanup(path: std::path::PathBuf) { - let _ = std::fs::remove_dir_all(path); -} - -#[tokio::test] -async fn list_sessions_paged_respects_limit() -> Result<()> { - let (archivist, backend, temp) = mk_archivist().await?; - let uid = mk_connector(&archivist, "connector-a").await?; - - let base = Utc::now(); - for i in 0..30 { - let scroll = mk_session( - &archivist, - &backend, - uid, - &format!("native-{i}"), - Some(&format!("title-{i}")), - Vec::new(), - None, - None, - false, - ) - .await?; - set_updated_at(&archivist, &backend, scroll, base - Duration::seconds(i)).await?; - } - - let page = archivist - .list_sessions_paged(SessionListQuery::default().with_connector(uid).with_limit(10)) - .await?; - - assert_eq!(page.items.len(), 10); - assert!(page.next_cursor.is_some()); - - cleanup(temp); - Ok(()) -} - -#[tokio::test] -async fn list_sessions_paged_end_of_list() -> Result<()> { - let (archivist, backend, temp) = mk_archivist().await?; - let uid = mk_connector(&archivist, "connector-b").await?; - - let base = Utc::now(); - for i in 0..5 { - let scroll = mk_session( - &archivist, - &backend, - uid, - &format!("native-{i}"), - Some(&format!("title-{i}")), - Vec::new(), - None, - None, - false, - ) - .await?; - set_updated_at(&archivist, &backend, scroll, base - Duration::seconds(i)).await?; - } - - let page = archivist - .list_sessions_paged(SessionListQuery::default().with_connector(uid).with_limit(100)) - .await?; - - assert_eq!(page.items.len(), 5); - assert!(page.next_cursor.is_none()); - - cleanup(temp); - Ok(()) -} - -#[tokio::test] 
-async fn list_sessions_paged_cursor_stability() -> Result<()> { - let (archivist, backend, temp) = mk_archivist().await?; - let uid = mk_connector(&archivist, "connector-c").await?; - - let fixed = Utc::now(); - for i in 0..6 { - let scroll = mk_session( - &archivist, - &backend, - uid, - &format!("native-{i}"), - Some(&format!("title-{i}")), - Vec::new(), - None, - None, - false, - ) - .await?; - set_updated_at(&archivist, &backend, scroll, fixed).await?; - } - - let p1 = archivist - .list_sessions_paged(SessionListQuery::default().with_connector(uid).with_limit(3)) - .await?; - assert_eq!(p1.items.len(), 3); - assert!(p1.next_cursor.is_some()); - - let p2 = archivist - .list_sessions_paged( - SessionListQuery::default() - .with_connector(uid) - .with_limit(3) - .with_cursor(p1.next_cursor.clone()), - ) - .await?; - assert_eq!(p2.items.len(), 3); - - let ids1: std::collections::HashSet<_> = p1.items.iter().map(|s| s.scroll_id).collect(); - let ids2: std::collections::HashSet<_> = p2.items.iter().map(|s| s.scroll_id).collect(); - assert!(ids1.is_disjoint(&ids2), "page 1 and page 2 must not overlap"); - assert!(p2.next_cursor.is_none()); - - cleanup(temp); - Ok(()) -} - -#[tokio::test] -async fn list_sessions_paged_title_filter() -> Result<()> { - let (archivist, backend, temp) = mk_archivist().await?; - let uid = mk_connector(&archivist, "connector-d").await?; - - mk_session(&archivist, &backend, uid, "n1", Some("Alpha beta"), vec![], None, None, false).await?; - mk_session(&archivist, &backend, uid, "n2", Some("BETA only"), vec![], None, None, false).await?; - mk_session(&archivist, &backend, uid, "n3", Some("gamma"), vec![], None, None, false).await?; - mk_session(&archivist, &backend, uid, "n4", None, vec![], None, None, false).await?; - - let page = archivist - .list_sessions_paged( - SessionListQuery::default() - .with_connector(uid) - .with_limit(50) - .with_title_query("beta"), - ) - .await?; - - let titles: Vec<_> = page.items.iter().filter_map(|s| 
s.title.clone()).collect(); - assert_eq!(titles.len(), 2, "got {titles:?}"); - assert!(titles.iter().any(|t| t == "Alpha beta")); - assert!(titles.iter().any(|t| t == "BETA only")); - - cleanup(temp); - Ok(()) -} - -#[tokio::test] -async fn list_sessions_paged_tags_and() -> Result<()> { - let (archivist, backend, temp) = mk_archivist().await?; - let uid = mk_connector(&archivist, "connector-e").await?; - - mk_session( - &archivist, &backend, uid, "n1", Some("s1"), - vec!["red".into(), "blue".into()], None, None, false, - ).await?; - mk_session( - &archivist, &backend, uid, "n2", Some("s2"), - vec!["red".into()], None, None, false, - ).await?; - mk_session( - &archivist, &backend, uid, "n3", Some("s3"), - vec!["blue".into()], None, None, false, - ).await?; - - let mut q = SessionListQuery::default().with_connector(uid).with_limit(50); - q.tags = vec!["red".into(), "blue".into()]; - - let page = archivist.list_sessions_paged(q).await?; - - assert_eq!(page.items.len(), 1); - assert_eq!(page.items[0].title.as_deref(), Some("s1")); - - cleanup(temp); - Ok(()) -} - -#[tokio::test] -async fn list_sessions_paged_model_filter() -> Result<()> { - let (archivist, backend, temp) = mk_archivist().await?; - let uid = mk_connector(&archivist, "connector-f").await?; - - mk_session(&archivist, &backend, uid, "n1", Some("s1"), vec![], Some("claude-3-5-sonnet"), None, false).await?; - mk_session(&archivist, &backend, uid, "n2", Some("s2"), vec![], Some("gpt-4o"), None, false).await?; - mk_session(&archivist, &backend, uid, "n3", Some("s3"), vec![], None, None, false).await?; - - let mut q = SessionListQuery::default().with_connector(uid).with_limit(50); - q.model_filter = Some("sonnet".into()); - - let page = archivist.list_sessions_paged(q).await?; - - assert_eq!(page.items.len(), 1); - assert_eq!(page.items[0].title.as_deref(), Some("s1")); - - cleanup(temp); - Ok(()) -} - -#[tokio::test] -async fn list_sessions_paged_include_hidden() -> Result<()> { - let (archivist, backend, 
temp) = mk_archivist().await?; - let uid = mk_connector(&archivist, "connector-g").await?; - - mk_session(&archivist, &backend, uid, "n1", Some("visible"), vec![], None, None, false).await?; - mk_session(&archivist, &backend, uid, "n2", Some("hidden"), vec![], None, None, true).await?; - - let visible_only = archivist - .list_sessions_paged(SessionListQuery::default().with_connector(uid).with_limit(50)) - .await?; - assert_eq!(visible_only.items.len(), 1); - assert_eq!(visible_only.items[0].title.as_deref(), Some("visible")); - - let all = archivist - .list_sessions_paged( - SessionListQuery::default() - .with_connector(uid) - .with_limit(50) - .with_include_hidden(true), - ) - .await?; - assert_eq!(all.items.len(), 2); - - cleanup(temp); - Ok(()) -} - -#[tokio::test] -async fn list_sessions_paged_project_scope() -> Result<()> { - let (archivist, backend, temp) = mk_archivist().await?; - let c1 = mk_connector(&archivist, "connector-h1").await?; - let c2 = mk_connector(&archivist, "connector-h2").await?; - - mk_session(&archivist, &backend, c1, "n1", Some("proj-a-1"), vec![], None, Some("proj-a"), false).await?; - mk_session(&archivist, &backend, c1, "n2", Some("proj-b-1"), vec![], None, Some("proj-b"), false).await?; - mk_session(&archivist, &backend, c2, "n3", Some("proj-a-2"), vec![], None, Some("proj-a"), false).await?; - - let page = archivist - .list_sessions_paged( - SessionListQuery::default() - .with_project("proj-a") - .with_limit(50), - ) - .await?; - - assert_eq!(page.items.len(), 2); - for s in &page.items { - assert_eq!(s.metadata.get("project_id").and_then(|v| v.as_str()), Some("proj-a")); - } - - cleanup(temp); - Ok(()) -} diff --git a/crates/dirigent_archivist/tests/multi_backend_boot_test.rs b/crates/dirigent_archivist/tests/multi_backend_boot_test.rs deleted file mode 100644 index 8ee7a78..0000000 --- a/crates/dirigent_archivist/tests/multi_backend_boot_test.rs +++ /dev/null @@ -1,130 +0,0 @@ -use dirigent_archivist::{ - coordinator::Archivist, - 
error::ArchivistBootError, - registry::{ArchivesConfig, BackendRegistry}, -}; - -fn parse(toml_src: &str) -> ArchivesConfig { - toml::from_str(toml_src).unwrap() -} - -#[tokio::test] -async fn boot_with_one_jsonl_archive() { - let dir = tempfile::tempdir().unwrap(); - let cfg = parse(&format!( - r#" - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "{}" - "#, - dir.path().to_string_lossy().replace('\\', "/") - )); - let registry = BackendRegistry::with_jsonl(); - let _archivist = Archivist::from_config(cfg, ®istry, None).await.unwrap(); -} - -#[tokio::test] -async fn boot_empty_config_is_ephemeral() { - let cfg: ArchivesConfig = toml::from_str("").unwrap(); - let registry = BackendRegistry::with_jsonl(); - let archivist = Archivist::from_config(cfg, ®istry, None).await.unwrap(); - let archives = archivist.list_archives().await.unwrap(); - assert!(archives.is_empty()); -} - -#[tokio::test] -async fn boot_unknown_type_errors() { - let cfg = parse( - r#" - [[archives]] - name = "x" - type = "nope" - [archives.params] - "#, - ); - let registry = BackendRegistry::with_jsonl(); - let result = Archivist::from_config(cfg, ®istry, None).await; - match result { - Ok(_) => panic!("expected UnknownType error"), - Err(err) => assert!( - matches!(err, ArchivistBootError::UnknownType { .. 
}), - "expected UnknownType, got {err:?}" - ), - } -} - -#[tokio::test] -async fn boot_no_primary_errors() { - let cfg = parse( - r#" - [[archives]] - name = "mirror" - type = "jsonl" - failure_mode = "best_effort" - [archives.params] - path = "/tmp/whatever" - "#, - ); - let registry = BackendRegistry::with_jsonl(); - let result = Archivist::from_config(cfg, ®istry, None).await; - match result { - Ok(_) => panic!("expected Validation error"), - Err(err) => assert!( - matches!(err, ArchivistBootError::Validation(_)), - "expected Validation, got {err:?}" - ), - } -} - -#[tokio::test] -async fn boot_duplicate_name_errors() { - let dir = tempfile::tempdir().unwrap(); - let cfg = parse(&format!( - r#" - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "{p}" - - [[archives]] - name = "main" - type = "jsonl" - [archives.params] - path = "{p}" - "#, - p = dir.path().to_string_lossy().replace('\\', "/"), - )); - let registry = BackendRegistry::with_jsonl(); - let result = Archivist::from_config(cfg, ®istry, None).await; - match result { - Ok(_) => panic!("expected Validation error"), - Err(err) => assert!( - matches!(err, ArchivistBootError::Validation(_)), - "expected Validation, got {err:?}" - ), - } -} - -#[test] -fn example_toml_parses() { - // Load the full dirigent.toml.example and parse just the [[archives]] - // section as `ArchivesConfig`. Confirms the example's archive syntax is - // valid Phase 3 schema. - let src = std::fs::read_to_string( - std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .join("../../dirigent.toml.example"), - ) - .expect("dirigent.toml.example present at workspace root"); - // Parse the whole file as a TOML value, then try to deserialize the - // full document into `ArchivesConfig`. Any `archives` subtable gets picked up; - // other top-level fields (connectors, matrix, ...) are ignored because - // `ArchivesConfig` only has `entries: Vec` via - // `#[serde(rename = "archives")]`. 
- let cfg: ArchivesConfig = - toml::from_str(&src).expect("ArchivesConfig from full example"); - cfg.validate().expect("example config validates"); - assert!(!cfg.entries.is_empty(), "example must declare at least one archive"); -} diff --git a/crates/dirigent_archivist/tests/multi_backend_capability_test.rs b/crates/dirigent_archivist/tests/multi_backend_capability_test.rs deleted file mode 100644 index 0da0620..0000000 --- a/crates/dirigent_archivist/tests/multi_backend_capability_test.rs +++ /dev/null @@ -1,76 +0,0 @@ -#![cfg(feature = "test-utils")] - -use std::sync::Arc; - -use dirigent_archivist::backend::mock::MockBackend; -use dirigent_archivist::backend::{ArchiveBackend, ArchiveCapability, CapabilitySet, HealthStatus}; -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::registry::{ArchiveRegistration, FailureMode, WritePolicy}; -use dirigent_archivist::types::{MetaEventRecord, MetaEventType}; -use uuid::Uuid; - -fn reg(name: &str, backend: Arc, priority: u32) -> Arc { - Arc::new(ArchiveRegistration::new( - name.into(), - "mock", - backend as Arc, - true, - FailureMode::Required, - priority, - true, - WritePolicy::Inline, - None, - HealthStatus::Healthy, - )) -} - -fn stub_meta_event(scroll_id: Uuid) -> MetaEventRecord { - MetaEventRecord { - version: 1, - event_id: Uuid::now_v7(), - session: scroll_id, - ts: chrono::Utc::now(), - event_type: MetaEventType::ClientConnected, - description: "test event".into(), - linked_session_id: None, - linked_connector_id: None, - linked_connector_title: None, - metadata: serde_json::Value::Null, - } -} - -#[tokio::test] -async fn capability_filter_skips_backend_without_meta_events() { - let mut caps_with_meta = CapabilitySet::new(); - caps_with_meta.insert(ArchiveCapability::MetaEvents); - caps_with_meta.insert(ArchiveCapability::SessionMapping); - caps_with_meta.insert(ArchiveCapability::ConnectorRegistry); - let with_meta = Arc::new(MockBackend::with_capabilities(caps_with_meta)); - - let mut 
caps_without_meta = CapabilitySet::new(); - caps_without_meta.insert(ArchiveCapability::SessionMapping); - caps_without_meta.insert(ArchiveCapability::ConnectorRegistry); - let without_meta = Arc::new(MockBackend::with_capabilities(caps_without_meta)); - - let archivist = Archivist::from_registrations(vec![ - reg("primary", with_meta.clone(), 0), - reg("secondary", without_meta.clone(), 10), - ]); - - let scroll = Uuid::new_v4(); - archivist - .append_meta_events(scroll, vec![stub_meta_event(scroll)], None) - .await - .unwrap(); - - // Primary received the meta event. - assert!( - with_meta.has_meta_events(scroll), - "primary should receive meta event" - ); - // Secondary was capability-skipped. - assert!( - !without_meta.has_meta_events(scroll), - "secondary should be skipped" - ); -} diff --git a/crates/dirigent_archivist/tests/multi_backend_cross_test.rs b/crates/dirigent_archivist/tests/multi_backend_cross_test.rs deleted file mode 100644 index 42c464c..0000000 --- a/crates/dirigent_archivist/tests/multi_backend_cross_test.rs +++ /dev/null @@ -1,121 +0,0 @@ -#![cfg(feature = "test-utils")] - -use std::sync::Arc; - -use dirigent_archivist::backend::mock::MockBackend; -use dirigent_archivist::backend::ArchiveBackend; -use dirigent_archivist::backend::HealthStatus; -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::error::ArchivistError; -use dirigent_archivist::registry::{ArchiveRegistration, FailureMode, WritePolicy}; -use dirigent_archivist::types::SessionMetadata; -use uuid::Uuid; - -async fn dual_backend_coordinator() -> (Archivist, Arc, Arc) { - let a = Arc::new(MockBackend::new()); - let b = Arc::new(MockBackend::new()); - let regs = vec![ - Arc::new(ArchiveRegistration::new( - "a".into(), - "mock", - a.clone() as Arc, - true, - FailureMode::Required, - 0, - true, - WritePolicy::Inline, - None, - HealthStatus::Healthy, - )), - Arc::new(ArchiveRegistration::new( - "b".into(), - "mock", - b.clone() as Arc, - true, - 
FailureMode::Required, - 10, - true, - WritePolicy::Inline, - None, - HealthStatus::Healthy, - )), - ]; - (Archivist::from_registrations(regs), a, b) -} - -#[tokio::test] -async fn copy_session_carries_metadata_and_messages() { - let (archivist, a, b) = dual_backend_coordinator().await; - let scroll = Uuid::new_v4(); - - // Seed `a` only. - a.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - a.append_messages(scroll, vec![]).await.unwrap(); - - archivist.copy_session(scroll, "a", "b").await.unwrap(); - - assert!(b.get_session(scroll).await.unwrap().is_some()); - assert!(a.get_session(scroll).await.unwrap().is_some()); -} - -#[tokio::test] -async fn move_session_removes_from_source() { - let (archivist, a, b) = dual_backend_coordinator().await; - let scroll = Uuid::new_v4(); - a.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - - archivist.move_session(scroll, "a", "b").await.unwrap(); - - assert!(a.get_session(scroll).await.unwrap().is_none()); - assert!(b.get_session(scroll).await.unwrap().is_some()); - assert_eq!( - archivist.read_cache_size().await, - 1, - "cache should now reflect the move" - ); -} - -#[tokio::test] -async fn move_session_partial_failure_returns_partial_move_error() { - let (archivist, a, b) = dual_backend_coordinator().await; - let scroll = Uuid::new_v4(); - a.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - - // The source-side delete happens AFTER the copy. Inject ONE write failure - // AFTER the copy has already consumed the write capacity. `MockBackend`'s - // inject_write_failures decrements on every mutating call — so we: - // 1. perform the copy through the archivist (uses put_session+append on `b`, - // but NO writes on `a`, since reads happen on the source side). - // 2. THEN inject a write failure on `a` to make the delete fail. - // - // Actually `copy_session` reads from `a` then writes to `b`, no writes on `a`. 
- // So we can safely inject BEFORE calling move_session: the only write on `a` - // during move_session is the delete, which will hit the injected failure. - - a.inject_write_failures(1); - - let err = archivist.move_session(scroll, "a", "b").await.unwrap_err(); - assert!(matches!(err, ArchivistError::PartialMove { .. })); - - // Both backends now have the session. - assert!(a.get_session(scroll).await.unwrap().is_some()); - assert!(b.get_session(scroll).await.unwrap().is_some()); -} - -#[tokio::test] -async fn delete_session_fans_out_and_invalidates_cache() { - let (archivist, a, b) = dual_backend_coordinator().await; - let scroll = Uuid::new_v4(); - a.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - b.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - - // Prime the cache with a read. - let _ = archivist.get_session_metadata(scroll, None).await.unwrap(); - assert_eq!(archivist.read_cache_size().await, 1); - - archivist.delete_session(scroll, None).await.unwrap(); - - assert!(a.get_session(scroll).await.unwrap().is_none()); - assert!(b.get_session(scroll).await.unwrap().is_none()); - assert_eq!(archivist.read_cache_size().await, 0); -} diff --git a/crates/dirigent_archivist/tests/multi_backend_fanout_test.rs b/crates/dirigent_archivist/tests/multi_backend_fanout_test.rs deleted file mode 100644 index e4e411d..0000000 --- a/crates/dirigent_archivist/tests/multi_backend_fanout_test.rs +++ /dev/null @@ -1,124 +0,0 @@ -#![cfg(feature = "test-utils")] - -use std::sync::Arc; - -use dirigent_archivist::backend::mock::MockBackend; -use dirigent_archivist::backend::{ArchiveBackend, HealthStatus}; -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::registry::{ArchiveRegistration, FailureMode, WritePolicy}; -use uuid::Uuid; - -fn reg( - name: &str, - backend: Arc, - priority: u32, - failure: FailureMode, -) -> Arc { - Arc::new(ArchiveRegistration::new( - name.into(), - "mock", - backend as Arc, - true, - failure, - priority, - 
true, - WritePolicy::Inline, - None, - HealthStatus::Healthy, - )) -} - -fn sample_message(session: Uuid) -> dirigent_archivist::types::MessageRecord { - dirigent_archivist::types::MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session, - parent_id: None, - ts: chrono::Utc::now(), - role: "user".into(), - author: None, - content_md: "hi".into(), - content_parts: None, - attachments: vec![], - metadata: serde_json::Value::Null, - } -} - -#[tokio::test] -async fn write_fans_out_to_both_backends() { - let a = Arc::new(MockBackend::new()); - let b = Arc::new(MockBackend::new()); - let archivist = Archivist::from_registrations(vec![ - reg("a", a.clone(), 0, FailureMode::Required), - reg("b", b.clone(), 10, FailureMode::BestEffort), - ]); - - // Using a non-empty message vec for a robust positive-count check: - let scroll = Uuid::new_v4(); - let m = sample_message(scroll); - archivist - .append_messages(scroll, vec![m], None) - .await - .unwrap(); - assert_eq!(a.appended_count(scroll), 1); - assert_eq!(b.appended_count(scroll), 1); -} - -#[tokio::test] -async fn best_effort_failure_does_not_propagate() { - let a = Arc::new(MockBackend::new()); - let b = Arc::new(MockBackend::new()); - b.inject_write_failures(1); - - let archivist = Archivist::from_registrations(vec![ - reg("a", a.clone(), 0, FailureMode::Required), - reg("b", b.clone(), 10, FailureMode::BestEffort), - ]); - - archivist - .append_messages(Uuid::new_v4(), vec![], None) - .await - .unwrap(); // Ok despite secondary failure - - let snapshot = archivist.list_archives_with_health().await; - let b_status = snapshot.iter().find(|s| s.name == "b").unwrap(); - assert!(matches!(b_status.health, HealthStatus::Degraded { .. 
})); -} - -#[tokio::test] -async fn required_secondary_failure_propagates() { - let a = Arc::new(MockBackend::new()); - let b = Arc::new(MockBackend::new()); - b.inject_write_failures(1); - - let archivist = Archivist::from_registrations(vec![ - reg("a", a.clone(), 0, FailureMode::Required), - reg("b", b.clone(), 10, FailureMode::Required), - ]); - - let err = archivist - .append_messages(Uuid::new_v4(), vec![], None) - .await; - assert!(err.is_err(), "expected error when required secondary fails"); -} - -#[tokio::test] -async fn explicit_archive_overrides_default_primary() { - let a = Arc::new(MockBackend::new()); - let b = Arc::new(MockBackend::new()); - let archivist = Archivist::from_registrations(vec![ - reg("a", a.clone(), 0, FailureMode::Required), - reg("b", b.clone(), 10, FailureMode::Required), - ]); - - let scroll = Uuid::new_v4(); - let m = sample_message(scroll); - archivist - .append_messages(scroll, vec![m], Some("b".into())) - .await - .unwrap(); - - // Both receive the write: b is explicit primary, a is secondary via fanout. 
- assert_eq!(a.appended_count(scroll), 1); - assert_eq!(b.appended_count(scroll), 1); -} diff --git a/crates/dirigent_archivist/tests/multi_backend_health_test.rs b/crates/dirigent_archivist/tests/multi_backend_health_test.rs deleted file mode 100644 index 65363c4..0000000 --- a/crates/dirigent_archivist/tests/multi_backend_health_test.rs +++ /dev/null @@ -1,129 +0,0 @@ -#![cfg(feature = "test-utils")] - -use std::sync::Arc; - -use dirigent_archivist::backend::mock::MockBackend; -use dirigent_archivist::backend::{ArchiveBackend, HealthStatus}; -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::registry::{ArchiveRegistration, FailureMode, WritePolicy}; -use dirigent_archivist::types::SessionMetadata; -use uuid::Uuid; - -fn reg( - name: &str, - backend: Arc, - priority: u32, - failure: FailureMode, -) -> Arc { - Arc::new(ArchiveRegistration::new( - name.into(), - "mock", - backend as Arc, - true, - failure, - priority, - true, - WritePolicy::Inline, - None, - HealthStatus::Healthy, - )) -} - -#[tokio::test] -async fn five_consecutive_failures_drifts_to_unavailable() { - let primary = Arc::new(MockBackend::new()); - let secondary = Arc::new(MockBackend::new()); - secondary.inject_write_failures(10); - - let archivist = Archivist::from_registrations(vec![ - reg("primary", primary.clone(), 0, FailureMode::Required), - reg("secondary", secondary.clone(), 10, FailureMode::BestEffort), - ]); - - for _ in 0..5 { - archivist - .append_messages(Uuid::new_v4(), vec![], None) - .await - .ok(); - } - - let snapshot = archivist.list_archives_with_health().await; - let secondary_status = snapshot.iter().find(|s| s.name == "secondary").unwrap(); - assert!( - matches!(secondary_status.health, HealthStatus::Unavailable { .. 
}), - "secondary should be Unavailable after 5 failures; got {:?}", - secondary_status.health - ); -} - -#[tokio::test] -async fn success_after_failure_recovers_to_healthy() { - let backend = Arc::new(MockBackend::new()); - backend.inject_write_failures(1); - - let archivist = Archivist::from_registrations(vec![reg( - "only", - backend.clone(), - 0, - FailureMode::Required, - )]); - - // First call fails. - let _ = archivist - .append_messages(Uuid::new_v4(), vec![], None) - .await; - let snapshot = archivist.list_archives_with_health().await; - assert!( - matches!(snapshot[0].health, HealthStatus::Degraded { .. }), - "expected Degraded after first failure; got {:?}", - snapshot[0].health - ); - - // Second call succeeds — health returns to Healthy. - archivist - .append_messages(Uuid::new_v4(), vec![], None) - .await - .unwrap(); - let snapshot = archivist.list_archives_with_health().await; - assert!( - matches!(snapshot[0].health, HealthStatus::Healthy), - "expected Healthy after recovery; got {:?}", - snapshot[0].health - ); -} - -#[tokio::test] -async fn unavailable_backend_skipped_during_read_walk() { - let primary = Arc::new(MockBackend::new()); - let secondary = Arc::new(MockBackend::new()); - - let scroll = Uuid::new_v4(); - secondary - .put_session(SessionMetadata::stub(scroll)) - .await - .unwrap(); - secondary.break_permanently("kaput"); - - let primary_reg = reg("primary", primary.clone(), 0, FailureMode::Required); - let secondary_reg = reg("secondary", secondary.clone(), 10, FailureMode::Required); - // Force secondary's cached health to Unavailable BEFORE the walk, - // so the routing layer skips it entirely rather than attempting + failing. - *secondary_reg.last_health.write().await = HealthStatus::Unavailable { - reason: "test".into(), - }; - - let archivist = Archivist::from_registrations(vec![primary_reg, secondary_reg]); - - // Primary doesn't have the session; secondary has it but is marked Unavailable. 
- // Read walk should skip secondary → Ok(None)-style ergonomics, bubbling up - // as `SessionUnknown` per `get_session_metadata`'s contract. - let result = archivist.get_session_metadata(scroll, None).await; - assert!( - result.is_err(), - "expected SessionUnknown error when Unavailable backend is skipped" - ); - assert!(matches!( - result.unwrap_err(), - dirigent_archivist::error::ArchivistError::SessionUnknown(_) - )); -} diff --git a/crates/dirigent_archivist/tests/multi_backend_routing_test.rs b/crates/dirigent_archivist/tests/multi_backend_routing_test.rs deleted file mode 100644 index 7255957..0000000 --- a/crates/dirigent_archivist/tests/multi_backend_routing_test.rs +++ /dev/null @@ -1,102 +0,0 @@ -#![cfg(feature = "test-utils")] - -use std::sync::Arc; - -use dirigent_archivist::backend::mock::MockBackend; -use dirigent_archivist::backend::{ArchiveBackend, HealthStatus}; -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::registry::{ArchiveRegistration, FailureMode, WritePolicy}; -use dirigent_archivist::types::SessionMetadata; -use uuid::Uuid; - -fn reg(name: &str, backend: Arc, priority: u32) -> Arc { - Arc::new(ArchiveRegistration::new( - name.into(), - "mock", - backend as Arc, - true, - FailureMode::Required, - priority, - true, - WritePolicy::Inline, - None, - HealthStatus::Healthy, - )) -} - -#[tokio::test] -async fn high_priority_backend_serves_first() { - let high = Arc::new(MockBackend::new()); - let low = Arc::new(MockBackend::new()); - let scroll = Uuid::new_v4(); - high.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - - let archivist = Archivist::from_registrations(vec![ - reg("high", high.clone(), 0), - reg("low", low.clone(), 10), - ]); - - let meta = archivist.get_session_metadata(scroll, None).await; - assert!(meta.is_ok(), "expected Ok; got {:?}", meta); - assert_eq!(archivist.read_cache_size().await, 1); -} - -#[tokio::test] -async fn falls_through_to_lower_priority_when_high_misses() { - let high = 
Arc::new(MockBackend::new()); - let low = Arc::new(MockBackend::new()); - let scroll = Uuid::new_v4(); - low.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - - let archivist = Archivist::from_registrations(vec![ - reg("high", high.clone(), 0), - reg("low", low.clone(), 10), - ]); - - let meta = archivist.get_session_metadata(scroll, None).await; - assert!(meta.is_ok(), "expected Ok; got {:?}", meta); - assert_eq!(archivist.read_cache_size().await, 1); -} - -#[tokio::test] -async fn cache_makes_second_read_skip_priority_walk() { - let high = Arc::new(MockBackend::new()); - let low = Arc::new(MockBackend::new()); - let scroll = Uuid::new_v4(); - low.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - - let archivist = Archivist::from_registrations(vec![ - reg("high", high.clone(), 0), - reg("low", low.clone(), 10), - ]); - - // Prime the cache. - let _ = archivist.get_session_metadata(scroll, None).await.unwrap(); - - // Inject a read failure on `high` to detect whether the second read walks it. - // If the cache works, `high` must NOT be touched. 
- high.inject_read_failures(1); - let _ = archivist.get_session_metadata(scroll, None).await.unwrap(); - - let snapshot = archivist.list_archives_with_health().await; - let high_status = snapshot.iter().find(|s| s.name == "high").unwrap(); - assert!( - matches!(high_status.health, HealthStatus::Healthy), - "cache should have skipped `high`; got {:?}", - high_status.health - ); -} - -#[tokio::test] -async fn delete_invalidates_cache() { - let high = Arc::new(MockBackend::new()); - let scroll = Uuid::new_v4(); - high.put_session(SessionMetadata::stub(scroll)).await.unwrap(); - - let archivist = Archivist::from_registrations(vec![reg("high", high.clone(), 0)]); - let _ = archivist.get_session_metadata(scroll, None).await.unwrap(); - assert_eq!(archivist.read_cache_size().await, 1); - - archivist.delete_session(scroll, None).await.unwrap(); - assert_eq!(archivist.read_cache_size().await, 0); -} diff --git a/crates/dirigent_archivist/tests/multi_backend_writer_test.rs b/crates/dirigent_archivist/tests/multi_backend_writer_test.rs deleted file mode 100644 index 9ba8565..0000000 --- a/crates/dirigent_archivist/tests/multi_backend_writer_test.rs +++ /dev/null @@ -1,252 +0,0 @@ -#![cfg(feature = "test-utils")] - -//! Integration tests for Task 17's per-backend queued writer task. -//! -//! These exercise the full enqueue → batch → coalesce → dispatch pipeline -//! end-to-end by constructing real writer tasks against `MockBackend` -//! instances and driving them through the `Archivist` coordinator. -//! -//! The tests are timing-sensitive: the batch window is 25ms and the -//! backpressure test artificially slows the backend. Assertions use -//! tolerant margins so they survive CI jitter. 
- -use std::sync::Arc; -use std::time::Duration; - -use dirigent_archivist::backend::mock::MockBackend; -use dirigent_archivist::backend::{ArchiveBackend, HealthStatus}; -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::registry::writer::spawn_writer; -use dirigent_archivist::registry::{ - ArchiveRegistration, FailureMode, OverflowPolicy, WritePolicy, -}; -use uuid::Uuid; - -fn sample_message(scroll: Uuid) -> dirigent_archivist::types::MessageRecord { - dirigent_archivist::types::MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: scroll, - parent_id: None, - ts: chrono::Utc::now(), - role: "user".into(), - author: None, - content_md: "hi".into(), - content_parts: None, - attachments: vec![], - metadata: serde_json::Value::Null, - } -} - -fn queued_reg( - name: &str, - backend: Arc, - priority: u32, - overflow: OverflowPolicy, -) -> Arc { - let initial_health = HealthStatus::Healthy; - let policy = WritePolicy::Queued { - batch_window_ms: 25, - capacity: 8, - overflow, - }; - - let health = Arc::new(tokio::sync::RwLock::new(initial_health)); - let last_error = Arc::new(tokio::sync::RwLock::new(None)); - let consecutive = Arc::new(tokio::sync::RwLock::new(0u32)); - - let writer = Some(spawn_writer( - backend.clone() as Arc, - name.into(), - 8, - Duration::from_millis(25), - overflow, - health.clone(), - last_error.clone(), - consecutive.clone(), - )); - - Arc::new(ArchiveRegistration::new_with_shared_state( - name.into(), - "mock", - backend as Arc, - true, - FailureMode::Required, - priority, - true, - policy, - writer, - health, - last_error, - consecutive, - )) -} - -#[tokio::test] -async fn queued_write_returns_immediately_then_eventually_lands() { - let mock = Arc::new(MockBackend::new()); - let archivist = Archivist::from_registrations(vec![queued_reg( - "queued", - mock.clone(), - 0, - OverflowPolicy::Block, - )]); - - let scroll = Uuid::new_v4(); - archivist - .append_messages(scroll, vec![sample_message(scroll)], 
None) - .await - .unwrap(); - - // Wait up to 500ms for the writer to drain. - let mut landed = false; - for _ in 0..50 { - if mock.appended_count(scroll) > 0 { - landed = true; - break; - } - tokio::time::sleep(Duration::from_millis(10)).await; - } - assert!(landed, "writer task did not drain within 500ms"); - assert_eq!(mock.appended_count(scroll), 1); - - archivist.shutdown().await.unwrap(); -} - -#[tokio::test] -async fn coalescing_merges_consecutive_appends_for_same_scroll() { - let mock = Arc::new(MockBackend::new()); - let archivist = Archivist::from_registrations(vec![queued_reg( - "queued", - mock.clone(), - 0, - OverflowPolicy::Block, - )]); - - let scroll = Uuid::new_v4(); - for _ in 0..5 { - archivist - .append_messages(scroll, vec![sample_message(scroll)], None) - .await - .unwrap(); - } - - // Give the writer time to drain + coalesce, then shut down to guarantee - // any still-queued ops are flushed before we assert. - tokio::time::sleep(Duration::from_millis(200)).await; - archivist.shutdown().await.unwrap(); - - // Five enqueued ops may have been coalesced into fewer backend calls. - // The only strict invariant we can reliably assert is: the total number - // of backend `append_messages` INVOCATIONS is <= 5. - assert!( - mock.append_call_count(scroll) <= 5, - "expected <= 5 backend calls, got {}", - mock.append_call_count(scroll) - ); - assert_eq!( - mock.appended_count(scroll), - 5, - "all 5 messages should land" - ); -} - -#[tokio::test] -async fn overflow_block_applies_backpressure() { - // For backpressure to visibly stall the sender, we need four things: - // 1. A tight queue (capacity=2) so the channel actually fills up. - // 2. A slow backend (per-op 50ms) so the writer stalls in dispatch - // long enough for the channel to fill. - // 3. batch_window=0 so the writer spends (almost) all its time in - // the 50ms per-op sleep instead of draining fast inside the - // batch-collection phase. - // 4. 
Distinct scroll IDs so the writer's same-scroll coalescing - // doesn't merge everything into one dispatch call (which would - // collapse the entire batch into a single 50ms sleep). - // With those, the writer dispatches N serial 50ms calls; while it's - // sleeping the sender can't fit its next op into the full channel - // and must wait for a drain. - let mock = Arc::new(MockBackend::new()); - mock.set_per_op_delay(Duration::from_millis(50)); - - let capacity = 2usize; - let overflow = OverflowPolicy::Block; - // batch_window=0 means the writer dispatches each op immediately and - // spends (almost) all its time in the 50ms per-op sleep — so the - // channel stays full and the sender has to wait on every drain. - let policy = WritePolicy::Queued { - batch_window_ms: 0, - capacity, - overflow, - }; - - let health = Arc::new(tokio::sync::RwLock::new(HealthStatus::Healthy)); - let last_error = Arc::new(tokio::sync::RwLock::new(None)); - let consecutive = Arc::new(tokio::sync::RwLock::new(0u32)); - - let writer = Some(spawn_writer( - mock.clone() as Arc, - "queued".into(), - capacity, - Duration::from_millis(0), - overflow, - health.clone(), - last_error.clone(), - consecutive.clone(), - )); - - let reg = Arc::new(ArchiveRegistration::new_with_shared_state( - "queued".into(), - "mock", - mock.clone() as Arc, - true, - FailureMode::Required, - 0, - true, - policy, - writer, - health, - last_error, - consecutive, - )); - - let archivist = Archivist::from_registrations(vec![reg]); - - // Prime the writer with one op and wait just long enough for it to - // enter its first 50ms dispatch sleep. After that the writer is NOT - // recv'ing, so the tight capacity=2 channel fills and further sends - // must wait for a drain. 
- let scroll0 = Uuid::new_v4(); - archivist - .append_messages(scroll0, vec![sample_message(scroll0)], None) - .await - .unwrap(); - tokio::time::sleep(Duration::from_millis(10)).await; - - // Now measure the cost of many more sends with distinct scroll IDs - // so the writer can't coalesce them. Each dispatch call is 50ms, the - // queue holds only 2, so the sender must wait repeatedly for the - // writer to drain cycles. - let start = std::time::Instant::now(); - for _ in 0..24 { - let scroll = Uuid::new_v4(); - archivist - .append_messages(scroll, vec![sample_message(scroll)], None) - .await - .unwrap(); - } - let elapsed = start.elapsed(); - - // With 24 distinct-scroll sends, a capacity=2 queue, batch_window=0, - // and a 50ms per-op delay, the sender cannot finish instantly — the - // writer needs many drain cycles and the sender waits on each. A - // 100ms floor keeps the test meaningful (a non-blocking run measures - // in microseconds) while being lenient on CI jitter. - assert!( - elapsed >= Duration::from_millis(100), - "block policy did not apply backpressure (elapsed: {:?})", - elapsed - ); - - archivist.shutdown().await.unwrap(); -} diff --git a/crates/dirigent_archivist/tests/pagination_test.rs b/crates/dirigent_archivist/tests/pagination_test.rs deleted file mode 100644 index b26191b..0000000 --- a/crates/dirigent_archivist/tests/pagination_test.rs +++ /dev/null @@ -1,142 +0,0 @@ -//! Pagination tests for dirigent_archivist -//! -//! These tests verify the count_messages and get_messages_range functionality. - -#[cfg(test)] -mod pagination_tests { - use chrono::Utc; - use dirigent_archivist::{ - backends::JsonlBackend, Archivist, MessageRecord, RegisterConnectorRequest, - RegisterSessionRequest, Result, - }; - use std::sync::Arc; - use uuid::Uuid; - - /// Build a self-contained coordinator rooted at `archive_root`, backed by - /// a single `JsonlBackend`. 
Avoids the shared `.archives.json` race that - /// `new_with_single_archive` creates in the tempdir's parent. - async fn mk_archivist(archive_root: std::path::PathBuf) -> Result { - let backend = Arc::new(JsonlBackend::new(archive_root).await?); - Archivist::from_single_backend("main".into(), backend).await - } - - #[tokio::test] - async fn test_pagination_count_and_range() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let archivist = mk_archivist(temp_dir.clone()).await?; - - // Register connector - let connector_req = RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Test Connector".to_string(), - client_native_id: "opencode@localhost:12225".to_string(), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }; - let connector_response = archivist.register_connector(connector_req, None).await?; - - // Register session - let session_req = RegisterSessionRequest { - connector_uid: connector_response.connector_uid, - native_session_id: "native-123".to_string(), - title: Some("Pagination Test".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }; - let session_response = archivist.register_session(session_req, None).await?; - let scroll_id = session_response.scroll_id; - - // Test empty session - let count = archivist.count_messages(scroll_id, None).await?; - assert_eq!(count, 0, "Empty session should have 0 messages"); - - let range = archivist.get_messages_range(scroll_id, 0, 10, None).await?; - assert_eq!(range.len(), 0, "Empty session should return empty range"); - - // Add 25 messages with varying timestamps - let mut messages = Vec::new(); - let base_time = Utc::now(); - for i in 0..25 { - messages.push(MessageRecord { - version: 1, - message_id: 
Uuid::now_v7(), - session: scroll_id, - parent_id: None, - ts: base_time + chrono::Duration::seconds(i), - role: if i % 2 == 0 { "user" } else { "assistant" }.to_string(), - author: Some("test".to_string()), - content_md: format!("Message {}", i), - content_parts: None, - attachments: Vec::new(), - metadata: serde_json::json!({}), - }); - } - archivist.append_messages(scroll_id, messages, None).await?; - - // Test count_messages - let count = archivist.count_messages(scroll_id, None).await?; - assert_eq!(count, 25, "Should count 25 messages"); - - // Test get_messages_range - first page - let page1 = archivist.get_messages_range(scroll_id, 0, 10, None).await?; - assert_eq!(page1.len(), 10, "First page should have 10 messages"); - assert_eq!(page1[0].content_md, "Message 0", "First message should be Message 0"); - assert_eq!(page1[9].content_md, "Message 9", "10th message should be Message 9"); - - // Test get_messages_range - second page - let page2 = archivist.get_messages_range(scroll_id, 10, 10, None).await?; - assert_eq!(page2.len(), 10, "Second page should have 10 messages"); - assert_eq!(page2[0].content_md, "Message 10", "11th message should be Message 10"); - assert_eq!(page2[9].content_md, "Message 19", "20th message should be Message 19"); - - // Test get_messages_range - partial last page - let page3 = archivist.get_messages_range(scroll_id, 20, 10, None).await?; - assert_eq!(page3.len(), 5, "Last page should have 5 messages"); - assert_eq!(page3[0].content_md, "Message 20", "21st message should be Message 20"); - assert_eq!(page3[4].content_md, "Message 24", "25th message should be Message 24"); - - // Test get_messages_range - offset beyond messages - let page4 = archivist.get_messages_range(scroll_id, 30, 10, None).await?; - assert_eq!(page4.len(), 0, "Offset beyond messages should return empty"); - - // Verify chronological ordering is maintained in pagination - let all_messages = archivist.get_messages(scroll_id, None).await?; - let 
first_10_from_all = &all_messages[0..10]; - let first_10_from_page = &page1[..]; - - for i in 0..10 { - assert_eq!( - first_10_from_all[i].message_id, - first_10_from_page[i].message_id, - "Pagination should maintain same order as get_messages()" - ); - } - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } - - #[tokio::test] - async fn test_count_messages_nonexistent_session() -> Result<()> { - let temp_dir = std::env::temp_dir().join(format!("archivist_test_{}", Uuid::now_v7())); - let archivist = mk_archivist(temp_dir.clone()).await?; - - // Count messages for non-existent session (should return 0, not error) - let nonexistent_scroll_id = Uuid::now_v7(); - let count = archivist.count_messages(nonexistent_scroll_id, None).await?; - assert_eq!(count, 0, "Non-existent session should have 0 messages"); - - // Clean up - tokio::fs::remove_dir_all(temp_dir).await.ok(); - Ok(()) - } -} diff --git a/crates/dirigent_chatgpt/CLAUDE.md b/crates/dirigent_chatgpt/CLAUDE.md deleted file mode 100644 index 9af4a4f..0000000 --- a/crates/dirigent_chatgpt/CLAUDE.md +++ /dev/null @@ -1,32 +0,0 @@ -# Package: dirigent_chatgpt - -Pure-Rust parser for OpenAI's ChatGPT `conversations.json` data export. - -## Scope - -- `parse_export(path)` — reads a `conversations.json` file on disk and - returns `Vec`. -- `parse_str(json)` — parses an in-memory JSON string (useful for tests - and piped inputs). -- Types: `ParsedConversation`, `ParsedMessage`, `ContentPart` (`Text`, - `Code`, `Tool`). - -No dirigent-specific types. `dirigent_archivist::import::sources::chatgpt` -consumes this crate and maps into the archivist's internal types. - -## Example - -```rust -let convs = dirigent_chatgpt::parse_export(path)?; -for c in convs { - println!("{}: {} messages", c.title.as_deref().unwrap_or("(untitled)"), c.messages.len()); -} -``` - -## Failure modes - -- Truly broken JSON → `ParseError::Json`. -- Malformed individual messages are skipped where possible. 
-- Unknown content shapes are preserved as best-effort text in - `ContentPart::Text { text: raw_json }` so no user data is silently - lost. diff --git a/crates/dirigent_chatgpt/Cargo.toml b/crates/dirigent_chatgpt/Cargo.toml deleted file mode 100644 index f52e6a0..0000000 --- a/crates/dirigent_chatgpt/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "dirigent_chatgpt" -version = "0.1.0" -edition = "2021" - -[dependencies] -chrono = { version = "0.4", features = ["serde"] } -serde = { version = "1", features = ["derive"] } -serde_json = "1" -thiserror = "1" -uuid = { version = "1", features = ["v4", "v7", "serde"] } - -[dev-dependencies] diff --git a/crates/dirigent_chatgpt/src/lib.rs b/crates/dirigent_chatgpt/src/lib.rs deleted file mode 100644 index 164f075..0000000 --- a/crates/dirigent_chatgpt/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! ChatGPT export parser. Zero dirigent-specific types. - -pub mod parser; -pub mod types; - -pub use parser::{parse_export, parse_str, ParseError}; -pub use types::{ContentPart, ParsedConversation, ParsedMessage}; diff --git a/crates/dirigent_chatgpt/src/parser.rs b/crates/dirigent_chatgpt/src/parser.rs deleted file mode 100644 index 00370f7..0000000 --- a/crates/dirigent_chatgpt/src/parser.rs +++ /dev/null @@ -1,349 +0,0 @@ -use std::path::Path; -use chrono::{DateTime, TimeZone, Utc}; -use thiserror::Error; - -use crate::types::{ContentPart, ParsedConversation, ParsedMessage}; - -#[derive(Debug, Error)] -pub enum ParseError { - #[error("I/O: {0}")] Io(#[from] std::io::Error), - #[error("JSON: {0}")] Json(#[from] serde_json::Error), - #[error("unsupported shape: {0}")] UnsupportedShape(String), -} - -/// Parse a ChatGPT `conversations.json` file into a list of conversations. -pub fn parse_export(path: &Path) -> Result, ParseError> { - let text = std::fs::read_to_string(path)?; - parse_str(&text) -} - -/// Parse a JSON string of conversations. 
-pub fn parse_str(json: &str) -> Result, ParseError> { - // ChatGPT conversations.json is a JSON array of conversation objects. - let root: serde_json::Value = serde_json::from_str(json)?; - let arr = root.as_array() - .ok_or_else(|| ParseError::UnsupportedShape("expected JSON array at root".into()))?; - let mut out = Vec::with_capacity(arr.len()); - for conv in arr { - out.push(convert_conversation(conv)?); - } - Ok(out) -} - -fn convert_conversation(conv: &serde_json::Value) -> Result { - let id = conv.get("id") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - let title = conv.get("title").and_then(|v| v.as_str()).map(String::from); - let created_at = conv.get("create_time").and_then(parse_unix_time); - let updated_at = conv.get("update_time").and_then(parse_unix_time); - - // Walk the mapping tree if present; otherwise return empty messages. - let messages = if let Some(mapping) = conv.get("mapping").and_then(|v| v.as_object()) { - walk_mapping(mapping) - } else { - Vec::new() - }; - - // Preserve whatever metadata we can't otherwise capture. - let mut metadata = serde_json::Map::new(); - for key in &["conversation_id", "gizmo_id", "model", "default_model_slug", "moderation_results"] { - if let Some(v) = conv.get(*key) { - metadata.insert((*key).to_string(), v.clone()); - } - } - - Ok(ParsedConversation { - id, - title, - created_at, - updated_at, - messages, - metadata: if metadata.is_empty() { - serde_json::Value::Null - } else { - serde_json::Value::Object(metadata) - }, - }) -} - -/// Walk the `mapping` tree starting at the root (parent=null), DFS in -/// `create_time` order, collecting non-null messages. -fn walk_mapping( - mapping: &serde_json::Map, -) -> Vec { - // Find roots: nodes with parent == null, or (fallback) nodes not - // referenced as a child by any other node. 
- let mut roots: Vec<&str> = mapping - .iter() - .filter_map(|(id, node)| { - let parent = node.get("parent"); - let is_root = match parent { - None => true, - Some(serde_json::Value::Null) => true, - _ => false, - }; - if is_root { - Some(id.as_str()) - } else { - None - } - }) - .collect(); - - // Fallback: if we didn't find any via the `parent` signal, derive from - // the child-set (a root is a node that nobody lists as a child). - if roots.is_empty() { - let mut referenced: std::collections::HashSet<&str> = std::collections::HashSet::new(); - for node in mapping.values() { - if let Some(children) = node.get("children").and_then(|c| c.as_array()) { - for child in children { - if let Some(s) = child.as_str() { - referenced.insert(s); - } - } - } - } - roots = mapping - .keys() - .filter(|k| !referenced.contains(k.as_str())) - .map(|s| s.as_str()) - .collect(); - } - - let mut out: Vec = Vec::new(); - let mut visited: std::collections::HashSet = std::collections::HashSet::new(); - for root_id in roots { - dfs_collect(mapping, root_id, &mut out, &mut visited); - } - out -} - -fn dfs_collect( - mapping: &serde_json::Map, - node_id: &str, - out: &mut Vec, - visited: &mut std::collections::HashSet, -) { - if !visited.insert(node_id.to_string()) { - return; - } - let node = match mapping.get(node_id) { - Some(n) => n, - None => return, - }; - if let Some(msg) = node.get("message") { - if !msg.is_null() { - if let Some(parsed) = parse_mapping_message(msg) { - out.push(parsed); - } - } - } - - // Children, sorted by create_time when available for deterministic order. 
- if let Some(children) = node.get("children").and_then(|c| c.as_array()) { - let mut child_refs: Vec<(&str, Option)> = children - .iter() - .filter_map(|v| v.as_str()) - .map(|id| { - let ct = mapping - .get(id) - .and_then(|n| n.get("message")) - .and_then(|m| m.get("create_time")) - .and_then(|v| v.as_f64()); - (id, ct) - }) - .collect(); - child_refs.sort_by(|a, b| match (a.1, b.1) { - (Some(x), Some(y)) => x.partial_cmp(&y).unwrap_or(std::cmp::Ordering::Equal), - (Some(_), None) => std::cmp::Ordering::Less, - (None, Some(_)) => std::cmp::Ordering::Greater, - (None, None) => std::cmp::Ordering::Equal, - }); - for (child_id, _) in child_refs { - dfs_collect(mapping, child_id, out, visited); - } - } -} - -fn parse_mapping_message(msg: &serde_json::Value) -> Option { - let id = msg.get("id").and_then(|v| v.as_str()).unwrap_or("").to_string(); - - let role = msg - .get("author") - .and_then(|a| a.get("role")) - .and_then(|r| r.as_str()) - .unwrap_or("unknown") - .to_string(); - - let ts = msg.get("create_time").and_then(parse_unix_time); - - let content = msg - .get("content") - .map(content_to_parts) - .unwrap_or_default(); - - // Skip purely empty system placeholders (common at the root of chats). 
- if content.iter().all(|p| is_part_empty(p)) && role == "system" { - return None; - } - - let mut metadata = serde_json::Map::new(); - if let Some(m) = msg.get("metadata").and_then(|v| v.as_object()) { - for (k, v) in m { - metadata.insert(k.clone(), v.clone()); - } - } - if let Some(author) = msg.get("author").and_then(|v| v.as_object()) { - if let Some(name) = author.get("name") { - metadata.insert("author_name".to_string(), name.clone()); - } - } - - Some(ParsedMessage { - id, - role, - ts, - content, - metadata: if metadata.is_empty() { - serde_json::Value::Null - } else { - serde_json::Value::Object(metadata) - }, - }) -} - -fn is_part_empty(p: &ContentPart) -> bool { - match p { - ContentPart::Text { text } => text.trim().is_empty(), - ContentPart::Code { text, .. } => text.trim().is_empty(), - ContentPart::Tool { .. } => false, - } -} - -/// Convert a `content` blob (various shapes) into a list of `ContentPart`s. -fn content_to_parts(content: &serde_json::Value) -> Vec { - // Typical shape: { "content_type": "text", "parts": [ ... ] } - // Other content types seen in the wild: "code", "tether_browsing_display", - // "multimodal_text", "execution_output", "system_error". We do a best-effort - // normalisation here; Task 8+ can specialise further. - let content_type = content.get("content_type").and_then(|v| v.as_str()).unwrap_or("text"); - - if let Some(parts) = content.get("parts").and_then(|v| v.as_array()) { - return parts.iter().map(|p| part_to_content_part(p, content_type)).collect(); - } - - // `content_type = "code"` carries { language, text } - if content_type == "code" { - let language = content.get("language").and_then(|v| v.as_str()).map(String::from); - let text = content - .get("text") - .and_then(|v| v.as_str()) - .map(String::from) - .unwrap_or_default(); - return vec![ContentPart::Code { language, text }]; - } - - // `content_type = "tether_browsing_display"` / execution_output carry - // `text` or `result` fields — treat as text. 
- if let Some(text) = content.get("text").and_then(|v| v.as_str()) { - return vec![ContentPart::Text { text: text.to_string() }]; - } - if let Some(text) = content.get("result").and_then(|v| v.as_str()) { - return vec![ContentPart::Text { text: text.to_string() }]; - } - - // Unknown shape — serialize the raw JSON. - vec![ContentPart::Text { text: content.to_string() }] -} - -fn part_to_content_part(part: &serde_json::Value, outer_type: &str) -> ContentPart { - // String — plain text (or code, depending on outer content_type). - if let Some(s) = part.as_str() { - if outer_type == "code" { - return ContentPart::Code { language: None, text: s.to_string() }; - } - return ContentPart::Text { text: s.to_string() }; - } - - // Object — inspect fields. - if let Some(obj) = part.as_object() { - // Multimodal text part: { "text": "...", ... } - if let Some(text) = obj.get("text").and_then(|v| v.as_str()) { - return ContentPart::Text { text: text.to_string() }; - } - - // Tool-ish shape: { "tool": "...", "input": {...}, "output": ... } - // (ChatGPT's actual tool shape varies; this is a best-effort catch.) - if let (Some(name), Some(input)) = ( - obj.get("name").or_else(|| obj.get("tool")).and_then(|v| v.as_str()), - obj.get("input"), - ) { - return ContentPart::Tool { - name: name.to_string(), - input: input.clone(), - output: obj.get("output").cloned(), - }; - } - - // Image / asset-pointer parts: describe them inline. - if let Some(asset) = obj.get("asset_pointer").and_then(|v| v.as_str()) { - return ContentPart::Text { text: format!("[asset: {}]", asset) }; - } - } - - // Unknown shape — serialise the raw JSON. - ContentPart::Text { text: part.to_string() } -} - -/// Parse a ChatGPT unix-seconds timestamp (may be float or int, may be null). 
-fn parse_unix_time(v: &serde_json::Value) -> Option> { - let seconds = v.as_f64()?; - if !seconds.is_finite() { - return None; - } - let secs = seconds.trunc() as i64; - let nanos = ((seconds - secs as f64) * 1_000_000_000.0).round() as u32; - Utc.timestamp_opt(secs, nanos).single() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parses_minimal_fixture() { - let path = std::path::Path::new("tests/fixtures/minimal.json"); - let convs = parse_export(path).expect("parse"); - assert_eq!(convs.len(), 1); - assert_eq!(convs[0].title.as_deref(), Some("Hello")); - assert_eq!(convs[0].messages.len(), 2); - assert_eq!(convs[0].messages[0].role, "user"); - assert_eq!(convs[0].messages[1].role, "assistant"); - } - - #[test] - fn parses_timestamps() { - let path = std::path::Path::new("tests/fixtures/minimal.json"); - let convs = parse_export(path).expect("parse"); - let c = &convs[0]; - assert!(c.created_at.is_some()); - assert!(c.updated_at.is_some()); - // Message timestamps should be derived from create_time. 
- assert!(c.messages[0].ts.is_some()); - assert!(c.messages[1].ts.is_some()); - } - - #[test] - fn text_content_extracted() { - let path = std::path::Path::new("tests/fixtures/minimal.json"); - let convs = parse_export(path).expect("parse"); - let msg0 = &convs[0].messages[0]; - assert_eq!(msg0.content.len(), 1); - match &msg0.content[0] { - ContentPart::Text { text } => assert_eq!(text, "Hello, world"), - other => panic!("expected Text, got {:?}", other), - } - } -} diff --git a/crates/dirigent_chatgpt/src/types.rs b/crates/dirigent_chatgpt/src/types.rs deleted file mode 100644 index 55b4bab..0000000 --- a/crates/dirigent_chatgpt/src/types.rs +++ /dev/null @@ -1,31 +0,0 @@ -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParsedConversation { - pub id: String, // ChatGPT's conversation_id (hex-ish; may or may not be a UUID) - pub title: Option, - pub created_at: Option>, - pub updated_at: Option>, - pub messages: Vec, - #[serde(default)] - pub metadata: serde_json::Value, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParsedMessage { - pub id: String, - pub role: String, // "user" | "assistant" | "system" | "tool" - pub ts: Option>, - pub content: Vec, - #[serde(default)] - pub metadata: serde_json::Value, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ContentPart { - Text { text: String }, - Code { language: Option, text: String }, - Tool { name: String, input: serde_json::Value, output: Option }, -} diff --git a/crates/dirigent_chatgpt/tests/fixtures/minimal.json b/crates/dirigent_chatgpt/tests/fixtures/minimal.json deleted file mode 100644 index 1912486..0000000 --- a/crates/dirigent_chatgpt/tests/fixtures/minimal.json +++ /dev/null @@ -1,31 +0,0 @@ -[ - { - "id": "c1", - "title": "Hello", - "create_time": 1700000000.0, - "update_time": 1700000100.0, - "mapping": { - "root": { "id": "root", 
"message": null, "children": ["m1"] }, - "m1": { - "id": "m1", - "message": { - "id": "m1", - "author": { "role": "user" }, - "create_time": 1700000010.0, - "content": { "content_type": "text", "parts": ["Hello, world"] } - }, - "children": ["m2"] - }, - "m2": { - "id": "m2", - "message": { - "id": "m2", - "author": { "role": "assistant" }, - "create_time": 1700000020.0, - "content": { "content_type": "text", "parts": ["Hi!"] } - }, - "children": [] - } - } - } -] diff --git a/crates/dirigent_codex/CLAUDE.md b/crates/dirigent_codex/CLAUDE.md deleted file mode 100644 index 03b4702..0000000 --- a/crates/dirigent_codex/CLAUDE.md +++ /dev/null @@ -1,30 +0,0 @@ -# Package: dirigent_codex - -Pure-Rust parser for OpenAI Codex JSONL session files. - -## Scope - -- `parse_file(path)` — reads one `*.jsonl` session file on disk and - returns a `ParsedSession`. -- `discover_sessions(dir)` — scans a directory (e.g. - `~/.codex/sessions/`) for session files. -- Types: `ParsedSession`, `ParsedMessage`. - -No dirigent-specific types. `dirigent_archivist::import::sources::codex` -consumes this crate and maps into the archivist's internal types. - -## Example - -```rust -let sessions = dirigent_codex::discover_sessions(dir)?; -for s in sessions { - println!("{}: {} messages", s.id, s.messages.len()); -} -``` - -## Failure modes - -- Individual malformed JSONL lines are skipped where possible. -- Truly broken files return `ParseError::Json`. -- Unknown message shapes are preserved as best-effort text so no user - data is silently lost. 
diff --git a/crates/dirigent_codex/Cargo.toml b/crates/dirigent_codex/Cargo.toml deleted file mode 100644 index 903c73c..0000000 --- a/crates/dirigent_codex/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "dirigent_codex" -version = "0.1.0" -edition = "2021" - -[dependencies] -chrono = { version = "0.4", features = ["serde"] } -serde = { version = "1", features = ["derive"] } -serde_json = "1" -thiserror = "1" -uuid = { version = "1", features = ["v4", "v7", "serde"] } - -[dev-dependencies] -tempfile = "3" diff --git a/crates/dirigent_codex/src/lib.rs b/crates/dirigent_codex/src/lib.rs deleted file mode 100644 index 96d1272..0000000 --- a/crates/dirigent_codex/src/lib.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! OpenAI Codex on-disk session parser. Zero dirigent-specific types. -//! -//! The Codex CLI persists its sessions as JSONL files under -//! `~/.codex/sessions/*.jsonl` (or a caller-supplied equivalent). Each line -//! is a best-effort event object with a `role`, some `content`, and an -//! optional timestamp. Exact schema varies across Codex versions, so this -//! parser is intentionally lenient: unknown/malformed lines are skipped, -//! not failed. - -pub mod parser; -pub mod types; - -pub use parser::{discover_sessions, parse_file, ParseError}; -pub use types::{ParsedMessage, ParsedSession}; diff --git a/crates/dirigent_codex/src/parser.rs b/crates/dirigent_codex/src/parser.rs deleted file mode 100644 index 7e5a9e3..0000000 --- a/crates/dirigent_codex/src/parser.rs +++ /dev/null @@ -1,274 +0,0 @@ -use std::path::{Path, PathBuf}; - -use chrono::{DateTime, TimeZone, Utc}; -use thiserror::Error; - -use crate::types::{ParsedMessage, ParsedSession}; - -#[derive(Debug, Error)] -pub enum ParseError { - #[error("I/O: {0}")] - Io(#[from] std::io::Error), - #[error("JSON: {0}")] - Json(#[from] serde_json::Error), - #[error("not found: {0}")] - NotFound(String), -} - -/// Walk `dir` (non-recursively) for Codex session JSONL files. 
-/// -/// Returns a deterministically ordered list (lexical by path) of every -/// `*.jsonl` file directly under `dir`. Returns `NotFound` if the directory -/// itself doesn't exist. -pub fn discover_sessions(dir: &Path) -> Result, ParseError> { - if !dir.exists() { - return Err(ParseError::NotFound(dir.display().to_string())); - } - let mut out = Vec::new(); - for entry in std::fs::read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - if path.is_file() - && path.extension().and_then(|s| s.to_str()) == Some("jsonl") - { - out.push(path); - } - } - out.sort(); - Ok(out) -} - -/// Parse a single Codex session JSONL file. -/// -/// Malformed / unexpected lines are skipped (not fatal). Every line that -/// exposes a `role` and a `content` is turned into a [`ParsedMessage`]. -/// The session's `created_at` / `updated_at` bracket the first and last -/// message timestamps seen. -pub fn parse_file(path: &Path) -> Result { - let text = std::fs::read_to_string(path)?; - let native_id = path - .file_stem() - .and_then(|s| s.to_str()) - .unwrap_or("unknown") - .to_string(); - - let mut messages = Vec::new(); - let mut created_at: Option> = None; - let mut updated_at: Option> = None; - - for line in text.lines() { - if line.trim().is_empty() { - continue; - } - let val: serde_json::Value = match serde_json::from_str(line) { - Ok(v) => v, - Err(_) => continue, // skip malformed lines - }; - if let Some(msg) = extract_message(&val) { - if let Some(ts) = msg.ts { - if created_at.is_none() { - created_at = Some(ts); - } - updated_at = Some(ts); - } - messages.push(msg); - } - } - - Ok(ParsedSession { - native_id, - source_path: path.to_path_buf(), - created_at, - updated_at, - messages, - }) -} - -/// Best-effort extraction of a [`ParsedMessage`] from an arbitrary JSONL -/// event. Returns `None` if the shape doesn't carry a role + content. 
-fn extract_message(val: &serde_json::Value) -> Option { - let role = val.get("role").and_then(|v| v.as_str()).map(String::from)?; - let content = extract_content(val.get("content")?)?; - let ts = extract_ts(val); - Some(ParsedMessage { - ts, - role, - content, - metadata: val.clone(), - }) -} - -/// Flatten a `content` field into a single string. -/// -/// - string → as-is -/// - array → strings joined by `\n`; objects with a `text` field use that, -/// otherwise their raw JSON is stringified -/// - object → `text` field if present, otherwise the raw JSON -/// - null → `None` -fn extract_content(content: &serde_json::Value) -> Option { - match content { - serde_json::Value::String(s) => Some(s.clone()), - serde_json::Value::Array(arr) => { - let parts: Vec = arr - .iter() - .filter_map(|p| { - if let Some(s) = p.as_str() { - Some(s.to_string()) - } else if let Some(t) = p.get("text").and_then(|v| v.as_str()) { - Some(t.to_string()) - } else { - Some(p.to_string()) - } - }) - .collect(); - Some(parts.join("\n")) - } - serde_json::Value::Object(_) => { - if let Some(t) = content.get("text").and_then(|v| v.as_str()) { - Some(t.to_string()) - } else { - Some(content.to_string()) - } - } - serde_json::Value::Null => None, - other => Some(other.to_string()), - } -} - -/// Extract a timestamp from one of several possible fields. -/// -/// Accepts RFC 3339 strings or numeric unix-seconds (integer or float). 
-fn extract_ts(val: &serde_json::Value) -> Option> { - let candidate = val - .get("ts") - .or_else(|| val.get("timestamp")) - .or_else(|| val.get("created_at")) - .or_else(|| val.get("time"))?; - - if let Some(s) = candidate.as_str() { - if let Ok(dt) = DateTime::parse_from_rfc3339(s) { - return Some(dt.with_timezone(&Utc)); - } - } - if let Some(f) = candidate.as_f64() { - if f.is_finite() { - let secs = f.trunc() as i64; - let nanos = ((f - secs as f64) * 1_000_000_000.0).round() as u32; - return Utc.timestamp_opt(secs, nanos).single(); - } - } - if let Some(i) = candidate.as_i64() { - return Utc.timestamp_opt(i, 0).single(); - } - None -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Write; - - fn write_jsonl(dir: &Path, name: &str, lines: &[&str]) -> PathBuf { - let path = dir.join(name); - let mut f = std::fs::File::create(&path).unwrap(); - for line in lines { - writeln!(f, "{}", line).unwrap(); - } - path - } - - #[test] - fn discover_sessions_missing_dir_returns_not_found() { - let err = discover_sessions(Path::new("/tmp/this/does/not/exist/ever")) - .expect_err("should fail"); - assert!(matches!(err, ParseError::NotFound(_))); - } - - #[test] - fn discover_sessions_lists_only_jsonl() { - let tmp = tempfile::tempdir().unwrap(); - let _ = write_jsonl(tmp.path(), "a.jsonl", &[]); - let _ = write_jsonl(tmp.path(), "b.jsonl", &[]); - let _ = write_jsonl(tmp.path(), "not-this.txt", &[]); - let found = discover_sessions(tmp.path()).unwrap(); - assert_eq!(found.len(), 2); - assert!(found[0].ends_with("a.jsonl")); - assert!(found[1].ends_with("b.jsonl")); - } - - #[test] - fn parse_file_extracts_basic_messages() { - let tmp = tempfile::tempdir().unwrap(); - let path = write_jsonl( - tmp.path(), - "session-abc.jsonl", - &[ - 
r#"{"role":"user","content":"hi","ts":"2025-01-01T12:00:00Z"}"#, - r#"{"role":"assistant","content":"hello","ts":"2025-01-01T12:00:01Z"}"#, - ], - ); - let session = parse_file(&path).unwrap(); - assert_eq!(session.native_id, "session-abc"); - assert_eq!(session.messages.len(), 2); - assert_eq!(session.messages[0].role, "user"); - assert_eq!(session.messages[0].content, "hi"); - assert_eq!(session.messages[1].role, "assistant"); - assert!(session.created_at.is_some()); - assert!(session.updated_at.is_some()); - assert_ne!(session.created_at, session.updated_at); - } - - #[test] - fn parse_file_skips_malformed_and_empty_lines() { - let tmp = tempfile::tempdir().unwrap(); - let path = write_jsonl( - tmp.path(), - "session.jsonl", - &[ - r#"{"role":"user","content":"hi"}"#, - "", - "not json at all", - r#"{"garbled":true}"#, // no role/content → skipped - r#"{"role":"assistant","content":"ok"}"#, - ], - ); - let session = parse_file(&path).unwrap(); - assert_eq!(session.messages.len(), 2); - } - - #[test] - fn parse_file_handles_content_array_and_object() { - let tmp = tempfile::tempdir().unwrap(); - let path = write_jsonl( - tmp.path(), - "session.jsonl", - &[ - r#"{"role":"user","content":["a","b","c"]}"#, - r#"{"role":"user","content":[{"text":"x"},{"text":"y"}]}"#, - r#"{"role":"assistant","content":{"text":"nested"}}"#, - ], - ); - let session = parse_file(&path).unwrap(); - assert_eq!(session.messages.len(), 3); - assert_eq!(session.messages[0].content, "a\nb\nc"); - assert_eq!(session.messages[1].content, "x\ny"); - assert_eq!(session.messages[2].content, "nested"); - } - - #[test] - fn parse_file_accepts_unix_ts() { - let tmp = tempfile::tempdir().unwrap(); - let path = write_jsonl( - tmp.path(), - "session.jsonl", - &[r#"{"role":"user","content":"hi","ts":1735732800}"#], - ); - let session = parse_file(&path).unwrap(); - assert_eq!(session.messages.len(), 1); - assert!(session.messages[0].ts.is_some()); - } -} diff --git a/crates/dirigent_codex/src/types.rs 
b/crates/dirigent_codex/src/types.rs deleted file mode 100644 index 1ae3d8c..0000000 --- a/crates/dirigent_codex/src/types.rs +++ /dev/null @@ -1,32 +0,0 @@ -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; - -/// A Codex session parsed from a single JSONL file. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParsedSession { - /// The native session id. For Codex this is the JSONL file stem. - pub native_id: String, - /// The source file the session was loaded from. - pub source_path: PathBuf, - /// First message timestamp seen (if any). - pub created_at: Option>, - /// Last message timestamp seen (if any). - pub updated_at: Option>, - /// Parsed messages in file order. - pub messages: Vec, -} - -/// A single message event from a Codex JSONL session file. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParsedMessage { - /// Timestamp if one could be extracted (RFC 3339 or unix epoch). - pub ts: Option>, - /// Free-form role, e.g. "user", "assistant", "system", "tool". - pub role: String, - /// Best-effort concatenated text content. - pub content: String, - /// Raw event for provenance. 
- #[serde(default)] - pub metadata: serde_json::Value, -} diff --git a/crates/dirigent_core/Cargo.toml b/crates/dirigent_core/Cargo.toml index eabfa72..b17d812 100644 --- a/crates/dirigent_core/Cargo.toml +++ b/crates/dirigent_core/Cargo.toml @@ -19,7 +19,7 @@ agent-client-protocol = { version = "0.6", optional = true } # Async streams async-stream = { version = "0.3", optional = true } # Async trait support -async-trait = { version = "0.1", optional = true } +async-trait = "0.1" # Web server axum = { version = "0.8", optional = true } # Base64 encoding for embedded resources @@ -29,13 +29,9 @@ blake3 = { version = "1.5", optional = true } chrono = { version = "0.4", features = ["serde"] } dirigent_acp_api = { path = "../dirigent_acp_api", optional = true } # Workspace dependencies -dirigent_archivist = { path = "../dirigent_archivist", optional = true } dirigent_config = { path = "../dirigent_config", optional = true } dirigent_auth = { path = "../dirigent_auth" } dirigent_process = { path = "../dirigent_process", features = ["tokio"], optional = true } -dirigent_taskrunner = { path = "../dirigent_taskrunner", optional = true } -dirigent_matrix = { path = "../dirigent_matrix", optional = true } -dirigent_zed = { path = "../dirigent_zed", optional = true } dirigent_inspector = { path = "../dirigent_inspector", optional = true } dirigent_protocol = { path = "../dirigent_protocol", features = ["adapters"], optional = true } dirigent_tools = { path = "../dirigent_tools", optional = true } @@ -77,26 +73,16 @@ toml = "0.8" name = "stream_registry_test" required-features = ["test-utils"] -[[test]] -name = "replay_test" -required-features = ["test-utils", "server"] - -[[test]] -name = "matrix_migration_test" -required-features = ["server"] - [features] default = [] test-utils = [] server = [ "dep:agent-client-protocol", "dep:async-stream", - "dep:async-trait", "dep:axum", "dep:base64", "dep:blake3", "dep:dirigent_acp_api", - "dep:dirigent_archivist", 
"dep:dirigent_config", "dep:dirigent_inspector", "dep:dirigent_protocol", @@ -113,8 +99,5 @@ server = [ "dep:tower-http", "dep:tracing", "dep:tracing-subscriber", - "dep:dirigent_matrix", - "dep:dirigent_zed", - "dep:dirigent_taskrunner", "dep:dirigent_process", ] diff --git a/crates/dirigent_core/src/config.rs b/crates/dirigent_core/src/config.rs index ebbc7df..38c6002 100644 --- a/crates/dirigent_core/src/config.rs +++ b/crates/dirigent_core/src/config.rs @@ -57,15 +57,10 @@ pub struct CoreConfig { /// Archive backend declarations. Phase 3+ replaces `archive_root` with /// this `[[archives]]`-array config. When both are set, `archives` wins. /// - /// Stored as typed `ArchiveConfig` on server builds and as raw - /// `serde_json::Value` on WASM/non-server builds (archivist types pull in - /// the full coordinator which is server-only). - #[cfg(feature = "server")] - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub archives: Vec, - - /// Archive backend declarations (opaque on non-server builds). - #[cfg(not(feature = "server"))] + /// Stored as opaque `serde_json::Value` — the archivist (in + /// `dirigent_server`) deserialises these into typed `ArchiveConfig` + /// at boot. Core only needs to preserve the raw entries for + /// serialization round-trips. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub archives: Vec, @@ -118,14 +113,14 @@ pub struct CoreConfig { #[serde(default, skip_serializing_if = "HashMap::is_empty")] pub accounts: HashMap, - /// Matrix sharing behavior configuration. + /// Matrix sharing behavior configuration (opaque). /// /// References an account by name for identity/credentials. /// If None, Matrix session sharing is disabled. /// - /// Stored as typed `MatrixBehaviorConfig` on server builds and as - /// raw `serde_json::Value` on WASM/non-server builds to avoid pulling - /// in the heavy `matrix-sdk` dependency. 
+ /// Stored as raw `serde_json::Value` so `dirigent_core` does not depend + /// on `dirigent_matrix`. The server layer (`dirigent_server`) deserializes + /// this into `dirigent_matrix::MatrixBehaviorConfig` when needed. /// /// Example in dirigent.toml: /// ```toml @@ -134,12 +129,6 @@ pub struct CoreConfig { /// default_invite = ["@user:example.com"] /// store_path = "matrix/bot/store" /// ``` - #[cfg(feature = "server")] - #[serde(default, skip_serializing_if = "Option::is_none")] - pub matrix: Option, - - /// Matrix sharing behavior configuration (opaque on non-server builds). - #[cfg(not(feature = "server"))] #[serde(default, skip_serializing_if = "Option::is_none")] pub matrix: Option, diff --git a/crates/dirigent_core/src/hooks.rs b/crates/dirigent_core/src/hooks.rs new file mode 100644 index 0000000..881eb3c --- /dev/null +++ b/crates/dirigent_core/src/hooks.rs @@ -0,0 +1,56 @@ +use crate::types::ConnectorKind; +use uuid::Uuid; + +/// Result of registering a connector with external services. +#[derive(Debug, Default)] +pub struct ConnectorRegistration { + /// An external UID assigned by a service (e.g., inspector or archivist). + pub external_uid: Option, +} + +/// Lifecycle hooks invoked by the core runtime when connectors are created or removed. +/// +/// Implementations wire up external services (archivist, inspector, etc.) +/// without the core needing to know about them directly. +#[async_trait::async_trait] +pub trait ConnectorLifecycleHooks: Send + Sync { + async fn on_connector_created( + &self, + connector_id: &str, + kind: ConnectorKind, + title: &str, + owner: &str, + params: &serde_json::Value, + ) -> ConnectorRegistration; + + async fn on_connector_removed(&self, _connector_id: &str) {} + + #[cfg(feature = "server")] + fn inspector(&self) -> Option> { + None + } + + #[cfg(feature = "server")] + fn process_manager( + &self, + ) -> Option> { + None + } +} + +/// No-op implementation for environments that don't need lifecycle hooks. 
+pub struct NoOpHooks; + +#[async_trait::async_trait] +impl ConnectorLifecycleHooks for NoOpHooks { + async fn on_connector_created( + &self, + _connector_id: &str, + _kind: ConnectorKind, + _title: &str, + _owner: &str, + _params: &serde_json::Value, + ) -> ConnectorRegistration { + ConnectorRegistration::default() + } +} diff --git a/crates/dirigent_core/src/lib.rs b/crates/dirigent_core/src/lib.rs index 3050ecb..60935f2 100644 --- a/crates/dirigent_core/src/lib.rs +++ b/crates/dirigent_core/src/lib.rs @@ -38,6 +38,10 @@ pub mod types; // Plugin system types (scaffolding) - always available pub mod plugins; +// Lifecycle hooks for connector creation/removal - always available (WASM-compatible) +pub mod hooks; +pub use hooks::{ConnectorLifecycleHooks, ConnectorRegistration, NoOpHooks}; + // Tool directive and configuration types - always available (WASM-compatible) pub mod tools; @@ -58,10 +62,6 @@ pub use error::CoreError; #[cfg(feature = "server")] pub use runtime::{CoreHandle, CoreRuntime}; -// Re-export Zed agent → ConnectorConfig conversion (used by API) -#[cfg(feature = "server")] -pub use runtime::zed_detection::{refresh_zed_connector_binaries, zed_agent_to_connector_config}; - // Configuration module (server-only) #[cfg(feature = "server")] pub mod config; diff --git a/crates/dirigent_core/src/runtime/mod.rs b/crates/dirigent_core/src/runtime/mod.rs index a3b02e9..676764c 100644 --- a/crates/dirigent_core/src/runtime/mod.rs +++ b/crates/dirigent_core/src/runtime/mod.rs @@ -26,7 +26,7 @@ //! # async fn example() { //! // Create a runtime with default config //! let config = CoreConfig::default(); -//! let runtime = CoreRuntime::new(config, None); +//! let runtime = CoreRuntime::new(config); //! let handle = CoreHandle::new(runtime); //! //! 
// List all connectors (none initially) @@ -42,8 +42,6 @@ pub(crate) mod config_manager; mod session_transfer; mod summary_cache; -#[cfg(feature = "server")] -pub mod zed_detection; use crate::config::{AcpServerConfig, ConnectorConfig, CoreConfig}; use crate::connectors::acp::{AcpConfig, AcpConnector}; @@ -62,26 +60,6 @@ use std::time::Duration; use tokio::sync::RwLock; use tracing::{debug, error, info, warn}; -/// Error type for [`CoreRuntime::replay_session_to_stream`]. -/// -/// Wraps the standalone [`crate::sharing::replay::ReplayError`] with the -/// two runtime-level preconditions that the replay fn itself has no way to -/// check (missing stream id, missing archivist). -#[cfg(feature = "server")] -#[derive(Debug, thiserror::Error)] -pub enum ReplaySessionError { - /// No stream with the given id is currently registered. - #[error("stream not found: {0:?}")] - StreamNotFound(crate::sharing::StreamId), - /// The runtime has no archivist configured, so there's nothing to - /// replay from. - #[error("no archivist configured")] - NoArchivist, - /// The replay itself failed (session missing, archive I/O, …). - #[error(transparent)] - Replay(#[from] crate::sharing::replay::ReplayError), -} - /// Tracks an active session transfer for fallback purposes #[derive(Clone, Debug)] struct TransferredSession { @@ -114,7 +92,7 @@ struct TransferredSession { /// /// # Lifecycle /// -/// 1. Create with `CoreRuntime::new(config, None)` +/// 1. Create with `CoreRuntime::new(config)` /// 2. Wrap in `CoreHandle` for cheap cloning /// 3. Create and manage connectors via runtime methods /// 4. Subscribe to global events for system-wide event streaming @@ -147,21 +125,6 @@ pub struct CoreRuntime { #[allow(dead_code)] users: RwLock>, - /// Optional archivist for persistent storage - /// - /// If Some, the archivist coordinates connector UID generation and - /// persists session/message data. If None, the runtime operates - /// in ephemeral mode with no archival storage. 
- #[cfg(feature = "server")] - archivist: Arc>>>, - - /// Mapping from connector_id (user-facing) to archivist connector_uid (internal UUID) - /// - /// This mapping enables archive-first reads to work with non-UUID connector IDs. - /// Only populated when archivist is available and connectors are successfully registered. - #[cfg(feature = "server")] - archivist_connector_uids: Arc>>, - /// Sync-accessible cache of connector summaries for use by GatewayConnector callbacks /// /// This cache is updated whenever connectors are added or removed. It uses a sync RwLock @@ -182,30 +145,6 @@ pub struct CoreRuntime { /// to Gateway with user notification. transferred_sessions: Arc>>, - /// Optional inspector registry for internal visualization and introspection. - /// - /// When present, connectors and services register themselves as nodes - /// in the inspector tree, enabling monitoring of state, process health, - /// and system resources. - #[cfg(feature = "server")] - inspector: Option>, - - /// Optional process group manager for lifecycle management of child processes. - /// - /// When present, ACP connectors using StdioTransport will create per-process - /// lifecycle handles so spawned agents are tracked in the platform job object / - /// process group and are shut down gracefully on disconnect. - #[cfg(feature = "server")] - process_manager: Option>, - - /// Optional task runner for background process management - #[cfg(feature = "server")] - task_runner: Arc>>>, - - /// Optional Matrix service for session sharing - #[cfg(feature = "server")] - matrix_service: Arc>>>, - /// Registry of active streams wired to the `SharingBus`. /// /// Streams are attached at boot from `[[streams]]` config, or at runtime @@ -217,6 +156,13 @@ pub struct CoreRuntime { /// blocks. Populated by callers at boot with their compiled-in factories; /// defaults to an empty registry for tests and pre-Phase-4 call sites. 
stream_factories: Arc, + + /// Lifecycle hooks invoked when connectors are created or removed. + /// + /// External systems (e.g. `dirigent_server`) provide a concrete + /// implementation that wires up archivist, inspector, etc. The default + /// is [`crate::hooks::NoOpHooks`]. + hooks: Arc, } impl CoreRuntime { @@ -226,12 +172,10 @@ impl CoreRuntime { /// - Empty connector registry /// - Global event channel with capacity 1000 /// - Empty user registry - /// - Optional archivist for persistent storage /// /// # Arguments /// /// * `config` - Core configuration (port, project directory, etc.) - /// * `archivist` - Optional archivist for persistence (server feature only) /// /// # Returns /// @@ -243,49 +187,30 @@ impl CoreRuntime { /// use dirigent_core::{CoreRuntime, CoreConfig}; /// /// let config = CoreConfig::default(); - /// let runtime = CoreRuntime::new(config, None); + /// let runtime = CoreRuntime::new(config); /// ``` #[cfg(feature = "server")] - pub fn new(config: CoreConfig, archivist: Option>) -> Self { - Self::new_with_inspector(config, archivist, None) - } - - /// Create a new CoreRuntime with archivist and inspector. - /// - /// This is a thin wrapper around [`Self::new_with_factories`] that - /// supplies an empty [`StreamFactoryRegistry`]. Callers that want to - /// attach streams at runtime should use `new_with_factories` directly - /// and pre-register their compiled-in factories. - #[cfg(feature = "server")] - pub fn new_with_inspector( - config: CoreConfig, - archivist: Option>, - inspector: Option>, - ) -> Self { + pub fn new(config: CoreConfig) -> Self { Self::new_with_factories( config, - archivist, - inspector, Arc::new(crate::sharing::StreamFactoryRegistry::default()), ) } - /// Create a new CoreRuntime with archivist, inspector, and a - /// pre-populated [`StreamFactoryRegistry`]. + /// Create a new CoreRuntime with a pre-populated + /// [`StreamFactoryRegistry`]. 
/// /// Callers at boot build the factory registry with their compiled-in - /// `StreamFactory` implementations (e.g. Matrix, Langfuse, …) and hand + /// `StreamFactory` implementations (e.g. Matrix, Langfuse, ...) and hand /// it in here so that [`CoreRuntime::attach_stream`] can resolve /// `StreamConfig::kind` strings without further plumbing. /// /// Existing call sites that don't yet know about streams should keep - /// calling [`Self::new`] / [`Self::new_with_inspector`], which forward - /// to this constructor with an empty factory registry. + /// calling [`Self::new`], which forwards to this constructor with an + /// empty factory registry. #[cfg(feature = "server")] pub fn new_with_factories( config: CoreConfig, - archivist: Option>, - inspector: Option>, stream_factories: Arc, ) -> Self { // Primary event fan-out. All connector/runtime events flow through @@ -299,21 +224,16 @@ impl CoreRuntime { connectors: RwLock::new(HashMap::new()), sharing_bus, users: RwLock::new(HashMap::new()), - archivist: Arc::new(tokio::sync::RwLock::new(archivist)), - archivist_connector_uids: Arc::new(RwLock::new(HashMap::new())), connector_summary_cache: Arc::new(std::sync::RwLock::new(Vec::new())), self_weak: Arc::new(RwLock::new(None)), transferred_sessions: Arc::new(RwLock::new(HashMap::new())), - inspector, - process_manager: None, - task_runner: Arc::new(tokio::sync::RwLock::new(None)), - matrix_service: Arc::new(tokio::sync::RwLock::new(None)), stream_registry, stream_factories, + hooks: Arc::new(crate::hooks::NoOpHooks), } } - /// Create a new CoreRuntime without archivist (non-server builds) + /// Create a new CoreRuntime (non-server builds) #[cfg(not(feature = "server"))] pub fn new(config: CoreConfig) -> Self { let sharing_bus = SharingBus::new(); @@ -330,23 +250,21 @@ impl CoreRuntime { transferred_sessions: Arc::new(RwLock::new(HashMap::new())), stream_registry, stream_factories, + hooks: Arc::new(crate::hooks::NoOpHooks), } } - /// Set the process group manager 
used for lifecycle management of stdio agent processes. + /// Replace the lifecycle hooks implementation. /// - /// Call this before creating ACP connectors to ensure newly spawned processes are - /// tracked in the platform job object (Windows) or process group (Unix). After this - /// is set, every new `StdioTransport` will receive a per-process lifecycle handle. - #[cfg(feature = "server")] - pub fn set_process_manager(&mut self, mgr: Arc) { - self.process_manager = Some(mgr); + /// Call this before creating connectors so that the new hooks are invoked + /// for every connector creation. The default is [`crate::hooks::NoOpHooks`]. + pub fn set_hooks(&mut self, hooks: Arc) { + self.hooks = hooks; } - /// Get a reference to the process group manager, if configured. - #[cfg(feature = "server")] - pub fn process_manager(&self) -> Option<&Arc> { - self.process_manager.as_ref() + /// Get a reference to the current lifecycle hooks. + pub fn hooks(&self) -> &Arc { + &self.hooks } /// List all connectors, optionally filtered by owner @@ -533,12 +451,6 @@ impl CoreRuntime { connectors.get(id).cloned() } - /// Get the inspector registry, if configured. 
- #[cfg(feature = "server")] - pub fn inspector(&self) -> Option<&Arc> { - self.inspector.as_ref() - } - /// Create a new connector from configuration /// /// This method instantiates a connector of the specified type with the given @@ -736,9 +648,9 @@ impl CoreRuntime { let connector = connector.with_tool_configuration(cfg.tool_configuration.clone()); #[cfg(feature = "server")] - let connector = connector.with_inspector(self.inspector.clone()); - #[cfg(feature = "server")] - let connector = connector.with_process_manager(self.process_manager.clone()); + let connector = connector + .with_inspector(self.hooks.inspector()) + .with_process_manager(self.hooks.process_manager()); connector }; @@ -942,9 +854,8 @@ impl CoreRuntime { }); gateway_connector.set_session_transfer_callback(transfer_callback); - // Wire up inspector for session tracking #[cfg(feature = "server")] - gateway_connector.set_inspector(self.inspector.clone()); + gateway_connector.set_inspector(self.hooks.inspector()); // Resolve working directory using global config let config_lock = self.config.read().await; @@ -999,143 +910,19 @@ impl CoreRuntime { .clone() .unwrap_or_else(|| format!("{:?} Connector", cfg.kind)); - // Register connector with archivist (if available and server feature enabled) - #[cfg(feature = "server")] - if let Some(archivist) = self.archivist.read().await.clone() { - // Determine custom_uid: use connector_id if it's a UUID, otherwise let archivist generate one - let custom_uid = uuid::Uuid::parse_str(&connector_id).ok(); - - // Compute a deterministic fingerprint for archivist re-association across restarts - let fingerprint = - crate::connectors::fingerprint::compute_fingerprint(&cfg.kind, &cfg.params); - - // Create registration request - let register_req = dirigent_archivist::types::RegisterConnectorRequest { - custom_uid, // Some(uuid) if connector_id was UUID, None otherwise - r#type: format!("{:?}", connector_kind), - title: connector_title.clone(), - client_native_id: 
connector_id.clone(), - metadata: serde_json::json!({}), - fingerprint: fingerprint.clone(), - }; - - // Attempt registration (non-blocking, best-effort) - match archivist.register_connector(register_req, None).await { - Ok(response) => { - // Store the mapping for archive-first reads (works for both UUID and custom IDs) - self.archivist_connector_uids - .write() - .await - .insert(connector_id.clone(), response.connector_uid); - - // Backfill fingerprint on existing records that were registered - // before fingerprinting was introduced (Task 11) - if response.status == dirigent_archivist::types::RegisterStatus::Aliased { - if let Some(ref fp) = fingerprint { - let _ = archivist - .update_connector_fingerprint( - response.connector_uid, - fp.clone(), - None, - ) - .await; - } - } - - info!( - connector_id = %connector_id, - connector_uid = %response.connector_uid, - status = ?response.status, - "Registered connector with archivist" - ); - } - Err(e) => { - warn!( - connector_id = %connector_id, - error = %e, - "Failed to register connector with archivist (non-fatal)" - ); - } - } - } - - // Register connector with inspector (if available) - #[cfg(feature = "server")] - if let Some(ref inspector) = self.inspector { - let node_id = - dirigent_inspector::NodeId::new(format!("dirigent/connectors/{}", connector_id)); - let parent_id = dirigent_inspector::NodeId::new("dirigent/connectors"); - let mut metadata = dirigent_inspector::NodeMetadata::new( - dirigent_inspector::NodeKind::Connector, - &connector_title, - ) - .with_state(dirigent_inspector::NodeState::Initializing) - .with_property("kind", serde_json::json!(format!("{:?}", connector_kind))) - .with_property("owner", serde_json::json!(&owner)); - - // Extract command/executable from params for display in inspector - if let Some(params_obj) = cfg.params.as_object() { - if let Some(transport) = params_obj.get("transport") { - match transport.get("type").and_then(|t| t.as_str()) { - Some("stdio") => { - if let 
Some(cmd) = transport.get("command").and_then(|c| c.as_str()) { - let args = transport - .get("args") - .and_then(|a| a.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str()) - .collect::>() - .join(" ") - }) - .unwrap_or_default(); - let full_command = if args.is_empty() { - cmd.to_string() - } else { - format!("{} {}", cmd, args) - }; - metadata = metadata.with_property( - "command", - serde_json::json!(full_command), - ); - } - } - Some("http") => { - if let Some(url) = - transport.get("base_url").and_then(|u| u.as_str()) - { - metadata = metadata - .with_property("command", serde_json::json!(url)); - } - } - _ => {} - } - } else if let Some(base_url) = params_obj.get("base_url").and_then(|u| u.as_str()) - { - // OpenCode connectors have base_url at the top level - metadata = - metadata.with_property("command", serde_json::json!(base_url)); - } - } - match inspector - .register(node_id, &parent_id, metadata, None) - .await - { - Ok(mut handle) => { - handle.detach(); - info!( - connector_id = %connector_id, - "Registered connector with inspector" - ); - } - Err(e) => { - warn!( - connector_id = %connector_id, - error = %e, - "Failed to register connector with inspector (non-fatal)" - ); - } - } + // Lifecycle hooks — let external systems register with the connector + { + let owner_str = owner.to_string(); + let _registration = self + .hooks + .on_connector_created( + &connector_id, + connector_kind.clone(), + &connector_title, + &owner_str, + &cfg.params, + ) + .await; } // Auto-save: persist the new connector to the configuration file @@ -1614,7 +1401,9 @@ impl CoreRuntime { let acp_connector = acp_connector.with_tool_configuration(tool_config); #[cfg(feature = "server")] - let acp_connector = acp_connector.with_process_manager(self.process_manager.clone()); + let acp_connector = acp_connector + .with_inspector(self.hooks.inspector()) + .with_process_manager(self.hooks.process_manager()); // Create new handle with same identity but fresh channels 
// IMPORTANT: Use new connector's events_sender, not old handle's @@ -1720,9 +1509,8 @@ impl CoreRuntime { Arc::clone(&self.sharing_bus), ); - // Wire up inspector for session tracking #[cfg(feature = "server")] - gateway_connector.set_inspector(self.inspector.clone()); + gateway_connector.set_inspector(self.hooks.inspector()); // Create new handle with same identity but fresh channels // IMPORTANT: Use new connector's events_sender, not old handle's @@ -2098,12 +1886,8 @@ impl CoreRuntime { info!("Last connector removed - transitioning to empty state"); } - // Deregister connector from inspector (if available) - #[cfg(feature = "server")] - if let Some(ref inspector) = self.inspector { - let node_id = dirigent_inspector::NodeId::new(format!("dirigent/connectors/{}", id)); - let _ = inspector.deregister_subtree(&node_id).await; - } + // Lifecycle hooks — let external systems clean up + self.hooks.on_connector_removed(&id).await; // Emit ConnectorRemoved event onto the SharingBus. let bus_event = dirigent_protocol::streaming::BusEvent::from_connector_event( @@ -2585,44 +2369,6 @@ impl CoreRuntime { self.stream_registry.list().await } - /// Replay the archived messages of `scroll_id` onto the stream - /// identified by `stream_id`. - /// - /// Reads metadata + messages from the attached archivist and dispatches - /// synthetic `BusEvent`s with `EventOrigin::Replay { .. }` directly to - /// the target stream, bypassing the `SharingBus`. Live events remain - /// unaffected. - /// - /// Errors if the stream isn't registered, no archivist is configured on - /// this runtime, or the archive read fails fatally. Per-event stream - /// failures are counted in the returned [`ReplayReport`] instead of - /// propagated. 
- #[cfg(feature = "server")] - pub async fn replay_session_to_stream( - &self, - scroll_id: uuid::Uuid, - stream_id: crate::sharing::StreamId, - opts: crate::sharing::ReplayOptions, - ) -> Result { - let stream = self - .stream_registry - .get_stream(stream_id) - .await - .ok_or(ReplaySessionError::StreamNotFound(stream_id))?; - let archivist_guard = self.archivist.read().await; - let archivist = archivist_guard - .as_ref() - .ok_or(ReplaySessionError::NoArchivist)?; - crate::sharing::replay::replay_session_to_stream( - archivist.as_ref(), - scroll_id, - stream, - opts, - ) - .await - .map_err(ReplaySessionError::Replay) - } - /// Emit a runtime-origin event onto the primary event bus. /// /// This allows non-connector components (e.g., system task registry, @@ -2691,577 +2437,6 @@ impl CoreRuntime { &self.users } - /// Get a reference to the archivist (server feature only) - /// - /// Returns None if no archivist is configured or if not running in server mode. - /// - /// # Returns - /// - /// Option> for archival storage operations - #[cfg(feature = "server")] - pub async fn archivist(&self) -> Option> { - self.archivist.read().await.clone() - } - - /// Set the archivist at runtime (for hot-reload activation) - #[cfg(feature = "server")] - pub async fn set_archivist(&self, archivist: Option>) { - let mut guard = self.archivist.write().await; - *guard = archivist; - } - - /// Get a reference to the archivist slot (for sharing with AppState) - #[cfg(feature = "server")] - pub fn archivist_slot(&self) -> &Arc>>> { - &self.archivist - } - - /// Get the task runner slot (shared reference for AppState integration) - #[cfg(feature = "server")] - pub fn task_runner_slot( - &self, - ) -> &Arc>>> { - &self.task_runner - } - - // ----------------------------------------------------------------------- - // Matrix integration - // ----------------------------------------------------------------------- - - /// Initialize and start the Matrix service if configured. 
- /// - /// Reads the `matrix` behavior config and resolves the referenced account, - /// then creates and starts the service. - /// Call during server startup after CoreRuntime is created. - #[cfg(feature = "server")] - pub async fn start_matrix_service(&self) -> std::result::Result<(), String> { - let config_guard = self.config.read().await; - let behavior = match &config_guard.matrix { - Some(b) => b.clone(), - None => { - debug!("No [matrix] configuration found, skipping Matrix service startup"); - return Ok(()); - } - }; - - let account = config_guard - .accounts - .get(&behavior.account) - .ok_or_else(|| { - format!( - "Matrix config references account '{}' but it is not defined in [accounts]", - behavior.account - ) - })? - .clone(); - drop(config_guard); - - if account.kind != dirigent_auth::AccountKind::Matrix { - return Err(format!( - "Account '{}' has type {:?}, expected 'matrix'", - behavior.account, account.kind - )); - } - - let data_dir = dirigent_config::DirigentPaths::resolve() - .map_err(|e| format!("Failed to resolve data directory: {}", e))? - .data_dir() - .to_path_buf(); - - let service = dirigent_matrix::MatrixService::from_account(&account, behavior, data_dir) - .map_err(|e| format!("Failed to create Matrix service: {}", e))?; - - service - .login() - .await - .map_err(|e| format!("Matrix login failed: {}", e))?; - service - .start_sync() - .await - .map_err(|e| format!("Matrix sync start failed: {}", e))?; - - let service_arc = Arc::new(service); - *self.matrix_service.write().await = Some(service_arc); - - info!("Matrix service started successfully"); - Ok(()) - } - - /// Create a Matrix session share. - /// - /// Creates a Matrix room and starts a bidirectional bridge between the - /// specified connector session and the room. - /// - /// Returns the Matrix room ID string on success. 
- #[cfg(feature = "server")] - pub async fn create_matrix_share( - &self, - connector_id: &str, - session_id: &str, - session_title: Option<&str>, - ) -> std::result::Result { - let service_guard = self.matrix_service.read().await; - let service = service_guard - .as_ref() - .ok_or("Matrix service not started")?; - - let client = service - .client_cloned() - .await - .ok_or("Matrix client not logged in")?; - - // Get the connector - let connectors = self.connectors.read().await; - let connector = connectors - .get(connector_id) - .ok_or_else(|| format!("Connector '{}' not found", connector_id))?; - - let event_rx = connector.subscribe(); - let connector_cmd_tx = connector.command_tx(); - drop(connectors); - - // Create room - let room_name = - dirigent_matrix::room::room_name_for_session(connector_id, session_title); - let invite = service.behavior().default_invite.clone(); - - let room_id = dirigent_matrix::room::create_share_room( - &client, - dirigent_matrix::CreateRoomOptions { - name: room_name, - topic: Some(format!( - "Dirigent session share: {} / {}", - connector_id, session_id - )), - invite, - }, - ) - .await - .map_err(|e| format!("Failed to create Matrix room: {}", e))?; - - let room_id_str = room_id.to_string(); - - // Get the Room handle for the share - let room = client - .get_room(&room_id) - .ok_or_else(|| format!("Room {} not found after creation", room_id_str))?; - - // Start the share — returns (share, command_rx) - let (share, mut command_rx) = dirigent_matrix::MatrixSessionShare::start( - connector_id.to_string(), - session_id.to_string(), - room_id_str.clone(), - room, - event_rx, - ); - - // Spawn proxy task: command_rx -> ConnectorCommand::SendMessage - let cmd_tx = connector_cmd_tx.clone(); - tokio::spawn(async move { - while let Some(proxy) = command_rx.recv().await { - let cmd = crate::connectors::ConnectorCommand::SendMessage { - session_id: proxy.session_id, - text: proxy.text, - }; - if cmd_tx.send(cmd).await.is_err() { - break; - } - 
} - }); - - // Register share - service - .register_share(share) - .await - .map_err(|e| format!("Failed to register share: {}", e))?; - - // Persist sharing metadata in archivist - if let Some(archivist) = self.archivist().await { - if let Some(connector_uid) = self.get_archivist_connector_uid(connector_id).await { - if let Ok(scroll_id) = archivist.resolve_session(connector_uid, session_id, None).await { - if let Err(e) = archivist.update_session_sharing( - scroll_id, - Some(room_id_str.clone()), - true, - None, - ).await { - tracing::warn!("Failed to persist sharing metadata: {}", e); - } - } - } - } - - info!( - connector_id = %connector_id, - session_id = %session_id, - room_id = %room_id_str, - "Matrix session share created" - ); - - Ok(room_id_str) - } - - /// Stop the Matrix service (if running). - /// - /// Shuts down all active shares and releases the client, then clears the - /// service slot. Returns `true` if a service was running and was stopped. - #[cfg(feature = "server")] - pub async fn stop_matrix_service(&self) -> bool { - let mut guard = self.matrix_service.write().await; - if let Some(service) = guard.take() { - service.shutdown().await; - info!("Matrix service stopped"); - true - } else { - false - } - } - - /// Restart the Matrix service. - /// - /// Stops any running instance, then starts a fresh one from the current - /// configuration. This picks up any config changes made since the last - /// start. - #[cfg(feature = "server")] - pub async fn restart_matrix_service(&self) -> std::result::Result<(), String> { - self.stop_matrix_service().await; - self.start_matrix_service().await - } - - /// Get the Matrix service (if started). 
- #[cfg(feature = "server")] - pub async fn matrix_service(&self) -> Option> { - self.matrix_service.read().await.clone() - } - - /// Register an existing connector with the archivist (for hot-reload) - /// - /// Used during hot-reload to register connectors that were created - /// before the archivist was activated. - #[cfg(feature = "server")] - pub async fn register_connector_with_archivist( - &self, - connector_id: &str, - ) -> Result<(), CoreError> { - let archivist = self - .archivist() - .await - .ok_or_else(|| CoreError::Internal("Archivist not configured".to_string()))?; - - let connector = self - .get_connector(&connector_id.to_string()) - .await - .ok_or(CoreError::NotFound)?; - - let connector_kind = connector.kind(); - let connector_title = connector.title().to_string(); - let fingerprint = { - let config_arc = self.config.read().await; - config_arc - .connectors - .iter() - .find(|c| c.id.as_deref() == Some(connector_id)) - .and_then(|cfg| { - crate::connectors::fingerprint::compute_fingerprint(&cfg.kind, &cfg.params) - }) - }; - - let register_req = dirigent_archivist::types::RegisterConnectorRequest { - custom_uid: uuid::Uuid::try_parse(connector_id).ok(), - r#type: format!("{:?}", connector_kind), - title: connector_title, - client_native_id: connector_id.to_string(), - metadata: serde_json::json!({}), - fingerprint: fingerprint.clone(), - }; - - match archivist.register_connector(register_req, None).await { - Ok(response) => { - self.archivist_connector_uids - .write() - .await - .insert(connector_id.to_string(), response.connector_uid); - info!( - connector_id = %connector_id, - connector_uid = %response.connector_uid, - "Registered existing connector with archivist (hot-reload)" - ); - Ok(()) - } - Err(e) => { - warn!( - connector_id = %connector_id, - error = %e, - "Failed to register connector with archivist" - ); - Err(CoreError::Internal(format!( - "Failed to register connector: {}", - e - ))) - } - } - } - - /// Get the archivist 
connector_uid for a given connector_id - /// - /// This mapping enables archive-first reads to work with non-UUID connector IDs. - /// When a connector is created with a custom ID (e.g., "opencode-1" from config), - /// the archivist still uses a UUID internally. This method retrieves that UUID. - /// - /// # Arguments - /// - /// * `connector_id` - The connector ID (may be UUID or custom string) - /// - /// # Returns - /// - /// Some(Uuid) if the connector is registered with the archivist, None otherwise - #[cfg(feature = "server")] - pub async fn get_archivist_connector_uid(&self, connector_id: &str) -> Option { - self.archivist_connector_uids - .read() - .await - .get(connector_id) - .copied() - } - - /// Get the connector_id for a given archivist connector_uid (reverse lookup) - /// - /// This is the inverse of `get_archivist_connector_uid`. Since the map is small - /// (typically < 10 entries), iterating is efficient. - /// - /// # Arguments - /// * `uid` - The archivist connector UUID - /// - /// # Returns - /// Some(String) with the connector_id if found, None otherwise - #[cfg(feature = "server")] - pub async fn get_connector_id_by_uid(&self, uid: uuid::Uuid) -> Option { - self.archivist_connector_uids - .read() - .await - .iter() - .find(|(_, &v)| v == uid) - .map(|(k, _)| k.clone()) - } - - /// Backfill sessions from a connector into the archivist - /// - /// This method imports existing sessions from a connector that supports - /// `list_sessions()` and `list_messages()` operations into the archivist. 
- /// - /// # Arguments - /// - /// * `connector_id` - The unique identifier of the connector to backfill from - /// - /// # Returns - /// - /// Statistics about the backfill operation including number of sessions and messages imported - /// - /// # Errors - /// - /// - `NotFound` - Connector with the specified ID doesn't exist - /// - `Internal` - Archivist is not configured or other internal errors - /// - /// # Example - /// - /// ```no_run - /// # use dirigent_core::{CoreRuntime, CoreConfig}; - /// # async fn example(runtime: &CoreRuntime) -> Result<(), Box> { - /// let stats = runtime.backfill_connector_sessions("opencode-1").await?; - /// println!("Imported {} sessions with {} messages", - /// stats.sessions_imported, stats.messages_imported); - /// # Ok(()) - /// # } - /// ``` - #[cfg(feature = "server")] - pub async fn backfill_connector_sessions( - &self, - connector_id: &str, - ) -> Result { - use futures::future::BoxFuture; - use std::time::Duration; - - info!(connector_id = %connector_id, "Starting connector session backfill"); - - // Get archivist - let archivist = self - .archivist - .read() - .await - .clone() - .ok_or_else(|| CoreError::Internal("Archivist not configured".to_string()))?; - - // Get connector handle - let connector = self - .get_connector(&connector_id.to_string()) - .await - .ok_or_else(|| { - error!(connector_id = %connector_id, "Connector not found for backfill"); - CoreError::NotFound - })?; - - // Get connector UID from archivist mapping - // This is the same UID used for archive-first reads and session registration - let connector_uid = self - .archivist_connector_uids - .read() - .await - .get(connector_id) - .copied() - .ok_or_else(|| { - error!( - connector_id = %connector_id, - "Connector not registered with archivist (no connector_uid mapping)" - ); - CoreError::Internal(format!( - "Connector {} not registered with archivist. 
\ - This should not happen - connector registration happens on creation.", - connector_id - )) - })?; - - debug!( - connector_id = %connector_id, - connector_uid = %connector_uid, - "Using archivist connector_uid for backfill" - ); - - // Send ListSessions command and wait for response - let mut events = connector.subscribe(); - let cmd_tx = connector.command_tx(); - - cmd_tx - .send(ConnectorCommand::ListSessions) - .await - .map_err(|e| { - error!(connector_id = %connector_id, error = %e, "Failed to send ListSessions command"); - CoreError::Internal(format!("Failed to send command: {}", e)) - })?; - - // Wait for SessionsListed event with timeout - let sessions = tokio::time::timeout(Duration::from_secs(30), async { - while let Ok(event) = events.recv().await { - if let dirigent_protocol::Event::SessionsListed { - connector_id: _, - sessions, - } = event - { - return Ok(sessions); - } - } - Err(CoreError::Internal( - "No SessionsListed event received".to_string(), - )) - }) - .await - .map_err(|_| { - error!(connector_id = %connector_id, "Timeout waiting for sessions list"); - CoreError::Internal("Timeout waiting for sessions list".to_string()) - })??; - - debug!( - connector_id = %connector_id, - session_count = sessions.len(), - "Received {} sessions from connector", - sessions.len() - ); - - // Create closure to fetch messages for each session - let connector_clone = connector.clone(); - let connector_id_clone = connector_id.to_string(); - let fetch_messages = move |session_id: &str| { - let session_id = session_id.to_string(); - let connector = connector_clone.clone(); - let connector_id = connector_id_clone.clone(); - - Box::pin(async move { - debug!( - connector_id = %connector_id, - session_id = %session_id, - "Fetching messages for session" - ); - - // Subscribe to events - let mut events = connector.subscribe(); - let cmd_tx = connector.command_tx(); - - // Send ListMessages command - cmd_tx - .send(ConnectorCommand::ListMessages { - session_id: 
session_id.clone(), - }) - .await - .map_err(|e| { - dirigent_archivist::ArchivistError::InvalidRequest(format!( - "Failed to send command: {}", - e - )) - })?; - - // Wait for MessagesListed event - let messages = tokio::time::timeout(Duration::from_secs(30), async { - while let Ok(event) = events.recv().await { - if let dirigent_protocol::Event::MessagesListed { messages } = event { - // Note: MessagesListed doesn't have a session_id field - // We assume the messages are for the session we requested - return Ok(messages); - } - } - Err(dirigent_archivist::ArchivistError::InvalidRequest( - "No MessagesListed event received".to_string(), - )) - }) - .await - .map_err(|_| { - dirigent_archivist::ArchivistError::InvalidRequest( - "Timeout waiting for messages list".to_string(), - ) - })??; - - debug!( - connector_id = %connector_id, - session_id = %session_id, - message_count = messages.len(), - "Fetched {} messages for session", - messages.len() - ); - - Ok(messages) - }) - as BoxFuture< - 'static, - Result, dirigent_archivist::ArchivistError>, - > - }; - - // Perform backfill - let stats = dirigent_archivist::backfill_from_sessions( - &*archivist, - connector_uid, - sessions, - fetch_messages, - ) - .await - .map_err(|e| { - error!( - connector_id = %connector_id, - error = %e, - "Backfill operation failed" - ); - CoreError::Internal(format!("Backfill failed: {}", e)) - })?; - - info!( - connector_id = %connector_id, - sessions_imported = stats.sessions_imported, - messages_imported = stats.messages_imported, - errors = stats.errors.len(), - "Connector session backfill completed" - ); - - Ok(stats) - } - /// Update ACP Server configuration /// /// This method updates the ACP Server configuration in the runtime's CoreConfig. @@ -3502,7 +2677,7 @@ mod tests { // `SharingBus::new` spawns a worker task, so we need a Tokio // runtime in scope. `CoreRuntime::new` itself remains sync. 
let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Runtime should be initialized with empty state. We can't // easily test internal state without async, but we can verify @@ -3513,7 +2688,7 @@ mod tests { #[tokio::test] async fn test_list_connectors_empty() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let connectors = runtime.list_connectors(None).await; assert_eq!(connectors.len(), 0); @@ -3522,7 +2697,7 @@ mod tests { #[tokio::test] async fn test_list_connectors_with_data() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Add a test connector let (cmd_tx, _cmd_rx) = mpsc::channel(100); @@ -3565,7 +2740,7 @@ mod tests { #[tokio::test] async fn test_list_connectors_filters_by_owner() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Add connectors for different users let (cmd_tx1, _) = mpsc::channel(100); @@ -3624,7 +2799,7 @@ mod tests { #[tokio::test] async fn test_get_connector_exists() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Add a test connector let (cmd_tx, _cmd_rx) = mpsc::channel(100); @@ -3659,7 +2834,7 @@ mod tests { #[tokio::test] async fn test_get_connector_not_found() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let result = runtime.get_connector(&"nonexistent".to_string()).await; assert!(result.is_none()); @@ -3670,7 +2845,7 @@ mod tests { // `SharingBus::new` spawns a worker task, so we need a Tokio // runtime in scope. 
let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let handle = CoreHandle::new(runtime); // Handle should be created successfully @@ -3680,7 +2855,7 @@ mod tests { #[tokio::test] async fn test_core_handle_deref() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let handle = CoreHandle::new(runtime); // Should be able to call runtime methods directly via Deref @@ -3691,7 +2866,7 @@ mod tests { #[tokio::test] async fn test_core_handle_clone() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let handle1 = CoreHandle::new(runtime); // Clone should be cheap and share the same runtime @@ -3726,7 +2901,7 @@ mod tests { #[tokio::test] async fn test_core_handle_inner() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let handle = CoreHandle::new(runtime); // Should be able to get the inner Arc @@ -3742,7 +2917,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let cfg = ConnectorConfig { id: None, // No ID provided @@ -3775,7 +2950,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let cfg = ConnectorConfig { id: Some("my-connector".to_string()), @@ -3803,7 +2978,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let cfg = ConnectorConfig { id: Some("duplicate".to_string()), @@ -3837,7 +3012,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = 
CoreRuntime::new(config); let cfg = ConnectorConfig { id: None, @@ -3863,7 +3038,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let cfg = ConnectorConfig { id: None, @@ -3885,7 +3060,7 @@ mod tests { #[tokio::test] async fn test_stop_connector_not_found() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let result = runtime.stop_connector(&"nonexistent".to_string()).await; assert!(result.is_err()); @@ -3895,7 +3070,7 @@ mod tests { #[tokio::test] async fn test_stop_connector_success() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Add a mock connector let (cmd_tx, _cmd_rx) = mpsc::channel(100); @@ -3933,7 +3108,7 @@ mod tests { #[tokio::test] async fn test_remove_connector_not_found() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let result = runtime.remove_connector(&"nonexistent".to_string()).await; assert!(result.is_err()); @@ -3943,7 +3118,7 @@ mod tests { #[tokio::test] async fn test_remove_connector_success() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Add a mock connector let (cmd_tx, _cmd_rx) = mpsc::channel(100); @@ -3988,7 +3163,7 @@ mod tests { #[tokio::test] async fn test_send_command_not_found() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); let result = runtime .send_command(&"nonexistent".to_string(), ConnectorCommand::ListSessions) @@ -4000,7 +3175,7 @@ mod tests { #[tokio::test] async fn test_send_command_success() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = 
CoreRuntime::new(config); // Add a mock connector let (cmd_tx, mut cmd_rx) = mpsc::channel(100); @@ -4038,7 +3213,7 @@ mod tests { #[tokio::test] async fn test_send_command_all_types() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Add a mock connector let (cmd_tx, mut cmd_rx) = mpsc::channel(100); @@ -4128,7 +3303,7 @@ mod tests { #[tokio::test] async fn test_sharing_bus_subscribe_all() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Two independent subscribers to the primary event fan-out. let mut rx1 = runtime.sharing_bus.subscribe_all().await; @@ -4158,7 +3333,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Initially empty let list = runtime.list_connectors(None).await; @@ -4205,7 +3380,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Create multiple connectors for i in 1..=3 { @@ -4261,7 +3436,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Verify empty state initially assert_eq!(runtime.list_connectors(None).await.len(), 0); @@ -4335,7 +3510,7 @@ mod tests { use serde_json::json; let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Perform 5 rapid remove-create cycles for i in 1..=5 { @@ -4382,7 +3557,7 @@ mod tests { #[tokio::test] async fn test_fallback_on_connector_error() { let config = CoreConfig::default(); - let runtime = Arc::new(CoreRuntime::new(config, None)); + let runtime = Arc::new(CoreRuntime::new(config)); // Create a Gateway connector let 
(gateway_cmd_tx, _) = mpsc::channel(100); @@ -4485,7 +3660,7 @@ mod tests { #[tokio::test] async fn test_cleanup_stale_transfers() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Add old transfer let old_transfer = TransferredSession { @@ -4527,7 +3702,7 @@ mod tests { #[tokio::test] async fn test_find_gateway_connector() { let config = CoreConfig::default(); - let runtime = CoreRuntime::new(config, None); + let runtime = CoreRuntime::new(config); // Initially no Gateway assert!(runtime.find_gateway_connector().await.is_none()); diff --git a/crates/dirigent_core/src/runtime/zed_detection.rs b/crates/dirigent_core/src/runtime/zed_detection.rs deleted file mode 100644 index 1cc6a93..0000000 --- a/crates/dirigent_core/src/runtime/zed_detection.rs +++ /dev/null @@ -1,589 +0,0 @@ -//! Zed editor agent detection and connector config generation. -//! -//! This module detects Zed editor installations and generates `ConnectorConfig` -//! entries for discovered ACP agents. It is called during runtime initialization -//! to auto-populate connectors from Zed's agent configuration. - -use crate::config::{ConnectorConfig, CoreConfig}; -use crate::connectors::acp::config::ConnectorAgentType; -use crate::connectors::acp::{AcpConfig, TransportKind}; -use crate::types::ConnectorKind; -use dirigent_tools::EmbeddingConfig; -use tracing::{debug, info, warn}; - -/// Default supported features for known ACP agent types. -/// -/// These are conservative defaults based on confirmed agent capabilities. -/// Users can override via the connector config UI. 
-fn default_features_for_agent(agent_type: &ConnectorAgentType) -> Vec { - match agent_type { - ConnectorAgentType::Claude => vec![ - "cancellation".to_string(), - "session_resume".to_string(), - "session_list".to_string(), - ], - ConnectorAgentType::Codex => vec![ - "session_resume".to_string(), - ], - // Gemini: no confirmed features yet (hangs on connect — BUG-7) - ConnectorAgentType::Gemini => vec![], - ConnectorAgentType::Custom => vec![], - } -} - -/// Convert a discovered Zed agent into a `ConnectorConfig` for the runtime. -/// -/// Only creates connectors for agents with resolved binary paths (typically -/// registry agents whose binaries have been downloaded by Zed). -/// -/// Returns `None` if the agent has no binary path. -pub fn zed_agent_to_connector_config(agent: &dirigent_zed::ZedAgent) -> Option { - let binary_path = agent.binary_path.as_ref()?; - - // Map agent names to proper types. Handles both Zed settings keys - // (e.g. "claude-acp") and external_agents directory names (e.g. "claude-agent-acp"). - let name_lower = agent.name.to_lowercase(); - let (default_title, default_icon, agent_type): (&str, &str, ConnectorAgentType) = - if name_lower.contains("claude") { - ("Claude (Zed)", "claude", ConnectorAgentType::Claude) - } else if name_lower.contains("codex") { - ("Codex (Zed)", "codex", ConnectorAgentType::Codex) - } else if name_lower.contains("gemini") { - ("Gemini (Zed)", "gemini", ConnectorAgentType::Gemini) - } else { - (agent.name.as_str(), "acp", ConnectorAgentType::Custom) - }; - - // Use registry display name with "(Zed)" suffix when available, falling - // back to the hardcoded title. - let title: String = match agent.display_name.as_deref() { - Some(display) => format!("{display} (Zed)"), - None => default_title.to_string(), - }; - - // Use the locally cached SVG icon path from the registry when available, - // otherwise fall back to the built-in icon name. 
- let icon: String = match agent.icon_local_path.as_ref() { - Some(path) => path.to_string_lossy().to_string(), - None => default_icon.to_string(), - }; - - let features = default_features_for_agent(&agent_type); - - // Build a proper AcpConfig with stdio transport pointing to the Zed-managed binary. - let env: Vec<(String, String)> = agent - .env_overrides - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect(); - - // Use args from registry metadata (e.g. ["--acp"]) when available. - let args = if agent.args.is_empty() { - vec![] - } else { - agent.args.clone() - }; - - let acp_config = AcpConfig { - transport: TransportKind::Stdio { - command: binary_path.to_string_lossy().to_string(), - args, - cwd: None, - env, - }, - protocol_version: 1, - cwd: ".".to_string(), - retry: Default::default(), - embedding: EmbeddingConfig::default(), - default_ownership: Default::default(), - acp_log_dir: None, - agent_type, - }; - - let params = match serde_json::to_value(&acp_config) { - Ok(v) => v, - Err(e) => { - warn!( - agent = %agent.name, - error = %e, - "Failed to serialize AcpConfig for Zed agent" - ); - return None; - } - }; - - Some(ConnectorConfig { - id: None, - kind: ConnectorKind::Acp, - owner: None, - title: Some(title), - working_directory: None, - params, - icon_path: Some(icon), - show_type_overlay: false, - supported_features: features, - tool_configuration: None, - plugin_assignments: vec![], - use_in_new_projects: true, - source: None, - zed_agent_name: Some(agent.name.clone()), - }) -} - -/// Refresh binary paths for Zed-sourced connectors in the config. -/// -/// When Zed upgrades agent binaries in the background, the binary path changes -/// (e.g. new version directory). This function re-detects the current binary -/// paths from Zed installations and updates any connector that has a -/// `zed_agent_name` set. -/// -/// Returns the number of connectors updated. 
-pub fn refresh_zed_connector_binaries(config: &mut CoreConfig) -> usize { - let installations = dirigent_zed::detect_installations(); - if installations.is_empty() { - debug!("No Zed installations detected, skipping binary refresh"); - return 0; - } - - // Build a map of agent_name -> latest binary path from all Zed installations. - let mut agent_binaries: std::collections::HashMap = - std::collections::HashMap::new(); - for installation in &installations { - for agent in &installation.agents { - if let Some(ref binary_path) = agent.binary_path { - agent_binaries - .insert(agent.name.clone(), binary_path.to_string_lossy().to_string()); - } - } - } - - let mut updated = 0usize; - - for connector in &mut config.connectors { - let zed_name = match connector.zed_agent_name.as_deref() { - Some(n) => n, - None => continue, - }; - - let new_binary = match agent_binaries.get(zed_name) { - Some(b) => b.clone(), - None => continue, - }; - - // Parse current ACP config to check the existing binary path. - let mut acp_config: AcpConfig = match serde_json::from_value(connector.params.clone()) { - Ok(c) => c, - Err(_) => continue, - }; - - let current_command = match &acp_config.transport { - TransportKind::Stdio { command, .. } => command.clone(), - _ => continue, - }; - - if current_command == new_binary { - continue; - } - - // Update the transport command to the new binary path. - match &mut acp_config.transport { - TransportKind::Stdio { command, .. } => { - info!( - zed_agent = %zed_name, - old = %current_command, - new = %new_binary, - "Updating Zed connector binary path" - ); - *command = new_binary; - } - _ => continue, - } - - // Re-serialize back into params. 
- match serde_json::to_value(&acp_config) { - Ok(v) => { - connector.params = v; - updated += 1; - } - Err(e) => { - warn!( - zed_agent = %zed_name, - error = %e, - "Failed to re-serialize AcpConfig after binary update" - ); - } - } - } - - if updated > 0 { - info!(count = updated, "Updated Zed connector binary paths"); - } - - updated -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::config::CoreConfig; - - #[test] - fn test_zed_agent_to_connector_config_no_binary() { - let agent = dirigent_zed::ZedAgent { - name: "claude-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: None, - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - assert!(zed_agent_to_connector_config(&agent).is_none()); - } - - #[test] - fn test_zed_agent_to_connector_config_with_binary() { - let agent = dirigent_zed::ZedAgent { - name: "claude-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/usr/local/bin/claude-acp")), - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - - let config = zed_agent_to_connector_config(&agent).unwrap(); - assert_eq!(config.kind, ConnectorKind::Acp); - assert_eq!(config.title.as_deref(), Some("Claude (Zed)")); - assert_eq!(config.icon_path.as_deref(), Some("claude")); - assert_eq!(config.source, None); - - // Verify the params contain a proper AcpConfig - let acp_config: AcpConfig = serde_json::from_value(config.params).unwrap(); - match &acp_config.transport { - TransportKind::Stdio { command, .. 
} => { - assert_eq!(command, "/usr/local/bin/claude-acp"); - } - _ => panic!("Expected stdio transport"), - } - assert_eq!(acp_config.agent_type, ConnectorAgentType::Claude); - assert_eq!(config.supported_features, vec!["cancellation", "session_resume", "session_list"]); - } - - #[test] - fn test_zed_agent_to_connector_config_with_env() { - let mut env = std::collections::HashMap::new(); - env.insert( - "CLAUDE_CODE_EXECUTABLE".to_string(), - "/usr/bin/claude".to_string(), - ); - - let agent = dirigent_zed::ZedAgent { - name: "claude-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/path/to/binary")), - env_overrides: env, - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - - let config = zed_agent_to_connector_config(&agent).unwrap(); - let acp_config: AcpConfig = serde_json::from_value(config.params).unwrap(); - match &acp_config.transport { - TransportKind::Stdio { env, .. 
} => { - assert!(env - .iter() - .any(|(k, v)| k == "CLAUDE_CODE_EXECUTABLE" && v == "/usr/bin/claude")); - } - _ => panic!("Expected stdio transport"), - } - } - - #[test] - fn test_zed_agent_to_connector_config_unknown_agent() { - let agent = dirigent_zed::ZedAgent { - name: "my-custom-agent".to_string(), - agent_type: dirigent_zed::AgentServerType::Custom, - binary_path: Some(std::path::PathBuf::from("/path/to/custom")), - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - - let config = zed_agent_to_connector_config(&agent).unwrap(); - assert_eq!(config.title.as_deref(), Some("my-custom-agent")); - assert_eq!(config.icon_path.as_deref(), Some("acp")); - - let acp_config: AcpConfig = serde_json::from_value(config.params).unwrap(); - assert_eq!(acp_config.agent_type, ConnectorAgentType::Custom); - assert!(config.supported_features.is_empty()); - } - - #[test] - fn test_zed_agent_codex() { - let agent = dirigent_zed::ZedAgent { - name: "codex-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/path/to/codex")), - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - - let config = zed_agent_to_connector_config(&agent).unwrap(); - assert_eq!(config.title.as_deref(), Some("Codex (Zed)")); - assert_eq!(config.icon_path.as_deref(), Some("codex")); - - let acp_config: AcpConfig = serde_json::from_value(config.params).unwrap(); - assert_eq!(acp_config.agent_type, ConnectorAgentType::Codex); - assert_eq!(config.supported_features, vec!["session_resume"]); - } - - #[test] - fn test_zed_agent_gemini() { - let agent = dirigent_zed::ZedAgent { - name: "gemini".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/path/to/gemini")), - 
env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - - let config = zed_agent_to_connector_config(&agent).unwrap(); - assert_eq!(config.title.as_deref(), Some("Gemini (Zed)")); - assert_eq!(config.icon_path.as_deref(), Some("gemini")); - - let acp_config: AcpConfig = serde_json::from_value(config.params).unwrap(); - assert_eq!(acp_config.agent_type, ConnectorAgentType::Gemini); - assert!(config.supported_features.is_empty()); - } - - #[test] - fn test_dismissed_zed_agent_title_matches_generated_config() { - // Verify that a dismissed title like "Claude (Zed)" matches the title - // generated by zed_agent_to_connector_config for a claude-acp agent - let mut core_config = CoreConfig::default(); - core_config - .dismissed_zed_agents - .push("Claude (Zed)".to_string()); - - let agent = dirigent_zed::ZedAgent { - name: "claude-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/usr/local/bin/claude-acp")), - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - - let connector_config = zed_agent_to_connector_config(&agent).unwrap(); - assert_eq!(connector_config.title.as_deref(), Some("Claude (Zed)")); - // The dismissed list should contain the generated title - assert!(core_config - .dismissed_zed_agents - .contains(connector_config.title.as_ref().unwrap())); - } - - #[test] - fn test_dismissed_list_does_not_block_other_agents() { - // Dismissing Claude should not block Codex or Gemini - let mut core_config = CoreConfig::default(); - core_config - .dismissed_zed_agents - .push("Claude (Zed)".to_string()); - - let codex_title = "Codex (Zed)".to_string(); - let gemini_title = "Gemini (Zed)".to_string(); - - assert!(!core_config.dismissed_zed_agents.contains(&codex_title)); - 
assert!(!core_config.dismissed_zed_agents.contains(&gemini_title)); - } - - #[test] - fn test_dismissed_zed_agents_serde_roundtrip() { - // Verify dismissed_zed_agents survives serialization/deserialization - let mut core_config = CoreConfig::default(); - core_config - .dismissed_zed_agents - .push("Claude (Zed)".to_string()); - core_config - .dismissed_zed_agents - .push("Gemini (Zed)".to_string()); - - let json = serde_json::to_string(&core_config).unwrap(); - let deserialized: CoreConfig = serde_json::from_str(&json).unwrap(); - - assert_eq!(deserialized.dismissed_zed_agents.len(), 2); - assert!(deserialized - .dismissed_zed_agents - .contains(&"Claude (Zed)".to_string())); - assert!(deserialized - .dismissed_zed_agents - .contains(&"Gemini (Zed)".to_string())); - } - - #[test] - fn test_dismissed_zed_agents_empty_not_serialized() { - // With skip_serializing_if = "Vec::is_empty", empty list should not appear in JSON - let core_config = CoreConfig::default(); - let json = serde_json::to_string(&core_config).unwrap(); - assert!( - !json.contains("dismissed_zed_agents"), - "Empty dismissed_zed_agents should be omitted from serialization" - ); - } - - #[test] - fn test_dismissed_zed_agents_deserialized_from_missing_field() { - // Old config files without dismissed_zed_agents should still deserialize - // thanks to #[serde(default)] - let json = r#"{"project_dir":".","connectors":[]}"#; - let config: CoreConfig = serde_json::from_str(json).unwrap(); - assert!(config.dismissed_zed_agents.is_empty()); - } - - #[test] - fn test_zed_agent_name_is_set() { - let agent = dirigent_zed::ZedAgent { - name: "claude-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/usr/local/bin/claude-acp")), - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - let config = 
zed_agent_to_connector_config(&agent).unwrap(); - assert_eq!(config.zed_agent_name.as_deref(), Some("claude-acp")); - } - - #[test] - fn test_zed_agent_name_preserves_original_name() { - // The zed_agent_name should be the exact Zed agent name, not the display title - let agent = dirigent_zed::ZedAgent { - name: "claude-agent-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/path/to/binary")), - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - let config = zed_agent_to_connector_config(&agent).unwrap(); - assert_eq!(config.title.as_deref(), Some("Claude (Zed)")); - assert_eq!(config.zed_agent_name.as_deref(), Some("claude-agent-acp")); - } - - #[test] - fn test_zed_agent_name_serde_roundtrip() { - let agent = dirigent_zed::ZedAgent { - name: "codex".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/path/to/codex")), - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - let config = zed_agent_to_connector_config(&agent).unwrap(); - let json = serde_json::to_string(&config).unwrap(); - assert!(json.contains("\"zed_agent_name\":\"codex\"")); - let deserialized: ConnectorConfig = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized.zed_agent_name.as_deref(), Some("codex")); - } - - #[test] - fn test_non_zed_connector_has_no_zed_agent_name() { - // ConnectorConfig created via template should not have zed_agent_name - let config = ConnectorConfig::default(); - assert!(config.zed_agent_name.is_none()); - } - - #[test] - fn test_enriched_display_name_used_in_title() { - let agent = dirigent_zed::ZedAgent { - name: "claude-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: 
Some(std::path::PathBuf::from("/path/to/claude")), - env_overrides: std::collections::HashMap::new(), - display_name: Some("Claude Agent".to_string()), - description: Some("ACP wrapper for Anthropic's Claude".to_string()), - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }; - - let config = zed_agent_to_connector_config(&agent).unwrap(); - // Title should use the registry display name with "(Zed)" suffix. - assert_eq!(config.title.as_deref(), Some("Claude Agent (Zed)")); - } - - #[test] - fn test_enriched_args_passed_to_transport() { - let agent = dirigent_zed::ZedAgent { - name: "auggie".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/path/to/auggie")), - env_overrides: std::collections::HashMap::new(), - display_name: Some("Auggie CLI".to_string()), - description: None, - args: vec!["--acp".to_string()], - icon_local_path: None, - icon_url: None, - }; - - let config = zed_agent_to_connector_config(&agent).unwrap(); - let acp_config: AcpConfig = serde_json::from_value(config.params).unwrap(); - match &acp_config.transport { - TransportKind::Stdio { args, .. 
} => { - assert_eq!(args, &["--acp"]); - } - _ => panic!("Expected stdio transport"), - } - } - - #[test] - fn test_enriched_icon_path_used() { - let agent = dirigent_zed::ZedAgent { - name: "claude-acp".to_string(), - agent_type: dirigent_zed::AgentServerType::Registry, - binary_path: Some(std::path::PathBuf::from("/path/to/claude")), - env_overrides: std::collections::HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: Some(std::path::PathBuf::from("/icons/claude-acp.svg")), - icon_url: None, - }; - - let config = zed_agent_to_connector_config(&agent).unwrap(); - assert_eq!(config.icon_path.as_deref(), Some("/icons/claude-acp.svg")); - } -} diff --git a/crates/dirigent_core/src/sharing/health.rs b/crates/dirigent_core/src/sharing/health.rs index e37b080..fede88a 100644 --- a/crates/dirigent_core/src/sharing/health.rs +++ b/crates/dirigent_core/src/sharing/health.rs @@ -1,10 +1,17 @@ //! Consecutive-failure health drift for streams (K=5 threshold). //! -//! Mirrors the archivist's drift logic but tracks a single stream's -//! outcomes. Re-exports the shared `HealthStatus` enum from the -//! archivist's backend module to avoid duplication. +//! Tracks a single stream's health based on delivery outcomes. -pub use dirigent_archivist::backend::HealthStatus; +/// Health status of a stream, tracking consecutive delivery failures. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum HealthStatus { + /// Stream is delivering events successfully. + Healthy, + /// Stream has experienced failures but is still below the threshold. + Degraded { reason: String }, + /// Stream has exceeded the failure threshold and is considered offline. + Unavailable { reason: String }, +} /// Number of consecutive failures before a stream drifts from `Degraded` /// to `Unavailable`. Matches the archivist's backend drift threshold. 
diff --git a/crates/dirigent_core/src/sharing/matrix.rs b/crates/dirigent_core/src/sharing/matrix.rs deleted file mode 100644 index d092cac..0000000 --- a/crates/dirigent_core/src/sharing/matrix.rs +++ /dev/null @@ -1,217 +0,0 @@ -//! `MatrixFactory`: build a Matrix [`SessionStream`] from a `[[streams]]` -//! config block. -//! -//! This is the first stream-side factory wired for the Phase 4 migration -//! (Task 18). The factory lives in `dirigent_core` rather than -//! `dirigent_matrix` because `StreamFactory` is defined here and -//! `dirigent_core` already depends on `dirigent_matrix` — putting it in -//! `dirigent_matrix` would create a cycle. -//! -//! ## Scope -//! -//! The factory's responsibility is narrow: parse `cfg.params`, resolve -//! the target Matrix room via a running `MatrixService`, and construct a -//! `MatrixSessionShare` configured for the stream path (no legacy -//! forwarder task). Command-proxy wiring (Matrix → Dirigent -//! `ConnectorCommand::SendMessage`) remains in -//! `CoreRuntime::create_matrix_share` for now; a follow-up will extend -//! the factory to cover that path. -//! -//! ## Config shape -//! -//! ```toml -//! [[streams]] -//! name = "matrix-main" -//! type = "matrix" -//! -//! [streams.scope] -//! kind = "session" -//! scroll_id = "01985d00-..." -//! -//! [streams.params] -//! connector_id = "opencode-1" # dirigent connector key -//! session_id = "native-abc123" # native connector session id -//! room_id = "!abc:matrix.org" # pre-existing room to attach to -//! homeserver_url = "https://matrix.org" # informational (service already knows) -//! ``` - -use std::sync::Arc; - -use async_trait::async_trait; -use serde::Deserialize; - -use dirigent_protocol::streaming::{SessionStream, StreamScope}; - -use super::config::StreamConfig; -use super::factory::{StreamBuildError, StreamFactory}; - -/// Stream-side factory for Matrix. See module docs for the expected -/// TOML shape. 
-pub struct MatrixFactory { - service: Arc, -} - -impl MatrixFactory { - /// Build a factory bound to a running `MatrixService`. The service - /// is expected to be logged in and sync-started by the time - /// `build()` is called; if it isn't, `build()` returns - /// `StreamBuildError::Transport`. - pub fn new(service: Arc) -> Self { - Self { service } - } -} - -#[derive(Debug, Deserialize)] -struct MatrixStreamParams { - /// Dirigent connector id that owns the session being bridged. - connector_id: String, - /// Native connector session id. - session_id: String, - /// Matrix room id — must be a pre-existing room the bot can access. - /// Room creation is still handled by - /// `CoreRuntime::create_matrix_share` until the factory path is - /// expanded to cover it. - room_id: String, - /// Informational; the logged-in `MatrixService` is the authority on - /// which homeserver to talk to. Accepted so configs can be - /// self-documenting and round-trip through TOML. - #[serde(default)] - #[allow(dead_code)] - homeserver_url: Option, -} - -#[async_trait] -impl StreamFactory for MatrixFactory { - fn kind(&self) -> &'static str { - "matrix" - } - - async fn build( - &self, - cfg: &StreamConfig, - ) -> Result, StreamBuildError> { - // Scope must be Session; Matrix shares are intrinsically - // per-session bi-directional bridges. - let scroll_id = match &cfg.scope { - StreamScope::Session { scroll_id } => *scroll_id, - other => { - return Err(StreamBuildError::Config(format!( - "matrix stream requires scope.kind = \"session\", got {:?}", - other - ))); - } - }; - - // Parse type-specific params. - let params: MatrixStreamParams = cfg - .params - .clone() - .try_into() - .map_err(|e: toml::de::Error| { - StreamBuildError::Config(format!( - "matrix stream '{}': invalid params: {}", - cfg.name, e - )) - })?; - - // Look up the room via the service. We intentionally don't - // create or join rooms here — the room must already exist. 
- // Creation remains the responsibility of - // `CoreRuntime::create_matrix_share`. - let room = match self.service.room_by_id(¶ms.room_id).await { - Ok(Some(room)) => room, - Ok(None) => { - return Err(StreamBuildError::Transport(format!( - "matrix stream '{}': room '{}' not found on client \ - — ensure the bot has joined it", - cfg.name, params.room_id - ))); - } - Err(dirigent_matrix::MatrixError::NotLoggedIn) => { - return Err(StreamBuildError::Transport( - "matrix service is not logged in; cannot build stream" - .to_string(), - )); - } - Err(dirigent_matrix::MatrixError::Config(msg)) => { - return Err(StreamBuildError::Config(format!( - "matrix stream '{}': {}", - cfg.name, msg - ))); - } - Err(other) => { - return Err(StreamBuildError::Transport(format!( - "matrix stream '{}': {}", - cfg.name, other - ))); - } - }; - - // Construct the share for the stream path (no legacy forwarder - // task). We drop the command receiver on the floor here — the - // Matrix → Dirigent direction is not covered by this factory - // yet; see the follow-up TODO in the module docs. - let (share, _command_rx) = dirigent_matrix::MatrixSessionShare::new_for_stream( - params.connector_id, - params.session_id, - scroll_id, - params.room_id, - room, - ); - - Ok(Arc::new(share) as Arc) - } -} - -// ─── Tests ─────────────────────────────────────────────────────────────────── - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn factory_kind_is_matrix() { - // The factory's `kind()` is static and doesn't require a running - // MatrixService to read — covered by a minimal construction - // check in the integration test suite. - fn assert_is_factory(_: &F) {} - - // We can't easily build a MatrixService in a unit test (it needs - // an Account + data dir + SQLite store). The full smoke test - // lives in `packages/dirigent_matrix/tests/factory_test.rs` and - // the cross-crate registry test in - // `packages/dirigent_core/tests/matrix_migration_test.rs`. 
- // - // This module-local test exists only to assert that the impl - // block type-checks against the `StreamFactory` trait bound. - fn _compile_check(f: &MatrixFactory) { - assert_is_factory(f); - } - } - - #[test] - fn matrix_stream_params_deserialise_ok() { - let toml_str = r#" -connector_id = "opencode-1" -session_id = "native-abc" -room_id = "!foo:example.com" -homeserver_url = "https://matrix.org" -"#; - let p: MatrixStreamParams = toml::from_str(toml_str).expect("parse"); - assert_eq!(p.connector_id, "opencode-1"); - assert_eq!(p.session_id, "native-abc"); - assert_eq!(p.room_id, "!foo:example.com"); - assert_eq!(p.homeserver_url.as_deref(), Some("https://matrix.org")); - } - - #[test] - fn matrix_stream_params_reject_missing_required() { - // Missing room_id should fail. - let toml_str = r#" -connector_id = "opencode-1" -session_id = "native-abc" -"#; - let err: Result = toml::from_str(toml_str); - assert!(err.is_err()); - } -} diff --git a/crates/dirigent_core/src/sharing/mod.rs b/crates/dirigent_core/src/sharing/mod.rs index ff37eec..1492541 100644 --- a/crates/dirigent_core/src/sharing/mod.rs +++ b/crates/dirigent_core/src/sharing/mod.rs @@ -1,23 +1,19 @@ -//! SharingBus, StreamRegistry, and replay. See docs/plans/2026-04-21-archivist-phase4-design.md. +//! SharingBus, StreamRegistry, and stream health. See docs/plans/2026-04-21-archivist-phase4-design.md. +//! +//! Replay functionality has moved to `dirigent_server::replay`. 
pub mod bus; pub mod config; pub mod factory; pub mod health; -#[cfg(feature = "server")] -pub mod matrix; #[cfg(any(test, feature = "test-utils"))] pub mod mock; pub mod registry; -pub mod replay; pub use bus::{BusReceiver, SharingBus}; pub use config::{StreamConfig, StreamsConfig}; pub use factory::{StreamBuildError, StreamFactory, StreamFactoryRegistry}; pub use health::HealthStatus; -#[cfg(feature = "server")] -pub use matrix::MatrixFactory; pub use registry::{StreamId, StreamInfo, StreamRegistration, StreamRegistry}; -pub use replay::{ReplayError, ReplayOptions, ReplayReport, ReplaySpeed}; #[cfg(any(test, feature = "test-utils"))] pub use mock::MockStream; diff --git a/crates/dirigent_core/src/sharing/replay.rs b/crates/dirigent_core/src/sharing/replay.rs deleted file mode 100644 index 6cc7c23..0000000 --- a/crates/dirigent_core/src/sharing/replay.rs +++ /dev/null @@ -1,226 +0,0 @@ -//! Replay: reads a session from the archive and dispatches synthetic -//! `BusEvent`s with `EventOrigin::Replay` directly to a target stream, -//! bypassing the `SharingBus`. -//! -//! Consumed by `CoreRuntime::replay_session_to_stream` (task 16). This -//! module intentionally exposes a free function that takes -//! `&Archivist`, `scroll_id`, `Arc`, and `ReplayOptions` -//! so it can be unit-tested without a full runtime. - -use std::sync::Arc; -use std::time::Duration; - -use uuid::Uuid; - -use dirigent_archivist::coordinator::Archivist; -use dirigent_archivist::error::ArchivistError; -use dirigent_archivist::types::MessageRecord; -use dirigent_protocol::{ - Event, Message, MessagePart, MessageRole, MessageStatus, - streaming::{BusEvent, EventOrigin, EventRouting, SessionStream, StreamOutcome}, -}; - -/// Options controlling a replay pass. 
-#[derive(Debug, Clone)] -pub struct ReplayOptions { - /// When true and the session is an AcpConnection, meta-events are read from - /// the archive (currently only counted — rendering meta events as - /// `BusEvent`s is out of scope for Phase 4). - pub include_meta_events: bool, - /// Pace events in real time (sleep between consecutive timestamps) or emit - /// as fast as the target stream can consume. - pub speed: ReplaySpeed, -} - -impl Default for ReplayOptions { - fn default() -> Self { - Self { - include_meta_events: false, - speed: ReplaySpeed::AsFastAsPossible, - } - } -} - -/// Controls inter-event pacing during replay. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ReplaySpeed { - /// Sleep the wall-clock delta between consecutive message timestamps. - Realtime, - /// Emit events as fast as the stream can consume. - AsFastAsPossible, -} - -/// Outcome of a replay pass. -#[derive(Debug, Default, Clone)] -pub struct ReplayReport { - /// Total events dispatched to the stream (includes failed attempts). - pub events_sent: usize, - /// Events the stream rejected (`StreamOutcome::Failed`). - pub failures: usize, - /// Wall-clock duration of the replay in milliseconds. - pub duration_ms: u64, -} - -/// Errors raised by `replay_session_to_stream` itself. Stream-side failures are -/// counted in `ReplayReport::failures` rather than propagated, so one bad event -/// doesn't abort the replay. -#[derive(Debug, thiserror::Error)] -pub enum ReplayError { - /// The archive has no session with the given scroll id. - #[error("session not found: {0}")] - SessionNotFound(Uuid), - /// Archivist returned a non-SessionUnknown error (I/O, decoding, etc). - #[error("archivist: {0}")] - Archivist(String), -} - -/// Replay a session's archived messages to a single `SessionStream`. -/// -/// Reads metadata + messages from `archivist`, synthesises a `BusEvent` per -/// message with `EventOrigin::Replay { replay_id }`, and dispatches directly -/// to the target stream. 
The `SharingBus` is not involved; live events remain -/// unaffected. -/// -/// The function continues on stream failures and records the count in the -/// returned `ReplayReport`; only unrecoverable archive errors propagate. -pub async fn replay_session_to_stream( - archivist: &Archivist, - scroll_id: Uuid, - stream: Arc, - opts: ReplayOptions, -) -> Result { - let start = std::time::Instant::now(); - let replay_id = Uuid::new_v4(); - - // Load metadata. Translate the archivist's typed `SessionUnknown` into - // the replay-level `SessionNotFound` variant; everything else becomes - // `Archivist(_)` so callers can distinguish "missing" from "broken". - let metadata = archivist - .get_session_metadata(scroll_id, None) - .await - .map_err(|e| match e { - ArchivistError::SessionUnknown(id) => ReplayError::SessionNotFound(id), - other => ReplayError::Archivist(other.to_string()), - })?; - - let messages = archivist - .get_messages(scroll_id, None) - .await - .map_err(|e| ReplayError::Archivist(e.to_string()))?; - - let connector_uid = Some(metadata.connector_uid); - let native_session_id = metadata.native_session_id.clone(); - // We do not persist the orchestrator-side `connector_id` string in session - // metadata; the native session id is the best reversible handle we have. - let connector_id = native_session_id.clone().unwrap_or_default(); - - let mut events_sent = 0usize; - let mut failures = 0usize; - let mut prev_ts: Option> = None; - - for record in messages { - if matches!(opts.speed, ReplaySpeed::Realtime) { - if let Some(prev) = prev_ts { - let delta = record.ts.signed_duration_since(prev); - if let Ok(d) = delta.to_std() { - // Cap per-step sleep at 1h to avoid pathological archives - // where a session sat idle for days. 
- if d > Duration::from_millis(0) && d < Duration::from_secs(3600) { - tokio::time::sleep(d).await; - } - } - } - prev_ts = Some(record.ts); - } - - let message = message_from_record(&record, native_session_id.as_deref()); - let event = Event::MessageCompleted { - connector_id: connector_id.clone(), - message, - }; - - let mut routing = EventRouting::derive(&event, connector_uid, &connector_id); - // `derive()` leaves scroll_id=None (the bus cache normally fills it in). - // During replay we have the authoritative scroll_id up front. - routing.scroll_id = Some(scroll_id); - - let bus_event = BusEvent { - routing, - origin: EventOrigin::Replay { replay_id }, - event: Arc::new(event), - }; - - match stream.on_event(&bus_event).await { - StreamOutcome::Ok | StreamOutcome::Skipped => { - events_sent += 1; - } - StreamOutcome::Failed(_err) => { - failures += 1; - events_sent += 1; // count attempted regardless - } - } - } - - if opts.include_meta_events { - // Meta-events exist only on AcpConnection sessions; the read is - // cheap and idempotent, so we don't gate on `metadata.kind`. Render- - // as-BusEvent is out of scope for Phase 4 — we just probe the - // archive so missing meta-event storage surfaces as a log line - // here rather than later in the call chain. - let _ = archivist.get_meta_events(scroll_id, None).await; - } - - Ok(ReplayReport { - events_sent, - failures, - duration_ms: start.elapsed().as_millis() as u64, - }) -} - -/// Synthesize a protocol `Message` from an archived `MessageRecord`. -/// -/// The session_id we emit is the connector's native session id when known, -/// falling back to the stringified scroll_id so downstream routing at least -/// has a stable handle. 
-fn message_from_record(record: &MessageRecord, native_session_id: Option<&str>) -> Message { - Message { - id: record.message_id.to_string(), - session_id: native_session_id - .map(str::to_string) - .unwrap_or_else(|| record.session.to_string()), - role: parse_role(&record.role), - created_at: record.ts, - content: content_parts_from_record(record), - status: MessageStatus::Completed, - metadata: None, - } -} - -/// Parse the archivist's stringly-typed role into the protocol enum. -/// -/// `MessageRole` only has `User` and `Assistant` today; archived "system" / -/// "tool" rows (which the protocol layer does not support) fall back to -/// `User` rather than drop the message entirely. Lossy but preserves content. -fn parse_role(role: &str) -> MessageRole { - match role { - "assistant" => MessageRole::Assistant, - "user" => MessageRole::User, - // Protocol has no System/Tool variant; surface these as user messages - // so their content still reaches the stream. - _ => MessageRole::User, - } -} - -/// Prefer the archived structured `content_parts` (round-trips tool calls, -/// code blocks, etc). Fall back to a single `Text` part built from the -/// markdown rendering when parts are missing or fail to parse. -fn content_parts_from_record(record: &MessageRecord) -> Vec { - if let Some(parts) = &record.content_parts { - if let Ok(parsed) = serde_json::from_value::>(parts.clone()) { - return parsed; - } - } - vec![MessagePart::Text { - text: record.content_md.clone(), - }] -} diff --git a/crates/dirigent_core/tests/matrix_migration_test.rs b/crates/dirigent_core/tests/matrix_migration_test.rs deleted file mode 100644 index c702fb2..0000000 --- a/crates/dirigent_core/tests/matrix_migration_test.rs +++ /dev/null @@ -1,207 +0,0 @@ -//! Integration test: Matrix migration onto StreamRegistry (Phase 4, Task 18). -//! -//! Scope: -//! - `MatrixFactory::kind()` reports `"matrix"`. -//! - A fresh `StreamFactoryRegistry` with the factory registered can look it -//! 
up and rejects unknown kinds. -//! - Building a Matrix stream from a config with an `archive_wide` scope is -//! rejected with `StreamBuildError::Config`. -//! - Building a Matrix stream against a not-logged-in service is rejected -//! with `StreamBuildError::Transport` (does not panic, does not spin up -//! a real Matrix connection). -//! -//! This does NOT exercise end-to-end Matrix delivery — that requires a -//! live homeserver or a stub client, which is outside Task 18's scope. -//! The share-side `SessionStream` impl is covered separately by -//! `dirigent_matrix` unit tests. - -#![cfg(feature = "server")] - -use std::collections::HashMap; -use std::path::PathBuf; -use std::sync::Arc; - -use uuid::Uuid; - -use dirigent_auth::{Account, AccountKind, AccountProfile, SecretSource}; -use dirigent_core::sharing::{ - MatrixFactory, StreamBuildError, StreamConfig, StreamFactory, StreamFactoryRegistry, -}; -use dirigent_matrix::{MatrixBehaviorConfig, MatrixService}; -use dirigent_protocol::streaming::StreamScope; - -// ─── Helpers ──────────────────────────────────────────────────────────────── - -fn sample_matrix_account() -> Account { - let mut credentials = HashMap::new(); - credentials.insert( - "password".to_string(), - SecretSource::Inline { - value: "bot-pass".to_string(), - }, - ); - - let mut properties = HashMap::new(); - properties.insert( - "homeserver".to_string(), - serde_json::json!("https://matrix.example.com"), - ); - properties.insert( - "device_id".to_string(), - serde_json::json!("DIRIGENT_TEST"), - ); - - Account { - kind: AccountKind::Matrix, - config_name: "matrix-test".to_string(), - user_id: None, - credentials, - profile: AccountProfile { - username: Some("bot".to_string()), - display_name: Some("Test Bot".to_string()), - ..Default::default() - }, - properties, - } -} - -fn behavior() -> MatrixBehaviorConfig { - MatrixBehaviorConfig { - account: "matrix-test".to_string(), - mode: Default::default(), - default_invite: vec![], - store_path: 
"matrix/test/store".to_string(), - rooms: vec![], - } -} - -/// Build a `MatrixService` without calling `login()`. Any code path that -/// needs a live Client will surface a clean error (not a panic). -fn not_logged_in_service() -> Arc { - let account = sample_matrix_account(); - let tmp = tempfile::tempdir().expect("tempdir"); - let data_dir: PathBuf = tmp.path().to_path_buf(); - // Leak the TempDir so the path survives the life of the service for - // the duration of the test. The sqlite store is only created when - // login() runs — we never call it in these tests. - std::mem::forget(tmp); - let service = MatrixService::from_account(&account, behavior(), data_dir) - .expect("from_account"); - Arc::new(service) -} - -// ─── Tests ────────────────────────────────────────────────────────────────── - -#[test] -fn matrix_factory_kind_is_matrix() { - let service = not_logged_in_service(); - let f = MatrixFactory::new(service); - assert_eq!(f.kind(), "matrix"); -} - -#[test] -fn registry_returns_registered_matrix_factory() { - let service = not_logged_in_service(); - let reg = StreamFactoryRegistry::new().register(MatrixFactory::new(service)); - assert!(reg.get("matrix").is_some(), "matrix factory should be found"); - assert!( - reg.get("langfuse").is_none(), - "unregistered kinds must return None" - ); -} - -#[tokio::test] -async fn build_rejects_archive_wide_scope_with_config_error() { - let service = not_logged_in_service(); - let factory = MatrixFactory::new(service); - - let params_toml = r#" -connector_id = "opencode-1" -session_id = "native-abc" -room_id = "!room:example.com" -"#; - let params: toml::Value = toml::from_str(params_toml).unwrap(); - - let cfg = StreamConfig { - name: "matrix-wrong-scope".to_string(), - kind: "matrix".to_string(), - scope: StreamScope::ArchiveWide { acknowledged: false }, - enabled: true, - params, - }; - - let err = factory.build(&cfg).await.err().expect("build should fail"); - match err { - StreamBuildError::Config(msg) => { - 
assert!( - msg.contains("session"), - "expected 'session' hint in error, got: {msg}" - ); - } - other => panic!("expected Config error, got {other:?}"), - } -} - -#[tokio::test] -async fn build_rejects_missing_params_with_config_error() { - let service = not_logged_in_service(); - let factory = MatrixFactory::new(service); - - // Missing room_id — required field. - let params_toml = r#" -connector_id = "opencode-1" -session_id = "native-abc" -"#; - let params: toml::Value = toml::from_str(params_toml).unwrap(); - - let cfg = StreamConfig { - name: "matrix-missing-room".to_string(), - kind: "matrix".to_string(), - scope: StreamScope::Session { - scroll_id: Uuid::now_v7(), - }, - enabled: true, - params, - }; - - let err = factory.build(&cfg).await.err().expect("build should fail"); - assert!( - matches!(err, StreamBuildError::Config(_)), - "expected Config error, got {err:?}" - ); -} - -#[tokio::test] -async fn build_reports_transport_error_when_service_not_logged_in() { - let service = not_logged_in_service(); - let factory = MatrixFactory::new(service); - - let params_toml = r#" -connector_id = "opencode-1" -session_id = "native-abc" -room_id = "!room:example.com" -"#; - let params: toml::Value = toml::from_str(params_toml).unwrap(); - - let cfg = StreamConfig { - name: "matrix-not-logged-in".to_string(), - kind: "matrix".to_string(), - scope: StreamScope::Session { - scroll_id: Uuid::now_v7(), - }, - enabled: true, - params, - }; - - let err = factory.build(&cfg).await.err().expect("build should fail"); - match err { - StreamBuildError::Transport(msg) => { - assert!( - msg.to_lowercase().contains("logged in") - || msg.to_lowercase().contains("matrix service"), - "expected transport error to mention login state, got: {msg}" - ); - } - other => panic!("expected Transport error, got {other:?}"), - } -} diff --git a/crates/dirigent_core/tests/replay_test.rs b/crates/dirigent_core/tests/replay_test.rs deleted file mode 100644 index f60ef03..0000000 --- 
a/crates/dirigent_core/tests/replay_test.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! Integration test: replay archived session into a `MockStream`. -//! -//! Builds a single-backend in-memory (tempdir) archivist, registers a -//! session, appends 10 messages with ascending timestamps, then exercises -//! `replay_session_to_stream` end-to-end. - -use std::sync::Arc; - -use chrono::{Duration as ChronoDuration, Utc}; -use uuid::Uuid; - -use dirigent_archivist::{ - Archivist, MessageRecord, RegisterConnectorRequest, RegisterSessionRequest, - backends::JsonlBackend, -}; -use dirigent_core::sharing::{ - MockStream, - replay::{ReplayOptions, ReplaySpeed, replay_session_to_stream}, -}; -use dirigent_protocol::streaming::{EventOrigin, SessionStream, StreamScope}; - -/// Build an in-memory-ish archivist backed by a tempdir + JsonlBackend. -/// -/// Matches the pattern used by `dirigent_archivist/tests/integration_tests.rs`. -/// The tempdir is leaked for the duration of the test process — acceptable -/// because the test binary exits immediately after. -async fn build_in_memory_archivist() -> Arc { - let temp_dir = std::env::temp_dir().join(format!("core_replay_test_{}", Uuid::now_v7())); - let backend = Arc::new( - JsonlBackend::new(temp_dir.clone()) - .await - .expect("JsonlBackend construction"), - ); - let archivist = Archivist::from_single_backend("main".into(), backend) - .await - .expect("Archivist::from_single_backend"); - Arc::new(archivist) -} - -/// Register a fresh connector + session and append `n` messages with -/// timestamps one second apart. Returns the scroll_id. 
-async fn seed_session_with_messages(archivist: &Archivist, n: usize) -> Uuid { - let connector_resp = archivist - .register_connector( - RegisterConnectorRequest { - r#type: "OpenCode".to_string(), - title: "Replay Test Connector".to_string(), - client_native_id: format!("replay-test@{}", Uuid::now_v7()), - custom_uid: None, - metadata: serde_json::json!({}), - fingerprint: None, - }, - None, - ) - .await - .expect("register_connector"); - - let session_resp = archivist - .register_session( - RegisterSessionRequest { - connector_uid: connector_resp.connector_uid, - native_session_id: format!("native-{}", Uuid::now_v7()), - title: Some("Replay Test Session".to_string()), - custom_scroll_id: None, - metadata: serde_json::json!({}), - completeness: Default::default(), - parent_scroll_id: None, - is_subagent: false, - continuation: None, - agent_id: None, - subagent_type: None, - spawning_tool_use_id: None, - }, - None, - ) - .await - .expect("register_session"); - - let scroll_id = session_resp.scroll_id; - let base_ts = Utc::now(); - - let messages: Vec = (0..n) - .map(|i| { - let role = if i % 2 == 0 { "user" } else { "assistant" }; - MessageRecord { - version: 1, - message_id: Uuid::now_v7(), - session: scroll_id, - parent_id: None, - ts: base_ts + ChronoDuration::seconds(i as i64), - role: role.to_string(), - author: None, - content_md: format!("message {i}"), - content_parts: None, - attachments: vec![], - metadata: serde_json::json!({}), - } - }) - .collect(); - - archivist - .append_messages(scroll_id, messages, None) - .await - .expect("append_messages"); - - scroll_id -} - -#[tokio::test] -async fn replay_delivers_archived_messages_to_stream() { - let archivist = build_in_memory_archivist().await; - let scroll_id = seed_session_with_messages(&archivist, 10).await; - - let mock = MockStream::new("mock", StreamScope::Session { scroll_id }); - let stream: Arc = mock.clone(); - - let report = replay_session_to_stream( - archivist.as_ref(), - scroll_id, - stream, 
- ReplayOptions { - include_meta_events: false, - speed: ReplaySpeed::AsFastAsPossible, - }, - ) - .await - .expect("replay_session_to_stream"); - - assert_eq!(report.events_sent, 10, "events_sent"); - assert_eq!(report.failures, 0, "failures"); - assert_eq!(mock.received_count(), 10, "mock received count"); - - let received = mock.received.lock().unwrap(); - for evt in received.iter() { - assert!( - matches!(evt.origin, EventOrigin::Replay { .. }), - "every replayed event must carry EventOrigin::Replay" - ); - assert_eq!( - evt.routing.scroll_id, - Some(scroll_id), - "every replayed event must carry the authoritative scroll_id" - ); - } -} - -#[tokio::test] -async fn replay_continues_on_stream_failure() { - let archivist = build_in_memory_archivist().await; - let scroll_id = seed_session_with_messages(&archivist, 10).await; - - let mock = MockStream::new("mock", StreamScope::Session { scroll_id }); - mock.fail_next(3); - let stream: Arc = mock.clone(); - - let report = replay_session_to_stream( - archivist.as_ref(), - scroll_id, - stream, - ReplayOptions { - include_meta_events: false, - speed: ReplaySpeed::AsFastAsPossible, - }, - ) - .await - .expect("replay_session_to_stream"); - - // events_sent counts attempted (ok + failed); failures counts Failed only. - assert_eq!(report.events_sent, 10, "events_sent counts every attempt"); - assert_eq!(report.failures, 3, "first 3 events rejected by mock"); - assert_eq!( - mock.received_count(), - 7, - "mock buffer contains the 7 successful events" - ); -} diff --git a/crates/dirigent_fermata/CLAUDE.md b/crates/dirigent_fermata/CLAUDE.md deleted file mode 100644 index 49ffa67..0000000 --- a/crates/dirigent_fermata/CLAUDE.md +++ /dev/null @@ -1,34 +0,0 @@ -# Package: dirigent_fermata - -Harness-agnostic policy gate for AI coding agents. 
- -## Quick Facts -- **Type**: Library + binary (`fermata`) -- **Main Entry**: `src/lib.rs`, `src/bin/fermata.rs` -- **Dependencies**: `ignore`, `toml`, `regex`, `globset`, `serde`, `clap` (cli feature) -- **Status**: v0.1 — library + CLI + Claude hook adapter - -## Layering - -Three concentric layers; nothing inner imports from anything outer. - -- **`core/`** — harness-unaware, transport-unaware, sync. Types (`Op`, `Decision`), `.botignore` walker, `botignore.toml` parser, `Policy::check` / `check_command`, path extraction. Sync, no tokio. -- **`harness/`** — `HarnessAdapter` trait over a normalized `ToolCall`. Each adapter (Claude, future Codex, etc.) lives in its own submodule, feature-gated. -- **`bin/fermata.rs`** — only place where `clap`, stdio, and exit codes appear. - -## Release Model - -Developed in this monorepo; planned to be exported as a standalone repo in the future for advertising / external distribution. Development stays here. See `docs/tools/fermata.md`. - -## Dependency Direction - -`dirigent_tools` depends on `dirigent_fermata`, never the reverse. Fermata must remain usable as a standalone hook/MCP without dragging in the in-process ACP tool runtime. - -## Out of scope (v0.1) - -Codex / Gemini hook adapters, MCP server mode, PostToolUse envelope, `readonly_only` Bash mode, audit log, filesystem watcher. Each is a future task with its own plan. 
- -## See also - -- `docs/tools/fermata.md` — Dirigent integration plan -- `docs/workpad/brainstorm/fermata.md` — canonical product spec diff --git a/crates/dirigent_fermata/Cargo.toml b/crates/dirigent_fermata/Cargo.toml deleted file mode 100644 index 779a399..0000000 --- a/crates/dirigent_fermata/Cargo.toml +++ /dev/null @@ -1,40 +0,0 @@ -[package] -name = "dirigent_fermata" -version = "0.1.0" -edition = "2021" -rust-version = "1.75" -description = "Harness-agnostic policy gate for AI coding agents (.botignore + botignore.toml)" -license = "MIT OR Apache-2.0" -repository = "https://git.g4b.org/dirigence/fermata" -readme = "README.md" -keywords = ["ai", "agents", "security", "policy", "gitignore"] -categories = ["command-line-utilities", "development-tools"] - -[lib] -path = "src/lib.rs" - -[[bin]] -name = "fermata" -path = "src/bin/fermata.rs" -required-features = ["cli"] - -[dependencies] -globset = "0.4" -ignore = "0.4" -walkdir = "2" -toml = "0.8" -regex = "1.10" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -thiserror = "2.0" -clap = { version = "4.5", features = ["derive"], optional = true } - -[dev-dependencies] -tempfile = "3.10" -assert_cmd = "2.0" -predicates = "3.1" - -[features] -default = ["cli", "harness-claude"] -cli = ["dep:clap"] -harness-claude = [] diff --git a/crates/dirigent_fermata/LICENSE-APACHE b/crates/dirigent_fermata/LICENSE-APACHE deleted file mode 100644 index b2ea092..0000000 --- a/crates/dirigent_fermata/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for describing the origin of the Work and - reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Support. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or support. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2026 Gabor Körber and contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/crates/dirigent_fermata/LICENSE-MIT b/crates/dirigent_fermata/LICENSE-MIT deleted file mode 100644 index 0440791..0000000 --- a/crates/dirigent_fermata/LICENSE-MIT +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2026 Gabor Körber and contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/crates/dirigent_fermata/README.md b/crates/dirigent_fermata/README.md deleted file mode 100644 index ae03652..0000000 --- a/crates/dirigent_fermata/README.md +++ /dev/null @@ -1,214 +0,0 @@ -# 𝄐 dirigent_fermata - -**A fast, harness-agnostic policy gate for AI coding agents.** - -Drop a `.botignore` file in your project root. Fermata reads it and blocks your agent from reading, writing, or running things it shouldn't — before the tool call happens. - -``` -.env -.env.* -secrets/** -conf/settings.local.yaml -``` - -That's all it takes. - ---- - -## Why Fermata - -AI coding agents are powerful, but they don't have an innate sense of "don't touch `.env`." 
Native hook systems in tools like Claude Code let you intercept every file operation — but wiring up your own secure, fast hook for each project is friction. Fermata is that hook, ready to drop in. - -- **Fast** — written in Rust; ~1–5ms per call. Hooks fire on every read, write, and bash operation. Python cold-start (~50–150ms) compounds fast. Fermata doesn't. -- **Familiar syntax** — `.botignore` uses gitignore rules via the `ignore` crate (the same engine powering ripgrep). -- **Per-operation control** — `botignore.toml` lets you block writes to `vendor/**` while still allowing reads, or deny specific bash patterns without touching path rules. -- **Harness-agnostic** — plain CLI exit codes work from any shell wrapper; the hook adapter speaks Claude Code's JSON natively. - ---- - -## Status: v0.1 - -| Component | Status | -|-----------|--------| -| Library (`Op`, `Decision`, `Policy::check`, `Policy::check_command`) | Done | -| `.botignore` walker (project-root walk-up, gitignore semantics) | Done | -| `botignore.toml` parser (read / write / bash namespaces) | Done | -| Path identification heuristics | Done | -| CLI: `fermata check ...` | Done | -| CLI: `fermata hook --harness claude` | Done | -| Claude Code PreToolUse adapter | Done | - -Out of scope for v0.1: Codex / Gemini hook adapters, MCP server mode, audit log, filesystem watcher. - ---- - -## Install - -From source (this monorepo): - -```bash -cargo install --path crates/dirigent_fermata --features cli -``` - -This installs the `fermata` binary into `~/.cargo/bin/`. 
- ---- - -## Usage - -### Checking a path - -```bash -fermata check --op read /path/to/.env -# exit 1 — blocked -# stderr: blocked by rule ".env" in /your/project/.botignore - -fermata check --op write /path/to/src/main.rs -# exit 0 — allowed -``` - -### Claude Code hook adapter - -```bash -fermata hook --harness claude < hook_payload.json -``` - -Reads the PreToolUse JSON from stdin, extracts the tool name and path or command, applies policy, and emits the Claude-shaped JSON response. The hook's exit code is always `0`; the verdict is in the JSON body. - ---- - -## Configuration - -### `.botignore` — the 80% case - -Create a `.botignore` at your project root. Gitignore syntax. Blocks both reads and writes. - -```gitignore -# Secrets -.env -.env.* -secrets/** - -# Local config overrides -conf/settings.local.yaml -conf/settings.test.yaml - -# Generated files — let the tools rebuild them, not patch them -dist/** -*.lock -``` - -Fermata walks up from the target file to find the nearest `.botignore`, so it works correctly even when an agent changes directory. 
- -### `botignore.toml` — per-operation rules - -For cases where `.botignore`'s uniform read+write block isn't granular enough: - -```toml -[read] -# Block reading secrets outright -patterns = [".env*", "secrets/**", "conf/settings.local.yaml"] - -[write] -# Allow reading vendor code but block patching it -patterns = ["vendor/**", "*.lock"] - -[bash] -# Hard-block destructive or exfiltrating commands -deny = [ - "rm -rf /", - "curl * | sh", - "git push --force*", -] -# Ask before any removal or move -ask = ["rm:*", "mv:*"] -# Narrow allowlist for automated commands -allow_prefixes = ["make test", "git checkout:*"] -``` - ---- - -## How it fits into Claude Code - -Add fermata as a `PreToolUse` hook in `.claude/settings.json`: - -```json -{ - "hooks": { - "PreToolUse": [ - { - "matcher": "Bash|Read|Edit|Write", - "hooks": [ - { - "type": "command", - "command": "fermata hook --harness claude" - } - ] - } - ] - } -} -``` - -When Claude attempts a `Read(.env)`, `Write(vendor/foo.js)`, or `Bash(rm ./secrets/key.pem)`, fermata intercepts the call, checks policy, and returns a deny with a human-readable reason — before any damage is done. - ---- - -## Real-world scenario - -A project has `.env`, `conf/settings.local.yaml`, and a `vendor/` tree it doesn't want patched. 
With `.botignore`: - -```gitignore -.env -.env.* -conf/settings.local.yaml -vendor/** -``` - -Claude attempts to read credentials: - -``` -Tool: Read -Path: ./conf/settings.local.yaml -Decision: BLOCK — matched rule "conf/settings.local.yaml" (.botignore) -``` - -Claude attempts to read application code: - -``` -Tool: Read -Path: ./src/app/main.rs -Decision: ALLOW -``` - -Claude attempts to run `cat .env` via bash — which would bypass a path-only check: - -```toml -# botignore.toml -[bash] -deny = ["cat .env*", "cat conf/settings.local*"] -``` - -``` -Tool: Bash -Command: cat .env -Decision: BLOCK — matched bash deny rule "cat .env*" -``` - ---- - -## Architecture - -Three concentric layers; nothing inner imports from anything outer: - -- **`core/`** — harness-unaware, sync. Types, `.botignore` walker, `botignore.toml` parser, `Policy::check` / `check_command`, path extraction. -- **`harness/`** — `HarnessAdapter` trait over a normalized `ToolCall`. Each adapter lives in its own submodule, feature-gated. -- **`bin/fermata.rs`** — the only place `clap`, stdio, and exit codes appear. 
- ---- - -## See also - -- `docs/tools/fermata.md` — Dirigent integration plan -- `docs/workpad/brainstorm/fermata.md` — full product spec and field notes -- `docs/architecture/crates.md` — crate dependency map diff --git a/crates/dirigent_fermata/src/bin/fermata.rs b/crates/dirigent_fermata/src/bin/fermata.rs deleted file mode 100644 index 23dbcb5..0000000 --- a/crates/dirigent_fermata/src/bin/fermata.rs +++ /dev/null @@ -1,205 +0,0 @@ -use clap::{Parser, Subcommand, ValueEnum}; -use dirigent_fermata::core::{project::find_project_root, Decision, Op, Policy}; -use std::io::{Read, Write}; -use std::path::PathBuf; -use std::process::ExitCode; - -#[derive(Parser)] -#[command(name = "fermata", about = "Harness-agnostic policy gate for AI coding agents")] -struct Cli { - #[command(subcommand)] - cmd: Cmd, -} - -#[derive(Subcommand)] -enum Cmd { - /// Check whether `path` is allowed for the given `--op`. - Check { - #[arg(long, value_enum, default_value_t = OpArg::Read)] - op: OpArg, - #[arg(long)] - json: bool, - paths: Vec, - }, - /// Read a harness hook payload from stdin and render the decision. 
- Hook { - #[arg(long)] - harness: String, - }, -} - -#[derive(Copy, Clone, ValueEnum)] -enum OpArg { - Read, - Write, - Execute, -} - -impl From for Op { - fn from(a: OpArg) -> Self { - match a { - OpArg::Read => Op::Read, - OpArg::Write => Op::Write, - OpArg::Execute => Op::Execute, - } - } -} - -fn main() -> ExitCode { - let cli = Cli::parse(); - match cli.cmd { - Cmd::Check { op, json, paths } => run_check(op.into(), json, &paths), - Cmd::Hook { harness } => run_hook(&harness), - } -} - -fn run_check(op: Op, json: bool, paths: &[PathBuf]) -> ExitCode { - let mut worst: Option = None; - for p in paths { - let root = match find_project_root(p) { - Some(r) => r, - None => continue, - }; - let policy = match Policy::load(&root) { - Ok(p) => p, - Err(e) => { - eprintln!("fermata: load error: {e}"); - return ExitCode::from(2); - } - }; - let d = match policy.check(op, p) { - Ok(d) => d, - Err(e) => { - eprintln!("fermata: check error: {e}"); - return ExitCode::from(2); - } - }; - worst = Some(merge_worst(worst.take(), d)); - } - let decision = worst.unwrap_or(Decision::Allow); - if json { - let _ = serde_json::to_writer(std::io::stdout().lock(), &decision); - let _ = writeln!(std::io::stdout().lock()); - } else if let Decision::Deny(ref r) = decision { - println!("{}", r.message); - } else if let Decision::Ask(ref r) = decision { - println!("ASK: {}", r.message); - } - match decision { - Decision::Allow => ExitCode::from(0), - Decision::Ask(_) => ExitCode::from(0), - Decision::Deny(_) => ExitCode::from(1), - } -} - -fn run_hook(harness: &str) -> ExitCode { - let adapter = match dirigent_fermata::harness::lookup(harness) { - Some(a) => a, - None => { - eprintln!("fermata: unknown harness '{harness}'"); - return ExitCode::from(2); - } - }; - let mut buf = Vec::new(); - if let Err(e) = std::io::stdin().lock().read_to_end(&mut buf) { - eprintln!("fermata: stdin: {e}"); - return ExitCode::from(2); - } - let call = match adapter.parse_request(&buf) { - Ok(c) => c, - Err(e) 
=> { - eprintln!("fermata: parse: {e}"); - return ExitCode::from(2); - } - }; - - use dirigent_fermata::harness::{PathKind, ToolOp}; - let decision = match &call.op { - ToolOp::Path { path, kind } => { - let root = match find_project_root(path) { - // No project root → fail-open allow (hook must always exit 0 with a verdict). - // run_check silently skips these paths; here we must still emit JSON. - Some(r) => r, - None => { - let out = adapter.render_decision(&call, &Decision::Allow).unwrap_or_default(); - let _ = std::io::stdout().lock().write_all(&out); - return ExitCode::from(0); - } - }; - let policy = match Policy::load(&root) { - Ok(p) => p, - Err(e) => { - eprintln!("fermata: load error: {e}"); - let out = adapter.render_decision(&call, &Decision::Allow).unwrap_or_default(); - let _ = std::io::stdout().lock().write_all(&out); - return ExitCode::from(0); - } - }; - let op = match kind { - PathKind::Read => Op::Read, - PathKind::Write => Op::Write, - }; - match policy.check(op, path) { - Ok(d) => d, - Err(e) => { - eprintln!("fermata: check error: {e}"); - let out = adapter.render_decision(&call, &Decision::Allow).unwrap_or_default(); - let _ = std::io::stdout().lock().write_all(&out); - return ExitCode::from(0); - } - } - } - ToolOp::Command { text } => { - // For commands, we look up the project from cwd (no path argument). - let cwd = match std::env::current_dir() { - Ok(d) => d, - Err(e) => { - eprintln!("fermata: cwd error: {e}"); - let out = adapter.render_decision(&call, &Decision::Allow).unwrap_or_default(); - let _ = std::io::stdout().lock().write_all(&out); - return ExitCode::from(0); - } - }; - match find_project_root(&cwd) { - // No project root → fail-open allow (see Path branch note above). 
- None => Decision::Allow, - Some(root) => { - let policy = match Policy::load(&root) { - Ok(p) => p, - Err(e) => { - eprintln!("fermata: load error: {e}"); - let out = adapter.render_decision(&call, &Decision::Allow).unwrap_or_default(); - let _ = std::io::stdout().lock().write_all(&out); - return ExitCode::from(0); - } - }; - match policy.check_command(text) { - Ok(d) => d, - Err(e) => { - eprintln!("fermata: check error: {e}"); - let out = adapter.render_decision(&call, &Decision::Allow).unwrap_or_default(); - let _ = std::io::stdout().lock().write_all(&out); - return ExitCode::from(0); - } - } - } - } - } - }; - let out = adapter.render_decision(&call, &decision).unwrap_or_default(); - let _ = std::io::stdout().lock().write_all(&out); - ExitCode::from(0) // hook bins always exit 0; the JSON carries the verdict -} - -fn merge_worst(a: Option, b: Decision) -> Decision { - let rank = |d: &Decision| match d { - Decision::Allow => 0, - Decision::Ask(_) => 1, - Decision::Deny(_) => 2, - }; - match a { - None => b, - Some(a) if rank(&a) >= rank(&b) => a, - Some(_) => b, - } -} diff --git a/crates/dirigent_fermata/src/core/botignore.rs b/crates/dirigent_fermata/src/core/botignore.rs deleted file mode 100644 index 4c9ac6c..0000000 --- a/crates/dirigent_fermata/src/core/botignore.rs +++ /dev/null @@ -1,91 +0,0 @@ -use crate::core::decision::Rule; -use ignore::gitignore::{Gitignore, GitignoreBuilder}; -use std::path::{Path, PathBuf}; -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum BotignoreError { - #[error("failed to read .botignore: {0}")] - Io(#[from] std::io::Error), - #[error("failed to compile .botignore: {0}")] - Compile(#[source] ignore::Error), -} - -struct ScopedMatcher { - /// Path of the source `.botignore` file. - source: PathBuf, - /// Directory the matcher is rooted at (parent of `source`). - dir: PathBuf, - /// Depth of `dir` (component count) — deeper = more specific. 
- depth: usize, - matcher: Gitignore, -} - -/// A collection of `.botignore` matchers, one per file discovered under the -/// project root. Each matcher is rooted at its source file's directory so -/// gitignore-style semantics (anchored vs unanchored patterns, per-directory -/// scope) work correctly. At match time, the deepest applicable matcher -/// wins; whitelist (`!` negation) at any depth overrides an ignore at -/// shallower depth. -pub struct BotignoreSet { - matchers: Vec, -} - -impl BotignoreSet { - /// Walk `root` recursively, building a per-file matcher for every - /// `.botignore` encountered. Empty if none are found. - pub fn load(root: &Path) -> Result { - let mut matchers = Vec::new(); - for entry in walkdir::WalkDir::new(root).into_iter().filter_map(Result::ok) { - if !(entry.file_type().is_file() && entry.file_name() == ".botignore") { - continue; - } - let source = entry.path().to_path_buf(); - let dir = source.parent().unwrap_or(root).to_path_buf(); - let mut builder = GitignoreBuilder::new(&dir); - if let Some(err) = builder.add(&source) { - return Err(BotignoreError::Compile(err)); - } - let matcher = builder.build().map_err(BotignoreError::Compile)?; - let depth = dir.components().count(); - matchers.push(ScopedMatcher { - source, - dir, - depth, - matcher, - }); - } - // Shallowest first so iteration applies broader rules then more-specific overrides. - matchers.sort_by_key(|m| m.depth); - Ok(Self { matchers }) - } - - /// Returns `Some(Rule)` if `path` is matched (and not negated by a - /// deeper-scoped whitelist), else `None`. The deepest matcher whose - /// directory contains `path` wins. 
- pub fn matched(&self, path: &Path) -> Result, BotignoreError> { - let is_dir = path.is_dir(); - let mut current: Option<&ScopedMatcher> = None; - let mut current_pattern: Option = None; - - for sm in &self.matchers { - if !path.starts_with(&sm.dir) { - continue; - } - let m = sm.matcher.matched(path, is_dir); - if m.is_ignore() { - current = Some(sm); - current_pattern = m.inner().map(|g| g.original().to_string()); - } else if m.is_whitelist() { - // Deeper whitelist overrides any shallower ignore. - current = None; - current_pattern = None; - } - } - - Ok(current.map(|sm| Rule { - source: sm.source.clone(), - pattern: current_pattern.unwrap_or_default(), - })) - } -} diff --git a/crates/dirigent_fermata/src/core/decision.rs b/crates/dirigent_fermata/src/core/decision.rs deleted file mode 100644 index cd7dae5..0000000 --- a/crates/dirigent_fermata/src/core/decision.rs +++ /dev/null @@ -1,30 +0,0 @@ -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct Rule { - /// Source file the rule came from (e.g. `/proj/.botignore`). - pub source: PathBuf, - /// Pattern text as it appeared in the source. 
- pub pattern: String, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct Reason { - pub message: String, - pub rule: Option, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "lowercase")] -pub enum Decision { - Allow, - Ask(Reason), - Deny(Reason), -} - -impl Decision { - pub fn is_blocking(&self) -> bool { - matches!(self, Decision::Deny(_)) - } -} diff --git a/crates/dirigent_fermata/src/core/extract.rs b/crates/dirigent_fermata/src/core/extract.rs deleted file mode 100644 index 6535513..0000000 --- a/crates/dirigent_fermata/src/core/extract.rs +++ /dev/null @@ -1,50 +0,0 @@ -use regex::Regex; -use std::sync::OnceLock; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Confidence { - /// Absolute path or path with explicit separator. - High, - /// Bare filename with extension; could be a word. - Low, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct PathCandidate { - pub text: String, - pub confidence: Confidence, -} - -/// Heuristically extract path-like substrings from arbitrary text. -/// Confident matches (absolute paths, paths containing separators) → `High`. -/// Bare filenames with an extension → `Low` (advisory only). 
-pub fn extract_path_candidates(text: &str) -> Vec { - static UNIX_ABS: OnceLock = OnceLock::new(); - static WIN_ABS: OnceLock = OnceLock::new(); - static REL_WITH_SEP: OnceLock = OnceLock::new(); - static BARE_NAME: OnceLock = OnceLock::new(); - - let unix_abs = UNIX_ABS.get_or_init(|| Regex::new(r"(?m)(?:^|\s)(/[\w./~\-_]+)").unwrap()); - let win_abs = WIN_ABS.get_or_init(|| Regex::new(r#"(?m)(?:^|\s)([A-Za-z]:\\[\w.\\\-_]+)"#).unwrap()); - let rel = REL_WITH_SEP.get_or_init(|| Regex::new(r"(?m)(?:^|\s)((?:\./|\.\./|[\w\-_]+/)[\w./\-_]+)").unwrap()); - let bare = BARE_NAME.get_or_init(|| Regex::new(r"(?m)(?:^|\s)([\w\-_]+\.[A-Za-z]{1,8})(?:\s|[.,;:!?]|$)").unwrap()); - - let mut out = Vec::new(); - let mut seen = std::collections::HashSet::new(); - - for re in [unix_abs, win_abs, rel] { - for cap in re.captures_iter(text) { - let m = cap.get(1).unwrap().as_str().trim_end_matches(['.', ',', ';', ':', '!', '?']); - if seen.insert(m.to_string()) { - out.push(PathCandidate { text: m.to_string(), confidence: Confidence::High }); - } - } - } - for cap in bare.captures_iter(text) { - let m = cap.get(1).unwrap().as_str(); - if seen.insert(m.to_string()) { - out.push(PathCandidate { text: m.to_string(), confidence: Confidence::Low }); - } - } - out -} diff --git a/crates/dirigent_fermata/src/core/mod.rs b/crates/dirigent_fermata/src/core/mod.rs deleted file mode 100644 index 519bf69..0000000 --- a/crates/dirigent_fermata/src/core/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! Core policy layer. Harness-unaware, transport-unaware, sync. 
- -pub mod botignore; -pub mod decision; -pub mod extract; -pub mod op; -pub mod policy; -pub mod project; -pub mod toml_config; - -pub use decision::{Decision, Reason, Rule}; -pub use extract::{extract_path_candidates, Confidence, PathCandidate}; -pub use op::Op; -pub use policy::Policy; diff --git a/crates/dirigent_fermata/src/core/op.rs b/crates/dirigent_fermata/src/core/op.rs deleted file mode 100644 index 99e5ee8..0000000 --- a/crates/dirigent_fermata/src/core/op.rs +++ /dev/null @@ -1,9 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum Op { - Read, - Write, - Execute, -} diff --git a/crates/dirigent_fermata/src/core/policy.rs b/crates/dirigent_fermata/src/core/policy.rs deleted file mode 100644 index 5479c30..0000000 --- a/crates/dirigent_fermata/src/core/policy.rs +++ /dev/null @@ -1,164 +0,0 @@ -use crate::core::botignore::{BotignoreError, BotignoreSet}; -use crate::core::decision::{Decision, Reason, Rule}; -use crate::core::op::Op; -use crate::core::toml_config::{BotignoreToml, TomlConfigError}; -use std::path::{Path, PathBuf}; -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum PolicyError { - #[error(transparent)] - Botignore(#[from] BotignoreError), - #[error(transparent)] - Toml(#[from] TomlConfigError), - #[error("invalid pattern in botignore.toml: {0}")] - BadPattern(String), -} - -pub struct Policy { - root: PathBuf, - botignore: BotignoreSet, - toml: BotignoreToml, - read_globs: globset::GlobSet, - write_globs: globset::GlobSet, - read_patterns: Vec, - write_patterns: Vec, -} - -impl Policy { - pub fn load(root: &Path) -> Result { - let botignore = BotignoreSet::load(root)?; - let toml = BotignoreToml::load(root)?; - - let (read_globs, read_patterns) = compile_globs( - toml.read.as_ref().map(|r| r.patterns.as_slice()).unwrap_or(&[]), - )?; - let (write_globs, write_patterns) = compile_globs( - toml.write.as_ref().map(|r| 
r.patterns.as_slice()).unwrap_or(&[]), - )?; - - Ok(Self { - root: root.to_path_buf(), - botignore, - toml, - read_globs, - write_globs, - read_patterns, - write_patterns, - }) - } - - pub fn check_command(&self, command: &str) -> Result { - let bash = match self.toml.bash.as_ref() { - Some(b) => b, - None => return Ok(Decision::Allow), - }; - - // 1. Deny wins over everything else. - if let Some(pat) = match_command(command, &bash.deny)? { - return Ok(Decision::Deny(Reason { - message: format!("blocked by botignore.toml [bash.deny]: {}", pat), - rule: Some(Rule { - source: self.root.join("botignore.toml"), - pattern: pat, - }), - })); - } - - // 2. Allow prefixes — if any matches, allow. - for prefix in &bash.allow_prefixes { - if command_matches_prefix(command, prefix) { - return Ok(Decision::Allow); - } - } - - // 3. Ask patterns. - if let Some(pat) = match_command(command, &bash.ask)? { - return Ok(Decision::Ask(Reason { - message: format!("requires confirmation [bash.ask]: {}", pat), - rule: Some(Rule { - source: self.root.join("botignore.toml"), - pattern: pat, - }), - })); - } - - Ok(Decision::Allow) - } - - pub fn check(&self, op: Op, path: &Path) -> Result { - // 1. .botignore is path-only and applies to read+write equally. - if matches!(op, Op::Read | Op::Write) { - if let Some(rule) = self.botignore.matched(path)? { - return Ok(Decision::Deny(Reason { - message: format!("blocked by .botignore: {}", rule.pattern), - rule: Some(rule), - })); - } - } - - // 2. botignore.toml namespace-specific rules. 
- let (set, patterns) = match op { - Op::Read => (&self.read_globs, &self.read_patterns), - Op::Write => (&self.write_globs, &self.write_patterns), - Op::Execute => return Ok(Decision::Allow), // path-based check_command handles bash - }; - - let rel = path.strip_prefix(&self.root).unwrap_or(path); - let matches = set.matches(rel); - if let Some(idx) = matches.first() { - let pattern = patterns[*idx].clone(); - return Ok(Decision::Deny(Reason { - message: format!("blocked by botignore.toml [{:?}]: {}", op, pattern), - rule: Some(Rule { - source: self.root.join("botignore.toml"), - pattern, - }), - })); - } - - Ok(Decision::Allow) - } -} - -/// Substring-or-glob match of `command` against `patterns`. -/// Patterns containing glob metachars (`*`, `?`, `[`) are treated as globs; -/// others are matched as literal substrings. -fn match_command(command: &str, patterns: &[String]) -> Result, PolicyError> { - for pat in patterns { - if is_glob(pat) { - let g = globset::Glob::new(pat) - .map_err(|e| PolicyError::BadPattern(e.to_string()))? - .compile_matcher(); - if g.is_match(command) { - return Ok(Some(pat.clone())); - } - } else if command.contains(pat.as_str()) { - return Ok(Some(pat.clone())); - } - } - Ok(None) -} - -fn is_glob(pat: &str) -> bool { - pat.contains('*') || pat.contains('?') || pat.contains('[') -} - -/// `prefix` is `"name"` or `"name:*"`. Both treat `name` as a leading word -/// boundary in `command`. Mirrors Claude Code's `Bash(name:*)` style. 
-fn command_matches_prefix(command: &str, prefix: &str) -> bool { - let needle = prefix.trim_end_matches(":*"); - command.trim_start().starts_with(needle) -} - -fn compile_globs(patterns: &[String]) -> Result<(globset::GlobSet, Vec), PolicyError> { - let mut builder = globset::GlobSetBuilder::new(); - for pat in patterns { - let glob = globset::Glob::new(pat).map_err(|e| PolicyError::BadPattern(e.to_string()))?; - builder.add(glob); - } - let set = builder - .build() - .map_err(|e| PolicyError::BadPattern(e.to_string()))?; - Ok((set, patterns.to_vec())) -} diff --git a/crates/dirigent_fermata/src/core/project.rs b/crates/dirigent_fermata/src/core/project.rs deleted file mode 100644 index 91beed6..0000000 --- a/crates/dirigent_fermata/src/core/project.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::path::{Path, PathBuf}; - -/// Strong markers that definitively identify a project root. -const STRONG_MARKERS: &[&str] = &["botignore.toml", ".botignore.toml", ".git"]; - -/// Walk upward from `target` (or its parent if `target` is a file) looking -/// for the nearest project root. Strong markers (`botignore.toml`, -/// `.botignore.toml`, `.git`) stop the walk immediately. A `.botignore` -/// file is remembered as a fallback but does not stop the walk — the search -/// continues upward for a stronger boundary. If none is found, the -/// `.botignore` location is used. -pub fn find_project_root(target: &Path) -> Option { - let start = if target.is_file() { - target.parent()? 
- } else { - target - }; - - let mut fallback: Option = None; - let mut current = Some(start); - while let Some(dir) = current { - for marker in STRONG_MARKERS { - if dir.join(marker).exists() { - return Some(dir.to_path_buf()); - } - } - if fallback.is_none() && dir.join(".botignore").exists() { - fallback = Some(dir.to_path_buf()); - } - current = dir.parent(); - } - fallback -} diff --git a/crates/dirigent_fermata/src/core/toml_config.rs b/crates/dirigent_fermata/src/core/toml_config.rs deleted file mode 100644 index 8f2997f..0000000 --- a/crates/dirigent_fermata/src/core/toml_config.rs +++ /dev/null @@ -1,47 +0,0 @@ -use serde::{Deserialize, Serialize}; -use std::path::Path; -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum TomlConfigError { - #[error("io error: {0}")] - Io(#[from] std::io::Error), - #[error("toml parse error: {0}")] - Parse(#[from] toml::de::Error), -} - -#[derive(Debug, Default, Clone, Deserialize, Serialize)] -pub struct OpRules { - #[serde(default)] - pub patterns: Vec, -} - -#[derive(Debug, Default, Clone, Deserialize, Serialize)] -pub struct BashRules { - #[serde(default)] - pub deny: Vec, - #[serde(default)] - pub ask: Vec, - #[serde(default)] - pub allow_prefixes: Vec, -} - -#[derive(Debug, Default, Clone, Deserialize, Serialize)] -pub struct BotignoreToml { - pub read: Option, - pub write: Option, - pub bash: Option, -} - -impl BotignoreToml { - /// Load `/botignore.toml` if present, else return an empty config. - pub fn load(root: &Path) -> Result { - let path = root.join("botignore.toml"); - if !path.exists() { - return Ok(Self::default()); - } - let text = std::fs::read_to_string(&path)?; - let cfg = toml::from_str(&text)?; - Ok(cfg) - } -} diff --git a/crates/dirigent_fermata/src/harness/claude.rs b/crates/dirigent_fermata/src/harness/claude.rs deleted file mode 100644 index e0c3576..0000000 --- a/crates/dirigent_fermata/src/harness/claude.rs +++ /dev/null @@ -1,76 +0,0 @@ -//! Claude Code hook adapter (PreToolUse). -//! 
-//! Wire format: stdin is one JSON object with `tool_name` and `tool_input`. -//! Stdout is `{"hookSpecificOutput": {...}}` with exit code 0; the JSON -//! carries the verdict. - -use super::{AdapterError, HarnessAdapter, PathKind, ToolCall, ToolOp}; -use crate::core::Decision; -use serde_json::{json, Value}; -use std::path::PathBuf; - -pub struct ClaudeAdapter; - -impl HarnessAdapter for ClaudeAdapter { - fn name(&self) -> &'static str { - "claude" - } - - fn parse_request(&self, input: &[u8]) -> Result { - let v: Value = serde_json::from_slice(input)?; - let tool_name = v - .get("tool_name") - .and_then(|x| x.as_str()) - .ok_or_else(|| AdapterError::Parse("missing tool_name".into()))? - .to_string(); - let tool_input = v.get("tool_input").cloned().unwrap_or(Value::Null); - - let op = match tool_name.as_str() { - "Read" => path_op(&tool_input, PathKind::Read)?, - "Write" | "Edit" | "MultiEdit" => path_op(&tool_input, PathKind::Write)?, - "Bash" => command_op(&tool_input)?, - other => return Err(AdapterError::UnsupportedTool(other.to_string())), - }; - - Ok(ToolCall { - tool_name, - op, - raw: v, - }) - } - - fn render_decision(&self, _call: &ToolCall, decision: &Decision) -> Result, AdapterError> { - let (verdict, reason) = match decision { - Decision::Allow => ("allow", String::new()), - Decision::Ask(r) => ("ask", r.message.clone()), - Decision::Deny(r) => ("deny", r.message.clone()), - }; - let out = json!({ - "hookSpecificOutput": { - "hookEventName": "PreToolUse", - "permissionDecision": verdict, - "permissionDecisionReason": reason, - } - }); - Ok(serde_json::to_vec(&out)?) 
- } -} - -fn path_op(tool_input: &Value, kind: PathKind) -> Result { - let p = tool_input - .get("file_path") - .and_then(|x| x.as_str()) - .ok_or_else(|| AdapterError::Parse("missing tool_input.file_path".into()))?; - Ok(ToolOp::Path { - path: PathBuf::from(p), - kind, - }) -} - -fn command_op(tool_input: &Value) -> Result { - let c = tool_input - .get("command") - .and_then(|x| x.as_str()) - .ok_or_else(|| AdapterError::Parse("missing tool_input.command".into()))?; - Ok(ToolOp::Command { text: c.to_string() }) -} diff --git a/crates/dirigent_fermata/src/harness/mod.rs b/crates/dirigent_fermata/src/harness/mod.rs deleted file mode 100644 index 347b7b3..0000000 --- a/crates/dirigent_fermata/src/harness/mod.rs +++ /dev/null @@ -1,67 +0,0 @@ -//! Harness adapter layer. Normalizes harness-specific payloads into -//! `core` types and renders `Decision` back to harness wire format. - -use crate::core::Decision; -use std::path::PathBuf; -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum AdapterError { - #[error("invalid request payload: {0}")] - Parse(String), - #[error("unsupported tool: {0}")] - UnsupportedTool(String), - #[error("io: {0}")] - Io(#[from] std::io::Error), - #[error("json: {0}")] - Json(#[from] serde_json::Error), -} - -/// Normalized tool-call shape consumed by `core::Policy`. -/// Adapters translate harness-specific payloads into this; nothing in -/// `core` knows about adapters. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ToolCall { - /// Harness's tool name (e.g. "Read", "Write", "Edit", "Bash"). - pub tool_name: String, - /// Op classification derived from `tool_name`. - pub op: ToolOp, - /// Original raw payload for the adapter to consult when rendering. 
- pub raw: serde_json::Value, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ToolOp { - Path { path: PathBuf, kind: PathKind }, - Command { text: String }, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum PathKind { - Read, - Write, -} - -/// Trait implemented by each harness adapter. Adapters parse the harness's -/// hook stdin payload into `ToolCall` and render a `Decision` back to the -/// harness's expected stdout format. -pub trait HarnessAdapter { - /// The CLI name (e.g. "claude", "codex", "gemini"). - fn name(&self) -> &'static str; - - fn parse_request(&self, input: &[u8]) -> Result; - - fn render_decision(&self, call: &ToolCall, decision: &Decision) -> Result, AdapterError>; -} - -#[cfg(feature = "harness-claude")] -pub mod claude; - -/// Look up a registered adapter by CLI name. -pub fn lookup(name: &str) -> Option> { - match name { - #[cfg(feature = "harness-claude")] - "claude" => Some(Box::new(claude::ClaudeAdapter)), - _ => None, - } -} diff --git a/crates/dirigent_fermata/src/lib.rs b/crates/dirigent_fermata/src/lib.rs deleted file mode 100644 index 4bb9af6..0000000 --- a/crates/dirigent_fermata/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! `dirigent_fermata` — harness-agnostic policy gate. -//! -//! See `docs/tools/fermata.md` (Dirigent integration plan) and -//! `docs/workpad/brainstorm/fermata.md` (product spec). - -pub mod core; -pub mod harness; diff --git a/crates/dirigent_fermata/tests/cargo_publish_metadata.rs b/crates/dirigent_fermata/tests/cargo_publish_metadata.rs deleted file mode 100644 index 6243c56..0000000 --- a/crates/dirigent_fermata/tests/cargo_publish_metadata.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Guards that fermata's Cargo.toml carries the metadata required for -//! `cargo publish` and a useful `cargo install`. Reads the manifest as -//! plain text to avoid pulling cargo internals. 
- -use std::fs; - -fn manifest() -> String { - fs::read_to_string(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.toml")) - .expect("read Cargo.toml") -} - -#[test] -fn has_license() { - let m = manifest(); - assert!(m.contains("license ="), "Cargo.toml missing `license` field"); -} - -#[test] -fn has_repository() { - let m = manifest(); - assert!(m.contains("repository ="), "Cargo.toml missing `repository` field"); -} - -#[test] -fn has_description() { - let m = manifest(); - assert!(m.contains("description ="), "Cargo.toml missing `description`"); -} - -#[test] -fn has_readme() { - let m = manifest(); - assert!(m.contains("readme ="), "Cargo.toml missing `readme` field"); -} - -#[test] -fn has_keywords_and_categories() { - let m = manifest(); - assert!(m.contains("keywords ="), "Cargo.toml missing `keywords`"); - assert!(m.contains("categories ="), "Cargo.toml missing `categories`"); -} - -#[test] -fn has_rust_version() { - let m = manifest(); - assert!(m.contains("rust-version ="), "Cargo.toml missing `rust-version` (MSRV)"); -} diff --git a/crates/dirigent_fermata/tests/cli_check.rs b/crates/dirigent_fermata/tests/cli_check.rs deleted file mode 100644 index d909902..0000000 --- a/crates/dirigent_fermata/tests/cli_check.rs +++ /dev/null @@ -1,52 +0,0 @@ -use assert_cmd::Command; -use predicates::prelude::*; -use std::fs; - -#[test] -fn check_blocks_botignore_match() { - let tmp = tempfile::tempdir().unwrap(); - fs::write(tmp.path().join(".botignore"), ".env\n").unwrap(); - let target = tmp.path().join(".env"); - fs::write(&target, "").unwrap(); - - Command::cargo_bin("fermata") - .unwrap() - .args(["check", "--op", "read", target.to_str().unwrap()]) - .assert() - .failure() - .code(1) - .stdout(predicate::str::contains(".env")); -} - -#[test] -fn check_allows_unmatched() { - let tmp = tempfile::tempdir().unwrap(); - fs::write(tmp.path().join(".botignore"), ".env\n").unwrap(); - let target = tmp.path().join("src.rs"); - fs::write(&target, "").unwrap(); - - 
Command::cargo_bin("fermata") - .unwrap() - .args(["check", "--op", "read", target.to_str().unwrap()]) - .assert() - .success(); -} - -#[test] -fn check_emits_json_with_flag() { - let tmp = tempfile::tempdir().unwrap(); - fs::write(tmp.path().join(".botignore"), ".env\n").unwrap(); - let target = tmp.path().join(".env"); - fs::write(&target, "").unwrap(); - - let out = Command::cargo_bin("fermata") - .unwrap() - .args(["check", "--op", "read", "--json", target.to_str().unwrap()]) - .assert() - .failure() - .get_output() - .stdout - .clone(); - let v: serde_json::Value = serde_json::from_slice(&out).unwrap(); - assert_eq!(v["kind"], "deny"); -} diff --git a/crates/dirigent_fermata/tests/cli_hook_claude.rs b/crates/dirigent_fermata/tests/cli_hook_claude.rs deleted file mode 100644 index 40fe591..0000000 --- a/crates/dirigent_fermata/tests/cli_hook_claude.rs +++ /dev/null @@ -1,69 +0,0 @@ -use assert_cmd::Command; -use std::fs; - -#[test] -fn hook_blocks_read_of_botignore_match() { - let tmp = tempfile::tempdir().unwrap(); - fs::write(tmp.path().join(".botignore"), ".env\n").unwrap(); - let target = tmp.path().join(".env"); - fs::write(&target, "").unwrap(); - - let payload = serde_json::json!({ - "tool_name": "Read", - "tool_input": { "file_path": target.to_str().unwrap() } - }) - .to_string(); - - let out = Command::cargo_bin("fermata") - .unwrap() - .args(["hook", "--harness", "claude"]) - .write_stdin(payload) - .assert() - .success() // hook always exits 0 - .get_output() - .stdout - .clone(); - - let v: serde_json::Value = serde_json::from_slice(&out).unwrap(); - assert_eq!(v["hookSpecificOutput"]["permissionDecision"], "deny"); - assert!(v["hookSpecificOutput"]["permissionDecisionReason"] - .as_str() - .unwrap() - .contains(".env")); -} - -#[test] -fn hook_allows_unrelated_read() { - let tmp = tempfile::tempdir().unwrap(); - fs::write(tmp.path().join(".botignore"), ".env\n").unwrap(); - let target = tmp.path().join("src.rs"); - fs::write(&target, "").unwrap(); 
- - let payload = serde_json::json!({ - "tool_name": "Read", - "tool_input": { "file_path": target.to_str().unwrap() } - }) - .to_string(); - - let out = Command::cargo_bin("fermata") - .unwrap() - .args(["hook", "--harness", "claude"]) - .write_stdin(payload) - .assert() - .success() - .get_output() - .stdout - .clone(); - let v: serde_json::Value = serde_json::from_slice(&out).unwrap(); - assert_eq!(v["hookSpecificOutput"]["permissionDecision"], "allow"); -} - -#[test] -fn hook_unknown_harness_errors() { - Command::cargo_bin("fermata") - .unwrap() - .args(["hook", "--harness", "doesnotexist"]) - .write_stdin("{}") - .assert() - .code(2); -} diff --git a/crates/dirigent_fermata/tests/core_botignore.rs b/crates/dirigent_fermata/tests/core_botignore.rs deleted file mode 100644 index b54ae0f..0000000 --- a/crates/dirigent_fermata/tests/core_botignore.rs +++ /dev/null @@ -1,135 +0,0 @@ -use dirigent_fermata::core::botignore::BotignoreSet; -use std::fs; -use tempfile::TempDir; - -#[test] -fn matches_simple_pattern() { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::write(root.join(".botignore"), ".env\nsecrets/\n").unwrap(); - - let set = BotignoreSet::load(root).unwrap(); - - let env = root.join(".env"); - fs::write(&env, "").unwrap(); - let m = set.matched(&env).unwrap(); - assert!(m.is_some(), ".env should be matched"); - assert_eq!(m.unwrap().pattern, ".env"); -} - -#[test] -fn does_not_match_unrelated_files() { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::write(root.join(".botignore"), ".env\n").unwrap(); - - let set = BotignoreSet::load(root).unwrap(); - - let other = root.join("README.md"); - fs::write(&other, "").unwrap(); - assert!(set.matched(&other).unwrap().is_none()); -} - -#[test] -fn negation_pattern_excludes() { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::write(root.join(".botignore"), "*.log\n!keep.log\n").unwrap(); - - let set = BotignoreSet::load(root).unwrap(); - - let blocked = 
root.join("foo.log"); - fs::write(&blocked, "").unwrap(); - assert!(set.matched(&blocked).unwrap().is_some()); - - let allowed = root.join("keep.log"); - fs::write(&allowed, "").unwrap(); - assert!(set.matched(&allowed).unwrap().is_none()); -} - -#[test] -fn empty_or_missing_botignore_is_ok() { - let tmp = TempDir::new().unwrap(); - let set = BotignoreSet::load(tmp.path()).unwrap(); - let any = tmp.path().join("anything.txt"); - std::fs::write(&any, "").unwrap(); - assert!(set.matched(&any).unwrap().is_none()); -} - -#[test] -fn nested_botignore_is_scoped_to_its_directory() { - // A `.botignore` in a subdirectory only applies under that subdirectory, - // matching gitignore semantics: a sibling file with the same name at the - // root is NOT affected. - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("frontend")).unwrap(); - fs::write(root.join("frontend/.botignore"), "secret.key\n").unwrap(); - - let set = BotignoreSet::load(root).unwrap(); - - let blocked = root.join("frontend/secret.key"); - fs::write(&blocked, "").unwrap(); - let m = set - .matched(&blocked) - .unwrap() - .expect("frontend/secret.key should match"); - let src = m.source.to_string_lossy().replace('\\', "/"); - assert!( - src.ends_with("frontend/.botignore"), - "Rule.source should point at the nested file; was {}", - src, - ); - - let unblocked = root.join("secret.key"); - fs::write(&unblocked, "").unwrap(); - assert!( - set.matched(&unblocked).unwrap().is_none(), - "top-level secret.key should NOT be matched (rule scoped to frontend/)", - ); -} - -#[test] -fn nested_botignore_anchored_pattern_is_local() { - // A leading `/` anchors the pattern to the directory of the .botignore - // file it's declared in. 
- let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("frontend")).unwrap(); - fs::write(root.join("frontend/.botignore"), "/secret.key\n").unwrap(); - - let set = BotignoreSet::load(root).unwrap(); - - let blocked = root.join("frontend/secret.key"); - fs::write(&blocked, "").unwrap(); - assert!(set.matched(&blocked).unwrap().is_some()); - - let unblocked = root.join("secret.key"); - fs::write(&unblocked, "").unwrap(); - assert!( - set.matched(&unblocked).unwrap().is_none(), - "anchored /secret.key should NOT match outside frontend/", - ); -} - -#[test] -fn nested_botignore_overrides_root() { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::write(root.join(".botignore"), "*.log\n").unwrap(); - fs::create_dir_all(root.join("logs")).unwrap(); - fs::write(root.join("logs/.botignore"), "!keep.log\n").unwrap(); - - let set = BotignoreSet::load(root).unwrap(); - - let blocked = root.join("logs/foo.log"); - fs::write(&blocked, "").unwrap(); - assert!(set.matched(&blocked).unwrap().is_some()); - - let kept = root.join("logs/keep.log"); - fs::write(&kept, "").unwrap(); - assert!( - set.matched(&kept).unwrap().is_none(), - "logs/.botignore should un-ignore keep.log", - ); -} diff --git a/crates/dirigent_fermata/tests/core_extract.rs b/crates/dirigent_fermata/tests/core_extract.rs deleted file mode 100644 index 72a07d9..0000000 --- a/crates/dirigent_fermata/tests/core_extract.rs +++ /dev/null @@ -1,39 +0,0 @@ -use dirigent_fermata::core::extract::{extract_path_candidates, Confidence}; - -#[test] -fn extracts_absolute_unix_path() { - let s = "the file is at /home/user/.env and was modified"; - let cs = extract_path_candidates(s); - assert!(cs.iter().any(|c| c.text == "/home/user/.env" && c.confidence == Confidence::High)); -} - -#[test] -fn extracts_absolute_windows_path() { - let s = r"see C:\Users\me\secret.toml for details"; - let cs = extract_path_candidates(s); - assert!(cs.iter().any(|c| c.text == 
r"C:\Users\me\secret.toml" && c.confidence == Confidence::High)); -} - -#[test] -fn extracts_relative_with_separator() { - let s = "modified src/lib.rs and tests/foo.rs"; - let cs = extract_path_candidates(s); - let texts: Vec<_> = cs.iter().map(|c| c.text.as_str()).collect(); - assert!(texts.contains(&"src/lib.rs")); - assert!(texts.contains(&"tests/foo.rs")); -} - -#[test] -fn bare_filename_with_extension_is_low_confidence() { - let s = "open README.md please"; - let cs = extract_path_candidates(s); - let r = cs.iter().find(|c| c.text == "README.md").unwrap(); - assert_eq!(r.confidence, Confidence::Low); -} - -#[test] -fn ignores_pure_words() { - let s = "the quick brown fox"; - let cs = extract_path_candidates(s); - assert!(cs.is_empty()); -} diff --git a/crates/dirigent_fermata/tests/core_op_decision.rs b/crates/dirigent_fermata/tests/core_op_decision.rs deleted file mode 100644 index ee35d7a..0000000 --- a/crates/dirigent_fermata/tests/core_op_decision.rs +++ /dev/null @@ -1,42 +0,0 @@ -use dirigent_fermata::core::{Decision, Op, Reason, Rule}; - -#[test] -fn op_variants_exist() { - let _ = Op::Read; - let _ = Op::Write; - let _ = Op::Execute; -} - -#[test] -fn decision_allow_is_simple() { - let d = Decision::Allow; - assert!(matches!(d, Decision::Allow)); -} - -#[test] -fn decision_deny_carries_reason() { - let rule = Rule { - source: "/proj/.botignore".into(), - pattern: ".env".into(), - }; - let d = Decision::Deny(Reason { - message: "blocked by .botignore".into(), - rule: Some(rule), - }); - match d { - Decision::Deny(r) => { - assert_eq!(r.message, "blocked by .botignore"); - assert!(r.rule.is_some()); - } - _ => panic!("expected Deny"), - } -} - -#[test] -fn decision_ask_carries_reason() { - let d = Decision::Ask(Reason { - message: "needs confirmation".into(), - rule: None, - }); - assert!(matches!(d, Decision::Ask(_))); -} diff --git a/crates/dirigent_fermata/tests/core_policy_command.rs b/crates/dirigent_fermata/tests/core_policy_command.rs deleted 
file mode 100644 index 6d49110..0000000 --- a/crates/dirigent_fermata/tests/core_policy_command.rs +++ /dev/null @@ -1,52 +0,0 @@ -use dirigent_fermata::core::{Decision, Policy}; -use std::fs; -use tempfile::TempDir; - -fn project_with(toml: &str) -> TempDir { - let tmp = TempDir::new().unwrap(); - fs::write(tmp.path().join("botignore.toml"), toml).unwrap(); - tmp -} - -#[test] -fn deny_substring_blocks() { - let tmp = project_with("[bash]\ndeny = [\"rm -rf /\"]\n"); - let p = Policy::load(tmp.path()).unwrap(); - assert!(matches!(p.check_command("sudo rm -rf / now").unwrap(), Decision::Deny(_))); -} - -#[test] -fn deny_glob_blocks() { - let tmp = project_with("[bash]\ndeny = [\"git push --force*\"]\n"); - let p = Policy::load(tmp.path()).unwrap(); - assert!(matches!(p.check_command("git push --force-with-lease").unwrap(), Decision::Deny(_))); -} - -#[test] -fn ask_returns_ask() { - let tmp = project_with("[bash]\nask = [\"rm *\"]\n"); - let p = Policy::load(tmp.path()).unwrap(); - assert!(matches!(p.check_command("rm somefile").unwrap(), Decision::Ask(_))); -} - -#[test] -fn allow_prefixes_allows() { - let tmp = project_with("[bash]\nallow_prefixes = [\"make test\"]\n"); - let p = Policy::load(tmp.path()).unwrap(); - assert_eq!(p.check_command("make test").unwrap(), Decision::Allow); - assert_eq!(p.check_command("make test-unit").unwrap(), Decision::Allow); -} - -#[test] -fn no_rules_means_allow() { - let tmp = project_with(""); - let p = Policy::load(tmp.path()).unwrap(); - assert_eq!(p.check_command("anything goes").unwrap(), Decision::Allow); -} - -#[test] -fn deny_takes_precedence_over_allow_prefix() { - let tmp = project_with("[bash]\ndeny = [\"rm -rf /\"]\nallow_prefixes = [\"rm\"]\n"); - let p = Policy::load(tmp.path()).unwrap(); - assert!(matches!(p.check_command("rm -rf /").unwrap(), Decision::Deny(_))); -} diff --git a/crates/dirigent_fermata/tests/core_policy_path.rs b/crates/dirigent_fermata/tests/core_policy_path.rs deleted file mode 100644 index 
4612079..0000000 --- a/crates/dirigent_fermata/tests/core_policy_path.rs +++ /dev/null @@ -1,64 +0,0 @@ -use dirigent_fermata::core::{Decision, Op, Policy}; -use std::fs; -use tempfile::TempDir; - -fn make_project(botignore: &str, toml_text: &str) -> TempDir { - let tmp = TempDir::new().unwrap(); - fs::write(tmp.path().join(".botignore"), botignore).unwrap(); - if !toml_text.is_empty() { - fs::write(tmp.path().join("botignore.toml"), toml_text).unwrap(); - } - tmp -} - -#[test] -fn botignore_blocks_read() { - let tmp = make_project(".env\n", ""); - let policy = Policy::load(tmp.path()).unwrap(); - let target = tmp.path().join(".env"); - fs::write(&target, "").unwrap(); - let d = policy.check(Op::Read, &target).unwrap(); - assert!(matches!(d, Decision::Deny(_))); -} - -#[test] -fn botignore_blocks_write_too() { - let tmp = make_project(".env\n", ""); - let policy = Policy::load(tmp.path()).unwrap(); - let target = tmp.path().join(".env"); - let d = policy.check(Op::Write, &target).unwrap(); - assert!(matches!(d, Decision::Deny(_))); -} - -#[test] -fn unmatched_path_allowed() { - let tmp = make_project(".env\n", ""); - let policy = Policy::load(tmp.path()).unwrap(); - let target = tmp.path().join("src/main.rs"); - fs::create_dir_all(target.parent().unwrap()).unwrap(); - fs::write(&target, "").unwrap(); - let d = policy.check(Op::Read, &target).unwrap(); - assert_eq!(d, Decision::Allow); -} - -#[test] -fn toml_read_block_applies_only_to_read() { - let tmp = make_project("", "[read]\npatterns = [\"secrets/**\"]\n"); - let policy = Policy::load(tmp.path()).unwrap(); - let target = tmp.path().join("secrets/key.pem"); - fs::create_dir_all(target.parent().unwrap()).unwrap(); - fs::write(&target, "").unwrap(); - assert!(matches!(policy.check(Op::Read, &target).unwrap(), Decision::Deny(_))); - assert_eq!(policy.check(Op::Write, &target).unwrap(), Decision::Allow); -} - -#[test] -fn toml_write_block_applies_only_to_write() { - let tmp = make_project("", "[write]\npatterns = 
[\"vendor/**\"]\n"); - let policy = Policy::load(tmp.path()).unwrap(); - let target = tmp.path().join("vendor/lib.rs"); - fs::create_dir_all(target.parent().unwrap()).unwrap(); - fs::write(&target, "").unwrap(); - assert_eq!(policy.check(Op::Read, &target).unwrap(), Decision::Allow); - assert!(matches!(policy.check(Op::Write, &target).unwrap(), Decision::Deny(_))); -} diff --git a/crates/dirigent_fermata/tests/core_project.rs b/crates/dirigent_fermata/tests/core_project.rs deleted file mode 100644 index 98baa10..0000000 --- a/crates/dirigent_fermata/tests/core_project.rs +++ /dev/null @@ -1,120 +0,0 @@ -use dirigent_fermata::core::project::find_project_root; -use std::fs; -use tempfile::TempDir; - -#[test] -fn finds_botignore_toml_first() { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("sub/deep")).unwrap(); - fs::write(root.join("botignore.toml"), "").unwrap(); - fs::write(root.join(".botignore.toml"), "").unwrap(); - fs::create_dir_all(root.join(".git")).unwrap(); - - let target = root.join("sub/deep/file.rs"); - fs::write(&target, "").unwrap(); - - let found = find_project_root(&target).unwrap(); - assert_eq!(found, root); -} - -#[test] -fn finds_dot_botignore_toml() { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("sub")).unwrap(); - fs::write(root.join(".botignore.toml"), "").unwrap(); - - let target = root.join("sub/file.rs"); - fs::write(&target, "").unwrap(); - - let found = find_project_root(&target).unwrap(); - assert_eq!(found, root); -} - -#[test] -fn botignore_alone_does_not_stop_walk() { - // A bare .botignore is a policy file, not a project boundary. - // The walk should continue past it to find a real root marker. 
- let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("a/b")).unwrap(); - fs::create_dir_all(root.join(".git")).unwrap(); - fs::write(root.join("a/.botignore"), "*.secret").unwrap(); - - let target = root.join("a/b/file.rs"); - fs::write(&target, "").unwrap(); - - // Should find root (with .git), NOT root/a (with .botignore) - let found = find_project_root(&target).unwrap(); - assert_eq!(found, root); -} - -#[test] -fn botignore_used_as_fallback() { - // If only .botignore exists (no strong marker), it serves as a fallback - // root so that policy is still enforced. - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("sub")).unwrap(); - fs::write(root.join(".botignore"), "*.secret").unwrap(); - - let target = root.join("sub/file.rs"); - fs::write(&target, "").unwrap(); - - let found = find_project_root(&target).unwrap(); - assert_eq!(found, root); -} - -#[test] -fn strong_marker_preferred_over_botignore_fallback() { - // .botignore at a/b/, .git at root — walk past .botignore, use root. 
- let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("a/b/c")).unwrap(); - fs::create_dir_all(root.join(".git")).unwrap(); - fs::write(root.join("a/b/.botignore"), "*.key").unwrap(); - - let target = root.join("a/b/c/file.rs"); - fs::write(&target, "").unwrap(); - - // Should find root (with .git), not a/b (with .botignore) - let found = find_project_root(&target).unwrap(); - assert_eq!(found, root); -} - -#[test] -fn falls_back_to_git() { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("sub")).unwrap(); - fs::create_dir_all(root.join(".git")).unwrap(); - - let target = root.join("sub/file.rs"); - fs::write(&target, "").unwrap(); - - let found = find_project_root(&target).unwrap(); - assert_eq!(found, root); -} - -#[test] -fn returns_none_when_no_marker() { - let tmp = TempDir::new().unwrap(); - let target = tmp.path().join("file.rs"); - std::fs::write(&target, "").unwrap(); - assert!(find_project_root(&target).is_none()); -} - -#[test] -fn walks_up_from_file_path_not_cwd() { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::create_dir_all(root.join("a/b/c")).unwrap(); - fs::write(root.join("a/botignore.toml"), "").unwrap(); - - let target = root.join("a/b/c/file.rs"); - fs::write(&target, "").unwrap(); - - let found = find_project_root(&target).unwrap(); - assert_eq!(found, root.join("a")); -} diff --git a/crates/dirigent_fermata/tests/core_toml_config.rs b/crates/dirigent_fermata/tests/core_toml_config.rs deleted file mode 100644 index 1ed9e36..0000000 --- a/crates/dirigent_fermata/tests/core_toml_config.rs +++ /dev/null @@ -1,47 +0,0 @@ -use dirigent_fermata::core::toml_config::{BotignoreToml, OpRules, BashRules}; - -#[test] -fn parses_full_config() { - let src = r#" -[read] -patterns = [".env*", "secrets/**"] - -[write] -patterns = ["vendor/**", "*.lock"] - -[bash] -deny = ["rm -rf /", "git push --force*"] -ask = ["rm:*"] -allow_prefixes = ["make test", "git 
checkout:*"] -"#; - let cfg: BotignoreToml = toml::from_str(src).unwrap(); - assert_eq!(cfg.read.unwrap().patterns, vec![".env*", "secrets/**"]); - assert_eq!(cfg.write.unwrap().patterns, vec!["vendor/**", "*.lock"]); - let bash = cfg.bash.unwrap(); - assert_eq!(bash.deny, vec!["rm -rf /", "git push --force*"]); - assert_eq!(bash.ask, vec!["rm:*"]); - assert_eq!(bash.allow_prefixes, vec!["make test", "git checkout:*"]); -} - -#[test] -fn empty_config_is_valid() { - let cfg: BotignoreToml = toml::from_str("").unwrap(); - assert!(cfg.read.is_none()); - assert!(cfg.write.is_none()); - assert!(cfg.bash.is_none()); -} - -#[test] -fn loads_from_disk_when_present() { - let tmp = tempfile::tempdir().unwrap(); - std::fs::write(tmp.path().join("botignore.toml"), "[read]\npatterns = [\".env\"]\n").unwrap(); - let cfg = BotignoreToml::load(tmp.path()).unwrap(); - assert_eq!(cfg.read.unwrap().patterns, vec![".env"]); -} - -#[test] -fn loads_empty_when_missing() { - let tmp = tempfile::tempdir().unwrap(); - let cfg = BotignoreToml::load(tmp.path()).unwrap(); - assert!(cfg.read.is_none()); -} diff --git a/crates/dirigent_fermata/tests/fixtures_a4.rs b/crates/dirigent_fermata/tests/fixtures_a4.rs deleted file mode 100644 index eea6639..0000000 --- a/crates/dirigent_fermata/tests/fixtures_a4.rs +++ /dev/null @@ -1,112 +0,0 @@ -//! Smoke-test contract from `docs/workpad/brainstorm/fermata.md` Appendix A.4. 
- -use dirigent_fermata::core::{Decision, Op, Policy}; -use std::fs; -use tempfile::TempDir; - -fn fixture() -> TempDir { - let tmp = TempDir::new().unwrap(); - let root = tmp.path(); - fs::write(root.join(".botignore"), ".env\n.env.*\nconf/cert/**\nconf/mitmproxy/**\n").unwrap(); - fs::write( - root.join("botignore.toml"), - r#" -[read] -patterns = [ - "conf/localtestsettings.yaml", - "conf/localsettings.yaml", - "conf/default-secrets.yaml", - ".claude/self-reflections/**", -] - -[write] -patterns = [ - "conf/localtestsettings.yaml", - "conf/localsettings.yaml", - "conf/default-secrets.yaml", -] - -[bash] -deny = ["localtestsettings.yaml", "localsettings.yaml", "default-secrets.yaml", ".env"] -ask = ["rm *", "mv *"] -allow_prefixes = ["make test"] -"#, - ) - .unwrap(); - - fs::create_dir_all(root.join("conf")).unwrap(); - fs::create_dir_all(root.join("datatap")).unwrap(); - fs::create_dir_all(root.join(".claude/self-reflections")).unwrap(); - for f in [".env", "conf/localsettings.yaml", "datatap/foo.py", ".claude/self-reflections/x.md"] { - fs::write(root.join(f), "").unwrap(); - } - tmp -} - -#[test] -fn read_dot_env_denied() { - let t = fixture(); - let p = Policy::load(t.path()).unwrap(); - assert!(matches!(p.check(Op::Read, &t.path().join(".env")).unwrap(), Decision::Deny(_))); -} - -#[test] -fn bash_cat_dot_env_denied() { - let t = fixture(); - let p = Policy::load(t.path()).unwrap(); - assert!(matches!(p.check_command("cat ./.env").unwrap(), Decision::Deny(_))); -} - -#[test] -fn bash_rm_localsettings_denied() { - let t = fixture(); - let p = Policy::load(t.path()).unwrap(); - assert!(matches!( - p.check_command("rm ./conf/localsettings.yaml").unwrap(), - Decision::Deny(_) - )); -} - -#[test] -fn write_localsettings_denied() { - let t = fixture(); - let p = Policy::load(t.path()).unwrap(); - assert!(matches!( - p.check(Op::Write, &t.path().join("conf/localsettings.yaml")).unwrap(), - Decision::Deny(_) - )); -} - -#[test] -fn edit_datatap_foo_py_allowed() { - 
let t = fixture(); - let p = Policy::load(t.path()).unwrap(); - assert_eq!( - p.check(Op::Write, &t.path().join("datatap/foo.py")).unwrap(), - Decision::Allow - ); -} - -#[test] -fn bash_make_test_allowed() { - let t = fixture(); - let p = Policy::load(t.path()).unwrap(); - assert_eq!(p.check_command("make test").unwrap(), Decision::Allow); -} - -#[test] -fn bash_rm_somefile_asks() { - let t = fixture(); - let p = Policy::load(t.path()).unwrap(); - assert!(matches!(p.check_command("rm somefile").unwrap(), Decision::Ask(_))); -} - -#[test] -fn read_self_reflections_asks() { - // Note: A.4 has self-reflections under "ask" — current toml schema uses `[read].patterns` - // for hard reads. This documents the gap; once toml has a `[read].ask`, switch to Ask. - let t = fixture(); - let p = Policy::load(t.path()).unwrap(); - let d = p.check(Op::Read, &t.path().join(".claude/self-reflections/x.md")).unwrap(); - assert!(matches!(d, Decision::Deny(_))); -} diff --git a/crates/dirigent_fermata/tests/harness_claude.rs b/crates/dirigent_fermata/tests/harness_claude.rs deleted file mode 100644 index 2521973..0000000 --- a/crates/dirigent_fermata/tests/harness_claude.rs +++ /dev/null @@ -1,86 +0,0 @@ -use dirigent_fermata::core::{Decision, Reason}; -use dirigent_fermata::harness::{HarnessAdapter, PathKind, ToolOp}; -use dirigent_fermata::harness::claude::ClaudeAdapter; - -#[test] -fn parses_read_payload() { - let payload = br#"{"tool_name":"Read","tool_input":{"file_path":"/proj/.env"}}"#; - let call = ClaudeAdapter.parse_request(payload).unwrap(); - assert_eq!(call.tool_name, "Read"); - match call.op { - ToolOp::Path { path, kind } => { - assert_eq!(path.to_string_lossy(), "/proj/.env"); - assert_eq!(kind, PathKind::Read); - } - _ => panic!("expected Path op"), - } -} - -#[test] -fn parses_write_payload() { - let payload = br#"{"tool_name":"Write","tool_input":{"file_path":"/proj/out.txt"}}"#; - let call = ClaudeAdapter.parse_request(payload).unwrap(); - assert!(matches!(call.op, 
ToolOp::Path { kind: PathKind::Write, .. })); -} - -#[test] -fn parses_edit_as_write() { - let payload = br#"{"tool_name":"Edit","tool_input":{"file_path":"/proj/src.rs"}}"#; - let call = ClaudeAdapter.parse_request(payload).unwrap(); - assert!(matches!(call.op, ToolOp::Path { kind: PathKind::Write, .. })); -} - -#[test] -fn parses_multiedit_as_write() { - let payload = br#"{"tool_name":"MultiEdit","tool_input":{"file_path":"/proj/src.rs","edits":[]}}"#; - let call = ClaudeAdapter.parse_request(payload).unwrap(); - assert!(matches!(call.op, ToolOp::Path { kind: PathKind::Write, .. })); -} - -#[test] -fn parses_bash_payload() { - let payload = br#"{"tool_name":"Bash","tool_input":{"command":"rm -rf /"}}"#; - let call = ClaudeAdapter.parse_request(payload).unwrap(); - match call.op { - ToolOp::Command { text } => assert_eq!(text, "rm -rf /"), - _ => panic!("expected Command op"), - } -} - -#[test] -fn renders_deny_as_hookspecificoutput() { - let payload = br#"{"tool_name":"Read","tool_input":{"file_path":"/proj/.env"}}"#; - let call = ClaudeAdapter.parse_request(payload).unwrap(); - let d = Decision::Deny(Reason { - message: "blocked by .botignore: .env".into(), - rule: None, - }); - let out = ClaudeAdapter.render_decision(&call, &d).unwrap(); - let v: serde_json::Value = serde_json::from_slice(&out).unwrap(); - assert_eq!(v["hookSpecificOutput"]["hookEventName"], "PreToolUse"); - assert_eq!(v["hookSpecificOutput"]["permissionDecision"], "deny"); - assert!(v["hookSpecificOutput"]["permissionDecisionReason"] - .as_str() - .unwrap() - .contains(".env")); -} - -#[test] -fn renders_allow_as_allow() { - let payload = br#"{"tool_name":"Read","tool_input":{"file_path":"/proj/src/main.rs"}}"#; - let call = ClaudeAdapter.parse_request(payload).unwrap(); - let out = ClaudeAdapter.render_decision(&call, &Decision::Allow).unwrap(); - let v: serde_json::Value = serde_json::from_slice(&out).unwrap(); - assert_eq!(v["hookSpecificOutput"]["permissionDecision"], "allow"); -} - 
-#[test] -fn renders_ask_as_ask() { - let payload = br#"{"tool_name":"Bash","tool_input":{"command":"rm something"}}"#; - let call = ClaudeAdapter.parse_request(payload).unwrap(); - let out = ClaudeAdapter - .render_decision(&call, &Decision::Ask(Reason { message: "confirm".into(), rule: None })) - .unwrap(); - let v: serde_json::Value = serde_json::from_slice(&out).unwrap(); - assert_eq!(v["hookSpecificOutput"]["permissionDecision"], "ask"); -} diff --git a/crates/dirigent_matrix/CLAUDE.md b/crates/dirigent_matrix/CLAUDE.md deleted file mode 100644 index 2d5793d..0000000 --- a/crates/dirigent_matrix/CLAUDE.md +++ /dev/null @@ -1,96 +0,0 @@ -# Package: dirigent_matrix - -Matrix integration for Dirigent session sharing. - -## Quick Facts -- **Type**: Library -- **Main Entry**: src/lib.rs -- **Dependencies**: matrix-sdk, dirigent_protocol, tokio, serde, thiserror, async-trait -- **Status**: Phase 1 -- Bot-mode session sharing - -## Purpose - -Provides bidirectional bridging between Dirigent sessions and Matrix rooms. -A session can be "shared" to a Matrix room, allowing Matrix users to send -messages to the agent and see responses in real-time. - -## Architecture - -### MatrixService (`service.rs`) -Central singleton owning the matrix-sdk Client. 
Handles: -- Bot authentication (login with username/password, session restore via SQLite store) -- Background sync loop for receiving Matrix events -- Share registry (tracks active session shares by connector_id + session_id) -- Room message dispatch to appropriate shares - -### MatrixSessionShare (`share.rs`) -Bidirectional bridge for one (connector_id, session_id) to one Matrix room: -- **Dirigent to Matrix**: Subscribes to connector events, forwards completed assistant messages as m.notice -- **Matrix to Dirigent**: Receives room messages via MatrixService dispatch, sends ConnectorCommandProxy through mpsc channel -- Implements the `SessionShare` trait from dirigent_protocol - -### MatrixConfig (`config.rs`) -Configuration parsed from `[matrix]` section in dirigent.toml: -- Homeserver URL, username, password source (env var or inline) -- Device ID for session persistence across restarts -- Display name, default invite list, store path - -### Room Management (`room.rs`) -- Private, non-federated room creation for session shares -- Room naming conventions (`"Dirigent: "`) - -## Configuration - -Identity and credentials live in an Account; sharing behavior in `[matrix]`: - -```toml -[accounts.matrix-bot] -type = "matrix" -homeserver = "https://matrix.example.com" -username = "dirigent_bot" -device_id = "DIRIGENT_01" -display_name = "Dirigent Bot" - -[accounts.matrix-bot.credentials.password] -source = "env" -key = "DIRIGENT_MATRIX_PASSWORD" - -[matrix] -account = "matrix-bot" -default_invite = ["@user:example.com"] -store_path = "matrix/bot/store" -``` - -## Key Types -- `MatrixService` -- Singleton service, owns Client and share registry -- `MatrixSessionShare` -- Bidirectional session-to-room bridge -- `MatrixBehaviorConfig` -- Sharing behavior (account ref, invites, store path) -- `ConnectorCommandProxy` -- Message proxy decoupling from dirigent_core types -- `CreateRoomOptions` -- Room creation parameters - -## Integration with CoreRuntime - -The MatrixService 
is wired into CoreRuntime as an optional component (like archivist): -- `CoreRuntime::start_matrix_service()` -- Resolves Account from config, creates and starts service -- `CoreRuntime::create_matrix_share()` -- Creates room, starts bridge, spawns command proxy task -- `CoreRuntime::matrix_service()` -- Accessor for the running service - -## Event Flow - -``` -Connector emits Event::MessageCompleted (role=assistant) - -> MatrixSessionShare event forwarder task - -> Sends m.notice to Matrix room - -Matrix user sends message in room - -> MatrixService sync loop receives SyncRoomMessageEvent - -> Looks up share by room_id - -> share.inject_message(text) -> ConnectorCommandProxy - -> Proxy task translates to ConnectorCommand::SendMessage - -> Connector processes message -``` - -## Related Packages -- **dirigent_protocol**: SessionShare trait, Event types consumed by share forwarder -- **dirigent_core**: CoreRuntime integration, ConnectorCommand, ConnectorHandle -- **dirigent_config**: Path resolution (DIRIGENT_DATA_DIR for SQLite store) diff --git a/crates/dirigent_matrix/Cargo.toml b/crates/dirigent_matrix/Cargo.toml deleted file mode 100644 index 142ac40..0000000 --- a/crates/dirigent_matrix/Cargo.toml +++ /dev/null @@ -1,47 +0,0 @@ -[package] -name = "dirigent_matrix" -version = "0.1.0" -edition = "2021" - -[lib] -path = "src/lib.rs" - -[features] -default = ["bundled-sqlite"] -bundled-sqlite = ["matrix-sdk/bundled-sqlite"] - -[dependencies] -# Matrix SDK -matrix-sdk = { version = "0.9", default-features = false, features = ["rustls-tls", "sqlite"] } - -# Internal dependencies -dirigent_protocol = { path = "../dirigent_protocol" } -dirigent_auth = { path = "../dirigent_auth" } - -# Async runtime -tokio = { version = "1.42", features = ["sync", "time", "macros", "rt"] } - -# Markdown rendering -pulldown-cmark = "0.12" - -# Serialization -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" - -# Logging -tracing = "0.1" - -# Error handling 
-thiserror = "2.0" - -# Async traits -async-trait = "0.1" - -# UUID -uuid = { version = "1.11", features = ["v7"] } - -# Timestamps (for StreamSummary::active_since) -chrono = { version = "0.4", features = ["serde"] } - -[dev-dependencies] -tokio = { version = "1.42", features = ["full"] } diff --git a/crates/dirigent_matrix/src/config.rs b/crates/dirigent_matrix/src/config.rs deleted file mode 100644 index 3582974..0000000 --- a/crates/dirigent_matrix/src/config.rs +++ /dev/null @@ -1,73 +0,0 @@ -use serde::{Deserialize, Serialize}; - -/// How Dirigent connects to Matrix. -#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum MatrixConnectionMode { - /// Dedicated bot user with username/password login (existing behavior). - #[default] - Bot, - /// Appservice-provisioned virtual user with stored access token. - Provisioned, -} - -/// A persistent Matrix room defined in configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PersistentRoom { - /// Human-readable label for this room (shown in room picker). - pub label: String, - /// Matrix room ID (e.g. "!abc:matrix.org"). - pub room_id: String, -} - -/// Matrix sharing behavior — separate from identity. -/// -/// Identity and credentials come from an Account referenced by name. -/// This struct only defines sharing behavior. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MatrixBehaviorConfig { - /// Account name (key in [accounts.*]) for the Matrix connection. - pub account: String, - /// Connection mode: "bot" (default) or "provisioned". - #[serde(default)] - pub mode: MatrixConnectionMode, - /// Matrix user IDs to invite to newly created share rooms. - #[serde(default)] - pub default_invite: Vec, - /// Directory for matrix-sdk SQLite store (relative to DIRIGENT_DATA_DIR). - #[serde(default = "default_store_path")] - pub store_path: String, - /// Pre-defined rooms that always appear in the room selection UI. 
- #[serde(default)] - pub rooms: Vec, -} - -fn default_store_path() -> String { - "matrix/bot/store".to_string() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_default_mode_is_bot() { - let json = r#"{"account": "matrix-bot"}"#; - let config: MatrixBehaviorConfig = serde_json::from_str(json).unwrap(); - assert_eq!(config.mode, MatrixConnectionMode::Bot); - } - - #[test] - fn test_provisioned_mode() { - let json = r#"{"account": "matrix-virt", "mode": "provisioned"}"#; - let config: MatrixBehaviorConfig = serde_json::from_str(json).unwrap(); - assert_eq!(config.mode, MatrixConnectionMode::Provisioned); - } - - #[test] - fn test_bot_mode_explicit() { - let json = r#"{"account": "matrix-bot", "mode": "bot"}"#; - let config: MatrixBehaviorConfig = serde_json::from_str(json).unwrap(); - assert_eq!(config.mode, MatrixConnectionMode::Bot); - } -} diff --git a/crates/dirigent_matrix/src/error.rs b/crates/dirigent_matrix/src/error.rs deleted file mode 100644 index 1ccbbe0..0000000 --- a/crates/dirigent_matrix/src/error.rs +++ /dev/null @@ -1,39 +0,0 @@ -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum MatrixError { - #[error("Matrix SDK error: {0}")] - Sdk(#[from] matrix_sdk::Error), - - #[error("Matrix HTTP error: {0}")] - Http(#[from] matrix_sdk::HttpError), - - #[error("Matrix client build error: {0}")] - ClientBuild(#[from] matrix_sdk::ClientBuildError), - - #[error("Not logged in")] - NotLoggedIn, - - #[error("Room not found: {0}")] - RoomNotFound(String), - - #[error("Share not found: connector={connector_id} session={session_id}")] - ShareNotFound { - connector_id: String, - session_id: String, - }, - - #[error("Share already exists: connector={connector_id} session={session_id}")] - ShareAlreadyExists { - connector_id: String, - session_id: String, - }, - - #[error("Configuration error: {0}")] - Config(String), - - #[error("Channel closed")] - ChannelClosed, -} - -pub type Result = std::result::Result; diff --git 
a/crates/dirigent_matrix/src/lib.rs b/crates/dirigent_matrix/src/lib.rs deleted file mode 100644 index d2fcb58..0000000 --- a/crates/dirigent_matrix/src/lib.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! Matrix integration for Dirigent session sharing -//! -//! This package provides bidirectional bridging between Dirigent sessions -//! and Matrix rooms. A session can be "shared" to a Matrix room, allowing -//! Matrix users to interact with the agent and see responses in real-time. - -pub mod config; -pub mod error; -pub mod room; -pub mod service; -pub mod share; - -pub use config::MatrixBehaviorConfig; -pub use error::{MatrixError, Result}; -pub use room::CreateRoomOptions; -pub use service::MatrixService; -pub use share::{ConnectorCommandProxy, MatrixSessionShare}; diff --git a/crates/dirigent_matrix/src/room.rs b/crates/dirigent_matrix/src/room.rs deleted file mode 100644 index 9c27ce1..0000000 --- a/crates/dirigent_matrix/src/room.rs +++ /dev/null @@ -1,81 +0,0 @@ -//! Matrix room creation and management helpers. - -use matrix_sdk::{ - ruma::{ - api::client::room::create_room::v3::{ - CreationContent, Request as CreateRoomRequest, RoomPreset, - }, - OwnedRoomId, OwnedUserId, - }, - Client, -}; -use tracing::debug; - -/// Options for creating a new Matrix room for session sharing. -pub struct CreateRoomOptions { - /// Human-readable room name. - pub name: String, - /// Optional room topic. - pub topic: Option, - /// Matrix user IDs (as strings, e.g. "@user:example.com") to invite at - /// creation time. Invalid IDs are silently skipped. - pub invite: Vec, -} - -/// Create a private, non-federated Matrix room for bridging a Dirigent session. -/// -/// The room is configured as a `PrivateChat` (invite-only, shared history) and -/// the `m.federate` flag is disabled so the room does not appear on remote -/// homeservers. -/// -/// # Errors -/// -/// Returns [`crate::MatrixError::NotLoggedIn`] if the client is not authenticated. 
-/// Other errors propagate from the Matrix SDK. -pub async fn create_share_room( - client: &Client, - options: CreateRoomOptions, -) -> crate::Result { - if !client.logged_in() { - return Err(crate::MatrixError::NotLoggedIn); - } - - let invite: Vec = options - .invite - .iter() - .filter_map(|id| id.parse::().ok()) - .collect(); - - let mut request = CreateRoomRequest::new(); - request.name = Some(options.name.clone()); - request.topic = options.topic.clone(); - request.invite = invite; - request.preset = Some(RoomPreset::PrivateChat); - - // Disable federation so the room stays on the local homeserver. - let mut creation_content = CreationContent::new(); - creation_content.federate = false; - request.creation_content = - Some(matrix_sdk::ruma::serde::Raw::new(&creation_content).map_err(|e| { - crate::MatrixError::Config(format!( - "Failed to serialize creation content: {}", - e - )) - })?); - - debug!(room_name = %options.name, "Creating Matrix share room"); - - let room = client.create_room(request).await?; - Ok(room.room_id().to_owned()) -} - -/// Generate a human-readable Matrix room name for a session. -/// -/// Format: `"Dirigent: "` or `"Dirigent: "` when -/// no title is available. 
-pub fn room_name_for_session(connector_id: &str, session_title: Option<&str>) -> String { - match session_title.filter(|t| !t.is_empty()) { - Some(title) => format!("Dirigent: {}", title), - None => format!("Dirigent: {}", connector_id), - } -} diff --git a/crates/dirigent_matrix/src/service.rs b/crates/dirigent_matrix/src/service.rs deleted file mode 100644 index 2366729..0000000 --- a/crates/dirigent_matrix/src/service.rs +++ /dev/null @@ -1,436 +0,0 @@ -use std::{ - collections::HashMap, - path::PathBuf, - sync::Arc, -}; - -use matrix_sdk::{ - config::SyncSettings, - ruma::{ - events::room::message::{MessageType, SyncRoomMessageEvent}, - OwnedUserId, - }, - Client, Room, -}; -use tokio::sync::RwLock; -use tracing::{debug, error, info, warn}; - -use crate::{config::MatrixBehaviorConfig, error::MatrixError, share::MatrixSessionShare, Result}; - -/// Key for the share registry: (connector_id, session_id) -type ShareKey = (String, String); - -/// Central Matrix service. -/// -/// Owns the SDK [`Client`], handles login/session-restore, manages the sync -/// loop, and maintains a registry of [`MatrixSessionShare`]s that bridge -/// Dirigent sessions to Matrix rooms. -pub struct MatrixService { - account: dirigent_auth::Account, - homeserver: String, - username: String, - display_name_str: String, - device_id: String, - behavior: MatrixBehaviorConfig, - data_dir: PathBuf, - client: Arc>>, - shares: Arc>>, -} - -impl MatrixService { - /// Create a new (not yet logged-in) service from an Account and behavior config. - pub fn from_account( - account: &dirigent_auth::Account, - behavior: MatrixBehaviorConfig, - data_dir: PathBuf, - ) -> Result { - let homeserver = account - .property_str("homeserver") - .ok_or_else(|| MatrixError::Config("Account missing 'homeserver' property".into()))? 
- .to_string(); - let username = account - .profile - .username - .clone() - .ok_or_else(|| { - MatrixError::Config("Account missing 'username' in profile".into()) - })?; - let device_id = account - .property_str_or("device_id", "DIRIGENT_01") - .to_string(); - let display_name_str = account.display_name().to_string(); - - Ok(Self { - account: account.clone(), - homeserver, - username, - display_name_str, - device_id, - behavior, - data_dir, - client: Arc::new(RwLock::new(None)), - shares: Arc::new(RwLock::new(HashMap::new())), - }) - } - - /// Return the current behavior configuration. - pub fn behavior(&self) -> &MatrixBehaviorConfig { - &self.behavior - } - - /// Return a clone of the inner [`Client`], if logged in. - pub async fn client_cloned(&self) -> Option { - self.client.read().await.clone() - } - - /// Resolve a Matrix [`Room`] handle from its string room id. - /// - /// Returns: - /// - `Ok(Some(room))` — client is logged in and knows the room - /// - `Ok(None)` — client is logged in but the room isn't known (not - /// joined / never invited / wrong id) - /// - `Err(MatrixError::NotLoggedIn)` — no client yet - /// - `Err(MatrixError::Config(..))` — `room_id` isn't a valid Matrix id - /// - /// Exposed for consumers (e.g. `dirigent_core`'s `MatrixFactory`) - /// that need to look up a pre-existing room without taking a - /// `matrix_sdk` dependency of their own. 
- pub async fn room_by_id(&self, room_id: &str) -> Result> { - let client = self - .client_cloned() - .await - .ok_or(MatrixError::NotLoggedIn)?; - - let parsed: matrix_sdk::ruma::OwnedRoomId = room_id - .parse() - .map_err(|e: matrix_sdk::ruma::IdParseError| { - MatrixError::Config(format!("invalid room_id '{}': {}", room_id, e)) - })?; - - Ok(client.get_room(&parsed)) - } - - // ----------------------------------------------------------------------- - // Authentication - // ----------------------------------------------------------------------- - - /// Build the SDK client (with SQLite store) and authenticate. - /// - /// Attempts to restore a previously persisted session first. Falls back - /// to a fresh username/password login when no session is found. - pub async fn login(&self) -> Result<()> { - let store_path = self.data_dir.join(&self.behavior.store_path); - std::fs::create_dir_all(&store_path).map_err(|e| { - MatrixError::Config(format!("Failed to create store directory: {}", e)) - })?; - - let client = Client::builder() - .homeserver_url(&self.homeserver) - .sqlite_store(&store_path, None) - .build() - .await?; - - // Try to restore an existing session from the store. - if client.logged_in() { - info!( - homeserver = %self.homeserver, - username = %self.username, - "Restored existing Matrix session" - ); - *self.client.write().await = Some(client); - return Ok(()); - } - - // No stored session — authenticate based on the configured mode. - match self.behavior.mode { - crate::config::MatrixConnectionMode::Bot => { - // Existing flow: login with bot username/password. 
- let password = self - .account - .resolve_credential("password") - .map_err(|e| { - MatrixError::Config(format!("Failed to resolve password: {}", e)) - })?; - - info!( - homeserver = %self.homeserver, - username = %self.username, - "Performing fresh Matrix login (bot mode)" - ); - - client - .matrix_auth() - .login_username(&self.username, &password) - .device_id(&self.device_id) - .initial_device_display_name(&self.display_name_str) - .send() - .await?; - } - crate::config::MatrixConnectionMode::Provisioned => { - // Restore session from a stored virtual-user access token. - let token = self - .account - .resolve_credential("token") - .map_err(|e| { - MatrixError::Config(format!("Failed to resolve token: {}", e)) - })?; - - let user_id_str = self - .account - .property_str("user_id") - .ok_or_else(|| { - MatrixError::Config( - "Provisioned mode requires 'user_id' property on account".into(), - ) - })?; - - info!( - homeserver = %self.homeserver, - user_id = %user_id_str, - "Restoring provisioned session with access token" - ); - - use matrix_sdk::matrix_auth::MatrixSession; - use matrix_sdk::ruma::{OwnedDeviceId, OwnedUserId}; - - let user_id: OwnedUserId = user_id_str.try_into().map_err(|_| { - MatrixError::Config(format!("Invalid user_id: {}", user_id_str)) - })?; - let device_id: OwnedDeviceId = self.device_id.clone().into(); - - let session = MatrixSession { - meta: matrix_sdk::SessionMeta { - user_id, - device_id, - }, - tokens: matrix_sdk::matrix_auth::MatrixSessionTokens { - access_token: token, - refresh_token: None, - }, - }; - - client.matrix_auth().restore_session(session).await?; - } - } - - info!( - homeserver = %self.homeserver, - username = %self.username, - "Matrix login successful" - ); - - *self.client.write().await = Some(client); - Ok(()) - } - - // ----------------------------------------------------------------------- - // Sync loop - // ----------------------------------------------------------------------- - - /// Start the background 
sync task. - /// - /// Registers an event handler for incoming room messages and spawns a - /// task that runs `client.sync()` indefinitely. Returns immediately - /// after spawning. - /// - /// # Errors - /// - /// Returns [`MatrixError::NotLoggedIn`] if `login()` was not called first. - pub async fn start_sync(&self) -> Result<()> { - let client = self - .client - .read() - .await - .clone() - .ok_or(MatrixError::NotLoggedIn)?; - - if !client.logged_in() { - return Err(MatrixError::NotLoggedIn); - } - - let shares = Arc::clone(&self.shares); - let bot_user_id: Option = client.user_id().map(|u| u.to_owned()); - - // Register the room message event handler. - client.add_event_handler({ - let shares = Arc::clone(&shares); - let bot_user_id = bot_user_id.clone(); - move |ev: SyncRoomMessageEvent, room: Room| { - let shares = Arc::clone(&shares); - let bot_user_id = bot_user_id.clone(); - async move { - on_room_message(ev, room, shares, bot_user_id).await; - } - } - }); - - // Spawn the sync loop. - tokio::spawn(async move { - info!("Starting Matrix sync loop"); - if let Err(e) = client.sync(SyncSettings::default()).await { - error!("Matrix sync loop exited with error: {}", e); - } - }); - - Ok(()) - } - - // ----------------------------------------------------------------------- - // Share registry - // ----------------------------------------------------------------------- - - /// Register a share, making it eligible to receive Matrix messages. - /// - /// # Errors - /// - /// Returns [`MatrixError::ShareAlreadyExists`] if a share for the same - /// `(connector_id, session_id)` pair is already registered. 
- pub async fn register_share(&self, share: MatrixSessionShare) -> Result<()> { - let key = (share.connector_id.clone(), share.session_id.clone()); - let mut map = self.shares.write().await; - if map.contains_key(&key) { - return Err(MatrixError::ShareAlreadyExists { - connector_id: key.0, - session_id: key.1, - }); - } - map.insert(key, share); - Ok(()) - } - - /// Remove a share from the registry and shut it down. - /// - /// # Errors - /// - /// Returns [`MatrixError::ShareNotFound`] if no matching share exists. - pub async fn remove_share(&self, connector_id: &str, session_id: &str) -> Result<()> { - let key = (connector_id.to_owned(), session_id.to_owned()); - let share = self - .shares - .write() - .await - .remove(&key) - .ok_or_else(|| MatrixError::ShareNotFound { - connector_id: connector_id.to_owned(), - session_id: session_id.to_owned(), - })?; - share.shutdown().await; - Ok(()) - } - - /// Return the number of currently registered shares. - pub async fn share_count(&self) -> usize { - self.shares.read().await.len() - } - - /// Return the room IDs and keys for all currently registered shares. - /// - /// Returns a list of `(connector_id, session_id, room_id)` tuples. - pub async fn list_shares(&self) -> Vec<(String, String, String)> { - self.shares - .read() - .await - .iter() - .map(|((cid, sid), s)| (cid.clone(), sid.clone(), s.room_id.clone())) - .collect() - } - - /// Look up a share by connector and session ID. - /// - /// Returns `None` when not found. - pub async fn get_share( - &self, - connector_id: &str, - session_id: &str, - ) -> Option<(String, bool)> { - let key = (connector_id.to_owned(), session_id.to_owned()); - let map = self.shares.read().await; - if let Some(share) = map.get(&key) { - let room_id = share.room_id.clone(); - // is_active requires an await; we can't hold the read guard across - // the await point. Drop the guard first. 
- drop(map); - let active = { - // Re-acquire to call is_active (we still have shares Arc) - let map = self.shares.read().await; - if let Some(s) = map.get(&key) { - s.is_active().await - } else { - false - } - }; - Some((room_id, active)) - } else { - None - } - } - - /// Shut down all shares and signal the service to stop. - pub async fn shutdown(&self) { - let mut map = self.shares.write().await; - for (_, share) in map.drain() { - share.shutdown().await; - } - // Release the client so the sync loop can terminate naturally. - *self.client.write().await = None; - } -} - -// --------------------------------------------------------------------------- -// Event handler: Matrix → Dirigent -// --------------------------------------------------------------------------- - -/// Called by the SDK for every incoming room message. -/// -/// Looks up which registered share owns this room, then calls -/// `share.inject_message(text)` so the text flows into the Dirigent session. -/// Messages from the bot itself are skipped. -async fn on_room_message( - ev: SyncRoomMessageEvent, - room: Room, - shares: Arc>>, - bot_user_id: Option, -) { - // We only care about original (non-redacted, non-edited) messages. - let original = match ev.as_original() { - Some(o) => o, - None => return, - }; - - // Skip bot's own messages. - if let Some(bot_id) = &bot_user_id { - if original.sender == *bot_id { - return; - } - } - - // Extract plain-text body. - let text = match &original.content.msgtype { - MessageType::Text(t) => t.body.clone(), - MessageType::Notice(_) => return, // ignore notices (including our own) - _ => return, - }; - - let room_id_str = room.room_id().as_str().to_owned(); - - // Find the share that owns this room. 
- let map = shares.read().await; - let matching = map - .values() - .find(|s| s.room_id == room_id_str); - - if let Some(share) = matching { - debug!( - room_id = %room_id_str, - connector_id = %share.connector_id, - session_id = %share.session_id, - "Injecting Matrix message into Dirigent session" - ); - share.inject_message(&text).await; - } else { - warn!( - room_id = %room_id_str, - "Received message in unregistered room, ignoring" - ); - } -} diff --git a/crates/dirigent_matrix/src/share.rs b/crates/dirigent_matrix/src/share.rs deleted file mode 100644 index 9a487b4..0000000 --- a/crates/dirigent_matrix/src/share.rs +++ /dev/null @@ -1,723 +0,0 @@ -use std::sync::Arc; - -use chrono::{DateTime, Utc}; -use matrix_sdk::Room; -use tokio::sync::{broadcast, mpsc, oneshot, Mutex, RwLock}; -use tracing::{debug, error, warn}; -use uuid::Uuid; - -use dirigent_protocol::accumulator::{AccumulatedMessage, AccumulatedPart, MessageAccumulator, ToolCallData}; -use dirigent_protocol::{ContentBlock, Event, MessageRole, SessionUpdate}; - -/// Command proxy sent from Matrix → Dirigent direction. -/// -/// The caller who wires up the share is responsible for translating this -/// into a real `ConnectorCommand::SendMessage` (or equivalent) using their -/// own connector/session handle. -#[derive(Debug, Clone)] -pub struct ConnectorCommandProxy { - pub session_id: String, - pub text: String, -} - -/// A bidirectional bridge between a Dirigent session and a Matrix room. -/// -/// **Dirigent → Matrix**: Subscribes to a broadcast event stream, forwards -/// completed assistant messages and session errors into the Matrix room as -/// `m.notice` messages. -/// -/// **Matrix → Dirigent**: The `inject_message` method is called by -/// `MatrixService` when a Matrix message arrives; it sends a -/// `ConnectorCommandProxy` through an mpsc channel that the owner can read. -pub struct MatrixSessionShare { - /// Connector that owns the session. 
- pub connector_id: String, - /// Session being bridged (native connector session ID). - pub session_id: String, - /// Scroll ID for the archived session this share is scoped to. - /// - /// Required by `SessionStream::scope()` to select `StreamScope::Session`. - pub scroll_id: Uuid, - /// Matrix room ID (as a string, e.g. "!abc:example.com"). - pub room_id: String, - /// Room handle used by the stream `on_event` path when no legacy - /// forwarder task is running. Behind an `Option` so `start()` — which - /// consumes the `Room` when spawning the legacy forwarder — can leave - /// it empty. - room_for_stream: Option, - /// Shared message accumulator so streaming chunks survive across - /// multiple `on_event` calls (and the legacy forwarder task). - accumulator: Arc>, - /// When this share was activated (for `StreamSummary::active_since`). - active_since: DateTime, - - /// Sender side of the Matrix→Dirigent command channel. - command_tx: mpsc::Sender, - /// Shutdown signal for the event-forwarder task. - shutdown_tx: Arc>>>, - /// Whether the forwarder task is still running. - is_active: Arc>, -} - -impl MatrixSessionShare { - /// Construct and start a new `MatrixSessionShare`. - /// - /// Spawns a background task that reads from `event_rx`, filters for - /// events belonging to `(connector_id, session_id)`, and forwards - /// relevant ones into the Matrix `room`. - /// - /// Returns the share and the receiver end of the Matrix→Dirigent channel. - pub fn start( - connector_id: String, - session_id: String, - room_id: String, - room: Room, - event_rx: broadcast::Receiver, - ) -> (Self, mpsc::Receiver) { - // Legacy start() keeps ownership of the Room inside the forwarder - // task; `scroll_id` isn't known by the legacy call path so we - // default it to `Uuid::nil()`. The stream-path consumers use - // `new_for_stream` instead and supply a real scroll_id. 
- Self::start_with_scroll( - connector_id, - session_id, - Uuid::nil(), - room_id, - room, - event_rx, - ) - } - - /// Same as `start`, but lets callers attach the `scroll_id` of the - /// archived session — required for `SessionStream::scope()` to be - /// meaningful when the share is also driven as a stream. - pub fn start_with_scroll( - connector_id: String, - session_id: String, - scroll_id: Uuid, - room_id: String, - room: Room, - event_rx: broadcast::Receiver, - ) -> (Self, mpsc::Receiver) { - let (command_tx, command_rx) = mpsc::channel(32); - let (shutdown_tx, shutdown_rx) = oneshot::channel(); - let is_active = Arc::new(RwLock::new(true)); - let accumulator = Arc::new(Mutex::new(MessageAccumulator::new())); - - let share = MatrixSessionShare { - connector_id: connector_id.clone(), - session_id: session_id.clone(), - scroll_id, - room_id: room_id.clone(), - // Legacy path: the forwarder task owns the Room, so we don't - // hold a second handle on the struct. `on_event` would double- - // drive delivery if both were active. - room_for_stream: None, - accumulator: accumulator.clone(), - active_since: Utc::now(), - command_tx, - shutdown_tx: Arc::new(RwLock::new(Some(shutdown_tx))), - is_active: is_active.clone(), - }; - - // Spawn the event-forwarder task - tokio::spawn(run_event_forwarder( - connector_id, - session_id, - room, - event_rx, - shutdown_rx, - is_active, - accumulator, - )); - - (share, command_rx) - } - - /// Construct a share wired for the stream path (`SessionStream`) only. - /// - /// No legacy event-forwarder task is spawned — the - /// `StreamRegistry` worker will drive `on_event` instead. The Room - /// handle is retained on the struct so `on_event` can deliver to it. - /// - /// Returns the share plus the receiver end of the Matrix→Dirigent - /// command channel (identical semantics to `start`). 
- pub fn new_for_stream( - connector_id: String, - session_id: String, - scroll_id: Uuid, - room_id: String, - room: Room, - ) -> (Self, mpsc::Receiver) { - let (command_tx, command_rx) = mpsc::channel(32); - let (shutdown_tx, _shutdown_rx) = oneshot::channel(); - let is_active = Arc::new(RwLock::new(true)); - let accumulator = Arc::new(Mutex::new(MessageAccumulator::new())); - - let share = MatrixSessionShare { - connector_id, - session_id, - scroll_id, - room_id, - room_for_stream: Some(room), - accumulator, - active_since: Utc::now(), - command_tx, - shutdown_tx: Arc::new(RwLock::new(Some(shutdown_tx))), - is_active, - }; - - (share, command_rx) - } - - /// Inject a message received from Matrix into the Dirigent session. - /// - /// Called by `MatrixService` when a room message arrives (filtered to - /// skip the bot's own messages). Sends a `ConnectorCommandProxy` through - /// the internal mpsc channel. - pub async fn inject_message(&self, text: &str) { - let proxy = ConnectorCommandProxy { - session_id: self.session_id.clone(), - text: text.to_owned(), - }; - if let Err(e) = self.command_tx.send(proxy).await { - warn!( - connector_id = %self.connector_id, - session_id = %self.session_id, - "Failed to inject Matrix message into session (channel closed): {}", - e - ); - } - } - - /// Signal the event-forwarder task to stop and wait for it to finish. - pub async fn shutdown(&self) { - let tx = self.shutdown_tx.write().await.take(); - if let Some(tx) = tx { - let _ = tx.send(()); - } - // Give the task a moment to notice the shutdown signal. - // The is_active flag is set to false by the task itself when it exits. - } - - /// Whether the event-forwarder task is still running. 
- pub async fn is_active(&self) -> bool { - *self.is_active.read().await - } -} - -// --------------------------------------------------------------------------- -// Internal event-forwarder task -// --------------------------------------------------------------------------- - -async fn run_event_forwarder( - connector_id: String, - session_id: String, - room: Room, - mut event_rx: broadcast::Receiver, - shutdown_rx: oneshot::Receiver<()>, - is_active: Arc>, - accumulator: Arc>, -) { - // Fuse the shutdown signal so we can use it inside tokio::select! - let mut shutdown_rx = shutdown_rx; - - loop { - tokio::select! { - _ = &mut shutdown_rx => { - debug!( - connector_id = %connector_id, - session_id = %session_id, - "MatrixSessionShare forwarder received shutdown signal" - ); - break; - } - result = event_rx.recv() => { - match result { - Err(broadcast::error::RecvError::Closed) => { - debug!( - connector_id = %connector_id, - session_id = %session_id, - "Event broadcast channel closed, stopping forwarder" - ); - break; - } - Err(broadcast::error::RecvError::Lagged(n)) => { - warn!( - connector_id = %connector_id, - session_id = %session_id, - "Event forwarder lagged by {} messages", - n - ); - continue; - } - Ok(event) => { - let mut acc = accumulator.lock().await; - handle_event( - &event, - &connector_id, - &session_id, - &room, - &mut *acc, - ).await; - } - } - } - } - } - - *is_active.write().await = false; -} - -/// Handle a single protocol event: forward relevant ones to the Matrix room. 
-async fn handle_event( - event: &Event, - connector_id: &str, - session_id: &str, - room: &Room, - accumulator: &mut MessageAccumulator, -) { - use matrix_sdk::ruma::events::room::message::RoomMessageEventContent; - - match event { - // -- Streaming accumulation: gather chunks into the accumulator -- - Event::SessionUpdate { - connector_id: cid, - session_id: sid, - update, - } if cid == connector_id && sid == session_id => { - // Send typing indicator on first chunk of a new message - let is_agent_chunk = matches!( - update, - SessionUpdate::AgentMessageChunk { .. } - | SessionUpdate::AgentThoughtChunk { .. } - | SessionUpdate::ToolCall { .. } - ); - let msg_id = match update { - SessionUpdate::AgentMessageChunk { message_id, .. } - | SessionUpdate::AgentThoughtChunk { message_id, .. } - | SessionUpdate::ToolCall { message_id, .. } - | SessionUpdate::ToolCallUpdate { message_id, .. } => Some(message_id.as_str()), - _ => None, - }; - if is_agent_chunk { - if let Some(mid) = msg_id { - if !accumulator.has_buffer(mid) { - // First chunk for this message — start typing indicator - let _ = room.typing_notice(true).await; - } - } - } - - match update { - SessionUpdate::AgentMessageChunk { - message_id, - content, - .. - } => { - accumulator.add_chunk(message_id, session_id, &connector_id, "assistant", content.clone()); - } - SessionUpdate::AgentThoughtChunk { - message_id, - content, - .. - } => { - if let ContentBlock::Text { text } = content { - accumulator.add_thinking(message_id, session_id, &connector_id, text); - } - } - SessionUpdate::ToolCall { - message_id, - tool_call, - .. - } => { - accumulator.add_or_update_tool_call( - message_id, - ToolCallData { - id: tool_call.id.clone(), - tool_name: tool_call.tool_name.clone(), - input: tool_call.raw_input.clone().unwrap_or_default(), - output: tool_call.raw_output.clone(), - }, - ); - } - SessionUpdate::ToolCallUpdate { - message_id, - tool_call, - .. 
- } => { - accumulator.add_or_update_tool_call( - message_id, - ToolCallData { - id: tool_call.id.clone(), - tool_name: tool_call.tool_name.clone(), - input: tool_call.raw_input.clone().unwrap_or_default(), - output: tool_call.raw_output.clone(), - }, - ); - } - _ => {} // UserMessageChunk, Unknown -- not forwarded - } - } - - // -- Streaming finalization: send accumulated content on TurnComplete -- - Event::TurnComplete { - connector_id: cid, - session_id: sid, - message_id, - .. - } if cid == connector_id && sid == session_id => { - if let Some(accumulated) = accumulator.finalize(message_id) { - if accumulated.role == "assistant" && !accumulated.is_empty() { - send_accumulated_to_matrix(&accumulated, room).await; - } - } - // Stop typing indicator - let _ = room.typing_notice(false).await; - debug!( - connector_id = %connector_id, - session_id = %session_id, - message_id = %message_id, - "TurnComplete received for bridged session" - ); - } - - // -- Non-streaming fallback: send content from MessageCompleted -- - Event::MessageCompleted { - connector_id: cid, - message, - } if cid == connector_id && message.session_id == session_id => { - if message.role != MessageRole::Assistant { - debug!( - connector_id = %connector_id, - session_id = %session_id, - role = ?message.role, - "Skipping non-assistant MessageCompleted" - ); - return; - } - - // Non-streaming path: content populated directly in MessageCompleted. - // Skip if the accumulator already has data for this message (streaming - // path handles delivery via TurnComplete instead). - if !message.content.is_empty() && !accumulator.has_buffer(&message.id) { - let accumulated = AccumulatedMessage::from_message_parts( - message.id.clone(), - message.session_id.clone(), - connector_id.to_string(), - "assistant".to_string(), - &message.content, - ); - send_accumulated_to_matrix(&accumulated, room).await; - } - } - - Event::SessionError { - connector_id: cid, - session_id: sid, - error_message, - is_recoverable, - .. 
- } if cid == connector_id && sid == session_id => { - let notice = if *is_recoverable { - format!("\u{26a0}\u{fe0f} Session warning: {}", error_message) - } else { - format!("\u{274c} Session error (unrecoverable): {}", error_message) - }; - let content = RoomMessageEventContent::notice_plain(notice); - if let Err(e) = room.send(content).await { - error!( - connector_id = %connector_id, - session_id = %session_id, - "Failed to send session error to Matrix room: {}", - e - ); - } - } - - // Events for *this* session but not handled above (expected, low noise) - Event::SessionIdle { session_id: sid, .. } - | Event::SessionMetadataUpdated { - session_id: sid, .. - } - if sid == session_id => - { - debug!( - connector_id = %connector_id, - session_id = %session_id, - event = event_name(event), - "Ignoring non-forwarded event for this session" - ); - } - - // Events for a *different* session/connector -- expected, just noise - Event::MessageCompleted { - connector_id: cid, - message, - } if cid != connector_id || message.session_id != session_id => { - // Different session's message -- expected on a shared broadcast channel - } - Event::TurnComplete { - connector_id: cid, - session_id: sid, - .. - } if cid != connector_id || sid != session_id => { - // Different session -- expected - } - Event::SessionError { - connector_id: cid, - session_id: sid, - .. - } if cid != connector_id || sid != session_id => { - // Different session -- expected - } - Event::SessionUpdate { - connector_id: cid, - session_id: sid, - .. - } if cid != connector_id || sid != session_id => { - // Different session -- expected - } - - // Connector lifecycle, inspector, system events -- expected on broadcast - Event::ConnectorCreated { .. } - | Event::ConnectorRemoved { .. } - | Event::ConnectorStateChanged { .. } - | Event::Connected - | Event::Disconnected - | Event::InspectorSnapshot { .. } - | Event::InspectorNodeRegistered { .. } - | Event::InspectorNodeRemoved { .. 
} - | Event::InspectorStateChanged { .. } - | Event::InspectorPropertiesUpdated { .. } - | Event::SystemTaskStatusChanged { .. } - | Event::SessionsListed { .. } - | Event::SessionCreated { .. } - | Event::SessionUpdated { .. } - | Event::SessionDeleted { .. } - | Event::SessionClosed { .. } - | Event::MessagesListed { .. } - | Event::SessionSystemMessageSet { .. } - | Event::SessionMetadataReceived { .. } - | Event::SessionTransferred { .. } - | Event::ForwardingPanic { .. } - | Event::AgentRequest { .. } - | Event::AcpClientConnected { .. } - | Event::AcpClientDisconnected { .. } - | Event::AcpClientSessionOpened { .. } - | Event::AcpClientSessionRouted { .. } - | Event::MessageStarted { .. } - | Event::MessageFailed { .. } - | Event::Error { .. } - | Event::SessionRegistered { .. } => { - // Expected broadcast traffic, not relevant to this share - } - - other => { - warn!( - connector_id = %connector_id, - session_id = %session_id, - event = event_name(other), - "Unhandled event type in Matrix forwarder" - ); - } - } -} - -/// Render markdown text to HTML for Matrix consumption. -fn markdown_to_html(markdown: &str) -> String { - use pulldown_cmark::{Options, Parser}; - - let mut options = Options::empty(); - options.insert(Options::ENABLE_STRIKETHROUGH); - options.insert(Options::ENABLE_TABLES); - - let parser = Parser::new_ext(markdown, options); - let mut html = String::new(); - pulldown_cmark::html::push_html(&mut html, parser); - html -} - -/// Send an accumulated message to a Matrix room, one message per content part. 
-async fn send_accumulated_to_matrix(msg: &AccumulatedMessage, room: &Room) { - use matrix_sdk::ruma::events::room::message::RoomMessageEventContent; - - for part in &msg.parts { - let content = match part { - AccumulatedPart::Text { text } if !text.is_empty() => { - let html = markdown_to_html(text); - Some(RoomMessageEventContent::text_html(text.clone(), html)) - } - AccumulatedPart::Thinking { text } if !text.is_empty() => { - Some(RoomMessageEventContent::notice_plain(format!( - "\u{1f4ad} {text}" - ))) - } - AccumulatedPart::Tool { data } => { - let mut notice = format!("\u{1f527} Tool: {}", data.tool_name); - if let Some(out) = &data.output { - let out_str = if let Some(s) = out.as_str() { - s.to_string() - } else { - serde_json::to_string_pretty(out).unwrap_or_default() - }; - if !out_str.is_empty() { - let truncated = if out_str.len() > 500 { - format!("{}... (truncated)", &out_str[..500]) - } else { - out_str - }; - notice.push_str(&format!("\nOutput: {truncated}")); - } - } - Some(RoomMessageEventContent::notice_plain(notice)) - } - _ => None, - }; - if let Some(content) = content { - if let Err(e) = room.send(content).await { - error!("Failed to send message part to Matrix room: {}", e); - } - } - } -} - -/// Return a human-readable name for an event variant (for logging). -fn event_name(event: &Event) -> &'static str { - match event { - Event::SessionsListed { .. } => "SessionsListed", - Event::SessionCreated { .. } => "SessionCreated", - Event::SessionUpdated { .. } => "SessionUpdated", - Event::SessionMetadataUpdated { .. } => "SessionMetadataUpdated", - Event::SessionDeleted { .. } => "SessionDeleted", - Event::SessionClosed { .. } => "SessionClosed", - Event::SessionSystemMessageSet { .. } => "SessionSystemMessageSet", - Event::SessionIdle { .. } => "SessionIdle", - Event::SessionMetadataReceived { .. } => "SessionMetadataReceived", - Event::TurnComplete { .. } => "TurnComplete", - Event::SessionError { .. 
} => "SessionError", - Event::SessionTransferred { .. } => "SessionTransferred", - Event::ForwardingPanic { .. } => "ForwardingPanic", - Event::SessionUpdate { .. } => "SessionUpdate", - Event::AgentRequest { .. } => "AgentRequest", - Event::AcpClientConnected { .. } => "AcpClientConnected", - Event::AcpClientDisconnected { .. } => "AcpClientDisconnected", - Event::AcpClientSessionOpened { .. } => "AcpClientSessionOpened", - Event::AcpClientSessionRouted { .. } => "AcpClientSessionRouted", - Event::MessagesListed { .. } => "MessagesListed", - Event::MessageStarted { .. } => "MessageStarted", - Event::MessageCompleted { .. } => "MessageCompleted", - Event::MessageFailed { .. } => "MessageFailed", - Event::ConnectorCreated { .. } => "ConnectorCreated", - Event::ConnectorRemoved { .. } => "ConnectorRemoved", - Event::ConnectorStateChanged { .. } => "ConnectorStateChanged", - Event::Connected => "Connected", - Event::Disconnected => "Disconnected", - Event::Error { .. } => "Error", - Event::InspectorSnapshot { .. } => "InspectorSnapshot", - Event::InspectorNodeRegistered { .. } => "InspectorNodeRegistered", - Event::InspectorNodeRemoved { .. } => "InspectorNodeRemoved", - Event::InspectorStateChanged { .. } => "InspectorStateChanged", - Event::InspectorPropertiesUpdated { .. } => "InspectorPropertiesUpdated", - Event::SessionRegistered { .. } => "SessionRegistered", - Event::SystemTaskStatusChanged { .. 
} => "SystemTaskStatusChanged", - } -} - -// --------------------------------------------------------------------------- -// SessionShare trait implementation -// --------------------------------------------------------------------------- - -#[async_trait::async_trait] -impl dirigent_protocol::sharing::SessionShare for MatrixSessionShare { - fn summary(&self) -> dirigent_protocol::sharing::ShareSummary { - dirigent_protocol::sharing::ShareSummary { - id: format!("matrix:{}:{}", self.connector_id, self.session_id), - connector_id: self.connector_id.clone(), - session_id: self.session_id.clone(), - backend: "matrix".to_string(), - destination: self.room_id.clone(), - active: self.is_active.try_read().map(|g| *g).unwrap_or(false), - } - } - - fn is_active(&self) -> bool { - self.is_active.try_read().map(|g| *g).unwrap_or(false) - } - - async fn shutdown(&self) { - // Delegate to the existing shutdown method (same implementation) - let tx = self.shutdown_tx.write().await.take(); - if let Some(tx) = tx { - let _ = tx.send(()); - } - } -} - -// --------------------------------------------------------------------------- -// SessionStream trait implementation (Phase 4 migration, Task 18) -// --------------------------------------------------------------------------- -// -// Dual-impl: `MatrixSessionShare` keeps its bi-directional `SessionShare` -// impl (room management, `inject_message`) while also gaining a -// uni-directional `SessionStream` impl so the `StreamRegistry` can drive it -// via the central `SharingBus`. -// -// When driven as a stream, the caller (factory) is expected to construct -// the share via `new_for_stream(..)` so a Room handle is stored on the -// struct. `on_event` then translates bus events back to the legacy -// `handle_event` dispatcher, preserving the accumulator state across -// calls via the shared `Arc>`. 
- -#[async_trait::async_trait] -impl dirigent_protocol::streaming::SessionStream for MatrixSessionShare { - fn summary(&self) -> dirigent_protocol::streaming::StreamSummary { - dirigent_protocol::streaming::StreamSummary { - name: format!("{}:{}", self.connector_id, self.session_id), - kind: dirigent_protocol::streaming::StreamKind::Matrix, - target: format!("matrix:{}", self.room_id), - active_since: self.active_since, - } - } - - fn scope(&self) -> dirigent_protocol::streaming::StreamScope { - dirigent_protocol::streaming::StreamScope::Session { - scroll_id: self.scroll_id, - } - } - - async fn on_event( - &self, - event: &dirigent_protocol::streaming::BusEvent, - ) -> dirigent_protocol::streaming::StreamOutcome { - // If we were started via the legacy `start()` path, the Room - // handle lives inside the forwarder task and this method has no - // way to deliver. Treat that as a deliberate skip rather than a - // transport failure. - let room = match &self.room_for_stream { - Some(r) => r, - None => return dirigent_protocol::streaming::StreamOutcome::Skipped, - }; - - let mut acc = self.accumulator.lock().await; - handle_event( - &*event.event, - &self.connector_id, - &self.session_id, - room, - &mut *acc, - ) - .await; - dirigent_protocol::streaming::StreamOutcome::Ok - } - - async fn shutdown(&self) { - // Delegate to the bi-directional shutdown; both impls share the - // same underlying oneshot + is_active signal. - let tx = self.shutdown_tx.write().await.take(); - if let Some(tx) = tx { - let _ = tx.send(()); - } - *self.is_active.write().await = false; - } -} diff --git a/crates/dirigent_taskrunner/CLAUDE.md b/crates/dirigent_taskrunner/CLAUDE.md deleted file mode 100644 index f5910c0..0000000 --- a/crates/dirigent_taskrunner/CLAUDE.md +++ /dev/null @@ -1,73 +0,0 @@ -# Package: dirigent_taskrunner - -Background task runner for managing child processes with output capture. 
- -## Quick Facts -- **Type**: Library -- **Main Entry**: src/lib.rs -- **Dependencies**: tokio, serde, chrono, thiserror, tracing, uuid - -## Overview - -The dirigent_taskrunner package provides a `TaskRunner` service that spawns, manages, and captures output from arbitrary shell commands. Tasks are defined with a title, slug name, command, arguments, and various options (working directory, startup behavior, output persistence, log rotation). - -## Architecture - -### Core Types - -- **TaskDefinition** — Configuration for a task: command, args, cwd, run_at_startup, persist_to_disk, rotate_previous, env vars -- **TaskStatus** — Runtime state enum: Stopped, Running{pid}, Finished{exit_code}, Failed{error} -- **TaskInfo** — Definition + status + timestamps (started_at, stopped_at) -- **OutputKind** — Stdout, Stderr, Combined -- **TaskId** — String alias (the task slug/name) - -### TaskRunner - -The main service. Uses interior mutability (RwLock) — all methods take `&self`. Designed to be wrapped in `Arc` and shared across async tasks. - -Key operations: -- `register(def)` — Add a task definition -- `start(name)` — Spawn the process, capture stdout/stderr to files -- `stop(name)` — Kill the process -- `poll_completed()` — Check running processes for exit (called from periodic timer) -- `list_tasks()` — Get all tasks with status -- `read_output(name, kind, tail_lines)` — Read captured output -- `remove(name)` — Delete a task definition - -### Output Storage - -Output is stored in `{tasks_dir}/{task_name}/`: -- `stdout.log` — Captured stdout -- `stderr.log` — Captured stderr -- `combined.log` — Interleaved with `[stdout]`/`[stderr]` prefixes - -Log rotation creates `.log.1`, `.log.2`, etc. 
- -## Integration - -- **Config**: `CoreConfig.tasks: Vec` in dirigent.toml (`[[tasks]]` sections) -- **Runtime**: `CoreRuntime.task_runner_slot()` holds `Arc>>>` -- **API**: Server functions in `api::tasks` (list, start, stop, output, create, update, delete) -- **UI**: Tasks ribbon mode + Configuration > Tasks section -- **Inspector**: Registered as `dirigent/services/task-runner` -- **Paths**: `DirigentPaths::tasks_dir()` returns `{data_dir}/tasks/` - -## Configuration Example - -```toml -[[tasks]] -name = "lspmux" -title = "LSP Mux Server" -command = "lspmux" -args = ["server"] -run_at_startup = true -persist_to_disk = true -rotate_previous = true -``` - -## Key Files - -- `src/types.rs` — TaskDefinition, TaskStatus, TaskInfo, OutputKind -- `src/runner.rs` — TaskRunner service, TaskError -- `src/output.rs` — TaskOutputManager (file I/O, rotation) -- `src/lib.rs` — Public exports diff --git a/crates/dirigent_taskrunner/Cargo.toml b/crates/dirigent_taskrunner/Cargo.toml deleted file mode 100644 index a045a16..0000000 --- a/crates/dirigent_taskrunner/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = "dirigent_taskrunner" -version = "0.1.0" -edition = "2021" - -[lib] -path = "src/lib.rs" - -[dependencies] -chrono = { version = "0.4", features = ["serde"] } -dirigent_process = { path = "../dirigent_process", features = ["tokio"] } -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -thiserror = "2.0" -tokio = { version = "1", features = ["process", "io-util", "fs", "sync", "time", "rt"] } -tracing = "0.1" -uuid = { version = "1.0", features = ["v7", "serde"] } diff --git a/crates/dirigent_taskrunner/src/lib.rs b/crates/dirigent_taskrunner/src/lib.rs deleted file mode 100644 index 8e5fb56..0000000 --- a/crates/dirigent_taskrunner/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -pub mod types; -pub mod output; -mod runner; - -pub use types::*; -pub use output::TaskOutputManager; -pub use runner::{TaskRunner, TaskError}; diff --git 
a/crates/dirigent_taskrunner/src/output.rs b/crates/dirigent_taskrunner/src/output.rs deleted file mode 100644 index bcaa56e..0000000 --- a/crates/dirigent_taskrunner/src/output.rs +++ /dev/null @@ -1,84 +0,0 @@ -use std::path::PathBuf; -use tokio::fs; - -use crate::types::OutputKind; - -/// Manages output files for a single task -pub struct TaskOutputManager { - base_dir: PathBuf, -} - -impl TaskOutputManager { - pub fn new(base_dir: PathBuf) -> Self { - Self { base_dir } - } - - pub async fn ensure_dir(&self) -> std::io::Result<()> { - fs::create_dir_all(&self.base_dir).await - } - - pub fn stdout_path(&self) -> PathBuf { - self.base_dir.join("stdout.log") - } - pub fn stderr_path(&self) -> PathBuf { - self.base_dir.join("stderr.log") - } - pub fn combined_path(&self) -> PathBuf { - self.base_dir.join("combined.log") - } - - /// Rotate existing files (.log -> .log.1, .log.1 -> .log.2, etc.) - pub async fn rotate(&self) -> std::io::Result<()> { - for name in &["stdout.log", "stderr.log", "combined.log"] { - let path = self.base_dir.join(name); - if fs::try_exists(&path).await.unwrap_or(false) { - let mut n = 1; - loop { - let rotated = self.base_dir.join(format!("{}.{}", name, n)); - if !fs::try_exists(&rotated).await.unwrap_or(false) { - fs::rename(&path, &rotated).await?; - break; - } - n += 1; - } - } - } - Ok(()) - } - - /// Read output file contents (tail N lines if specified) - pub async fn read_output( - &self, - kind: OutputKind, - tail_lines: Option, - ) -> std::io::Result { - let path = match kind { - OutputKind::Stdout => self.stdout_path(), - OutputKind::Stderr => self.stderr_path(), - OutputKind::Combined => self.combined_path(), - }; - - if !fs::try_exists(&path).await.unwrap_or(false) { - return Ok(String::new()); - } - - let content = fs::read_to_string(&path).await?; - - if let Some(n) = tail_lines { - let lines: Vec<&str> = content.lines().collect(); - let start = lines.len().saturating_sub(n); - Ok(lines[start..].join("\n")) - } else { - 
Ok(content) - } - } - - pub async fn clear(&self) -> std::io::Result<()> { - for path in &[self.stdout_path(), self.stderr_path(), self.combined_path()] { - if fs::try_exists(path).await.unwrap_or(false) { - fs::write(path, b"").await?; - } - } - Ok(()) - } -} diff --git a/crates/dirigent_taskrunner/src/runner.rs b/crates/dirigent_taskrunner/src/runner.rs deleted file mode 100644 index 2a8d9d9..0000000 --- a/crates/dirigent_taskrunner/src/runner.rs +++ /dev/null @@ -1,467 +0,0 @@ -use crate::output::TaskOutputManager; -use crate::types::*; -use std::collections::HashMap; -use std::path::PathBuf; -use tokio::io::AsyncBufReadExt; -use tokio::process::Command; -use tokio::sync::RwLock; - -#[derive(Debug, thiserror::Error)] -pub enum TaskError { - #[error("Task '{0}' not found")] - NotFound(String), - #[error("Task '{0}' is already running")] - AlreadyRunning(String), - #[error("Task '{0}' is not running")] - NotRunning(String), - #[error("Failed to spawn process: {0}")] - SpawnFailed(String), - #[error("IO error: {0}")] - Io(#[from] std::io::Error), - #[error("Task name '{0}' already exists")] - DuplicateName(String), -} - -struct RunningTask { - abort_handles: Vec>, - child: tokio::process::Child, - lifecycle: Option>, -} - -/// The main task runner service. -/// All methods take &self — uses interior mutability for shared access. 
-pub struct TaskRunner { - definitions: RwLock>, - statuses: RwLock>, - started_at: RwLock>>, - stopped_at: RwLock>>, - running: RwLock>, - tasks_dir: PathBuf, - default_working_dir: PathBuf, - process_manager: Option>, -} - -impl TaskRunner { - pub fn new( - tasks_dir: PathBuf, - default_working_dir: PathBuf, - process_manager: Option>, - ) -> Self { - Self { - definitions: RwLock::new(HashMap::new()), - statuses: RwLock::new(HashMap::new()), - started_at: RwLock::new(HashMap::new()), - stopped_at: RwLock::new(HashMap::new()), - running: RwLock::new(HashMap::new()), - tasks_dir, - default_working_dir, - process_manager, - } - } - - pub fn tasks_dir(&self) -> &PathBuf { - &self.tasks_dir - } - - /// Register a task definition (does not start it). - /// Allows re-registration to update an existing task. - pub async fn register(&self, def: TaskDefinition) -> Result<(), TaskError> { - let name = def.name.clone(); - self.definitions.write().await.insert(name.clone(), def); - self.statuses - .write() - .await - .entry(name) - .or_insert(TaskStatus::Stopped); - Ok(()) - } - - /// Remove a task definition (stops it if running) - pub async fn remove(&self, name: &str) -> Result<(), TaskError> { - if self.is_running(name).await { - self.stop(name).await?; - } - self.definitions.write().await.remove(name); - self.statuses.write().await.remove(name); - self.started_at.write().await.remove(name); - self.stopped_at.write().await.remove(name); - Ok(()) - } - - pub async fn is_running(&self, name: &str) -> bool { - matches!( - self.statuses.read().await.get(name), - Some(TaskStatus::Running { .. }) - ) - } - - /// Start a task by name - pub async fn start(&self, name: &str) -> Result<(), TaskError> { - let def = { - let defs = self.definitions.read().await; - defs.get(name) - .cloned() - .ok_or_else(|| TaskError::NotFound(name.to_string()))? 
- }; - - if self.is_running(name).await { - return Err(TaskError::AlreadyRunning(name.to_string())); - } - - let output_mgr = TaskOutputManager::new(self.tasks_dir.join(&def.name)); - output_mgr.ensure_dir().await?; - - if def.rotate_previous { - if let Err(e) = output_mgr.rotate().await { - tracing::warn!("Failed to rotate output for task {}: {}", name, e); - } - } - - // Resolve working directory: explicit > default > current process dir - let raw_cwd = def - .working_directory - .clone() - .unwrap_or_else(|| self.default_working_dir.clone()); - - // Canonicalize to an absolute path; fall back to current dir if invalid - let cwd = match std::fs::canonicalize(&raw_cwd) { - Ok(p) => p, - Err(_) => { - tracing::warn!( - "Task '{}': working directory '{}' invalid, falling back to current dir", - name, - raw_cwd.display() - ); - std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")) - } - }; - - let lifecycle = self.process_manager.as_ref().map(|mgr| mgr.create_lifecycle()); - - let mut cmd = Command::new(&def.command); - cmd.args(&def.args) - .current_dir(&cwd) - .stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()) - .kill_on_drop(true); - - if let Some(ref lc) = lifecycle { - lc.configure_async_command(&mut cmd); - } - - for (key, value) in &def.env { - cmd.env(key, value); - } - - let mut child = match cmd.spawn() { - Ok(child) => child, - Err(e) => { - let error_msg = format!("{} (cwd: {}): {}", def.command, cwd.display(), e); - // Write error to stderr.log and combined.log so the user can see it in the output viewer - let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); - let log_line = format!("[{}] Failed to start: {}\n", timestamp, error_msg); - if def.persist_to_disk { - let _ = tokio::fs::write(output_mgr.stderr_path(), log_line.as_bytes()).await; - let _ = tokio::fs::write(output_mgr.combined_path(), format!("[stderr] {}", log_line).as_bytes()).await; - } - // Set status to Failed so the UI shows it - 
self.statuses.write().await.insert(name.to_string(), TaskStatus::Failed { error: error_msg.clone() }); - self.stopped_at.write().await.insert(name.to_string(), chrono::Utc::now()); - return Err(TaskError::SpawnFailed(error_msg)); - } - }; - let pid = child.id().unwrap_or(0); - tracing::info!("Task '{}' started with PID {} (cwd: {})", name, pid, cwd.display()); - - if let Some(ref lc) = lifecycle { - if let Some(child_pid) = child.id() { - if let Err(e) = lc.register_child(child_pid) { - tracing::warn!(error = %e, "Failed to register task child with process lifecycle"); - } - } - } - - let stdout = child.stdout.take(); - let stderr = child.stderr.take(); - let persist = def.persist_to_disk; - let mut abort_handles = Vec::new(); - - // When not rotating, truncate old logs so we don't accumulate output across restarts - let truncate = !def.rotate_previous; - if truncate && persist { - let _ = tokio::fs::write(output_mgr.stdout_path(), b"").await; - let _ = tokio::fs::write(output_mgr.stderr_path(), b"").await; - let _ = tokio::fs::write(output_mgr.combined_path(), b"").await; - } - - // Stdout capture task - if let Some(stdout) = stdout { - let stdout_path = output_mgr.stdout_path(); - let combined_path = output_mgr.combined_path(); - let task_name = name.to_string(); - let h = tokio::spawn(async move { - let reader = tokio::io::BufReader::new(stdout); - let mut lines = reader.lines(); - let mut stdout_file = if persist { - tokio::fs::OpenOptions::new() - .create(true) - .append(true) - .open(&stdout_path) - .await - .ok() - } else { - None - }; - let mut combined_file = if persist { - tokio::fs::OpenOptions::new() - .create(true) - .append(true) - .open(&combined_path) - .await - .ok() - } else { - None - }; - - while let Ok(Some(line)) = lines.next_line().await { - let ts = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); - if let Some(ref mut f) = stdout_file { - let _ = tokio::io::AsyncWriteExt::write_all( - f, - format!("[{}] {}\n", ts, line).as_bytes(), - ) 
- .await; - } - if let Some(ref mut f) = combined_file { - let _ = tokio::io::AsyncWriteExt::write_all( - f, - format!("[{}] [stdout] {}\n", ts, line).as_bytes(), - ) - .await; - } - } - tracing::debug!("Stdout capture ended for task '{}'", task_name); - }); - abort_handles.push(h); - } - - // Stderr capture task - if let Some(stderr) = stderr { - let stderr_path = output_mgr.stderr_path(); - let combined_path = output_mgr.combined_path(); - let task_name = name.to_string(); - let h = tokio::spawn(async move { - let reader = tokio::io::BufReader::new(stderr); - let mut lines = reader.lines(); - let mut stderr_file = if persist { - tokio::fs::OpenOptions::new() - .create(true) - .append(true) - .open(&stderr_path) - .await - .ok() - } else { - None - }; - let mut combined_file = if persist { - tokio::fs::OpenOptions::new() - .create(true) - .append(true) - .open(&combined_path) - .await - .ok() - } else { - None - }; - - while let Ok(Some(line)) = lines.next_line().await { - let ts = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); - if let Some(ref mut f) = stderr_file { - let _ = tokio::io::AsyncWriteExt::write_all( - f, - format!("[{}] {}\n", ts, line).as_bytes(), - ) - .await; - } - if let Some(ref mut f) = combined_file { - let _ = tokio::io::AsyncWriteExt::write_all( - f, - format!("[{}] [stderr] {}\n", ts, line).as_bytes(), - ) - .await; - } - } - tracing::debug!("Stderr capture ended for task '{}'", task_name); - }); - abort_handles.push(h); - } - - self.statuses - .write() - .await - .insert(name.to_string(), TaskStatus::Running { pid }); - self.started_at - .write() - .await - .insert(name.to_string(), chrono::Utc::now()); - self.stopped_at.write().await.remove(name); - - self.running.write().await.insert( - name.to_string(), - RunningTask { - abort_handles, - child, - lifecycle, - }, - ); - - Ok(()) - } - - /// Stop a running task - pub async fn stop(&self, name: &str) -> Result<(), TaskError> { - if !self.is_running(name).await { - return 
Err(TaskError::NotRunning(name.to_string())); - } - - let mut running = self.running.write().await; - if let Some(mut task) = running.remove(name) { - if let Some(ref lifecycle) = task.lifecycle { - dirigent_process::graceful_shutdown_async( - lifecycle.as_ref(), - &mut task.child, - std::time::Duration::from_secs(3), - ) - .await; - } else { - let _ = task.child.kill().await; - } - for h in task.abort_handles { - h.abort(); - } - tracing::info!("Task '{}' stopped", name); - } - - self.statuses - .write() - .await - .insert(name.to_string(), TaskStatus::Stopped); - self.stopped_at - .write() - .await - .insert(name.to_string(), chrono::Utc::now()); - Ok(()) - } - - /// Poll running tasks for completion (call periodically from a timer) - pub async fn poll_completed(&self) { - let mut running = self.running.write().await; - let mut completed = Vec::new(); - - for (name, task) in running.iter_mut() { - match task.child.try_wait() { - Ok(Some(status)) => { - let exit_code = status.code(); - tracing::info!( - "Task '{}' finished with exit code: {:?}", - name, - exit_code - ); - completed.push((name.clone(), exit_code)); - } - Ok(None) => {} - Err(e) => { - tracing::error!("Error checking task '{}': {}", name, e); - completed.push((name.clone(), None)); - } - } - } - - let mut statuses = self.statuses.write().await; - let mut stopped_at = self.stopped_at.write().await; - for (name, exit_code) in completed { - running.remove(&name); - statuses.insert(name.clone(), TaskStatus::Finished { exit_code }); - stopped_at.insert(name.clone(), chrono::Utc::now()); - } - } - - /// List all tasks with their info - pub async fn list_tasks(&self) -> Vec { - let defs = self.definitions.read().await; - let statuses = self.statuses.read().await; - let started = self.started_at.read().await; - let stopped = self.stopped_at.read().await; - - defs.values() - .map(|def| TaskInfo { - definition: def.clone(), - status: statuses - .get(&def.name) - .cloned() - .unwrap_or(TaskStatus::Stopped), - 
started_at: started.get(&def.name).cloned(), - stopped_at: stopped.get(&def.name).cloned(), - }) - .collect() - } - - /// Get info for a specific task - pub async fn get_task(&self, name: &str) -> Option { - let defs = self.definitions.read().await; - let def = defs.get(name)?; - let statuses = self.statuses.read().await; - let started = self.started_at.read().await; - let stopped = self.stopped_at.read().await; - Some(TaskInfo { - definition: def.clone(), - status: statuses - .get(name) - .cloned() - .unwrap_or(TaskStatus::Stopped), - started_at: started.get(name).cloned(), - stopped_at: stopped.get(name).cloned(), - }) - } - - /// Read output for a task - pub async fn read_output( - &self, - name: &str, - kind: OutputKind, - tail_lines: Option, - ) -> Result { - { - let defs = self.definitions.read().await; - if !defs.contains_key(name) { - return Err(TaskError::NotFound(name.to_string())); - } - } - let mgr = TaskOutputManager::new(self.tasks_dir.join(name)); - mgr.read_output(kind, tail_lines).await.map_err(TaskError::Io) - } - - /// Get all task definitions (for config persistence) - pub async fn get_definitions(&self) -> Vec { - self.definitions.read().await.values().cloned().collect() - } - - /// Update a task definition (stops if running, re-registers) - pub async fn update(&self, def: TaskDefinition) -> Result<(), TaskError> { - let name = def.name.clone(); - if self.is_running(&name).await { - self.stop(&name).await?; - } - self.register(def).await - } - - /// Stop all running tasks. Used during graceful shutdown. 
- pub async fn stop_all(&self) { - let names: Vec = self.running.read().await.keys().cloned().collect(); - for name in names { - if let Err(e) = self.stop(&name).await { - tracing::warn!(task = %name, error = %e, "Failed to stop task during shutdown"); - } - } - } -} diff --git a/crates/dirigent_taskrunner/src/types.rs b/crates/dirigent_taskrunner/src/types.rs deleted file mode 100644 index 2e4efbb..0000000 --- a/crates/dirigent_taskrunner/src/types.rs +++ /dev/null @@ -1,71 +0,0 @@ -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; - -/// Unique identifier for a task (the slug/name) -pub type TaskId = String; - -/// How a task is defined -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TaskDefinition { - /// Human-readable title - pub title: String, - /// Unique slug (used as TOML key and file directory name) - pub name: String, - /// The command to execute (e.g. "lspmux", "python") - pub command: String, - /// Arguments to the command - #[serde(default)] - pub args: Vec, - /// Working directory (None = runtime working dir) - #[serde(default, skip_serializing_if = "Option::is_none")] - pub working_directory: Option, - /// Run this task when dirigent starts - #[serde(default)] - pub run_at_startup: bool, - /// Max lines to keep in memory buffer (0 = unlimited) - #[serde(default = "default_buffer_size")] - pub buffer_size: usize, - /// Write output to disk (overrides buffer_size — keeps everything) - #[serde(default = "default_persist")] - pub persist_to_disk: bool, - /// Rotate previous output file before starting - #[serde(default)] - pub rotate_previous: bool, - /// Environment variables to set - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub env: Vec<(String, String)>, -} - -fn default_buffer_size() -> usize { - 10000 -} -fn default_persist() -> bool { - true -} - -/// Runtime state of a task -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -pub enum TaskStatus { - Stopped, - 
Running { pid: u32 }, - Finished { exit_code: Option }, - Failed { error: String }, -} - -/// Full info about a task (definition + runtime state) -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TaskInfo { - pub definition: TaskDefinition, - pub status: TaskStatus, - pub started_at: Option>, - pub stopped_at: Option>, -} - -/// Which output stream to read -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] -pub enum OutputKind { - Stdout, - Stderr, - Combined, -} diff --git a/crates/dirigent_zed/CLAUDE.md b/crates/dirigent_zed/CLAUDE.md deleted file mode 100644 index a65858d..0000000 --- a/crates/dirigent_zed/CLAUDE.md +++ /dev/null @@ -1,64 +0,0 @@ -# Package: dirigent_zed - -Zed editor integration for Dirigent -- detection, agent discovery, binary resolution. - -## Quick Facts -- **Type**: Library -- **Main Entry**: src/lib.rs -- **Dependencies**: dirigent_config, dirs, serde, serde_json, thiserror, tracing -- **Status**: Initial implementation - -## Purpose - -Detects Zed editor installations on the current system, discovers configured -ACP agents from Zed's `settings.json`, and resolves downloaded binary paths -from the Zed data directory. 
- -## Key Types - -- `ZedChannel` -- Release channel enum (Stable, Preview, Nightly, Dev) -- `ZedAgent` -- Discovered agent with name, type, binary path, env overrides -- `AgentServerType` -- Registry, Custom, or Extension -- `ZedInstallation` -- Detected installation with channel, paths, and agents - -## Module Organization - -- **`paths.rs`** -- Platform path resolution for Zed config/data directories -- **`agents.rs`** -- Agent discovery from settings.json, JSONC stripping, binary resolution -- **`detection.rs`** -- High-level installation detection combining paths and agents - -## Platform Paths - -| Platform | Config Dir | Data Dir | -|----------|-----------|----------| -| Linux | `$XDG_CONFIG_HOME/zed` | `$XDG_DATA_HOME/zed` | -| macOS | `~/.config/zed` | `~/Library/Application Support/Zed` | -| Windows | `%APPDATA%\Zed` | `%LOCALAPPDATA%\Zed` | - -## Usage - -```rust -let installations = dirigent_zed::detect_installations(); -for inst in &installations { - for agent in &inst.agents { - if let Some(ref binary) = agent.binary_path { - println!("{}: {}", agent.name, binary.display()); - } - } -} -``` - -## Testing - -```bash -cargo test -p dirigent_zed -``` - -## Related Packages - -- **dirigent_config** -- Dirigent's own path resolution (dependency) -- **dirigent_core** -- Will consume this crate for Zed connector integration (future) - -## Research - -See `docs/research/zed-integration.md` for detailed platform paths and detection strategies. 
diff --git a/crates/dirigent_zed/Cargo.toml b/crates/dirigent_zed/Cargo.toml deleted file mode 100644 index c576f5f..0000000 --- a/crates/dirigent_zed/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "dirigent_zed" -version = "0.1.0" -edition = "2021" -description = "Zed editor integration for Dirigent — detection, agent discovery, binary resolution" - -[dependencies] -dirigent_config = { path = "../dirigent_config" } -dirs = "5" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -thiserror = "2.0" -tracing = "0.1" - -[dev-dependencies] -tempfile = "3" diff --git a/crates/dirigent_zed/src/agents.rs b/crates/dirigent_zed/src/agents.rs deleted file mode 100644 index bf0dc61..0000000 --- a/crates/dirigent_zed/src/agents.rs +++ /dev/null @@ -1,1145 +0,0 @@ -//! Agent discovery from Zed settings and binary resolution. -//! -//! Reads Zed's `settings.json` (which may contain JSONC comments), extracts -//! `agent_servers` configuration, and resolves downloaded binary paths from -//! the data directory. - -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::path::{Path, PathBuf}; - -/// Agent server type as defined in Zed settings. -/// -/// Defaults to `Registry` when the `"type"` field is omitted in settings.json, -/// matching Zed's own behavior where omitting type means it's a registry agent. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] -#[serde(rename_all = "lowercase")] -pub enum AgentServerType { - #[default] - Registry, - Custom, - Extension, -} - -impl std::fmt::Display for AgentServerType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - AgentServerType::Registry => f.write_str("registry"), - AgentServerType::Custom => f.write_str("custom"), - AgentServerType::Extension => f.write_str("extension"), - } - } -} - -/// A discovered Zed agent with its configuration and resolved binary path. 
-#[derive(Debug, Clone)] -pub struct ZedAgent { - /// Agent name (key in `agent_servers` map). - pub name: String, - /// Server type: registry, custom, or extension. - pub agent_type: AgentServerType, - /// Resolved binary path (populated by `resolve_binary_paths`). - pub binary_path: Option, - /// Environment variable overrides from settings. - pub env_overrides: HashMap, - /// Display name from the ACP registry (e.g. "Claude Agent", "Codex CLI"). - pub display_name: Option, - /// Description from the ACP registry. - pub description: Option, - /// Command arguments from the registry distribution config (e.g. `["--acp"]`). - pub args: Vec, - /// Path to a locally cached icon file (SVG) from the registry. - pub icon_local_path: Option, - /// Icon URL from the registry CDN. - pub icon_url: Option, -} - -/// Raw serde model for an entry in the `agent_servers` map in settings.json. -#[derive(Debug, Deserialize)] -struct AgentServerEntry { - #[serde(rename = "type", default)] - server_type: AgentServerType, - #[serde(default)] - env: HashMap, - // We ignore other fields like default_mode, default_model, command, args, etc. - // for now; we only need type and env for discovery. -} - -/// Raw serde model for the top-level settings.json (only the fields we care about). -#[derive(Debug, Deserialize)] -struct ZedSettings { - #[serde(default)] - agent_servers: HashMap, -} - -/// Discover agents from Zed's `settings.json` in the given config directory. -/// -/// Reads `{config_dir}/settings.json`, strips JSONC comments, and parses -/// the `agent_servers` key into a list of `ZedAgent` values. -/// -/// Returns an empty vec if the file doesn't exist or can't be parsed. 
-pub fn discover_agents_from_settings(config_dir: &Path) -> Vec { - let settings_path = config_dir.join("settings.json"); - - let content = match std::fs::read_to_string(&settings_path) { - Ok(c) => c, - Err(e) => { - tracing::debug!( - path = %settings_path.display(), - error = %e, - "Could not read Zed settings.json" - ); - return Vec::new(); - } - }; - - let stripped = strip_jsonc_comments(&content); - - let settings: ZedSettings = match serde_json::from_str(&stripped) { - Ok(s) => s, - Err(e) => { - tracing::warn!( - path = %settings_path.display(), - error = %e, - "Failed to parse Zed settings.json" - ); - return Vec::new(); - } - }; - - settings - .agent_servers - .into_iter() - .map(|(name, entry)| ZedAgent { - name, - agent_type: entry.server_type, - binary_path: None, - env_overrides: entry.env, - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }) - .collect() -} - -/// Discover agents from the `external_agents/` directory in the Zed data dir. -/// -/// Zed downloads registry agents to `{data_dir}/external_agents/{package_name}/`. -/// These may not appear in `settings.json` if the user hasn't customized them. -/// This function scans the directory and returns agents for any packages not -/// already present in the `existing_agents` list (matched by fuzzy name). 
-pub fn discover_agents_from_external_dir( - data_dir: &Path, - existing_agents: &[ZedAgent], -) -> Vec { - let agents_dir = data_dir.join("external_agents"); - - if !agents_dir.is_dir() { - return Vec::new(); - } - - let existing_names: Vec = existing_agents - .iter() - .map(|a| a.name.to_lowercase()) - .collect(); - - let mut discovered = Vec::new(); - - let entries = match std::fs::read_dir(&agents_dir) { - Ok(e) => e, - Err(_) => return Vec::new(), - }; - - for entry in entries.flatten() { - let path = entry.path(); - if !path.is_dir() { - continue; - } - - let dir_name = entry.file_name().to_string_lossy().to_string(); - - // Skip the registry directory — it contains cached registry metadata - // and downloaded binaries indexed by URL hash, not agent configurations. - if dir_name == "registry" { - tracing::debug!("Skipping registry directory in external_agents"); - continue; - } - - let dir_lower = dir_name.to_lowercase(); - - // Check if this directory already matches an existing agent from settings. - let already_covered = existing_names.iter().any(|existing| { - dir_lower == *existing - || dir_lower.contains(existing) - || existing.contains(&dir_lower) - }); - - if already_covered { - continue; - } - - tracing::debug!( - dir = %dir_name, - "Discovered external agent not in settings" - ); - - discovered.push(ZedAgent { - name: dir_name, - agent_type: AgentServerType::Registry, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }); - } - - discovered -} - -/// Resolve binary paths for registry agents from the Zed data directory. -/// -/// For each agent with `agent_type == Registry`, looks for downloaded binaries -/// under `{data_dir}/external_agents/`. 
Zed stores agents directly under -/// `external_agents/{package_name}/{version}/` — package names often differ -/// from settings keys (e.g., settings key `"claude"` maps to directory -/// `claude-agent-acp` or `claude-code-acp`). -/// -/// The resolution strategy: -/// 1. Try exact directory name match first -/// 2. Scan all directories in `external_agents/` for fuzzy matches -/// (directory name contains agent name, or agent name contains directory name) -/// 3. Within matched directories, find the latest version subdirectory -/// 4. Look for native executables and Node.js bin entries -pub fn resolve_binary_paths(agents: &mut [ZedAgent], data_dir: &Path) { - let agents_dir = data_dir.join("external_agents"); - - if !agents_dir.is_dir() { - tracing::debug!( - path = %agents_dir.display(), - "external_agents directory not found" - ); - return; - } - - // Collect all directories in external_agents/ for fuzzy matching. - let available_dirs: Vec<(String, PathBuf)> = std::fs::read_dir(&agents_dir) - .ok() - .into_iter() - .flatten() - .flatten() - .filter_map(|entry| { - let path = entry.path(); - if path.is_dir() { - let name = entry.file_name().to_string_lossy().to_string(); - Some((name, path)) - } else { - None - } - }) - .collect(); - - for agent in agents.iter_mut() { - if agent.agent_type != AgentServerType::Registry { - continue; - } - - // Find matching directories for this agent name. - let matching_dirs = find_matching_agent_dirs(&agent.name, &available_dirs); - - if matching_dirs.is_empty() { - tracing::debug!( - agent = %agent.name, - "No matching directory found in external_agents" - ); - continue; - } - - // When multiple dirs match (e.g., claude-agent-acp and claude-code-acp), - // pick the one with the most recently modified version directory. 
- let mut best_binary: Option<(PathBuf, std::time::SystemTime)> = None; - - for agent_dir in &matching_dirs { - let version_dir = match find_latest_version_dir(agent_dir) { - Some(d) => d, - None => continue, - }; - - let dir_mtime = std::fs::metadata(&version_dir) - .and_then(|m| m.modified()) - .unwrap_or(std::time::SystemTime::UNIX_EPOCH); - - if let Some(binary) = find_nodejs_bin(&agent.name, &version_dir) - .or_else(|| find_native_binary(&version_dir)) - { - match &best_binary { - Some((_, best_time)) if dir_mtime <= *best_time => {} - _ => { - best_binary = Some((binary, dir_mtime)); - } - } - } - } - - if let Some((binary, _)) = best_binary { - tracing::debug!( - agent = %agent.name, - binary = %binary.display(), - "Resolved agent binary path" - ); - agent.binary_path = Some(binary); - } - } -} - -/// Enrich discovered agents with metadata from the parsed registry. -/// -/// For each agent, looks up the corresponding registry entry using fuzzy name -/// matching and populates display name, description, args, icon path, and icon URL. -pub fn enrich_agents_from_registry( - agents: &mut [ZedAgent], - registry: &std::collections::HashMap, -) { - for agent in agents.iter_mut() { - if let Some(info) = crate::registry::find_registry_match(&agent.name, registry) { - if agent.display_name.is_none() { - agent.display_name = Some(info.display_name.clone()); - } - if agent.description.is_none() { - agent.description = Some(info.description.clone()); - } - if agent.args.is_empty() { - agent.args = info.args.clone(); - } - if agent.icon_local_path.is_none() { - agent.icon_local_path = info.icon_local_path.clone(); - } - if agent.icon_url.is_none() { - agent.icon_url = info.icon_url.clone(); - } - - tracing::debug!( - agent = %agent.name, - registry_id = %info.id, - display_name = %info.display_name, - "Enriched agent from registry" - ); - } - } -} - -/// Find directories in `external_agents/` that match an agent settings name. 
-/// -/// Matching strategy (in priority order): -/// 1. Exact match: directory name equals agent name -/// 2. Directory name contains agent name (e.g., "claude-agent-acp" contains "claude") -/// 3. Agent name contains directory name (e.g., "claude-acp" contains "claude") -fn find_matching_agent_dirs<'a>( - agent_name: &str, - available_dirs: &'a [(String, PathBuf)], -) -> Vec<&'a Path> { - let agent_lower = agent_name.to_lowercase(); - let mut exact = Vec::new(); - let mut contains = Vec::new(); - - for (dir_name, dir_path) in available_dirs { - let dir_lower = dir_name.to_lowercase(); - if dir_lower == agent_lower { - exact.push(dir_path.as_path()); - } else if dir_lower.contains(&agent_lower) || agent_lower.contains(&dir_lower) { - contains.push(dir_path.as_path()); - } - } - - if !exact.is_empty() { - exact - } else { - contains - } -} - -/// Find the most recently modified version directory inside `agent_dir`. -/// -/// Zed uses various version directory formats: `v0.9.2`, `0.20.0`, `v_abc123`. -/// We accept any subdirectory as a potential version directory. -fn find_latest_version_dir(agent_dir: &Path) -> Option { - let read_dir = std::fs::read_dir(agent_dir).ok()?; - - let mut best: Option<(PathBuf, std::time::SystemTime)> = None; - - for entry in read_dir.flatten() { - let path = entry.path(); - if !path.is_dir() { - continue; - } - // Skip hidden directories and node_modules at this level. 
- let name = entry.file_name(); - let name_str = name.to_string_lossy(); - if name_str.starts_with('.') || name_str == "node_modules" { - continue; - } - - let modified = entry.metadata().and_then(|m| m.modified()).ok(); - if let Some(mod_time) = modified { - match &best { - Some((_, best_time)) if mod_time > *best_time => { - best = Some((path, mod_time)); - } - None => { - best = Some((path, mod_time)); - } - _ => {} - } - } else { - if best.is_none() { - best = Some((path, std::time::SystemTime::UNIX_EPOCH)); - } - } - } - - best.map(|(path, _)| path) -} - -/// Find a Node.js executable in `node_modules/.bin/` within a version directory. -/// -/// Many Zed agents are npm packages. Their executables are symlinked in -/// `node_modules/.bin/`. We prefer a binary whose file stem matches the agent -/// name (case-insensitive, with common ACP suffixes stripped). If no match is -/// found, we fall back to the first executable -- this covers agents whose -/// binary name differs from their directory name. -/// -/// The name-matching preference is important because npm packages often install -/// dependency binaries alongside the agent binary (e.g., `acorn`, `glob`), and -/// without matching we might pick the wrong one. -fn find_nodejs_bin(agent_name: &str, version_dir: &Path) -> Option { - let bin_dir = version_dir.join("node_modules").join(".bin"); - if !bin_dir.is_dir() { - return None; - } - - let read_dir = std::fs::read_dir(&bin_dir).ok()?; - - let agent_lower = agent_name.to_lowercase(); - let agent_core = crate::registry::strip_acp_suffixes(&agent_lower); - - let mut first_executable: Option = None; - - for entry in read_dir.flatten() { - let path = entry.path(); - if path.is_dir() { - continue; - } - if !is_executable(&path) { - continue; - } - - // Check if this binary's stem matches the agent name. 
- if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) { - let stem_lower = stem.to_lowercase(); - if stem_lower == agent_lower || stem_lower == agent_core { - return Some(path); - } - } - - if first_executable.is_none() { - first_executable = Some(path); - } - } - - first_executable -} - -/// Find a native executable binary inside a version directory. -/// -/// Looks for executable files directly in the version directory, recursing -/// one level for extracted archives. Skips `node_modules/` and hidden dirs. -fn find_native_binary(dir: &Path) -> Option { - let read_dir = std::fs::read_dir(dir).ok()?; - - for entry in read_dir.flatten() { - let path = entry.path(); - let name = entry.file_name(); - let name_str = name.to_string_lossy(); - - if path.is_dir() { - // Skip node_modules and hidden directories. - if name_str == "node_modules" || name_str.starts_with('.') { - continue; - } - // Recurse one level for extracted archives. - if let Some(binary) = find_native_binary(&path) { - return Some(binary); - } - continue; - } - - // Skip non-binary files (package.json, lock files, etc.) - if name_str.ends_with(".json") || name_str.ends_with(".lock") { - continue; - } - - if is_executable(&path) { - return Some(path); - } - } - - None -} - -/// Check if a file is executable. -#[cfg(unix)] -fn is_executable(path: &Path) -> bool { - use std::os::unix::fs::PermissionsExt; - path.metadata() - .map(|m| m.permissions().mode() & 0o111 != 0) - .unwrap_or(false) -} - -#[cfg(not(unix))] -fn is_executable(path: &Path) -> bool { - path.extension() - .map(|ext| ext == "exe" || ext == "cmd" || ext == "bat") - .unwrap_or(false) -} - -/// Strip JSONC comments from input text. -/// -/// Handles: -/// - Line comments (`// ...`) -/// - Block comments (`/* ... 
*/`) -/// - Does NOT strip inside quoted strings -/// - Handles escaped quotes inside strings -pub fn strip_jsonc_comments(input: &str) -> String { - let mut output = String::with_capacity(input.len()); - let chars: Vec = input.chars().collect(); - let len = chars.len(); - let mut i = 0; - - while i < len { - let ch = chars[i]; - - // Inside a JSON string — pass through verbatim, respecting escape sequences. - if ch == '"' { - output.push(ch); - i += 1; - while i < len { - let c = chars[i]; - output.push(c); - if c == '\\' { - // Escaped character: push the next char unconditionally. - i += 1; - if i < len { - output.push(chars[i]); - } - } else if c == '"' { - break; - } - i += 1; - } - i += 1; - continue; - } - - // Check for line comment `//` - if ch == '/' && i + 1 < len && chars[i + 1] == '/' { - // Skip until end of line. - i += 2; - while i < len && chars[i] != '\n' { - i += 1; - } - continue; - } - - // Check for block comment `/* ... */` - if ch == '/' && i + 1 < len && chars[i + 1] == '*' { - i += 2; - while i + 1 < len { - if chars[i] == '*' && chars[i + 1] == '/' { - i += 2; - break; - } - // Preserve newlines so line numbers stay meaningful for error messages. - if chars[i] == '\n' { - output.push('\n'); - } - i += 1; - } - continue; - } - - output.push(ch); - i += 1; - } - - output -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Write; - - /// Returns a binary file name appropriate for the platform. - /// On Windows, appends `.exe`; on Unix, returns the name as-is. 
- fn platform_binary(name: &str) -> String { - if cfg!(windows) { - format!("{}.exe", name) - } else { - name.to_string() - } - } - - #[test] - fn test_strip_jsonc_line_comments() { - let input = r#"{ - "key": "value", // this is a comment - "other": 42 // another comment -}"#; - let stripped = strip_jsonc_comments(input); - assert!(!stripped.contains("// this is")); - assert!(!stripped.contains("// another")); - let parsed: serde_json::Value = serde_json::from_str(&stripped).unwrap(); - assert_eq!(parsed["key"], "value"); - assert_eq!(parsed["other"], 42); - } - - #[test] - fn test_strip_jsonc_block_comments() { - let input = r#"{ - /* block comment */ - "key": "value", - "other": /* inline block */ 42 -}"#; - let stripped = strip_jsonc_comments(input); - assert!(!stripped.contains("block comment")); - assert!(!stripped.contains("inline block")); - let parsed: serde_json::Value = serde_json::from_str(&stripped).unwrap(); - assert_eq!(parsed["key"], "value"); - assert_eq!(parsed["other"], 42); - } - - #[test] - fn test_strip_jsonc_preserves_strings() { - let input = r#"{ - "url": "https://example.com/path", // comment after url - "comment_like": "this has // inside and /* block */ too" -}"#; - let stripped = strip_jsonc_comments(input); - // The comment after url should be stripped. - assert!(!stripped.contains("// comment after url")); - // But the strings should be intact. 
- let parsed: serde_json::Value = serde_json::from_str(&stripped).unwrap(); - assert_eq!(parsed["url"], "https://example.com/path"); - assert_eq!( - parsed["comment_like"], - "this has // inside and /* block */ too" - ); - } - - #[test] - fn test_strip_jsonc_escaped_quotes() { - let input = r#"{ - "escaped": "he said \"hello\" // not a comment", - "real": 1 // real comment -}"#; - let stripped = strip_jsonc_comments(input); - assert!(!stripped.contains("// real comment")); - let parsed: serde_json::Value = serde_json::from_str(&stripped).unwrap(); - assert_eq!( - parsed["escaped"], - r#"he said "hello" // not a comment"# - ); - assert_eq!(parsed["real"], 1); - } - - #[test] - fn test_parse_agent_servers() { - let dir = tempfile::tempdir().unwrap(); - let settings_content = r#"{ - // Agent configuration - "agent_servers": { - "claude-acp": { - "type": "registry", - "default_mode": "plan", - "env": { - "CLAUDE_CODE_EXECUTABLE": "/usr/local/bin/claude" - } - }, - "codex-acp": { - "type": "registry", - "default_model": "o4-mini" - }, - "My Custom Agent": { - "type": "custom", - "command": "node", - "args": ["~/projects/agent/index.js", "--acp"], - "env": {} - } - } -}"#; - let settings_path = dir.path().join("settings.json"); - let mut f = std::fs::File::create(&settings_path).unwrap(); - f.write_all(settings_content.as_bytes()).unwrap(); - - let agents = discover_agents_from_settings(dir.path()); - assert_eq!(agents.len(), 3); - - let claude = agents.iter().find(|a| a.name == "claude-acp").unwrap(); - assert_eq!(claude.agent_type, AgentServerType::Registry); - assert_eq!( - claude.env_overrides.get("CLAUDE_CODE_EXECUTABLE"), - Some(&"/usr/local/bin/claude".to_string()) - ); - - let codex = agents.iter().find(|a| a.name == "codex-acp").unwrap(); - assert_eq!(codex.agent_type, AgentServerType::Registry); - assert!(codex.env_overrides.is_empty()); - - let custom = agents - .iter() - .find(|a| a.name == "My Custom Agent") - .unwrap(); - assert_eq!(custom.agent_type, 
AgentServerType::Custom); - } - - #[test] - fn test_discover_missing_file() { - let dir = tempfile::tempdir().unwrap(); - let agents = discover_agents_from_settings(dir.path()); - assert!(agents.is_empty()); - } - - #[test] - fn test_discover_no_agent_servers_key() { - let dir = tempfile::tempdir().unwrap(); - let settings_path = dir.path().join("settings.json"); - std::fs::write(&settings_path, r#"{ "theme": "dark" }"#).unwrap(); - - let agents = discover_agents_from_settings(dir.path()); - assert!(agents.is_empty()); - } - - #[test] - fn test_resolve_binary_paths_no_data() { - let dir = tempfile::tempdir().unwrap(); - let mut agents = vec![ZedAgent { - name: "claude-acp".to_string(), - agent_type: AgentServerType::Registry, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }]; - - resolve_binary_paths(&mut agents, dir.path()); - assert!(agents[0].binary_path.is_none()); - } - - #[test] - fn test_resolve_native_binary_exact_name() { - // Simulates: external_agents/codex/v0.9.2/codex-acp (native ELF) - let dir = tempfile::tempdir().unwrap(); - let version_dir = dir - .path() - .join("external_agents") - .join("codex") - .join("v0.9.2"); - std::fs::create_dir_all(&version_dir).unwrap(); - - let binary_path = version_dir.join(platform_binary("codex-acp")); - std::fs::write(&binary_path, b"#!/bin/sh\necho hello").unwrap(); - - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&binary_path, std::fs::Permissions::from_mode(0o755)) - .unwrap(); - } - - let mut agents = vec![ZedAgent { - name: "codex".to_string(), - agent_type: AgentServerType::Registry, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }]; - - resolve_binary_paths(&mut agents, dir.path()); - assert!(agents[0].binary_path.is_some()); - 
assert!(agents[0] - .binary_path - .as_ref() - .unwrap() - .to_string_lossy() - .contains("codex-acp")); - } - - #[test] - fn test_resolve_nodejs_bin() { - // Simulates: external_agents/claude-agent-acp/0.20.0/node_modules/.bin/claude-agent-acp - let dir = tempfile::tempdir().unwrap(); - let bin_dir = dir - .path() - .join("external_agents") - .join("claude-agent-acp") - .join("0.20.0") - .join("node_modules") - .join(".bin"); - std::fs::create_dir_all(&bin_dir).unwrap(); - - let binary_path = bin_dir.join(platform_binary("claude-agent-acp")); - std::fs::write(&binary_path, b"#!/usr/bin/env node\nconsole.log('hi')").unwrap(); - - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&binary_path, std::fs::Permissions::from_mode(0o755)) - .unwrap(); - } - - // Settings key is "claude" but directory is "claude-agent-acp" — fuzzy match. - let mut agents = vec![ZedAgent { - name: "claude".to_string(), - agent_type: AgentServerType::Registry, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }]; - - resolve_binary_paths(&mut agents, dir.path()); - assert!( - agents[0].binary_path.is_some(), - "Should resolve Node.js bin via fuzzy directory match" - ); - assert!(agents[0] - .binary_path - .as_ref() - .unwrap() - .to_string_lossy() - .contains("claude-agent-acp")); - } - - #[test] - fn test_resolve_semver_version_dirs() { - // Zed uses semver version dirs like "0.20.0", not just "v_" prefixed. 
- let dir = tempfile::tempdir().unwrap(); - let version_dir = dir - .path() - .join("external_agents") - .join("test-agent") - .join("1.2.3"); - std::fs::create_dir_all(&version_dir).unwrap(); - - let binary_path = version_dir.join(platform_binary("test-agent")); - std::fs::write(&binary_path, b"#!/bin/sh\necho hello").unwrap(); - - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&binary_path, std::fs::Permissions::from_mode(0o755)) - .unwrap(); - } - - let mut agents = vec![ZedAgent { - name: "test-agent".to_string(), - agent_type: AgentServerType::Registry, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }]; - - resolve_binary_paths(&mut agents, dir.path()); - assert!( - agents[0].binary_path.is_some(), - "Should find binary in semver-named version directory" - ); - } - - #[test] - fn test_resolve_picks_latest_version() { - // Two version dirs, the newer one should win. 
- let dir = tempfile::tempdir().unwrap(); - let agents_base = dir.path().join("external_agents").join("my-agent"); - - let old_dir = agents_base.join("0.1.0"); - std::fs::create_dir_all(&old_dir).unwrap(); - let old_bin = old_dir.join(platform_binary("my-agent")); - std::fs::write(&old_bin, b"old").unwrap(); - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&old_bin, std::fs::Permissions::from_mode(0o755)).unwrap(); - } - - // Sleep briefly to ensure different mtime - std::thread::sleep(std::time::Duration::from_millis(50)); - - let new_dir = agents_base.join("0.2.0"); - std::fs::create_dir_all(&new_dir).unwrap(); - let new_bin = new_dir.join(platform_binary("my-agent")); - std::fs::write(&new_bin, b"new").unwrap(); - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&new_bin, std::fs::Permissions::from_mode(0o755)).unwrap(); - } - - let mut agents = vec![ZedAgent { - name: "my-agent".to_string(), - agent_type: AgentServerType::Registry, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }]; - - resolve_binary_paths(&mut agents, dir.path()); - assert!(agents[0].binary_path.is_some()); - let resolved = agents[0].binary_path.as_ref().unwrap(); - assert!( - resolved.to_string_lossy().contains("0.2.0"), - "Should pick the newer version directory, got: {}", - resolved.display() - ); - } - - #[test] - fn test_resolve_skips_custom_agents() { - let dir = tempfile::tempdir().unwrap(); - let mut agents = vec![ZedAgent { - name: "my-agent".to_string(), - agent_type: AgentServerType::Custom, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }]; - - resolve_binary_paths(&mut agents, dir.path()); - assert!(agents[0].binary_path.is_none()); - } - - #[test] - fn 
test_fuzzy_match_dir_contains_agent_name() { - // Directory "claude-code-acp" should match agent name "claude" - let dirs = vec![ - ("claude-code-acp".to_string(), PathBuf::from("/fake/claude-code-acp")), - ("codex".to_string(), PathBuf::from("/fake/codex")), - ]; - let matches = find_matching_agent_dirs("claude", &dirs); - assert_eq!(matches.len(), 1); - assert!(matches[0].to_string_lossy().contains("claude-code-acp")); - } - - #[test] - fn test_fuzzy_match_exact_preferred() { - // Exact match "codex" should be preferred over fuzzy "codex-extra" - let dirs = vec![ - ("codex".to_string(), PathBuf::from("/fake/codex")), - ("codex-extra".to_string(), PathBuf::from("/fake/codex-extra")), - ]; - let matches = find_matching_agent_dirs("codex", &dirs); - assert_eq!(matches.len(), 1); - assert!(matches[0].to_string_lossy().contains("/codex")); - } - - #[test] - fn test_fuzzy_match_multiple_fuzzy_results() { - // "claude" matches both "claude-agent-acp" and "claude-code-acp" - let dirs = vec![ - ("claude-agent-acp".to_string(), PathBuf::from("/fake/claude-agent-acp")), - ("claude-code-acp".to_string(), PathBuf::from("/fake/claude-code-acp")), - ("codex".to_string(), PathBuf::from("/fake/codex")), - ]; - let matches = find_matching_agent_dirs("claude", &dirs); - assert_eq!(matches.len(), 2); - } - - #[test] - fn test_agent_server_type_display() { - assert_eq!(AgentServerType::Registry.to_string(), "registry"); - assert_eq!(AgentServerType::Custom.to_string(), "custom"); - assert_eq!(AgentServerType::Extension.to_string(), "extension"); - } - - #[test] - fn test_agent_server_type_default_is_registry() { - assert_eq!(AgentServerType::default(), AgentServerType::Registry); - } - - #[test] - fn test_parse_missing_type_defaults_to_registry() { - let dir = tempfile::tempdir().unwrap(); - let settings = r#"{ - "agent_servers": { - "codex": { - "command": "codex-acp", - "args": [], - "env": {} - } - } - }"#; - std::fs::write(dir.path().join("settings.json"), settings).unwrap(); - - 
let agents = discover_agents_from_settings(dir.path()); - assert_eq!(agents.len(), 1); - assert_eq!(agents[0].name, "codex"); - assert_eq!(agents[0].agent_type, AgentServerType::Registry); - } - - #[test] - fn test_discover_external_agents_not_in_settings() { - let data_dir = tempfile::tempdir().unwrap(); - let ext_dir = data_dir.path().join("external_agents"); - - // Create directories simulating Zed-downloaded agents - std::fs::create_dir_all(ext_dir.join("claude-agent-acp")).unwrap(); - std::fs::create_dir_all(ext_dir.join("claude-code-acp")).unwrap(); - std::fs::create_dir_all(ext_dir.join("codex")).unwrap(); - std::fs::create_dir_all(ext_dir.join("gemini")).unwrap(); - - // Existing agents from settings: only "codex" - let existing = vec![ZedAgent { - name: "codex".to_string(), - agent_type: AgentServerType::Custom, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }]; - - let discovered = discover_agents_from_external_dir(data_dir.path(), &existing); - - // "codex" is already in settings (exact match), so it's excluded. - // "claude-agent-acp" and "claude-code-acp" are new, and "gemini" is new. 
- assert_eq!(discovered.len(), 3); - - let names: Vec<&str> = discovered.iter().map(|a| a.name.as_str()).collect(); - assert!(names.contains(&"claude-agent-acp")); - assert!(names.contains(&"claude-code-acp")); - assert!(names.contains(&"gemini")); - - // All should be Registry type - for agent in &discovered { - assert_eq!(agent.agent_type, AgentServerType::Registry); - } - } - - #[test] - fn test_discover_external_agents_fuzzy_excludes() { - let data_dir = tempfile::tempdir().unwrap(); - let ext_dir = data_dir.path().join("external_agents"); - - std::fs::create_dir_all(ext_dir.join("claude-agent-acp")).unwrap(); - - // "claude" in settings should fuzzy-match "claude-agent-acp" - let existing = vec![ZedAgent { - name: "claude".to_string(), - agent_type: AgentServerType::Registry, - binary_path: None, - env_overrides: HashMap::new(), - display_name: None, - description: None, - args: Vec::new(), - icon_local_path: None, - icon_url: None, - }]; - - let discovered = discover_agents_from_external_dir(data_dir.path(), &existing); - assert!( - discovered.is_empty(), - "claude-agent-acp should be excluded by fuzzy match with 'claude'" - ); - } - - #[test] - fn test_discover_external_agents_no_dir() { - let data_dir = tempfile::tempdir().unwrap(); - // No external_agents/ directory exists - let discovered = discover_agents_from_external_dir(data_dir.path(), &[]); - assert!(discovered.is_empty()); - } - - #[test] - fn test_discover_excludes_registry_dir() { - let tmp = tempfile::tempdir().unwrap(); - let agents_dir = tmp.path().join("external_agents"); - std::fs::create_dir_all(agents_dir.join("registry")).unwrap(); - std::fs::create_dir_all(agents_dir.join("some-agent")).unwrap(); - - let discovered = discover_agents_from_external_dir(tmp.path(), &[]); - let names: Vec<&str> = discovered.iter().map(|a| a.name.as_str()).collect(); - assert!(!names.contains(&"registry"), "registry/ should be excluded"); - assert!(names.contains(&"some-agent")); - } - - #[test] - fn 
test_find_nodejs_bin_prefers_agent_name() { - // Simulates the Gemini scenario: multiple binaries in .bin/, - // the agent binary should be preferred over dependency binaries. - let dir = tempfile::tempdir().unwrap(); - let bin_dir = dir - .path() - .join("external_agents") - .join("gemini") - .join("0.23.0") - .join("node_modules") - .join(".bin"); - std::fs::create_dir_all(&bin_dir).unwrap(); - - // Create dependency binaries that sort alphabetically before "gemini" - for name in &["acorn", "esparse", "extract-zip", "gemini", "glob"] { - let path = bin_dir.join(platform_binary(name)); - std::fs::write(&path, b"#!/usr/bin/env node\n").unwrap(); - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).unwrap(); - } - } - - let version_dir = dir - .path() - .join("external_agents") - .join("gemini") - .join("0.23.0"); - - let result = find_nodejs_bin("gemini", &version_dir); - assert!(result.is_some(), "Should find a binary"); - let resolved = result.unwrap(); - let stem = resolved.file_stem().unwrap().to_string_lossy().to_string(); - assert_eq!( - stem, "gemini", - "Should prefer 'gemini' binary over dependency binaries, got: {}", - resolved.display() - ); - } -} diff --git a/crates/dirigent_zed/src/detection.rs b/crates/dirigent_zed/src/detection.rs deleted file mode 100644 index 4e69d9a..0000000 --- a/crates/dirigent_zed/src/detection.rs +++ /dev/null @@ -1,158 +0,0 @@ -//! Detection of Zed editor installations on the current system. -//! -//! Checks for the existence of Zed configuration directories and discovers -//! configured agents within each installation. - -use crate::agents::{self, ZedAgent}; -use crate::paths::{self, ZedChannel}; -use std::path::PathBuf; - -/// A detected Zed editor installation with its configuration and agents. -#[derive(Debug, Clone)] -pub struct ZedInstallation { - /// Release channel (Stable, Preview, Nightly, Dev). 
- pub channel: ZedChannel, - /// Path to the configuration directory (contains `settings.json`). - pub config_dir: PathBuf, - /// Path to the data directory (contains `external_agents/`). - pub data_dir: PathBuf, - /// Agents discovered from settings and resolved binary paths. - pub agents: Vec, -} - -/// Detect Zed installations on this system. -/// -/// Checks for the existence of `settings.json` in the Zed config directory. -/// Currently, Zed uses a single config directory for all channels (unlike -/// some editors that have per-channel directories). We report one installation -/// per detected config directory. -/// -/// For each found installation: -/// 1. Discovers agents from `settings.json` (`agent_servers` key) -/// 2. Resolves downloaded binary paths from the data directory -pub fn detect_installations() -> Vec { - let config_dir = match paths::zed_config_dir() { - Some(d) => d, - None => { - tracing::debug!("Could not determine Zed config directory for this platform"); - return Vec::new(); - } - }; - - let data_dir = match paths::zed_data_dir() { - Some(d) => d, - None => { - tracing::debug!("Could not determine Zed data directory for this platform"); - return Vec::new(); - } - }; - - let settings_path = config_dir.join("settings.json"); - if !settings_path.exists() { - tracing::debug!( - path = %settings_path.display(), - "Zed settings.json not found — no installation detected" - ); - return Vec::new(); - } - - tracing::info!( - config = %config_dir.display(), - data = %data_dir.display(), - "Detected Zed installation" - ); - - let mut found_agents = agents::discover_agents_from_settings(&config_dir); - - // Also discover agents from external_agents/ that aren't in settings. - let extra_agents = agents::discover_agents_from_external_dir(&data_dir, &found_agents); - found_agents.extend(extra_agents); - - agents::resolve_binary_paths(&mut found_agents, &data_dir); - - // Enrich agents with registry metadata (display names, descriptions, args, icons). 
- let registry = crate::registry::parse_registry(&data_dir); - agents::enrich_agents_from_registry(&mut found_agents, ®istry); - - // Zed currently uses a single config dir for all channels. We report it - // as Stable by default. If we later learn how to distinguish channels - // (e.g., via a marker file or binary path), we can refine this. - vec![ZedInstallation { - channel: ZedChannel::Stable, - config_dir, - data_dir, - agents: found_agents, - }] -} - -/// Detect installations using explicit paths (useful for testing or overrides). -pub fn detect_installation_at(config_dir: PathBuf, data_dir: PathBuf) -> Option { - let settings_path = config_dir.join("settings.json"); - if !settings_path.exists() { - return None; - } - - let mut found_agents = agents::discover_agents_from_settings(&config_dir); - let extra_agents = agents::discover_agents_from_external_dir(&data_dir, &found_agents); - found_agents.extend(extra_agents); - agents::resolve_binary_paths(&mut found_agents, &data_dir); - - let registry = crate::registry::parse_registry(&data_dir); - agents::enrich_agents_from_registry(&mut found_agents, ®istry); - - Some(ZedInstallation { - channel: ZedChannel::Stable, - config_dir, - data_dir, - agents: found_agents, - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_detect_installation_at_missing_settings() { - let dir = tempfile::tempdir().unwrap(); - let result = detect_installation_at(dir.path().to_path_buf(), dir.path().to_path_buf()); - assert!(result.is_none()); - } - - #[test] - fn test_detect_installation_at_with_settings() { - let config_dir = tempfile::tempdir().unwrap(); - let data_dir = tempfile::tempdir().unwrap(); - - let settings = r#"{ - "agent_servers": { - "claude-acp": { "type": "registry" } - } - }"#; - std::fs::write(config_dir.path().join("settings.json"), settings).unwrap(); - - let installation = - detect_installation_at(config_dir.path().to_path_buf(), data_dir.path().to_path_buf()); - 
assert!(installation.is_some()); - - let inst = installation.unwrap(); - assert_eq!(inst.channel, ZedChannel::Stable); - assert_eq!(inst.agents.len(), 1); - assert_eq!(inst.agents[0].name, "claude-acp"); - } - - #[test] - fn test_detect_installation_at_empty_settings() { - let config_dir = tempfile::tempdir().unwrap(); - let data_dir = tempfile::tempdir().unwrap(); - - std::fs::write(config_dir.path().join("settings.json"), "{}").unwrap(); - - let installation = - detect_installation_at(config_dir.path().to_path_buf(), data_dir.path().to_path_buf()); - assert!(installation.is_some()); - - let inst = installation.unwrap(); - assert!(inst.agents.is_empty()); - } -} diff --git a/crates/dirigent_zed/src/lib.rs b/crates/dirigent_zed/src/lib.rs deleted file mode 100644 index 40f3f61..0000000 --- a/crates/dirigent_zed/src/lib.rs +++ /dev/null @@ -1,31 +0,0 @@ -//! Zed editor integration for Dirigent. -//! -//! Provides detection of Zed installations, discovery of configured ACP agents, -//! and binary path resolution for agent servers managed by Zed. -//! -//! # Usage -//! -//! ```rust,no_run -//! use dirigent_zed::{detect_installations, ZedAgent, AgentServerType}; -//! -//! let installations = detect_installations(); -//! for inst in &installations { -//! println!("Zed {} at {}", inst.channel, inst.config_dir.display()); -//! for agent in &inst.agents { -//! println!(" Agent: {} ({})", agent.name, agent.agent_type); -//! if let Some(ref path) = agent.binary_path { -//! println!(" Binary: {}", path.display()); -//! } -//! } -//! } -//! 
``` - -pub mod agents; -pub mod detection; -pub mod paths; -pub mod registry; - -pub use agents::{AgentServerType, ZedAgent}; -pub use detection::{detect_installation_at, detect_installations, ZedInstallation}; -pub use paths::ZedChannel; -pub use registry::{parse_registry, find_registry_match, RegistryAgentInfo}; diff --git a/crates/dirigent_zed/src/paths.rs b/crates/dirigent_zed/src/paths.rs deleted file mode 100644 index 46718ac..0000000 --- a/crates/dirigent_zed/src/paths.rs +++ /dev/null @@ -1,147 +0,0 @@ -//! Platform path resolution for Zed editor directories. -//! -//! Resolves configuration and data directories for each Zed release channel -//! across Linux, macOS, and Windows. - -use std::path::PathBuf; - -/// Zed release channel. Each channel has independent config and data directories. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum ZedChannel { - Stable, - Preview, - Nightly, - Dev, -} - -impl ZedChannel { - /// All known release channels. - pub fn all() -> &'static [ZedChannel] { - &[ - ZedChannel::Stable, - ZedChannel::Preview, - ZedChannel::Nightly, - ZedChannel::Dev, - ] - } - - /// Socket/identifier name used in IPC paths. - pub fn socket_name(&self) -> &'static str { - match self { - ZedChannel::Stable => "stable", - ZedChannel::Preview => "preview", - ZedChannel::Nightly => "nightly", - ZedChannel::Dev => "dev", - } - } - - /// Display name for the channel. - pub fn display_name(&self) -> &'static str { - match self { - ZedChannel::Stable => "Stable", - ZedChannel::Preview => "Preview", - ZedChannel::Nightly => "Nightly", - ZedChannel::Dev => "Dev", - } - } -} - -impl std::fmt::Display for ZedChannel { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(self.display_name()) - } -} - -/// Resolve the Zed configuration directory for this platform. 
-/// -/// | Platform | Path | -/// |----------|------| -/// | Linux | `$XDG_CONFIG_HOME/zed` (default: `~/.config/zed`) | -/// | macOS | `~/.config/zed` | -/// | Windows | `%APPDATA%\Zed` | -pub fn zed_config_dir() -> Option { - #[cfg(target_os = "macos")] - { - dirs::home_dir().map(|d| d.join(".config").join("zed")) - } - #[cfg(target_os = "windows")] - { - dirs::config_dir().map(|d| d.join("Zed")) - } - #[cfg(not(any(target_os = "macos", target_os = "windows")))] - { - dirs::config_dir().map(|d| d.join("zed")) - } -} - -/// Resolve the Zed data directory for this platform. -/// -/// | Platform | Path | -/// |----------|------| -/// | Linux | `$XDG_DATA_HOME/zed` (default: `~/.local/share/zed`) | -/// | macOS | `~/Library/Application Support/Zed` | -/// | Windows | `%LOCALAPPDATA%\Zed` | -pub fn zed_data_dir() -> Option { - #[cfg(target_os = "windows")] - { - dirs::data_local_dir().map(|d| d.join("Zed")) - } - #[cfg(not(target_os = "windows"))] - { - dirs::data_dir().map(|d| d.join("zed")) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_all_channels() { - let channels = ZedChannel::all(); - assert_eq!(channels.len(), 4); - assert!(channels.contains(&ZedChannel::Stable)); - assert!(channels.contains(&ZedChannel::Preview)); - assert!(channels.contains(&ZedChannel::Nightly)); - assert!(channels.contains(&ZedChannel::Dev)); - } - - #[test] - fn test_socket_names() { - assert_eq!(ZedChannel::Stable.socket_name(), "stable"); - assert_eq!(ZedChannel::Preview.socket_name(), "preview"); - assert_eq!(ZedChannel::Nightly.socket_name(), "nightly"); - assert_eq!(ZedChannel::Dev.socket_name(), "dev"); - } - - #[test] - fn test_display_names() { - assert_eq!(ZedChannel::Stable.to_string(), "Stable"); - assert_eq!(ZedChannel::Dev.to_string(), "Dev"); - } - - #[test] - fn test_config_dir_is_some() { - // On any supported platform, this should resolve. 
- let dir = zed_config_dir(); - assert!(dir.is_some(), "zed_config_dir should resolve on this platform"); - let path = dir.unwrap(); - let path_str = path.to_string_lossy(); - assert!( - path_str.contains("zed") || path_str.contains("Zed"), - "config dir should contain 'zed': {path_str}" - ); - } - - #[test] - fn test_data_dir_is_some() { - let dir = zed_data_dir(); - assert!(dir.is_some(), "zed_data_dir should resolve on this platform"); - let path = dir.unwrap(); - let path_str = path.to_string_lossy(); - assert!( - path_str.contains("zed") || path_str.contains("Zed"), - "data dir should contain 'zed': {path_str}" - ); - } -} diff --git a/crates/dirigent_zed/src/registry.rs b/crates/dirigent_zed/src/registry.rs deleted file mode 100644 index 5dcb333..0000000 --- a/crates/dirigent_zed/src/registry.rs +++ /dev/null @@ -1,546 +0,0 @@ -//! Registry metadata parsing for Zed ACP agents. -//! -//! Reads the local `registry.json` file that Zed caches from the ACP registry CDN. -//! Provides enrichment data (display names, descriptions, command args, icon paths) -//! for discovered agents. - -use serde::Deserialize; -use std::collections::HashMap; -use std::path::{Path, PathBuf}; - -/// Metadata about an agent from the ACP registry. -#[derive(Debug, Clone)] -pub struct RegistryAgentInfo { - /// Registry identifier (e.g. "claude-acp", "codex-acp"). - pub id: String, - /// Human-friendly display name (e.g. "Claude Agent", "Codex CLI"). - pub display_name: String, - /// Short description of the agent. - pub description: String, - /// Icon URL from the CDN. - pub icon_url: Option, - /// Path to the locally cached icon file (SVG), if it exists. - pub icon_local_path: Option, - /// Version string from the registry. - pub version: String, - /// Command arguments from the distribution config. - /// - /// For npx-distributed agents this may include flags like `["--acp"]`. - /// For binary-distributed agents this is the platform-specific `cmd` value. 
- pub args: Vec, - /// The command to run, extracted from the platform-appropriate distribution. - /// - /// For binary distributions this is the `cmd` field (e.g. `"./codex-acp"`). - /// For npx distributions this is the npx package specifier. - pub command: Option, - /// Environment variables from the distribution config. - pub env: HashMap, -} - -// --------------------------------------------------------------------------- -// Raw serde models for registry.json -// --------------------------------------------------------------------------- - -#[derive(Debug, Deserialize)] -struct RegistryFile { - #[serde(default)] - agents: Vec, -} - -#[derive(Debug, Deserialize)] -struct RawRegistryAgent { - id: String, - name: String, - #[serde(default)] - version: String, - #[serde(default)] - description: String, - #[serde(default)] - icon: Option, - #[serde(default)] - distribution: Option, -} - -#[derive(Debug, Deserialize)] -struct RawDistribution { - #[serde(default)] - binary: Option>, - #[serde(default)] - npx: Option, - #[serde(default)] - uvx: Option, -} - -#[derive(Debug, Deserialize)] -struct RawBinaryTarget { - #[serde(default)] - cmd: Option, -} - -#[derive(Debug, Deserialize)] -struct RawNpxDistribution { - #[serde(default)] - package: Option, - #[serde(default)] - args: Vec, - #[serde(default)] - env: HashMap, -} - -#[derive(Debug, Deserialize)] -struct RawUvxDistribution { - #[serde(default)] - package: Option, - #[serde(default)] - args: Vec, - #[serde(default)] - env: HashMap, -} - -/// Determine the current platform key used in the registry's binary distribution map. -/// -/// Returns keys like `"windows-x86_64"`, `"linux-aarch64"`, `"darwin-aarch64"`, etc. 
-fn current_platform_key() -> Option<&'static str> { - #[cfg(all(target_os = "windows", target_arch = "x86_64"))] - { - Some("windows-x86_64") - } - #[cfg(all(target_os = "windows", target_arch = "aarch64"))] - { - Some("windows-aarch64") - } - #[cfg(all(target_os = "macos", target_arch = "aarch64"))] - { - Some("darwin-aarch64") - } - #[cfg(all(target_os = "macos", target_arch = "x86_64"))] - { - Some("darwin-x86_64") - } - #[cfg(all(target_os = "linux", target_arch = "x86_64"))] - { - Some("linux-x86_64") - } - #[cfg(all(target_os = "linux", target_arch = "aarch64"))] - { - Some("linux-aarch64") - } - #[cfg(not(any( - all(target_os = "windows", target_arch = "x86_64"), - all(target_os = "windows", target_arch = "aarch64"), - all(target_os = "macos", target_arch = "aarch64"), - all(target_os = "macos", target_arch = "x86_64"), - all(target_os = "linux", target_arch = "x86_64"), - all(target_os = "linux", target_arch = "aarch64"), - )))] - { - None - } -} - -/// Parse the local Zed registry.json and return a map of agent id -> metadata. -/// -/// The registry file lives at `{data_dir}/external_agents/registry/registry.json`. -/// Icons are cached at `{data_dir}/external_agents/registry/icons/{id}.svg`. -/// -/// Returns an empty map if the file doesn't exist or can't be parsed. 
-pub fn parse_registry(data_dir: &Path) -> HashMap { - let registry_dir = data_dir.join("external_agents").join("registry"); - let registry_path = registry_dir.join("registry.json"); - - let content = match std::fs::read_to_string(®istry_path) { - Ok(c) => c, - Err(e) => { - tracing::debug!( - path = %registry_path.display(), - error = %e, - "Could not read Zed registry.json" - ); - return HashMap::new(); - } - }; - - let registry: RegistryFile = match serde_json::from_str(&content) { - Ok(r) => r, - Err(e) => { - tracing::warn!( - path = %registry_path.display(), - error = %e, - "Failed to parse Zed registry.json" - ); - return HashMap::new(); - } - }; - - let icons_dir = registry_dir.join("icons"); - let platform_key = current_platform_key(); - - let mut map = HashMap::with_capacity(registry.agents.len()); - - for agent in registry.agents { - let icon_local_path = { - let candidate = icons_dir.join(format!("{}.svg", agent.id)); - if candidate.exists() { - Some(candidate) - } else { - None - } - }; - - let (command, args, env) = - extract_distribution_info(agent.distribution.as_ref(), platform_key); - - let info = RegistryAgentInfo { - id: agent.id.clone(), - display_name: agent.name, - description: agent.description, - icon_url: agent.icon, - icon_local_path, - version: agent.version, - args, - command, - env, - }; - - map.insert(agent.id, info); - } - - tracing::debug!( - count = map.len(), - "Parsed Zed registry with {} agents", - map.len() - ); - - map -} - -/// Extract command, args, and env from the distribution config. -/// -/// Priority: binary (platform-specific) > npx > uvx. -fn extract_distribution_info( - distribution: Option<&RawDistribution>, - platform_key: Option<&str>, -) -> (Option, Vec, HashMap) { - let dist = match distribution { - Some(d) => d, - None => return (None, Vec::new(), HashMap::new()), - }; - - // Prefer binary distribution for the current platform. 
- if let Some(ref binary) = dist.binary { - if let Some(key) = platform_key { - if let Some(target) = binary.get(key) { - let cmd = target.cmd.clone(); - return (cmd, Vec::new(), HashMap::new()); - } - } - } - - // Fall back to npx distribution. - if let Some(ref npx) = dist.npx { - return ( - npx.package.clone(), - npx.args.clone(), - npx.env.clone(), - ); - } - - // Fall back to uvx distribution. - if let Some(ref uvx) = dist.uvx { - return ( - uvx.package.clone(), - uvx.args.clone(), - uvx.env.clone(), - ); - } - - (None, Vec::new(), HashMap::new()) -} - -/// Look up a registry entry by matching an agent name or directory name to a registry id. -/// -/// The matching strategy: -/// 1. Exact match on registry id -/// 2. Substring: registry id contained in agent name, or vice versa -/// 3. Core-name match: strip common ACP suffixes and compare base names -/// (e.g. "claude-agent-acp" and "claude-acp" both have core name "claude") -pub fn find_registry_match<'a>( - agent_name: &str, - registry: &'a HashMap, -) -> Option<&'a RegistryAgentInfo> { - let name_lower = agent_name.to_lowercase(); - - // 1. Exact match on registry id. - if let Some(info) = registry.get(&name_lower) { - return Some(info); - } - - // 2. Substring match: registry id contained in agent name, or vice versa. - let mut best: Option<&'a RegistryAgentInfo> = None; - let mut best_len = 0; - - for (id, info) in registry { - let id_lower = id.to_lowercase(); - if name_lower.contains(&id_lower) || id_lower.contains(&name_lower) { - if id_lower.len() > best_len { - best = Some(info); - best_len = id_lower.len(); - } - } - } - - if best.is_some() { - return best; - } - - // 3. Core-name match: strip ACP-related suffixes and compare. 
- let agent_core = strip_acp_suffixes(&name_lower); - for (id, info) in registry { - let id_lower = id.to_lowercase(); - let id_core = strip_acp_suffixes(&id_lower); - if !agent_core.is_empty() && agent_core == id_core { - return Some(info); - } - } - - None -} - -/// Strip common ACP-related suffixes to extract the core agent name. -/// -/// For example: -/// - "claude-agent-acp" -> "claude" -/// - "claude-acp" -> "claude" -/// - "claude-code-acp" -> "claude" -/// - "codex" -> "codex" -pub fn strip_acp_suffixes(name: &str) -> &str { - // Strip known suffixes in order of specificity (longest first). - for suffix in &["-agent-acp", "-code-acp", "-acp", "-cli"] { - if let Some(core) = name.strip_suffix(suffix) { - return core; - } - } - name -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Write; - - fn sample_registry_json() -> &'static str { - r#"{ - "version": "1.0.0", - "agents": [ - { - "id": "claude-acp", - "name": "Claude Agent", - "version": "0.24.2", - "description": "ACP wrapper for Anthropic's Claude", - "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/claude-acp.svg", - "distribution": { - "npx": { - "package": "@agentclientprotocol/claude-agent-acp@0.24.2" - } - } - }, - { - "id": "codex-acp", - "name": "Codex CLI", - "version": "0.10.0", - "description": "ACP adapter for OpenAI's coding assistant", - "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/codex-acp.svg", - "distribution": { - "binary": { - "linux-x86_64": { - "archive": "https://example.com/codex.tar.gz", - "cmd": "./codex-acp" - }, - "windows-x86_64": { - "archive": "https://example.com/codex.zip", - "cmd": "./codex-acp.exe" - }, - "darwin-aarch64": { - "archive": "https://example.com/codex-mac.tar.gz", - "cmd": "./codex-acp" - } - }, - "npx": { - "package": "@zed-industries/codex-acp@0.10.0" - } - } - }, - { - "id": "auggie", - "name": "Auggie CLI", - "version": "0.21.0", - "description": "Augment Code's powerful software agent", - "distribution": 
{ - "npx": { - "package": "@augmentcode/auggie@0.21.0", - "args": ["--acp"], - "env": { "AUGMENT_DISABLE_AUTO_UPDATE": "1" } - } - } - } - ] -}"# - } - - #[test] - fn test_parse_registry_basic() { - let dir = tempfile::tempdir().unwrap(); - let registry_dir = dir - .path() - .join("external_agents") - .join("registry"); - std::fs::create_dir_all(®istry_dir).unwrap(); - - let mut f = - std::fs::File::create(registry_dir.join("registry.json")).unwrap(); - f.write_all(sample_registry_json().as_bytes()).unwrap(); - - let map = parse_registry(dir.path()); - assert_eq!(map.len(), 3); - - let claude = map.get("claude-acp").unwrap(); - assert_eq!(claude.display_name, "Claude Agent"); - assert_eq!(claude.description, "ACP wrapper for Anthropic's Claude"); - assert!(claude.icon_url.is_some()); - // No local icon file created in test. - assert!(claude.icon_local_path.is_none()); - - let auggie = map.get("auggie").unwrap(); - assert_eq!(auggie.args, vec!["--acp"]); - assert_eq!( - auggie.env.get("AUGMENT_DISABLE_AUTO_UPDATE"), - Some(&"1".to_string()) - ); - } - - #[test] - fn test_parse_registry_with_local_icon() { - let dir = tempfile::tempdir().unwrap(); - let registry_dir = dir - .path() - .join("external_agents") - .join("registry"); - let icons_dir = registry_dir.join("icons"); - std::fs::create_dir_all(&icons_dir).unwrap(); - - std::fs::File::create(registry_dir.join("registry.json")) - .unwrap() - .write_all(sample_registry_json().as_bytes()) - .unwrap(); - - // Create a fake icon file. 
- std::fs::write(icons_dir.join("claude-acp.svg"), "").unwrap(); - - let map = parse_registry(dir.path()); - let claude = map.get("claude-acp").unwrap(); - assert!(claude.icon_local_path.is_some()); - assert!(claude - .icon_local_path - .as_ref() - .unwrap() - .to_string_lossy() - .contains("claude-acp.svg")); - } - - #[test] - fn test_parse_registry_missing_file() { - let dir = tempfile::tempdir().unwrap(); - let map = parse_registry(dir.path()); - assert!(map.is_empty()); - } - - #[test] - fn test_parse_registry_invalid_json() { - let dir = tempfile::tempdir().unwrap(); - let registry_dir = dir - .path() - .join("external_agents") - .join("registry"); - std::fs::create_dir_all(®istry_dir).unwrap(); - std::fs::write(registry_dir.join("registry.json"), "not json").unwrap(); - - let map = parse_registry(dir.path()); - assert!(map.is_empty()); - } - - #[test] - fn test_find_registry_match_exact() { - let dir = tempfile::tempdir().unwrap(); - let registry_dir = dir - .path() - .join("external_agents") - .join("registry"); - std::fs::create_dir_all(®istry_dir).unwrap(); - std::fs::File::create(registry_dir.join("registry.json")) - .unwrap() - .write_all(sample_registry_json().as_bytes()) - .unwrap(); - - let map = parse_registry(dir.path()); - - // Exact match. - let info = find_registry_match("claude-acp", &map).unwrap(); - assert_eq!(info.display_name, "Claude Agent"); - } - - #[test] - fn test_find_registry_match_fuzzy() { - let dir = tempfile::tempdir().unwrap(); - let registry_dir = dir - .path() - .join("external_agents") - .join("registry"); - std::fs::create_dir_all(®istry_dir).unwrap(); - std::fs::File::create(registry_dir.join("registry.json")) - .unwrap() - .write_all(sample_registry_json().as_bytes()) - .unwrap(); - - let map = parse_registry(dir.path()); - - // Agent name "claude" should fuzzy-match "claude-acp". 
- let info = find_registry_match("claude", &map).unwrap(); - assert_eq!(info.display_name, "Claude Agent"); - - // Directory name "claude-agent-acp" should fuzzy-match "claude-acp". - let info2 = find_registry_match("claude-agent-acp", &map).unwrap(); - assert_eq!(info2.display_name, "Claude Agent"); - } - - #[test] - fn test_find_registry_match_no_match() { - let map = HashMap::new(); - assert!(find_registry_match("nonexistent", &map).is_none()); - } - - #[test] - fn test_binary_distribution_platform_cmd() { - let dir = tempfile::tempdir().unwrap(); - let registry_dir = dir - .path() - .join("external_agents") - .join("registry"); - std::fs::create_dir_all(®istry_dir).unwrap(); - std::fs::File::create(registry_dir.join("registry.json")) - .unwrap() - .write_all(sample_registry_json().as_bytes()) - .unwrap(); - - let map = parse_registry(dir.path()); - let codex = map.get("codex-acp").unwrap(); - - // On any supported platform, the binary distribution should produce a command. - // The exact value depends on the compile target. - if current_platform_key().is_some() { - assert!( - codex.command.is_some(), - "codex-acp should have a command from binary distribution" - ); - } - } -}