feat(fermata): add secret filtering engine — the security brain

Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.

New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
  built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
  Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
  patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.

Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
  scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0

Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates

195 tests (130 new), all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Gabor Körber
2026-05-25 17:29:07 +02:00
parent f77fd73966
commit 087429d275
22 changed files with 4557 additions and 172 deletions
+298
View File
@@ -0,0 +1,298 @@
use assert_cmd::Command;
use std::fs;
/// Builds a throwaway project directory for the hook tests.
///
/// Three files are written into a fresh temp dir, in this order:
/// - `.env`, holding `env_content` (the test secrets);
/// - `.botsecrets`, holding `botsecrets_content`, or a minimal config that
///   lists `.env` under `[files].patterns` when `None` is passed;
/// - an empty `.botignore`, which is required for project root detection.
///
/// Panics if the temp dir or any of the files cannot be created.
fn setup_project(env_content: &str, botsecrets_content: Option<&str>) -> tempfile::TempDir {
    // Fallback config used when the caller does not supply one.
    const DEFAULT_BOTSECRETS: &str = r#"
[files]
patterns = [".env"]
"#;

    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    let fixtures = [
        (".env", env_content),
        (".botsecrets", botsecrets_content.unwrap_or(DEFAULT_BOTSECRETS)),
        (".botignore", ""),
    ];
    for (name, body) in fixtures {
        fs::write(root.join(name), body).unwrap();
    }
    project
}
/// A secret value listed in the project's `.env` must be masked in the
/// `updatedToolOutput` returned by the post-tool-use hook, while the
/// non-secret lines of the tool response are left untouched.
#[test]
fn post_tool_use_redacts_known_secret() {
    let project = setup_project("DB_PASSWORD=supersecret123\n", None);
    let request = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/file.txt" },
        "tool_response": "DB_HOST=localhost\nDB_PASSWORD=supersecret123\nDB_PORT=5432"
    })
    .to_string();

    // Run the hook inside the project directory, feeding the payload on stdin.
    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(request);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    let updated = reply["hookSpecificOutput"]["updatedToolOutput"]
        .as_str()
        .expect("expected updatedToolOutput");

    assert!(
        updated.contains("*****"),
        "expected masked secret, got: {updated}"
    );
    assert!(
        !updated.contains("supersecret123"),
        "secret should be redacted, got: {updated}"
    );
    // Everything that is not a known secret has to survive verbatim.
    for kept in ["DB_HOST=localhost", "DB_PORT=5432"] {
        assert!(
            updated.contains(kept),
            "non-secret lines should be preserved, got: {updated}"
        );
    }
}
/// When the tool response contains none of the project's secrets, the
/// post-tool-use hook must answer with an empty JSON object.
#[test]
fn post_tool_use_no_secrets_passthrough() {
    let project = setup_project("DB_PASSWORD=supersecret123\n", None);
    let request = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/file.txt" },
        "tool_response": "Hello, world! This text has no secrets."
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(request);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    // Empty JSON object means "no changes".
    let no_changes = serde_json::json!({});
    assert_eq!(reply, no_changes, "expected empty JSON for passthrough");
}
/// An empty `tool_response` gives the hook nothing to redact, so the reply
/// must be the empty JSON object.
#[test]
fn post_tool_use_empty_response_passthrough() {
    let project = setup_project("DB_PASSWORD=supersecret123\n", None);
    let request = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/file.txt" },
        "tool_response": ""
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(request);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    let no_changes = serde_json::json!({});
    assert_eq!(reply, no_changes);
}
/// With the heuristic scanner enabled in `enforce` mode, a tool response that
/// contains a GitHub-PAT-shaped token (not present in `.env`) must come back
/// with a `[fermata] WARNING` in the updated tool output.
#[test]
fn post_tool_use_heuristic_enforce_appends_warning() {
    // Config with the heuristic explicitly switched on in enforce mode
    // (enforce is also the default mode).
    let config = r#"
[files]
patterns = [".env"]
[heuristic]
enabled = true
mode = "enforce"
"#;
    let project = setup_project("UNRELATED_KEY=foo\n", Some(config));

    // The response carries something that looks like a classic GitHub PAT:
    // `ghp_` followed by exactly 36 alphanumeric characters.
    let request = serde_json::json!({
        "tool_name": "Bash",
        "tool_input": { "command": "cat output.log" },
        "tool_response": "deploy log: token ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij used"
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(request);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    let updated = reply
        .get("hookSpecificOutput")
        .and_then(|section| section.get("updatedToolOutput"))
        .and_then(|text| text.as_str())
        .expect("expected updatedToolOutput with heuristic warning");

    assert!(
        updated.contains("[fermata] WARNING"),
        "expected heuristic warning, got: {updated}"
    );
}
/// Omitting `--event` must behave as pre-tool-use: the pre-existing
/// `hook --harness claude` invocation still denies a read of a file that is
/// listed in `.botignore`.
#[test]
fn pre_tool_use_backward_compat_default_event() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    fs::write(root.join(".botignore"), ".env\n").unwrap();

    // The file being read is the ignored `.env` itself.
    let ignored_file = root.join(".env");
    fs::write(&ignored_file, "").unwrap();

    let request = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": ignored_file.to_str().unwrap() }
    })
    .to_string();

    // No `--event` flag here: the default is what is under test.
    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--harness", "claude"]).write_stdin(request);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    assert_eq!(reply["hookSpecificOutput"]["permissionDecision"], "deny");
}
/// Passing `--event pre-tool-use` explicitly is accepted, and a read of a
/// file that is not listed in `.botignore` is allowed.
#[test]
fn pre_tool_use_explicit_event_flag() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    fs::write(root.join(".botignore"), ".env\n").unwrap();

    // `safe.txt` does not match the single `.env` ignore entry.
    let allowed_file = root.join("safe.txt");
    fs::write(&allowed_file, "").unwrap();

    let request = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": allowed_file.to_str().unwrap() }
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "pre-tool-use", "--harness", "claude"])
        .write_stdin(request);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    assert_eq!(reply["hookSpecificOutput"]["permissionDecision"], "allow");
}
/// An unrecognised `--event` value must make the process exit with code 2.
#[test]
fn unknown_event_exits_2() {
    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "nonsense", "--harness", "claude"])
        .write_stdin("{}");
    hook.assert().code(2);
}
/// When run in a directory with no `.botignore` / `.botsecrets`, the
/// post-tool-use hook should fail open and reply with `{}`, even though the
/// tool response contains something that looks like a secret assignment.
#[test]
fn post_tool_use_no_project_root_passthrough() {
    // A bare temp dir: none of the project marker files are created.
    let bare_dir = tempfile::tempdir().unwrap();
    let request = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/file.txt" },
        "tool_response": "DB_PASSWORD=supersecret123"
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(bare_dir.path())
        .write_stdin(request);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    let no_changes = serde_json::json!({});
    assert_eq!(reply, no_changes);
}
/// Every secret listed in the project's `.env` must be removed from the tool
/// output, even when several of them appear on the same line and under
/// different key names than in `.env`; non-secret text must be preserved.
///
/// Each assertion message includes the hook's actual output so that a failure
/// shows what was returned, matching the other redaction tests in this file.
#[test]
fn post_tool_use_multiple_secrets_redacted() {
    let tmp = setup_project(
        "DB_PASSWORD=supersecret123\nAPI_KEY=my-api-key-abc\n",
        None,
    );
    let payload = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/config" },
        "tool_response": "config: password=supersecret123, key=my-api-key-abc, host=localhost"
    })
    .to_string();

    let assertion = Command::cargo_bin("fermata")
        .unwrap()
        .args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(tmp.path())
        .write_stdin(payload)
        .assert()
        .success();

    // Borrow stdout from the finished run instead of cloning the buffer.
    let v: serde_json::Value = serde_json::from_slice(&assertion.get_output().stdout).unwrap();
    let updated = v["hookSpecificOutput"]["updatedToolOutput"]
        .as_str()
        .expect("expected updatedToolOutput");

    assert!(
        !updated.contains("supersecret123"),
        "first secret should be redacted, got: {updated}"
    );
    assert!(
        !updated.contains("my-api-key-abc"),
        "second secret should be redacted, got: {updated}"
    );
    assert!(
        updated.contains("host=localhost"),
        "non-secret should be preserved, got: {updated}"
    );
}