feat(fermata): add secret filtering engine — the security brain

Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.

New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
  built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
  Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
  patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.

Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
  scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0

Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates

195 tests (130 new), all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Gabor Körber
2026-05-25 17:29:07 +02:00
parent f77fd73966
commit 087429d275
22 changed files with 4557 additions and 172 deletions
+373
View File
@@ -0,0 +1,373 @@
//! Integration tests for the secret value redactor.
use std::path::PathBuf;
use dirigent_fermata::core::secrets::config::RedactionStyle;
use dirigent_fermata::core::secrets::manifest::Manifest;
use dirigent_fermata::core::secrets::parser::SecretEntry;
use dirigent_fermata::core::secrets::redactor::Redactor;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Builds a [`SecretEntry`] for tests from a key/value pair.
///
/// The `source` path is always the placeholder `"test"`.
fn entry(key: &str, value: &str) -> SecretEntry {
    let source = PathBuf::from("test");
    SecretEntry {
        key: String::from(key),
        value: String::from(value),
        source,
    }
}
/// Wraps `entries` in a [`Manifest`] and returns a [`Redactor`] that uses the
/// given redaction `style`.
fn make_redactor(entries: Vec<SecretEntry>, style: RedactionStyle) -> Redactor {
    Redactor::new(&Manifest::from_entries(entries), style)
}
// ---------------------------------------------------------------------------
// Basic redaction
// ---------------------------------------------------------------------------
/// A single known secret embedded in a sentence is masked and reported once.
#[test]
fn basic_single_secret() {
    let secrets = vec![entry("DB_PASSWORD", "super_secret_123")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let input = "connecting with password super_secret_123 ...";
    let outcome = redactor.redact(input);

    assert_eq!(outcome.text, "connecting with password ***** ...");
    assert!(outcome.was_redacted());
    assert_eq!(outcome.redactions.len(), 1);

    let only = &outcome.redactions[0];
    assert_eq!(only.key, "DB_PASSWORD");
}
// ---------------------------------------------------------------------------
// Multiple secrets
// ---------------------------------------------------------------------------
/// Two distinct secrets in one input are both masked; the redaction records
/// are expected in the order the values appear in the text.
#[test]
fn multiple_different_secrets() {
    let secrets = vec![
        entry("DB_PASSWORD", "db_pass_value"),
        entry("API_KEY", "ak_12345678"),
    ];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("db=db_pass_value key=ak_12345678");

    assert_eq!(outcome.text, "db=***** key=*****");
    assert_eq!(outcome.redactions.len(), 2);

    let (first, second) = (&outcome.redactions[0], &outcome.redactions[1]);
    assert_eq!(first.key, "DB_PASSWORD");
    assert_eq!(second.key, "API_KEY");
}
// ---------------------------------------------------------------------------
// Repeated occurrences
// ---------------------------------------------------------------------------
/// Each occurrence of the same secret is replaced and counted separately.
#[test]
fn same_secret_multiple_times() {
    let redactor = make_redactor(vec![entry("TOKEN", "tok_abcdef")], RedactionStyle::Named);

    let input = "first=tok_abcdef second=tok_abcdef";
    let outcome = redactor.redact(input);

    let expected = "first=<REDACTED:TOKEN> second=<REDACTED:TOKEN>";
    assert_eq!(outcome.text, expected);
    assert_eq!(outcome.redactions.len(), 2);
}
// ---------------------------------------------------------------------------
// Redaction styles
// ---------------------------------------------------------------------------
/// `RedactionStyle::Masked` is expected to replace the value with `*****`.
#[test]
fn style_masked() {
    let secrets = vec![entry("KEY", "secret_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("val=secret_value");

    assert_eq!(outcome.text, "val=*****");
}
/// `RedactionStyle::Typed` is expected to emit a `<REDACTED:string:N>` marker,
/// where `N` is the length of the replaced value.
#[test]
fn style_typed() {
    let secrets = vec![entry("KEY", "secret_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Typed);

    let outcome = redactor.redact("val=secret_value");

    // The value "secret_value" is 12 characters long.
    assert_eq!(outcome.text, "val=<REDACTED:string:12>");
}
/// `RedactionStyle::Named` is expected to embed the secret's key in the marker.
#[test]
fn style_named() {
    let secrets = vec![entry("MY_API_KEY", "secret_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Named);

    let outcome = redactor.redact("val=secret_value");

    assert_eq!(outcome.text, "val=<REDACTED:MY_API_KEY>");
}
/// `RedactionStyle::Absent` is expected to drop the value entirely while still
/// reporting that a redaction happened.
#[test]
fn style_absent() {
    let secrets = vec![entry("KEY", "secret_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Absent);

    let outcome = redactor.redact("val=secret_value end");

    assert_eq!(outcome.text, "val= end");
    assert!(outcome.was_redacted());
}
// ---------------------------------------------------------------------------
// Overlapping values (longest match wins)
// ---------------------------------------------------------------------------
/// When one secret is a prefix of another, the longer value must be the one
/// that is matched and reported.
#[test]
fn overlapping_longest_match_wins() {
    let secrets = vec![
        entry("SHORT_KEY", "secret"),
        entry("LONG_KEY", "secret_long_value"),
    ];
    let redactor = make_redactor(secrets, RedactionStyle::Named);

    let outcome = redactor.redact("x=secret_long_value");

    // Only the long value may match; the short one is a substring of it.
    assert_eq!(outcome.text, "x=<REDACTED:LONG_KEY>");
    assert_eq!(outcome.redactions.len(), 1);

    let only = &outcome.redactions[0];
    assert_eq!(only.key, "LONG_KEY");
}
/// The short secret is still redacted where it stands alone, even though a
/// longer secret starting with the same characters is also declared.
#[test]
fn shorter_match_still_found_when_no_overlap() {
    let secrets = vec![
        entry("SHORT_KEY", "secret"),
        entry("LONG_KEY", "secret_long_value"),
    ];
    let redactor = make_redactor(secrets, RedactionStyle::Named);

    // First occurrence is the bare "secret"; second is the full long value.
    let input = "a=secret b=secret_long_value";
    let outcome = redactor.redact(input);

    let expected = "a=<REDACTED:SHORT_KEY> b=<REDACTED:LONG_KEY>";
    assert_eq!(outcome.text, expected);
    assert_eq!(outcome.redactions.len(), 2);
}
// ---------------------------------------------------------------------------
// No match
// ---------------------------------------------------------------------------
/// Text that contains none of the declared secrets passes through untouched.
#[test]
fn no_match_returns_unchanged() {
    let secrets = vec![entry("KEY", "not_present_here")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let input = "nothing to see here";
    let outcome = redactor.redact(input);

    assert_eq!(outcome.text, "nothing to see here");
    assert!(!outcome.was_redacted());
    assert!(outcome.redactions.is_empty());
}
// ---------------------------------------------------------------------------
// Empty text
// ---------------------------------------------------------------------------
/// Redacting the empty string yields the empty string and no redaction flag.
#[test]
fn empty_input_returns_empty() {
    let secrets = vec![entry("KEY", "some_secret")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("");

    assert_eq!(outcome.text, "");
    assert!(!outcome.was_redacted());
}
// ---------------------------------------------------------------------------
// Empty manifest
// ---------------------------------------------------------------------------
/// A redactor built from `Manifest::empty()` reports no secrets and leaves
/// its input unchanged.
#[test]
fn empty_manifest_returns_unchanged() {
    let no_secrets = Manifest::empty();
    let redactor = Redactor::new(&no_secrets, RedactionStyle::Masked);
    assert!(!redactor.has_secrets());

    let input = "some text with no secrets";
    let outcome = redactor.redact(input);

    assert_eq!(outcome.text, "some text with no secrets");
    assert!(!outcome.was_redacted());
}
// ---------------------------------------------------------------------------
// Short values filtered out by Manifest::from_entries
// ---------------------------------------------------------------------------
/// Values shorter than 4 characters are expected to be dropped by
/// `Manifest::from_entries`, so only the 4-character value is redacted.
#[test]
fn short_values_are_filtered() {
    let secrets = vec![entry("TINY", "abc"), entry("LONG_ENOUGH", "abcd")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("abc abcd");

    // "abc" is too short and must survive; "abcd" must be masked.
    assert_eq!(outcome.text, "abc *****");
    assert_eq!(outcome.redactions.len(), 1);

    let only = &outcome.redactions[0];
    assert_eq!(only.key, "LONG_ENOUGH");
}
// ---------------------------------------------------------------------------
// Zero false negatives — every declared secret must be caught
// ---------------------------------------------------------------------------
/// Every secret declared in the manifest must be removed from the output.
///
/// The input text is assembled from the declared entries themselves, so the
/// values being searched for cannot drift away from the values handed to the
/// redactor.
#[test]
fn zero_false_negatives() {
    let secrets = vec![
        entry("A_SECRET", "alpha_secret_val"),
        entry("B_TOKEN", "bravo_token_val_"),
        entry("C_PASSWORD", "charlie_pass_99"),
        entry("D_API_KEY", "delta_key_00000"),
    ];
    let r = make_redactor(secrets.clone(), RedactionStyle::Masked);

    // Build text that contains every single declared secret value.
    let text = format!(
        "a={} b={} c={} d={}",
        secrets[0].value, secrets[1].value, secrets[2].value, secrets[3].value,
    );
    let result = r.redact(&text);

    // Checked unconditionally: all fixture values are 15+ characters long, and
    // a length guard here would silently skip the leak check if a fixture were
    // ever shortened.
    for s in &secrets {
        assert!(
            !result.text.contains(&s.value),
            "Secret {} was not redacted: {}",
            s.key,
            result.text,
        );
    }

    // One redaction record per declared secret.
    assert_eq!(result.redactions.len(), secrets.len());
}
// ---------------------------------------------------------------------------
// Multi-line text
// ---------------------------------------------------------------------------
/// Secrets spread over several lines of a config-style document are all
/// removed, while unrelated values on other lines are kept.
#[test]
fn multi_line_redaction() {
    let secrets = vec![
        entry("DB_PASSWORD", "s3cr3t_p@ss"),
        entry("API_KEY", "ak-1234567890"),
    ];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    // One literal per line of the document; each line ends in a newline.
    let document = concat!(
        "# Config file\n",
        "DATABASE_URL=postgres://user:s3cr3t_p@ss@host/db\n",
        "API_KEY=ak-1234567890\n",
        "OTHER=safe_value\n",
    );
    let outcome = redactor.redact(document);

    for leaked in ["s3cr3t_p@ss", "ak-1234567890"] {
        assert!(!outcome.text.contains(leaked));
    }
    assert!(outcome.text.contains("safe_value"));
    assert_eq!(outcome.redactions.len(), 2);
}
// ---------------------------------------------------------------------------
// Redaction metadata correctness
// ---------------------------------------------------------------------------
/// The redaction record carries the key, the byte offset of the match in the
/// input, and the length of the replaced value.
#[test]
fn redaction_metadata_offset_and_len() {
    let secrets = vec![entry("SECRET", "ABCDEFGH")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("prefix_ABCDEFGH_suffix");
    assert_eq!(outcome.redactions.len(), 1);

    let record = &outcome.redactions[0];
    assert_eq!(record.key, "SECRET");
    // The match starts right after the 7-byte "prefix_".
    assert_eq!(record.offset, 7);
    // "ABCDEFGH" is 8 bytes long.
    assert_eq!(record.original_len, 8);
}
/// Repeated matches each get their own record with the correct offset.
#[test]
fn redaction_metadata_multiple_offsets() {
    let secrets = vec![entry("TOK", "xxxx1234")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let input = "a=xxxx1234 b=xxxx1234";
    let outcome = redactor.redact(input);
    assert_eq!(outcome.redactions.len(), 2);

    // First match follows "a=" (offset 2).
    let first = &outcome.redactions[0];
    assert_eq!(first.offset, 2);
    assert_eq!(first.original_len, 8);

    // Second match follows " b=" (offset 13).
    let second = &outcome.redactions[1];
    assert_eq!(second.offset, 13);
    assert_eq!(second.original_len, 8);
}
// ---------------------------------------------------------------------------
// has_secrets() helper
// ---------------------------------------------------------------------------
/// `has_secrets()` is true when the manifest holds a usable entry.
#[test]
fn has_secrets_with_entries() {
    let secrets = vec![entry("KEY", "long_enough_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    assert!(redactor.has_secrets());
}
/// `has_secrets()` is false when no entries were supplied.
#[test]
fn has_secrets_empty() {
    let redactor = make_redactor(Vec::new(), RedactionStyle::Masked);

    assert!(!redactor.has_secrets());
}
// ---------------------------------------------------------------------------
// was_redacted() helper
// ---------------------------------------------------------------------------
/// `was_redacted()` is true when the input consists of a declared secret.
#[test]
fn was_redacted_true_when_match() {
    let secrets = vec![entry("KEY", "findme_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("findme_value");

    assert!(outcome.was_redacted());
}
/// `was_redacted()` is false when the input contains no declared secret.
#[test]
fn was_redacted_false_when_no_match() {
    let secrets = vec![entry("KEY", "findme_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("nothing here");

    assert!(!outcome.was_redacted());
}
// ---------------------------------------------------------------------------
// Deduplication in from_entries
// ---------------------------------------------------------------------------
/// Two identical key/value entries collapse to a single manifest entry.
#[test]
fn duplicate_entries_deduplicated() {
    let duplicated = vec![
        entry("KEY", "same_value_here"),
        entry("KEY", "same_value_here"),
    ];

    let manifest = Manifest::from_entries(duplicated);

    assert_eq!(manifest.len(), 1);
}