Files
fermata/tests/core_secrets_scanner.rs
T
Gabor Körber 087429d275 feat(fermata): add secret filtering engine — the security brain
Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.

New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
  built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
  Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
  patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.

Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
  scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0

Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates

195 tests (130 new), all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-25 17:29:07 +02:00

255 lines
8.2 KiB
Rust

use dirigent_fermata::core::secrets::config::HeuristicConfig;
use dirigent_fermata::core::secrets::scanner::{shannon_entropy, Confidence, Scanner};
// ---------------------------------------------------------------------------
// Helper: build a scanner with default config (built-in rules only)
// ---------------------------------------------------------------------------
/// Builds the scanner shared by the tests in this file via `Scanner::builtin()`.
///
/// # Panics
///
/// Panics with "built-in rules must compile" when `Scanner::builtin()` returns an error.
fn default_scanner() -> Scanner {
    let built = Scanner::builtin();
    built.expect("built-in rules must compile")
}
// ---------------------------------------------------------------------------
// Specific provider patterns
// ---------------------------------------------------------------------------
/// An AWS access key id embedded in surrounding prose must be reported under the
/// `aws-access-key` pattern id, and that specific finding must carry high confidence.
#[test]
fn detects_aws_access_key() {
    let scanner = default_scanner();
    let findings = scanner.scan("here is my key: AKIAIOSFODNN7EXAMPLE ok");
    // Look the finding up by id rather than by position: the confidence check must
    // apply to the AWS finding itself, not to whichever finding happens to be first.
    let aws = findings
        .iter()
        .find(|f| f.pattern_id == "aws-access-key")
        .unwrap_or_else(|| panic!("expected aws-access-key finding, got: {findings:?}"));
    assert_eq!(aws.confidence, Confidence::High);
}
/// A bare `ghp_`-prefixed token must be reported under the `github-pat-classic` pattern id.
#[test]
fn detects_github_pat_classic() {
    let token = "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
    let scanner = default_scanner();
    let findings = scanner.scan(token);
    let matched = findings
        .iter()
        .any(|hit| hit.pattern_id == "github-pat-classic");
    assert!(
        matched,
        "expected github-pat-classic finding, got: {findings:?}"
    );
}
/// An `sk_live_` value assigned in env-file style must be reported as `stripe-secret-key`.
#[test]
fn detects_stripe_secret_key() {
    let env_line = "STRIPE_KEY=sk_live_abcdefghijklmnopqrstuvwx";
    let scanner = default_scanner();
    let findings = scanner.scan(env_line);
    let matched = findings
        .iter()
        .any(|hit| hit.pattern_id == "stripe-secret-key");
    assert!(
        matched,
        "expected stripe-secret-key finding, got: {findings:?}"
    );
}
/// A PEM block delimited by `BEGIN/END RSA PRIVATE KEY` lines must be reported
/// under the `private-key-header` pattern id.
#[test]
fn detects_private_key_header() {
    let pem = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAK...\n-----END RSA PRIVATE KEY-----";
    let scanner = default_scanner();
    let findings = scanner.scan(pem);
    let matched = findings
        .iter()
        .any(|hit| hit.pattern_id == "private-key-header");
    assert!(
        matched,
        "expected private-key-header finding, got: {findings:?}"
    );
}
/// A three-segment, dot-separated JWT must be reported under the `jwt` pattern id.
#[test]
fn detects_jwt_token() {
    // A realistic-looking (but fake) JWT, assembled from its header, payload and
    // signature segments; the concatenation is a single token with no whitespace.
    let token = concat!(
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.",
        "eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.",
        "SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
    );
    let scanner = default_scanner();
    let findings = scanner.scan(token);
    let matched = findings.iter().any(|hit| hit.pattern_id == "jwt");
    assert!(matched, "expected jwt finding, got: {findings:?}");
}
/// A `postgres://` URL carrying inline credentials must be reported under the
/// `database-connection-url` pattern id.
#[test]
fn detects_database_connection_url() {
    let env_line = "DATABASE_URL=postgres://admin:s3cretP4ss@db.example.com:5432/mydb";
    let scanner = default_scanner();
    let findings = scanner.scan(env_line);
    let matched = findings
        .iter()
        .any(|hit| hit.pattern_id == "database-connection-url");
    assert!(
        matched,
        "expected database-connection-url finding, got: {findings:?}"
    );
}
/// A `hooks.slack.com/services/...` URL must be reported under the `slack-webhook` pattern id.
#[test]
fn detects_slack_webhook() {
    let url = "https://hooks.slack.com/services/T0ABCDEFG/B0ABCDEFG/abcdefghijklmnopqrstuvwx";
    let scanner = default_scanner();
    let findings = scanner.scan(url);
    let matched = findings.iter().any(|hit| hit.pattern_id == "slack-webhook");
    assert!(
        matched,
        "expected slack-webhook finding, got: {findings:?}"
    );
}
/// An `sk-ant-` key preceded by free text must be reported under the
/// `anthropic-api-key` pattern id.
#[test]
fn detects_anthropic_api_key() {
    let key = "sk-ant-aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789abcdefgh";
    // Embed the key in a sentence so the match has to be found mid-text.
    let sentence = format!("my key is {key}");
    let scanner = default_scanner();
    let findings = scanner.scan(&sentence);
    let matched = findings
        .iter()
        .any(|hit| hit.pattern_id == "anthropic-api-key");
    assert!(
        matched,
        "expected anthropic-api-key finding, got: {findings:?}"
    );
}
/// A bare `SG.`-prefixed, dot-separated key must be reported under the
/// `sendgrid-api-key` pattern id.
#[test]
fn detects_sendgrid_api_key() {
    let scanner = default_scanner();
    let findings =
        scanner.scan("SG.abcdefghijklmnopqrstuv.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst");
    let matched = findings
        .iter()
        .any(|hit| hit.pattern_id == "sendgrid-api-key");
    assert!(
        matched,
        "expected sendgrid-api-key finding, got: {findings:?}"
    );
}
// ---------------------------------------------------------------------------
// Generic patterns — entropy filtering
// ---------------------------------------------------------------------------
/// A quoted `api_key` value made of a short repeated word must not produce a
/// `generic-api-key` finding.
#[test]
fn rejects_low_entropy_generic_api_key() {
    // "test" repeated has very low entropy — should NOT trigger.
    let input = r#"api_key = "testtesttesttesttest""#;
    let scanner = default_scanner();
    let findings = scanner.scan(input);
    // Only the generic rule is under test here; other rule ids are ignored.
    let generic_hits = findings
        .iter()
        .filter(|hit| hit.pattern_id == "generic-api-key")
        .collect::<Vec<_>>();
    assert!(
        generic_hits.is_empty(),
        "low-entropy api_key should be filtered out, got: {generic_hits:?}"
    );
}
/// A quoted, random-looking `secret` value must be reported under the
/// `generic-secret` pattern id.
#[test]
fn accepts_high_entropy_generic_secret() {
    let scanner = default_scanner();
    // A high-entropy random-looking value. The raw string ends with `""#` so that the
    // scanned text contains a properly closed quoted value, in the same shape as the
    // input of `rejects_low_entropy_generic_api_key`.
    let findings = scanner.scan(r#"secret = "a8Kz3Lm9Xq2Wp7Yn""#);
    assert!(
        findings.iter().any(|f| f.pattern_id == "generic-secret"),
        "high-entropy secret should be detected, got: {findings:?}"
    );
}
// ---------------------------------------------------------------------------
// Custom patterns from config
// ---------------------------------------------------------------------------
/// A regex supplied through `HeuristicConfig::patterns` must be compiled by
/// `Scanner::new` and reported under the id `custom-0`, with high confidence.
#[test]
fn custom_pattern_from_config() {
    let config = HeuristicConfig {
        enabled: true,
        patterns: vec![r"MY_CUSTOM_[A-Z]{10}".to_string()],
        ..Default::default()
    };
    let scanner = Scanner::new(&config).expect("should compile custom pattern");
    let findings = scanner.scan("found MY_CUSTOM_ABCDEFGHIJ in output");
    // Look the finding up by id rather than by position so the confidence check
    // applies to the custom rule even if another rule also reports something.
    let custom = findings
        .iter()
        .find(|f| f.pattern_id == "custom-0")
        .unwrap_or_else(|| panic!("expected custom-0 finding, got: {findings:?}"));
    assert_eq!(custom.confidence, Confidence::High);
}
// ---------------------------------------------------------------------------
// Edge cases
// ---------------------------------------------------------------------------
/// Scanning the empty string must yield no findings.
#[test]
fn empty_text_returns_no_findings() {
    let scanner = default_scanner();
    let findings = scanner.scan("");
    assert!(findings.is_empty());
}
/// An ordinary English sentence must not trigger any rule.
#[test]
fn plain_text_returns_no_findings() {
    let prose = "This is just a normal paragraph with no secrets.";
    let scanner = default_scanner();
    let findings = scanner.scan(prose);
    let clean = findings.is_empty();
    assert!(
        clean,
        "plain text should have no findings, got: {findings:?}"
    );
}
/// When several rules can match the same region of text, the reported findings
/// must not overlap: every later finding has to start at or after the end of
/// every earlier one.
#[test]
fn overlapping_matches_are_deduplicated() {
    let scanner = default_scanner();
    // A bearer header carrying the same full-length `ghp_` token (36 characters after
    // the prefix) that `detects_github_pat_classic` uses. With a shorter token the
    // provider-specific rule presumably cannot match, and the test would never see
    // two rules competing for the same span.
    let text = "Authorization: Bearer ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
    let findings = scanner.scan(text);
    // Guard against a vacuous pass: with zero findings the loops below check nothing.
    assert!(
        !findings.is_empty(),
        "expected at least one finding for a bearer header carrying a GitHub token"
    );
    // Verify no two findings have overlapping spans.
    for (i, earlier) in findings.iter().enumerate() {
        for (j, later) in findings.iter().enumerate().skip(i + 1) {
            assert!(
                later.span.start >= earlier.span.end,
                "findings {i} and {j} overlap: {:?} vs {:?}",
                earlier.span,
                later.span,
            );
        }
    }
}
// ---------------------------------------------------------------------------
// Shannon entropy unit tests (supplement the inline mod tests)
// ---------------------------------------------------------------------------
/// Checks `shannon_entropy` against three inputs whose expected results are known:
/// a single repeated symbol, two equally frequent symbols, and a string of many
/// distinct symbols.
#[test]
fn entropy_known_values() {
    // Single character repeated → 0.
    let uniform = shannon_entropy("aaaa");
    assert!(uniform.abs() < f64::EPSILON);

    // Perfectly balanced binary → 1.0 bits/char.
    let two_symbols = shannon_entropy("ababababab");
    assert!((two_symbols - 1.0).abs() < 0.01);

    // High diversity.
    let many_symbols = shannon_entropy("aB3$kL9!mZ7@wQ1#xR5^");
    assert!(many_symbols > 3.5);
}
// ---------------------------------------------------------------------------
// Scanner construction
// ---------------------------------------------------------------------------
/// The built-in scanner must report a rule count of at least 30.
#[test]
fn builtin_scanner_has_rules() {
    let scanner = default_scanner();
    let rules = scanner.rule_count();
    assert!(
        rules >= 30,
        "expected at least 30 built-in rules, got {}",
        rules
    );
}
/// `Scanner::new` must return an error, not panic, when a custom pattern is not a
/// valid regex (here: an unterminated character class).
#[test]
fn invalid_custom_pattern_returns_error() {
    let broken = HeuristicConfig {
        enabled: true,
        patterns: vec![String::from(r"[invalid")],
        ..HeuristicConfig::default()
    };
    let outcome = Scanner::new(&broken);
    assert!(outcome.is_err());
}