✨ feat(fermata): add secret filtering engine — the security brain
Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.
New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.
Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0
Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates
195 tests (130 new), all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,254 @@
|
||||
use dirigent_fermata::core::secrets::config::HeuristicConfig;
|
||||
use dirigent_fermata::core::secrets::scanner::{shannon_entropy, Confidence, Scanner};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: build a scanner with default config (built-in rules only)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn default_scanner() -> Scanner {
|
||||
Scanner::builtin().expect("built-in rules must compile")
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Specific provider patterns
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// An AWS-style access key id embedded in a sentence must be reported under
/// the `aws-access-key` pattern id with `Confidence::High`.
#[test]
fn detects_aws_access_key() {
    let scanner = default_scanner();
    let findings = scanner.scan("here is my key: AKIAIOSFODNN7EXAMPLE ok");

    // Look the finding up by pattern id instead of by position: the confidence
    // assertion must apply to the AWS hit itself, not to whichever finding
    // happens to be first in the returned list.
    let aws = findings
        .iter()
        .find(|f| f.pattern_id == "aws-access-key")
        .unwrap_or_else(|| panic!("expected aws-access-key finding, got: {findings:?}"));

    assert_eq!(aws.confidence, Confidence::High);
}
|
||||
|
||||
/// A `ghp_`-prefixed token scanned on its own must yield a finding whose
/// pattern id is `github-pat-classic`.
#[test]
fn detects_github_pat_classic() {
    let token = "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
    let scanner = default_scanner();
    let findings = scanner.scan(token);

    let matched = findings
        .iter()
        .any(|f| f.pattern_id == "github-pat-classic");
    assert!(
        matched,
        "expected github-pat-classic finding, got: {findings:?}"
    );
}
|
||||
|
||||
/// An `sk_live_` value on the right-hand side of an assignment must yield a
/// finding whose pattern id is `stripe-secret-key`.
#[test]
fn detects_stripe_secret_key() {
    let line = "STRIPE_KEY=sk_live_abcdefghijklmnopqrstuvwx";
    let scanner = default_scanner();
    let findings = scanner.scan(line);

    let hit = findings
        .iter()
        .find(|f| f.pattern_id == "stripe-secret-key");
    assert!(
        hit.is_some(),
        "expected stripe-secret-key finding, got: {findings:?}"
    );
}
|
||||
|
||||
/// A PEM-style block delimited by `BEGIN/END RSA PRIVATE KEY` lines must yield
/// a finding whose pattern id is `private-key-header`.
#[test]
fn detects_private_key_header() {
    let pem = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAK...\n-----END RSA PRIVATE KEY-----";
    let scanner = default_scanner();
    let findings = scanner.scan(pem);

    let matched = findings
        .iter()
        .position(|f| f.pattern_id == "private-key-header")
        .is_some();
    assert!(
        matched,
        "expected private-key-header finding, got: {findings:?}"
    );
}
|
||||
|
||||
/// A three-segment, dot-separated token starting with `eyJ` must yield a
/// finding whose pattern id is `jwt`.
#[test]
fn detects_jwt_token() {
    // Fake but realistically shaped JWT; split into two literals only to keep
    // the source line short — the scanned value is their concatenation.
    let jwt = concat!(
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ik",
        "pvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
    );
    let scanner = default_scanner();
    let findings = scanner.scan(jwt);

    let matched = findings.iter().any(|f| f.pattern_id == "jwt");
    assert!(matched, "expected jwt finding, got: {findings:?}");
}
|
||||
|
||||
/// A `postgres://user:password@host` URL assigned to `DATABASE_URL` must yield
/// a finding whose pattern id is `database-connection-url`.
#[test]
fn detects_database_connection_url() {
    let line = "DATABASE_URL=postgres://admin:s3cretP4ss@db.example.com:5432/mydb";
    let scanner = default_scanner();
    let findings = scanner.scan(line);

    let hit = findings
        .iter()
        .find(|f| f.pattern_id == "database-connection-url");
    assert!(
        hit.is_some(),
        "expected database-connection-url finding, got: {findings:?}"
    );
}
|
||||
|
||||
/// A `hooks.slack.com/services/...` URL must yield a finding whose pattern id
/// is `slack-webhook`.
#[test]
fn detects_slack_webhook() {
    let url = "https://hooks.slack.com/services/T0ABCDEFG/B0ABCDEFG/abcdefghijklmnopqrstuvwx";
    let scanner = default_scanner();
    let findings = scanner.scan(url);

    let matched = findings.iter().any(|f| f.pattern_id == "slack-webhook");
    assert!(
        matched,
        "expected slack-webhook finding, got: {findings:?}"
    );
}
|
||||
|
||||
/// An `sk-ant-` prefixed key embedded in a sentence must yield a finding whose
/// pattern id is `anthropic-api-key`.
#[test]
fn detects_anthropic_api_key() {
    let key = "sk-ant-aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789abcdefgh";
    let sentence = format!("my key is {key}");
    let scanner = default_scanner();
    let findings = scanner.scan(&sentence);

    let matched = findings
        .iter()
        .position(|f| f.pattern_id == "anthropic-api-key")
        .is_some();
    assert!(
        matched,
        "expected anthropic-api-key finding, got: {findings:?}"
    );
}
|
||||
|
||||
/// An `SG.`-prefixed, two-segment key scanned on its own must yield a finding
/// whose pattern id is `sendgrid-api-key`.
#[test]
fn detects_sendgrid_api_key() {
    let scanner = default_scanner();
    let findings =
        scanner.scan("SG.abcdefghijklmnopqrstuv.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst");

    let hit = findings
        .iter()
        .find(|f| f.pattern_id == "sendgrid-api-key");
    assert!(
        hit.is_some(),
        "expected sendgrid-api-key finding, got: {findings:?}"
    );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Generic patterns — entropy filtering
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// An `api_key` assignment whose value is just "test" repeated must not be
/// reported under the `generic-api-key` pattern id.
#[test]
fn rejects_low_entropy_generic_api_key() {
    // The value is the four-letter word "test" repeated five times, so its
    // character distribution is very uneven.
    let line = r#"api_key = "testtesttesttesttest""#;
    let scanner = default_scanner();
    let findings = scanner.scan(line);

    // Only generic-api-key hits matter here; findings from other rules are
    // deliberately ignored by this test.
    let mut generic_hits = Vec::new();
    for finding in &findings {
        if finding.pattern_id == "generic-api-key" {
            generic_hits.push(finding);
        }
    }

    assert!(
        generic_hits.is_empty(),
        "low-entropy api_key should be filtered out, got: {generic_hits:?}"
    );
}
|
||||
|
||||
/// A `secret` assignment with a random-looking 16-character value must be
/// reported under the `generic-secret` pattern id.
#[test]
fn accepts_high_entropy_generic_secret() {
    // NOTE(review): the scanned text opens a double quote before the value but
    // never closes it (compare the quoted value in
    // `rejects_low_entropy_generic_api_key`). Presumably unintentional —
    // confirm against the generic-secret rule before changing the input.
    let line = r#"secret = "a8Kz3Lm9Xq2Wp7Yn"#;
    let scanner = default_scanner();
    let findings = scanner.scan(line);

    let has_generic = findings
        .iter()
        .find(|f| f.pattern_id == "generic-secret")
        .is_some();
    assert!(
        has_generic,
        "high-entropy secret should be detected, got: {findings:?}"
    );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Custom patterns from config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A user-supplied regex in `HeuristicConfig::patterns` must be compiled by
/// `Scanner::new` and reported under the pattern id `custom-0` with
/// `Confidence::High`.
#[test]
fn custom_pattern_from_config() {
    let config = HeuristicConfig {
        enabled: true,
        patterns: vec![r"MY_CUSTOM_[A-Z]{10}".to_string()],
        ..Default::default()
    };
    let scanner = Scanner::new(&config).expect("should compile custom pattern");
    let findings = scanner.scan("found MY_CUSTOM_ABCDEFGHIJ in output");

    // Look the finding up by pattern id instead of by position: the confidence
    // assertion must apply to the custom-pattern hit itself, not to whichever
    // finding happens to be first in the returned list.
    let custom = findings
        .iter()
        .find(|f| f.pattern_id == "custom-0")
        .unwrap_or_else(|| panic!("expected custom-0 finding, got: {findings:?}"));

    assert_eq!(custom.confidence, Confidence::High);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Edge cases
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Scanning the empty string must produce an empty list of findings.
#[test]
fn empty_text_returns_no_findings() {
    let scanner = default_scanner();
    let findings = scanner.scan("");
    assert!(findings.is_empty());
}
|
||||
|
||||
/// An ordinary English sentence must produce no findings at all.
#[test]
fn plain_text_returns_no_findings() {
    let prose = "This is just a normal paragraph with no secrets.";
    let scanner = default_scanner();
    let findings = scanner.scan(prose);

    let clean = findings.is_empty();
    assert!(
        clean,
        "plain text should have no findings, got: {findings:?}"
    );
}
|
||||
|
||||
/// No two findings returned for a single input may cover overlapping spans.
///
/// The input is an `Authorization: Bearer` header carrying a `ghp_`-prefixed
/// token, chosen so that more than one rule could plausibly match the same
/// region of text.
#[test]
fn overlapping_matches_are_deduplicated() {
    // NOTE(review): the token below has 34 characters after `ghp_`, whereas the
    // one in `detects_github_pat_classic` has 36. Confirm this input really
    // triggers more than one rule — with zero or one finding the loops below
    // never execute an assertion.
    let text = "Authorization: Bearer ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh";
    let scanner = default_scanner();
    let findings = scanner.scan(text);

    // Compare every pair (i, j) with i < j: the later finding must start at or
    // after the end of the earlier one.
    for (i, earlier) in findings.iter().enumerate() {
        for (j, later) in findings.iter().enumerate().skip(i + 1) {
            assert!(
                later.span.start >= earlier.span.end,
                "findings {i} and {j} overlap: {:?} vs {:?}",
                earlier.span,
                later.span,
            );
        }
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shannon entropy unit tests (supplement the inline mod tests)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Spot-checks `shannon_entropy` against three inputs: a single repeated
/// character, an evenly balanced two-symbol string, and a string of twenty
/// distinct characters.
#[test]
fn entropy_known_values() {
    // One symbol only → entropy of 0.
    let uniform = shannon_entropy("aaaa");
    assert!(uniform.abs() < f64::EPSILON);

    // Two symbols, each used half the time → 1.0 bits per character.
    let two_symbol = shannon_entropy("ababababab");
    assert!((two_symbol - 1.0).abs() < 0.01);

    // Many distinct symbols → comfortably above 3.5.
    let varied = shannon_entropy("aB3$kL9!mZ7@wQ1#xR5^");
    assert!(varied > 3.5);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scanner construction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// The built-in scanner must report at least 30 rules via `rule_count()`.
#[test]
fn builtin_scanner_has_rules() {
    let scanner = default_scanner();
    let count = scanner.rule_count();
    assert!(
        count >= 30,
        "expected at least 30 built-in rules, got {}",
        count
    );
}
|
||||
|
||||
/// `Scanner::new` must return an error, not panic, when a configured custom
/// pattern is not a valid regex (here: an unclosed character class).
#[test]
fn invalid_custom_pattern_returns_error() {
    let broken = String::from(r"[invalid");
    let config = HeuristicConfig {
        enabled: true,
        patterns: vec![broken],
        ..Default::default()
    };

    let built = Scanner::new(&config);
    assert!(built.is_err());
}
|
||||
Reference in New Issue
Block a user