✨ feat(fermata): add secret filtering engine — the security brain
Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.
New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.
Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0
Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates
195 tests (130 new), all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,172 @@
|
||||
//! Secret value redactor.
|
||||
//!
|
||||
//! Takes the known-secrets [`Manifest`] and efficiently replaces every
|
||||
//! occurrence of a secret value in arbitrary text using an Aho-Corasick
|
||||
//! automaton for multi-pattern matching.
|
||||
|
||||
use aho_corasick::AhoCorasick;
|
||||
|
||||
use super::config::RedactionStyle;
|
||||
use super::manifest::Manifest;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Output types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Metadata describing a single replacement made while redacting text.
///
/// One value is produced per match: it names the secret that was hit and
/// locates the hit inside the text that was handed to the redactor.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Redaction {
    /// Key name of the secret whose value was replaced.
    pub key: String,

    /// Byte offset, measured in the *original* (un-redacted) text, at which
    /// the matched secret value begins.
    pub offset: usize,

    /// Number of bytes the original secret value occupied before it was
    /// replaced.
    pub original_len: usize,
}
|
||||
|
||||
/// The result of redacting text.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RedactedText {
|
||||
/// The text with secret values replaced.
|
||||
pub text: String,
|
||||
/// List of redactions that were applied (in order of occurrence).
|
||||
pub redactions: Vec<Redaction>,
|
||||
}
|
||||
|
||||
impl RedactedText {
|
||||
/// Returns `true` if any redactions were made.
|
||||
pub fn was_redacted(&self) -> bool {
|
||||
!self.redactions.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Redactor
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Replaces known secret values in text with configurable placeholders.
|
||||
///
|
||||
/// Construction is cheap when the manifest is empty and O(n) in the total
|
||||
/// length of secret values otherwise (Aho-Corasick automaton build).
|
||||
/// Redaction itself is O(n) in the length of the input text.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Redactor {
|
||||
/// The Aho-Corasick automaton for multi-pattern matching.
|
||||
/// `None` when the manifest is empty (no-op fast path).
|
||||
automaton: Option<AhoCorasick>,
|
||||
/// Secret entries parallel to the automaton patterns.
|
||||
/// Index `i` in the automaton corresponds to `entries[i]`.
|
||||
entries: Vec<RedactorEntry>,
|
||||
/// How to format replacements.
|
||||
style: RedactionStyle,
|
||||
}
|
||||
|
||||
/// Per-secret bookkeeping kept alongside the automaton.
///
/// Holds only what is needed to format a placeholder and to report a
/// redaction: the secret's key name and the byte length of its value. The
/// value itself is not stored here.
#[derive(Debug, Clone)]
struct RedactorEntry {
    /// Key name of the secret.
    key: String,

    /// Length of the secret value in bytes.
    value_len: usize,
}
|
||||
|
||||
impl Redactor {
|
||||
/// Build a redactor from a manifest and redaction style.
|
||||
///
|
||||
/// The manifest entries are already sorted by value length descending,
|
||||
/// but Aho-Corasick with `LeftmostLongest` handles overlap correctly
|
||||
/// regardless of input order.
|
||||
pub fn new(manifest: &Manifest, style: RedactionStyle) -> Self {
|
||||
let secrets = manifest.entries();
|
||||
if secrets.is_empty() {
|
||||
return Self {
|
||||
automaton: None,
|
||||
entries: Vec::new(),
|
||||
style,
|
||||
};
|
||||
}
|
||||
|
||||
// Build patterns from secret *values* (not keys).
|
||||
let patterns: Vec<&str> = secrets.iter().map(|e| e.value.as_str()).collect();
|
||||
let entries: Vec<RedactorEntry> = secrets
|
||||
.iter()
|
||||
.map(|e| RedactorEntry {
|
||||
key: e.key.clone(),
|
||||
value_len: e.value.len(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
// LeftmostLongest ensures that when one secret value is a substring
|
||||
// of another, the longer match wins.
|
||||
let automaton = AhoCorasick::builder()
|
||||
.match_kind(aho_corasick::MatchKind::LeftmostLongest)
|
||||
.build(&patterns)
|
||||
.ok(); // If build fails (shouldn't for valid strings), fall back to no-op.
|
||||
|
||||
Self {
|
||||
automaton,
|
||||
entries,
|
||||
style,
|
||||
}
|
||||
}
|
||||
|
||||
/// Redact all known secret values in the input text.
|
||||
///
|
||||
/// Returns the redacted text together with metadata about each
|
||||
/// replacement (key name, byte offset, original length).
|
||||
pub fn redact(&self, text: &str) -> RedactedText {
|
||||
let automaton = match &self.automaton {
|
||||
Some(a) => a,
|
||||
None => {
|
||||
return RedactedText {
|
||||
text: text.to_string(),
|
||||
redactions: Vec::new(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let mut result = String::with_capacity(text.len());
|
||||
let mut redactions = Vec::new();
|
||||
let mut last_end = 0;
|
||||
|
||||
for mat in automaton.find_iter(text) {
|
||||
let entry = &self.entries[mat.pattern().as_usize()];
|
||||
|
||||
// Append text before the match.
|
||||
result.push_str(&text[last_end..mat.start()]);
|
||||
|
||||
// Append the replacement placeholder.
|
||||
let replacement = self.format_replacement(entry);
|
||||
result.push_str(&replacement);
|
||||
|
||||
redactions.push(Redaction {
|
||||
key: entry.key.clone(),
|
||||
offset: mat.start(),
|
||||
original_len: entry.value_len,
|
||||
});
|
||||
|
||||
last_end = mat.end();
|
||||
}
|
||||
|
||||
// Append remaining text after the last match.
|
||||
result.push_str(&text[last_end..]);
|
||||
|
||||
RedactedText {
|
||||
text: result,
|
||||
redactions,
|
||||
}
|
||||
}
|
||||
|
||||
/// Format the replacement string according to the configured style.
|
||||
fn format_replacement(&self, entry: &RedactorEntry) -> String {
|
||||
match self.style {
|
||||
RedactionStyle::Masked => "*****".to_string(),
|
||||
RedactionStyle::Typed => format!("<REDACTED:string:{}>", entry.value_len),
|
||||
RedactionStyle::Named => format!("<REDACTED:{}>", entry.key),
|
||||
RedactionStyle::Absent => String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this redactor has any secrets loaded.
|
||||
pub fn has_secrets(&self) -> bool {
|
||||
self.automaton.is_some()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user