feat(fermata): add secret filtering engine — the security brain

Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.

New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
  built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
  Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
  patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.

Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
  scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0

Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates

195 tests (130 new), all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Gabor Körber
2026-05-25 17:29:07 +02:00
parent f77fd73966
commit 087429d275
22 changed files with 4557 additions and 172 deletions
+310
View File
@@ -0,0 +1,310 @@
//! Secret manifest loader.
//!
//! Discovers secret files per the `.botsecrets` configuration, parses them,
//! filters by key patterns, and produces the known-secrets set that the
//! Redactor will consume.
use std::path::{Path, PathBuf};
use globset::{Glob, GlobSetBuilder};
use thiserror::Error;
use walkdir::WalkDir;
use super::config::{ParseErrorAction, SecretsConfig};
use super::parser::{self, FileFormat, ParseError, SecretEntry};
// ---------------------------------------------------------------------------
// Errors
// ---------------------------------------------------------------------------
/// Errors that can be returned while building a [`Manifest`].
#[derive(Debug, Error)]
pub enum ManifestError {
/// A secret file could not be parsed. The wrapped [`ParseError`] is
/// forwarded transparently, so its own message is what gets displayed.
#[error(transparent)]
Parse(#[from] ParseError),
/// A file-discovery glob pattern (or the combined glob set) failed to
/// compile. Carries the stringified error from the glob library.
#[error("glob pattern error: {0}")]
Glob(String),
}
// ---------------------------------------------------------------------------
// Manifest
// ---------------------------------------------------------------------------
/// The complete set of known secrets discovered from a project.
///
/// Entries are sorted by value length descending (longest first) so the
/// redactor replaces the most specific match before shorter substrings.
/// Both constructors also drop exact key+value duplicates and discard
/// values shorter than [`MIN_VALUE_LEN`].
#[derive(Debug, Clone)]
pub struct Manifest {
// Kept private so the ordering/dedup invariants above can only be
// established through the constructors; read access is via `entries()`.
entries: Vec<SecretEntry>,
}
/// Minimum secret value length to keep. Anything shorter risks false-positive
/// redaction (e.g. `"yes"`, `"on"`, `"42"`).
///
/// Compared against `value.len()` of each entry, so the unit is whatever
/// `len()` reports for the value type (bytes, if the value is a `String`).
const MIN_VALUE_LEN: usize = 4;
/// Directories that are unconditionally skipped during file discovery.
///
/// Matched by exact directory name against every path component below the
/// discovery root; files located anywhere beneath such a directory are never
/// reported.
const SKIP_DIRS: &[&str] = &[".git", "node_modules", "target", "__pycache__", ".venv"];
impl Manifest {
    /// Build a manifest by discovering and parsing secret files relative to
    /// `root`.
    ///
    /// The pipeline is:
    /// 1. discover files matching `config.files.patterns`;
    /// 2. parse each discovered file, handling failures according to
    ///    `config.enforcement.on_parse_error`;
    /// 3. keep only entries whose key passes `config.key_matches`;
    /// 4. append entries from explicit `config.file_overrides` (these are
    ///    not subject to step 3);
    /// 5. normalize the combined list (drop short values, deduplicate,
    ///    order longest value first).
    ///
    /// # Errors
    ///
    /// * [`ManifestError::Glob`] if a discovery pattern cannot be compiled.
    /// * [`ManifestError::Parse`] if a discovered file fails to parse and
    ///   the configured action is [`ParseErrorAction::Deny`].
    pub fn build(config: &SecretsConfig, root: &Path) -> Result<Self, ManifestError> {
        let mut entries = Vec::new();

        // 1. Discover files matching `config.files.patterns`.
        let discovered = discover_files(&config.files.patterns, root)?;

        // 2. Parse each discovered file.
        for path in &discovered {
            match parse_discovered_file(path) {
                Ok(file_entries) => entries.extend(file_entries),
                Err(e) => match config.enforcement.on_parse_error {
                    ParseErrorAction::Allow => {
                        eprintln!(
                            "fermata: warning: skipping unparseable file {}: {}",
                            path.display(),
                            e
                        );
                    }
                    ParseErrorAction::Deny => {
                        return Err(e.into());
                    }
                    ParseErrorAction::MaskEntireFile => {
                        // We cannot extract individual secrets — the redactor
                        // may choose to mask the entire file content if it
                        // appears in output. For now we log and continue.
                        eprintln!(
                            "fermata: warning: cannot parse {}: {}",
                            path.display(),
                            e
                        );
                    }
                },
            }
        }

        // 3. Filter discovered entries by the effective key patterns.
        entries = filter_by_key_patterns(entries, config);

        // 4. Process explicit `[[file]]` overrides — these bypass key filtering
        //    because the user declared them intentionally.
        for override_cfg in &config.file_overrides {
            let override_path = root.join(&override_cfg.path);
            if !override_path.is_file() {
                // Missing override files are silently ignored.
                continue;
            }
            let format = override_cfg
                .format
                .as_deref()
                .and_then(FileFormat::from_hint);
            // An empty key list means "take every key in the file".
            let key_filter = if override_cfg.keys.is_empty() {
                None
            } else {
                Some(override_cfg.keys.as_slice())
            };
            match parser::parse_secret_file(&override_path, format, key_filter) {
                Ok(file_entries) => entries.extend(file_entries),
                Err(e) => {
                    // NOTE(review): override parse failures are always
                    // downgraded to a warning, even when `on_parse_error`
                    // is `Deny` — confirm this is intended.
                    eprintln!(
                        "fermata: warning: cannot parse override file {}: {}",
                        override_path.display(),
                        e
                    );
                }
            }
        }

        // 5. Deduplicate, drop short values, and order longest-first.
        Ok(Self {
            entries: Self::normalize(entries),
        })
    }

    /// Build a manifest from a pre-built list of secret entries.
    ///
    /// Applies the same post-processing as [`Manifest::build`]:
    /// - Deduplicates entries with the same key and value.
    /// - Sorts by value length descending (longest first for redaction).
    /// - Removes entries whose value length is below [`MIN_VALUE_LEN`].
    ///
    /// Useful for testing and for library consumers that obtain secrets
    /// from sources other than filesystem discovery.
    pub fn from_entries(entries: Vec<SecretEntry>) -> Self {
        Self {
            entries: Self::normalize(entries),
        }
    }

    /// Build an empty manifest (no secrets known).
    pub fn empty() -> Self {
        Self {
            entries: Vec::new(),
        }
    }

    /// Returns all discovered secret entries, longest value first.
    pub fn entries(&self) -> &[SecretEntry] {
        &self.entries
    }

    /// Returns `true` if the manifest contains no secrets.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Number of known secrets.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Shared post-processing used by every constructor, so the manifest
    /// invariants are defined in exactly one place.
    ///
    /// Short values are removed first: the predicate depends only on the
    /// value, so this cannot change which entries survive or their final
    /// order, and it shrinks the list before the two sorts run.
    fn normalize(mut entries: Vec<SecretEntry>) -> Vec<SecretEntry> {
        // Remove entries with very short values to avoid false replacements.
        entries.retain(|e| e.value.len() >= MIN_VALUE_LEN);

        // Bring identical key+value pairs next to each other, then collapse
        // them. The sort is stable, so the earliest-seen duplicate is kept.
        entries.sort_by(|a, b| a.key.cmp(&b.key).then_with(|| a.value.cmp(&b.value)));
        entries.dedup_by(|a, b| a.key == b.key && a.value == b.value);

        // Longest value first for redaction. A stable sort keeps ties in
        // key/value order, making the result deterministic.
        entries.sort_by(|a, b| b.value.len().cmp(&a.value.len()));
        entries
    }
}
// ---------------------------------------------------------------------------
// File discovery
// ---------------------------------------------------------------------------
/// Walk the project tree and collect files matching any of the given glob
/// patterns. Patterns are matched against paths *relative to* `root`.
///
/// A pattern without a `/` is additionally registered as `**/<pattern>`, so
/// a bare `.env` matches both at the root and in nested directories.
/// Directories named in [`SKIP_DIRS`] are pruned from the walk and never
/// descended into. Symbolic links are not followed, and entries that cannot
/// be read are skipped silently.
///
/// Returns the matching paths as yielded by the walker (each is `root`
/// joined with the relative path). An empty `patterns` slice returns an
/// empty list without touching the filesystem.
///
/// # Errors
///
/// [`ManifestError::Glob`] if any pattern, or the combined set, fails to
/// compile.
fn discover_files(patterns: &[String], root: &Path) -> Result<Vec<PathBuf>, ManifestError> {
    if patterns.is_empty() {
        return Ok(Vec::new());
    }

    // Compile all patterns into a single GlobSet for efficient matching.
    let mut builder = GlobSetBuilder::new();
    for pat in patterns {
        // `globset` patterns match against the full relative path including
        // intermediate directories (e.g. `.docker/config.json`). We add
        // both the literal pattern and a `**/` prefixed variant so that
        // `.env` matches at the root and `subdir/.env` matches nested.
        let glob = Glob::new(pat).map_err(|e| ManifestError::Glob(e.to_string()))?;
        builder.add(glob);

        // Also match nested occurrences: `**/<pattern>`.
        if !pat.contains('/') {
            let nested = format!("**/{pat}");
            let nested_glob =
                Glob::new(&nested).map_err(|e| ManifestError::Glob(e.to_string()))?;
            builder.add(nested_glob);
        }
    }
    let glob_set = builder.build().map_err(|e| ManifestError::Glob(e.to_string()))?;

    // Prune skipped directories at the walker level so their (often huge)
    // subtrees are never traversed. The `depth() > 0` guard ensures that a
    // root directory which itself happens to be named e.g. `target` is still
    // walked — only directories *below* the root are subject to skipping.
    let walker = WalkDir::new(root)
        .follow_links(false)
        .into_iter()
        .filter_entry(|entry| {
            let is_skipped_dir = entry.depth() > 0
                && entry.file_type().is_dir()
                && entry
                    .file_name()
                    .to_str()
                    .map_or(false, |name| SKIP_DIRS.contains(&name));
            !is_skipped_dir
        });

    let mut result = Vec::new();
    for entry in walker {
        // Unreadable entries (permissions, races) are skipped: best effort.
        let entry = match entry {
            Ok(e) => e,
            Err(_) => continue,
        };

        // Only regular files are candidates; directories and symlinks are not.
        if !entry.file_type().is_file() {
            continue;
        }

        // Defensive re-check: pruning above already guarantees that no
        // ancestor directory is in the skip list.
        let abs_path = entry.path();
        if has_skipped_ancestor(abs_path, root) {
            continue;
        }

        // Match relative path against the glob set.
        let rel = match abs_path.strip_prefix(root) {
            Ok(r) => r,
            Err(_) => continue,
        };
        if glob_set.is_match(rel) {
            result.push(abs_path.to_path_buf());
        }
    }
    Ok(result)
}
/// Reports whether `path` lies beneath a directory listed in [`SKIP_DIRS`].
///
/// Only the directory components between `root` and the final component of
/// `path` are inspected: the final component itself and the components of
/// `root` are never considered. Returns `false` when `path` is not located
/// under `root`, and components that are not valid UTF-8 never match.
fn has_skipped_ancestor(path: &Path, root: &Path) -> bool {
    path.strip_prefix(root)
        .ok()
        .and_then(|relative| relative.parent())
        .map_or(false, |dirs| {
            dirs.components()
                .filter_map(|part| part.as_os_str().to_str())
                .any(|dir_name| SKIP_DIRS.contains(&dir_name))
        })
}
// ---------------------------------------------------------------------------
// Single-file parsing
// ---------------------------------------------------------------------------
/// Parse one file found by discovery, using whatever format
/// `FileFormat::from_path` detects for it.
///
/// When no format is detected the file is treated as opaque (e.g. `.key`,
/// `.pem`) and an empty `Vec` is returned instead of an error. Otherwise the
/// file is handed to the parser with that format and no key filter, and any
/// parse failure is propagated to the caller.
fn parse_discovered_file(path: &Path) -> Result<Vec<SecretEntry>, ParseError> {
    if let Some(detected) = FileFormat::from_path(path) {
        parser::parse_secret_file(path, Some(detected), None)
    } else {
        // Opaque file — nothing to extract.
        Ok(Vec::new())
    }
}
// ---------------------------------------------------------------------------
// Key-pattern filtering
// ---------------------------------------------------------------------------
/// Drop every entry whose key is rejected by `config.key_matches`,
/// preserving the relative order of the entries that remain.
fn filter_by_key_patterns(entries: Vec<SecretEntry>, config: &SecretsConfig) -> Vec<SecretEntry> {
    let mut kept = entries;
    kept.retain(|entry| config.key_matches(&entry.key));
    kept
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A manifest built with `empty()` reports no secrets through every
    /// accessor.
    #[test]
    fn empty_manifest() {
        let manifest = Manifest::empty();
        assert_eq!(manifest.len(), 0);
        assert!(manifest.entries().is_empty());
        assert!(manifest.is_empty());
    }
}