✨ feat(fermata): add secret filtering engine — the security brain
Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.
New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.
Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0
Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates
195 tests (130 new), all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,310 @@
|
||||
//! Secret manifest loader.
|
||||
//!
|
||||
//! Discovers secret files per the `.botsecrets` configuration, parses them,
|
||||
//! filters by key patterns, and produces the known-secrets set that the
|
||||
//! Redactor will consume.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use globset::{Glob, GlobSetBuilder};
|
||||
use thiserror::Error;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use super::config::{ParseErrorAction, SecretsConfig};
|
||||
use super::parser::{self, FileFormat, ParseError, SecretEntry};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Errors
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ManifestError {
|
||||
#[error(transparent)]
|
||||
Parse(#[from] ParseError),
|
||||
#[error("glob pattern error: {0}")]
|
||||
Glob(String),
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Manifest
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// The complete set of known secrets discovered from a project.
|
||||
///
|
||||
/// Entries are sorted by value length descending (longest first) so the
|
||||
/// redactor replaces the most specific match before shorter substrings.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Manifest {
|
||||
entries: Vec<SecretEntry>,
|
||||
}
|
||||
|
||||
/// Shortest secret value that is kept, compared against `value.len()` (bytes).
///
/// Shorter values such as `"yes"`, `"on"` or `"42"` are too likely to occur
/// in ordinary text and would cause false-positive redaction.
const MIN_VALUE_LEN: usize = 4;
|
||||
|
||||
/// Directory names that file discovery never looks inside, regardless of the
/// configured patterns.
const SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    "__pycache__",
    ".venv",
];
|
||||
|
||||
impl Manifest {
|
||||
/// Build a manifest by discovering and parsing secret files relative to
|
||||
/// `root`.
|
||||
pub fn build(config: &SecretsConfig, root: &Path) -> Result<Self, ManifestError> {
|
||||
let mut entries = Vec::new();
|
||||
|
||||
// 1. Discover files matching `config.files.patterns`.
|
||||
let discovered = discover_files(&config.files.patterns, root)?;
|
||||
|
||||
// 2. Parse each discovered file.
|
||||
for path in &discovered {
|
||||
match parse_discovered_file(path) {
|
||||
Ok(file_entries) => entries.extend(file_entries),
|
||||
Err(e) => match config.enforcement.on_parse_error {
|
||||
ParseErrorAction::Allow => {
|
||||
eprintln!(
|
||||
"fermata: warning: skipping unparseable file {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
ParseErrorAction::Deny => {
|
||||
return Err(e.into());
|
||||
}
|
||||
ParseErrorAction::MaskEntireFile => {
|
||||
// We cannot extract individual secrets — the redactor
|
||||
// may choose to mask the entire file content if it
|
||||
// appears in output. For now we log and continue.
|
||||
eprintln!(
|
||||
"fermata: warning: cannot parse {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Filter discovered entries by the effective key patterns.
|
||||
entries = filter_by_key_patterns(entries, config);
|
||||
|
||||
// 4. Process explicit `[[file]]` overrides — these bypass key filtering
|
||||
// because the user declared them intentionally.
|
||||
for override_cfg in &config.file_overrides {
|
||||
let override_path = root.join(&override_cfg.path);
|
||||
if !override_path.is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let format = override_cfg
|
||||
.format
|
||||
.as_deref()
|
||||
.and_then(FileFormat::from_hint);
|
||||
|
||||
let key_filter = if override_cfg.keys.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(override_cfg.keys.as_slice())
|
||||
};
|
||||
|
||||
match parser::parse_secret_file(&override_path, format, key_filter) {
|
||||
Ok(file_entries) => entries.extend(file_entries),
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"fermata: warning: cannot parse override file {}: {}",
|
||||
override_path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Deduplicate (same key + value from different discovery paths).
|
||||
entries.sort_by(|a, b| a.key.cmp(&b.key).then_with(|| a.value.cmp(&b.value)));
|
||||
entries.dedup_by(|a, b| a.key == b.key && a.value == b.value);
|
||||
|
||||
// 6. Sort by value length descending (longest first for redaction).
|
||||
entries.sort_by(|a, b| b.value.len().cmp(&a.value.len()));
|
||||
|
||||
// 7. Remove entries with very short values to avoid false replacements.
|
||||
entries.retain(|e| e.value.len() >= MIN_VALUE_LEN);
|
||||
|
||||
Ok(Self { entries })
|
||||
}
|
||||
|
||||
/// Build a manifest from a pre-built list of secret entries.
|
||||
///
|
||||
/// Applies the same post-processing as [`Manifest::build`]:
|
||||
/// - Deduplicates entries with the same key and value.
|
||||
/// - Sorts by value length descending (longest first for redaction).
|
||||
/// - Removes entries with values shorter than 4 characters.
|
||||
///
|
||||
/// Useful for testing and for library consumers that obtain secrets
|
||||
/// from sources other than filesystem discovery.
|
||||
pub fn from_entries(mut entries: Vec<SecretEntry>) -> Self {
|
||||
// Deduplicate (same key + value).
|
||||
entries.sort_by(|a, b| a.key.cmp(&b.key).then_with(|| a.value.cmp(&b.value)));
|
||||
entries.dedup_by(|a, b| a.key == b.key && a.value == b.value);
|
||||
|
||||
// Sort by value length descending (longest first for redaction).
|
||||
entries.sort_by(|a, b| b.value.len().cmp(&a.value.len()));
|
||||
|
||||
// Remove entries with very short values to avoid false replacements.
|
||||
entries.retain(|e| e.value.len() >= MIN_VALUE_LEN);
|
||||
|
||||
Self { entries }
|
||||
}
|
||||
|
||||
/// Build an empty manifest (no secrets known).
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
entries: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns all discovered secret entries.
|
||||
pub fn entries(&self) -> &[SecretEntry] {
|
||||
&self.entries
|
||||
}
|
||||
|
||||
/// Returns `true` if the manifest contains no secrets.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.entries.is_empty()
|
||||
}
|
||||
|
||||
/// Number of known secrets.
|
||||
pub fn len(&self) -> usize {
|
||||
self.entries.len()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// File discovery
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Walk the project tree and collect files matching any of the given glob
|
||||
/// patterns. Patterns are matched against paths *relative to* `root`.
|
||||
fn discover_files(patterns: &[String], root: &Path) -> Result<Vec<PathBuf>, ManifestError> {
|
||||
if patterns.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
// Compile all patterns into a single GlobSet for efficient matching.
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
for pat in patterns {
|
||||
// `globset` patterns match against the full relative path including
|
||||
// intermediate directories (e.g. `.docker/config.json`). We add
|
||||
// both the literal pattern and a `**/` prefixed variant so that
|
||||
// `.env` matches at the root and `subdir/.env` matches nested.
|
||||
let glob = Glob::new(pat).map_err(|e| ManifestError::Glob(e.to_string()))?;
|
||||
builder.add(glob);
|
||||
|
||||
// Also match nested occurrences: `**/<pattern>`.
|
||||
if !pat.contains('/') {
|
||||
let nested = format!("**/{pat}");
|
||||
let nested_glob =
|
||||
Glob::new(&nested).map_err(|e| ManifestError::Glob(e.to_string()))?;
|
||||
builder.add(nested_glob);
|
||||
}
|
||||
}
|
||||
let glob_set = builder.build().map_err(|e| ManifestError::Glob(e.to_string()))?;
|
||||
|
||||
let mut result = Vec::new();
|
||||
|
||||
for entry in WalkDir::new(root).follow_links(false) {
|
||||
let entry = match entry {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
// Skip common large / non-project directories.
|
||||
if entry.file_type().is_dir() {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if SKIP_DIRS.contains(&name) {
|
||||
// WalkDir does not support in-place skip, but we simply
|
||||
// won't match anything under these dirs because we check
|
||||
// the dir name on each entry. We continue and let non-file
|
||||
// entries fall through.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
continue; // Only interested in files.
|
||||
}
|
||||
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check that no ancestor directory is in the skip list.
|
||||
let abs_path = entry.path();
|
||||
if has_skipped_ancestor(abs_path, root) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Match relative path against the glob set.
|
||||
let rel = match abs_path.strip_prefix(root) {
|
||||
Ok(r) => r,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if glob_set.is_match(rel) {
|
||||
result.push(abs_path.to_path_buf());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Returns `true` if any path component between `root` and `path` is in
|
||||
/// [`SKIP_DIRS`].
|
||||
fn has_skipped_ancestor(path: &Path, root: &Path) -> bool {
|
||||
if let Ok(rel) = path.strip_prefix(root) {
|
||||
for component in rel.parent().into_iter().flat_map(|p| p.components()) {
|
||||
if let Some(name) = component.as_os_str().to_str() {
|
||||
if SKIP_DIRS.contains(&name) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Single-file parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Parse a single discovered file. Auto-detects format from extension.
|
||||
/// Returns an empty `Vec` if the format cannot be determined (e.g. `.key`,
|
||||
/// `.pem` — opaque/binary files).
|
||||
fn parse_discovered_file(path: &Path) -> Result<Vec<SecretEntry>, ParseError> {
|
||||
let format = match FileFormat::from_path(path) {
|
||||
Some(fmt) => fmt,
|
||||
None => return Ok(Vec::new()), // opaque file — skip
|
||||
};
|
||||
parser::parse_secret_file(path, Some(format), None)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Key-pattern filtering
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Keep only entries whose key matches the effective key-include patterns
|
||||
/// from the configuration.
|
||||
fn filter_by_key_patterns(entries: Vec<SecretEntry>, config: &SecretsConfig) -> Vec<SecretEntry> {
|
||||
entries
|
||||
.into_iter()
|
||||
.filter(|e| config.key_matches(&e.key))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `Manifest::empty()` reports zero secrets through every accessor.
    #[test]
    fn empty_manifest() {
        let manifest = Manifest::empty();

        assert_eq!(manifest.len(), 0);
        assert!(manifest.is_empty());
        assert!(manifest.entries().is_empty());
    }
}
|
||||
Reference in New Issue
Block a user