087429d275
Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.
New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.
Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0
Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates
195 tests (130 new), all passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
311 lines
11 KiB
Rust
311 lines
11 KiB
Rust
//! Secret manifest loader.
|
|
//!
|
|
//! Discovers secret files per the `.botsecrets` configuration, parses them,
|
|
//! filters by key patterns, and produces the known-secrets set that the
|
|
//! Redactor will consume.
|
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
use globset::{Glob, GlobSetBuilder};
|
|
use thiserror::Error;
|
|
use walkdir::WalkDir;
|
|
|
|
use super::config::{ParseErrorAction, SecretsConfig};
|
|
use super::parser::{self, FileFormat, ParseError, SecretEntry};
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Errors
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[derive(Debug, Error)]
|
|
pub enum ManifestError {
|
|
#[error(transparent)]
|
|
Parse(#[from] ParseError),
|
|
#[error("glob pattern error: {0}")]
|
|
Glob(String),
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Manifest
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// The complete set of known secrets discovered from a project.
|
|
///
|
|
/// Entries are sorted by value length descending (longest first) so the
|
|
/// redactor replaces the most specific match before shorter substrings.
|
|
#[derive(Debug, Clone)]
|
|
pub struct Manifest {
|
|
entries: Vec<SecretEntry>,
|
|
}
|
|
|
|
/// Shortest secret value (measured with `str::len`, i.e. in bytes) that is
/// kept in a manifest. Shorter values such as `"yes"`, `"on"` or `"42"`
/// would cause false-positive redaction of ordinary text.
const MIN_VALUE_LEN: usize = 4;

/// Directory names that file discovery never looks inside.
const SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    "__pycache__",
    ".venv",
];
|
|
|
|
impl Manifest {
|
|
/// Build a manifest by discovering and parsing secret files relative to
|
|
/// `root`.
|
|
pub fn build(config: &SecretsConfig, root: &Path) -> Result<Self, ManifestError> {
|
|
let mut entries = Vec::new();
|
|
|
|
// 1. Discover files matching `config.files.patterns`.
|
|
let discovered = discover_files(&config.files.patterns, root)?;
|
|
|
|
// 2. Parse each discovered file.
|
|
for path in &discovered {
|
|
match parse_discovered_file(path) {
|
|
Ok(file_entries) => entries.extend(file_entries),
|
|
Err(e) => match config.enforcement.on_parse_error {
|
|
ParseErrorAction::Allow => {
|
|
eprintln!(
|
|
"fermata: warning: skipping unparseable file {}: {}",
|
|
path.display(),
|
|
e
|
|
);
|
|
}
|
|
ParseErrorAction::Deny => {
|
|
return Err(e.into());
|
|
}
|
|
ParseErrorAction::MaskEntireFile => {
|
|
// We cannot extract individual secrets — the redactor
|
|
// may choose to mask the entire file content if it
|
|
// appears in output. For now we log and continue.
|
|
eprintln!(
|
|
"fermata: warning: cannot parse {}: {}",
|
|
path.display(),
|
|
e
|
|
);
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
// 3. Filter discovered entries by the effective key patterns.
|
|
entries = filter_by_key_patterns(entries, config);
|
|
|
|
// 4. Process explicit `[[file]]` overrides — these bypass key filtering
|
|
// because the user declared them intentionally.
|
|
for override_cfg in &config.file_overrides {
|
|
let override_path = root.join(&override_cfg.path);
|
|
if !override_path.is_file() {
|
|
continue;
|
|
}
|
|
|
|
let format = override_cfg
|
|
.format
|
|
.as_deref()
|
|
.and_then(FileFormat::from_hint);
|
|
|
|
let key_filter = if override_cfg.keys.is_empty() {
|
|
None
|
|
} else {
|
|
Some(override_cfg.keys.as_slice())
|
|
};
|
|
|
|
match parser::parse_secret_file(&override_path, format, key_filter) {
|
|
Ok(file_entries) => entries.extend(file_entries),
|
|
Err(e) => {
|
|
eprintln!(
|
|
"fermata: warning: cannot parse override file {}: {}",
|
|
override_path.display(),
|
|
e
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
// 5. Deduplicate (same key + value from different discovery paths).
|
|
entries.sort_by(|a, b| a.key.cmp(&b.key).then_with(|| a.value.cmp(&b.value)));
|
|
entries.dedup_by(|a, b| a.key == b.key && a.value == b.value);
|
|
|
|
// 6. Sort by value length descending (longest first for redaction).
|
|
entries.sort_by(|a, b| b.value.len().cmp(&a.value.len()));
|
|
|
|
// 7. Remove entries with very short values to avoid false replacements.
|
|
entries.retain(|e| e.value.len() >= MIN_VALUE_LEN);
|
|
|
|
Ok(Self { entries })
|
|
}
|
|
|
|
/// Build a manifest from a pre-built list of secret entries.
|
|
///
|
|
/// Applies the same post-processing as [`Manifest::build`]:
|
|
/// - Deduplicates entries with the same key and value.
|
|
/// - Sorts by value length descending (longest first for redaction).
|
|
/// - Removes entries with values shorter than 4 characters.
|
|
///
|
|
/// Useful for testing and for library consumers that obtain secrets
|
|
/// from sources other than filesystem discovery.
|
|
pub fn from_entries(mut entries: Vec<SecretEntry>) -> Self {
|
|
// Deduplicate (same key + value).
|
|
entries.sort_by(|a, b| a.key.cmp(&b.key).then_with(|| a.value.cmp(&b.value)));
|
|
entries.dedup_by(|a, b| a.key == b.key && a.value == b.value);
|
|
|
|
// Sort by value length descending (longest first for redaction).
|
|
entries.sort_by(|a, b| b.value.len().cmp(&a.value.len()));
|
|
|
|
// Remove entries with very short values to avoid false replacements.
|
|
entries.retain(|e| e.value.len() >= MIN_VALUE_LEN);
|
|
|
|
Self { entries }
|
|
}
|
|
|
|
/// Build an empty manifest (no secrets known).
|
|
pub fn empty() -> Self {
|
|
Self {
|
|
entries: Vec::new(),
|
|
}
|
|
}
|
|
|
|
/// Returns all discovered secret entries.
|
|
pub fn entries(&self) -> &[SecretEntry] {
|
|
&self.entries
|
|
}
|
|
|
|
/// Returns `true` if the manifest contains no secrets.
|
|
pub fn is_empty(&self) -> bool {
|
|
self.entries.is_empty()
|
|
}
|
|
|
|
/// Number of known secrets.
|
|
pub fn len(&self) -> usize {
|
|
self.entries.len()
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// File discovery
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Walk the project tree and collect files matching any of the given glob
|
|
/// patterns. Patterns are matched against paths *relative to* `root`.
|
|
fn discover_files(patterns: &[String], root: &Path) -> Result<Vec<PathBuf>, ManifestError> {
|
|
if patterns.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
// Compile all patterns into a single GlobSet for efficient matching.
|
|
let mut builder = GlobSetBuilder::new();
|
|
for pat in patterns {
|
|
// `globset` patterns match against the full relative path including
|
|
// intermediate directories (e.g. `.docker/config.json`). We add
|
|
// both the literal pattern and a `**/` prefixed variant so that
|
|
// `.env` matches at the root and `subdir/.env` matches nested.
|
|
let glob = Glob::new(pat).map_err(|e| ManifestError::Glob(e.to_string()))?;
|
|
builder.add(glob);
|
|
|
|
// Also match nested occurrences: `**/<pattern>`.
|
|
if !pat.contains('/') {
|
|
let nested = format!("**/{pat}");
|
|
let nested_glob =
|
|
Glob::new(&nested).map_err(|e| ManifestError::Glob(e.to_string()))?;
|
|
builder.add(nested_glob);
|
|
}
|
|
}
|
|
let glob_set = builder.build().map_err(|e| ManifestError::Glob(e.to_string()))?;
|
|
|
|
let mut result = Vec::new();
|
|
|
|
for entry in WalkDir::new(root).follow_links(false) {
|
|
let entry = match entry {
|
|
Ok(e) => e,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
// Skip common large / non-project directories.
|
|
if entry.file_type().is_dir() {
|
|
if let Some(name) = entry.file_name().to_str() {
|
|
if SKIP_DIRS.contains(&name) {
|
|
// WalkDir does not support in-place skip, but we simply
|
|
// won't match anything under these dirs because we check
|
|
// the dir name on each entry. We continue and let non-file
|
|
// entries fall through.
|
|
continue;
|
|
}
|
|
}
|
|
continue; // Only interested in files.
|
|
}
|
|
|
|
if !entry.file_type().is_file() {
|
|
continue;
|
|
}
|
|
|
|
// Check that no ancestor directory is in the skip list.
|
|
let abs_path = entry.path();
|
|
if has_skipped_ancestor(abs_path, root) {
|
|
continue;
|
|
}
|
|
|
|
// Match relative path against the glob set.
|
|
let rel = match abs_path.strip_prefix(root) {
|
|
Ok(r) => r,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
if glob_set.is_match(rel) {
|
|
result.push(abs_path.to_path_buf());
|
|
}
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
/// Returns `true` if any path component between `root` and `path` is in
|
|
/// [`SKIP_DIRS`].
|
|
fn has_skipped_ancestor(path: &Path, root: &Path) -> bool {
|
|
if let Ok(rel) = path.strip_prefix(root) {
|
|
for component in rel.parent().into_iter().flat_map(|p| p.components()) {
|
|
if let Some(name) = component.as_os_str().to_str() {
|
|
if SKIP_DIRS.contains(&name) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
false
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Single-file parsing
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Parse a single discovered file. Auto-detects format from extension.
|
|
/// Returns an empty `Vec` if the format cannot be determined (e.g. `.key`,
|
|
/// `.pem` — opaque/binary files).
|
|
fn parse_discovered_file(path: &Path) -> Result<Vec<SecretEntry>, ParseError> {
|
|
let format = match FileFormat::from_path(path) {
|
|
Some(fmt) => fmt,
|
|
None => return Ok(Vec::new()), // opaque file — skip
|
|
};
|
|
parser::parse_secret_file(path, Some(format), None)
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Key-pattern filtering
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Keep only entries whose key matches the effective key-include patterns
|
|
/// from the configuration.
|
|
fn filter_by_key_patterns(entries: Vec<SecretEntry>, config: &SecretsConfig) -> Vec<SecretEntry> {
|
|
entries
|
|
.into_iter()
|
|
.filter(|e| config.key_matches(&e.key))
|
|
.collect()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `Manifest::empty` produces a manifest that reports no entries through
    /// every accessor.
    #[test]
    fn empty_manifest() {
        let manifest = Manifest::empty();

        assert_eq!(manifest.len(), 0);
        assert!(manifest.is_empty());
        assert!(manifest.entries().is_empty());
    }
}
|