feat(fermata): add secret filtering engine — the security brain

Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.

New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
  built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
  Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
  patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.

Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
  scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0

Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates

195 tests (130 new), all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Gabor Körber
2026-05-25 17:29:07 +02:00
parent f77fd73966
commit 087429d275
22 changed files with 4557 additions and 172 deletions
+530
View File
@@ -0,0 +1,530 @@
//! Parse and merge `.botsecrets` TOML configuration files.
//!
//! The configuration is layered (most-specific wins):
//!
//! 1. Built-in defaults
//! 2. `~/.config/fermata/.botsecrets` (user-global)
//! 3. `<root>/.botsecrets` (project)
//! 4. `<root>/.botsecrets.local` (local overrides, git-ignored)
//!
//! List fields (`files.patterns`, `heuristic.patterns`, and the `[[file]]`
//! overrides) are *replaced* wholesale by more-specific layers, with one
//! exception: `keys.include` and `keys.exclude` *accumulate* across layers.
//! Scalar fields (style, mode, enabled) take the most-specific value.
use globset::{Glob, GlobMatcher};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use thiserror::Error;
// ---------------------------------------------------------------------------
// Errors
// ---------------------------------------------------------------------------
/// Errors produced while reading a `.botsecrets` file from disk.
///
/// Both variants record the path of the offending file so the message can
/// tell the user which configuration layer failed.
#[derive(Debug, Error)]
pub enum SecretsConfigError {
/// The file could not be read.
#[error("io error reading {path}: {source}")]
Io {
/// Path of the file that failed to load.
path: PathBuf,
/// Underlying I/O error.
source: std::io::Error,
},
/// The file was read but its contents are not valid TOML for this schema.
#[error("TOML parse error in {path}: {source}")]
Parse {
/// Path of the file that failed to parse.
path: PathBuf,
/// Underlying TOML deserialization error.
source: toml::de::Error,
},
}
// ---------------------------------------------------------------------------
// Config types
// ---------------------------------------------------------------------------
/// Top-level `.botsecrets` configuration.
///
/// Every section is optional when deserializing: a missing section falls
/// back to that section's `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SecretsConfig {
/// `[files]` section: file-name patterns.
#[serde(default)]
pub files: FilesConfig,
/// `[keys]` section: user additions to / removals from the key patterns.
#[serde(default)]
pub keys: KeysConfig,
/// `[redaction]` section: redaction style.
#[serde(default)]
pub redaction: RedactionConfig,
/// `[heuristic]` section: heuristic scanner switches and patterns.
#[serde(default)]
pub heuristic: HeuristicConfig,
/// `[enforcement]` section: enforcement mode and parse-error policy.
#[serde(default)]
pub enforcement: EnforcementConfig,
/// Per-file overrides; (de)serialized under the key `file` (i.e. `[[file]]`
/// tables in TOML). Empty when absent.
#[serde(default, rename = "file")]
pub file_overrides: Vec<FileOverride>,
}
/// The `[files]` section.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FilesConfig {
/// File-name patterns; defaults to `default_file_patterns()` when the key
/// is omitted.
/// NOTE(review): presumably globs selecting secret-bearing files — the
/// consumer of this list is outside this file; confirm there.
#[serde(default = "default_file_patterns")]
pub patterns: Vec<String>,
}
/// The `[keys]` section: user adjustments to the sensitive-key patterns.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct KeysConfig {
/// Extra key patterns appended to `BUILTIN_KEY_PATTERNS`. Empty by default.
#[serde(default)]
pub include: Vec<String>,
/// Patterns to drop from the effective list. Removal is by exact string
/// comparison against the pattern text (see `effective_key_includes`).
/// Empty by default.
#[serde(default)]
pub exclude: Vec<String>,
}
/// Redaction style selected by `redaction.style`.
///
/// Serialized in kebab-case: `"masked"`, `"typed"`, `"named"`, `"absent"`.
/// NOTE(review): what each style renders is decided by the redactor, which
/// is outside this file — confirm there before documenting per-variant output.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum RedactionStyle {
Masked,
Typed,
Named,
Absent,
}
/// The `[redaction]` section.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RedactionConfig {
/// Redaction style; defaults to `RedactionStyle::Masked` when omitted.
#[serde(default = "default_redaction_style")]
pub style: RedactionStyle,
}
/// Operating mode selected by `heuristic.mode`.
///
/// Serialized in kebab-case: `"enforce"`, `"report"`, `"disabled"`.
/// NOTE(review): how each mode affects scanning is implemented by the
/// scanner, outside this file — confirm there.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum HeuristicMode {
Enforce,
Report,
Disabled,
}
/// The `[heuristic]` section.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct HeuristicConfig {
/// Master switch; defaults to `true` when omitted.
#[serde(default = "default_true")]
pub enabled: bool,
/// Operating mode; defaults to `HeuristicMode::Enforce` when omitted.
#[serde(default = "default_heuristic_mode")]
pub mode: HeuristicMode,
/// User-supplied heuristic patterns; empty by default.
/// NOTE(review): pattern syntax (regex vs. glob) is defined by the
/// consumer, outside this file — confirm there.
#[serde(default)]
pub patterns: Vec<String>,
}
/// Enforcement mode selected by `enforcement.mode`.
///
/// Serialized in kebab-case: `"strict"`, `"permissive"`, `"audit"`.
/// NOTE(review): the behavior attached to each mode is implemented outside
/// this file — confirm there.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum EnforcementMode {
Strict,
Permissive,
Audit,
}
/// Policy selected by `enforcement.on_parse_error`.
///
/// Serialized in kebab-case: `"mask-entire-file"`, `"allow"`, `"deny"`.
/// NOTE(review): presumably applies when a secret file cannot be parsed; the
/// code acting on this value is outside this file — confirm there.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum ParseErrorAction {
MaskEntireFile,
Allow,
Deny,
}
/// The `[enforcement]` section.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct EnforcementConfig {
/// Enforcement mode; defaults to `EnforcementMode::Permissive` when omitted.
#[serde(default = "default_enforcement_mode")]
pub mode: EnforcementMode,
/// Parse-error policy; defaults to `ParseErrorAction::MaskEntireFile` when
/// omitted. The TOML key is `on_parse_error` (snake_case, not kebab-case).
#[serde(default = "default_parse_error_action")]
pub on_parse_error: ParseErrorAction,
}
/// One `[[file]]` entry: settings that apply to a specific file.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FileOverride {
/// Target of the override. Required — this is the only field without a
/// serde default.
/// NOTE(review): whether this is a literal path or a glob is decided by
/// the consumer, outside this file — confirm there.
pub path: String,
/// Optional format name; `None` when omitted.
/// NOTE(review): presumably forces a parser format — confirm against the parser.
#[serde(default)]
pub format: Option<String>,
/// Key names/patterns attached to this file; empty when omitted.
#[serde(default)]
pub keys: Vec<String>,
}
// ---------------------------------------------------------------------------
// Built-in defaults
// ---------------------------------------------------------------------------
/// Built-in file-name patterns used for `files.patterns` when no layer
/// overrides them.
///
/// Returns a freshly allocated list on every call, in a fixed order.
pub(crate) fn default_file_patterns() -> Vec<String> {
    const DEFAULTS: &[&str] = &[
        // dotenv-style files
        ".env",
        ".env.*",
        "*.env",
        // generic secret / credential files
        "secrets.*",
        "credentials.*",
        // key material and certificate bundles
        "*.key",
        "*.pem",
        "*.p12",
        "*.pfx",
        "id_rsa",
        "id_ed25519",
        "id_ecdsa",
        "Secrets.toml",
        "Secrets.*.toml",
        // Terraform variables and state
        "terraform.tfvars",
        "*.auto.tfvars",
        "terraform.tfstate",
        "*.tfstate",
        // tool-specific credential stores
        ".docker/config.json",
        "config/master.key",
        "config/credentials/*.key",
        ".aws/credentials",
        ".netrc",
        ".htpasswd",
        "service-account.json",
        "service-account-key.json",
    ];

    DEFAULTS.iter().map(|pattern| (*pattern).to_owned()).collect()
}
/// Built-in key-name patterns treated as sensitive by default.
///
/// These seed the list returned by `SecretsConfig::effective_key_includes`;
/// an entry is dropped only when `keys.exclude` contains exactly the same
/// string. Entries are globs (`*` wildcards) and are matched
/// case-insensitively by `SecretsConfig::key_matches`.
///
/// Many literal names here are already covered by a wildcard entry (e.g.
/// `GITHUB_TOKEN` by `*TOKEN*`). Because exclusion is by exact string,
/// excluding a wildcard does not remove the literal names it used to cover.
pub const BUILTIN_KEY_PATTERNS: &[&str] = &[
"*PASSWORD*",
"*PASSWD*",
"*SECRET*",
"*API_KEY*",
"*APIKEY*",
"*TOKEN*",
"*ACCESS_KEY*",
"*PRIVATE_KEY*",
"*AUTH*",
"*CREDENTIAL*",
"*CONNECTION_STRING*",
"*CONN_STR*",
"DATABASE_URL",
"REDIS_URL",
"MONGODB_URI",
"AMQP_URL",
"AWS_SECRET_ACCESS_KEY",
"AWS_ACCESS_KEY_ID",
"AWS_SESSION_TOKEN",
"GITHUB_TOKEN",
"GH_TOKEN",
"GITLAB_TOKEN",
"NPM_TOKEN",
"NODE_AUTH_TOKEN",
"STRIPE_SECRET_KEY",
"STRIPE_WEBHOOK_SECRET",
"OPENAI_API_KEY",
"ANTHROPIC_API_KEY",
"SENTRY_DSN",
"HEROKU_API_KEY",
"SENDGRID_API_KEY",
"JWT_SECRET",
"JWT_SIGNING_KEY",
"SESSION_SECRET",
"ENCRYPTION_KEY",
"ENCRYPT_KEY",
"MASTER_KEY",
"SIGNING_KEY",
"SECRET_KEY",
"SECRET_KEY_BASE",
"APP_KEY",
"NEXTAUTH_SECRET",
];
/// Serde default for `redaction.style`: `RedactionStyle::Masked`.
fn default_redaction_style() -> RedactionStyle {
RedactionStyle::Masked
}
/// Serde default for `heuristic.mode`: `HeuristicMode::Enforce`.
fn default_heuristic_mode() -> HeuristicMode {
HeuristicMode::Enforce
}
/// Serde default helper that yields `true` (used for `heuristic.enabled`).
/// Needed because `#[serde(default = "...")]` takes a function path, not a literal.
fn default_true() -> bool {
true
}
/// Serde default for `enforcement.mode`: `EnforcementMode::Permissive`.
fn default_enforcement_mode() -> EnforcementMode {
EnforcementMode::Permissive
}
/// Serde default for `enforcement.on_parse_error`:
/// `ParseErrorAction::MaskEntireFile`.
fn default_parse_error_action() -> ParseErrorAction {
ParseErrorAction::MaskEntireFile
}
// ---------------------------------------------------------------------------
// Default impls
// ---------------------------------------------------------------------------
impl Default for SecretsConfig {
fn default() -> Self {
Self {
files: FilesConfig::default(),
keys: KeysConfig::default(),
redaction: RedactionConfig::default(),
heuristic: HeuristicConfig::default(),
enforcement: EnforcementConfig::default(),
file_overrides: Vec::new(),
}
}
}
impl Default for FilesConfig {
fn default() -> Self {
Self {
patterns: default_file_patterns(),
}
}
}
impl Default for KeysConfig {
fn default() -> Self {
Self {
include: Vec::new(),
exclude: Vec::new(),
}
}
}
impl Default for RedactionConfig {
fn default() -> Self {
Self {
style: default_redaction_style(),
}
}
}
impl Default for HeuristicConfig {
fn default() -> Self {
Self {
enabled: default_true(),
mode: default_heuristic_mode(),
patterns: Vec::new(),
}
}
}
impl Default for EnforcementConfig {
fn default() -> Self {
Self {
mode: default_enforcement_mode(),
on_parse_error: default_parse_error_action(),
}
}
}
// ---------------------------------------------------------------------------
// Partial layer (for merge)
// ---------------------------------------------------------------------------
/// A partially-specified config layer parsed from a single `.botsecrets` file.
/// `Option`-wrapped fields distinguish "absent" from "explicitly set".
///
/// `None` for a section means the file did not mention it, so merging leaves
/// the corresponding part of the accumulated `SecretsConfig` untouched.
#[derive(Debug, Clone, Default, Deserialize)]
struct PartialSecretsConfig {
/// `[files]` section, if present in this layer.
#[serde(default)]
files: Option<PartialFilesConfig>,
/// `[keys]` section, if present in this layer.
#[serde(default)]
keys: Option<PartialKeysConfig>,
/// `[redaction]` section, if present in this layer.
#[serde(default)]
redaction: Option<PartialRedactionConfig>,
/// `[heuristic]` section, if present in this layer.
#[serde(default)]
heuristic: Option<PartialHeuristicConfig>,
/// `[enforcement]` section, if present in this layer.
#[serde(default)]
enforcement: Option<PartialEnforcementConfig>,
/// `[[file]]` overrides, if present in this layer. The TOML key is `file`,
/// the same key `SecretsConfig::file_overrides` is renamed to.
#[serde(default, rename = "file")]
file: Option<Vec<FileOverride>>,
}
/// Layer view of `[files]`; `None` means "not set in this layer".
#[derive(Debug, Clone, Default, Deserialize)]
struct PartialFilesConfig {
/// When present, replaces the accumulated `files.patterns` wholesale.
patterns: Option<Vec<String>>,
}
/// Layer view of `[keys]`; `None` means "not set in this layer".
#[derive(Debug, Clone, Default, Deserialize)]
struct PartialKeysConfig {
/// When present, appended to the accumulated `keys.include`.
include: Option<Vec<String>>,
/// When present, appended to the accumulated `keys.exclude`.
exclude: Option<Vec<String>>,
}
/// Layer view of `[redaction]`; `None` means "not set in this layer".
#[derive(Debug, Clone, Default, Deserialize)]
struct PartialRedactionConfig {
/// When present, overwrites the accumulated `redaction.style`.
style: Option<RedactionStyle>,
}
/// Layer view of `[heuristic]`; `None` means "not set in this layer".
#[derive(Debug, Clone, Default, Deserialize)]
struct PartialHeuristicConfig {
/// When present, overwrites the accumulated `heuristic.enabled`.
enabled: Option<bool>,
/// When present, overwrites the accumulated `heuristic.mode`.
mode: Option<HeuristicMode>,
/// When present, replaces the accumulated `heuristic.patterns` wholesale.
patterns: Option<Vec<String>>,
}
/// Layer view of `[enforcement]`; `None` means "not set in this layer".
#[derive(Debug, Clone, Default, Deserialize)]
struct PartialEnforcementConfig {
/// When present, overwrites the accumulated `enforcement.mode`.
mode: Option<EnforcementMode>,
/// When present, overwrites the accumulated `enforcement.on_parse_error`.
on_parse_error: Option<ParseErrorAction>,
}
// ---------------------------------------------------------------------------
// Merge logic
// ---------------------------------------------------------------------------
impl SecretsConfig {
/// Apply a partial layer on top of `self`.
///
/// - Vec fields (`files.patterns`, `heuristic.patterns`, `file_overrides`):
/// **replaced** by the layer's value when present.
/// - `keys.include` / `keys.exclude`: **accumulated** (appended).
/// - Scalar fields: overwritten when present in the layer.
fn merge_layer(&mut self, layer: PartialSecretsConfig) {
// files
if let Some(f) = layer.files {
if let Some(patterns) = f.patterns {
self.files.patterns = patterns;
}
}
// keys (accumulate)
if let Some(k) = layer.keys {
if let Some(inc) = k.include {
self.keys.include.extend(inc);
}
if let Some(exc) = k.exclude {
self.keys.exclude.extend(exc);
}
}
// redaction
if let Some(r) = layer.redaction {
if let Some(style) = r.style {
self.redaction.style = style;
}
}
// heuristic
if let Some(h) = layer.heuristic {
if let Some(enabled) = h.enabled {
self.heuristic.enabled = enabled;
}
if let Some(mode) = h.mode {
self.heuristic.mode = mode;
}
if let Some(patterns) = h.patterns {
self.heuristic.patterns = patterns;
}
}
// enforcement
if let Some(e) = layer.enforcement {
if let Some(mode) = e.mode {
self.enforcement.mode = mode;
}
if let Some(action) = e.on_parse_error {
self.enforcement.on_parse_error = action;
}
}
// file overrides (replace)
if let Some(overrides) = layer.file {
self.file_overrides = overrides;
}
}
}
// ---------------------------------------------------------------------------
// Loading & discovery
// ---------------------------------------------------------------------------
/// Return the user-global fermata config directory, if one can be determined.
///
/// - Unix: `$HOME/.config/fermata`
/// - Windows: `%APPDATA%/fermata`
/// - Any other target: `None` (there is no user-global layer).
///
/// An unset *or empty* variable yields `None`. Joining onto an empty base
/// would produce a relative path, which would make the "user-global" layer
/// silently depend on the current working directory.
fn user_config_dir() -> Option<PathBuf> {
    #[cfg(unix)]
    {
        std::env::var_os("HOME")
            .filter(|home| !home.is_empty())
            .map(|home| PathBuf::from(home).join(".config").join("fermata"))
    }
    #[cfg(windows)]
    {
        std::env::var_os("APPDATA")
            .filter(|appdata| !appdata.is_empty())
            .map(|appdata| PathBuf::from(appdata).join("fermata"))
    }
    // Without this arm the function has no tail expression on other targets
    // (e.g. wasm) and the crate fails to compile there.
    #[cfg(not(any(unix, windows)))]
    {
        None
    }
}
impl SecretsConfig {
    /// Load `.botsecrets` configuration for a project.
    ///
    /// Merges layers in order (most-specific wins):
    /// 1. Built-in defaults
    /// 2. `~/.config/fermata/.botsecrets`
    /// 3. `<root>/.botsecrets`
    /// 4. `<root>/.botsecrets.local`
    ///
    /// A layer whose path is not an existing regular file is skipped.
    ///
    /// # Errors
    ///
    /// Returns the first [`SecretsConfigError`] hit while reading or parsing
    /// a layer that does exist; later layers are not consulted.
    pub fn load(root: &Path) -> Result<Self, SecretsConfigError> {
        // Ordered least- to most-specific so that later layers win. The
        // user-global layer is simply absent when no config dir is known.
        let layer_files = user_config_dir()
            .map(|dir| dir.join(".botsecrets"))
            .into_iter()
            .chain(vec![
                root.join(".botsecrets"),
                root.join(".botsecrets.local"),
            ]);

        let mut config = Self::default();
        for file in layer_files {
            if file.is_file() {
                config.merge_layer(Self::read_partial(&file)?);
            }
        }
        Ok(config)
    }

    /// Parse a single `.botsecrets` file into a partial layer.
    ///
    /// # Errors
    ///
    /// [`SecretsConfigError::Io`] if the file cannot be read,
    /// [`SecretsConfigError::Parse`] if it is not valid TOML for the schema.
    /// Both carry `path`.
    fn read_partial(path: &Path) -> Result<PartialSecretsConfig, SecretsConfigError> {
        let text = std::fs::read_to_string(path).map_err(|source| SecretsConfigError::Io {
            path: path.to_path_buf(),
            source,
        })?;
        toml::from_str(&text).map_err(|source| SecretsConfigError::Parse {
            path: path.to_path_buf(),
            source,
        })
    }

    /// Load from a TOML string (useful for testing and embedding).
    ///
    /// The string is deserialized directly as a complete `SecretsConfig`:
    /// omitted sections and fields take their serde defaults, and no file
    /// layering is involved.
    pub fn from_toml(toml_str: &str) -> Result<Self, toml::de::Error> {
        toml::from_str(toml_str)
    }

    /// Returns the effective key-include patterns: built-in defaults followed
    /// by user `keys.include`, minus any pattern that appears in
    /// `keys.exclude`.
    ///
    /// Exclusion is an exact, case-sensitive comparison of pattern text. It
    /// removes list entries; it does not carve exceptions out of a wildcard
    /// (excluding `"FOO_TOKEN"` has no effect while `"*TOKEN*"` remains).
    pub fn effective_key_includes(&self) -> Vec<String> {
        let excluded: std::collections::HashSet<&str> =
            self.keys.exclude.iter().map(String::as_str).collect();

        // Filter before cloning so excluded patterns are never allocated.
        BUILTIN_KEY_PATTERNS
            .iter()
            .copied()
            .chain(self.keys.include.iter().map(String::as_str))
            .filter(|pattern| !excluded.contains(pattern))
            .map(str::to_owned)
            .collect()
    }

    /// Check whether `key` matches any of the effective key-include patterns.
    ///
    /// Matching is ASCII-case-insensitive and uses glob semantics
    /// (`*` wildcards): both the key and each pattern are upper-cased before
    /// comparison.
    ///
    /// A pattern that is not a valid glob (e.g. an unclosed `[`) is compared
    /// literally instead of being skipped, so a malformed entry in
    /// `keys.include` still protects the key it names rather than silently
    /// protecting nothing.
    pub fn key_matches(&self, key: &str) -> bool {
        let key_upper = key.to_ascii_uppercase();
        self.effective_key_includes().iter().any(|pattern| {
            let pattern_upper = pattern.to_ascii_uppercase();
            match Glob::new(&pattern_upper) {
                Ok(glob) => {
                    let matcher: GlobMatcher = glob.compile_matcher();
                    matcher.is_match(&key_upper)
                }
                // Invalid glob syntax: fall back to literal equality.
                Err(_) => pattern_upper == key_upper,
            }
        })
    }
}