feat(fermata): add secret filtering engine — the security brain

Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.

New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
  built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
  Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
  patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.

Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
  scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0

Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates

195 tests (130 new), all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Gabor Körber
2026-05-25 17:29:07 +02:00
parent f77fd73966
commit 087429d275
22 changed files with 4557 additions and 172 deletions
+298
View File
@@ -0,0 +1,298 @@
use assert_cmd::Command;
use std::fs;
/// Builds a throwaway project directory for the hook tests.
///
/// Three files are written into it: `.env` holding `env_content`, a
/// `.botsecrets` config (the caller's, or a default listing only `.env`),
/// and an empty `.botignore`.
fn setup_project(
    env_content: &str,
    botsecrets_content: Option<&str>,
) -> tempfile::TempDir {
    // Fallback config used when the caller passes `None`.
    const DEFAULT_BOTSECRETS: &str = r#"
[files]
patterns = [".env"]
"#;

    let project = tempfile::tempdir().unwrap();
    let root = project.path();

    // Test secrets.
    fs::write(root.join(".env"), env_content).unwrap();
    // Secrets configuration.
    fs::write(
        root.join(".botsecrets"),
        botsecrets_content.unwrap_or(DEFAULT_BOTSECRETS),
    )
    .unwrap();
    // Empty `.botignore` — required for project root detection.
    fs::write(root.join(".botignore"), "").unwrap();

    project
}
/// A value listed in `.env` is masked in the tool response while the
/// neighbouring non-secret lines are left intact.
#[test]
fn post_tool_use_redacts_known_secret() {
    let project = setup_project("DB_PASSWORD=supersecret123\n", None);
    let stdin_json = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/file.txt" },
        "tool_response": "DB_HOST=localhost\nDB_PASSWORD=supersecret123\nDB_PORT=5432"
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(stdin_json);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    let updated = reply["hookSpecificOutput"]["updatedToolOutput"]
        .as_str()
        .expect("expected updatedToolOutput");

    assert!(
        updated.contains("*****"),
        "expected masked secret, got: {updated}"
    );
    assert!(
        !updated.contains("supersecret123"),
        "secret should be redacted, got: {updated}"
    );
    // Both surrounding lines must survive untouched.
    for kept in ["DB_HOST=localhost", "DB_PORT=5432"] {
        assert!(
            updated.contains(kept),
            "non-secret lines should be preserved, got: {updated}"
        );
    }
}
/// A response containing none of the known secrets yields `{}`.
#[test]
fn post_tool_use_no_secrets_passthrough() {
    let project = setup_project("DB_PASSWORD=supersecret123\n", None);
    let stdin_json = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/file.txt" },
        "tool_response": "Hello, world! This text has no secrets."
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(stdin_json);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    // An empty JSON object signals "no changes".
    assert_eq!(reply, serde_json::json!({}), "expected empty JSON for passthrough");
}
/// An empty `tool_response` yields `{}`.
#[test]
fn post_tool_use_empty_response_passthrough() {
    let project = setup_project("DB_PASSWORD=supersecret123\n", None);
    let stdin_json = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/file.txt" },
        "tool_response": ""
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(stdin_json);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    assert_eq!(reply, serde_json::json!({}));
}
/// With the heuristic scanner in enforce mode, a token-shaped string that is
/// not listed in `.env` still produces a warning in the updated output.
#[test]
fn post_tool_use_heuristic_enforce_appends_warning() {
    // Heuristic explicitly enabled in enforce mode (which is also the default).
    let config = r#"
[files]
patterns = [".env"]
[heuristic]
enabled = true
mode = "enforce"
"#;
    let project = setup_project("UNRELATED_KEY=foo\n", Some(config));

    // The response carries something shaped like a classic GitHub PAT:
    // `ghp_` followed by exactly 36 alphanumeric characters.
    let stdin_json = serde_json::json!({
        "tool_name": "Bash",
        "tool_input": { "command": "cat output.log" },
        "tool_response": "deploy log: token ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij used"
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(stdin_json);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    let updated = reply["hookSpecificOutput"]["updatedToolOutput"]
        .as_str()
        .expect("expected updatedToolOutput with heuristic warning");
    assert!(
        updated.contains("[fermata] WARNING"),
        "expected heuristic warning, got: {updated}"
    );
}
/// Omitting `--event` falls back to pre-tool-use, so the existing
/// `--harness claude` invocation still denies a read of an ignored file.
#[test]
fn pre_tool_use_backward_compat_default_event() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    fs::write(root.join(".botignore"), ".env\n").unwrap();
    let ignored = root.join(".env");
    fs::write(&ignored, "").unwrap();

    let stdin_json = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": ignored.to_str().unwrap() }
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--harness", "claude"])
        .write_stdin(stdin_json);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    assert_eq!(reply["hookSpecificOutput"]["permissionDecision"], "deny");
}
/// Passing `--event pre-tool-use` explicitly is accepted; a file that is not
/// listed in `.botignore` is allowed.
#[test]
fn pre_tool_use_explicit_event_flag() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    fs::write(root.join(".botignore"), ".env\n").unwrap();
    let readable = root.join("safe.txt");
    fs::write(&readable, "").unwrap();

    let stdin_json = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": readable.to_str().unwrap() }
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "pre-tool-use", "--harness", "claude"])
        .write_stdin(stdin_json);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    assert_eq!(reply["hookSpecificOutput"]["permissionDecision"], "allow");
}
/// An unrecognised `--event` value makes the process exit with status 2.
#[test]
fn unknown_event_exits_2() {
    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "nonsense", "--harness", "claude"])
        .write_stdin("{}");
    hook.assert().code(2);
}
/// In a directory with neither `.botignore` nor `.botsecrets`, PostToolUse
/// fails open and answers `{}`.
#[test]
fn post_tool_use_no_project_root_passthrough() {
    let bare_dir = tempfile::tempdir().unwrap();
    let stdin_json = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/file.txt" },
        "tool_response": "DB_PASSWORD=supersecret123"
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(bare_dir.path())
        .write_stdin(stdin_json);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    assert_eq!(reply, serde_json::json!({}));
}
/// Every secret from `.env` is removed from a single response, even when the
/// response uses different key names than the `.env` file.
#[test]
fn post_tool_use_multiple_secrets_redacted() {
    let project = setup_project(
        "DB_PASSWORD=supersecret123\nAPI_KEY=my-api-key-abc\n",
        None,
    );
    let stdin_json = serde_json::json!({
        "tool_name": "Read",
        "tool_input": { "file_path": "/some/config" },
        "tool_response": "config: password=supersecret123, key=my-api-key-abc, host=localhost"
    })
    .to_string();

    let mut hook = Command::cargo_bin("fermata").unwrap();
    hook.args(["hook", "--event", "post-tool-use", "--harness", "claude"])
        .current_dir(project.path())
        .write_stdin(stdin_json);
    let run = hook.assert().success();

    let reply: serde_json::Value = serde_json::from_slice(&run.get_output().stdout).unwrap();
    let updated = reply["hookSpecificOutput"]["updatedToolOutput"]
        .as_str()
        .expect("expected updatedToolOutput");

    assert!(!updated.contains("supersecret123"), "first secret should be redacted");
    assert!(!updated.contains("my-api-key-abc"), "second secret should be redacted");
    assert!(updated.contains("host=localhost"), "non-secret should be preserved");
}
+388
View File
@@ -0,0 +1,388 @@
use dirigent_fermata::core::secrets::config::{
EnforcementMode, HeuristicMode, ParseErrorAction, RedactionStyle, SecretsConfig,
BUILTIN_KEY_PATTERNS,
};
/// A config with only a `[files]` section parses; the redaction and
/// enforcement sections take their default values.
#[test]
fn parse_minimal_files_only() {
    let source = r#"
[files]
patterns = [".env", ".env.*"]
"#;
    let parsed = SecretsConfig::from_toml(source).unwrap();

    assert_eq!(parsed.files.patterns, vec![".env", ".env.*"]);
    // Sections absent from the input use defaults.
    assert_eq!(parsed.redaction.style, RedactionStyle::Masked);
    assert_eq!(parsed.enforcement.mode, EnforcementMode::Permissive);
}
/// A config that sets every section round-trips into the matching fields.
#[test]
fn parse_full_config() {
    let source = r#"
[files]
patterns = [".env", "secrets.*"]
[keys]
include = ["STRIPE_*", "TWILIO_*"]
exclude = ["PUBLIC_KEY", "SSH_KEY_PATH"]
[redaction]
style = "typed"
[heuristic]
enabled = false
mode = "report"
patterns = ['AKIA[A-Z2-7]{16}']
[enforcement]
mode = "strict"
on_parse_error = "deny"
[[file]]
path = "settings.py"
format = "python-assignments"
keys = ["SECRET_KEY", "DATABASES.*.PASSWORD"]
"#;
    let parsed = SecretsConfig::from_toml(source).unwrap();

    // [files] and [keys]
    assert_eq!(parsed.files.patterns, vec![".env", "secrets.*"]);
    assert_eq!(parsed.keys.include, vec!["STRIPE_*", "TWILIO_*"]);
    assert_eq!(parsed.keys.exclude, vec!["PUBLIC_KEY", "SSH_KEY_PATH"]);

    // [redaction]
    assert_eq!(parsed.redaction.style, RedactionStyle::Typed);

    // [heuristic]
    assert!(!parsed.heuristic.enabled);
    assert_eq!(parsed.heuristic.mode, HeuristicMode::Report);
    assert_eq!(parsed.heuristic.patterns, vec!["AKIA[A-Z2-7]{16}"]);

    // [enforcement]
    assert_eq!(parsed.enforcement.mode, EnforcementMode::Strict);
    assert_eq!(parsed.enforcement.on_parse_error, ParseErrorAction::Deny);

    // [[file]] overrides
    assert_eq!(parsed.file_overrides.len(), 1);
    let settings = &parsed.file_overrides[0];
    assert_eq!(settings.path, "settings.py");
    assert_eq!(settings.format.as_deref(), Some("python-assignments"));
    assert_eq!(
        settings.keys,
        vec!["SECRET_KEY", "DATABASES.*.PASSWORD"]
    );
}
/// Parsing an empty document yields the default value of every section.
#[test]
fn empty_toml_returns_defaults() {
    let defaults = SecretsConfig::from_toml("").unwrap();

    let patterns = &defaults.files.patterns;
    assert!(!patterns.is_empty());
    assert!(patterns.contains(&String::from(".env")));

    assert_eq!(defaults.redaction.style, RedactionStyle::Masked);
    assert!(defaults.heuristic.enabled);
    assert_eq!(defaults.heuristic.mode, HeuristicMode::Enforce);
    assert_eq!(defaults.enforcement.mode, EnforcementMode::Permissive);
    assert_eq!(
        defaults.enforcement.on_parse_error,
        ParseErrorAction::MaskEntireFile
    );
    assert!(defaults.file_overrides.is_empty());
}
/// Malformed TOML is rejected, and the error text contains "expected".
#[test]
fn invalid_toml_produces_error() {
    let outcome = SecretsConfig::from_toml("this is not valid {{ toml");
    assert!(outcome.is_err());

    let err_msg = outcome.unwrap_err().to_string();
    // NOTE(review): this couples the test to the parser's error wording.
    assert!(
        err_msg.contains("expected"),
        "error should describe parse issue: {err_msg}"
    );
}
/// Every entry of `BUILTIN_KEY_PATTERNS` appears in the effective include
/// list of a default config.
#[test]
fn effective_key_includes_has_builtins() {
    let defaults = SecretsConfig::default();
    let effective = defaults.effective_key_includes();

    for builtin in BUILTIN_KEY_PATTERNS {
        let wanted = builtin.to_string();
        assert!(effective.contains(&wanted), "missing builtin: {builtin}");
    }
}
/// A user `include` pattern is added alongside the built-in patterns.
#[test]
fn effective_key_includes_adds_user_patterns() {
    let source = r#"
[keys]
include = ["MY_CUSTOM_SECRET_*"]
"#;
    let parsed = SecretsConfig::from_toml(source).unwrap();
    let effective = parsed.effective_key_includes();

    assert!(effective.contains(&String::from("MY_CUSTOM_SECRET_*")));
    // The built-ins are still there.
    assert!(effective.contains(&String::from("*PASSWORD*")));
}
/// Patterns named under `exclude` are absent from the effective include
/// list; other built-ins remain.
#[test]
fn effective_key_includes_removes_excluded() {
    let source = r#"
[keys]
exclude = ["*TOKEN*", "SENTRY_DSN"]
"#;
    let parsed = SecretsConfig::from_toml(source).unwrap();
    let effective = parsed.effective_key_includes();

    for excluded in ["*TOKEN*", "SENTRY_DSN"] {
        assert!(
            !effective.contains(&excluded.to_string()),
            "excluded pattern should be removed"
        );
    }
    // Built-ins that were not excluded remain.
    assert!(effective.contains(&String::from("*PASSWORD*")));
}
/// Default patterns match secret-looking keys in either letter case.
#[test]
fn key_matches_glob_case_insensitive() {
    let defaults = SecretsConfig::default();
    let secret_keys = [
        "DATABASE_URL",
        "database_url",
        "my_password_here",
        "MY_PASSWORD_HERE",
        "STRIPE_SECRET_KEY",
        "AWS_ACCESS_KEY_ID",
    ];
    for key in secret_keys {
        assert!(defaults.key_matches(key), "{key}");
    }
}
/// Ordinary configuration keys are not matched by the default patterns.
#[test]
fn key_matches_non_secret_keys() {
    let defaults = SecretsConfig::default();
    for key in ["DEBUG", "LOG_LEVEL", "PORT", "HOST"] {
        assert!(!defaults.key_matches(key), "{key}");
    }
}
/// A user `include` glob matches keys in either letter case.
#[test]
fn key_matches_respects_user_include() {
    let source = r#"
[keys]
include = ["MY_APP_*"]
"#;
    let parsed = SecretsConfig::from_toml(source).unwrap();

    assert!(parsed.key_matches("MY_APP_SETTING"));
    assert!(parsed.key_matches("my_app_setting"));
}
/// Excluding `*TOKEN*` stops a key that matched only that pattern, while
/// unrelated patterns keep matching.
#[test]
fn key_matches_respects_user_exclude() {
    let source = r#"
[keys]
exclude = ["*TOKEN*"]
"#;
    let parsed = SecretsConfig::from_toml(source).unwrap();

    // `GITHUB_TOKEN` might still match through a literal pattern, so probe a
    // key that could only have matched via `*TOKEN*`.
    assert!(!parsed.key_matches("MY_TOKEN"));
    // PASSWORD patterns are unaffected.
    assert!(parsed.key_matches("MY_PASSWORD"));
}
/// The default file patterns include a handful of well-known secret files.
#[test]
fn builtin_file_patterns_present() {
    let defaults = SecretsConfig::default();
    let patterns = &defaults.files.patterns;

    assert!(patterns.contains(&String::from(".env")));
    assert!(patterns.contains(&String::from("*.pem")));
    assert!(patterns.contains(&String::from(".aws/credentials")));
    assert!(patterns.contains(&String::from("terraform.tfvars")));
}
/// Loading from a directory without any config file yields default file
/// patterns and the default redaction style.
#[test]
fn load_missing_files_returns_defaults() {
    let project = tempfile::tempdir().unwrap();
    let loaded = SecretsConfig::load(project.path()).unwrap();
    let defaults = SecretsConfig::default();

    assert_eq!(loaded.files.patterns, defaults.files.patterns);
    assert_eq!(loaded.redaction.style, RedactionStyle::Masked);
}
/// A project-level `.botsecrets` is picked up by `load`; sections it does
/// not mention keep their defaults.
#[test]
fn load_project_botsecrets() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    let source = r#"
[redaction]
style = "named"
[keys]
include = ["CUSTOM_*"]
"#;
    std::fs::write(root.join(".botsecrets"), source).unwrap();

    let loaded = SecretsConfig::load(root).unwrap();

    assert_eq!(loaded.redaction.style, RedactionStyle::Named);
    assert!(loaded.effective_key_includes().contains(&String::from("CUSTOM_*")));
    // File patterns were not overridden, so the defaults remain.
    assert!(loaded.files.patterns.contains(&String::from(".env")));
}
/// `.botsecrets.local` wins over `.botsecrets` for the settings it defines;
/// settings only present in `.botsecrets` are kept.
#[test]
fn load_local_overrides_project() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();

    let layers = [
        (
            ".botsecrets",
            r#"
[redaction]
style = "named"
[enforcement]
mode = "strict"
"#,
        ),
        (
            ".botsecrets.local",
            r#"
[redaction]
style = "absent"
"#,
        ),
    ];
    for (file_name, body) in layers {
        std::fs::write(root.join(file_name), body).unwrap();
    }

    let loaded = SecretsConfig::load(root).unwrap();

    // Redaction style comes from the `.local` layer.
    assert_eq!(loaded.redaction.style, RedactionStyle::Absent);
    // Enforcement is untouched by `.local`, so the project value survives.
    assert_eq!(loaded.enforcement.mode, EnforcementMode::Strict);
}
/// A malformed `.botsecrets` makes `load` fail with an error that names the
/// offending file.
#[test]
fn load_invalid_botsecrets_returns_error() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    std::fs::write(root.join(".botsecrets"), "invalid {{ toml").unwrap();

    let outcome = SecretsConfig::load(root);
    assert!(outcome.is_err());

    let err = outcome.unwrap_err().to_string();
    assert!(err.contains(".botsecrets"), "error should mention file: {err}");
}
/// `include` / `exclude` lists from `.botsecrets` and `.botsecrets.local`
/// are both present after loading.
#[test]
fn merge_keys_accumulate() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();

    let layers = [
        (
            ".botsecrets",
            r#"
[keys]
include = ["FROM_PROJECT"]
exclude = ["EXCLUDE_PROJECT"]
"#,
        ),
        (
            ".botsecrets.local",
            r#"
[keys]
include = ["FROM_LOCAL"]
exclude = ["EXCLUDE_LOCAL"]
"#,
        ),
    ];
    for (file_name, body) in layers {
        std::fs::write(root.join(file_name), body).unwrap();
    }

    let loaded = SecretsConfig::load(root).unwrap();
    let keys = &loaded.keys;

    assert!(keys.include.contains(&String::from("FROM_PROJECT")));
    assert!(keys.include.contains(&String::from("FROM_LOCAL")));
    assert!(keys.exclude.contains(&String::from("EXCLUDE_PROJECT")));
    assert!(keys.exclude.contains(&String::from("EXCLUDE_LOCAL")));
}
/// A project `[files].patterns` list replaces the default list instead of
/// extending it.
#[test]
fn merge_file_patterns_replaced_not_appended() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    let source = r#"
[files]
patterns = ["only-this.env"]
"#;
    std::fs::write(root.join(".botsecrets"), source).unwrap();

    let loaded = SecretsConfig::load(root).unwrap();
    let patterns = &loaded.files.patterns;

    assert_eq!(*patterns, vec!["only-this.env"]);
    // The default entries are gone, replaced by the project's list.
    assert!(!patterns.contains(&String::from(".env")));
}
/// Each redaction style name parses to its enum variant.
#[test]
fn all_redaction_styles_parse() {
    let cases = [
        ("masked", RedactionStyle::Masked),
        ("typed", RedactionStyle::Typed),
        ("named", RedactionStyle::Named),
        ("absent", RedactionStyle::Absent),
    ];
    for (input, expected) in cases {
        let source = format!("[redaction]\nstyle = \"{input}\"");
        let parsed = SecretsConfig::from_toml(&source).unwrap();
        assert_eq!(parsed.redaction.style, expected, "failed for: {input}");
    }
}
/// Each enforcement mode name parses to its enum variant.
#[test]
fn all_enforcement_modes_parse() {
    let cases = [
        ("strict", EnforcementMode::Strict),
        ("permissive", EnforcementMode::Permissive),
        ("audit", EnforcementMode::Audit),
    ];
    for (input, expected) in cases {
        let source = format!("[enforcement]\nmode = \"{input}\"");
        let parsed = SecretsConfig::from_toml(&source).unwrap();
        assert_eq!(parsed.enforcement.mode, expected, "failed for: {input}");
    }
}
/// Each heuristic mode name parses to its enum variant.
#[test]
fn all_heuristic_modes_parse() {
    let cases = [
        ("enforce", HeuristicMode::Enforce),
        ("report", HeuristicMode::Report),
        ("disabled", HeuristicMode::Disabled),
    ];
    for (input, expected) in cases {
        let source = format!("[heuristic]\nmode = \"{input}\"");
        let parsed = SecretsConfig::from_toml(&source).unwrap();
        assert_eq!(parsed.heuristic.mode, expected, "failed for: {input}");
    }
}
/// Serialising a parsed config to TOML and reading it back preserves the
/// redaction style and the enforcement settings.
#[test]
fn serialization_roundtrip() {
    let source = r#"
[files]
patterns = [".env"]
[redaction]
style = "typed"
[enforcement]
mode = "audit"
on_parse_error = "allow"
"#;
    let original = SecretsConfig::from_toml(source).unwrap();

    let encoded = toml::to_string(&original).unwrap();
    let decoded: SecretsConfig = toml::from_str(&encoded).unwrap();

    assert_eq!(decoded.redaction.style, RedactionStyle::Typed);
    assert_eq!(decoded.enforcement.mode, EnforcementMode::Audit);
    assert_eq!(
        decoded.enforcement.on_parse_error,
        ParseErrorAction::Allow
    );
}
+307
View File
@@ -0,0 +1,307 @@
//! Integration tests for `secrets::manifest` — the manifest loader that
//! discovers secret files, parses them, and builds the known-secrets set.
use std::fs;
use dirigent_fermata::core::secrets::config::SecretsConfig;
use dirigent_fermata::core::secrets::manifest::Manifest;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Returns the built-in default configuration, `SecretsConfig::default()`.
///
/// The defaults are not limited to `.env*` discovery: the config tests
/// assert that the default file patterns also include entries such as
/// `*.pem`, `.aws/credentials` and `terraform.tfvars`, together with the
/// built-in secret key patterns.
fn default_config() -> SecretsConfig {
    SecretsConfig::default()
}
/// Parses `toml` into a `SecretsConfig`, panicking with
/// "valid TOML config" when parsing fails.
fn config_from_toml(toml: &str) -> SecretsConfig {
    let parsed = SecretsConfig::from_toml(toml);
    parsed.expect("valid TOML config")
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
/// A root-level `.env` is discovered; only keys matching the default key
/// patterns end up in the manifest.
#[test]
fn discovers_env_file_and_extracts_matching_secrets() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    fs::write(
        root.join(".env"),
        "DATABASE_URL=postgres://localhost/db\nAPP_NAME=myapp\nSECRET_KEY=super-secret-value-1234\n",
    )
    .unwrap();

    let manifest = Manifest::build(&default_config(), root).unwrap();
    assert!(!manifest.is_empty());

    // DATABASE_URL and SECRET_KEY match the default patterns; APP_NAME does not.
    let keys: Vec<&str> = manifest
        .entries()
        .iter()
        .map(|entry| entry.key.as_str())
        .collect();
    assert!(keys.contains(&"DATABASE_URL"), "expected DATABASE_URL, got {keys:?}");
    assert!(keys.contains(&"SECRET_KEY"), "expected SECRET_KEY, got {keys:?}");
    assert!(!keys.contains(&"APP_NAME"), "APP_NAME should be filtered out");
}
/// A `.env.local` two directories below the root is discovered.
#[test]
fn discovers_nested_env_local_file() {
    let project = tempfile::tempdir().unwrap();
    let service_dir = project.path().join("services").join("auth");
    fs::create_dir_all(&service_dir).unwrap();
    fs::write(
        service_dir.join(".env.local"),
        "AUTH_TOKEN=tok_abcdefgh12345678\n",
    )
    .unwrap();

    let manifest = Manifest::build(&default_config(), project.path()).unwrap();
    assert!(!manifest.is_empty());

    let keys: Vec<&str> = manifest
        .entries()
        .iter()
        .map(|entry| entry.key.as_str())
        .collect();
    assert!(keys.contains(&"AUTH_TOKEN"), "expected AUTH_TOKEN, got {keys:?}");
}
/// Keys that match no key pattern are left out of the manifest.
#[test]
fn filters_entries_by_key_patterns() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    fs::write(
        root.join(".env"),
        "MY_PASSWORD=hunter2hunter2\nNOT_SENSITIVE=hello-world-1234\nAPI_KEY=abcdef1234567890\n",
    )
    .unwrap();

    let manifest = Manifest::build(&default_config(), root).unwrap();
    let keys: Vec<&str> = manifest
        .entries()
        .iter()
        .map(|entry| entry.key.as_str())
        .collect();

    assert!(keys.contains(&"MY_PASSWORD"));
    assert!(keys.contains(&"API_KEY"));
    assert!(!keys.contains(&"NOT_SENSITIVE"));
}
/// A `[[file]]` override with an explicit format and key list pulls exactly
/// the listed key out of a file the glob patterns would not discover.
#[test]
fn file_override_with_explicit_format_and_key_filter() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    // With `patterns = []` below, this file is reachable only via the override.
    fs::write(
        root.join("custom_secrets.conf"),
        "SERVICE_TOKEN=long-token-value-here\nDEBUG=true-ish-thing\n",
    )
    .unwrap();

    let config = config_from_toml(
        r#"
[files]
patterns = []
[[file]]
path = "custom_secrets.conf"
format = "env"
keys = ["SERVICE_TOKEN"]
"#,
    );
    let manifest = Manifest::build(&config, root).unwrap();

    assert_eq!(manifest.len(), 1);
    let only = &manifest.entries()[0];
    assert_eq!(only.key, "SERVICE_TOKEN");
    assert_eq!(only.value, "long-token-value-here");
}
/// Building against a directory with no files yields an empty manifest.
#[test]
fn empty_project_yields_empty_manifest() {
    // Nothing is written into the directory.
    let project = tempfile::tempdir().unwrap();

    let manifest = Manifest::build(&default_config(), project.path()).unwrap();

    assert!(manifest.is_empty());
    assert_eq!(manifest.len(), 0);
}
/// Manifest entries are ordered by value length, longest first.
///
/// The entry count is asserted first: `windows(2)` yields nothing for fewer
/// than two elements, so without that guard the ordering check would pass
/// vacuously if the fixture keys stopped being extracted.
#[test]
fn entries_sorted_by_value_length_descending() {
    let dir = tempfile::tempdir().unwrap();
    fs::write(
        dir.path().join(".env"),
        // Deliberately out of order by length.
        "TOKEN_A=short1234\nTOKEN_B=a-much-longer-secret-value-here\nTOKEN_C=medium-value1\n",
    )
    .unwrap();
    let config = default_config();
    let manifest = Manifest::build(&config, dir.path()).unwrap();
    let lengths: Vec<usize> = manifest.entries().iter().map(|e| e.value.len()).collect();

    // All three distinct TOKEN_* values must be present for the ordering
    // check below to mean anything.
    assert_eq!(
        lengths.len(),
        3,
        "expected all three TOKEN_* entries, got value lengths {lengths:?}"
    );
    for window in lengths.windows(2) {
        assert!(
            window[0] >= window[1],
            "entries not sorted by value length descending: {lengths:?}"
        );
    }
}
/// Values shorter than the minimum length never enter the manifest.
#[test]
fn short_values_filtered_out() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    fs::write(
        root.join(".env"),
        "PASSWORD_TINY=yes\nPASSWORD_OK=long-enough-password\n",
    )
    .unwrap();

    let manifest = Manifest::build(&default_config(), root).unwrap();
    let keys: Vec<&str> = manifest
        .entries()
        .iter()
        .map(|entry| entry.key.as_str())
        .collect();

    // "yes" has 3 characters, one short of the 4-character minimum.
    assert!(!keys.contains(&"PASSWORD_TINY"), "short value should be filtered");
    assert!(keys.contains(&"PASSWORD_OK"));
}
/// The same key/value pair found in two `.env` files is stored once.
#[test]
fn deduplication_of_same_key_value() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();

    // Identical secret at the root and in a subdirectory.
    fs::write(
        root.join(".env"),
        "SECRET_KEY=shared-secret-value-12345\n",
    )
    .unwrap();
    let nested = root.join("sub");
    fs::create_dir(&nested).unwrap();
    fs::write(nested.join(".env"), "SECRET_KEY=shared-secret-value-12345\n").unwrap();

    let manifest = Manifest::build(&default_config(), root).unwrap();

    // Both occurrences must collapse into one entry.
    let occurrences = manifest
        .entries()
        .iter()
        .filter(|entry| entry.key == "SECRET_KEY")
        .count();
    assert_eq!(
        occurrences,
        1,
        "duplicate entries should be collapsed: found {}",
        occurrences
    );
}
/// With `on_parse_error = "allow"`, a broken file does not stop the build
/// and secrets from valid files are still collected.
#[test]
fn unparseable_file_with_allow_is_skipped() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();

    // The `.env` parser is lenient, so the parse error is provoked with a
    // `.toml` file holding invalid TOML instead.
    // NOTE(review): this assumes `secrets.toml` is matched by the default
    // file patterns; if it is not, the test passes without exercising the
    // "allow" path — confirm.
    fs::write(root.join("secrets.toml"), "this is not valid toml {{{\n").unwrap();
    // A valid `.env` confirms that processing carries on.
    fs::write(
        root.join(".env"),
        "API_KEY=valid-secret-12345678\n",
    )
    .unwrap();

    let config = config_from_toml(
        r#"
[enforcement]
on_parse_error = "allow"
"#,
    );
    let manifest = Manifest::build(&config, root).unwrap();

    // The broken `secrets.toml` is skipped; `.env` is still processed.
    let keys: Vec<&str> = manifest
        .entries()
        .iter()
        .map(|entry| entry.key.as_str())
        .collect();
    assert!(keys.contains(&"API_KEY"));
}
/// With `on_parse_error = "deny"`, a broken file makes the build fail.
#[test]
fn unparseable_file_with_deny_returns_error() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    fs::write(root.join("secrets.toml"), "not valid toml {{{\n").unwrap();

    let config = config_from_toml(
        r#"
[enforcement]
on_parse_error = "deny"
"#,
    );
    let outcome = Manifest::build(&config, root);

    assert!(outcome.is_err(), "deny mode should propagate parse errors");
}
/// `Manifest::empty()` reports itself as empty through every accessor.
#[test]
fn manifest_empty_and_is_empty() {
    let blank = Manifest::empty();

    assert!(blank.is_empty());
    assert_eq!(blank.len(), 0);
    assert!(blank.entries().is_empty());
}
/// `.env` files under `.git` and `node_modules` are ignored, while the one
/// at the project root is read.
#[test]
fn skips_git_and_node_modules_directories() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();

    // Must be skipped: `.git/.env`.
    let git_dir = root.join(".git");
    fs::create_dir(&git_dir).unwrap();
    fs::write(git_dir.join(".env"), "SECRET_KEY=git-secret-12345\n").unwrap();

    // Must be skipped: `node_modules/pkg/.env`.
    let package_dir = root.join("node_modules").join("pkg");
    fs::create_dir_all(&package_dir).unwrap();
    fs::write(package_dir.join(".env"), "TOKEN=nm-token-12345678\n").unwrap();

    // Must be found: the root `.env`.
    fs::write(
        root.join(".env"),
        "API_KEY=root-api-key-12345\n",
    )
    .unwrap();

    let manifest = Manifest::build(&default_config(), root).unwrap();
    let values: Vec<&str> = manifest
        .entries()
        .iter()
        .map(|entry| entry.value.as_str())
        .collect();

    assert!(
        values.contains(&"root-api-key-12345"),
        "root .env should be found"
    );
    assert!(
        !values.contains(&"git-secret-12345"),
        ".git/.env should be skipped"
    );
    assert!(
        !values.contains(&"nm-token-12345678"),
        "node_modules/.env should be skipped"
    );
}
/// A key file without a parseable format does not break the build; the
/// `.env` next to it is still processed.
#[test]
fn opaque_file_formats_are_skipped_gracefully() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();

    // `server.key` stands in for an opaque key file with no key/value format.
    // NOTE(review): presumably matched by a default pattern such as `*.key` —
    // confirm against the config defaults.
    fs::write(root.join("server.key"), "binary-ish key data here\n").unwrap();
    fs::write(
        root.join(".env"),
        "PASSWORD=parseable-secret-12345\n",
    )
    .unwrap();

    let manifest = Manifest::build(&default_config(), root).unwrap();

    // No error, and the `.env` entry is present.
    let keys: Vec<&str> = manifest
        .entries()
        .iter()
        .map(|entry| entry.key.as_str())
        .collect();
    assert!(keys.contains(&"PASSWORD"));
}
+404
View File
@@ -0,0 +1,404 @@
//! Integration tests for the multi-format secret file parser.
use dirigent_fermata::core::secrets::parser::{
parse_content, parse_secret_file, FileFormat, SecretEntry,
};
use std::path::Path;
use tempfile::NamedTempFile;
/// Shorthand that views a string slice as a `Path` without allocating.
fn p(s: &str) -> &Path {
    s.as_ref()
}
// ---------------------------------------------------------------------------
// .env parsing
// ---------------------------------------------------------------------------
/// A single `KEY=value` line yields one entry split at the first `=`.
#[test]
fn env_basic_key_value() {
    let parsed =
        parse_content("DATABASE_URL=postgres://localhost/db", FileFormat::Env, p(".env")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "DATABASE_URL");
    assert_eq!(only.value, "postgres://localhost/db");
}
/// Surrounding double quotes are stripped from the value.
#[test]
fn env_double_quoted() {
    let line = r#"SECRET="hello world""#;
    let parsed = parse_content(line, FileFormat::Env, p(".env")).unwrap();
    assert_eq!(parsed[0].value, "hello world");
}
/// Surrounding single quotes are stripped from the value.
#[test]
fn env_single_quoted() {
    let line = "SECRET='hello world'";
    let parsed = parse_content(line, FileFormat::Env, p(".env")).unwrap();
    assert_eq!(parsed[0].value, "hello world");
}
/// Comment lines (including indented ones) and blank lines yield no entries.
#[test]
fn env_comments_and_empty_lines() {
    let text = "# comment\n\nKEY=value\n # indented comment\n";
    let parsed = parse_content(text, FileFormat::Env, p(".env")).unwrap();

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].key, "KEY");
}
/// A leading `export ` is dropped from the key, for both quoted and
/// unquoted values.
#[test]
fn env_export_prefix() {
    let text = "export API_KEY=abc123\nexport TOKEN=\"xyz\"";
    let parsed = parse_content(text, FileFormat::Env, p(".env")).unwrap();

    assert_eq!(parsed.len(), 2);
    let (first, second) = (&parsed[0], &parsed[1]);
    assert_eq!(first.key, "API_KEY");
    assert_eq!(first.value, "abc123");
    assert_eq!(second.key, "TOKEN");
    assert_eq!(second.value, "xyz");
}
/// Whitespace around keys and around unquoted values is trimmed.
#[test]
fn env_whitespace_handling() {
    let text = " KEY = value \nKEY2= spaced ";
    let parsed = parse_content(text, FileFormat::Env, p(".env")).unwrap();

    // Keys are trimmed, and so are unquoted values.
    let (first, second) = (&parsed[0], &parsed[1]);
    assert_eq!(first.key, "KEY");
    assert_eq!(first.value, "value");
    assert_eq!(second.key, "KEY2");
    assert_eq!(second.value, "spaced");
}
/// A literal `\n` inside double quotes is decoded to a newline character.
#[test]
fn env_escape_sequences_in_double_quotes() {
    let line = r#"MSG="line1\nline2""#;
    let parsed = parse_content(line, FileFormat::Env, p(".env")).unwrap();
    assert_eq!(parsed[0].value, "line1\nline2");
}
// ---------------------------------------------------------------------------
// TOML parsing
// ---------------------------------------------------------------------------
/// Top-level string keys of a TOML document become entries.
#[test]
fn toml_flat_table() {
    let text = r#"
API_KEY = "abc"
DB_PASS = "secret"
"#;
    let parsed = parse_content(text, FileFormat::Toml, p("Secrets.toml")).unwrap();
    let has = |key: &str, value: &str| parsed.iter().any(|e| e.key == key && e.value == value);

    assert_eq!(parsed.len(), 2);
    assert!(has("API_KEY", "abc"));
    assert!(has("DB_PASS", "secret"));
}
/// Keys inside a TOML table are reported with a dotted `table.key` path.
#[test]
fn toml_nested_tables() {
    let text = r#"
[database]
password = "secret"
host = "localhost"
port = 5432
"#;
    let parsed = parse_content(text, FileFormat::Toml, p("config.toml")).unwrap();
    let has = |key: &str, value: &str| parsed.iter().any(|e| e.key == key && e.value == value);

    // Only strings are extracted; the integer `port` is skipped.
    assert_eq!(parsed.len(), 2);
    assert!(has("database.password", "secret"));
    assert!(has("database.host", "localhost"));
}
/// Boolean, integer and float TOML values are ignored; strings are kept.
#[test]
fn toml_mixed_types_only_strings() {
    let text = r#"
name = "app"
debug = true
count = 42
ratio = 3.14
"#;
    let parsed = parse_content(text, FileFormat::Toml, p("app.toml")).unwrap();

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].key, "name");
}
// ---------------------------------------------------------------------------
// JSON parsing
// ---------------------------------------------------------------------------
/// String members of a flat JSON object become entries.
#[test]
fn json_flat_object() {
    let text = r#"{"api_key": "abc", "secret": "xyz"}"#;
    let parsed = parse_content(text, FileFormat::Json, p("secrets.json")).unwrap();

    assert_eq!(parsed.len(), 2);
    let found = parsed.iter().any(|e| e.key == "api_key" && e.value == "abc");
    assert!(found);
}
/// Nested JSON members get a dotted key path; non-string members are
/// skipped.
#[test]
fn json_nested_objects() {
    let text = r#"{"db": {"password": "foo", "port": 5432}}"#;
    let parsed = parse_content(text, FileFormat::Json, p("secrets.json")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "db.password");
    assert_eq!(only.value, "foo");
}
/// String elements of a JSON array are keyed by `name.index`.
#[test]
fn json_arrays() {
    let text = r#"{"keys": ["a", "b"]}"#;
    let parsed = parse_content(text, FileFormat::Json, p("secrets.json")).unwrap();
    let has = |key: &str, value: &str| parsed.iter().any(|e| e.key == key && e.value == value);

    assert_eq!(parsed.len(), 2);
    assert!(has("keys.0", "a"));
    assert!(has("keys.1", "b"));
}
/// Number, boolean and null members are expected to yield no entries; only
/// the string member `name` remains.
#[test]
fn json_mixed_types() {
    let json_src = r#"{"name": "app", "count": 42, "active": true, "data": null}"#;
    let parsed = parse_content(json_src, FileFormat::Json, p("a.json")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "name");
}
// ---------------------------------------------------------------------------
// YAML parsing
// ---------------------------------------------------------------------------
/// A flat YAML mapping with two string values yields two entries.
#[test]
fn yaml_flat_map() {
    let yaml_src = "api_key: abc\nsecret: xyz\n";
    let parsed = parse_content(yaml_src, FileFormat::Yaml, p("secrets.yaml")).unwrap();

    assert_eq!(parsed.len(), 2);
    let found_api_key = parsed
        .iter()
        .any(|e| e.key == "api_key" && e.value == "abc");
    assert!(found_api_key);
}
/// A string nested under `db` is keyed as `db.password`; the numeric `port`
/// produces no entry.
#[test]
fn yaml_nested_maps() {
    let yaml_src = "db:\n password: foo\n port: 5432\n";
    let parsed = parse_content(yaml_src, FileFormat::Yaml, p("secrets.yml")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "db.password");
    assert_eq!(only.value, "foo");
}
/// Integer and boolean YAML scalars are expected to yield no entries; only
/// `name` remains.
#[test]
fn yaml_mixed_types() {
    let yaml_src = "name: app\ncount: 42\nactive: true\n";
    let parsed = parse_content(yaml_src, FileFormat::Yaml, p("a.yaml")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "name");
}
// ---------------------------------------------------------------------------
// Python assignment parsing
// ---------------------------------------------------------------------------
/// Double- and single-quoted string assignments are both picked up, while
/// the `import` line and the integer assignment contribute nothing.
#[test]
fn python_matches_assignments() {
    let py_src = r#"
API_KEY = "abc123"
DB_PASS = 'secret'
import os
x = 42
"#;
    let parsed = parse_content(py_src, FileFormat::PythonAssignments, p("settings.py")).unwrap();
    let has_pair = |key: &str, value: &str| parsed.iter().any(|e| e.key == key && e.value == value);

    assert_eq!(parsed.len(), 2);
    assert!(has_pair("API_KEY", "abc123"));
    assert!(has_pair("DB_PASS", "secret"));
}
/// Python code without any quoted-string assignment yields no entries.
#[test]
fn python_skips_non_matching() {
    let py_src = "result = some_function()\nfor x in range(10):\n pass\n";
    let parsed = parse_content(py_src, FileFormat::PythonAssignments, p("a.py")).unwrap();

    let nothing_extracted = parsed.is_empty();
    assert!(nothing_extracted);
}
// ---------------------------------------------------------------------------
// Properties parsing
// ---------------------------------------------------------------------------
/// `key=value` lines are split on `=`; dotted keys are kept verbatim.
#[test]
fn properties_equals_separator() {
    let props_src = "db.password=secret\ndb.host=localhost";
    let parsed = parse_content(props_src, FileFormat::Properties, p("app.properties")).unwrap();

    assert_eq!(parsed.len(), 2);
    let found_password = parsed
        .iter()
        .any(|e| e.key == "db.password" && e.value == "secret");
    assert!(found_password);
}
/// A `key: value` line (colon separator) parses to a single entry whose
/// value has the leading space after the colon removed.
#[test]
fn properties_colon_separator() {
    let content = "db.password: secret";
    let entries = parse_content(content, FileFormat::Properties, p("app.properties")).unwrap();
    // Check the count before indexing, as the sibling properties tests do, so
    // a parser regression fails with a readable assertion instead of an
    // index-out-of-bounds panic.
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].key, "db.password");
    assert_eq!(entries[0].value, "secret");
}
/// Lines starting with `#` or `!` are treated as comments and skipped.
#[test]
fn properties_comments() {
    let props_src = "# comment\n! also comment\nkey=value";
    let parsed = parse_content(props_src, FileFormat::Properties, p("app.properties")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "key");
}
/// A trailing backslash joins the next line onto the value; the expected
/// joined value is `hello world`.
#[test]
fn properties_continuation_lines() {
    let props_src = "long.value=hello \\\n world";
    let parsed = parse_content(props_src, FileFormat::Properties, p("app.properties")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "long.value");
    assert_eq!(only.value, "hello world");
}
// ---------------------------------------------------------------------------
// Auto-detection from file extension
// ---------------------------------------------------------------------------
/// `.env`, `.env.<suffix>` and `<name>.env` file names all resolve to the
/// Env format.
#[test]
fn format_from_path_env_variants() {
    let env_like_names = [".env", ".env.local", ".env.production", "staging.env"];
    for name in env_like_names {
        assert_eq!(
            FileFormat::from_path(p(name)),
            Some(FileFormat::Env),
            "path: {name}"
        );
    }
}
/// Each recognised file extension maps to its corresponding format.
#[test]
fn format_from_path_extensions() {
    let cases = [
        ("a.toml", FileFormat::Toml),
        ("a.json", FileFormat::Json),
        ("a.yaml", FileFormat::Yaml),
        ("a.yml", FileFormat::Yaml),
        ("a.py", FileFormat::PythonAssignments),
        ("a.properties", FileFormat::Properties),
    ];
    for (name, expected) in cases {
        assert_eq!(FileFormat::from_path(p(name)), Some(expected), "path: {name}");
    }
}
/// Extensions without a known format (`.key`, `.pem`) resolve to `None`.
#[test]
fn format_from_path_unknown() {
    for name in ["a.key", "a.pem"] {
        assert_eq!(FileFormat::from_path(p(name)), None, "path: {name}");
    }
}
// ---------------------------------------------------------------------------
// Format hints
// ---------------------------------------------------------------------------
/// Every supported hint string, including aliases, maps to its format, and
/// an unrecognised hint maps to `None`.
#[test]
fn format_from_hint() {
    let cases = [
        ("env", Some(FileFormat::Env)),
        ("dotenv", Some(FileFormat::Env)),
        ("toml", Some(FileFormat::Toml)),
        ("json", Some(FileFormat::Json)),
        ("yaml", Some(FileFormat::Yaml)),
        ("yml", Some(FileFormat::Yaml)),
        ("python-assignments", Some(FileFormat::PythonAssignments)),
        ("python", Some(FileFormat::PythonAssignments)),
        ("properties", Some(FileFormat::Properties)),
        ("java-properties", Some(FileFormat::Properties)),
        ("unknown", None),
    ];
    for (hint, expected) in cases {
        assert_eq!(FileFormat::from_hint(hint), expected, "hint: {hint}");
    }
}
// ---------------------------------------------------------------------------
// Key filtering
// ---------------------------------------------------------------------------
/// Of three parsed env keys, only those matching one of the glob patterns
/// survive filtering.
#[test]
fn filter_by_glob() {
    let env_src = "API_KEY=abc\nDB_HOST=localhost\nDB_PASSWORD=secret\n";

    // Unfiltered baseline: all three assignments are parsed.
    let unfiltered = parse_content(env_src, FileFormat::Env, p(".env")).unwrap();
    assert_eq!(unfiltered.len(), 3);

    let patterns = vec!["*PASSWORD*".to_string(), "*API_KEY*".to_string()];
    let kept = parse_secret_file_with_filter(env_src, &patterns);
    let kept_key = |key: &str| kept.iter().any(|e| e.key == key);

    assert_eq!(kept.len(), 2);
    assert!(kept_key("API_KEY"));
    assert!(kept_key("DB_PASSWORD"));
}
/// Helper that parses env content with a key filter (avoids temp files).
///
/// `content` is parsed as `.env` text, and only entries whose key matches at
/// least one glob in `filter` are returned. Matching is ASCII
/// case-insensitive: both the patterns and the keys are upper-cased before
/// comparison. Patterns that fail to compile as globs are silently dropped.
///
/// The filter logic is re-implemented here so the test needs no disk I/O; it
/// is intended to use the same approach as `parse_secret_file`.
///
/// Panics if `content` cannot be parsed.
fn parse_secret_file_with_filter(content: &str, filter: &[String]) -> Vec<SecretEntry> {
    let matchers: Vec<_> = filter
        .iter()
        .filter_map(|pattern| {
            globset::Glob::new(&pattern.to_ascii_uppercase())
                .ok()
                .map(|glob| glob.compile_matcher())
        })
        .collect();

    // Parse once; the previous version parsed the same content twice and
    // discarded the first result.
    parse_content(content, FileFormat::Env, p(".env"))
        .unwrap()
        .into_iter()
        .filter(|entry| {
            let upper = entry.key.to_ascii_uppercase();
            matchers.iter().any(|m| m.is_match(&upper))
        })
        .collect()
}
// ---------------------------------------------------------------------------
// Error on unrecognised format
// ---------------------------------------------------------------------------
/// With no format override, a file whose `.xyz` suffix is not recognised is
/// rejected with a "cannot determine file format" error.
#[test]
fn error_on_unknown_format() {
    use std::io::Write;

    let mut file = NamedTempFile::with_suffix(".xyz").unwrap();
    write!(file, "KEY=value").unwrap();

    let outcome = parse_secret_file(file.path(), None, None);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("cannot determine file format"));
}
// ---------------------------------------------------------------------------
// Empty file
// ---------------------------------------------------------------------------
/// Empty env text, an empty JSON object and empty TOML text each parse
/// successfully to an empty entry list.
#[test]
fn empty_file_produces_empty_vec() {
    let cases = [
        ("", FileFormat::Env, ".env"),
        ("{}", FileFormat::Json, "a.json"),
        ("", FileFormat::Toml, "a.toml"),
    ];
    for (content, format, name) in cases {
        let parsed = parse_content(content, format, p(name)).unwrap();
        assert!(parsed.is_empty());
    }
}
// ---------------------------------------------------------------------------
// parse_secret_file end-to-end (disk)
// ---------------------------------------------------------------------------
/// End-to-end check against a real temp file: both assignments are parsed,
/// and the entry source is the path that was read.
#[test]
fn parse_secret_file_from_disk() {
    use std::io::Write;

    let mut file = NamedTempFile::with_suffix(".env").unwrap();
    write!(file, "SECRET=hunter2\nPORT=8080").unwrap();

    let parsed = parse_secret_file(file.path(), None, None).unwrap();
    assert_eq!(parsed.len(), 2);

    let found_secret = parsed
        .iter()
        .any(|e| e.key == "SECRET" && e.value == "hunter2");
    assert!(found_secret);

    // The first entry must point back at the temp file.
    let first = &parsed[0];
    assert_eq!(first.source, file.path());
}
/// With a `*PASSWORD*` key filter, only `DB_PASSWORD` is returned from a
/// three-key env file.
#[test]
fn parse_secret_file_with_key_filter() {
    use std::io::Write;

    let mut file = NamedTempFile::with_suffix(".env").unwrap();
    write!(file, "API_KEY=abc\nHOST=localhost\nDB_PASSWORD=secret").unwrap();

    let patterns = vec!["*PASSWORD*".to_string()];
    let parsed = parse_secret_file(file.path(), None, Some(&patterns)).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "DB_PASSWORD");
}
/// An explicit format override lets env content be parsed from a `.txt`
/// file.
#[test]
fn parse_secret_file_with_format_override() {
    use std::io::Write;

    // Env-style content stored under a .txt suffix.
    let mut file = NamedTempFile::with_suffix(".txt").unwrap();
    write!(file, "KEY=value").unwrap();

    let forced_format = Some(FileFormat::Env);
    let parsed = parse_secret_file(file.path(), forced_format, None).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "KEY");
}
+373
View File
@@ -0,0 +1,373 @@
//! Integration tests for the secret value redactor.
use std::path::PathBuf;
use dirigent_fermata::core::secrets::config::RedactionStyle;
use dirigent_fermata::core::secrets::manifest::Manifest;
use dirigent_fermata::core::secrets::parser::SecretEntry;
use dirigent_fermata::core::secrets::redactor::Redactor;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Builds a `SecretEntry` from a key/value pair, using the fixed placeholder
/// source path `test`.
fn entry(key: &str, value: &str) -> SecretEntry {
    let source = PathBuf::from("test");
    SecretEntry {
        key: key.to_owned(),
        value: value.to_owned(),
        source,
    }
}
/// Builds a `Redactor` with the given style from a manifest created out of
/// `entries`.
fn make_redactor(entries: Vec<SecretEntry>, style: RedactionStyle) -> Redactor {
    let known_secrets = Manifest::from_entries(entries);
    let redactor = Redactor::new(&known_secrets, style);
    redactor
}
// ---------------------------------------------------------------------------
// Basic redaction
// ---------------------------------------------------------------------------
/// One known secret in the text is replaced by the masked placeholder and
/// recorded as a single redaction under its key.
#[test]
fn basic_single_secret() {
    let secrets = vec![entry("DB_PASSWORD", "super_secret_123")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("connecting with password super_secret_123 ...");

    assert_eq!(outcome.text, "connecting with password ***** ...");
    assert!(outcome.was_redacted());
    assert_eq!(outcome.redactions.len(), 1);
    let only = &outcome.redactions[0];
    assert_eq!(only.key, "DB_PASSWORD");
}
// ---------------------------------------------------------------------------
// Multiple secrets
// ---------------------------------------------------------------------------
/// Two distinct secrets are both masked, and the redactions are listed in
/// the order the values appear in the text.
#[test]
fn multiple_different_secrets() {
    let secrets = vec![
        entry("DB_PASSWORD", "db_pass_value"),
        entry("API_KEY", "ak_12345678"),
    ];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("db=db_pass_value key=ak_12345678");

    assert_eq!(outcome.text, "db=***** key=*****");
    assert_eq!(outcome.redactions.len(), 2);
    let (first, second) = (&outcome.redactions[0], &outcome.redactions[1]);
    assert_eq!(first.key, "DB_PASSWORD");
    assert_eq!(second.key, "API_KEY");
}
// ---------------------------------------------------------------------------
// Repeated occurrences
// ---------------------------------------------------------------------------
/// Every occurrence of the same secret is replaced, with one redaction
/// recorded per occurrence.
#[test]
fn same_secret_multiple_times() {
    let secrets = vec![entry("TOKEN", "tok_abcdef")];
    let redactor = make_redactor(secrets, RedactionStyle::Named);

    let outcome = redactor.redact("first=tok_abcdef second=tok_abcdef");

    assert_eq!(outcome.text, "first=<REDACTED:TOKEN> second=<REDACTED:TOKEN>");
    assert_eq!(outcome.redactions.len(), 2);
}
// ---------------------------------------------------------------------------
// Redaction styles
// ---------------------------------------------------------------------------
/// The Masked style replaces the secret with `*****`.
#[test]
fn style_masked() {
    let secrets = vec![entry("KEY", "secret_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("val=secret_value");

    assert_eq!(outcome.text, "val=*****");
}
/// The Typed style replaces the secret with `<REDACTED:string:N>`, where N
/// is the length of the original value.
#[test]
fn style_typed() {
    let secrets = vec![entry("KEY", "secret_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Typed);

    let outcome = redactor.redact("val=secret_value");

    // The value "secret_value" has 12 characters.
    assert_eq!(outcome.text, "val=<REDACTED:string:12>");
}
/// The Named style replaces the secret with `<REDACTED:KEY_NAME>`.
#[test]
fn style_named() {
    let secrets = vec![entry("MY_API_KEY", "secret_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Named);

    let outcome = redactor.redact("val=secret_value");

    assert_eq!(outcome.text, "val=<REDACTED:MY_API_KEY>");
}
/// The Absent style removes the secret without inserting a placeholder, and
/// the result still reports that a redaction happened.
#[test]
fn style_absent() {
    let secrets = vec![entry("KEY", "secret_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Absent);

    let outcome = redactor.redact("val=secret_value end");

    assert_eq!(outcome.text, "val= end");
    assert!(outcome.was_redacted());
}
// ---------------------------------------------------------------------------
// Overlapping values (longest match wins)
// ---------------------------------------------------------------------------
/// When one secret is a prefix of another, the longer value wins at a
/// position where both could match.
#[test]
fn overlapping_longest_match_wins() {
    let secrets = vec![
        entry("SHORT_KEY", "secret"),
        entry("LONG_KEY", "secret_long_value"),
    ];
    let redactor = make_redactor(secrets, RedactionStyle::Named);

    let outcome = redactor.redact("x=secret_long_value");

    // Exactly one redaction, attributed to the longer secret.
    assert_eq!(outcome.text, "x=<REDACTED:LONG_KEY>");
    assert_eq!(outcome.redactions.len(), 1);
    let only = &outcome.redactions[0];
    assert_eq!(only.key, "LONG_KEY");
}
/// The shorter secret is still redacted where it stands alone, alongside a
/// separate occurrence of the longer one.
#[test]
fn shorter_match_still_found_when_no_overlap() {
    let secrets = vec![
        entry("SHORT_KEY", "secret"),
        entry("LONG_KEY", "secret_long_value"),
    ];
    let redactor = make_redactor(secrets, RedactionStyle::Named);

    // First occurrence is the bare short value; second is the full long value.
    let outcome = redactor.redact("a=secret b=secret_long_value");

    assert_eq!(outcome.text, "a=<REDACTED:SHORT_KEY> b=<REDACTED:LONG_KEY>");
    assert_eq!(outcome.redactions.len(), 2);
}
// ---------------------------------------------------------------------------
// No match
// ---------------------------------------------------------------------------
/// Text that contains no known secret comes back unchanged, with no
/// redactions recorded.
#[test]
fn no_match_returns_unchanged() {
    let secrets = vec![entry("KEY", "not_present_here")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("nothing to see here");

    assert_eq!(outcome.text, "nothing to see here");
    assert!(!outcome.was_redacted());
    assert!(outcome.redactions.is_empty());
}
// ---------------------------------------------------------------------------
// Empty text
// ---------------------------------------------------------------------------
/// Redacting the empty string yields the empty string and no redaction.
#[test]
fn empty_input_returns_empty() {
    let secrets = vec![entry("KEY", "some_secret")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("");

    assert_eq!(outcome.text, "");
    assert!(!outcome.was_redacted());
}
// ---------------------------------------------------------------------------
// Empty manifest
// ---------------------------------------------------------------------------
/// A redactor built from an empty manifest reports no secrets and leaves
/// text untouched.
#[test]
fn empty_manifest_returns_unchanged() {
    let no_secrets = Manifest::empty();
    let redactor = Redactor::new(&no_secrets, RedactionStyle::Masked);
    assert!(!redactor.has_secrets());

    let outcome = redactor.redact("some text with no secrets");

    assert_eq!(outcome.text, "some text with no secrets");
    assert!(!outcome.was_redacted());
}
// ---------------------------------------------------------------------------
// Short values filtered out by Manifest::from_entries
// ---------------------------------------------------------------------------
/// A 3-character value is expected to be dropped from the manifest, while a
/// 4-character value is kept and redacted.
#[test]
fn short_values_are_filtered() {
    let secrets = vec![entry("TINY", "abc"), entry("LONG_ENOUGH", "abcd")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("abc abcd");

    // "abc" stays in the output; only "abcd" is masked.
    assert_eq!(outcome.text, "abc *****");
    assert_eq!(outcome.redactions.len(), 1);
    let only = &outcome.redactions[0];
    assert_eq!(only.key, "LONG_ENOUGH");
}
// ---------------------------------------------------------------------------
// Zero false negatives — every declared secret must be caught
// ---------------------------------------------------------------------------
/// Every declared secret that appears in the text must be removed from the
/// redacted output, with one redaction recorded per secret.
#[test]
fn zero_false_negatives() {
    let secrets = vec![
        entry("A_SECRET", "alpha_secret_val"),
        entry("B_TOKEN", "bravo_token_val_"),
        entry("C_PASSWORD", "charlie_pass_99"),
        entry("D_API_KEY", "delta_key_00000"),
    ];
    let redactor = make_redactor(secrets.clone(), RedactionStyle::Masked);

    // Input text containing each of the four secret values once.
    let text = format!(
        "a={} b={} c={} d={}",
        "alpha_secret_val", "bravo_token_val_", "charlie_pass_99", "delta_key_00000",
    );
    let outcome = redactor.redact(&text);

    // Only values of at least 4 bytes are checked for leakage.
    for secret in secrets.iter().filter(|s| s.value.len() >= 4) {
        let leaked = outcome.text.contains(&secret.value);
        assert!(
            !leaked,
            "Secret {} was not redacted: {}",
            secret.key,
            outcome.text,
        );
    }
    assert_eq!(outcome.redactions.len(), 4);
}
// ---------------------------------------------------------------------------
// Multi-line text
// ---------------------------------------------------------------------------
/// Secrets on different lines of a multi-line text are both removed, while
/// an unrelated value is left in place.
#[test]
fn multi_line_redaction() {
    let secrets = vec![
        entry("DB_PASSWORD", "s3cr3t_p@ss"),
        entry("API_KEY", "ak-1234567890"),
    ];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let config_text = "# Config file\n\
        DATABASE_URL=postgres://user:s3cr3t_p@ss@host/db\n\
        API_KEY=ak-1234567890\n\
        OTHER=safe_value\n";
    let outcome = redactor.redact(config_text);
    let redacted = &outcome.text;

    assert!(!redacted.contains("s3cr3t_p@ss"));
    assert!(!redacted.contains("ak-1234567890"));
    assert!(redacted.contains("safe_value"));
    assert_eq!(outcome.redactions.len(), 2);
}
// ---------------------------------------------------------------------------
// Redaction metadata correctness
// ---------------------------------------------------------------------------
/// The redaction record carries the key, the byte offset of the match in
/// the input, and the length of the original value.
#[test]
fn redaction_metadata_offset_and_len() {
    let secrets = vec![entry("SECRET", "ABCDEFGH")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("prefix_ABCDEFGH_suffix");

    assert_eq!(outcome.redactions.len(), 1);
    let record = &outcome.redactions[0];
    assert_eq!(record.key, "SECRET");
    // The match starts right after the 7-byte "prefix_".
    assert_eq!(record.offset, 7);
    // "ABCDEFGH" is 8 bytes long.
    assert_eq!(record.original_len, 8);
}
/// Two occurrences of the same secret are recorded with their own offsets
/// into the original text.
#[test]
fn redaction_metadata_multiple_offsets() {
    let secrets = vec![entry("TOK", "xxxx1234")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("a=xxxx1234 b=xxxx1234");

    assert_eq!(outcome.redactions.len(), 2);
    let (first, second) = (&outcome.redactions[0], &outcome.redactions[1]);
    // First match follows "a=".
    assert_eq!(first.offset, 2);
    assert_eq!(first.original_len, 8);
    // Second match follows " b=", i.e. byte 13 of the input.
    assert_eq!(second.offset, 13);
    assert_eq!(second.original_len, 8);
}
// ---------------------------------------------------------------------------
// has_secrets() helper
// ---------------------------------------------------------------------------
/// A redactor built from one usable entry reports that it has secrets.
#[test]
fn has_secrets_with_entries() {
    let secrets = vec![entry("KEY", "long_enough_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    assert!(redactor.has_secrets());
}
/// A redactor built from no entries reports that it has no secrets.
#[test]
fn has_secrets_empty() {
    let redactor = make_redactor(Vec::new(), RedactionStyle::Masked);

    assert!(!redactor.has_secrets());
}
// ---------------------------------------------------------------------------
// was_redacted() helper
// ---------------------------------------------------------------------------
/// `was_redacted()` is true when the input contained a known secret.
#[test]
fn was_redacted_true_when_match() {
    let secrets = vec![entry("KEY", "findme_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("findme_value");

    assert!(outcome.was_redacted());
}
/// `was_redacted()` is false when the input contained no known secret.
#[test]
fn was_redacted_false_when_no_match() {
    let secrets = vec![entry("KEY", "findme_value")];
    let redactor = make_redactor(secrets, RedactionStyle::Masked);

    let outcome = redactor.redact("nothing here");

    assert!(!outcome.was_redacted());
}
// ---------------------------------------------------------------------------
// Deduplication in from_entries
// ---------------------------------------------------------------------------
/// Two identical key/value entries collapse into one manifest entry.
#[test]
fn duplicate_entries_deduplicated() {
    let duplicated = vec![
        entry("KEY", "same_value_here"),
        entry("KEY", "same_value_here"),
    ];
    let manifest = Manifest::from_entries(duplicated);

    assert_eq!(manifest.len(), 1);
}
+254
View File
@@ -0,0 +1,254 @@
use dirigent_fermata::core::secrets::config::HeuristicConfig;
use dirigent_fermata::core::secrets::scanner::{shannon_entropy, Confidence, Scanner};
// ---------------------------------------------------------------------------
// Helper: build a scanner with default config (built-in rules only)
// ---------------------------------------------------------------------------
/// Returns a scanner built from `Scanner::builtin()`.
///
/// Panics if the built-in rules fail to compile.
fn default_scanner() -> Scanner {
    let built = Scanner::builtin();
    built.expect("built-in rules must compile")
}
// ---------------------------------------------------------------------------
// Specific provider patterns
// ---------------------------------------------------------------------------
/// An AWS access key ID embedded in prose is reported as an
/// `aws-access-key` finding with high confidence.
#[test]
fn detects_aws_access_key() {
    let scanner = default_scanner();
    let findings = scanner.scan("here is my key: AKIAIOSFODNN7EXAMPLE ok");
    // Check the confidence of the AWS finding itself rather than
    // `findings[0]`, which could belong to a different rule matching the
    // same text.
    let aws = findings
        .iter()
        .find(|f| f.pattern_id == "aws-access-key")
        .unwrap_or_else(|| panic!("expected aws-access-key finding, got: {findings:?}"));
    assert_eq!(aws.confidence, Confidence::High);
}
/// A `ghp_`-prefixed token is reported as `github-pat-classic`.
#[test]
fn detects_github_pat_classic() {
    let scanner = default_scanner();
    let findings = scanner.scan("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij");

    let detected = findings
        .iter()
        .any(|f| f.pattern_id == "github-pat-classic");
    assert!(
        detected,
        "expected github-pat-classic finding, got: {findings:?}"
    );
}
/// An `sk_live_` key in an env-style assignment is reported as
/// `stripe-secret-key`.
#[test]
fn detects_stripe_secret_key() {
    let scanner = default_scanner();
    let findings = scanner.scan("STRIPE_KEY=sk_live_abcdefghijklmnopqrstuvwx");

    let detected = findings
        .iter()
        .any(|f| f.pattern_id == "stripe-secret-key");
    assert!(
        detected,
        "expected stripe-secret-key finding, got: {findings:?}"
    );
}
/// A PEM block with an RSA private key header is reported as
/// `private-key-header`.
#[test]
fn detects_private_key_header() {
    let scanner = default_scanner();
    let pem = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAK...\n-----END RSA PRIVATE KEY-----";
    let findings = scanner.scan(pem);

    let detected = findings
        .iter()
        .any(|f| f.pattern_id == "private-key-header");
    assert!(
        detected,
        "expected private-key-header finding, got: {findings:?}"
    );
}
/// A three-segment JWT-shaped string is reported as `jwt`.
#[test]
fn detects_jwt_token() {
    let scanner = default_scanner();
    // Fake but realistically shaped token: header.payload.signature.
    let token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ik\
        pvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c";
    let findings = scanner.scan(token);

    let detected = findings.iter().any(|f| f.pattern_id == "jwt");
    assert!(detected, "expected jwt finding, got: {findings:?}");
}
/// A postgres URL with inline credentials is reported as
/// `database-connection-url`.
#[test]
fn detects_database_connection_url() {
    let scanner = default_scanner();
    let line = "DATABASE_URL=postgres://admin:s3cretP4ss@db.example.com:5432/mydb";
    let findings = scanner.scan(line);

    let detected = findings
        .iter()
        .any(|f| f.pattern_id == "database-connection-url");
    assert!(
        detected,
        "expected database-connection-url finding, got: {findings:?}"
    );
}
/// A `hooks.slack.com/services/...` URL is reported as `slack-webhook`.
#[test]
fn detects_slack_webhook() {
    let scanner = default_scanner();
    let url = "https://hooks.slack.com/services/T0ABCDEFG/B0ABCDEFG/abcdefghijklmnopqrstuvwx";
    let findings = scanner.scan(url);

    let detected = findings.iter().any(|f| f.pattern_id == "slack-webhook");
    assert!(
        detected,
        "expected slack-webhook finding, got: {findings:?}"
    );
}
/// An `sk-ant-` key embedded in a sentence is reported as
/// `anthropic-api-key`.
#[test]
fn detects_anthropic_api_key() {
    let scanner = default_scanner();
    let key = "sk-ant-aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789abcdefgh";
    let sentence = format!("my key is {key}");
    let findings = scanner.scan(&sentence);

    let detected = findings
        .iter()
        .any(|f| f.pattern_id == "anthropic-api-key");
    assert!(
        detected,
        "expected anthropic-api-key finding, got: {findings:?}"
    );
}
/// An `SG.`-prefixed two-part key is reported as `sendgrid-api-key`.
#[test]
fn detects_sendgrid_api_key() {
    let scanner = default_scanner();
    let findings =
        scanner.scan("SG.abcdefghijklmnopqrstuv.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst");

    let detected = findings
        .iter()
        .any(|f| f.pattern_id == "sendgrid-api-key");
    assert!(
        detected,
        "expected sendgrid-api-key finding, got: {findings:?}"
    );
}
// ---------------------------------------------------------------------------
// Generic patterns — entropy filtering
// ---------------------------------------------------------------------------
/// A repetitive, low-entropy value assigned to `api_key` must not produce a
/// `generic-api-key` finding.
#[test]
fn rejects_low_entropy_generic_api_key() {
    let scanner = default_scanner();
    // The value is just "test" repeated five times.
    let input = r#"api_key = "testtesttesttesttest""#;
    let findings = scanner.scan(input);

    let mut generic_hits = Vec::new();
    for finding in &findings {
        if finding.pattern_id == "generic-api-key" {
            generic_hits.push(finding);
        }
    }
    assert!(
        generic_hits.is_empty(),
        "low-entropy api_key should be filtered out, got: {generic_hits:?}"
    );
}
/// A random-looking value assigned to `secret` is reported as
/// `generic-secret`.
#[test]
fn accepts_high_entropy_generic_secret() {
    let scanner = default_scanner();
    // NOTE(review): this raw string ends at `"#`, so the scanned text has an
    // opening quote but no closing one, unlike the low-entropy test's input.
    // Confirm whether that is intentional.
    let input = r#"secret = "a8Kz3Lm9Xq2Wp7Yn"#;
    let findings = scanner.scan(input);

    let has_generic = findings.iter().any(|f| f.pattern_id == "generic-secret");
    assert!(
        has_generic,
        "high-entropy secret should be detected, got: {findings:?}"
    );
}
// ---------------------------------------------------------------------------
// Custom patterns from config
// ---------------------------------------------------------------------------
/// A user-supplied regex from `HeuristicConfig::patterns` is compiled into
/// the scanner and reported under the id `custom-0` with high confidence.
#[test]
fn custom_pattern_from_config() {
    let config = HeuristicConfig {
        enabled: true,
        patterns: vec![r"MY_CUSTOM_[A-Z]{10}".to_string()],
        ..Default::default()
    };
    let scanner = Scanner::new(&config).expect("should compile custom pattern");
    let findings = scanner.scan("found MY_CUSTOM_ABCDEFGHIJ in output");
    // Check the confidence of the custom finding itself rather than
    // `findings[0]`, which could belong to a different rule matching the
    // same text.
    let custom = findings
        .iter()
        .find(|f| f.pattern_id == "custom-0")
        .unwrap_or_else(|| panic!("expected custom-0 finding, got: {findings:?}"));
    assert_eq!(custom.confidence, Confidence::High);
}
// ---------------------------------------------------------------------------
// Edge cases
// ---------------------------------------------------------------------------
/// Scanning the empty string yields no findings.
#[test]
fn empty_text_returns_no_findings() {
    let scanner = default_scanner();
    let findings = scanner.scan("");

    assert!(findings.is_empty());
}
/// An ordinary English sentence yields no findings.
#[test]
fn plain_text_returns_no_findings() {
    let scanner = default_scanner();
    let sentence = "This is just a normal paragraph with no secrets.";
    let findings = scanner.scan(sentence);

    let clean = findings.is_empty();
    assert!(
        clean,
        "plain text should have no findings, got: {findings:?}"
    );
}
/// No two findings returned for a text that several rules could match may
/// cover overlapping spans.
#[test]
fn overlapping_matches_are_deduplicated() {
    // A bearer header carrying a token-shaped value, which more than one
    // rule could plausibly match over the same region.
    let scanner = default_scanner();
    let text = "Authorization: Bearer ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh";
    let findings = scanner.scan(text);

    // Each later finding must start at or after the end of every earlier
    // one, so this also requires the findings to be ordered by position.
    for (i, earlier) in findings.iter().enumerate() {
        for (j, later) in findings.iter().enumerate().skip(i + 1) {
            assert!(
                later.span.start >= earlier.span.end,
                "findings {i} and {j} overlap: {:?} vs {:?}",
                earlier.span,
                later.span,
            );
        }
    }
}
// ---------------------------------------------------------------------------
// Shannon entropy unit tests (supplement the inline mod tests)
// ---------------------------------------------------------------------------
/// Spot-checks `shannon_entropy` on inputs with known or bounded entropy.
#[test]
fn entropy_known_values() {
    // One repeated symbol carries no information.
    let uniform = shannon_entropy("aaaa");
    assert!((uniform - 0.0).abs() < f64::EPSILON);

    // Two symbols in equal proportion: 1.0 bits/char.
    let two_symbol = shannon_entropy("ababababab");
    assert!((two_symbol - 1.0).abs() < 0.01);

    // Many distinct symbols: well above 3.5 bits/char.
    let varied = shannon_entropy("aB3$kL9!mZ7@wQ1#xR5^");
    assert!(varied > 3.5);
}
// ---------------------------------------------------------------------------
// Scanner construction
// ---------------------------------------------------------------------------
/// The built-in scanner ships with at least 30 rules.
#[test]
fn builtin_scanner_has_rules() {
    let scanner = default_scanner();

    let enough_rules = scanner.rule_count() >= 30;
    assert!(
        enough_rules,
        "expected at least 30 built-in rules, got {}",
        scanner.rule_count()
    );
}
/// A custom pattern that is not a valid regex makes `Scanner::new` return
/// an error.
#[test]
fn invalid_custom_pattern_returns_error() {
    // An unterminated character class cannot compile.
    let broken_pattern = r"[invalid".to_string();
    let config = HeuristicConfig {
        enabled: true,
        patterns: vec![broken_pattern],
        ..Default::default()
    };

    let built = Scanner::new(&config);
    assert!(built.is_err());
}
+58 -1
View File
@@ -1,5 +1,5 @@
use dirigent_fermata::core::{Decision, Reason};
use dirigent_fermata::harness::{HarnessAdapter, PathKind, ToolOp};
use dirigent_fermata::harness::{HarnessAdapter, HookEvent, PathKind, ToolOp};
use dirigent_fermata::harness::claude::ClaudeAdapter;
#[test]
@@ -84,3 +84,60 @@ fn renders_ask_as_ask() {
let v: serde_json::Value = serde_json::from_slice(&out).unwrap();
assert_eq!(v["hookSpecificOutput"]["permissionDecision"], "ask");
}
// ---------------------------------------------------------------------------
// PostToolUse
// ---------------------------------------------------------------------------
/// A PostToolUse payload exposes its tool name and tool response after
/// parsing.
#[test]
fn parses_post_tool_use_payload() {
    let raw = br#"{"tool_name":"Read","tool_input":{"file_path":"/proj/.env"},"tool_response":"SECRET=abc"}"#;
    let parsed = ClaudeAdapter.parse_post_tool_use(raw).unwrap();

    assert_eq!(parsed.tool_name, "Read");
    assert_eq!(parsed.tool_response, "SECRET=abc");
}
/// A payload with no `tool_response` field parses with an empty response
/// string.
#[test]
fn parses_post_tool_use_missing_response() {
    let raw = br#"{"tool_name":"Bash","tool_input":{"command":"ls"}}"#;
    let parsed = ClaudeAdapter.parse_post_tool_use(raw).unwrap();

    assert_eq!(parsed.tool_response, "");
}
/// When replacement text is supplied, the rendered hook output names the
/// PostToolUse event and carries the text as `updatedToolOutput`.
#[test]
fn renders_post_tool_use_with_redacted_output() {
    let raw = br#"{"tool_name":"Read","tool_input":{},"tool_response":"x"}"#;
    let parsed = ClaudeAdapter.parse_post_tool_use(raw).unwrap();

    let rendered = ClaudeAdapter
        .render_post_tool_use(&parsed, Some("redacted text"))
        .unwrap();
    let json: serde_json::Value = serde_json::from_slice(&rendered).unwrap();
    let hook_output = &json["hookSpecificOutput"];

    assert_eq!(hook_output["hookEventName"], "PostToolUse");
    assert_eq!(hook_output["updatedToolOutput"], "redacted text");
}
/// With no replacement text, the rendered hook output is an empty JSON
/// object.
#[test]
fn renders_post_tool_use_passthrough() {
    let raw = br#"{"tool_name":"Read","tool_input":{},"tool_response":"clean"}"#;
    let parsed = ClaudeAdapter.parse_post_tool_use(raw).unwrap();

    let rendered = ClaudeAdapter.render_post_tool_use(&parsed, None).unwrap();
    let json: serde_json::Value = serde_json::from_slice(&rendered).unwrap();

    let empty_object = serde_json::json!({});
    assert_eq!(json, empty_object);
}
// ---------------------------------------------------------------------------
// HookEvent parsing
// ---------------------------------------------------------------------------
/// Both the kebab-case and CamelCase spellings of each event parse to the
/// matching variant, and an unrecognised name parses to `None`.
#[test]
fn hook_event_parse_variants() {
    let cases = [
        ("pre-tool-use", Some(HookEvent::PreToolUse)),
        ("PreToolUse", Some(HookEvent::PreToolUse)),
        ("post-tool-use", Some(HookEvent::PostToolUse)),
        ("PostToolUse", Some(HookEvent::PostToolUse)),
        ("unknown", None),
    ];
    for (name, expected) in cases {
        assert_eq!(HookEvent::parse(name), expected, "event name: {name}");
    }
}