Files
fermata/tests/core_secrets_parser.rs
Gabor Körber 087429d275 feat(fermata): add secret filtering engine — the security brain
Implement Goals 1–3 and 5 from the reveal-layer security brain goal.
fermata now detects, redacts, and scans for secrets in AI agent tool
output, filling the ecosystem gap where no coding agent filters secrets
post-read.

New core/secrets/ module:
- config.rs: .botsecrets TOML format with hierarchical merge and ~40
  built-in key patterns
- parser.rs: multi-format secret file parser (.env, TOML, YAML, JSON,
  Python assignments, Java properties)
- manifest.rs: file discovery + parsing → known-secrets set
- redactor.rs: Aho-Corasick multi-pattern replacement with 4 styles
- scanner.rs: RegexSet heuristic detection with 35 gitleaks-derived
  patterns (MIT) and Shannon entropy filtering
- patterns.rs: curated rules for AWS, GitHub, Stripe, Slack, JWT, etc.

Hook integration:
- fermata hook --event post-tool-use reads tool output, runs redactor +
  scanner, returns updatedToolOutput for Claude Code
- Backward compatible: --event pre-tool-use (default) unchanged
- Fail-open: errors produce {} and exit 0

Library API:
- Redactor::new(manifest, style).redact(text) → RedactedText
- Scanner::new(config).scan(text) → Vec<Finding>
- Compiles without CLI feature for embedding in other crates

195 tests (130 new), all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-25 17:29:07 +02:00

405 lines
14 KiB
Rust

//! Integration tests for the multi-format secret file parser.
use dirigent_fermata::core::secrets::parser::{
parse_content, parse_secret_file, FileFormat, SecretEntry,
};
use std::path::Path;
use tempfile::NamedTempFile;
/// Shorthand used throughout these tests: borrows `s` as a [`Path`].
fn p(s: &str) -> &Path {
    s.as_ref()
}
// ---------------------------------------------------------------------------
// .env parsing
// ---------------------------------------------------------------------------
/// A bare `KEY=value` line yields exactly one entry with that key and value.
#[test]
fn env_basic_key_value() {
    let line = "DATABASE_URL=postgres://localhost/db";
    let parsed = parse_content(line, FileFormat::Env, p(".env")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "DATABASE_URL");
    assert_eq!(only.value, "postgres://localhost/db");
}
/// Surrounding double quotes are stripped from a `.env` value.
#[test]
fn env_double_quoted() {
    let entries = parse_content(r#"SECRET="hello world""#, FileFormat::Env, p(".env")).unwrap();
    // Guard the index below and catch a quoted value being split into extra entries.
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].key, "SECRET");
    assert_eq!(entries[0].value, "hello world");
}
/// Surrounding single quotes are stripped from a `.env` value.
#[test]
fn env_single_quoted() {
    let entries = parse_content("SECRET='hello world'", FileFormat::Env, p(".env")).unwrap();
    // Guard the index below and catch a quoted value being split into extra entries.
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].key, "SECRET");
    assert_eq!(entries[0].value, "hello world");
}
/// Comment lines (including an indented one) and blank lines produce no entries.
#[test]
fn env_comments_and_empty_lines() {
    let parsed = parse_content(
        "# comment\n\nKEY=value\n # indented comment\n",
        FileFormat::Env,
        p(".env"),
    )
    .unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "KEY");
}
/// A leading `export ` is dropped from the key, for both bare and quoted values.
#[test]
fn env_export_prefix() {
    let script = "export API_KEY=abc123\nexport TOKEN=\"xyz\"";
    let parsed = parse_content(script, FileFormat::Env, p(".env")).unwrap();

    assert_eq!(parsed.len(), 2);

    let (api_key, token) = (&parsed[0], &parsed[1]);
    assert_eq!(api_key.key, "API_KEY");
    assert_eq!(api_key.value, "abc123");
    assert_eq!(token.key, "TOKEN");
    assert_eq!(token.value, "xyz");
}
/// Whitespace around keys and unquoted values is trimmed.
#[test]
fn env_whitespace_handling() {
    let content = " KEY = value \nKEY2= spaced ";
    let entries = parse_content(content, FileFormat::Env, p(".env")).unwrap();
    // Two input lines must give exactly two entries; this also guards the indexing below.
    assert_eq!(entries.len(), 2);
    // Key is trimmed; unquoted value trimmed.
    assert_eq!(entries[0].key, "KEY");
    assert_eq!(entries[0].value, "value");
    assert_eq!(entries[1].key, "KEY2");
    assert_eq!(entries[1].value, "spaced");
}
/// Inside double quotes, a literal backslash-`n` in the file becomes a real newline.
#[test]
fn env_escape_sequences_in_double_quotes() {
    // Raw string: the parser sees the two characters `\` and `n`, not a newline.
    let content = r#"MSG="line1\nline2""#;
    let entries = parse_content(content, FileFormat::Env, p(".env")).unwrap();
    // Guard the index below; the escape must not split the value into two entries.
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].key, "MSG");
    assert_eq!(entries[0].value, "line1\nline2");
}
// ---------------------------------------------------------------------------
// TOML parsing
// ---------------------------------------------------------------------------
/// Top-level TOML string values are extracted under their plain key names.
#[test]
fn toml_flat_table() {
    let document = r#"
API_KEY = "abc"
DB_PASS = "secret"
"#;
    let parsed = parse_content(document, FileFormat::Toml, p("Secrets.toml")).unwrap();
    let has = |key: &str, value: &str| parsed.iter().any(|e| e.key == key && e.value == value);

    assert_eq!(parsed.len(), 2);
    assert!(has("API_KEY", "abc"));
    assert!(has("DB_PASS", "secret"));
}
/// Values inside a TOML table get a dotted `table.key` name; non-string values are dropped.
#[test]
fn toml_nested_tables() {
    let document = r#"
[database]
password = "secret"
host = "localhost"
port = 5432
"#;
    let parsed = parse_content(document, FileFormat::Toml, p("config.toml")).unwrap();
    let has = |key: &str, value: &str| parsed.iter().any(|e| e.key == key && e.value == value);

    // `port` is an integer, so only the two string values remain.
    assert_eq!(parsed.len(), 2);
    assert!(has("database.password", "secret"));
    assert!(has("database.host", "localhost"));
}
/// Booleans, integers and floats in TOML are ignored; only the string value is reported.
#[test]
fn toml_mixed_types_only_strings() {
    let document = r#"
name = "app"
debug = true
count = 42
ratio = 3.14
"#;
    let parsed = parse_content(document, FileFormat::Toml, p("app.toml")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "name");
}
// ---------------------------------------------------------------------------
// JSON parsing
// ---------------------------------------------------------------------------
/// Top-level JSON string members are extracted under their plain key names.
#[test]
fn json_flat_object() {
    let content = r#"{"api_key": "abc", "secret": "xyz"}"#;
    let entries = parse_content(content, FileFormat::Json, p("secrets.json")).unwrap();
    assert_eq!(entries.len(), 2);
    // Check both members so a wrong key/value on the second one cannot slip through.
    assert!(entries.iter().any(|e| e.key == "api_key" && e.value == "abc"));
    assert!(entries.iter().any(|e| e.key == "secret" && e.value == "xyz"));
}
/// Nested JSON objects are flattened to dotted keys; the numeric member is skipped.
#[test]
fn json_nested_objects() {
    let parsed = parse_content(
        r#"{"db": {"password": "foo", "port": 5432}}"#,
        FileFormat::Json,
        p("secrets.json"),
    )
    .unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "db.password");
    assert_eq!(only.value, "foo");
}
/// String elements of a JSON array are keyed as `<name>.<index>`.
#[test]
fn json_arrays() {
    let document = r#"{"keys": ["a", "b"]}"#;
    let parsed = parse_content(document, FileFormat::Json, p("secrets.json")).unwrap();

    assert_eq!(parsed.len(), 2);
    for (key, value) in [("keys.0", "a"), ("keys.1", "b")] {
        assert!(parsed.iter().any(|e| e.key == key && e.value == value));
    }
}
/// Numbers, booleans and `null` in JSON are ignored; only the string member is reported.
#[test]
fn json_mixed_types() {
    let document = r#"{"name": "app", "count": 42, "active": true, "data": null}"#;
    let parsed = parse_content(document, FileFormat::Json, p("a.json")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "name");
}
// ---------------------------------------------------------------------------
// YAML parsing
// ---------------------------------------------------------------------------
/// Top-level YAML string values are extracted under their plain key names.
#[test]
fn yaml_flat_map() {
    let content = "api_key: abc\nsecret: xyz\n";
    let entries = parse_content(content, FileFormat::Yaml, p("secrets.yaml")).unwrap();
    assert_eq!(entries.len(), 2);
    // Check both pairs so a wrong key/value on the second one cannot slip through.
    assert!(entries.iter().any(|e| e.key == "api_key" && e.value == "abc"));
    assert!(entries.iter().any(|e| e.key == "secret" && e.value == "xyz"));
}
/// Nested YAML maps are flattened to dotted keys; the numeric value is skipped.
#[test]
fn yaml_nested_maps() {
    let document = "db:\n password: foo\n port: 5432\n";
    let parsed = parse_content(document, FileFormat::Yaml, p("secrets.yml")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "db.password");
    assert_eq!(only.value, "foo");
}
/// Integers and booleans in YAML are ignored; only the string value is reported.
#[test]
fn yaml_mixed_types() {
    let document = "name: app\ncount: 42\nactive: true\n";
    let parsed = parse_content(document, FileFormat::Yaml, p("a.yaml")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "name");
}
// ---------------------------------------------------------------------------
// Python assignment parsing
// ---------------------------------------------------------------------------
/// Quoted string assignments are picked up from Python source; the import line and the
/// integer assignment are not.
#[test]
fn python_matches_assignments() {
    let module = r#"
API_KEY = "abc123"
DB_PASS = 'secret'
import os
x = 42
"#;
    let parsed = parse_content(module, FileFormat::PythonAssignments, p("settings.py")).unwrap();
    let has = |key: &str, value: &str| parsed.iter().any(|e| e.key == key && e.value == value);

    assert_eq!(parsed.len(), 2);
    assert!(has("API_KEY", "abc123"));
    assert!(has("DB_PASS", "secret"));
}
/// Python code with no string-literal assignments yields no entries.
#[test]
fn python_skips_non_matching() {
    let parsed = parse_content(
        "result = some_function()\nfor x in range(10):\n pass\n",
        FileFormat::PythonAssignments,
        p("a.py"),
    )
    .unwrap();

    assert!(parsed.is_empty());
}
// ---------------------------------------------------------------------------
// Properties parsing
// ---------------------------------------------------------------------------
/// `key=value` lines in a properties file are split on `=`.
#[test]
fn properties_equals_separator() {
    let content = "db.password=secret\ndb.host=localhost";
    let entries = parse_content(content, FileFormat::Properties, p("app.properties")).unwrap();
    assert_eq!(entries.len(), 2);
    // Check both lines so a wrong key/value on the second one cannot slip through.
    assert!(entries.iter().any(|e| e.key == "db.password" && e.value == "secret"));
    assert!(entries.iter().any(|e| e.key == "db.host" && e.value == "localhost"));
}
/// `key: value` lines in a properties file are split on `:` and the value is trimmed.
#[test]
fn properties_colon_separator() {
    let content = "db.password: secret";
    let entries = parse_content(content, FileFormat::Properties, p("app.properties")).unwrap();
    // Guard the index below and make sure the single line yields a single entry.
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].key, "db.password");
    assert_eq!(entries[0].value, "secret");
}
/// Lines starting with `#` or `!` are treated as comments in a properties file.
#[test]
fn properties_comments() {
    let document = "# comment\n! also comment\nkey=value";
    let parsed = parse_content(document, FileFormat::Properties, p("app.properties")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "key");
}
/// A trailing backslash joins the next line onto the value; here the joined value is
/// `hello world`.
#[test]
fn properties_continuation_lines() {
    // The Rust literal `\\\n` is a single backslash followed by a newline in the file.
    let document = "long.value=hello \\\n world";
    let parsed = parse_content(document, FileFormat::Properties, p("app.properties")).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "long.value");
    assert_eq!(only.value, "hello world");
}
// ---------------------------------------------------------------------------
// Auto-detection from file extension
// ---------------------------------------------------------------------------
/// `.env`, `.env.<suffix>` and `<name>.env` paths are all detected as the env format.
#[test]
fn format_from_path_env_variants() {
    for name in [".env", ".env.local", ".env.production", "staging.env"] {
        assert_eq!(
            FileFormat::from_path(p(name)),
            Some(FileFormat::Env),
            "unexpected format for {}",
            name
        );
    }
}
/// Each recognised file extension maps to its expected format.
#[test]
fn format_from_path_extensions() {
    let cases = [
        ("a.toml", FileFormat::Toml),
        ("a.json", FileFormat::Json),
        ("a.yaml", FileFormat::Yaml),
        ("a.yml", FileFormat::Yaml),
        ("a.py", FileFormat::PythonAssignments),
        ("a.properties", FileFormat::Properties),
    ];

    for (name, format) in cases {
        assert_eq!(
            FileFormat::from_path(p(name)),
            Some(format),
            "unexpected format for {}",
            name
        );
    }
}
/// Extensions without a known format (`.key`, `.pem`) are not auto-detected.
#[test]
fn format_from_path_unknown() {
    for name in ["a.key", "a.pem"] {
        assert_eq!(
            FileFormat::from_path(p(name)),
            None,
            "expected no format for {}",
            name
        );
    }
}
// ---------------------------------------------------------------------------
// Format hints
// ---------------------------------------------------------------------------
/// Every accepted hint string (including aliases) maps to its format; an unknown hint
/// gives `None`.
#[test]
fn format_from_hint() {
    let cases = [
        ("env", Some(FileFormat::Env)),
        ("dotenv", Some(FileFormat::Env)),
        ("toml", Some(FileFormat::Toml)),
        ("json", Some(FileFormat::Json)),
        ("yaml", Some(FileFormat::Yaml)),
        ("yml", Some(FileFormat::Yaml)),
        ("python-assignments", Some(FileFormat::PythonAssignments)),
        ("python", Some(FileFormat::PythonAssignments)),
        ("properties", Some(FileFormat::Properties)),
        ("java-properties", Some(FileFormat::Properties)),
        ("unknown", None),
    ];

    for (hint, expected) in cases {
        assert_eq!(
            FileFormat::from_hint(hint),
            expected,
            "unexpected format for hint {}",
            hint
        );
    }
}
// ---------------------------------------------------------------------------
// Key filtering
// ---------------------------------------------------------------------------
/// Glob key filters keep only the matching entries out of the full parsed set.
#[test]
fn filter_by_glob() {
    let content = "API_KEY=abc\nDB_HOST=localhost\nDB_PASSWORD=secret\n";

    // Unfiltered baseline: all three keys are present.
    let unfiltered = parse_content(content, FileFormat::Env, p(".env")).unwrap();
    assert_eq!(unfiltered.len(), 3);

    let globs = vec![String::from("*PASSWORD*"), String::from("*API_KEY*")];
    let kept = parse_secret_file_with_filter(content, &globs);

    assert_eq!(kept.len(), 2);
    for wanted in ["API_KEY", "DB_PASSWORD"] {
        assert!(kept.iter().any(|e| e.key == wanted));
    }
}
/// Helper that parses env content with a key filter (avoids temp files).
///
/// Parses `content` as `.env` data and keeps only the entries whose key matches at
/// least one glob in `filter`. Patterns and keys are both ASCII-upper-cased before
/// matching, so the comparison is ASCII-case-insensitive. Patterns that fail to
/// compile as globs are skipped.
///
/// NOTE(review): this is meant to mirror the key filtering inside `parse_secret_file`;
/// confirm against that implementation if the two ever disagree.
///
/// # Panics
///
/// Panics if `content` cannot be parsed as `.env` data.
fn parse_secret_file_with_filter(content: &str, filter: &[String]) -> Vec<SecretEntry> {
    // Parse once; the result is consumed by the filter below.
    let all = parse_content(content, FileFormat::Env, p(".env")).unwrap();

    let matchers: Vec<_> = filter
        .iter()
        .filter_map(|pattern| {
            globset::Glob::new(&pattern.to_ascii_uppercase())
                .ok()
                .map(|glob| glob.compile_matcher())
        })
        .collect();

    all.into_iter()
        .filter(|entry| {
            let upper = entry.key.to_ascii_uppercase();
            matchers.iter().any(|m| m.is_match(&upper))
        })
        .collect()
}
// ---------------------------------------------------------------------------
// Error on unrecognised format
// ---------------------------------------------------------------------------
/// With no format override, a file whose extension is not recognised is rejected with a
/// "cannot determine file format" error.
#[test]
fn error_on_unknown_format() {
    use std::io::Write;

    let mut file = NamedTempFile::with_suffix(".xyz").unwrap();
    file.write_all(b"KEY=value").unwrap();

    let outcome = parse_secret_file(file.path(), None, None);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("cannot determine file format"));
}
// ---------------------------------------------------------------------------
// Empty file
// ---------------------------------------------------------------------------
/// Empty input (or an empty JSON object) parses successfully to zero entries.
#[test]
fn empty_file_produces_empty_vec() {
    let cases = [
        ("", FileFormat::Env, ".env"),
        ("{}", FileFormat::Json, "a.json"),
        ("", FileFormat::Toml, "a.toml"),
    ];

    for (content, format, name) in cases {
        let parsed = parse_content(content, format, p(name)).unwrap();
        assert!(parsed.is_empty(), "expected no entries for {}", name);
    }
}
// ---------------------------------------------------------------------------
// parse_secret_file end-to-end (disk)
// ---------------------------------------------------------------------------
/// End-to-end: a `.env` file on disk is auto-detected from its suffix and parsed, and
/// every entry records the file it came from.
#[test]
fn parse_secret_file_from_disk() {
    use std::io::Write;
    let mut tmp = NamedTempFile::with_suffix(".env").unwrap();
    write!(tmp, "SECRET=hunter2\nPORT=8080").unwrap();
    let entries = parse_secret_file(tmp.path(), None, None).unwrap();
    assert_eq!(entries.len(), 2);
    // Verify both lines, not just the first.
    assert!(entries.iter().any(|e| e.key == "SECRET" && e.value == "hunter2"));
    assert!(entries.iter().any(|e| e.key == "PORT" && e.value == "8080"));
    // Source path should match on every entry, not only the first one.
    for entry in entries.iter() {
        assert_eq!(entry.source, tmp.path());
    }
}
/// A key filter passed to `parse_secret_file` keeps only the matching entry.
#[test]
fn parse_secret_file_with_key_filter() {
    use std::io::Write;

    let mut file = NamedTempFile::with_suffix(".env").unwrap();
    file.write_all(b"API_KEY=abc\nHOST=localhost\nDB_PASSWORD=secret")
        .unwrap();

    let globs = vec![String::from("*PASSWORD*")];
    let kept = parse_secret_file(file.path(), None, Some(&globs)).unwrap();

    assert_eq!(kept.len(), 1);
    let only = &kept[0];
    assert_eq!(only.key, "DB_PASSWORD");
}
/// An explicit format makes `parse_secret_file` accept a file with an unrelated suffix.
#[test]
fn parse_secret_file_with_format_override() {
    use std::io::Write;

    // Env content in a `.txt` file, parsed with an explicit format override.
    let mut file = NamedTempFile::with_suffix(".txt").unwrap();
    file.write_all(b"KEY=value").unwrap();

    let parsed = parse_secret_file(file.path(), Some(FileFormat::Env), None).unwrap();

    assert_eq!(parsed.len(), 1);
    let only = &parsed[0];
    assert_eq!(only.key, "KEY");
}