//! Secret manifest loader. //! //! Discovers secret files per the `.botsecrets` configuration, parses them, //! filters by key patterns, and produces the known-secrets set that the //! Redactor will consume. use std::path::{Path, PathBuf}; use globset::{Glob, GlobSetBuilder}; use thiserror::Error; use walkdir::WalkDir; use super::config::{ParseErrorAction, SecretsConfig}; use super::parser::{self, FileFormat, ParseError, SecretEntry}; // --------------------------------------------------------------------------- // Errors // --------------------------------------------------------------------------- #[derive(Debug, Error)] pub enum ManifestError { #[error(transparent)] Parse(#[from] ParseError), #[error("glob pattern error: {0}")] Glob(String), } // --------------------------------------------------------------------------- // Manifest // --------------------------------------------------------------------------- /// The complete set of known secrets discovered from a project. /// /// Entries are sorted by value length descending (longest first) so the /// redactor replaces the most specific match before shorter substrings. #[derive(Debug, Clone)] pub struct Manifest { entries: Vec, } /// Minimum secret value length to keep. Anything shorter risks false-positive /// redaction (e.g. `"yes"`, `"on"`, `"42"`). const MIN_VALUE_LEN: usize = 4; /// Directories that are unconditionally skipped during file discovery. const SKIP_DIRS: &[&str] = &[".git", "node_modules", "target", "__pycache__", ".venv"]; impl Manifest { /// Build a manifest by discovering and parsing secret files relative to /// `root`. pub fn build(config: &SecretsConfig, root: &Path) -> Result { let mut entries = Vec::new(); // 1. Discover files matching `config.files.patterns`. let discovered = discover_files(&config.files.patterns, root)?; // 2. Parse each discovered file. for path in &discovered { match parse_discovered_file(path) { Ok(file_entries) => entries.extend(file_entries), Err(e) => match config.enforcement.on_parse_error { ParseErrorAction::Allow => { eprintln!( "fermata: warning: skipping unparseable file {}: {}", path.display(), e ); } ParseErrorAction::Deny => { return Err(e.into()); } ParseErrorAction::MaskEntireFile => { // We cannot extract individual secrets — the redactor // may choose to mask the entire file content if it // appears in output. For now we log and continue. eprintln!( "fermata: warning: cannot parse {}: {}", path.display(), e ); } }, } } // 3. Filter discovered entries by the effective key patterns. entries = filter_by_key_patterns(entries, config); // 4. Process explicit `[[file]]` overrides — these bypass key filtering // because the user declared them intentionally. for override_cfg in &config.file_overrides { let override_path = root.join(&override_cfg.path); if !override_path.is_file() { continue; } let format = override_cfg .format .as_deref() .and_then(FileFormat::from_hint); let key_filter = if override_cfg.keys.is_empty() { None } else { Some(override_cfg.keys.as_slice()) }; match parser::parse_secret_file(&override_path, format, key_filter) { Ok(file_entries) => entries.extend(file_entries), Err(e) => { eprintln!( "fermata: warning: cannot parse override file {}: {}", override_path.display(), e ); } } } // 5. Deduplicate (same key + value from different discovery paths). entries.sort_by(|a, b| a.key.cmp(&b.key).then_with(|| a.value.cmp(&b.value))); entries.dedup_by(|a, b| a.key == b.key && a.value == b.value); // 6. Sort by value length descending (longest first for redaction). entries.sort_by(|a, b| b.value.len().cmp(&a.value.len())); // 7. Remove entries with very short values to avoid false replacements. entries.retain(|e| e.value.len() >= MIN_VALUE_LEN); Ok(Self { entries }) } /// Build a manifest from a pre-built list of secret entries. /// /// Applies the same post-processing as [`Manifest::build`]: /// - Deduplicates entries with the same key and value. /// - Sorts by value length descending (longest first for redaction). /// - Removes entries with values shorter than 4 characters. /// /// Useful for testing and for library consumers that obtain secrets /// from sources other than filesystem discovery. pub fn from_entries(mut entries: Vec) -> Self { // Deduplicate (same key + value). entries.sort_by(|a, b| a.key.cmp(&b.key).then_with(|| a.value.cmp(&b.value))); entries.dedup_by(|a, b| a.key == b.key && a.value == b.value); // Sort by value length descending (longest first for redaction). entries.sort_by(|a, b| b.value.len().cmp(&a.value.len())); // Remove entries with very short values to avoid false replacements. entries.retain(|e| e.value.len() >= MIN_VALUE_LEN); Self { entries } } /// Build an empty manifest (no secrets known). pub fn empty() -> Self { Self { entries: Vec::new(), } } /// Returns all discovered secret entries. pub fn entries(&self) -> &[SecretEntry] { &self.entries } /// Returns `true` if the manifest contains no secrets. pub fn is_empty(&self) -> bool { self.entries.is_empty() } /// Number of known secrets. pub fn len(&self) -> usize { self.entries.len() } } // --------------------------------------------------------------------------- // File discovery // --------------------------------------------------------------------------- /// Walk the project tree and collect files matching any of the given glob /// patterns. Patterns are matched against paths *relative to* `root`. fn discover_files(patterns: &[String], root: &Path) -> Result, ManifestError> { if patterns.is_empty() { return Ok(Vec::new()); } // Compile all patterns into a single GlobSet for efficient matching. let mut builder = GlobSetBuilder::new(); for pat in patterns { // `globset` patterns match against the full relative path including // intermediate directories (e.g. `.docker/config.json`). We add // both the literal pattern and a `**/` prefixed variant so that // `.env` matches at the root and `subdir/.env` matches nested. let glob = Glob::new(pat).map_err(|e| ManifestError::Glob(e.to_string()))?; builder.add(glob); // Also match nested occurrences: `**/`. if !pat.contains('/') { let nested = format!("**/{pat}"); let nested_glob = Glob::new(&nested).map_err(|e| ManifestError::Glob(e.to_string()))?; builder.add(nested_glob); } } let glob_set = builder.build().map_err(|e| ManifestError::Glob(e.to_string()))?; let mut result = Vec::new(); for entry in WalkDir::new(root).follow_links(false) { let entry = match entry { Ok(e) => e, Err(_) => continue, }; // Skip common large / non-project directories. if entry.file_type().is_dir() { if let Some(name) = entry.file_name().to_str() { if SKIP_DIRS.contains(&name) { // WalkDir does not support in-place skip, but we simply // won't match anything under these dirs because we check // the dir name on each entry. We continue and let non-file // entries fall through. continue; } } continue; // Only interested in files. } if !entry.file_type().is_file() { continue; } // Check that no ancestor directory is in the skip list. let abs_path = entry.path(); if has_skipped_ancestor(abs_path, root) { continue; } // Match relative path against the glob set. let rel = match abs_path.strip_prefix(root) { Ok(r) => r, Err(_) => continue, }; if glob_set.is_match(rel) { result.push(abs_path.to_path_buf()); } } Ok(result) } /// Returns `true` if any path component between `root` and `path` is in /// [`SKIP_DIRS`]. fn has_skipped_ancestor(path: &Path, root: &Path) -> bool { if let Ok(rel) = path.strip_prefix(root) { for component in rel.parent().into_iter().flat_map(|p| p.components()) { if let Some(name) = component.as_os_str().to_str() { if SKIP_DIRS.contains(&name) { return true; } } } } false } // --------------------------------------------------------------------------- // Single-file parsing // --------------------------------------------------------------------------- /// Parse a single discovered file. Auto-detects format from extension. /// Returns an empty `Vec` if the format cannot be determined (e.g. `.key`, /// `.pem` — opaque/binary files). fn parse_discovered_file(path: &Path) -> Result, ParseError> { let format = match FileFormat::from_path(path) { Some(fmt) => fmt, None => return Ok(Vec::new()), // opaque file — skip }; parser::parse_secret_file(path, Some(format), None) } // --------------------------------------------------------------------------- // Key-pattern filtering // --------------------------------------------------------------------------- /// Keep only entries whose key matches the effective key-include patterns /// from the configuration. fn filter_by_key_patterns(entries: Vec, config: &SecretsConfig) -> Vec { entries .into_iter() .filter(|e| config.key_matches(&e.key)) .collect() } #[cfg(test)] mod tests { use super::*; #[test] fn empty_manifest() { let m = Manifest::empty(); assert!(m.is_empty()); assert_eq!(m.len(), 0); assert!(m.entries().is_empty()); } }