use dirigent_fermata::core::secrets::config::HeuristicConfig; use dirigent_fermata::core::secrets::scanner::{shannon_entropy, Confidence, Scanner}; // --------------------------------------------------------------------------- // Helper: build a scanner with default config (built-in rules only) // --------------------------------------------------------------------------- fn default_scanner() -> Scanner { Scanner::builtin().expect("built-in rules must compile") } // --------------------------------------------------------------------------- // Specific provider patterns // --------------------------------------------------------------------------- #[test] fn detects_aws_access_key() { let scanner = default_scanner(); let findings = scanner.scan("here is my key: AKIAIOSFODNN7EXAMPLE ok"); assert!( findings.iter().any(|f| f.pattern_id == "aws-access-key"), "expected aws-access-key finding, got: {findings:?}" ); assert_eq!(findings[0].confidence, Confidence::High); } #[test] fn detects_github_pat_classic() { let scanner = default_scanner(); let findings = scanner.scan("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij"); assert!( findings.iter().any(|f| f.pattern_id == "github-pat-classic"), "expected github-pat-classic finding, got: {findings:?}" ); } #[test] fn detects_stripe_secret_key() { let scanner = default_scanner(); let findings = scanner.scan("STRIPE_KEY=sk_live_abcdefghijklmnopqrstuvwx"); assert!( findings.iter().any(|f| f.pattern_id == "stripe-secret-key"), "expected stripe-secret-key finding, got: {findings:?}" ); } #[test] fn detects_private_key_header() { let scanner = default_scanner(); let text = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAK...\n-----END RSA PRIVATE KEY-----"; let findings = scanner.scan(text); assert!( findings .iter() .any(|f| f.pattern_id == "private-key-header"), "expected private-key-header finding, got: {findings:?}" ); } #[test] fn detects_jwt_token() { let scanner = default_scanner(); // A realistic-looking (but fake) JWT. let jwt = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ik\ pvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"; let findings = scanner.scan(jwt); assert!( findings.iter().any(|f| f.pattern_id == "jwt"), "expected jwt finding, got: {findings:?}" ); } #[test] fn detects_database_connection_url() { let scanner = default_scanner(); let findings = scanner.scan("DATABASE_URL=postgres://admin:s3cretP4ss@db.example.com:5432/mydb"); assert!( findings .iter() .any(|f| f.pattern_id == "database-connection-url"), "expected database-connection-url finding, got: {findings:?}" ); } #[test] fn detects_slack_webhook() { let scanner = default_scanner(); let findings = scanner .scan("https://hooks.slack.com/services/T0ABCDEFG/B0ABCDEFG/abcdefghijklmnopqrstuvwx"); assert!( findings.iter().any(|f| f.pattern_id == "slack-webhook"), "expected slack-webhook finding, got: {findings:?}" ); } #[test] fn detects_anthropic_api_key() { let scanner = default_scanner(); let key = "sk-ant-aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789abcdefgh"; let findings = scanner.scan(&format!("my key is {key}")); assert!( findings .iter() .any(|f| f.pattern_id == "anthropic-api-key"), "expected anthropic-api-key finding, got: {findings:?}" ); } #[test] fn detects_sendgrid_api_key() { let scanner = default_scanner(); let key = "SG.abcdefghijklmnopqrstuv.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst"; let findings = scanner.scan(key); assert!( findings.iter().any(|f| f.pattern_id == "sendgrid-api-key"), "expected sendgrid-api-key finding, got: {findings:?}" ); } // --------------------------------------------------------------------------- // Generic patterns — entropy filtering // --------------------------------------------------------------------------- #[test] fn rejects_low_entropy_generic_api_key() { let scanner = default_scanner(); // "test" repeated has very low entropy — should NOT trigger. let findings = scanner.scan(r#"api_key = "testtesttesttesttest""#); let generic_hits: Vec<_> = findings .iter() .filter(|f| f.pattern_id == "generic-api-key") .collect(); assert!( generic_hits.is_empty(), "low-entropy api_key should be filtered out, got: {generic_hits:?}" ); } #[test] fn accepts_high_entropy_generic_secret() { let scanner = default_scanner(); // A high-entropy random-looking value. let findings = scanner.scan(r#"secret = "a8Kz3Lm9Xq2Wp7Yn"#); let has_generic = findings .iter() .any(|f| f.pattern_id == "generic-secret"); assert!( has_generic, "high-entropy secret should be detected, got: {findings:?}" ); } // --------------------------------------------------------------------------- // Custom patterns from config // --------------------------------------------------------------------------- #[test] fn custom_pattern_from_config() { let config = HeuristicConfig { enabled: true, patterns: vec![r"MY_CUSTOM_[A-Z]{10}".to_string()], ..Default::default() }; let scanner = Scanner::new(&config).expect("should compile custom pattern"); let findings = scanner.scan("found MY_CUSTOM_ABCDEFGHIJ in output"); assert!( findings.iter().any(|f| f.pattern_id == "custom-0"), "expected custom-0 finding, got: {findings:?}" ); assert_eq!(findings[0].confidence, Confidence::High); } // --------------------------------------------------------------------------- // Edge cases // --------------------------------------------------------------------------- #[test] fn empty_text_returns_no_findings() { let scanner = default_scanner(); assert!(scanner.scan("").is_empty()); } #[test] fn plain_text_returns_no_findings() { let scanner = default_scanner(); let findings = scanner.scan("This is just a normal paragraph with no secrets."); assert!( findings.is_empty(), "plain text should have no findings, got: {findings:?}" ); } #[test] fn overlapping_matches_are_deduplicated() { // Construct text where the same span could match multiple patterns. // The bearer token pattern and a generic pattern could overlap on the same region. let scanner = default_scanner(); let text = "Authorization: Bearer ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh"; let findings = scanner.scan(text); // Verify no two findings have overlapping spans. for i in 0..findings.len() { for j in (i + 1)..findings.len() { assert!( findings[j].span.start >= findings[i].span.end, "findings {i} and {j} overlap: {:?} vs {:?}", findings[i].span, findings[j].span, ); } } } // --------------------------------------------------------------------------- // Shannon entropy unit tests (supplement the inline mod tests) // --------------------------------------------------------------------------- #[test] fn entropy_known_values() { // Single character repeated → 0. assert!((shannon_entropy("aaaa") - 0.0).abs() < f64::EPSILON); // Perfectly balanced binary → 1.0 bits/char. let balanced = "ababababab"; assert!((shannon_entropy(balanced) - 1.0).abs() < 0.01); // High diversity. let diverse = "aB3$kL9!mZ7@wQ1#xR5^"; assert!(shannon_entropy(diverse) > 3.5); } // --------------------------------------------------------------------------- // Scanner construction // --------------------------------------------------------------------------- #[test] fn builtin_scanner_has_rules() { let scanner = default_scanner(); assert!( scanner.rule_count() >= 30, "expected at least 30 built-in rules, got {}", scanner.rule_count() ); } #[test] fn invalid_custom_pattern_returns_error() { let config = HeuristicConfig { enabled: true, patterns: vec![r"[invalid".to_string()], ..Default::default() }; assert!(Scanner::new(&config).is_err()); }