use regex::Regex; use std::sync::OnceLock; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Confidence { /// Absolute path or path with explicit separator. High, /// Bare filename with extension; could be a word. Low, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct PathCandidate { pub text: String, pub confidence: Confidence, } /// Heuristically extract path-like substrings from arbitrary text. /// Confident matches (absolute paths, paths containing separators) → `High`. /// Bare filenames with an extension → `Low` (advisory only). pub fn extract_path_candidates(text: &str) -> Vec { static UNIX_ABS: OnceLock = OnceLock::new(); static WIN_ABS: OnceLock = OnceLock::new(); static REL_WITH_SEP: OnceLock = OnceLock::new(); static BARE_NAME: OnceLock = OnceLock::new(); let unix_abs = UNIX_ABS.get_or_init(|| Regex::new(r"(?m)(?:^|\s)(/[\w./~\-_]+)").unwrap()); let win_abs = WIN_ABS.get_or_init(|| Regex::new(r#"(?m)(?:^|\s)([A-Za-z]:\\[\w.\\\-_]+)"#).unwrap()); let rel = REL_WITH_SEP.get_or_init(|| Regex::new(r"(?m)(?:^|\s)((?:\./|\.\./|[\w\-_]+/)[\w./\-_]+)").unwrap()); let bare = BARE_NAME.get_or_init(|| Regex::new(r"(?m)(?:^|\s)([\w\-_]+\.[A-Za-z]{1,8})(?:\s|[.,;:!?]|$)").unwrap()); let mut out = Vec::new(); let mut seen = std::collections::HashSet::new(); for re in [unix_abs, win_abs, rel] { for cap in re.captures_iter(text) { let m = cap.get(1).unwrap().as_str().trim_end_matches(['.', ',', ';', ':', '!', '?']); if seen.insert(m.to_string()) { out.push(PathCandidate { text: m.to_string(), confidence: Confidence::High }); } } } for cap in bare.captures_iter(text) { let m = cap.get(1).unwrap().as_str(); if seen.insert(m.to_string()) { out.push(PathCandidate { text: m.to_string(), confidence: Confidence::Low }); } } out }