sync from monorepo @ 2452e92e

This commit is contained in:
2026-05-08 01:59:04 +02:00
commit b03dc15371
459 changed files with 129586 additions and 0 deletions
@@ -0,0 +1,50 @@
use regex::Regex;
use std::sync::OnceLock;
/// How strongly an extracted candidate resembles a filesystem path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Confidence {
    /// The text is an absolute path or contains an explicit path separator.
    High,
    /// The text is a bare filename with an extension; it may just be an
    /// ordinary word.
    Low,
}
/// A substring of scanned text that looks like a filesystem path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PathCandidate {
    /// The extracted path-like text.
    pub text: String,
    /// How confident the extractor is that `text` really is a path.
    pub confidence: Confidence,
}
/// Heuristically extract path-like substrings from arbitrary text.
///
/// Three patterns produce [`Confidence::High`] candidates, tried in this
/// order: Unix absolute paths (`/usr/lib`), Windows drive paths
/// (`C:\dir\file`), and relative paths containing a `/` separator (`./a`,
/// `../a`, `src/a`). Trailing sentence punctuation (`. , ; : ! ?`) is stripped
/// from these matches.
///
/// Bare filenames with a 1–8 letter extension (`main.rs`) are reported
/// afterwards as [`Confidence::Low`] (advisory only).
///
/// Every candidate must start at the beginning of a line or directly after
/// whitespace. Candidates are de-duplicated by text; all `High` candidates
/// (in pattern order, then text order) precede the `Low` ones.
///
/// # Panics
///
/// Panics only if one of the built-in regular expressions fails to compile,
/// which would be a programming error in this function.
pub fn extract_path_candidates(text: &str) -> Vec<PathCandidate> {
    static UNIX_ABS: OnceLock<Regex> = OnceLock::new();
    static WIN_ABS: OnceLock<Regex> = OnceLock::new();
    static REL_WITH_SEP: OnceLock<Regex> = OnceLock::new();
    static BARE_NAME: OnceLock<Regex> = OnceLock::new();

    let unix_abs = UNIX_ABS.get_or_init(|| {
        Regex::new(r"(?m)(?:^|\s)(/[\w./~\-_]+)").expect("UNIX_ABS pattern is valid")
    });
    let win_abs = WIN_ABS.get_or_init(|| {
        Regex::new(r#"(?m)(?:^|\s)([A-Za-z]:\\[\w.\\\-_]+)"#).expect("WIN_ABS pattern is valid")
    });
    let rel = REL_WITH_SEP.get_or_init(|| {
        Regex::new(r"(?m)(?:^|\s)((?:\./|\.\./|[\w\-_]+/)[\w./\-_]+)")
            .expect("REL_WITH_SEP pattern is valid")
    });
    let bare = BARE_NAME.get_or_init(|| {
        Regex::new(r"(?m)(?:^|\s)([\w\-_]+\.[A-Za-z]{1,8})(?:\s|[.,;:!?]|$)")
            .expect("BARE_NAME pattern is valid")
    });

    let mut out = Vec::new();
    // Candidates are slices of `text`, so the dedup set can borrow them
    // instead of allocating a second `String` per candidate.
    let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();

    for re in [unix_abs, win_abs, rel] {
        for group in re.captures_iter(text).filter_map(|cap| cap.get(1)) {
            // The character classes admit '.', so a path that ends a sentence
            // drags the sentence punctuation along; strip it.
            let m = group.as_str().trim_end_matches(['.', ',', ';', ':', '!', '?']);
            if seen.insert(m) {
                out.push(PathCandidate { text: m.to_string(), confidence: Confidence::High });
            }
        }
    }

    // The bare-name pattern consumes the delimiter that follows the filename.
    // With `captures_iter` the next search would begin *after* that delimiter,
    // so in "a.rs b.rs" the single space is gone and "b.rs" is never found.
    // Resume at the end of the captured name instead, so the delimiter can
    // serve as the leading whitespace of the next match. Group 1 is non-empty
    // and starts at or after `pos`, so `pos` strictly advances each iteration.
    let mut pos = 0;
    while let Some(group) = bare.captures_at(text, pos).and_then(|cap| cap.get(1)) {
        let m = group.as_str();
        if seen.insert(m) {
            out.push(PathCandidate { text: m.to_string(), confidence: Confidence::Low });
        }
        pos = group.end();
    }

    out
}