Files
dirigent/crates/dirigent_projects/src/detection.rs
T
2026-05-08 01:59:04 +02:00

752 lines
25 KiB
Rust

//! Project detection and import support.
//!
//! Provides path normalization, worktree detection, multi-path grouping,
//! and matching logic to link discovered import paths to existing projects.
use std::collections::HashMap;
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use dirigent_protocol::project::{Project, ProjectRepository};
use crate::error::{ProjectError, Result};
use crate::params::{AddRepositoryParams, CreateProjectParams};
use crate::traits::ProjectStore;
// ---------------------------------------------------------------------------
// DTOs
// ---------------------------------------------------------------------------
/// A project discovered during import, paired with the outcome of matching it
/// against the existing projects.
///
/// `detect_projects` emits exactly one of these per input
/// `DiscoveredImportProject`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedProject {
/// Filesystem path as supplied by the discovery input. `detect_projects`
/// copies it verbatim and only uses a normalized copy internally for matching.
pub discovered_path: String,
/// Display name for the detection. `detect_projects` copies this from the
/// input's `name` field; it is not re-derived from the path.
pub suggested_name: String,
/// Number of sessions associated with this discovered path.
pub session_count: usize,
/// How this detection was resolved against existing projects.
pub resolution: ProjectResolution,
}
/// How a detected project path was resolved against the existing project store.
///
/// Serialized as an internally tagged enum: the variant name is stored in a
/// `type` field next to the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ProjectResolution {
/// Matched an existing project and repository.
///
/// `detect_projects` produces this for a normalized-path match, a
/// canonical-path match, or a name-only match. In the name-only case
/// `matched_repository_id` is the project's primary repository, or its first
/// repository when none is flagged primary.
Linked {
project_id: Uuid,
project_name: String,
matched_repository_id: Uuid,
},
/// No match found — suggests creating a new project. `detect_projects` fills
/// `name` with the last component of the discovered path.
CreateNew { name: String },
/// The user chose to skip this detection. `detect_projects` never produces
/// this variant itself.
Skip,
}
/// Full result of running project detection over a set of import discoveries.
///
/// Returned by `detect_projects`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectDetectionResult {
/// One entry per discovered path, in the same order as the input slice.
pub detections: Vec<DetectedProject>,
/// Hints about git worktree relationships. Only discovered paths for which
/// `detect_worktree` returned a hint appear here.
pub worktree_hints: Vec<WorktreeHint>,
/// Hints about discovered paths that share a common immediate parent
/// (groups of two or more paths only).
pub multi_path_hints: Vec<MultiPathHint>,
}
/// Hint that a path is (or may be) a git worktree.
///
/// Produced by `detect_worktree` when `<path>/.git` is a regular file that
/// contains a `gitdir:` line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorktreeHint {
/// The worktree path itself, normalized with `normalize_project_path`.
pub worktree_path: String,
/// The main repository path (derived from the `gitdir:` target in the `.git`
/// file), normalized. `None` when no ancestor of the target contains a `.git`
/// directory.
pub main_repo_path: Option<String>,
}
/// Hint that multiple discovered paths share a common immediate parent.
///
/// Produced by `find_multi_path_groups`, which only reports parents that have
/// at least two children.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MultiPathHint {
/// The shared parent directory, taken from the normalized child paths and
/// written with forward slashes.
pub shared_parent: String,
/// The normalized child paths that share this parent, sorted ascending.
pub paths: Vec<String>,
}
/// Request to create a project from an import detection.
///
/// Consumed by `create_projects_from_import`, one project per request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportProjectCreationRequest {
/// Project name.
pub name: String,
/// Primary repository path; registered with `is_primary: true`. A failure to
/// add it fails the whole request.
pub primary_path: String,
/// Additional repository paths; registered with `is_primary: false`.
/// Defaults to empty when missing from the serialized input. A failure to add
/// one of these is logged as a warning and does not fail the request.
#[serde(default)]
pub additional_paths: Vec<String>,
/// Optional icon. Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub icon: Option<String>,
/// Tags for the new project. Defaults to empty when missing.
#[serde(default)]
pub tags: Vec<String>,
/// Programming languages. Defaults to empty when missing.
#[serde(default)]
pub languages: Vec<String>,
}
/// Result of creating a project from an import request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportProjectCreationResult {
/// The created project's ID.
pub project_id: Uuid,
/// The created project's name, as returned by the store.
pub project_name: String,
/// How many repositories were created: the primary one plus every additional
/// repository that was added successfully (failed additional paths are not
/// counted).
pub repositories_created: usize,
}
/// Lightweight input describing a project discovered during import.
///
/// This mirrors the shape used by import discovery (name + path + session count)
/// and is the input element type of `detect_projects`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoveredImportProject {
/// Project name (typically the directory basename or user-facing label).
/// `detect_projects` copies it into `DetectedProject::suggested_name`.
pub name: String,
/// Filesystem path associated with this project. `detect_projects` normalizes
/// it for matching and derives the name hint from its last component.
pub path: String,
/// Number of sessions discovered under this path.
pub session_count: usize,
}
// ---------------------------------------------------------------------------
// Path normalization
// ---------------------------------------------------------------------------
/// Normalize a filesystem path for consistent cross-platform comparison.
///
/// Steps (in order):
/// 1. Try `std::fs::canonicalize()` — if it succeeds, use that (resolves symlinks,
///    `..`, etc.), convert to forward slashes and, on Windows, drop the verbatim
///    `\\?\` / `\\?\UNC\` prefix that canonicalization adds, so the result is
///    comparable with the textual form of the same location.
/// 2. On failure (typically: the path does not exist on this machine), apply
///    textual normalization:
///    - Backslash -> forward slash
///    - Verbatim `\\?\C:\x` -> `C:/x`, `\\?\UNC\server\share` -> `//server/share`
///    - Tilde `~/foo` -> expanded home + `/foo` (left as-is when no home
///      directory can be determined)
///    - MinGW `/c/Users/...` -> `C:/Users/...`
///    - WSL `/mnt/c/Users/...` -> `C:/Users/...`
///    - UNC `\\server\share` -> `//server/share`
///    - Collapse `//` -> `/` (except leading UNC)
///    - Resolve `.` and `..` segments
///    - Strip trailing `/`
/// 3. On Windows, lowercase the entire result for case-insensitive comparison.
///
/// Note that the MinGW/WSL rewrites are purely textual and apply on every
/// platform: a non-existent Unix path such as `/a/b` is rewritten to `A:/b`.
pub fn normalize_project_path(path: &str) -> String {
    // Removes the Windows verbatim prefix from an already slash-converted path.
    // The UNC form must be tested first because `//?/` is a prefix of it.
    fn strip_verbatim_prefix(s: String) -> String {
        let stripped = s
            .strip_prefix("//?/UNC/")
            .map(|rest| format!("//{}", rest))
            .or_else(|| s.strip_prefix("//?/").map(str::to_owned));
        stripped.unwrap_or(s)
    }

    // Try canonical resolution first.
    if let Ok(canonical) = std::fs::canonicalize(path) {
        let mut s = canonical.to_string_lossy().replace('\\', "/");
        // Only Windows canonicalization produces verbatim paths; leave other
        // platforms' canonical output untouched.
        if cfg!(windows) {
            s = strip_verbatim_prefix(s);
        }
        // Strip trailing slash unless it's a root like "C:/"
        if s.len() > 1 && s.ends_with('/') && !s.ends_with(":/") {
            s.pop();
        }
        return platform_case_normalize(s);
    }

    // Textual fallback. Verbatim prefixes may also arrive as plain text (for
    // example in data imported from a Windows machine), so strip them here too.
    let mut s = strip_verbatim_prefix(path.replace('\\', "/"));

    // Tilde expansion.
    if s.starts_with("~/") || s == "~" {
        if let Some(home) = home_dir_string() {
            if s == "~" {
                s = home;
            } else {
                s = format!("{}/{}", home.trim_end_matches('/'), &s[2..]);
            }
        }
    }

    // MinGW: /c/Users/... -> C:/Users/...
    if let Some(rest) = try_strip_mingw(&s) {
        s = rest;
    }

    // WSL: /mnt/c/Users/... -> C:/Users/...
    if let Some(rest) = try_strip_wsl(&s) {
        s = rest;
    }

    // UNC already converted by backslash replacement: //server/share is fine.
    // Collapse double slashes (preserve leading // for UNC).
    s = collapse_slashes(&s);

    // Resolve `.` and `..` segments textually.
    s = resolve_dots(&s);

    // Strip trailing slash (unless root).
    if s.len() > 1 && s.ends_with('/') && !s.ends_with(":/") {
        s.pop();
    }

    platform_case_normalize(s)
}
/// Returns the current user's home directory as a forward-slash string, or
/// `None` when `dirs::home_dir` cannot determine one.
fn home_dir_string() -> Option<String> {
    let home = dirs::home_dir()?;
    Some(home.to_string_lossy().replace('\\', "/"))
}
/// Rewrites a MinGW-style path (`/c/Users/...`) to drive-letter form
/// (`C:/Users/...`).
///
/// Matches only when the input starts with `/`, one ASCII letter, `/`. The
/// drive letter is upper-cased and the rest of the path is kept unchanged.
/// Returns `None` for every other input.
fn try_strip_mingw(s: &str) -> Option<String> {
    match s.as_bytes() {
        [b'/', letter, b'/', ..] if letter.is_ascii_alphabetic() => {
            // The first three bytes are ASCII, so byte offset 3 is a char boundary.
            let mut out = String::with_capacity(s.len());
            out.push(letter.to_ascii_uppercase() as char);
            out.push_str(":/");
            out.push_str(&s[3..]);
            Some(out)
        }
        _ => None,
    }
}
/// Rewrites a WSL drive mount (`/mnt/c/Users/...`) to drive-letter form
/// (`C:/Users/...`).
///
/// The component after `/mnt/` must be exactly one ASCII letter, either ending
/// the string or followed by `/`. Longer mount names such as `/mnt/cdrom` are
/// not drive letters and yield `None`, as does any path not under `/mnt/`.
fn try_strip_wsl(s: &str) -> Option<String> {
    let rest = s.strip_prefix("/mnt/")?;
    let mut chars = rest.chars();
    let letter = chars.next().filter(char::is_ascii_alphabetic)?;
    let tail = match chars.next() {
        // Bare mount point: `/mnt/c`.
        None => "",
        // `letter` is ASCII (one byte) and so is `/`, so offset 2 is a boundary.
        Some('/') => &rest[2..],
        // e.g. /mnt/cdrom — not a drive letter
        Some(_) => return None,
    };
    Some(format!("{}:/{}", letter.to_ascii_uppercase(), tail))
}
/// Collapses every run of `/` into a single `/`.
///
/// A leading run of two or more slashes is reduced to exactly `//` so that
/// UNC-style prefixes (`//server/share`) survive.
fn collapse_slashes(s: &str) -> String {
    // Split off the UNC marker (if any) so the main loop only has to deal with
    // ordinary separator runs.
    let (mut out, body) = if s.starts_with("//") {
        (String::from("//"), s.trim_start_matches('/'))
    } else {
        (String::new(), s)
    };
    out.reserve(body.len());

    let mut last_was_slash = false;
    for ch in body.chars() {
        let is_slash = ch == '/';
        if !(is_slash && last_was_slash) {
            out.push(ch);
        }
        last_was_slash = is_slash;
    }
    out
}
/// Resolves `.` and `..` segments of a forward-slash path purely textually
/// (no filesystem access, symlinks are not considered).
///
/// - Empty segments and `.` are dropped.
/// - `..` removes the preceding real segment.
/// - On a rooted path (leading `/` or `//`) a `..` at the root is ignored.
/// - A drive root (`C:`) is never removed: `C:/../foo` resolves to `C:/foo`.
/// - On a relative path a `..` that cannot be resolved is kept, so `../foo`
///   stays `../foo` instead of silently turning into `foo`.
///
/// The leading `/` or `//` is preserved; a trailing slash is not.
fn resolve_dots(s: &str) -> String {
    // A leading drive segment is exactly one ASCII letter followed by ':'.
    fn is_drive(segment: &str) -> bool {
        let b = segment.as_bytes();
        b.len() == 2 && b[0].is_ascii_alphabetic() && b[1] == b':'
    }

    let prefix = if s.starts_with("//") {
        "//"
    } else if s.starts_with('/') {
        "/"
    } else {
        ""
    };
    let rooted = !prefix.is_empty();

    let mut parts: Vec<&str> = Vec::new();
    for segment in s.split('/') {
        match segment {
            "" | "." => {}
            ".." => match parts.last().copied() {
                Some(last) if last != ".." => {
                    // Stepping out of a drive root would lose the drive letter;
                    // like `/..`, it stays at the root instead.
                    let at_drive_root = !rooted && parts.len() == 1 && is_drive(last);
                    if !at_drive_root {
                        parts.pop();
                    }
                }
                // Nothing left to pop (or only earlier unresolved `..`).
                _ => {
                    // Relative paths must keep the `..`, otherwise the result
                    // names a different directory. Rooted paths stay at root.
                    if !rooted {
                        parts.push("..");
                    }
                }
            },
            other => parts.push(other),
        }
    }

    let joined = parts.join("/");
    if rooted {
        format!("{}{}", prefix, joined)
    } else {
        joined
    }
}
/// Applies the platform's path case rule to an already normalized path.
///
/// On Windows the whole string is lower-cased so comparisons are
/// case-insensitive; on every other target the input is returned unchanged.
fn platform_case_normalize(s: String) -> String {
    if cfg!(target_os = "windows") {
        s.to_lowercase()
    } else {
        s
    }
}
// ---------------------------------------------------------------------------
// Worktree detection
// ---------------------------------------------------------------------------
/// Reports whether `path` looks like a git worktree, i.e. whether `<path>/.git`
/// is a regular *file* rather than a directory.
///
/// When it is, the file's first `gitdir:` line is read and the main repository
/// is located by walking up the ancestors of the referenced directory until one
/// of them contains a `.git` directory. A relative `gitdir:` target is
/// interpreted relative to `path`.
///
/// Returns `None` when `.git` is missing, is not a regular file, cannot be read,
/// or has no `gitdir:` line. Both paths in the returned hint are passed through
/// `normalize_project_path`; `main_repo_path` is `None` when no ancestor
/// qualifies.
pub fn detect_worktree(path: &str) -> Option<WorktreeHint> {
    let root = PathBuf::from(path);
    let pointer_file = root.join(".git");

    // `symlink_metadata` does not follow symlinks, so a symlinked `.git` is
    // judged by the link itself.
    let is_pointer_file = std::fs::symlink_metadata(&pointer_file).ok()?.is_file();
    if !is_pointer_file {
        return None;
    }

    let contents = std::fs::read_to_string(&pointer_file).ok()?;
    let target = contents
        .lines()
        .find_map(|line| line.strip_prefix("gitdir:"))?
        .trim();

    // The target typically looks like `/path/to/main-repo/.git/worktrees/<name>`.
    let mut gitdir = PathBuf::from(target);
    if !gitdir.is_absolute() {
        gitdir = root.join(target);
    }

    // Walk upwards: .../main-repo/.git/worktrees/xxx -> .../main-repo.
    // Only a real `.git` *directory* marks the main repository.
    let mut main_repo_path = None;
    for dir in gitdir.ancestors() {
        if dir.join(".git").is_dir() {
            main_repo_path = Some(normalize_project_path(&dir.to_string_lossy()));
            break;
        }
    }

    Some(WorktreeHint {
        worktree_path: normalize_project_path(path),
        main_repo_path,
    })
}
// ---------------------------------------------------------------------------
// Multi-path grouping
// ---------------------------------------------------------------------------
/// Group paths that share a common immediate parent directory.
///
/// Every input path is normalized with `normalize_project_path` first, then
/// bucketed by the parent of its normalized form. Paths without a parent (for
/// example a filesystem root) are ignored.
///
/// Only produces hints for groups of 2+ *distinct* paths: inputs that normalize
/// to the same path (e.g. `C:\foo` and `/c/foo`) count once. Child paths are
/// sorted within each hint and hints are sorted by `shared_parent`, so the
/// output is deterministic regardless of `HashMap` iteration order.
pub fn find_multi_path_groups(paths: &[String]) -> Vec<MultiPathHint> {
    let mut by_parent: HashMap<String, Vec<String>> = HashMap::new();
    for path in paths {
        let normalized = normalize_project_path(path);
        // Find immediate parent by stripping last component.
        if let Some(parent) = PathBuf::from(&normalized).parent() {
            let parent_str = parent.to_string_lossy().replace('\\', "/");
            by_parent.entry(parent_str).or_default().push(normalized);
        }
    }

    let mut hints: Vec<MultiPathHint> = by_parent
        .into_iter()
        .filter_map(|(parent, mut children)| {
            children.sort();
            // Different spellings of one path must not form a "group".
            children.dedup();
            (children.len() >= 2).then(|| MultiPathHint {
                shared_parent: parent,
                paths: children,
            })
        })
        .collect();

    // HashMap iteration order is arbitrary; give callers a stable order.
    hints.sort_by(|a, b| a.shared_parent.cmp(&b.shared_parent));
    hints
}
// ---------------------------------------------------------------------------
// Detection logic
// ---------------------------------------------------------------------------
/// Match discovered import projects against existing projects.
///
/// For each discovered path, attempts to find a match in the existing project
/// store using (in priority order):
/// 1. Exact normalized path match against any repository
/// 2. Canonical (fs::canonicalize) path match
/// 3. Name-based hint (project name == suggested name)
///
/// Unmatched paths get `ProjectResolution::CreateNew`.
pub fn detect_projects(
discovered: &[DiscoveredImportProject],
existing_projects: &[(Project, Vec<ProjectRepository>)],
) -> ProjectDetectionResult {
// Pre-build a lookup from normalized repo paths -> (project, repo).
let mut path_index: HashMap<String, (&Project, &ProjectRepository)> = HashMap::new();
let mut canonical_index: HashMap<String, (&Project, &ProjectRepository)> = HashMap::new();
let mut name_index: HashMap<String, &Project> = HashMap::new();
for (project, repos) in existing_projects {
name_index.insert(project.name.to_lowercase(), project);
for repo in repos {
let repo_path_str = repo.path.to_string_lossy().to_string();
let normalized = normalize_project_path(&repo_path_str);
path_index.insert(normalized.clone(), (project, repo));
// Also try canonical path of the repo.
if let Ok(canonical) = std::fs::canonicalize(&repo.path) {
let canon_norm = normalize_project_path(&canonical.to_string_lossy());
canonical_index.insert(canon_norm, (project, repo));
}
}
}
let mut detections = Vec::with_capacity(discovered.len());
let discovered_paths: Vec<String> = discovered.iter().map(|d| d.path.clone()).collect();
let worktree_hints: Vec<WorktreeHint> = discovered_paths
.iter()
.filter_map(|p| detect_worktree(p))
.collect();
for disc in discovered {
let normalized = normalize_project_path(&disc.path);
// 1. Exact normalized path match.
if let Some((project, repo)) = path_index.get(&normalized) {
detections.push(DetectedProject {
discovered_path: disc.path.clone(),
suggested_name: disc.name.clone(),
session_count: disc.session_count,
resolution: ProjectResolution::Linked {
project_id: project.id,
project_name: project.name.clone(),
matched_repository_id: repo.id,
},
});
continue;
}
// 2. Canonical path match.
let canon_norm = std::fs::canonicalize(&disc.path)
.map(|c| normalize_project_path(&c.to_string_lossy()))
.unwrap_or_default();
if !canon_norm.is_empty() {
if let Some((project, repo)) = canonical_index.get(&canon_norm) {
detections.push(DetectedProject {
discovered_path: disc.path.clone(),
suggested_name: disc.name.clone(),
session_count: disc.session_count,
resolution: ProjectResolution::Linked {
project_id: project.id,
project_name: project.name.clone(),
matched_repository_id: repo.id,
},
});
continue;
}
}
// 3. Name hint match.
let suggested_lower = derive_suggested_name(&disc.path).to_lowercase();
if let Some(project) = name_index.get(&suggested_lower) {
// Find the primary repo or any repo to satisfy the linked variant.
let existing_repos = existing_projects
.iter()
.find(|(p, _)| p.id == project.id)
.map(|(_, repos)| repos);
if let Some(repos) = existing_repos {
if let Some(repo) = repos.iter().find(|r| r.is_primary).or(repos.first()) {
detections.push(DetectedProject {
discovered_path: disc.path.clone(),
suggested_name: disc.name.clone(),
session_count: disc.session_count,
resolution: ProjectResolution::Linked {
project_id: project.id,
project_name: project.name.clone(),
matched_repository_id: repo.id,
},
});
continue;
}
}
}
// 4. No match — suggest creating.
let name = derive_suggested_name(&disc.path);
detections.push(DetectedProject {
discovered_path: disc.path.clone(),
suggested_name: disc.name.clone(),
session_count: disc.session_count,
resolution: ProjectResolution::CreateNew { name },
});
}
let multi_path_hints = find_multi_path_groups(&discovered_paths);
ProjectDetectionResult {
detections,
worktree_hints,
multi_path_hints,
}
}
/// Derive a suggested project name from a path: its last component.
///
/// Backslashes are treated as separators and trailing separators are ignored.
/// A path consisting only of separators (or an empty path) yields an empty
/// string.
fn derive_suggested_name(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let stem = unified.trim_end_matches('/');
    match stem.rfind('/') {
        Some(idx) => stem[idx + 1..].to_string(),
        None => stem.to_string(),
    }
}
// ---------------------------------------------------------------------------
// Project creation from import
// ---------------------------------------------------------------------------
/// Create projects from a batch of import creation requests.
///
/// Requests are processed one after another, in order. Each request creates a
/// project, registers its primary repository and then any additional
/// repositories. The returned vector has one entry per request, in request
/// order; a failed request does not stop the remaining ones.
pub async fn create_projects_from_import(
    store: &dyn ProjectStore,
    requests: Vec<ImportProjectCreationRequest>,
    owner: Uuid,
) -> Vec<Result<ImportProjectCreationResult>> {
    let mut outcomes = Vec::with_capacity(requests.len());
    for request in requests {
        let outcome = create_single_project(store, request, owner).await;
        outcomes.push(outcome);
    }
    outcomes
}
async fn create_single_project(
store: &dyn ProjectStore,
req: ImportProjectCreationRequest,
owner: Uuid,
) -> Result<ImportProjectCreationResult> {
let project = store
.create_project(CreateProjectParams {
name: req.name.clone(),
description: String::new(),
icon: req.icon,
owner,
tags: req.tags,
languages: req.languages,
metadata: serde_json::Value::Object(serde_json::Map::new()),
})
.await?;
let mut repos_created: usize = 0;
// Primary repository.
store
.add_repository(AddRepositoryParams {
project_id: project.id,
path: PathBuf::from(&req.primary_path),
is_primary: true,
label: None,
})
.await?;
repos_created += 1;
// Additional repositories.
for additional in &req.additional_paths {
match store
.add_repository(AddRepositoryParams {
project_id: project.id,
path: PathBuf::from(additional),
is_primary: false,
label: None,
})
.await
{
Ok(_) => repos_created += 1,
Err(e) => {
tracing::warn!(
project_id = %project.id,
path = %additional,
error = %e,
"Failed to add additional repository during import"
);
}
}
}
Ok(ImportProjectCreationResult {
project_id: project.id,
project_name: project.name,
repositories_created: repos_created,
})
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for path normalization, name derivation, multi-path grouping and
// project detection. The sample paths are presumably absent on the test
// machine, so normalization takes the textual fallback rather than
// `fs::canonicalize` — TODO confirm for CI environments.
#[cfg(test)]
mod tests {
use super::*;
// Backslash separators must be converted to forward slashes.
#[test]
fn normalize_backslashes() {
let result = normalize_project_path("C:\\Users\\alice\\project");
assert!(result.contains('/'));
assert!(!result.contains('\\'));
}
// MinGW `/c/...` becomes a drive-letter path. Either case of the drive letter
// is accepted because Windows builds lowercase the whole result.
#[test]
fn normalize_mingw_path() {
let result = normalize_project_path("/c/Users/alice/project");
assert!(
result.starts_with("C:/") || result.starts_with("c:/"),
"Expected drive letter prefix, got: {}",
result
);
}
// WSL `/mnt/c/...` becomes a drive-letter path (same case leniency as above).
#[test]
fn normalize_wsl_path() {
let result = normalize_project_path("/mnt/c/Users/alice/project");
assert!(
result.starts_with("C:/") || result.starts_with("c:/"),
"Expected drive letter prefix, got: {}",
result
);
}
// A trailing separator must not survive normalization.
#[test]
fn normalize_strips_trailing_slash() {
let result = normalize_project_path("/home/alice/project/");
assert!(!result.ends_with('/'));
}
// `.` and `..` segments are resolved textually.
#[test]
fn normalize_resolves_dots() {
// Textual fallback since this path won't exist on disk.
let result = normalize_project_path("/home/alice/./project/../project/src");
assert!(result.contains("/home/alice/project/src") || result.ends_with("project/src"));
}
// Repeated separators collapse; only a leading `//` (UNC) may remain.
#[test]
fn normalize_collapses_double_slashes() {
let result = normalize_project_path("/home//alice///project");
assert!(!result.contains("//") || result.starts_with("//"));
}
// The suggested name is the last path component, for both separator styles
// and with a trailing slash.
#[test]
fn derive_suggested_name_basic() {
assert_eq!(derive_suggested_name("/home/alice/my-project"), "my-project");
assert_eq!(derive_suggested_name("C:\\Users\\bob\\work"), "work");
assert_eq!(derive_suggested_name("/home/alice/my-project/"), "my-project");
}
// Two siblings under one parent form a group; a lone child does not.
#[test]
fn multi_path_groups_basic() {
let paths = vec![
"/home/alice/projects/foo".to_string(),
"/home/alice/projects/bar".to_string(),
"/home/alice/work/baz".to_string(),
];
let groups = find_multi_path_groups(&paths);
// foo and bar share /home/alice/projects, baz is alone under /home/alice/work
let multi = groups
.iter()
.find(|g| g.paths.len() == 2);
assert!(multi.is_some(), "Expected a group with 2 paths");
}
// With no existing projects every discovery resolves to `CreateNew`, named
// after the last component of the discovered path.
#[test]
fn detect_projects_creates_new_for_unmatched() {
let discovered = vec![DiscoveredImportProject {
name: "my-project".to_string(),
path: "/nonexistent/path/my-project".to_string(),
session_count: 5,
}];
let existing: Vec<(Project, Vec<ProjectRepository>)> = vec![];
let result = detect_projects(&discovered, &existing);
assert_eq!(result.detections.len(), 1);
match &result.detections[0].resolution {
ProjectResolution::CreateNew { name } => {
assert_eq!(name, "my-project");
}
other => panic!("Expected CreateNew, got {:?}", other),
}
}
// The repository path and the discovered path differ, so neither path stage
// can match; the link must come from the name hint and point at the project's
// primary repository.
#[test]
fn detect_projects_links_by_name() {
use chrono::Utc;
let project_id = Uuid::now_v7();
let repo_id = Uuid::now_v7();
let now = Utc::now();
let project = Project {
id: project_id,
name: "dirigent".to_string(),
description: String::new(),
icon: None,
owner: Uuid::nil(),
members: vec![],
tags: vec![],
languages: vec![],
linked_projects: vec![],
metadata: serde_json::json!({}),
created_at: now,
updated_at: now,
};
let repo = ProjectRepository {
id: repo_id,
project_id,
path: PathBuf::from("/other/path/dirigent"),
is_primary: true,
label: None,
access: dirigent_protocol::project::AccessMode::ReadWrite,
created_at: now,
updated_at: now,
};
let discovered = vec![DiscoveredImportProject {
name: "dirigent".to_string(),
path: "/somewhere/else/dirigent".to_string(),
session_count: 3,
}];
let result = detect_projects(&discovered, &[(project, vec![repo])]);
assert_eq!(result.detections.len(), 1);
match &result.detections[0].resolution {
ProjectResolution::Linked {
project_id: pid,
matched_repository_id: rid,
..
} => {
assert_eq!(*pid, project_id);
assert_eq!(*rid, repo_id);
}
other => panic!("Expected Linked, got {:?}", other),
}
}
}