//! Project detection and import support. //! //! Provides path normalization, worktree detection, multi-path grouping, //! and matching logic to link discovered import paths to existing projects. use std::collections::HashMap; use std::path::PathBuf; use serde::{Deserialize, Serialize}; use uuid::Uuid; use dirigent_protocol::project::{Project, ProjectRepository}; use crate::error::{ProjectError, Result}; use crate::params::{AddRepositoryParams, CreateProjectParams}; use crate::traits::ProjectStore; // --------------------------------------------------------------------------- // DTOs // --------------------------------------------------------------------------- /// A project discovered during import, before resolution against existing projects. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DetectedProject { /// Filesystem path as discovered (pre-normalization may have been applied). pub discovered_path: String, /// Suggested name derived from the path (e.g. last directory component). pub suggested_name: String, /// Number of sessions associated with this discovered path. pub session_count: usize, /// How this detection was resolved against existing projects. pub resolution: ProjectResolution, } /// How a detected project path was resolved against the existing project store. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type")] pub enum ProjectResolution { /// Matched an existing project and repository. Linked { project_id: Uuid, project_name: String, matched_repository_id: Uuid, }, /// No match found — suggests creating a new project. CreateNew { name: String }, /// The user chose to skip this detection. Skip, } /// Full result of running project detection over a set of import discoveries. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProjectDetectionResult { /// One entry per discovered path. pub detections: Vec, /// Hints about git worktree relationships. pub worktree_hints: Vec, /// Hints about paths that share a common parent. pub multi_path_hints: Vec, } /// Hint that a path is (or may be) a git worktree. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct WorktreeHint { /// The worktree path itself. pub worktree_path: String, /// The main repository path (parsed from `.git` file), if resolved. pub main_repo_path: Option, } /// Hint that multiple discovered paths share a common immediate parent. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct MultiPathHint { /// The shared parent directory. pub shared_parent: String, /// The child paths that share this parent. pub paths: Vec, } /// Request to create a project from an import detection. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ImportProjectCreationRequest { /// Project name. pub name: String, /// Primary repository path. pub primary_path: String, /// Additional repository paths. #[serde(default)] pub additional_paths: Vec, /// Optional icon. #[serde(skip_serializing_if = "Option::is_none")] pub icon: Option, /// Tags for the new project. #[serde(default)] pub tags: Vec, /// Programming languages. #[serde(default)] pub languages: Vec, } /// Result of creating a project from an import request. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ImportProjectCreationResult { /// The created project's ID. pub project_id: Uuid, /// The created project's name. pub project_name: String, /// How many repositories were created (primary + additional). pub repositories_created: usize, } /// Lightweight input describing a project discovered during import. /// /// This mirrors the shape used by import discovery (name + path + session count). #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DiscoveredImportProject { /// Project name (typically the directory basename or user-facing label). pub name: String, /// Filesystem path associated with this project. pub path: String, /// Number of sessions discovered under this path. pub session_count: usize, } // --------------------------------------------------------------------------- // Path normalization // --------------------------------------------------------------------------- /// Normalize a filesystem path for consistent cross-platform comparison. /// /// Steps (in order): /// 1. Try `std::fs::canonicalize()` — if it succeeds, use that (resolves symlinks, /// `..`, etc.) and convert to forward slashes. /// 2. On failure, apply textual normalization: /// - Backslash -> forward slash /// - MinGW `/c/Users/...` -> `C:/Users/...` /// - WSL `/mnt/c/Users/...` -> `C:/Users/...` /// - UNC `\\server\share` -> `//server/share` /// - Tilde `~/foo` -> expanded home + `/foo` /// - Collapse `//` -> `/` (except leading UNC) /// - Resolve `.` and `..` segments /// - Strip trailing `/` /// 3. On Windows, lowercase the entire result for case-insensitive comparison. pub fn normalize_project_path(path: &str) -> String { // Try canonical resolution first. if let Ok(canonical) = std::fs::canonicalize(path) { let mut s = canonical.to_string_lossy().replace('\\', "/"); // Strip trailing slash unless it's a root like "C:/" if s.len() > 1 && s.ends_with('/') && !s.ends_with(":/") { s.pop(); } return platform_case_normalize(s); } // Textual fallback. let mut s = path.replace('\\', "/"); // Tilde expansion. if s.starts_with("~/") || s == "~" { if let Some(home) = home_dir_string() { if s == "~" { s = home; } else { s = format!("{}/{}", home.trim_end_matches('/'), &s[2..]); } } } // MinGW: /c/Users/... -> C:/Users/... if let Some(rest) = try_strip_mingw(&s) { s = rest; } // WSL: /mnt/c/Users/... -> C:/Users/... if let Some(rest) = try_strip_wsl(&s) { s = rest; } // UNC already converted by backslash replacement: //server/share is fine. // Collapse double slashes (preserve leading // for UNC). s = collapse_slashes(&s); // Resolve `.` and `..` segments textually. s = resolve_dots(&s); // Strip trailing slash (unless root). if s.len() > 1 && s.ends_with('/') && !s.ends_with(":/") { s.pop(); } platform_case_normalize(s) } fn home_dir_string() -> Option { dirs::home_dir().map(|p| p.to_string_lossy().replace('\\', "/")) } fn try_strip_mingw(s: &str) -> Option { let bytes = s.as_bytes(); // Pattern: /X/... where X is a single ASCII letter if bytes.len() >= 3 && bytes[0] == b'/' && bytes[1].is_ascii_alphabetic() && bytes[2] == b'/' { let drive = (bytes[1] as char).to_ascii_uppercase(); Some(format!("{}:/{}", drive, &s[3..])) } else { None } } fn try_strip_wsl(s: &str) -> Option { if let Some(rest) = s.strip_prefix("/mnt/") { let bytes = rest.as_bytes(); if !bytes.is_empty() && bytes[0].is_ascii_alphabetic() { let drive = (bytes[0] as char).to_ascii_uppercase(); let remainder = if bytes.len() > 1 && bytes[1] == b'/' { &rest[2..] } else if bytes.len() == 1 { "" } else { return None; // e.g. /mnt/cdrom — not a drive letter }; return Some(format!("{}:/{}", drive, remainder)); } } None } fn collapse_slashes(s: &str) -> String { let mut result = String::with_capacity(s.len()); let mut chars = s.chars().peekable(); // Preserve leading double slash for UNC. if s.starts_with("//") { result.push('/'); result.push('/'); chars.next(); chars.next(); // Skip any additional leading slashes beyond the two. while chars.peek() == Some(&'/') { chars.next(); } } let mut prev_slash = false; for c in chars { if c == '/' { if !prev_slash { result.push(c); } prev_slash = true; } else { result.push(c); prev_slash = false; } } result } fn resolve_dots(s: &str) -> String { // Split on '/', resolve `.` and `..` textually. let mut parts: Vec<&str> = Vec::new(); let prefix = if s.starts_with("//") { "//" } else if s.starts_with('/') { "/" } else { "" }; for segment in s.split('/') { match segment { "" | "." => {} ".." => { // Don't pop past the root. if !parts.is_empty() && *parts.last().unwrap() != ".." { parts.pop(); } } other => parts.push(other), } } let joined = parts.join("/"); if prefix.is_empty() { joined } else { format!("{}{}", prefix, joined) } } #[cfg(target_os = "windows")] fn platform_case_normalize(s: String) -> String { s.to_lowercase() } #[cfg(not(target_os = "windows"))] fn platform_case_normalize(s: String) -> String { s } // --------------------------------------------------------------------------- // Worktree detection // --------------------------------------------------------------------------- /// Check whether the given path is a git worktree (`.git` is a file, not a directory). /// /// If it is, parses the `gitdir:` pointer to determine the main repository path. pub fn detect_worktree(path: &str) -> Option { let dot_git = PathBuf::from(path).join(".git"); // Only interested if .git is a *file* (worktree pointer), not a directory. let meta = std::fs::symlink_metadata(&dot_git).ok()?; if !meta.is_file() { return None; } let content = std::fs::read_to_string(&dot_git).ok()?; let gitdir_line = content .lines() .find(|l| l.starts_with("gitdir:"))?; let gitdir_raw = gitdir_line["gitdir:".len()..].trim(); // The gitdir path typically looks like `/path/to/main-repo/.git/worktrees/`. // Walk up to find the main repo root. let gitdir_path = if PathBuf::from(gitdir_raw).is_absolute() { PathBuf::from(gitdir_raw) } else { PathBuf::from(path).join(gitdir_raw) }; // Try to resolve: .../main-repo/.git/worktrees/xxx -> .../main-repo let main_repo = gitdir_path .ancestors() .find(|ancestor| { // Check if this ancestor has `.git` as a child (actual git dir, not worktree file). let git_child = ancestor.join(".git"); git_child.is_dir() }) .map(|p| normalize_project_path(&p.to_string_lossy())); Some(WorktreeHint { worktree_path: normalize_project_path(path), main_repo_path: main_repo, }) } // --------------------------------------------------------------------------- // Multi-path grouping // --------------------------------------------------------------------------- /// Group paths that share a common immediate parent directory. /// /// Only produces hints for groups of 2+ paths. pub fn find_multi_path_groups(paths: &[String]) -> Vec { let mut by_parent: HashMap> = HashMap::new(); for path in paths { let normalized = normalize_project_path(path); // Find immediate parent by stripping last component. if let Some(parent) = PathBuf::from(&normalized).parent() { let parent_str = parent.to_string_lossy().replace('\\', "/"); by_parent .entry(parent_str) .or_default() .push(normalized); } } by_parent .into_iter() .filter(|(_, children)| children.len() >= 2) .map(|(parent, mut children)| { children.sort(); MultiPathHint { shared_parent: parent, paths: children, } }) .collect() } // --------------------------------------------------------------------------- // Detection logic // --------------------------------------------------------------------------- /// Match discovered import projects against existing projects. /// /// For each discovered path, attempts to find a match in the existing project /// store using (in priority order): /// 1. Exact normalized path match against any repository /// 2. Canonical (fs::canonicalize) path match /// 3. Name-based hint (project name == suggested name) /// /// Unmatched paths get `ProjectResolution::CreateNew`. pub fn detect_projects( discovered: &[DiscoveredImportProject], existing_projects: &[(Project, Vec)], ) -> ProjectDetectionResult { // Pre-build a lookup from normalized repo paths -> (project, repo). let mut path_index: HashMap = HashMap::new(); let mut canonical_index: HashMap = HashMap::new(); let mut name_index: HashMap = HashMap::new(); for (project, repos) in existing_projects { name_index.insert(project.name.to_lowercase(), project); for repo in repos { let repo_path_str = repo.path.to_string_lossy().to_string(); let normalized = normalize_project_path(&repo_path_str); path_index.insert(normalized.clone(), (project, repo)); // Also try canonical path of the repo. if let Ok(canonical) = std::fs::canonicalize(&repo.path) { let canon_norm = normalize_project_path(&canonical.to_string_lossy()); canonical_index.insert(canon_norm, (project, repo)); } } } let mut detections = Vec::with_capacity(discovered.len()); let discovered_paths: Vec = discovered.iter().map(|d| d.path.clone()).collect(); let worktree_hints: Vec = discovered_paths .iter() .filter_map(|p| detect_worktree(p)) .collect(); for disc in discovered { let normalized = normalize_project_path(&disc.path); // 1. Exact normalized path match. if let Some((project, repo)) = path_index.get(&normalized) { detections.push(DetectedProject { discovered_path: disc.path.clone(), suggested_name: disc.name.clone(), session_count: disc.session_count, resolution: ProjectResolution::Linked { project_id: project.id, project_name: project.name.clone(), matched_repository_id: repo.id, }, }); continue; } // 2. Canonical path match. let canon_norm = std::fs::canonicalize(&disc.path) .map(|c| normalize_project_path(&c.to_string_lossy())) .unwrap_or_default(); if !canon_norm.is_empty() { if let Some((project, repo)) = canonical_index.get(&canon_norm) { detections.push(DetectedProject { discovered_path: disc.path.clone(), suggested_name: disc.name.clone(), session_count: disc.session_count, resolution: ProjectResolution::Linked { project_id: project.id, project_name: project.name.clone(), matched_repository_id: repo.id, }, }); continue; } } // 3. Name hint match. let suggested_lower = derive_suggested_name(&disc.path).to_lowercase(); if let Some(project) = name_index.get(&suggested_lower) { // Find the primary repo or any repo to satisfy the linked variant. let existing_repos = existing_projects .iter() .find(|(p, _)| p.id == project.id) .map(|(_, repos)| repos); if let Some(repos) = existing_repos { if let Some(repo) = repos.iter().find(|r| r.is_primary).or(repos.first()) { detections.push(DetectedProject { discovered_path: disc.path.clone(), suggested_name: disc.name.clone(), session_count: disc.session_count, resolution: ProjectResolution::Linked { project_id: project.id, project_name: project.name.clone(), matched_repository_id: repo.id, }, }); continue; } } } // 4. No match — suggest creating. let name = derive_suggested_name(&disc.path); detections.push(DetectedProject { discovered_path: disc.path.clone(), suggested_name: disc.name.clone(), session_count: disc.session_count, resolution: ProjectResolution::CreateNew { name }, }); } let multi_path_hints = find_multi_path_groups(&discovered_paths); ProjectDetectionResult { detections, worktree_hints, multi_path_hints, } } /// Derive a suggested project name from a path (last non-empty component). fn derive_suggested_name(path: &str) -> String { let normalized = path.replace('\\', "/"); let trimmed = normalized.trim_end_matches('/'); trimmed .rsplit('/') .next() .unwrap_or(trimmed) .to_string() } // --------------------------------------------------------------------------- // Project creation from import // --------------------------------------------------------------------------- /// Create projects from a batch of import creation requests. /// /// For each request: creates the project, adds the primary repository, and /// adds any additional repositories. Returns one result per request. pub async fn create_projects_from_import( store: &dyn ProjectStore, requests: Vec, owner: Uuid, ) -> Vec> { let mut results = Vec::with_capacity(requests.len()); for req in requests { results.push(create_single_project(store, req, owner).await); } results } async fn create_single_project( store: &dyn ProjectStore, req: ImportProjectCreationRequest, owner: Uuid, ) -> Result { let project = store .create_project(CreateProjectParams { name: req.name.clone(), description: String::new(), icon: req.icon, owner, tags: req.tags, languages: req.languages, metadata: serde_json::Value::Object(serde_json::Map::new()), }) .await?; let mut repos_created: usize = 0; // Primary repository. store .add_repository(AddRepositoryParams { project_id: project.id, path: PathBuf::from(&req.primary_path), is_primary: true, label: None, }) .await?; repos_created += 1; // Additional repositories. for additional in &req.additional_paths { match store .add_repository(AddRepositoryParams { project_id: project.id, path: PathBuf::from(additional), is_primary: false, label: None, }) .await { Ok(_) => repos_created += 1, Err(e) => { tracing::warn!( project_id = %project.id, path = %additional, error = %e, "Failed to add additional repository during import" ); } } } Ok(ImportProjectCreationResult { project_id: project.id, project_name: project.name, repositories_created: repos_created, }) } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; #[test] fn normalize_backslashes() { let result = normalize_project_path("C:\\Users\\alice\\project"); assert!(result.contains('/')); assert!(!result.contains('\\')); } #[test] fn normalize_mingw_path() { let result = normalize_project_path("/c/Users/alice/project"); assert!( result.starts_with("C:/") || result.starts_with("c:/"), "Expected drive letter prefix, got: {}", result ); } #[test] fn normalize_wsl_path() { let result = normalize_project_path("/mnt/c/Users/alice/project"); assert!( result.starts_with("C:/") || result.starts_with("c:/"), "Expected drive letter prefix, got: {}", result ); } #[test] fn normalize_strips_trailing_slash() { let result = normalize_project_path("/home/alice/project/"); assert!(!result.ends_with('/')); } #[test] fn normalize_resolves_dots() { // Textual fallback since this path won't exist on disk. let result = normalize_project_path("/home/alice/./project/../project/src"); assert!(result.contains("/home/alice/project/src") || result.ends_with("project/src")); } #[test] fn normalize_collapses_double_slashes() { let result = normalize_project_path("/home//alice///project"); assert!(!result.contains("//") || result.starts_with("//")); } #[test] fn derive_suggested_name_basic() { assert_eq!(derive_suggested_name("/home/alice/my-project"), "my-project"); assert_eq!(derive_suggested_name("C:\\Users\\bob\\work"), "work"); assert_eq!(derive_suggested_name("/home/alice/my-project/"), "my-project"); } #[test] fn multi_path_groups_basic() { let paths = vec![ "/home/alice/projects/foo".to_string(), "/home/alice/projects/bar".to_string(), "/home/alice/work/baz".to_string(), ]; let groups = find_multi_path_groups(&paths); // foo and bar share /home/alice/projects, baz is alone under /home/alice/work let multi = groups .iter() .find(|g| g.paths.len() == 2); assert!(multi.is_some(), "Expected a group with 2 paths"); } #[test] fn detect_projects_creates_new_for_unmatched() { let discovered = vec![DiscoveredImportProject { name: "my-project".to_string(), path: "/nonexistent/path/my-project".to_string(), session_count: 5, }]; let existing: Vec<(Project, Vec)> = vec![]; let result = detect_projects(&discovered, &existing); assert_eq!(result.detections.len(), 1); match &result.detections[0].resolution { ProjectResolution::CreateNew { name } => { assert_eq!(name, "my-project"); } other => panic!("Expected CreateNew, got {:?}", other), } } #[test] fn detect_projects_links_by_name() { use chrono::Utc; let project_id = Uuid::now_v7(); let repo_id = Uuid::now_v7(); let now = Utc::now(); let project = Project { id: project_id, name: "dirigent".to_string(), description: String::new(), icon: None, owner: Uuid::nil(), members: vec![], tags: vec![], languages: vec![], linked_projects: vec![], metadata: serde_json::json!({}), created_at: now, updated_at: now, }; let repo = ProjectRepository { id: repo_id, project_id, path: PathBuf::from("/other/path/dirigent"), is_primary: true, label: None, access: dirigent_protocol::project::AccessMode::ReadWrite, created_at: now, updated_at: now, }; let discovered = vec![DiscoveredImportProject { name: "dirigent".to_string(), path: "/somewhere/else/dirigent".to_string(), session_count: 3, }]; let result = detect_projects(&discovered, &[(project, vec![repo])]); assert_eq!(result.detections.len(), 1); match &result.detections[0].resolution { ProjectResolution::Linked { project_id: pid, matched_repository_id: rid, .. } => { assert_eq!(*pid, project_id); assert_eq!(*rid, repo_id); } other => panic!("Expected Linked, got {:?}", other), } } }