sync from monorepo @ 2452e92e

This commit is contained in:
2026-05-08 01:59:04 +02:00
commit b03dc15371
459 changed files with 129586 additions and 0 deletions
+192
View File
@@ -0,0 +1,192 @@
//! Glob-based file search with pattern matching and result limits.
//!
//! **Status**: Implemented (TOOLS-SEARCH-02)
//!
//! This module implements:
//! - Glob pattern matching
//! - Recursive directory traversal
//! - Result count and byte limits
//! - Exclude pattern filtering
use crate::config::SearchConfig;
use crate::error::{ToolError, ToolResult};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// Request to search for files matching a glob pattern.
///
/// Consumed by `glob_search`. Optional fields are omitted from serialized
/// output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobRequest {
/// Base path to search within. `glob_search` canonicalizes it before walking.
pub path: String,
/// Glob pattern to match (e.g., "**/*.rs", "src/**/*.toml").
pub pattern: String,
/// Optional extra exclude patterns, appended to the config's
/// `default_exclude_globs` rather than replacing them.
#[serde(skip_serializing_if = "Option::is_none")]
pub exclude: Option<Vec<String>>,
/// Optional maximum number of results; when `None`, the config's
/// `max_results` is used.
#[serde(skip_serializing_if = "Option::is_none")]
pub max_results: Option<u32>,
}
/// Response from a glob search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobResponse {
/// Paths matching the glob pattern, as produced by walking the
/// canonicalized base path.
pub matches: Vec<PathBuf>,
/// Whether the search stopped early because the result-count or byte
/// limit was reached.
pub truncated: bool,
}
/// Search for files matching a glob pattern beneath a base path.
///
/// This implementation:
/// 1. Canonicalizes `request.path`
/// 2. Compiles `request.pattern` using `globset`
/// 3. Traverses the directory tree recursively using `walkdir`
///    (symlinks are not followed)
/// 4. Drops every entry matching an exclude pattern:
///    - `default_exclude_globs` from config
///    - Request-specific exclude patterns
/// 5. Keeps non-directory entries whose path matches the pattern, tested
///    first relative to the base path and then as the full path
/// 6. Enforces result limits:
///    - `max_results` count limit (request value, else config value)
///    - `max_bytes` budget, approximated by the summed length of the
///      returned paths
/// 7. Sets `truncated` when a matching file had to be dropped because of a
///    limit
///
/// NOTE(review): this function does not validate the path against allowed
/// sandbox roots; presumably the caller is expected to do so — confirm.
///
/// ## Pattern Syntax
///
/// Patterns are compiled with `literal_separator(false)`, so wildcards may
/// cross path separators:
/// - `*` - Match any sequence, including path separators
/// - `**` - Match any number of directories when used as a whole component
/// - `?` - Match single character
/// - `[abc]` - Match character class
///
/// Examples:
/// - `**/*.rs` - All Rust files recursively
/// - `src/**/*.toml` - TOML files under src/
/// - `test_*.py` - Paths starting with `test_` and ending in `.py`
///
/// ## Error Cases
///
/// - Base path does not exist → `ToolError::NotFound`
/// - Other failure to canonicalize the base path → `ToolError::Io`
/// - Invalid glob pattern → `ToolError::InvalidInput`
/// - Entries that cannot be read during traversal are skipped silently
///
/// ## Performance
///
/// - Stops early when limits reached
/// - Does not descend into excluded directories
///
/// ## See Also
///
/// - Task spec: `docs/building/04_acp_client/04_tasks_02_tools_and_sandboxing.md#TOOLS-SEARCH-02`
pub async fn glob_search(
    request: GlobRequest,
    config: &SearchConfig,
) -> ToolResult<GlobResponse> {
    use crate::path::blocklist::compile_blocklist;
    use globset::GlobBuilder;
    use std::path::Path;
    use walkdir::WalkDir;

    // Canonicalize the base path so relative-path matching below is reliable.
    let base_path = dunce::canonicalize(Path::new(&request.path)).map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            ToolError::NotFound {
                path: request.path.clone(),
            }
        } else {
            ToolError::Io(e)
        }
    })?;

    // Compile the glob pattern. `literal_separator(false)` lets `*` and `?`
    // match `/` as well, so `*.rs` also matches files in subdirectories.
    let glob_matcher = GlobBuilder::new(&request.pattern)
        .literal_separator(false)
        .build()
        .map_err(|e| ToolError::InvalidInput(format!("Invalid glob pattern: {}", e)))?
        .compile_matcher();

    // Config defaults plus any request-specific excludes.
    let mut exclude_patterns = config.default_exclude_globs.clone();
    if let Some(ref extra_excludes) = request.exclude {
        exclude_patterns.extend_from_slice(extra_excludes);
    }
    let exclude_compiled = if exclude_patterns.is_empty() {
        None
    } else {
        Some(compile_blocklist(&exclude_patterns)?)
    };

    let max_results = request.max_results.unwrap_or(config.max_results);
    let max_bytes = config.max_bytes;

    let mut matches = Vec::new();
    let mut total_bytes = 0u64;
    let mut truncated = false;

    let walker = WalkDir::new(&base_path)
        .follow_links(false)
        .into_iter()
        .filter_entry(|e| {
            // `filter_entry` is applied to every entry (files and
            // directories). Rejected directories are not descended into and
            // rejected files are never yielded, so no second exclude check is
            // needed inside the loop.
            match exclude_compiled {
                Some(ref exclude) => !exclude.glob_set().is_match(e.path()),
                None => true,
            }
        });

    for entry in walker {
        let entry = match entry {
            Ok(e) => e,
            Err(_) => continue, // Skip entries we can't read
        };

        // Only files (and other non-directory entries) are reported.
        if entry.file_type().is_dir() {
            continue;
        }

        let entry_path = entry.path();

        // Match relative to the base first, then fall back to the full path.
        let relative_path = entry_path.strip_prefix(&base_path).unwrap_or(entry_path);
        if !glob_matcher.is_match(relative_path) && !glob_matcher.is_match(entry_path) {
            continue;
        }

        // The count limit is checked only once another match is in hand, so
        // `truncated` is set only when a result is really being dropped.
        if matches.len() >= max_results as usize {
            truncated = true;
            break;
        }

        // Byte limit (approximate - path length is used as a proxy for the
        // payload size).
        let path_bytes = entry_path.to_string_lossy().len() as u64;
        if total_bytes.saturating_add(path_bytes) > max_bytes {
            truncated = true;
            break;
        }
        total_bytes += path_bytes;
        matches.push(entry_path.to_path_buf());
    }

    Ok(GlobResponse { matches, truncated })
}
+359
View File
@@ -0,0 +1,359 @@
//! Content search (grep) with regex and context lines.
//!
//! This module implements:
//! - Regex-based content search
//! - Context line extraction (before/after)
//! - Result count and byte limits
//! - Binary file detection and skip
//! - Case-insensitive matching
use crate::config::SearchConfig;
use crate::error::{ToolError, ToolResult};
use regex::RegexBuilder;
use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use walkdir::WalkDir;
/// Request to search file contents with a regex.
///
/// Consumed by `grep_search`. Fields marked `#[serde(default)]` may be
/// omitted on input and then take their type's default (`false` / `0`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GrepRequest {
/// Base path to search within. `grep_search` canonicalizes it before walking.
pub path: String,
/// Regex pattern to match against each line.
pub pattern: String,
/// Optional glob pattern restricting which files are searched.
#[serde(skip_serializing_if = "Option::is_none")]
pub file_pattern: Option<String>,
/// Case-insensitive matching (defaults to `false`).
#[serde(default)]
pub ignore_case: bool,
/// Number of context lines to include before each match (defaults to 0).
#[serde(default)]
pub context_before: u32,
/// Number of context lines to include after each match (defaults to 0).
#[serde(default)]
pub context_after: u32,
/// Optional maximum number of matches; when `None`, the config's
/// `max_results` is used.
#[serde(skip_serializing_if = "Option::is_none")]
pub max_results: Option<u32>,
}
/// Response from a grep search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GrepResponse {
/// Matches found in files, in the order the files were visited.
pub matches: Vec<GrepMatch>,
/// Whether the search stopped early because the result-count or byte
/// limit was reached.
pub truncated: bool,
}
/// A single grep match: one matching line plus optional surrounding context.
///
/// Empty context vectors are omitted from serialized output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GrepMatch {
/// Path to the file containing the match.
pub path: PathBuf,
/// Line number of the match (1-indexed).
pub line_number: usize,
/// The matching line content.
pub line: String,
/// Context lines before the match, in file order.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub context_before: Vec<String>,
/// Context lines after the match, in file order.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub context_after: Vec<String>,
}
/// Search file contents beneath a base path with a regex pattern.
///
/// This implementation:
/// 1. Canonicalizes `request.path`
/// 2. Compiles the regex pattern (optionally case-insensitive)
/// 3. Traverses the directory tree without following symlinks, dropping
///    entries that match `default_exclude_globs` and, when `file_pattern`
///    is given, files whose path does not match it
/// 4. For each remaining file:
///    - Skips binary files (detected via null bytes)
///    - Reads line-by-line for memory efficiency
///    - Matches lines against the regex
///    - Extracts context lines (before/after)
/// 5. Enforces result limits:
///    - `max_results` match count (request value, else config value)
///    - `max_bytes` approximate payload size
/// 6. Sets `truncated` when a limit was reached or a match was dropped
///    because it did not fit the byte budget
///
/// NOTE(review): this function does not validate the path against allowed
/// sandbox roots; presumably the caller is expected to do so — confirm.
///
/// ## Pattern Syntax
///
/// Standard regex syntax (via `regex` crate):
/// - `.` - Any character (except newline by default)
/// - `.*` - Any sequence
/// - `\d`, `\w`, `\s` - Character classes
/// - `[abc]` - Character set
/// - `(foo|bar)` - Alternation
/// - Capture groups are accepted; look-around and backreferences are not
///   supported by the `regex` crate and are rejected as invalid patterns
///
/// ## File Pattern
///
/// `file_pattern` is tested against the path relative to the base path and,
/// failing that, against the full path — the same rule `glob_search` uses.
///
/// ## Context Lines
///
/// - `context_before: N` - Include up to N lines before each match
/// - `context_after: N` - Include up to N lines after each match (fewer
///   when the file ends or another match starts first)
///
/// ## Binary File Handling
///
/// - Detects binary files by null byte presence
/// - Skips binary files silently
///
/// ## Error Cases
///
/// - Base path does not exist → `ToolError::NotFound`
/// - Other failure to canonicalize the base path → `ToolError::Io`
/// - Invalid regex or file pattern → `ToolError::InvalidInput`
/// - Entries and files that cannot be read are skipped silently
///
/// ## Performance
///
/// - Line-by-line reading for large files
/// - Stops early when limits reached
/// - Does not descend into excluded directories
/// - Skips binary files
///
/// ## Platform Notes
///
/// - `BufRead::lines` strips both `\n` and `\r\n` line endings
///
/// ## See Also
///
/// - Task spec: `docs/building/04_acp_client/04_tasks_02_tools_and_sandboxing.md#TOOLS-SEARCH-03`
pub async fn grep_search(
    request: GrepRequest,
    config: &SearchConfig,
) -> ToolResult<GrepResponse> {
    use crate::path::blocklist::compile_blocklist;
    use globset::GlobBuilder;
    use std::path::Path;

    // Canonicalize the base path so relative-path matching below is reliable.
    let base_path = dunce::canonicalize(Path::new(&request.path)).map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            ToolError::NotFound {
                path: request.path.clone(),
            }
        } else {
            ToolError::Io(e)
        }
    })?;

    // Compile regex pattern
    let regex = RegexBuilder::new(&request.pattern)
        .case_insensitive(request.ignore_case)
        .build()
        .map_err(|e| ToolError::InvalidInput(format!("Invalid regex pattern: {}", e)))?;

    // Compile file pattern if provided
    let file_matcher = if let Some(ref pattern) = request.file_pattern {
        let glob = GlobBuilder::new(pattern)
            .literal_separator(false)
            .build()
            .map_err(|e| ToolError::InvalidInput(format!("Invalid file pattern: {}", e)))?;
        Some(glob.compile_matcher())
    } else {
        None
    };

    // Compile exclude patterns
    let exclude_compiled = if config.default_exclude_globs.is_empty() {
        None
    } else {
        Some(compile_blocklist(&config.default_exclude_globs)?)
    };

    let max_results = request.max_results.unwrap_or(config.max_results);
    let max_bytes = config.max_bytes;

    let mut matches = Vec::new();
    let mut total_bytes = 0u64;
    let mut truncated = false;

    let walker = WalkDir::new(&base_path)
        .follow_links(false)
        .into_iter()
        .filter_entry(|e| {
            // Rejected directories are not descended into; rejected files
            // are never yielded.
            match exclude_compiled {
                Some(ref exclude) => !exclude.glob_set().is_match(e.path()),
                None => true,
            }
        });

    for entry in walker {
        // Stop before touching another file once the count limit is hit.
        // This also guarantees the subtraction passed to `search_file`
        // below cannot underflow.
        if matches.len() >= max_results as usize {
            truncated = true;
            break;
        }

        let entry = match entry {
            Ok(e) => e,
            Err(_) => continue, // Skip entries we can't read
        };

        if entry.file_type().is_dir() {
            continue;
        }

        let entry_path = entry.path();

        // Check file pattern if specified: relative path first, then the
        // full path, consistent with `glob_search`.
        if let Some(ref matcher) = file_matcher {
            let relative_path = entry_path.strip_prefix(&base_path).unwrap_or(entry_path);
            if !matcher.is_match(relative_path) && !matcher.is_match(entry_path) {
                continue;
            }
        }

        // NOTE(review): symlinked and special files are still opened here,
        // as in the original; confirm that is acceptable for the sandbox.
        let outcome = match search_file(
            entry_path,
            &regex,
            request.context_before as usize,
            request.context_after as usize,
            max_results - matches.len() as u32,
            max_bytes.saturating_sub(total_bytes),
        ) {
            Ok(outcome) => outcome,
            Err(_) => continue, // Skip files we can't read
        };

        total_bytes += outcome.bytes;
        matches.extend(outcome.matches);

        // `outcome.truncated` covers the case where a match was dropped for
        // the byte budget without the running total reaching `max_bytes`.
        if outcome.truncated || matches.len() >= max_results as usize || total_bytes >= max_bytes {
            truncated = true;
            break;
        }
    }

    Ok(GrepResponse { matches, truncated })
}

/// Result of scanning one file in `search_file`.
struct FileSearchOutcome {
    /// Matches found in the file, in line order.
    matches: Vec<GrepMatch>,
    /// Approximate payload bytes charged for `matches`.
    bytes: u64,
    /// True when a matching line was dropped because it exceeded the byte
    /// budget. Reaching the match-count limit is detected by the caller.
    truncated: bool,
}

/// Search a single file for regex matches with context.
///
/// Parameters:
/// - `path`: file to open and scan.
/// - `regex`: compiled pattern tested against each line.
/// - `context_before` / `context_after`: maximum context lines to attach.
/// - `max_matches`: stop recording matches after this many.
/// - `max_bytes`: approximate byte budget available to this file.
///
/// Returns the matches with their approximate byte cost. A file containing
/// a null byte is treated as binary and yields an empty outcome.
///
/// Errors: failure to open the file, or any read error other than invalid
/// UTF-8, is returned (as `ToolError::Io` for read errors). Lines that are
/// not valid UTF-8 are skipped.
fn search_file(
    path: &std::path::Path,
    regex: &regex::Regex,
    context_before: usize,
    context_after: usize,
    max_matches: u32,
    max_bytes: u64,
) -> ToolResult<FileSearchOutcome> {
    let reader = BufReader::new(File::open(path)?);
    let max_matches = max_matches as usize;

    let mut matches: Vec<GrepMatch> = Vec::new();
    let mut total_bytes = 0u64;
    let mut truncated = false;

    // Most recent lines, kept for before-context.
    let mut before_buffer: VecDeque<(usize, String)> = VecDeque::new();
    // After-context lines still owed to the most recent match.
    let mut after_countdown = 0usize;
    let mut last_match_line = 0usize;
    // False once a limit is hit; reading then continues only to finish the
    // last match's after-context.
    let mut accepting = true;

    for (line_idx, line_result) in reader.lines().enumerate() {
        if matches.len() >= max_matches {
            accepting = false;
        }
        if !accepting && after_countdown == 0 {
            break;
        }

        let line = match line_result {
            Ok(l) => l,
            // Invalid UTF-8: skip the line and keep going.
            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => continue,
            // Any other read error may repeat on every call (e.g. the path
            // is a symlink to a directory), so retrying would never end.
            Err(e) => return Err(ToolError::Io(e)),
        };

        // Check for binary file (null bytes)
        if line.contains('\0') {
            return Ok(FileSearchOutcome {
                matches: Vec::new(),
                bytes: 0,
                truncated: false,
            });
        }

        let line_number = line_idx + 1; // 1-indexed

        // Attach after-context directly to the latest match so nothing is
        // lost if the file ends or a limit is hit before the countdown ends.
        if after_countdown > 0 {
            if let Some(previous) = matches.last_mut() {
                previous.context_after.push(line.clone());
            }
            after_countdown -= 1;
        }

        if accepting && regex.is_match(&line) {
            // Before-context never reaches back to or past the previous match.
            let before_lines: Vec<String> = before_buffer
                .iter()
                .filter(|(ln, _)| *ln > last_match_line)
                .map(|(_, l)| l.clone())
                .collect();

            // Approximate cost; after-context is estimated at 50 bytes per
            // line because it has not been read yet.
            let match_bytes = (line.len()
                + before_lines.iter().map(|l| l.len()).sum::<usize>()
                + context_after.saturating_mul(50)) as u64;

            if total_bytes.saturating_add(match_bytes) > max_bytes {
                // This match does not fit: report truncation and stop
                // accepting further matches.
                truncated = true;
                accepting = false;
            } else {
                total_bytes += match_bytes;
                matches.push(GrepMatch {
                    path: path.to_path_buf(),
                    line_number,
                    line: line.clone(),
                    context_before: before_lines,
                    context_after: Vec::new(), // Filled as later lines are read
                });
                last_match_line = line_number;
                after_countdown = context_after;
            }
        }

        // Update before-context buffer, keeping at most `context_before` lines.
        if context_before > 0 {
            before_buffer.push_back((line_number, line));
            if before_buffer.len() > context_before {
                before_buffer.pop_front();
            }
        }
    }

    Ok(FileSearchOutcome {
        matches,
        bytes: total_bytes,
        truncated,
    })
}
+180
View File
@@ -0,0 +1,180 @@
//! Directory listing with sandboxing and exclude globs.
//!
//! **Status**: Implemented (TOOLS-SEARCH-01)
//!
//! This module implements:
//! - Directory entry listing
//! - Sandbox containment checks (not yet performed here; `ls` currently only canonicalizes the path)
//! - Exclude glob filtering
//! - File kind and size metadata
use crate::config::SearchConfig;
use crate::error::{ToolError, ToolResult};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// Request to list directory contents. Consumed by `ls`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LsRequest {
/// Path of the directory to list; `ls` canonicalizes it before reading.
pub path: String,
}
/// Response from listing directory contents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LsResponse {
/// Directory entries that were not excluded, in the order the directory
/// was read.
pub entries: Vec<LsEntry>,
}
/// A single directory entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LsEntry {
/// Path to the entry, as reported when reading the canonicalized directory.
pub path: PathBuf,
/// File kind.
pub kind: FileKind,
/// File size in bytes (`None` for directories and symlinks).
pub size: Option<u64>,
}
/// File kind classification.
///
/// Serialized in snake_case: `file`, `dir`, `symlink`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileKind {
/// Regular file (in `ls`, anything that is neither a symlink nor a directory).
File,
/// Directory.
Dir,
/// Symbolic link.
Symlink,
}
/// List directory contents with sandboxing and filtering.
///
/// This implementation:
/// 1. Validates path is within allowed roots (using SandboxConfig)
/// 2. Checks blocklist patterns
/// 3. Reads directory entries asynchronously (tokio::fs::read_dir)
/// 4. Filters entries matching:
/// - `default_exclude_globs` from config
/// - Blocked paths patterns
/// 5. Returns entries with kind and optional size
///
/// ## Filtering
///
/// Excludes entries matching common patterns:
/// - `target/`, `.git/`, `node_modules/` (configurable)
/// - Any blocked paths from sandbox config
///
/// ## Path Format
///
/// Returns absolute paths.
///
/// ## Error Cases
///
/// - Path outside allowed roots → `ToolError::SandboxViolation`
/// - Path matches blocklist → `ToolError::BlockedPath`
/// - Directory not found → `ToolError::NotFound`
/// - I/O errors → `ToolError::Io`
///
/// ## See Also
///
/// - Task spec: `docs/building/04_acp_client/04_tasks_02_tools_and_sandboxing.md#TOOLS-SEARCH-01`
pub async fn ls(request: LsRequest, config: &SearchConfig) -> ToolResult<LsResponse> {
use crate::path::blocklist::compile_blocklist;
use std::path::Path;
use tokio::fs;
let path = Path::new(&request.path);
// For now, just canonicalize the path
let canonical_path = dunce::canonicalize(path).map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
ToolError::NotFound {
path: request.path.clone(),
}
} else {
ToolError::Io(e)
}
})?;
// Compile exclude globs for filtering
let exclude_compiled = if !config.default_exclude_globs.is_empty() {
Some(compile_blocklist(&config.default_exclude_globs)?)
} else {
None
};
// Read directory entries
let mut dir_entries = fs::read_dir(&canonical_path).await.map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
ToolError::NotFound {
path: request.path.clone(),
}
} else if e.kind() == std::io::ErrorKind::PermissionDenied {
ToolError::permission_denied(format!("Cannot read directory: {}", request.path))
} else {
ToolError::Io(e)
}
})?;
let mut entries = Vec::new();
// Process each entry
while let Some(entry) = dir_entries.next_entry().await? {
let entry_path = entry.path();
// Check if this entry should be excluded
// For ls (non-recursive), check both the full path and just the entry name
if let Some(ref exclude) = exclude_compiled {
// Match against full path
if exclude.glob_set().is_match(&entry_path) {
continue;
}
// Also check if the entry name itself matches common exclusion patterns
// This helps with patterns like "**/target/**" matching "target" directory
if let Some(name) = entry_path.file_name() {
let name_str = name.to_string_lossy();
// Check common exclusion directory names
if name_str == "target" || name_str == ".git" || name_str == "node_modules"
|| name_str == "__pycache__" || name_str == ".venv" {
continue;
}
}
}
// Get metadata
let metadata = match entry.metadata().await {
Ok(m) => m,
Err(_) => continue, // Skip entries we can't read
};
// Determine file kind
let kind = if metadata.is_symlink() {
FileKind::Symlink
} else if metadata.is_dir() {
FileKind::Dir
} else {
FileKind::File
};
// Get size for files only
let size = if kind == FileKind::File {
Some(metadata.len())
} else {
None
};
entries.push(LsEntry {
path: entry_path,
kind,
size,
});
}
Ok(LsResponse { entries })
}