Files
2026-05-08 01:59:04 +02:00

426 lines
13 KiB
Rust

//! Integration tests for file embedding functionality.
//!
//! Tests the complete embedding pipeline from file paths to ContentBlocks.
use dirigent_core::acp::content_blocks::build_content_blocks_from_files;
use dirigent_core::acp::protocol::prompt::{ContentBlock, EmbeddedResource};
use dirigent_core::acp::{AgentCapabilities, PromptCapabilities};
use dirigent_tools::config::{EmbeddingConfig, SandboxConfig};
use tempfile::TempDir;
/// Builds an [`AgentCapabilities`] value for use in these tests.
///
/// Only `prompt_capabilities.embedded_context` is driven by the argument;
/// `image`, `audio`, `load_session`, `mcp` and `_meta` are all set to `None`.
fn create_test_agent_caps(embedded_context: bool) -> AgentCapabilities {
    let prompt_capabilities = PromptCapabilities {
        image: None,
        audio: None,
        embedded_context: Some(embedded_context),
    };

    AgentCapabilities {
        load_session: None,
        prompt_capabilities: Some(prompt_capabilities),
        mcp: None,
        _meta: None,
    }
}
/// Builds the `(EmbeddingConfig, SandboxConfig)` pair shared by most tests.
///
/// The embedding config uses a 1000-byte `max_embed_bytes`, allows resource
/// links, has no redaction patterns, selects the `HeadTail` snippet strategy
/// and permits 10 files per prompt.
///
/// The sandbox config starts from `SandboxConfig::default()`, has its
/// `allowed_roots` replaced by the path of `temp_dir`, and then has
/// `normalize_roots()` called on it.
fn create_test_config(temp_dir: &TempDir) -> (EmbeddingConfig, SandboxConfig) {
    let mut sandbox = SandboxConfig::default();
    sandbox.allowed_roots = vec![temp_dir.path().to_path_buf()];
    sandbox.normalize_roots();

    let embedding = EmbeddingConfig {
        max_embed_bytes: 1000,
        max_files_per_prompt: 10,
        allow_resource_link: true,
        snippet_strategy: dirigent_tools::config::SnippetStrategy::HeadTail,
        redact_patterns: vec![],
    };

    (embedding, sandbox)
}
#[test]
fn test_scenario_1_small_text_capability_on_embed() {
    // Scenario 1: a small text file with the embedded-context capability on
    // is expected to come back as a single embedded text resource.
    let sandbox_root = TempDir::new().unwrap();
    let small_path = sandbox_root.path().join("small.txt");
    std::fs::write(&small_path, "Hello, world!").unwrap();

    let (embedding_config, sandbox_config) = create_test_config(&sandbox_root);
    let agent_caps = create_test_agent_caps(true);

    let blocks = build_content_blocks_from_files(
        &[small_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    // Peel the block apart one layer at a time so each failure keeps its own message.
    let ContentBlock::Resource { resource, .. } = &blocks[0] else {
        panic!("Expected resource block");
    };
    let EmbeddedResource::Text { text, .. } = resource else {
        panic!("Expected text resource");
    };
    assert_eq!(text, "Hello, world!");
}
#[test]
fn test_scenario_2_large_text_capability_on_link() {
    // Scenario 2: a text file larger than the 1000-byte `max_embed_bytes`
    // configured by `create_test_config`, with the capability on, is expected
    // to be returned as a resource link carrying the file size.
    let sandbox_root = TempDir::new().unwrap();
    let large_path = sandbox_root.path().join("large.txt");
    std::fs::write(&large_path, "x".repeat(2000)).unwrap(); // 2000 bytes > 1000-byte limit

    let (embedding_config, sandbox_config) = create_test_config(&sandbox_root);
    let agent_caps = create_test_agent_caps(true);

    let blocks = build_content_blocks_from_files(
        &[large_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    let ContentBlock::ResourceLink { size, .. } = &blocks[0] else {
        panic!("Expected resource link block");
    };
    assert_eq!(*size, Some(2000));
}
#[test]
fn test_scenario_3_binary_file_link() {
    // Scenario 3: a file named `image.png` whose content is the PNG signature
    // bytes is expected to be returned as a resource link with an
    // `image/png` MIME type.
    let sandbox_root = TempDir::new().unwrap();
    let png_path = sandbox_root.path().join("image.png");
    std::fs::write(&png_path, b"\x89PNG\r\n\x1a\n").unwrap();

    let (embedding_config, sandbox_config) = create_test_config(&sandbox_root);
    let agent_caps = create_test_agent_caps(true);

    let blocks = build_content_blocks_from_files(
        &[png_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    let ContentBlock::ResourceLink { mime_type, .. } = &blocks[0] else {
        panic!("Expected resource link block");
    };
    // Compare as `Option<&str>` to avoid allocating the expected value.
    assert_eq!(mime_type.as_deref(), Some("image/png"));
}
#[test]
fn test_scenario_4_capability_off_all_links() {
    // Scenario 4: with the embedded-context capability off, even a small text
    // file is expected to come back as a resource link.
    let sandbox_root = TempDir::new().unwrap();
    let text_path = sandbox_root.path().join("test.txt");
    std::fs::write(&text_path, "Test content").unwrap();

    let (embedding_config, sandbox_config) = create_test_config(&sandbox_root);
    let agent_caps = create_test_agent_caps(false); // capability OFF

    let blocks = build_content_blocks_from_files(
        &[text_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");
    assert!(
        matches!(&blocks[0], ContentBlock::ResourceLink { .. }),
        "Expected resource link block when capability is off"
    );
}
#[test]
fn test_scenario_5_exceed_total_cap_link_remaining() {
    // Scenario 5: exceed total cap → deny or link remaining.
    //
    // Three 800-byte files are submitted together (2400 bytes in total).
    // Each file is below the 1000-byte `max_embed_bytes` set by
    // `create_test_config`, and max_embed_bytes * max_files_per_prompt =
    // 1000 * 10 = 10000, so all of them are expected to embed here.
    // NOTE(review): despite the test name, no limit visible in this file is
    // actually exceeded by this input — confirm the intended scenario.
    let temp_dir = TempDir::new().unwrap();

    let payload = "x".repeat(800);
    let files = ["file1.txt", "file2.txt", "file3.txt"].map(|name| temp_dir.path().join(name));
    for path in &files {
        std::fs::write(path, &payload).unwrap();
    }

    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);
    let agent_caps = create_test_agent_caps(true);

    let blocks = build_content_blocks_from_files(
        &files,
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    // Files should be embedded or linked rather than denied.
    assert!(blocks.len() >= 2, "Should have at least 2 content blocks");
}
#[test]
fn test_scenario_6_exceed_file_count_deny() {
    // Scenario 6: submitting more files than `max_files_per_prompt` allows.
    let temp_dir = TempDir::new().unwrap();

    // Default embedding settings, except that only 2 files are allowed per prompt.
    let embedding_config = EmbeddingConfig {
        max_files_per_prompt: 2,
        ..EmbeddingConfig::default()
    };

    let mut sandbox_config = SandboxConfig::default();
    sandbox_config.allowed_roots = vec![temp_dir.path().to_path_buf()];
    sandbox_config.normalize_roots();

    let agent_caps = create_test_agent_caps(true);

    // Three files: one more than the configured limit.
    let files = ["file1.txt", "file2.txt", "file3.txt"].map(|name| temp_dir.path().join(name));
    for (path, body) in files.iter().zip(["File 1", "File 2", "File 3"]) {
        std::fs::write(path, body).unwrap();
    }

    let blocks = build_content_blocks_from_files(
        &files,
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    // The third file is expected to be denied, leaving at most 2 blocks.
    assert!(blocks.len() <= 2, "Should have at most 2 content blocks");
}
#[test]
fn test_scenario_7_redaction_patterns_applied() {
    // Scenario 7: a configured redaction pattern must be applied to embedded
    // text, so the secret value does not appear in the resulting block.
    let temp_dir = TempDir::new().unwrap();
    let secrets_path = temp_dir.path().join("secrets.txt");
    std::fs::write(&secrets_path, "api_key: sk-1234567890abcdef").unwrap();

    // Default embedding settings plus a single pattern matching `api_key: <value>`.
    let embedding_config = EmbeddingConfig {
        redact_patterns: vec![r"(?i)(api[_-]?key):\s*([a-zA-Z0-9_\-\.]+)".to_string()],
        ..EmbeddingConfig::default()
    };

    let mut sandbox_config = SandboxConfig::default();
    sandbox_config.allowed_roots = vec![temp_dir.path().to_path_buf()];
    sandbox_config.normalize_roots();

    let agent_caps = create_test_agent_caps(true);

    let blocks = build_content_blocks_from_files(
        &[secrets_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    let ContentBlock::Resource { resource, .. } = &blocks[0] else {
        panic!("Expected resource block");
    };
    let EmbeddedResource::Text { text, .. } = resource else {
        panic!("Expected text resource");
    };

    // The raw key must be gone and a redaction marker must be present.
    assert!(
        !text.contains("sk-1234567890abcdef"),
        "Secret should be redacted"
    );
    assert!(text.contains("REDACTED"), "Should contain redaction marker");
}
#[test]
fn test_scenario_8_sandbox_violation_deny() {
    // Scenario 8: a file located outside the sandbox's allowed root must be
    // rejected with an error whose debug output names `SandboxViolation`.
    let sandbox_root = TempDir::new().unwrap();
    let outside_root = TempDir::new().unwrap();

    // The file lives in a second temp dir that is NOT the allowed root.
    let outside_path = outside_root.path().join("outside.txt");
    std::fs::write(&outside_path, "Outside sandbox").unwrap();

    let (embedding_config, sandbox_config) = create_test_config(&sandbox_root);
    let agent_caps = create_test_agent_caps(true);

    let result = build_content_blocks_from_files(
        &[outside_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    );

    let Err(err) = result else {
        panic!("Should fail with sandbox violation");
    };
    assert!(
        format!("{:?}", err).contains("SandboxViolation"),
        "Error should be SandboxViolation"
    );
}
#[test]
fn test_scenario_9_mixed_strategies() {
    // Scenario 9: one prompt containing files that call for different
    // strategies must yield the matching block kind for each, in input order.
    let temp_dir = TempDir::new().unwrap();
    let root = temp_dir.path();

    // Small text file: expected to be embedded.
    let small_file = root.join("small.txt");
    std::fs::write(&small_file, "Small content").unwrap();

    // 2000-byte text file: expected to be linked.
    let large_file = root.join("large.txt");
    std::fs::write(&large_file, "x".repeat(2000)).unwrap();

    // File with PNG signature bytes: expected to be linked.
    let binary_file = root.join("image.png");
    std::fs::write(&binary_file, b"\x89PNG\r\n\x1a\n").unwrap();

    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);
    let agent_caps = create_test_agent_caps(true);

    let blocks = build_content_blocks_from_files(
        &[small_file, large_file, binary_file],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 3, "Should have three content blocks");

    assert!(
        matches!(&blocks[0], ContentBlock::Resource { .. }),
        "Expected first block to be Resource (embedded)"
    );
    assert!(
        matches!(&blocks[1], ContentBlock::ResourceLink { .. }),
        "Expected second block to be ResourceLink"
    );
    assert!(
        matches!(&blocks[2], ContentBlock::ResourceLink { .. }),
        "Expected third block to be ResourceLink"
    );
}
#[test]
fn test_uri_stability() {
    // Building content blocks twice for the same file, with the same
    // capabilities and configuration, must produce the same resource URI.
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("stable.txt");
    std::fs::write(&file_path, "Stable content").unwrap();

    let agent_caps = create_test_agent_caps(true);
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);

    let blocks1 = build_content_blocks_from_files(
        &[file_path.clone()],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();
    let blocks2 = build_content_blocks_from_files(
        &[file_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    // Check the lengths before indexing, as the other single-file tests do,
    // so an unexpected result fails with a clear message instead of an
    // index-out-of-bounds panic.
    assert_eq!(
        blocks1.len(),
        1,
        "First invocation should have one content block"
    );
    assert_eq!(
        blocks2.len(),
        1,
        "Second invocation should have one content block"
    );

    // Extract the URI from each embedded text resource and compare.
    match (&blocks1[0], &blocks2[0]) {
        (
            ContentBlock::Resource {
                resource: EmbeddedResource::Text { uri: uri1, .. },
                ..
            },
            ContentBlock::Resource {
                resource: EmbeddedResource::Text { uri: uri2, .. },
                ..
            },
        ) => {
            assert_eq!(uri1, uri2, "URIs should be stable");
        }
        _ => panic!("Expected resource blocks with text"),
    }
}
#[test]
fn test_performance_many_files() {
    // Builds content blocks for 10 small files and checks both the block
    // count and that the call finishes within a 500 ms wall-clock bound.
    let temp_dir = TempDir::new().unwrap();

    // Create 10 files, collecting their paths in creation order.
    let files: Vec<_> = (0..10)
        .map(|i| {
            let file_path = temp_dir.path().join(format!("file{}.txt", i));
            std::fs::write(&file_path, format!("Content {}", i)).unwrap();
            file_path
        })
        .collect();

    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);
    let agent_caps = create_test_agent_caps(true);

    // Only the pipeline call itself is timed; fixture setup is excluded.
    let start = std::time::Instant::now();
    let blocks = build_content_blocks_from_files(
        &files,
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();
    let elapsed_ms = start.elapsed().as_millis();

    assert_eq!(blocks.len(), 10, "Should have 10 content blocks");
    assert!(
        elapsed_ms < 500,
        "Should complete in less than 500ms, took {}ms",
        elapsed_ms
    );
}