426 lines
13 KiB
Rust
426 lines
13 KiB
Rust
//! Integration tests for file embedding functionality.
//!
//! Tests the complete embedding pipeline from file paths to `ContentBlock`s.
|
|
|
|
use dirigent_core::acp::content_blocks::build_content_blocks_from_files;
|
|
use dirigent_core::acp::protocol::prompt::{ContentBlock, EmbeddedResource};
|
|
use dirigent_core::acp::{AgentCapabilities, PromptCapabilities};
|
|
use dirigent_tools::config::{EmbeddingConfig, SandboxConfig};
|
|
use tempfile::TempDir;
|
|
|
|
/// Helper to create test agent capabilities.
|
|
fn create_test_agent_caps(embedded_context: bool) -> AgentCapabilities {
|
|
AgentCapabilities {
|
|
load_session: None,
|
|
prompt_capabilities: Some(PromptCapabilities {
|
|
image: None,
|
|
audio: None,
|
|
embedded_context: Some(embedded_context),
|
|
}),
|
|
mcp: None,
|
|
_meta: None,
|
|
}
|
|
}
|
|
|
|
/// Helper to create test configuration.
|
|
fn create_test_config(temp_dir: &TempDir) -> (EmbeddingConfig, SandboxConfig) {
|
|
let embedding_config = EmbeddingConfig {
|
|
max_embed_bytes: 1000,
|
|
allow_resource_link: true,
|
|
redact_patterns: vec![],
|
|
snippet_strategy: dirigent_tools::config::SnippetStrategy::HeadTail,
|
|
max_files_per_prompt: 10,
|
|
};
|
|
|
|
let mut sandbox_config = SandboxConfig::default();
|
|
sandbox_config.allowed_roots = vec![temp_dir.path().to_path_buf()];
|
|
sandbox_config.normalize_roots();
|
|
|
|
(embedding_config, sandbox_config)
|
|
}
|
|
|
|
/// Scenario 1: a small text file with the embedded-context capability on is
/// embedded as a text resource carrying the file's contents.
#[test]
fn test_scenario_1_small_text_capability_on_embed() {
    let temp_dir = TempDir::new().unwrap();
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);
    let agent_caps = create_test_agent_caps(true);

    let file_path = temp_dir.path().join("small.txt");
    std::fs::write(&file_path, "Hello, world!").unwrap();

    let blocks = build_content_blocks_from_files(
        &[file_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    // Arms are ordered from most to least specific so each failure keeps its
    // own message.
    match &blocks[0] {
        ContentBlock::Resource {
            resource: EmbeddedResource::Text { text, .. },
            ..
        } => assert_eq!(text, "Hello, world!"),
        ContentBlock::Resource { .. } => panic!("Expected text resource"),
        _ => panic!("Expected resource block"),
    }
}
|
|
|
|
/// Scenario 2: a text file larger than the configured embed limit is emitted
/// as a resource link that reports the file size.
#[test]
fn test_scenario_2_large_text_capability_on_link() {
    let temp_dir = TempDir::new().unwrap();
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);
    let agent_caps = create_test_agent_caps(true);

    // 2000 bytes exceeds the 1000-byte limit set by `create_test_config`.
    let file_path = temp_dir.path().join("large.txt");
    std::fs::write(&file_path, "x".repeat(2000)).unwrap();

    let blocks = build_content_blocks_from_files(
        &[file_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    if let ContentBlock::ResourceLink { size, .. } = &blocks[0] {
        assert_eq!(*size, Some(2000));
    } else {
        panic!("Expected resource link block");
    }
}
|
|
|
|
/// Scenario 3: a binary file (PNG signature bytes) is emitted as a resource
/// link whose MIME type is `image/png`.
#[test]
fn test_scenario_3_binary_file_link() {
    let temp_dir = TempDir::new().unwrap();
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);
    let agent_caps = create_test_agent_caps(true);

    // Only the 8-byte PNG signature is written; no further image data.
    let file_path = temp_dir.path().join("image.png");
    std::fs::write(&file_path, b"\x89PNG\r\n\x1a\n").unwrap();

    let blocks = build_content_blocks_from_files(
        &[file_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    if let ContentBlock::ResourceLink { mime_type, .. } = &blocks[0] {
        assert_eq!(mime_type, &Some(String::from("image/png")));
    } else {
        panic!("Expected resource link block");
    }
}
|
|
|
|
/// Scenario 4: with the embedded-context capability off, even a small text
/// file is emitted as a resource link.
#[test]
fn test_scenario_4_capability_off_all_links() {
    let temp_dir = TempDir::new().unwrap();
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);

    // Capability OFF.
    let agent_caps = create_test_agent_caps(false);

    let file_path = temp_dir.path().join("test.txt");
    std::fs::write(&file_path, "Test content").unwrap();

    let blocks = build_content_blocks_from_files(
        &[file_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    // Must be a link when the capability is off.
    assert!(
        matches!(&blocks[0], ContentBlock::ResourceLink { .. }),
        "Expected resource link block when capability is off"
    );
}
|
|
|
|
/// Scenario 5: several files that each fit under the per-file limit must all
/// produce a content block; none may be dropped.
///
/// The previous assertion (`blocks.len() >= 2`) would have passed even if one
/// of the three files had been silently discarded. The test now requires
/// exactly one block per input file.
#[test]
fn test_scenario_5_exceed_total_cap_link_remaining() {
    let temp_dir = TempDir::new().unwrap();

    // Create multiple small files. Each is 800 bytes, i.e. below the
    // 1000-byte `max_embed_bytes` configured by `create_test_config`, and the
    // file count (3) is below `max_files_per_prompt` (10).
    let file1 = temp_dir.path().join("file1.txt");
    let file2 = temp_dir.path().join("file2.txt");
    let file3 = temp_dir.path().join("file3.txt");

    let content = "x".repeat(800);
    std::fs::write(&file1, &content).unwrap();
    std::fs::write(&file2, &content).unwrap();
    std::fs::write(&file3, &content).unwrap();
    // Total: 2400 bytes, but max_embed_bytes * max_files_per_prompt = 1000 * 10 = 10000,
    // so no file is expected to be denied in this test.

    let agent_caps = create_test_agent_caps(true);
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);

    let blocks = build_content_blocks_from_files(
        &[file1, file2, file3],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    // All should be embedded or linked (not denied): one block per file.
    assert_eq!(
        blocks.len(),
        3,
        "Should have one content block per input file"
    );
}
|
|
|
|
/// Scenario 6: passing more files than `max_files_per_prompt` allows yields
/// no more blocks than the limit.
#[test]
fn test_scenario_6_exceed_file_count_deny() {
    let temp_dir = TempDir::new().unwrap();

    // Default embedding settings, except that only 2 files are allowed.
    let embedding_config = EmbeddingConfig {
        max_files_per_prompt: 2,
        ..EmbeddingConfig::default()
    };

    let mut sandbox_config = SandboxConfig::default();
    sandbox_config.allowed_roots = vec![temp_dir.path().to_path_buf()];
    sandbox_config.normalize_roots();

    let agent_caps = create_test_agent_caps(true);

    // Create 3 files, one more than the limit.
    let file1 = temp_dir.path().join("file1.txt");
    let file2 = temp_dir.path().join("file2.txt");
    let file3 = temp_dir.path().join("file3.txt");
    std::fs::write(&file1, "File 1").unwrap();
    std::fs::write(&file2, "File 2").unwrap();
    std::fs::write(&file3, "File 3").unwrap();

    let blocks = build_content_blocks_from_files(
        &[file1, file2, file3],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    // Should have at most 2 blocks (third file denied).
    // NOTE(review): this bound is also satisfied by an empty result; confirm
    // whether exactly 2 blocks is the intended contract.
    assert!(blocks.len() <= 2, "Should have at most 2 content blocks");
}
|
|
|
|
/// Scenario 7: configured redaction patterns are applied to embedded text, so
/// the secret is absent and a `REDACTED` marker is present.
#[test]
fn test_scenario_7_redaction_patterns_applied() {
    let temp_dir = TempDir::new().unwrap();

    let file_path = temp_dir.path().join("secrets.txt");
    std::fs::write(&file_path, "api_key: sk-1234567890abcdef").unwrap();

    let agent_caps = create_test_agent_caps(true);

    // Default embedding settings plus a single pattern matching `api_key: <value>`.
    let embedding_config = EmbeddingConfig {
        redact_patterns: vec![r"(?i)(api[_-]?key):\s*([a-zA-Z0-9_\-\.]+)".to_string()],
        ..EmbeddingConfig::default()
    };

    let mut sandbox_config = SandboxConfig::default();
    sandbox_config.allowed_roots = vec![temp_dir.path().to_path_buf()];
    sandbox_config.normalize_roots();

    let blocks = build_content_blocks_from_files(
        &[file_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 1, "Should have one content block");

    // Arms are ordered from most to least specific so each failure keeps its
    // own message.
    match &blocks[0] {
        ContentBlock::Resource {
            resource: EmbeddedResource::Text { text, .. },
            ..
        } => {
            // Verify that the API key is redacted.
            let secret_leaked = text.contains("sk-1234567890abcdef");
            assert!(!secret_leaked, "Secret should be redacted");
            assert!(text.contains("REDACTED"), "Should contain redaction marker");
        }
        ContentBlock::Resource { .. } => panic!("Expected text resource"),
        _ => panic!("Expected resource block"),
    }
}
|
|
|
|
/// Scenario 8: a file outside the sandbox's allowed roots is rejected with an
/// error whose debug representation mentions `SandboxViolation`.
#[test]
fn test_scenario_8_sandbox_violation_deny() {
    // `temp_dir` is the only allowed root; the file lives in a second,
    // unrelated temporary directory.
    let temp_dir = TempDir::new().unwrap();
    let outside_dir = TempDir::new().unwrap();

    let file_path = outside_dir.path().join("outside.txt");
    std::fs::write(&file_path, "Outside sandbox").unwrap();

    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);
    let agent_caps = create_test_agent_caps(true);

    let result = build_content_blocks_from_files(
        &[file_path],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    );

    // Should fail with sandbox violation.
    let err = match result {
        Err(err) => err,
        Ok(_) => panic!("Should fail with sandbox violation"),
    };

    let rendered = format!("{:?}", err);
    assert!(
        rendered.contains("SandboxViolation"),
        "Error should be SandboxViolation"
    );
}
|
|
|
|
/// Scenario 9: a prompt mixing a small text file, an oversized text file and
/// a binary file yields one block per file, in input order: embedded, link,
/// link.
#[test]
fn test_scenario_9_mixed_strategies() {
    let temp_dir = TempDir::new().unwrap();
    let root = temp_dir.path();

    // Small text file (will be embedded).
    let small_file = root.join("small.txt");
    std::fs::write(&small_file, "Small content").unwrap();

    // Large text file (will be linked).
    let large_file = root.join("large.txt");
    std::fs::write(&large_file, "x".repeat(2000)).unwrap();

    // Binary file (will be linked).
    let binary_file = root.join("image.png");
    std::fs::write(&binary_file, b"\x89PNG\r\n\x1a\n").unwrap();

    let agent_caps = create_test_agent_caps(true);
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);

    let blocks = build_content_blocks_from_files(
        &[small_file, large_file, binary_file],
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();

    assert_eq!(blocks.len(), 3, "Should have three content blocks");

    // First should be embedded (small file).
    assert!(
        matches!(&blocks[0], ContentBlock::Resource { .. }),
        "Expected first block to be Resource (embedded)"
    );

    // Second and third should be links (large file, then binary file).
    assert!(
        matches!(&blocks[1], ContentBlock::ResourceLink { .. }),
        "Expected second block to be ResourceLink"
    );
    assert!(
        matches!(&blocks[2], ContentBlock::ResourceLink { .. }),
        "Expected third block to be ResourceLink"
    );
}
|
|
|
|
/// Building content blocks twice for the same file must yield the same URI on
/// the embedded text resource.
#[test]
fn test_uri_stability() {
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("stable.txt");
    std::fs::write(&file_path, "Stable content").unwrap();

    let agent_caps = create_test_agent_caps(true);
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);

    // Both invocations use identical inputs.
    let paths = [file_path];
    let build = || {
        build_content_blocks_from_files(&paths, &agent_caps, &embedding_config, &sandbox_config)
            .unwrap()
    };

    let first_run = build();
    let second_run = build();

    // Extract URIs and compare.
    match (&first_run[0], &second_run[0]) {
        (
            ContentBlock::Resource {
                resource: EmbeddedResource::Text { uri: first_uri, .. },
                ..
            },
            ContentBlock::Resource {
                resource: EmbeddedResource::Text { uri: second_uri, .. },
                ..
            },
        ) => assert_eq!(first_uri, second_uri, "URIs should be stable"),
        _ => panic!("Expected resource blocks with text"),
    }
}
|
|
|
|
/// Building content blocks for ten small files yields ten blocks and
/// finishes in under 500 ms of wall-clock time.
#[test]
fn test_performance_many_files() {
    let temp_dir = TempDir::new().unwrap();

    // Create 10 files named file0.txt .. file9.txt.
    let files: Vec<_> = (0..10)
        .map(|i| {
            let path = temp_dir.path().join(format!("file{}.txt", i));
            std::fs::write(&path, format!("Content {}", i)).unwrap();
            path
        })
        .collect();

    let agent_caps = create_test_agent_caps(true);
    let (embedding_config, sandbox_config) = create_test_config(&temp_dir);

    // Only the call under test is timed; fixture setup is excluded.
    let timer = std::time::Instant::now();
    let blocks = build_content_blocks_from_files(
        &files,
        &agent_caps,
        &embedding_config,
        &sandbox_config,
    )
    .unwrap();
    let elapsed_ms = timer.elapsed().as_millis();

    assert_eq!(blocks.len(), 10, "Should have 10 content blocks");
    assert!(
        elapsed_ms < 500,
        "Should complete in less than 500ms, took {}ms",
        elapsed_ms
    );
}
|