//! Integration tests for file embedding functionality. //! //! Tests the complete embedding pipeline from file paths to ContentBlocks. use dirigent_core::acp::content_blocks::build_content_blocks_from_files; use dirigent_core::acp::protocol::prompt::{ContentBlock, EmbeddedResource}; use dirigent_core::acp::{AgentCapabilities, PromptCapabilities}; use dirigent_tools::config::{EmbeddingConfig, SandboxConfig}; use tempfile::TempDir; /// Helper to create test agent capabilities. fn create_test_agent_caps(embedded_context: bool) -> AgentCapabilities { AgentCapabilities { load_session: None, prompt_capabilities: Some(PromptCapabilities { image: None, audio: None, embedded_context: Some(embedded_context), }), mcp: None, _meta: None, } } /// Helper to create test configuration. fn create_test_config(temp_dir: &TempDir) -> (EmbeddingConfig, SandboxConfig) { let embedding_config = EmbeddingConfig { max_embed_bytes: 1000, allow_resource_link: true, redact_patterns: vec![], snippet_strategy: dirigent_tools::config::SnippetStrategy::HeadTail, max_files_per_prompt: 10, }; let mut sandbox_config = SandboxConfig::default(); sandbox_config.allowed_roots = vec![temp_dir.path().to_path_buf()]; sandbox_config.normalize_roots(); (embedding_config, sandbox_config) } #[test] fn test_scenario_1_small_text_capability_on_embed() { // Scenario 1: Small text file + capability on → embed let temp_dir = TempDir::new().unwrap(); let file_path = temp_dir.path().join("small.txt"); std::fs::write(&file_path, "Hello, world!").unwrap(); let agent_caps = create_test_agent_caps(true); let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let blocks = build_content_blocks_from_files( &[file_path], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); assert_eq!(blocks.len(), 1, "Should have one content block"); match &blocks[0] { ContentBlock::Resource { resource, .. } => match resource { EmbeddedResource::Text { text, .. } => { assert_eq!(text, "Hello, world!"); } _ => panic!("Expected text resource"), }, _ => panic!("Expected resource block"), } } #[test] fn test_scenario_2_large_text_capability_on_link() { // Scenario 2: Large text file + capability on → link or snippet let temp_dir = TempDir::new().unwrap(); let file_path = temp_dir.path().join("large.txt"); let large_content = "x".repeat(2000); // Exceeds 1000 byte limit std::fs::write(&file_path, &large_content).unwrap(); let agent_caps = create_test_agent_caps(true); let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let blocks = build_content_blocks_from_files( &[file_path], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); assert_eq!(blocks.len(), 1, "Should have one content block"); match &blocks[0] { ContentBlock::ResourceLink { size, .. } => { assert_eq!(*size, Some(2000)); } _ => panic!("Expected resource link block"), } } #[test] fn test_scenario_3_binary_file_link() { // Scenario 3: Binary file → link let temp_dir = TempDir::new().unwrap(); let file_path = temp_dir.path().join("image.png"); std::fs::write(&file_path, b"\x89PNG\r\n\x1a\n").unwrap(); let agent_caps = create_test_agent_caps(true); let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let blocks = build_content_blocks_from_files( &[file_path], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); assert_eq!(blocks.len(), 1, "Should have one content block"); match &blocks[0] { ContentBlock::ResourceLink { mime_type, .. } => { assert_eq!(mime_type, &Some("image/png".to_string())); } _ => panic!("Expected resource link block"), } } #[test] fn test_scenario_4_capability_off_all_links() { // Scenario 4: Capability off → all links let temp_dir = TempDir::new().unwrap(); let file_path = temp_dir.path().join("test.txt"); std::fs::write(&file_path, "Test content").unwrap(); let agent_caps = create_test_agent_caps(false); // Capability OFF let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let blocks = build_content_blocks_from_files( &[file_path], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); assert_eq!(blocks.len(), 1, "Should have one content block"); match &blocks[0] { ContentBlock::ResourceLink { .. } => { // Correct - should be link when capability is off } _ => panic!("Expected resource link block when capability is off"), } } #[test] fn test_scenario_5_exceed_total_cap_link_remaining() { // Scenario 5: Exceed total cap → deny or link remaining let temp_dir = TempDir::new().unwrap(); // Create multiple small files that together exceed the total cap let file1 = temp_dir.path().join("file1.txt"); let file2 = temp_dir.path().join("file2.txt"); let file3 = temp_dir.path().join("file3.txt"); let content = "x".repeat(800); // Each file is 800 bytes std::fs::write(&file1, &content).unwrap(); std::fs::write(&file2, &content).unwrap(); std::fs::write(&file3, &content).unwrap(); // Total: 2400 bytes, but max_embed_bytes * max_files_per_prompt = 1000 * 10 = 10000 // So they should all embed in this test let agent_caps = create_test_agent_caps(true); let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let blocks = build_content_blocks_from_files( &[file1, file2, file3], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); // All should be embedded or linked (not denied) assert!(blocks.len() >= 2, "Should have at least 2 content blocks"); } #[test] fn test_scenario_6_exceed_file_count_deny() { // Scenario 6: Exceed file count → deny let temp_dir = TempDir::new().unwrap(); let mut embedding_config = EmbeddingConfig::default(); embedding_config.max_files_per_prompt = 2; // Limit to 2 files let mut sandbox_config = SandboxConfig::default(); sandbox_config.allowed_roots = vec![temp_dir.path().to_path_buf()]; sandbox_config.normalize_roots(); let agent_caps = create_test_agent_caps(true); // Create 3 files let file1 = temp_dir.path().join("file1.txt"); let file2 = temp_dir.path().join("file2.txt"); let file3 = temp_dir.path().join("file3.txt"); std::fs::write(&file1, "File 1").unwrap(); std::fs::write(&file2, "File 2").unwrap(); std::fs::write(&file3, "File 3").unwrap(); let blocks = build_content_blocks_from_files( &[file1, file2, file3], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); // Should have at most 2 blocks (third file denied) assert!(blocks.len() <= 2, "Should have at most 2 content blocks"); } #[test] fn test_scenario_7_redaction_patterns_applied() { // Scenario 7: Redaction patterns applied → verify content redacted let temp_dir = TempDir::new().unwrap(); let file_path = temp_dir.path().join("secrets.txt"); std::fs::write(&file_path, "api_key: sk-1234567890abcdef").unwrap(); let agent_caps = create_test_agent_caps(true); let mut embedding_config = EmbeddingConfig::default(); embedding_config.redact_patterns = vec![ r"(?i)(api[_-]?key):\s*([a-zA-Z0-9_\-\.]+)".to_string(), ]; let mut sandbox_config = SandboxConfig::default(); sandbox_config.allowed_roots = vec![temp_dir.path().to_path_buf()]; sandbox_config.normalize_roots(); let blocks = build_content_blocks_from_files( &[file_path], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); assert_eq!(blocks.len(), 1, "Should have one content block"); match &blocks[0] { ContentBlock::Resource { resource, .. } => match resource { EmbeddedResource::Text { text, .. } => { // Verify that the API key is redacted assert!( !text.contains("sk-1234567890abcdef"), "Secret should be redacted" ); assert!(text.contains("REDACTED"), "Should contain redaction marker"); } _ => panic!("Expected text resource"), }, _ => panic!("Expected resource block"), } } #[test] fn test_scenario_8_sandbox_violation_deny() { // Scenario 8: Sandbox violation → deny with clear error let temp_dir = TempDir::new().unwrap(); let outside_dir = TempDir::new().unwrap(); let file_path = outside_dir.path().join("outside.txt"); std::fs::write(&file_path, "Outside sandbox").unwrap(); let agent_caps = create_test_agent_caps(true); let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let result = build_content_blocks_from_files( &[file_path], &agent_caps, &embedding_config, &sandbox_config, ); // Should fail with sandbox violation assert!(result.is_err(), "Should fail with sandbox violation"); let err = result.unwrap_err(); assert!( format!("{:?}", err).contains("SandboxViolation"), "Error should be SandboxViolation" ); } #[test] fn test_scenario_9_mixed_strategies() { // Scenario 9: Mixed strategies in one prompt → correct blocks let temp_dir = TempDir::new().unwrap(); // Small text file (will be embedded) let small_file = temp_dir.path().join("small.txt"); std::fs::write(&small_file, "Small content").unwrap(); // Large text file (will be linked) let large_file = temp_dir.path().join("large.txt"); let large_content = "x".repeat(2000); std::fs::write(&large_file, &large_content).unwrap(); // Binary file (will be linked) let binary_file = temp_dir.path().join("image.png"); std::fs::write(&binary_file, b"\x89PNG\r\n\x1a\n").unwrap(); let agent_caps = create_test_agent_caps(true); let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let blocks = build_content_blocks_from_files( &[small_file, large_file, binary_file], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); assert_eq!(blocks.len(), 3, "Should have three content blocks"); // First should be embedded match &blocks[0] { ContentBlock::Resource { .. } => { // Correct - small file is embedded } _ => panic!("Expected first block to be Resource (embedded)"), } // Second and third should be links match &blocks[1] { ContentBlock::ResourceLink { .. } => { // Correct - large file is linked } _ => panic!("Expected second block to be ResourceLink"), } match &blocks[2] { ContentBlock::ResourceLink { .. } => { // Correct - binary file is linked } _ => panic!("Expected third block to be ResourceLink"), } } #[test] fn test_uri_stability() { // Test that URIs are stable across multiple invocations let temp_dir = TempDir::new().unwrap(); let file_path = temp_dir.path().join("stable.txt"); std::fs::write(&file_path, "Stable content").unwrap(); let agent_caps = create_test_agent_caps(true); let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let blocks1 = build_content_blocks_from_files( &[file_path.clone()], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); let blocks2 = build_content_blocks_from_files( &[file_path], &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); // Extract URIs and compare match (&blocks1[0], &blocks2[0]) { ( ContentBlock::Resource { resource: EmbeddedResource::Text { uri: uri1, .. }, .. }, ContentBlock::Resource { resource: EmbeddedResource::Text { uri: uri2, .. }, .. }, ) => { assert_eq!(uri1, uri2, "URIs should be stable"); } _ => panic!("Expected resource blocks with text"), } } #[test] fn test_performance_many_files() { // Test that building content blocks for many files is reasonably fast let temp_dir = TempDir::new().unwrap(); // Create 10 files let mut files = Vec::new(); for i in 0..10 { let file_path = temp_dir.path().join(format!("file{}.txt", i)); std::fs::write(&file_path, format!("Content {}", i)).unwrap(); files.push(file_path); } let agent_caps = create_test_agent_caps(true); let (embedding_config, sandbox_config) = create_test_config(&temp_dir); let start = std::time::Instant::now(); let blocks = build_content_blocks_from_files( &files, &agent_caps, &embedding_config, &sandbox_config, ) .unwrap(); let elapsed = start.elapsed(); assert_eq!(blocks.len(), 10, "Should have 10 content blocks"); assert!( elapsed.as_millis() < 500, "Should complete in less than 500ms, took {}ms", elapsed.as_millis() ); }