//! File storage example for dirigent_archivist //! //! This example demonstrates: //! - Storing files with content-addressing //! - Retrieving files by file_id //! - Automatic deduplication of identical content //! - Session tracking for file references use dirigent_archivist::storage::{files, ndjson, paths::ArchivePaths}; use dirigent_archivist::types::FileRecord; use dirigent_archivist::Result; use uuid::Uuid; #[tokio::main] async fn main() -> Result<()> { // Create a temporary archive directory for this example let temp_dir = std::env::temp_dir().join(format!("dirigent_files_example_{}", Uuid::now_v7())); println!("Creating archive at: {}", temp_dir.display()); let paths = ArchivePaths::new(temp_dir.clone()); // Example 1: Store a file println!("\n--- Example 1: Store a File ---"); let content1 = b"This is a sample document with some text content."; let session1 = Uuid::now_v7(); let file_id1 = files::store_file( &paths, content1, "document.txt".to_string(), Some("text/plain".to_string()), session1, ) .await?; println!("Stored file with ID: {}", file_id1); println!("Session: {}", session1); // Example 2: Retrieve the file println!("\n--- Example 2: Retrieve the File ---"); let retrieved1 = files::get_file(&paths, &file_id1).await?; println!("Retrieved {} bytes", retrieved1.len()); println!("Content: {}", String::from_utf8_lossy(&retrieved1)); // Example 3: Store the same content from a different session (deduplication) println!("\n--- Example 3: Deduplication Demo ---"); let session2 = Uuid::now_v7(); let file_id2 = files::store_file( &paths, content1, // Same content as before "duplicate.txt".to_string(), // Different name Some("text/plain".to_string()), session2, ) .await?; println!("Stored same content with different name"); println!("File ID 1: {}", file_id1); println!("File ID 2: {}", file_id2); println!("Same file_id? {}", file_id1 == file_id2); println!("\nDeduplication: Same content produces same file_id, stored only once!"); // Example 4: Check the file index println!("\n--- Example 4: File Index ---"); let index_path = paths.root().join(".files").join("file_index.jsonl"); let records: Vec = ndjson::read_ndjson(&index_path).await?; println!("File index contains {} record(s)", records.len()); for record in &records { println!("\nFile: {}", record.file_id); println!(" Original name: {}", record.original_name); println!(" MIME type: {:?}", record.mime); println!(" Size: {} bytes", record.size); println!(" Referenced by {} session(s):", record.sessions.len()); for session_id in &record.sessions { println!(" - {}", session_id); } } // Example 5: Store different content println!("\n--- Example 5: Store Different Content ---"); let content2 = b"This is completely different content with more data!"; let session3 = Uuid::now_v7(); let file_id3 = files::store_file( &paths, content2, "different.txt".to_string(), Some("text/plain".to_string()), session3, ) .await?; println!("Stored different content"); println!("File ID 3: {}", file_id3); println!("Different from file_id1? {}", file_id1 != file_id3); // Example 6: Store binary content println!("\n--- Example 6: Binary Content ---"); let binary_content: Vec = (0..256).map(|i| i as u8).collect(); let session4 = Uuid::now_v7(); let file_id4 = files::store_file( &paths, &binary_content, "binary.dat".to_string(), Some("application/octet-stream".to_string()), session4, ) .await?; println!("Stored binary content (256 bytes)"); println!("File ID: {}", file_id4); // Retrieve and verify let retrieved_binary = files::get_file(&paths, &file_id4).await?; println!("Retrieved {} bytes", retrieved_binary.len()); println!( "Binary content verified: {}", retrieved_binary == binary_content ); // Example 7: Show final archive structure println!("\n--- Example 7: Archive Structure ---"); println!("Archive root: {}", temp_dir.display()); show_files_directory(&paths)?; // Example 8: Final statistics println!("\n--- Final Statistics ---"); let final_records: Vec = ndjson::read_ndjson(&index_path).await?; println!("Total unique files stored: {}", final_records.len()); let total_sessions: usize = final_records.iter().map(|r| r.sessions.len()).sum(); println!("Total session references: {}", total_sessions); let total_size: u64 = final_records.iter().map(|r| r.size).sum(); println!("Total storage used: {} bytes", total_size); // Content-addressing means if we had stored content1 1000 times, // we'd still only use storage for it once! println!("\nContent-addressing benefit:"); println!(" File '{}' is referenced by {} sessions", file_id1, 2); println!(" But stored only once on disk!"); // Cleanup println!("\n--- Cleanup ---"); std::fs::remove_dir_all(&temp_dir)?; println!("Removed temporary archive"); println!("\nExample completed successfully!"); Ok(()) } /// Helper function to show .files directory structure fn show_files_directory(paths: &ArchivePaths) -> Result<()> { let files_dir = paths.root().join(".files"); if !files_dir.exists() { println!("No files directory found"); return Ok(()); } println!("\n.files/ directory:"); // Show index file let index_path = files_dir.join("file_index.jsonl"); if index_path.exists() { let metadata = std::fs::metadata(&index_path)?; println!(" file_index.jsonl ({} bytes)", metadata.len()); } // Show shard directories for entry in std::fs::read_dir(&files_dir)? { let entry = entry?; let path = entry.path(); if path.is_dir() { println!(" {}/", path.file_name().unwrap().to_string_lossy()); // Show files in shard for sub_entry in std::fs::read_dir(&path)? { let sub_entry = sub_entry?; let sub_path = sub_entry.path(); if sub_path.is_dir() { println!(" {}/", sub_path.file_name().unwrap().to_string_lossy()); // Show files in sub-shard for file_entry in std::fs::read_dir(&sub_path)? { let file_entry = file_entry?; let file_path = file_entry.path(); let metadata = std::fs::metadata(&file_path)?; println!( " {} ({} bytes)", file_path.file_name().unwrap().to_string_lossy(), metadata.len() ); } } else { let metadata = std::fs::metadata(&sub_path)?; println!( " {} ({} bytes)", sub_path.file_name().unwrap().to_string_lossy(), metadata.len() ); } } } } Ok(()) }