Files
dirigent/crates/dirigent_archivist/examples/file_storage.rs
T
2026-05-08 01:59:04 +02:00

215 lines
7.3 KiB
Rust

//! File storage example for dirigent_archivist
//!
//! This example demonstrates:
//! - Storing files with content-addressing
//! - Retrieving files by file_id
//! - Automatic deduplication of identical content
//! - Session tracking for file references
use dirigent_archivist::storage::{files, ndjson, paths::ArchivePaths};
use dirigent_archivist::types::FileRecord;
use dirigent_archivist::Result;
use uuid::Uuid;
#[tokio::main]
async fn main() -> Result<()> {
// Create a temporary archive directory for this example
let temp_dir = std::env::temp_dir().join(format!("dirigent_files_example_{}", Uuid::now_v7()));
println!("Creating archive at: {}", temp_dir.display());
let paths = ArchivePaths::new(temp_dir.clone());
// Example 1: Store a file
println!("\n--- Example 1: Store a File ---");
let content1 = b"This is a sample document with some text content.";
let session1 = Uuid::now_v7();
let file_id1 = files::store_file(
&paths,
content1,
"document.txt".to_string(),
Some("text/plain".to_string()),
session1,
)
.await?;
println!("Stored file with ID: {}", file_id1);
println!("Session: {}", session1);
// Example 2: Retrieve the file
println!("\n--- Example 2: Retrieve the File ---");
let retrieved1 = files::get_file(&paths, &file_id1).await?;
println!("Retrieved {} bytes", retrieved1.len());
println!("Content: {}", String::from_utf8_lossy(&retrieved1));
// Example 3: Store the same content from a different session (deduplication)
println!("\n--- Example 3: Deduplication Demo ---");
let session2 = Uuid::now_v7();
let file_id2 = files::store_file(
&paths,
content1, // Same content as before
"duplicate.txt".to_string(), // Different name
Some("text/plain".to_string()),
session2,
)
.await?;
println!("Stored same content with different name");
println!("File ID 1: {}", file_id1);
println!("File ID 2: {}", file_id2);
println!("Same file_id? {}", file_id1 == file_id2);
println!("\nDeduplication: Same content produces same file_id, stored only once!");
// Example 4: Check the file index
println!("\n--- Example 4: File Index ---");
let index_path = paths.root().join(".files").join("file_index.jsonl");
let records: Vec<FileRecord> = ndjson::read_ndjson(&index_path).await?;
println!("File index contains {} record(s)", records.len());
for record in &records {
println!("\nFile: {}", record.file_id);
println!(" Original name: {}", record.original_name);
println!(" MIME type: {:?}", record.mime);
println!(" Size: {} bytes", record.size);
println!(" Referenced by {} session(s):", record.sessions.len());
for session_id in &record.sessions {
println!(" - {}", session_id);
}
}
// Example 5: Store different content
println!("\n--- Example 5: Store Different Content ---");
let content2 = b"This is completely different content with more data!";
let session3 = Uuid::now_v7();
let file_id3 = files::store_file(
&paths,
content2,
"different.txt".to_string(),
Some("text/plain".to_string()),
session3,
)
.await?;
println!("Stored different content");
println!("File ID 3: {}", file_id3);
println!("Different from file_id1? {}", file_id1 != file_id3);
// Example 6: Store binary content
println!("\n--- Example 6: Binary Content ---");
let binary_content: Vec<u8> = (0..256).map(|i| i as u8).collect();
let session4 = Uuid::now_v7();
let file_id4 = files::store_file(
&paths,
&binary_content,
"binary.dat".to_string(),
Some("application/octet-stream".to_string()),
session4,
)
.await?;
println!("Stored binary content (256 bytes)");
println!("File ID: {}", file_id4);
// Retrieve and verify
let retrieved_binary = files::get_file(&paths, &file_id4).await?;
println!("Retrieved {} bytes", retrieved_binary.len());
println!(
"Binary content verified: {}",
retrieved_binary == binary_content
);
// Example 7: Show final archive structure
println!("\n--- Example 7: Archive Structure ---");
println!("Archive root: {}", temp_dir.display());
show_files_directory(&paths)?;
// Example 8: Final statistics
println!("\n--- Final Statistics ---");
let final_records: Vec<FileRecord> = ndjson::read_ndjson(&index_path).await?;
println!("Total unique files stored: {}", final_records.len());
let total_sessions: usize = final_records.iter().map(|r| r.sessions.len()).sum();
println!("Total session references: {}", total_sessions);
let total_size: u64 = final_records.iter().map(|r| r.size).sum();
println!("Total storage used: {} bytes", total_size);
// Content-addressing means if we had stored content1 1000 times,
// we'd still only use storage for it once!
println!("\nContent-addressing benefit:");
println!(" File '{}' is referenced by {} sessions", file_id1, 2);
println!(" But stored only once on disk!");
// Cleanup
println!("\n--- Cleanup ---");
std::fs::remove_dir_all(&temp_dir)?;
println!("Removed temporary archive");
println!("\nExample completed successfully!");
Ok(())
}
/// Prints the contents of the archive's `.files` directory to stdout.
///
/// The index file `file_index.jsonl` is listed first (when present) with its
/// size. After that, every directory directly under `.files` is listed along
/// with its children; a child that is itself a directory has its entries
/// listed one level further. Plain files sitting directly under `.files` are
/// not listed by the directory walk.
///
/// Returns `Ok(())` without walking anything when `.files` does not exist.
/// Any I/O error from reading a directory or a file's metadata is propagated.
fn show_files_directory(paths: &ArchivePaths) -> Result<()> {
    let files_dir = paths.root().join(".files");
    if !files_dir.exists() {
        println!("No files directory found");
        return Ok(());
    }
    println!("\n.files/ directory:");

    // Index file gets its own line ahead of the shard listing.
    let index_path = files_dir.join("file_index.jsonl");
    if index_path.exists() {
        println!(
            " file_index.jsonl ({} bytes)",
            std::fs::metadata(&index_path)?.len()
        );
    }

    // Shard directories: only directories are descended into.
    for shard in std::fs::read_dir(&files_dir)? {
        let shard = shard?;
        let shard_path = shard.path();
        if !shard_path.is_dir() {
            continue;
        }
        println!(" {}/", shard.file_name().to_string_lossy());

        // Entries inside a shard are either sub-shard directories or files.
        for child in std::fs::read_dir(&shard_path)? {
            let child = child?;
            let child_path = child.path();
            if child_path.is_dir() {
                println!(" {}/", child.file_name().to_string_lossy());
                // Entries inside a sub-shard are reported with their size.
                for leaf in std::fs::read_dir(&child_path)? {
                    let leaf = leaf?;
                    let leaf_size = std::fs::metadata(leaf.path())?.len();
                    println!(
                        " {} ({} bytes)",
                        leaf.file_name().to_string_lossy(),
                        leaf_size
                    );
                }
            } else {
                let child_size = std::fs::metadata(&child_path)?.len();
                println!(
                    " {} ({} bytes)",
                    child.file_name().to_string_lossy(),
                    child_size
                );
            }
        }
    }
    Ok(())
}