Files
dirigent/crates/dirigent_archivist/src/storage/tsv.rs
T
2026-05-08 01:59:04 +02:00

553 lines
18 KiB
Rust

//! TSV (Tab-Separated Values) storage utilities.
//!
//! Handles reading and writing TSV files for session listings and indices.
//! TSV format is human-readable and grep-able, making it ideal for manual
//! inspection and command-line processing.
use crate::types::ConnectorIndexRow;
use std::path::Path;
use tokio::io::AsyncWriteExt;
use uuid::Uuid;
/// Write the connector index to a TSV file, replacing any previous contents.
///
/// This function:
/// 1. Renders the header line and one tab-separated line per row in memory
/// 2. Writes the result to a temporary file (`path` with its extension
///    replaced by `tmp`) and syncs it to disk
/// 3. Renames the temporary file onto `path`, so the target is swapped in a
///    single step instead of being rewritten in place
///
/// TSV format:
/// ```text
/// connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\tfingerprint
/// 018c8f7e-...\tOpenCode\tLocal Dev\topencode@...\t\t2025-01-15T12:34:56+00:00\t
/// ```
///
/// `alias_of` and `fingerprint` are written as empty fields when they are
/// `None`. Tab, carriage-return and line-feed characters inside `type`,
/// `title`, `client_native_id` and `fingerprint` are replaced by a single
/// space: written verbatim they would change the column or line count and the
/// file could no longer be parsed by `read_connector_index`.
///
/// # Arguments
/// * `path` - Path to the TSV file
/// * `rows` - Rows to write
///
/// # Errors
/// Returns the I/O error raised while creating, writing, syncing or renaming
/// the temporary file. On failure the temporary file is removed on a
/// best-effort basis and `path` is not modified by this function.
pub async fn write_connector_index(path: &Path, rows: &[ConnectorIndexRow]) -> std::io::Result<()> {
    // Generate header
    let header = "connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\tfingerprint\n";

    // Format rows
    let mut content = String::from(header);
    for row in rows {
        let alias_of_str = row
            .alias_of
            .as_ref()
            .map(|u| u.to_string())
            .unwrap_or_default();
        let fingerprint_str = sanitize_tsv_field(row.fingerprint.as_deref().unwrap_or(""));
        let line = format!(
            "{}\t{}\t{}\t{}\t{}\t{}\t{}\n",
            row.connector_uid,
            sanitize_tsv_field(&row.r#type),
            sanitize_tsv_field(&row.title),
            sanitize_tsv_field(&row.client_native_id),
            alias_of_str,
            row.created_at.to_rfc3339(),
            fingerprint_str,
        );
        content.push_str(&line);
    }

    // Write to the temp file, then rename it over the target. The steps are
    // grouped so that a failure at any point can be followed by cleanup.
    let temp_path = path.with_extension("tmp");
    let outcome: std::io::Result<()> = async {
        let mut file = tokio::fs::File::create(&temp_path).await?;
        file.write_all(content.as_bytes()).await?;
        file.sync_all().await?;
        // Close the handle before renaming.
        drop(file);
        tokio::fs::rename(&temp_path, path).await
    }
    .await;

    if outcome.is_err() {
        // Best effort: do not leave a stale temp file behind. The original
        // error is the one worth reporting, so a cleanup failure is ignored.
        let _ = tokio::fs::remove_file(&temp_path).await;
    }
    outcome
}

/// Returns `true` for characters that act as column or row delimiters in TSV.
fn is_tsv_delimiter(c: char) -> bool {
    matches!(c, '\t' | '\n' | '\r')
}

/// Replace TSV delimiter characters in `value` with spaces.
///
/// Borrows the input unchanged when it contains no delimiter, so the common
/// case does not allocate.
fn sanitize_tsv_field(value: &str) -> std::borrow::Cow<'_, str> {
    if value.contains(is_tsv_delimiter) {
        std::borrow::Cow::Owned(value.replace(is_tsv_delimiter, " "))
    } else {
        std::borrow::Cow::Borrowed(value)
    }
}
/// Read the connector index from a TSV file.
///
/// The first line is treated as the header and skipped without validation;
/// blank lines are ignored. Every other line must have either 6 columns
/// (legacy layout without `fingerprint`) or 7 columns:
///
/// ```text
/// connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at[\tfingerprint]
/// ```
///
/// An empty `alias_of` or `fingerprint` column is read as `None`.
/// `created_at` is parsed as RFC 3339 and converted to UTC.
///
/// # Arguments
/// * `path` - Path to the TSV file
///
/// # Returns
/// Vector of connector index rows, in file order. If the file does not exist,
/// an empty vector is returned.
///
/// # Errors
/// * `InvalidData` if a line has the wrong number of columns or contains an
///   unparsable UUID or timestamp; the message includes the 1-based line number.
/// * Any other I/O error from reading the file (for example a permission
///   error). Only "not found" is mapped to an empty result, so an unreadable
///   index is not mistaken for an empty one.
pub async fn read_connector_index(path: &Path) -> std::io::Result<Vec<ConnectorIndexRow>> {
    // Read directly and inspect the error instead of checking for existence
    // first: this avoids a blocking metadata call inside async code and the
    // window in which the file could disappear between the check and the read.
    let content = match tokio::fs::read_to_string(path).await {
        Ok(content) => content,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
        Err(e) => return Err(e),
    };

    // All parse failures are reported with the same error kind.
    let invalid_data =
        |message: String| std::io::Error::new(std::io::ErrorKind::InvalidData, message);

    let mut rows = Vec::new();
    // Enumerate before skipping the header so that `index` stays the
    // zero-based position of the line within the file.
    for (index, line) in content.lines().enumerate().skip(1) {
        // Skip empty lines
        if line.trim().is_empty() {
            continue;
        }
        let line_no = index + 1;

        // Split by tab
        let parts: Vec<&str> = line.split('\t').collect();

        // Accept 6 columns (legacy, no fingerprint) or 7 columns (with fingerprint)
        if parts.len() != 6 && parts.len() != 7 {
            return Err(invalid_data(format!(
                "Invalid TSV format at line {}: expected 6 or 7 fields, got {}",
                line_no,
                parts.len()
            )));
        }

        // Parse fields
        let connector_uid = Uuid::parse_str(parts[0])
            .map_err(|e| invalid_data(format!("Invalid UUID at line {}: {}", line_no, e)))?;
        let r#type = parts[1].to_string();
        let title = parts[2].to_string();
        let client_native_id = parts[3].to_string();
        let alias_of = if parts[4].is_empty() {
            None
        } else {
            Some(Uuid::parse_str(parts[4]).map_err(|e| {
                invalid_data(format!("Invalid alias_of UUID at line {}: {}", line_no, e))
            })?)
        };
        let created_at = chrono::DateTime::parse_from_rfc3339(parts[5])
            .map_err(|e| invalid_data(format!("Invalid timestamp at line {}: {}", line_no, e)))?
            .with_timezone(&chrono::Utc);

        // Optional fingerprint (7th column, absent in legacy files)
        let fingerprint = parts
            .get(6)
            .filter(|value| !value.is_empty())
            .map(|value| (*value).to_string());

        rows.push(ConnectorIndexRow {
            connector_uid,
            r#type,
            title,
            client_native_id,
            alias_of,
            created_at,
            fingerprint,
        });
    }
    Ok(rows)
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{DateTime, Utc};
    use std::path::{Path, PathBuf};
    use std::time::SystemTime;

    /// Header line of the legacy six-column layout (no `fingerprint` column).
    const LEGACY_HEADER: &str =
        "connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at";

    /// Uniquely named TSV path in the system temp directory.
    ///
    /// The file (and its `.tmp` sibling, if any) is removed when the guard is
    /// dropped, so a failing assertion does not leave files behind.
    struct TempTsv(PathBuf);

    impl TempTsv {
        fn new(prefix: &str) -> Self {
            TempTsv(std::env::temp_dir().join(format!("{}_{}.tsv", prefix, Uuid::now_v7())))
        }

        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempTsv {
        fn drop(&mut self) {
            // Best effort; the file may legitimately not exist.
            let _ = std::fs::remove_file(&self.0);
            let _ = std::fs::remove_file(self.0.with_extension("tmp"));
        }
    }

    /// Current time as a UTC timestamp.
    fn now() -> DateTime<Utc> {
        DateTime::<Utc>::from(SystemTime::now())
    }

    /// Build a row with `created_at` set to the current time.
    fn row(
        connector_uid: Uuid,
        kind: &str,
        title: &str,
        client_native_id: &str,
        alias_of: Option<Uuid>,
        fingerprint: Option<&str>,
    ) -> ConnectorIndexRow {
        ConnectorIndexRow {
            connector_uid,
            r#type: kind.to_string(),
            title: title.to_string(),
            client_native_id: client_native_id.to_string(),
            alias_of,
            created_at: now(),
            fingerprint: fingerprint.map(String::from),
        }
    }

    /// Assert that `result` is an `InvalidData` error whose message contains `needle`.
    fn assert_invalid_data(result: std::io::Result<Vec<ConnectorIndexRow>>, needle: &str) {
        match result {
            Err(e) => {
                assert_eq!(e.kind(), std::io::ErrorKind::InvalidData);
                assert!(
                    e.to_string().contains(needle),
                    "unexpected error message: {}",
                    e
                );
            }
            Ok(_) => panic!("Expected error"),
        }
    }

    #[tokio::test]
    async fn test_write_and_read_roundtrip() {
        let file = TempTsv::new("test_tsv");
        let uid1 = Uuid::now_v7();
        let uid2 = Uuid::now_v7();
        let uid3 = Uuid::now_v7();
        let rows = vec![
            row(
                uid1,
                "OpenCode",
                "Local Dev",
                "opencode@localhost:12225",
                None,
                None,
            ),
            row(
                uid2,
                "ACP",
                "Remote Agent",
                "acp@http://localhost:3000",
                Some(uid3),
                None,
            ),
        ];

        write_connector_index(file.path(), &rows).await.unwrap();
        let read_rows = read_connector_index(file.path()).await.unwrap();

        assert_eq!(read_rows.len(), 2);
        assert_eq!(read_rows[0].connector_uid, uid1);
        assert_eq!(read_rows[0].r#type, "OpenCode");
        assert_eq!(read_rows[0].title, "Local Dev");
        assert_eq!(read_rows[0].alias_of, None);
        assert_eq!(read_rows[1].connector_uid, uid2);
        assert_eq!(read_rows[1].r#type, "ACP");
        assert_eq!(read_rows[1].alias_of, Some(uid3));
    }

    #[tokio::test]
    async fn test_optional_field_handling() {
        let file = TempTsv::new("test_optional");
        let uid1 = Uuid::now_v7();
        let uid2 = Uuid::now_v7();
        let rows = vec![
            // Empty alias_of
            row(uid1, "Type1", "Title1", "client1", None, None),
            // Non-empty alias_of
            row(uid2, "Type2", "Title2", "client2", Some(uid1), None),
        ];

        write_connector_index(file.path(), &rows).await.unwrap();

        // Inspect the alias_of column (index 4) of the raw file directly.
        let content = tokio::fs::read_to_string(file.path()).await.unwrap();
        let lines: Vec<&str> = content.lines().collect();
        assert_eq!(lines.len(), 3, "header plus two data lines");
        let first: Vec<&str> = lines[1].split('\t').collect();
        let second: Vec<&str> = lines[2].split('\t').collect();
        assert_eq!(first[4], "", "None must be written as an empty field");
        assert_eq!(second[4], uid1.to_string());

        // Read back
        let read_rows = read_connector_index(file.path()).await.unwrap();
        assert_eq!(read_rows[0].alias_of, None);
        assert_eq!(read_rows[1].alias_of, Some(uid1));
    }

    #[tokio::test]
    async fn test_header_generation() {
        let file = TempTsv::new("test_header");

        // Write empty index
        write_connector_index(file.path(), &[]).await.unwrap();

        let content = tokio::fs::read_to_string(file.path()).await.unwrap();
        assert_eq!(
            content.trim(),
            "connector_uid\ttype\ttitle\tclient_native_id\talias_of\tcreated_at\tfingerprint"
        );
    }

    #[tokio::test]
    async fn test_rfc3339_timestamp_formatting() {
        let file = TempTsv::new("test_timestamp");
        let rows = vec![row(Uuid::now_v7(), "Test", "Title", "client", None, None)];
        let timestamp = rows[0].created_at;

        write_connector_index(file.path(), &rows).await.unwrap();

        // Check the created_at column itself (index 5); looking for 'T' in the
        // whole file would also match the "Test" and "Title" fields.
        let content = tokio::fs::read_to_string(file.path()).await.unwrap();
        let data_line = content
            .lines()
            .nth(1)
            .expect("one data line after the header");
        let stamp = data_line
            .split('\t')
            .nth(5)
            .expect("created_at column is present");
        assert!(
            DateTime::parse_from_rfc3339(stamp).is_ok(),
            "created_at is not RFC 3339: {}",
            stamp
        );

        // Read back and verify timestamp is preserved
        let read_rows = read_connector_index(file.path()).await.unwrap();
        let diff =
            (timestamp.timestamp_millis() - read_rows[0].created_at.timestamp_millis()).abs();
        assert!(diff < 1000, "Timestamp difference too large");
    }

    #[tokio::test]
    async fn test_missing_file_returns_empty_vec() {
        let file = TempTsv::new("nonexistent");

        // Should return empty vec, not error
        let rows = read_connector_index(file.path()).await.unwrap();
        assert_eq!(rows.len(), 0);
    }

    #[tokio::test]
    async fn test_malformed_tsv_error() {
        let file = TempTsv::new("test_malformed");

        // Data line with only two fields
        let content = format!("{}\nuid1\ttype1\n", LEGACY_HEADER);
        tokio::fs::write(file.path(), content).await.unwrap();

        assert_invalid_data(
            read_connector_index(file.path()).await,
            "expected 6 or 7 fields",
        );
    }

    #[tokio::test]
    async fn test_invalid_uuid_error() {
        let file = TempTsv::new("test_invalid_uuid");

        let content = format!(
            "{}\ninvalid-uuid\tType\tTitle\tClient\t\t2025-01-15T12:34:56Z\n",
            LEGACY_HEADER
        );
        tokio::fs::write(file.path(), content).await.unwrap();

        assert_invalid_data(read_connector_index(file.path()).await, "Invalid UUID");
    }

    #[tokio::test]
    async fn test_invalid_timestamp_error() {
        let file = TempTsv::new("test_invalid_timestamp");
        let uid = Uuid::now_v7();

        let content = format!(
            "{}\n{}\tType\tTitle\tClient\t\tinvalid-timestamp\n",
            LEGACY_HEADER, uid
        );
        tokio::fs::write(file.path(), content).await.unwrap();

        assert_invalid_data(
            read_connector_index(file.path()).await,
            "Invalid timestamp",
        );
    }

    #[tokio::test]
    async fn test_atomic_write() {
        let file = TempTsv::new("test_atomic");
        let rows1 = vec![row(
            Uuid::now_v7(),
            "First",
            "First Write",
            "client1",
            None,
            None,
        )];
        let rows2 = vec![row(
            Uuid::now_v7(),
            "Second",
            "Second Write",
            "client2",
            None,
            None,
        )];

        // Write and verify first version
        write_connector_index(file.path(), &rows1).await.unwrap();
        let read1 = read_connector_index(file.path()).await.unwrap();
        assert_eq!(read1.len(), 1);
        assert_eq!(read1[0].title, "First Write");

        // Overwrite and verify second version
        write_connector_index(file.path(), &rows2).await.unwrap();
        let read2 = read_connector_index(file.path()).await.unwrap();
        assert_eq!(read2.len(), 1);
        assert_eq!(read2[0].title, "Second Write");

        // Temp file should not exist after a successful write
        assert!(!file.path().with_extension("tmp").exists());
    }

    #[tokio::test]
    async fn test_legacy_six_column_tsv_compatibility() {
        let file = TempTsv::new("test_legacy_tsv");
        let uid = Uuid::now_v7();

        // Legacy 6-column TSV (no fingerprint column)
        let content = format!(
            "{}\n{}\tOpenCode\tLegacy\tclient-legacy\t\t2025-01-15T12:34:56Z\n",
            LEGACY_HEADER, uid
        );
        tokio::fs::write(file.path(), content).await.unwrap();

        // Should parse successfully with fingerprint = None
        let rows = read_connector_index(file.path()).await.unwrap();
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].connector_uid, uid);
        assert_eq!(rows[0].r#type, "OpenCode");
        assert_eq!(rows[0].title, "Legacy");
        assert_eq!(rows[0].fingerprint, None);
    }

    #[tokio::test]
    async fn test_fingerprint_roundtrip() {
        let file = TempTsv::new("test_fingerprint");
        let rows = vec![
            row(
                Uuid::now_v7(),
                "ACP",
                "Claude CLI",
                "acp-claude-1",
                None,
                Some("acp/stdio:/usr/bin/claude"),
            ),
            row(
                Uuid::now_v7(),
                "OpenCode",
                "No Fingerprint",
                "opencode@localhost",
                None,
                None,
            ),
        ];

        write_connector_index(file.path(), &rows).await.unwrap();
        let read_rows = read_connector_index(file.path()).await.unwrap();

        assert_eq!(read_rows.len(), 2);
        assert_eq!(
            read_rows[0].fingerprint,
            Some("acp/stdio:/usr/bin/claude".to_string())
        );
        assert_eq!(read_rows[1].fingerprint, None);
    }
}