468 lines
17 KiB
Rust
468 lines
17 KiB
Rust
use crate::output::TaskOutputManager;
|
|
use crate::types::*;
|
|
use std::collections::HashMap;
|
|
use std::path::PathBuf;
|
|
use tokio::io::AsyncBufReadExt;
|
|
use tokio::process::Command;
|
|
use tokio::sync::RwLock;
|
|
|
|
#[derive(Debug, thiserror::Error)]
|
|
pub enum TaskError {
|
|
#[error("Task '{0}' not found")]
|
|
NotFound(String),
|
|
#[error("Task '{0}' is already running")]
|
|
AlreadyRunning(String),
|
|
#[error("Task '{0}' is not running")]
|
|
NotRunning(String),
|
|
#[error("Failed to spawn process: {0}")]
|
|
SpawnFailed(String),
|
|
#[error("IO error: {0}")]
|
|
Io(#[from] std::io::Error),
|
|
#[error("Task name '{0}' already exists")]
|
|
DuplicateName(String),
|
|
}
|
|
|
|
struct RunningTask {
|
|
abort_handles: Vec<tokio::task::JoinHandle<()>>,
|
|
child: tokio::process::Child,
|
|
lifecycle: Option<Box<dyn dirigent_process::ProcessLifecycle>>,
|
|
}
|
|
|
|
/// The main task runner service.
|
|
/// All methods take &self — uses interior mutability for shared access.
|
|
pub struct TaskRunner {
|
|
definitions: RwLock<HashMap<TaskId, TaskDefinition>>,
|
|
statuses: RwLock<HashMap<TaskId, TaskStatus>>,
|
|
started_at: RwLock<HashMap<TaskId, chrono::DateTime<chrono::Utc>>>,
|
|
stopped_at: RwLock<HashMap<TaskId, chrono::DateTime<chrono::Utc>>>,
|
|
running: RwLock<HashMap<TaskId, RunningTask>>,
|
|
tasks_dir: PathBuf,
|
|
default_working_dir: PathBuf,
|
|
process_manager: Option<std::sync::Arc<dyn dirigent_process::ProcessGroupManager>>,
|
|
}
|
|
|
|
impl TaskRunner {
|
|
pub fn new(
|
|
tasks_dir: PathBuf,
|
|
default_working_dir: PathBuf,
|
|
process_manager: Option<std::sync::Arc<dyn dirigent_process::ProcessGroupManager>>,
|
|
) -> Self {
|
|
Self {
|
|
definitions: RwLock::new(HashMap::new()),
|
|
statuses: RwLock::new(HashMap::new()),
|
|
started_at: RwLock::new(HashMap::new()),
|
|
stopped_at: RwLock::new(HashMap::new()),
|
|
running: RwLock::new(HashMap::new()),
|
|
tasks_dir,
|
|
default_working_dir,
|
|
process_manager,
|
|
}
|
|
}
|
|
|
|
pub fn tasks_dir(&self) -> &PathBuf {
|
|
&self.tasks_dir
|
|
}
|
|
|
|
/// Register a task definition (does not start it).
|
|
/// Allows re-registration to update an existing task.
|
|
pub async fn register(&self, def: TaskDefinition) -> Result<(), TaskError> {
|
|
let name = def.name.clone();
|
|
self.definitions.write().await.insert(name.clone(), def);
|
|
self.statuses
|
|
.write()
|
|
.await
|
|
.entry(name)
|
|
.or_insert(TaskStatus::Stopped);
|
|
Ok(())
|
|
}
|
|
|
|
/// Remove a task definition (stops it if running)
|
|
pub async fn remove(&self, name: &str) -> Result<(), TaskError> {
|
|
if self.is_running(name).await {
|
|
self.stop(name).await?;
|
|
}
|
|
self.definitions.write().await.remove(name);
|
|
self.statuses.write().await.remove(name);
|
|
self.started_at.write().await.remove(name);
|
|
self.stopped_at.write().await.remove(name);
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn is_running(&self, name: &str) -> bool {
|
|
matches!(
|
|
self.statuses.read().await.get(name),
|
|
Some(TaskStatus::Running { .. })
|
|
)
|
|
}
|
|
|
|
/// Start a task by name
|
|
pub async fn start(&self, name: &str) -> Result<(), TaskError> {
|
|
let def = {
|
|
let defs = self.definitions.read().await;
|
|
defs.get(name)
|
|
.cloned()
|
|
.ok_or_else(|| TaskError::NotFound(name.to_string()))?
|
|
};
|
|
|
|
if self.is_running(name).await {
|
|
return Err(TaskError::AlreadyRunning(name.to_string()));
|
|
}
|
|
|
|
let output_mgr = TaskOutputManager::new(self.tasks_dir.join(&def.name));
|
|
output_mgr.ensure_dir().await?;
|
|
|
|
if def.rotate_previous {
|
|
if let Err(e) = output_mgr.rotate().await {
|
|
tracing::warn!("Failed to rotate output for task {}: {}", name, e);
|
|
}
|
|
}
|
|
|
|
// Resolve working directory: explicit > default > current process dir
|
|
let raw_cwd = def
|
|
.working_directory
|
|
.clone()
|
|
.unwrap_or_else(|| self.default_working_dir.clone());
|
|
|
|
// Canonicalize to an absolute path; fall back to current dir if invalid
|
|
let cwd = match std::fs::canonicalize(&raw_cwd) {
|
|
Ok(p) => p,
|
|
Err(_) => {
|
|
tracing::warn!(
|
|
"Task '{}': working directory '{}' invalid, falling back to current dir",
|
|
name,
|
|
raw_cwd.display()
|
|
);
|
|
std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
|
|
}
|
|
};
|
|
|
|
let lifecycle = self.process_manager.as_ref().map(|mgr| mgr.create_lifecycle());
|
|
|
|
let mut cmd = Command::new(&def.command);
|
|
cmd.args(&def.args)
|
|
.current_dir(&cwd)
|
|
.stdout(std::process::Stdio::piped())
|
|
.stderr(std::process::Stdio::piped())
|
|
.kill_on_drop(true);
|
|
|
|
if let Some(ref lc) = lifecycle {
|
|
lc.configure_async_command(&mut cmd);
|
|
}
|
|
|
|
for (key, value) in &def.env {
|
|
cmd.env(key, value);
|
|
}
|
|
|
|
let mut child = match cmd.spawn() {
|
|
Ok(child) => child,
|
|
Err(e) => {
|
|
let error_msg = format!("{} (cwd: {}): {}", def.command, cwd.display(), e);
|
|
// Write error to stderr.log and combined.log so the user can see it in the output viewer
|
|
let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ");
|
|
let log_line = format!("[{}] Failed to start: {}\n", timestamp, error_msg);
|
|
if def.persist_to_disk {
|
|
let _ = tokio::fs::write(output_mgr.stderr_path(), log_line.as_bytes()).await;
|
|
let _ = tokio::fs::write(output_mgr.combined_path(), format!("[stderr] {}", log_line).as_bytes()).await;
|
|
}
|
|
// Set status to Failed so the UI shows it
|
|
self.statuses.write().await.insert(name.to_string(), TaskStatus::Failed { error: error_msg.clone() });
|
|
self.stopped_at.write().await.insert(name.to_string(), chrono::Utc::now());
|
|
return Err(TaskError::SpawnFailed(error_msg));
|
|
}
|
|
};
|
|
let pid = child.id().unwrap_or(0);
|
|
tracing::info!("Task '{}' started with PID {} (cwd: {})", name, pid, cwd.display());
|
|
|
|
if let Some(ref lc) = lifecycle {
|
|
if let Some(child_pid) = child.id() {
|
|
if let Err(e) = lc.register_child(child_pid) {
|
|
tracing::warn!(error = %e, "Failed to register task child with process lifecycle");
|
|
}
|
|
}
|
|
}
|
|
|
|
let stdout = child.stdout.take();
|
|
let stderr = child.stderr.take();
|
|
let persist = def.persist_to_disk;
|
|
let mut abort_handles = Vec::new();
|
|
|
|
// When not rotating, truncate old logs so we don't accumulate output across restarts
|
|
let truncate = !def.rotate_previous;
|
|
if truncate && persist {
|
|
let _ = tokio::fs::write(output_mgr.stdout_path(), b"").await;
|
|
let _ = tokio::fs::write(output_mgr.stderr_path(), b"").await;
|
|
let _ = tokio::fs::write(output_mgr.combined_path(), b"").await;
|
|
}
|
|
|
|
// Stdout capture task
|
|
if let Some(stdout) = stdout {
|
|
let stdout_path = output_mgr.stdout_path();
|
|
let combined_path = output_mgr.combined_path();
|
|
let task_name = name.to_string();
|
|
let h = tokio::spawn(async move {
|
|
let reader = tokio::io::BufReader::new(stdout);
|
|
let mut lines = reader.lines();
|
|
let mut stdout_file = if persist {
|
|
tokio::fs::OpenOptions::new()
|
|
.create(true)
|
|
.append(true)
|
|
.open(&stdout_path)
|
|
.await
|
|
.ok()
|
|
} else {
|
|
None
|
|
};
|
|
let mut combined_file = if persist {
|
|
tokio::fs::OpenOptions::new()
|
|
.create(true)
|
|
.append(true)
|
|
.open(&combined_path)
|
|
.await
|
|
.ok()
|
|
} else {
|
|
None
|
|
};
|
|
|
|
while let Ok(Some(line)) = lines.next_line().await {
|
|
let ts = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ");
|
|
if let Some(ref mut f) = stdout_file {
|
|
let _ = tokio::io::AsyncWriteExt::write_all(
|
|
f,
|
|
format!("[{}] {}\n", ts, line).as_bytes(),
|
|
)
|
|
.await;
|
|
}
|
|
if let Some(ref mut f) = combined_file {
|
|
let _ = tokio::io::AsyncWriteExt::write_all(
|
|
f,
|
|
format!("[{}] [stdout] {}\n", ts, line).as_bytes(),
|
|
)
|
|
.await;
|
|
}
|
|
}
|
|
tracing::debug!("Stdout capture ended for task '{}'", task_name);
|
|
});
|
|
abort_handles.push(h);
|
|
}
|
|
|
|
// Stderr capture task
|
|
if let Some(stderr) = stderr {
|
|
let stderr_path = output_mgr.stderr_path();
|
|
let combined_path = output_mgr.combined_path();
|
|
let task_name = name.to_string();
|
|
let h = tokio::spawn(async move {
|
|
let reader = tokio::io::BufReader::new(stderr);
|
|
let mut lines = reader.lines();
|
|
let mut stderr_file = if persist {
|
|
tokio::fs::OpenOptions::new()
|
|
.create(true)
|
|
.append(true)
|
|
.open(&stderr_path)
|
|
.await
|
|
.ok()
|
|
} else {
|
|
None
|
|
};
|
|
let mut combined_file = if persist {
|
|
tokio::fs::OpenOptions::new()
|
|
.create(true)
|
|
.append(true)
|
|
.open(&combined_path)
|
|
.await
|
|
.ok()
|
|
} else {
|
|
None
|
|
};
|
|
|
|
while let Ok(Some(line)) = lines.next_line().await {
|
|
let ts = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ");
|
|
if let Some(ref mut f) = stderr_file {
|
|
let _ = tokio::io::AsyncWriteExt::write_all(
|
|
f,
|
|
format!("[{}] {}\n", ts, line).as_bytes(),
|
|
)
|
|
.await;
|
|
}
|
|
if let Some(ref mut f) = combined_file {
|
|
let _ = tokio::io::AsyncWriteExt::write_all(
|
|
f,
|
|
format!("[{}] [stderr] {}\n", ts, line).as_bytes(),
|
|
)
|
|
.await;
|
|
}
|
|
}
|
|
tracing::debug!("Stderr capture ended for task '{}'", task_name);
|
|
});
|
|
abort_handles.push(h);
|
|
}
|
|
|
|
self.statuses
|
|
.write()
|
|
.await
|
|
.insert(name.to_string(), TaskStatus::Running { pid });
|
|
self.started_at
|
|
.write()
|
|
.await
|
|
.insert(name.to_string(), chrono::Utc::now());
|
|
self.stopped_at.write().await.remove(name);
|
|
|
|
self.running.write().await.insert(
|
|
name.to_string(),
|
|
RunningTask {
|
|
abort_handles,
|
|
child,
|
|
lifecycle,
|
|
},
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Stop a running task
|
|
pub async fn stop(&self, name: &str) -> Result<(), TaskError> {
|
|
if !self.is_running(name).await {
|
|
return Err(TaskError::NotRunning(name.to_string()));
|
|
}
|
|
|
|
let mut running = self.running.write().await;
|
|
if let Some(mut task) = running.remove(name) {
|
|
if let Some(ref lifecycle) = task.lifecycle {
|
|
dirigent_process::graceful_shutdown_async(
|
|
lifecycle.as_ref(),
|
|
&mut task.child,
|
|
std::time::Duration::from_secs(3),
|
|
)
|
|
.await;
|
|
} else {
|
|
let _ = task.child.kill().await;
|
|
}
|
|
for h in task.abort_handles {
|
|
h.abort();
|
|
}
|
|
tracing::info!("Task '{}' stopped", name);
|
|
}
|
|
|
|
self.statuses
|
|
.write()
|
|
.await
|
|
.insert(name.to_string(), TaskStatus::Stopped);
|
|
self.stopped_at
|
|
.write()
|
|
.await
|
|
.insert(name.to_string(), chrono::Utc::now());
|
|
Ok(())
|
|
}
|
|
|
|
/// Poll running tasks for completion (call periodically from a timer)
|
|
pub async fn poll_completed(&self) {
|
|
let mut running = self.running.write().await;
|
|
let mut completed = Vec::new();
|
|
|
|
for (name, task) in running.iter_mut() {
|
|
match task.child.try_wait() {
|
|
Ok(Some(status)) => {
|
|
let exit_code = status.code();
|
|
tracing::info!(
|
|
"Task '{}' finished with exit code: {:?}",
|
|
name,
|
|
exit_code
|
|
);
|
|
completed.push((name.clone(), exit_code));
|
|
}
|
|
Ok(None) => {}
|
|
Err(e) => {
|
|
tracing::error!("Error checking task '{}': {}", name, e);
|
|
completed.push((name.clone(), None));
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut statuses = self.statuses.write().await;
|
|
let mut stopped_at = self.stopped_at.write().await;
|
|
for (name, exit_code) in completed {
|
|
running.remove(&name);
|
|
statuses.insert(name.clone(), TaskStatus::Finished { exit_code });
|
|
stopped_at.insert(name.clone(), chrono::Utc::now());
|
|
}
|
|
}
|
|
|
|
/// List all tasks with their info
|
|
pub async fn list_tasks(&self) -> Vec<TaskInfo> {
|
|
let defs = self.definitions.read().await;
|
|
let statuses = self.statuses.read().await;
|
|
let started = self.started_at.read().await;
|
|
let stopped = self.stopped_at.read().await;
|
|
|
|
defs.values()
|
|
.map(|def| TaskInfo {
|
|
definition: def.clone(),
|
|
status: statuses
|
|
.get(&def.name)
|
|
.cloned()
|
|
.unwrap_or(TaskStatus::Stopped),
|
|
started_at: started.get(&def.name).cloned(),
|
|
stopped_at: stopped.get(&def.name).cloned(),
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
/// Get info for a specific task
|
|
pub async fn get_task(&self, name: &str) -> Option<TaskInfo> {
|
|
let defs = self.definitions.read().await;
|
|
let def = defs.get(name)?;
|
|
let statuses = self.statuses.read().await;
|
|
let started = self.started_at.read().await;
|
|
let stopped = self.stopped_at.read().await;
|
|
Some(TaskInfo {
|
|
definition: def.clone(),
|
|
status: statuses
|
|
.get(name)
|
|
.cloned()
|
|
.unwrap_or(TaskStatus::Stopped),
|
|
started_at: started.get(name).cloned(),
|
|
stopped_at: stopped.get(name).cloned(),
|
|
})
|
|
}
|
|
|
|
/// Read output for a task
|
|
pub async fn read_output(
|
|
&self,
|
|
name: &str,
|
|
kind: OutputKind,
|
|
tail_lines: Option<usize>,
|
|
) -> Result<String, TaskError> {
|
|
{
|
|
let defs = self.definitions.read().await;
|
|
if !defs.contains_key(name) {
|
|
return Err(TaskError::NotFound(name.to_string()));
|
|
}
|
|
}
|
|
let mgr = TaskOutputManager::new(self.tasks_dir.join(name));
|
|
mgr.read_output(kind, tail_lines).await.map_err(TaskError::Io)
|
|
}
|
|
|
|
/// Get all task definitions (for config persistence)
|
|
pub async fn get_definitions(&self) -> Vec<TaskDefinition> {
|
|
self.definitions.read().await.values().cloned().collect()
|
|
}
|
|
|
|
/// Update a task definition (stops if running, re-registers)
|
|
pub async fn update(&self, def: TaskDefinition) -> Result<(), TaskError> {
|
|
let name = def.name.clone();
|
|
if self.is_running(&name).await {
|
|
self.stop(&name).await?;
|
|
}
|
|
self.register(def).await
|
|
}
|
|
|
|
/// Stop all running tasks. Used during graceful shutdown.
|
|
pub async fn stop_all(&self) {
|
|
let names: Vec<String> = self.running.read().await.keys().cloned().collect();
|
|
for name in names {
|
|
if let Err(e) = self.stop(&name).await {
|
|
tracing::warn!(task = %name, error = %e, "Failed to stop task during shutdown");
|
|
}
|
|
}
|
|
}
|
|
}
|