sync from monorepo @ 2452e92e
This commit is contained in:
@@ -0,0 +1,467 @@
|
||||
use crate::output::TaskOutputManager;
|
||||
use crate::types::*;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum TaskError {
|
||||
#[error("Task '{0}' not found")]
|
||||
NotFound(String),
|
||||
#[error("Task '{0}' is already running")]
|
||||
AlreadyRunning(String),
|
||||
#[error("Task '{0}' is not running")]
|
||||
NotRunning(String),
|
||||
#[error("Failed to spawn process: {0}")]
|
||||
SpawnFailed(String),
|
||||
#[error("IO error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Task name '{0}' already exists")]
|
||||
DuplicateName(String),
|
||||
}
|
||||
|
||||
struct RunningTask {
|
||||
abort_handles: Vec<tokio::task::JoinHandle<()>>,
|
||||
child: tokio::process::Child,
|
||||
lifecycle: Option<Box<dyn dirigent_process::ProcessLifecycle>>,
|
||||
}
|
||||
|
||||
/// The main task runner service.
|
||||
/// All methods take &self — uses interior mutability for shared access.
|
||||
pub struct TaskRunner {
|
||||
definitions: RwLock<HashMap<TaskId, TaskDefinition>>,
|
||||
statuses: RwLock<HashMap<TaskId, TaskStatus>>,
|
||||
started_at: RwLock<HashMap<TaskId, chrono::DateTime<chrono::Utc>>>,
|
||||
stopped_at: RwLock<HashMap<TaskId, chrono::DateTime<chrono::Utc>>>,
|
||||
running: RwLock<HashMap<TaskId, RunningTask>>,
|
||||
tasks_dir: PathBuf,
|
||||
default_working_dir: PathBuf,
|
||||
process_manager: Option<std::sync::Arc<dyn dirigent_process::ProcessGroupManager>>,
|
||||
}
|
||||
|
||||
impl TaskRunner {
|
||||
pub fn new(
|
||||
tasks_dir: PathBuf,
|
||||
default_working_dir: PathBuf,
|
||||
process_manager: Option<std::sync::Arc<dyn dirigent_process::ProcessGroupManager>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
definitions: RwLock::new(HashMap::new()),
|
||||
statuses: RwLock::new(HashMap::new()),
|
||||
started_at: RwLock::new(HashMap::new()),
|
||||
stopped_at: RwLock::new(HashMap::new()),
|
||||
running: RwLock::new(HashMap::new()),
|
||||
tasks_dir,
|
||||
default_working_dir,
|
||||
process_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tasks_dir(&self) -> &PathBuf {
|
||||
&self.tasks_dir
|
||||
}
|
||||
|
||||
/// Register a task definition (does not start it).
|
||||
/// Allows re-registration to update an existing task.
|
||||
pub async fn register(&self, def: TaskDefinition) -> Result<(), TaskError> {
|
||||
let name = def.name.clone();
|
||||
self.definitions.write().await.insert(name.clone(), def);
|
||||
self.statuses
|
||||
.write()
|
||||
.await
|
||||
.entry(name)
|
||||
.or_insert(TaskStatus::Stopped);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Remove a task definition (stops it if running)
|
||||
pub async fn remove(&self, name: &str) -> Result<(), TaskError> {
|
||||
if self.is_running(name).await {
|
||||
self.stop(name).await?;
|
||||
}
|
||||
self.definitions.write().await.remove(name);
|
||||
self.statuses.write().await.remove(name);
|
||||
self.started_at.write().await.remove(name);
|
||||
self.stopped_at.write().await.remove(name);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn is_running(&self, name: &str) -> bool {
|
||||
matches!(
|
||||
self.statuses.read().await.get(name),
|
||||
Some(TaskStatus::Running { .. })
|
||||
)
|
||||
}
|
||||
|
||||
/// Start a task by name
|
||||
pub async fn start(&self, name: &str) -> Result<(), TaskError> {
|
||||
let def = {
|
||||
let defs = self.definitions.read().await;
|
||||
defs.get(name)
|
||||
.cloned()
|
||||
.ok_or_else(|| TaskError::NotFound(name.to_string()))?
|
||||
};
|
||||
|
||||
if self.is_running(name).await {
|
||||
return Err(TaskError::AlreadyRunning(name.to_string()));
|
||||
}
|
||||
|
||||
let output_mgr = TaskOutputManager::new(self.tasks_dir.join(&def.name));
|
||||
output_mgr.ensure_dir().await?;
|
||||
|
||||
if def.rotate_previous {
|
||||
if let Err(e) = output_mgr.rotate().await {
|
||||
tracing::warn!("Failed to rotate output for task {}: {}", name, e);
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve working directory: explicit > default > current process dir
|
||||
let raw_cwd = def
|
||||
.working_directory
|
||||
.clone()
|
||||
.unwrap_or_else(|| self.default_working_dir.clone());
|
||||
|
||||
// Canonicalize to an absolute path; fall back to current dir if invalid
|
||||
let cwd = match std::fs::canonicalize(&raw_cwd) {
|
||||
Ok(p) => p,
|
||||
Err(_) => {
|
||||
tracing::warn!(
|
||||
"Task '{}': working directory '{}' invalid, falling back to current dir",
|
||||
name,
|
||||
raw_cwd.display()
|
||||
);
|
||||
std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
|
||||
}
|
||||
};
|
||||
|
||||
let lifecycle = self.process_manager.as_ref().map(|mgr| mgr.create_lifecycle());
|
||||
|
||||
let mut cmd = Command::new(&def.command);
|
||||
cmd.args(&def.args)
|
||||
.current_dir(&cwd)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.kill_on_drop(true);
|
||||
|
||||
if let Some(ref lc) = lifecycle {
|
||||
lc.configure_async_command(&mut cmd);
|
||||
}
|
||||
|
||||
for (key, value) in &def.env {
|
||||
cmd.env(key, value);
|
||||
}
|
||||
|
||||
let mut child = match cmd.spawn() {
|
||||
Ok(child) => child,
|
||||
Err(e) => {
|
||||
let error_msg = format!("{} (cwd: {}): {}", def.command, cwd.display(), e);
|
||||
// Write error to stderr.log and combined.log so the user can see it in the output viewer
|
||||
let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ");
|
||||
let log_line = format!("[{}] Failed to start: {}\n", timestamp, error_msg);
|
||||
if def.persist_to_disk {
|
||||
let _ = tokio::fs::write(output_mgr.stderr_path(), log_line.as_bytes()).await;
|
||||
let _ = tokio::fs::write(output_mgr.combined_path(), format!("[stderr] {}", log_line).as_bytes()).await;
|
||||
}
|
||||
// Set status to Failed so the UI shows it
|
||||
self.statuses.write().await.insert(name.to_string(), TaskStatus::Failed { error: error_msg.clone() });
|
||||
self.stopped_at.write().await.insert(name.to_string(), chrono::Utc::now());
|
||||
return Err(TaskError::SpawnFailed(error_msg));
|
||||
}
|
||||
};
|
||||
let pid = child.id().unwrap_or(0);
|
||||
tracing::info!("Task '{}' started with PID {} (cwd: {})", name, pid, cwd.display());
|
||||
|
||||
if let Some(ref lc) = lifecycle {
|
||||
if let Some(child_pid) = child.id() {
|
||||
if let Err(e) = lc.register_child(child_pid) {
|
||||
tracing::warn!(error = %e, "Failed to register task child with process lifecycle");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let stdout = child.stdout.take();
|
||||
let stderr = child.stderr.take();
|
||||
let persist = def.persist_to_disk;
|
||||
let mut abort_handles = Vec::new();
|
||||
|
||||
// When not rotating, truncate old logs so we don't accumulate output across restarts
|
||||
let truncate = !def.rotate_previous;
|
||||
if truncate && persist {
|
||||
let _ = tokio::fs::write(output_mgr.stdout_path(), b"").await;
|
||||
let _ = tokio::fs::write(output_mgr.stderr_path(), b"").await;
|
||||
let _ = tokio::fs::write(output_mgr.combined_path(), b"").await;
|
||||
}
|
||||
|
||||
// Stdout capture task
|
||||
if let Some(stdout) = stdout {
|
||||
let stdout_path = output_mgr.stdout_path();
|
||||
let combined_path = output_mgr.combined_path();
|
||||
let task_name = name.to_string();
|
||||
let h = tokio::spawn(async move {
|
||||
let reader = tokio::io::BufReader::new(stdout);
|
||||
let mut lines = reader.lines();
|
||||
let mut stdout_file = if persist {
|
||||
tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&stdout_path)
|
||||
.await
|
||||
.ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let mut combined_file = if persist {
|
||||
tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&combined_path)
|
||||
.await
|
||||
.ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
let ts = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ");
|
||||
if let Some(ref mut f) = stdout_file {
|
||||
let _ = tokio::io::AsyncWriteExt::write_all(
|
||||
f,
|
||||
format!("[{}] {}\n", ts, line).as_bytes(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
if let Some(ref mut f) = combined_file {
|
||||
let _ = tokio::io::AsyncWriteExt::write_all(
|
||||
f,
|
||||
format!("[{}] [stdout] {}\n", ts, line).as_bytes(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
tracing::debug!("Stdout capture ended for task '{}'", task_name);
|
||||
});
|
||||
abort_handles.push(h);
|
||||
}
|
||||
|
||||
// Stderr capture task
|
||||
if let Some(stderr) = stderr {
|
||||
let stderr_path = output_mgr.stderr_path();
|
||||
let combined_path = output_mgr.combined_path();
|
||||
let task_name = name.to_string();
|
||||
let h = tokio::spawn(async move {
|
||||
let reader = tokio::io::BufReader::new(stderr);
|
||||
let mut lines = reader.lines();
|
||||
let mut stderr_file = if persist {
|
||||
tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&stderr_path)
|
||||
.await
|
||||
.ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let mut combined_file = if persist {
|
||||
tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&combined_path)
|
||||
.await
|
||||
.ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
let ts = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ");
|
||||
if let Some(ref mut f) = stderr_file {
|
||||
let _ = tokio::io::AsyncWriteExt::write_all(
|
||||
f,
|
||||
format!("[{}] {}\n", ts, line).as_bytes(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
if let Some(ref mut f) = combined_file {
|
||||
let _ = tokio::io::AsyncWriteExt::write_all(
|
||||
f,
|
||||
format!("[{}] [stderr] {}\n", ts, line).as_bytes(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
tracing::debug!("Stderr capture ended for task '{}'", task_name);
|
||||
});
|
||||
abort_handles.push(h);
|
||||
}
|
||||
|
||||
self.statuses
|
||||
.write()
|
||||
.await
|
||||
.insert(name.to_string(), TaskStatus::Running { pid });
|
||||
self.started_at
|
||||
.write()
|
||||
.await
|
||||
.insert(name.to_string(), chrono::Utc::now());
|
||||
self.stopped_at.write().await.remove(name);
|
||||
|
||||
self.running.write().await.insert(
|
||||
name.to_string(),
|
||||
RunningTask {
|
||||
abort_handles,
|
||||
child,
|
||||
lifecycle,
|
||||
},
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stop a running task
|
||||
pub async fn stop(&self, name: &str) -> Result<(), TaskError> {
|
||||
if !self.is_running(name).await {
|
||||
return Err(TaskError::NotRunning(name.to_string()));
|
||||
}
|
||||
|
||||
let mut running = self.running.write().await;
|
||||
if let Some(mut task) = running.remove(name) {
|
||||
if let Some(ref lifecycle) = task.lifecycle {
|
||||
dirigent_process::graceful_shutdown_async(
|
||||
lifecycle.as_ref(),
|
||||
&mut task.child,
|
||||
std::time::Duration::from_secs(3),
|
||||
)
|
||||
.await;
|
||||
} else {
|
||||
let _ = task.child.kill().await;
|
||||
}
|
||||
for h in task.abort_handles {
|
||||
h.abort();
|
||||
}
|
||||
tracing::info!("Task '{}' stopped", name);
|
||||
}
|
||||
|
||||
self.statuses
|
||||
.write()
|
||||
.await
|
||||
.insert(name.to_string(), TaskStatus::Stopped);
|
||||
self.stopped_at
|
||||
.write()
|
||||
.await
|
||||
.insert(name.to_string(), chrono::Utc::now());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Poll running tasks for completion (call periodically from a timer)
|
||||
pub async fn poll_completed(&self) {
|
||||
let mut running = self.running.write().await;
|
||||
let mut completed = Vec::new();
|
||||
|
||||
for (name, task) in running.iter_mut() {
|
||||
match task.child.try_wait() {
|
||||
Ok(Some(status)) => {
|
||||
let exit_code = status.code();
|
||||
tracing::info!(
|
||||
"Task '{}' finished with exit code: {:?}",
|
||||
name,
|
||||
exit_code
|
||||
);
|
||||
completed.push((name.clone(), exit_code));
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(e) => {
|
||||
tracing::error!("Error checking task '{}': {}", name, e);
|
||||
completed.push((name.clone(), None));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut statuses = self.statuses.write().await;
|
||||
let mut stopped_at = self.stopped_at.write().await;
|
||||
for (name, exit_code) in completed {
|
||||
running.remove(&name);
|
||||
statuses.insert(name.clone(), TaskStatus::Finished { exit_code });
|
||||
stopped_at.insert(name.clone(), chrono::Utc::now());
|
||||
}
|
||||
}
|
||||
|
||||
/// List all tasks with their info
|
||||
pub async fn list_tasks(&self) -> Vec<TaskInfo> {
|
||||
let defs = self.definitions.read().await;
|
||||
let statuses = self.statuses.read().await;
|
||||
let started = self.started_at.read().await;
|
||||
let stopped = self.stopped_at.read().await;
|
||||
|
||||
defs.values()
|
||||
.map(|def| TaskInfo {
|
||||
definition: def.clone(),
|
||||
status: statuses
|
||||
.get(&def.name)
|
||||
.cloned()
|
||||
.unwrap_or(TaskStatus::Stopped),
|
||||
started_at: started.get(&def.name).cloned(),
|
||||
stopped_at: stopped.get(&def.name).cloned(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get info for a specific task
|
||||
pub async fn get_task(&self, name: &str) -> Option<TaskInfo> {
|
||||
let defs = self.definitions.read().await;
|
||||
let def = defs.get(name)?;
|
||||
let statuses = self.statuses.read().await;
|
||||
let started = self.started_at.read().await;
|
||||
let stopped = self.stopped_at.read().await;
|
||||
Some(TaskInfo {
|
||||
definition: def.clone(),
|
||||
status: statuses
|
||||
.get(name)
|
||||
.cloned()
|
||||
.unwrap_or(TaskStatus::Stopped),
|
||||
started_at: started.get(name).cloned(),
|
||||
stopped_at: stopped.get(name).cloned(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Read output for a task
|
||||
pub async fn read_output(
|
||||
&self,
|
||||
name: &str,
|
||||
kind: OutputKind,
|
||||
tail_lines: Option<usize>,
|
||||
) -> Result<String, TaskError> {
|
||||
{
|
||||
let defs = self.definitions.read().await;
|
||||
if !defs.contains_key(name) {
|
||||
return Err(TaskError::NotFound(name.to_string()));
|
||||
}
|
||||
}
|
||||
let mgr = TaskOutputManager::new(self.tasks_dir.join(name));
|
||||
mgr.read_output(kind, tail_lines).await.map_err(TaskError::Io)
|
||||
}
|
||||
|
||||
/// Get all task definitions (for config persistence)
|
||||
pub async fn get_definitions(&self) -> Vec<TaskDefinition> {
|
||||
self.definitions.read().await.values().cloned().collect()
|
||||
}
|
||||
|
||||
/// Update a task definition (stops if running, re-registers)
|
||||
pub async fn update(&self, def: TaskDefinition) -> Result<(), TaskError> {
|
||||
let name = def.name.clone();
|
||||
if self.is_running(&name).await {
|
||||
self.stop(&name).await?;
|
||||
}
|
||||
self.register(def).await
|
||||
}
|
||||
|
||||
/// Stop all running tasks. Used during graceful shutdown.
|
||||
pub async fn stop_all(&self) {
|
||||
let names: Vec<String> = self.running.read().await.keys().cloned().collect();
|
||||
for name in names {
|
||||
if let Err(e) = self.stop(&name).await {
|
||||
tracing::warn!(task = %name, error = %e, "Failed to stop task during shutdown");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user