sync from monorepo @ 2452e92e

This commit is contained in:
2026-05-08 01:59:04 +02:00
commit b03dc15371
459 changed files with 129586 additions and 0 deletions
+32
View File
@@ -0,0 +1,32 @@
# Manifest for the `dirigent_process` library crate.
[package]
name = "dirigent_process"
version = "0.1.0"
edition = "2021"
description = "Cross-platform process lifecycle management for Dirigent"
[lib]
path = "src/lib.rs"
[features]
default = []
# Opt-in async support: enabling this feature pulls in the optional `tokio` dependency below.
tokio = ["dep:tokio"]
[dependencies]
tracing = "0.1"
tokio = { version = "1", features = ["process", "time"], optional = true }
# Only built when targeting Windows.
[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0.59", features = [
"Win32_System_JobObjects",
"Win32_System_Threading",
"Win32_Foundation",
"Win32_System_Console",
"Win32_Security",
] }
# Only built when targeting Unix-like systems.
[target.'cfg(unix)'.dependencies]
nix = { version = "0.29", features = ["signal", "process"] }
libc = "0.2"
# Test-only: tokio with the `macros` and `rt-multi-thread` features enabled.
[dev-dependencies]
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
+30
View File
@@ -0,0 +1,30 @@
pub mod traits;
mod shutdown;
#[cfg(windows)]
mod windows;
// The generic Unix backend is only selected by `create_manager` on non-Linux
// Unix targets. Gating the module the same way keeps it from being compiled
// as dead code on Linux, where the `linux` backend is used instead.
#[cfg(all(unix, not(target_os = "linux")))]
mod unix;
#[cfg(target_os = "linux")]
mod linux;
pub use traits::{ProcessGroupManager, ProcessLifecycle};
pub use shutdown::graceful_shutdown_sync;
#[cfg(feature = "tokio")]
pub use shutdown::graceful_shutdown_async;
use std::sync::Arc;
/// Builds the [`ProcessGroupManager`] backend matching the compilation target.
///
/// Windows targets get the Job Object backend, Linux targets the Linux
/// backend, and every other Unix target the generic Unix backend.
///
/// The returned manager is not initialised yet: call `init()` on it before use.
pub fn create_manager() -> Arc<dyn ProcessGroupManager> {
    // Exactly one of the bindings below survives conditional compilation.
    #[cfg(windows)]
    let manager: Arc<dyn ProcessGroupManager> =
        Arc::new(windows::WindowsProcessGroupManager::new());
    #[cfg(target_os = "linux")]
    let manager: Arc<dyn ProcessGroupManager> =
        Arc::new(linux::LinuxProcessGroupManager::new());
    #[cfg(all(unix, not(target_os = "linux")))]
    let manager: Arc<dyn ProcessGroupManager> =
        Arc::new(unix::UnixProcessGroupManager::new());
    manager
}
+91
View File
@@ -0,0 +1,91 @@
#![cfg(target_os = "linux")]
use crate::traits::{ProcessGroupManager, ProcessLifecycle};
use nix::sys::signal::{killpg, Signal};
use nix::unistd::Pid;
use std::io;
use std::os::unix::process::CommandExt;
use tracing::{debug, info, warn};
/// Linux process group manager with kernel-level orphan prevention.
///
/// Uses `PR_SET_CHILD_SUBREAPER` so orphaned grandchildren are reparented
/// to this process, and `PR_SET_PDEATHSIG` so children auto-die when
/// the parent crashes.
///
/// The type carries no state, so `Default::default()` and [`Self::new`] are
/// interchangeable (mirroring the `Default` impl of the Windows manager).
#[derive(Debug, Default)]
pub struct LinuxProcessGroupManager;

impl LinuxProcessGroupManager {
    /// Creates the manager. No OS call is made here; the process is only
    /// configured once `init()` is invoked.
    pub fn new() -> Self {
        Self
    }
}
impl ProcessGroupManager for LinuxProcessGroupManager {
    /// Marks the calling process as a child subreaper via
    /// `prctl(PR_SET_CHILD_SUBREAPER, 1)`.
    ///
    /// # Errors
    ///
    /// Returns the OS error (after logging it at `warn` level) when the
    /// `prctl` call reports failure.
    fn init(&self) -> Result<(), io::Error> {
        // SAFETY: this prctl option only takes integer arguments and does not
        // read or write any memory owned by this program.
        let rc = unsafe { libc::prctl(libc::PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) };
        if rc == 0 {
            info!("Linux process group manager initialized (child subreaper enabled)");
            return Ok(());
        }
        let err = io::Error::last_os_error();
        warn!(error = %err, "Failed to set PR_SET_CHILD_SUBREAPER");
        Err(err)
    }

    /// Hands out a fresh, stateless per-child lifecycle handle.
    fn create_lifecycle(&self) -> Box<dyn ProcessLifecycle> {
        let lifecycle = LinuxProcessLifecycle;
        Box::new(lifecycle)
    }
}
pub struct LinuxProcessLifecycle;
impl ProcessLifecycle for LinuxProcessLifecycle {
fn configure_command(&self, cmd: &mut std::process::Command) {
unsafe {
cmd.pre_exec(|| {
if libc::setpgid(0, 0) != 0 {
return Err(io::Error::last_os_error());
}
if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL, 0, 0, 0) != 0 {
return Err(io::Error::last_os_error());
}
Ok(())
});
}
}
#[cfg(feature = "tokio")]
fn configure_async_command(&self, cmd: &mut tokio::process::Command) {
unsafe {
cmd.pre_exec(|| {
if libc::setpgid(0, 0) != 0 {
return Err(io::Error::last_os_error());
}
if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL, 0, 0, 0) != 0 {
return Err(io::Error::last_os_error());
}
Ok(())
});
}
}
fn register_child(&self, pid: u32) -> Result<(), io::Error> {
debug!(pid, pgid = pid, "Linux child registered (process group + PR_SET_PDEATHSIG)");
Ok(())
}
fn send_shutdown_signal(&self, pid: u32) -> Result<(), io::Error> {
let pgid = Pid::from_raw(pid as i32);
killpg(pgid, Signal::SIGTERM)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
debug!(pid, "Sent SIGTERM to process group");
Ok(())
}
fn send_kill_signal(&self, pid: u32) -> Result<(), io::Error> {
let pgid = Pid::from_raw(pid as i32);
killpg(pgid, Signal::SIGKILL)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
debug!(pid, "Sent SIGKILL to process group");
Ok(())
}
}
+66
View File
@@ -0,0 +1,66 @@
use crate::traits::ProcessLifecycle;
use std::time::Duration;
/// Graceful shutdown: send signal → wait → force kill (sync, blocking).
///
/// The child is polled every 50 ms (never sleeping past the deadline) and is
/// checked one final time before escalating to a force kill.
///
/// If the shutdown signal cannot be delivered and the child is still running,
/// the function no longer reports success: it force-kills right away. If the
/// lifecycle's kill signal fails too, it falls back to `Child::kill`, so the
/// final `wait` cannot block on a process that was never signalled.
///
/// Returns `true` if the process exited within the timeout (or its state could
/// not be queried), `false` if it had to be force-killed.
pub fn graceful_shutdown_sync(
    lifecycle: &dyn ProcessLifecycle,
    child: &mut std::process::Child,
    timeout: Duration,
) -> bool {
    const POLL_INTERVAL: Duration = Duration::from_millis(50);

    let pid = child.id();
    if pid == 0 {
        return true;
    }

    match lifecycle.send_shutdown_signal(pid) {
        Ok(()) => {
            let started = std::time::Instant::now();
            loop {
                match child.try_wait() {
                    Ok(None) => {}
                    // Exited, or the status cannot be queried at all.
                    Ok(Some(_)) | Err(_) => return true,
                }
                let remaining = timeout.saturating_sub(started.elapsed());
                if remaining.is_zero() {
                    break;
                }
                std::thread::sleep(remaining.min(POLL_INTERVAL));
            }
            tracing::debug!(pid, "Graceful shutdown timed out, force killing");
        }
        Err(err) => {
            // A failed signal usually means the process is already gone, but
            // verify that instead of assuming it.
            if !matches!(child.try_wait(), Ok(None)) {
                return true;
            }
            tracing::debug!(
                pid,
                error = %err,
                "Shutdown signal failed while process is still running, force killing"
            );
        }
    }

    if lifecycle.send_kill_signal(pid).is_err() {
        // Last resort: kill at least the direct child through std.
        let _ = child.kill();
    }
    let _ = child.wait();
    false
}
/// Graceful shutdown: send signal → wait → force kill (async, non-blocking).
///
/// If the shutdown signal cannot be delivered and the child is still running,
/// the function no longer reports success: it force-kills right away. If the
/// lifecycle's kill signal fails too, it falls back to `Child::start_kill`,
/// so the final `wait` cannot hang on a process that was never signalled.
///
/// Returns `true` if the process exited within the timeout (or waiting on it
/// failed), `false` if it had to be force-killed.
#[cfg(feature = "tokio")]
pub async fn graceful_shutdown_async(
    lifecycle: &dyn ProcessLifecycle,
    child: &mut tokio::process::Child,
    timeout: Duration,
) -> bool {
    // No pid (or pid 0): nothing left to signal.
    let pid = match child.id() {
        Some(0) | None => return true,
        Some(pid) => pid,
    };

    match lifecycle.send_shutdown_signal(pid) {
        Ok(()) => {
            // `Ok(_)` covers both a clean exit and a failed wait, matching
            // the previous behaviour; only a timeout escalates.
            if tokio::time::timeout(timeout, child.wait()).await.is_ok() {
                return true;
            }
            tracing::debug!(pid, "Graceful shutdown timed out, force killing");
        }
        Err(err) => {
            // A failed signal usually means the process is already gone, but
            // verify that instead of assuming it.
            if !matches!(child.try_wait(), Ok(None)) {
                return true;
            }
            tracing::debug!(
                pid,
                error = %err,
                "Shutdown signal failed while process is still running, force killing"
            );
        }
    }

    if lifecycle.send_kill_signal(pid).is_err() {
        // Last resort: kill at least the direct child through tokio.
        let _ = child.start_kill();
    }
    let _ = child.wait().await;
    false
}
+40
View File
@@ -0,0 +1,40 @@
use std::io;
/// Global process group manager — one per application lifetime.
///
/// On Windows, owns a Job Object with KILL_ON_JOB_CLOSE.
/// On Linux, configures the process as a child subreaper.
/// On macOS, no-op (process groups handle cleanup).
///
/// Implementations must be `Send + Sync`; `create_manager()` returns them
/// behind an `Arc<dyn ProcessGroupManager>`.
pub trait ProcessGroupManager: Send + Sync {
/// Initialize platform-specific parent process configuration.
///
/// # Errors
///
/// Returns the underlying OS error when the platform setup call fails.
fn init(&self) -> Result<(), io::Error>;
/// Create a lifecycle handle for managing a child process.
///
/// The handle configures commands before spawn, registers spawned children
/// and delivers shutdown / kill signals.
fn create_lifecycle(&self) -> Box<dyn ProcessLifecycle>;
}
/// Per-child process lifecycle manager.
///
/// All methods are synchronous — OS signal/handle calls are instant.
/// For timeout-based shutdown, use the free functions in the `shutdown` module.
///
/// Expected call order: configure the command, spawn it, call
/// `register_child` with the new pid, and later use the signal methods.
pub trait ProcessLifecycle: Send + Sync {
/// Configure a std::process::Command before spawning.
/// Sets platform-specific flags (process group, creation flags, pre_exec hooks).
fn configure_command(&self, cmd: &mut std::process::Command);
/// Configure a tokio::process::Command before spawning.
/// Only available when the crate's `tokio` feature is enabled.
#[cfg(feature = "tokio")]
fn configure_async_command(&self, cmd: &mut tokio::process::Command);
/// Register a spawned child with the lifecycle manager.
/// Must be called immediately after spawn with the child's PID.
///
/// The Unix implementations in this crate only log here; the Windows one
/// assigns the process to the manager's Job Object.
fn register_child(&self, pid: u32) -> Result<(), io::Error>;
/// Send a graceful shutdown signal to the process (and its tree).
/// Windows: CTRL_BREAK_EVENT. Unix: SIGTERM to process group.
///
/// `pid` is the child's process id, which the platform backends also use as
/// the process-group id.
fn send_shutdown_signal(&self, pid: u32) -> Result<(), io::Error>;
/// Forcefully kill the process (and its tree).
/// Windows: TerminateProcess. Unix: SIGKILL to process group.
///
/// NOTE(review): the Windows backend calls TerminateProcess on the single
/// process opened by `pid`; descendants appear to be cleaned up only through
/// the Job Object — confirm the "(and its tree)" wording for Windows.
fn send_kill_signal(&self, pid: u32) -> Result<(), io::Error>;
}
+78
View File
@@ -0,0 +1,78 @@
#![cfg(unix)]
use crate::traits::{ProcessGroupManager, ProcessLifecycle};
use nix::sys::signal::{killpg, Signal};
use nix::unistd::Pid;
use std::io;
use std::os::unix::process::CommandExt;
use tracing::{debug, info};
/// macOS / generic Unix process group manager.
///
/// Uses process groups for tree management. No kernel-level orphan
/// prevention (macOS lacks `PR_SET_PDEATHSIG`). Relies on launchd
/// supervision for crash recovery.
///
/// The type carries no state, so `Default::default()` and [`Self::new`] are
/// interchangeable (mirroring the `Default` impl of the Windows manager).
#[derive(Debug, Default)]
pub struct UnixProcessGroupManager;

impl UnixProcessGroupManager {
    /// Creates the manager. No OS call is made here.
    pub fn new() -> Self {
        Self
    }
}
impl ProcessGroupManager for UnixProcessGroupManager {
    /// Nothing needs configuring on the parent side for this backend; the
    /// call only emits an `info` log line and always succeeds.
    fn init(&self) -> io::Result<()> {
        info!("Unix process group manager initialized");
        Ok(())
    }

    /// Hands out a fresh, stateless per-child lifecycle handle.
    fn create_lifecycle(&self) -> Box<dyn ProcessLifecycle> {
        let lifecycle = UnixProcessLifecycle;
        Box::new(lifecycle)
    }
}
pub struct UnixProcessLifecycle;
impl ProcessLifecycle for UnixProcessLifecycle {
fn configure_command(&self, cmd: &mut std::process::Command) {
unsafe {
cmd.pre_exec(|| {
if libc::setpgid(0, 0) != 0 {
return Err(io::Error::last_os_error());
}
Ok(())
});
}
}
#[cfg(feature = "tokio")]
fn configure_async_command(&self, cmd: &mut tokio::process::Command) {
unsafe {
cmd.pre_exec(|| {
if libc::setpgid(0, 0) != 0 {
return Err(io::Error::last_os_error());
}
Ok(())
});
}
}
fn register_child(&self, pid: u32) -> Result<(), io::Error> {
debug!(pid, pgid = pid, "Child registered in its own process group");
Ok(())
}
fn send_shutdown_signal(&self, pid: u32) -> Result<(), io::Error> {
let pgid = Pid::from_raw(pid as i32);
killpg(pgid, Signal::SIGTERM)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
debug!(pid, "Sent SIGTERM to process group");
Ok(())
}
fn send_kill_signal(&self, pid: u32) -> Result<(), io::Error> {
let pgid = Pid::from_raw(pid as i32);
killpg(pgid, Signal::SIGKILL)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
debug!(pid, "Sent SIGKILL to process group");
Ok(())
}
}
+199
View File
@@ -0,0 +1,199 @@
#![cfg(windows)]
use crate::traits::{ProcessGroupManager, ProcessLifecycle};
use std::io;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;
use tracing::{debug, info, warn};
use windows_sys::Win32::Foundation::{CloseHandle, FALSE, HANDLE};
use windows_sys::Win32::System::JobObjects::{
AssignProcessToJobObject, CreateJobObjectW, JobObjectExtendedLimitInformation,
SetInformationJobObject, JOBOBJECT_EXTENDED_LIMIT_INFORMATION, JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
};
use windows_sys::Win32::System::Threading::{
OpenProcess, TerminateProcess, PROCESS_ALL_ACCESS,
};
use windows_sys::Win32::System::Console::{
GenerateConsoleCtrlEvent, CTRL_BREAK_EVENT,
};
/// Job-Object-backed process group manager for Windows.
///
/// `init()` creates a Job Object configured with
/// `JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE`. Closing that handle — when this
/// manager is dropped, or when the process crashes — makes the OS kill every
/// process assigned to the job, grandchildren included.
pub struct WindowsProcessGroupManager {
    /// Job Object handle; null until `init()` succeeds. Kept in a `Mutex` so
    /// it can be written through `&self`.
    job_handle: Mutex<HANDLE>,
    /// Flag consulted by `init()` to decide whether initialisation is needed.
    initialized: AtomicBool,
}

// SAFETY: `HANDLE` is a raw pointer and therefore neither `Send` nor `Sync`
// on its own. It is written only inside `init()` (behind the `AtomicBool` and
// the `Mutex`) and otherwise only copied out by `create_lifecycle()` and
// `drop`, so no unsynchronised mutation takes place.
unsafe impl Send for WindowsProcessGroupManager {}
unsafe impl Sync for WindowsProcessGroupManager {}

impl WindowsProcessGroupManager {
    /// Creates an uninitialised manager: null job handle, flag cleared.
    pub fn new() -> Self {
        let job_handle = Mutex::new(std::ptr::null_mut());
        let initialized = AtomicBool::new(false);
        Self { job_handle, initialized }
    }

    /// Copies the current job handle out of the mutex.
    fn handle(&self) -> HANDLE {
        let guard = self.job_handle.lock().unwrap();
        *guard
    }
}

impl Default for WindowsProcessGroupManager {
    /// Equivalent to [`WindowsProcessGroupManager::new`].
    fn default() -> Self {
        WindowsProcessGroupManager::new()
    }
}
impl ProcessGroupManager for WindowsProcessGroupManager {
    /// Creates the Job Object and enables `KILL_ON_JOB_CLOSE` on it.
    ///
    /// Idempotent: once a handle is stored, later calls return `Ok(())`
    /// without touching the OS. The handle mutex is held for the whole
    /// initialisation, so a concurrent second caller cannot return before the
    /// handle is stored (previously it could, and `create_lifecycle()` would
    /// then observe a null handle).
    ///
    /// # Errors
    ///
    /// Returns the OS error from `CreateJobObjectW` or
    /// `SetInformationJobObject`. The error is captured *before* the
    /// half-configured job is closed, because `CloseHandle` may overwrite the
    /// thread's last-error value.
    fn init(&self) -> Result<(), io::Error> {
        let mut slot = self
            .job_handle
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        if !slot.is_null() {
            return Ok(());
        }

        // SAFETY: null attributes and a null name are valid arguments and
        // request an anonymous job with default security.
        let handle = unsafe { CreateJobObjectW(std::ptr::null(), std::ptr::null()) };
        if handle.is_null() {
            return Err(io::Error::last_os_error());
        }

        // SAFETY: the structure is plain data for which all-zero bytes are a
        // valid value.
        let mut limits: JOBOBJECT_EXTENDED_LIMIT_INFORMATION = unsafe { std::mem::zeroed() };
        limits.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;

        // SAFETY: `handle` is the live job created above; the pointer and the
        // size describe `limits`, which outlives the call.
        let configured = unsafe {
            SetInformationJobObject(
                handle,
                JobObjectExtendedLimitInformation,
                &limits as *const _ as *const _,
                std::mem::size_of::<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>() as u32,
            )
        };
        if configured == FALSE {
            // Read the error first: CloseHandle may clobber it.
            let err = io::Error::last_os_error();
            // SAFETY: `handle` is owned here and has not been published.
            unsafe {
                CloseHandle(handle);
            }
            return Err(err);
        }

        *slot = handle;
        self.initialized.store(true, Ordering::SeqCst);
        info!("Windows Job Object created with KILL_ON_JOB_CLOSE");
        Ok(())
    }

    /// Creates a per-child lifecycle carrying a copy of the job handle (null
    /// when `init()` has not succeeded yet).
    fn create_lifecycle(&self) -> Box<dyn ProcessLifecycle> {
        Box::new(WindowsProcessLifecycle {
            job_handle: self.handle(),
        })
    }
}
impl Drop for WindowsProcessGroupManager {
    /// Closes the Job Object handle, if one was ever created.
    fn drop(&mut self) {
        let job = self.handle();
        if job.is_null() {
            // `init()` never stored a handle: nothing to release.
            return;
        }
        // SAFETY: a non-null handle stored in `job_handle` was returned by
        // `CreateJobObjectW` and is closed only here.
        unsafe {
            CloseHandle(job);
        }
        debug!("Windows Job Object closed");
    }
}
/// Per-child lifecycle manager for Windows.
///
/// Assigns children to the parent's Job Object and uses
/// `CTRL_BREAK_EVENT` / `TerminateProcess` for shutdown.
pub struct WindowsProcessLifecycle {
job_handle: HANDLE,
}
// Safety: same reasoning as WindowsProcessGroupManager — HANDLE is used
// read-only after construction (only passed to OS APIs).
unsafe impl Send for WindowsProcessLifecycle {}
unsafe impl Sync for WindowsProcessLifecycle {}
impl ProcessLifecycle for WindowsProcessLifecycle {
fn configure_command(&self, cmd: &mut std::process::Command) {
use std::os::windows::process::CommandExt;
const CREATE_NEW_PROCESS_GROUP: u32 = 0x0000_0200;
const CREATE_NO_WINDOW: u32 = 0x0800_0000;
cmd.creation_flags(CREATE_NEW_PROCESS_GROUP | CREATE_NO_WINDOW);
}
#[cfg(feature = "tokio")]
fn configure_async_command(&self, cmd: &mut tokio::process::Command) {
use std::os::windows::process::CommandExt;
const CREATE_NEW_PROCESS_GROUP: u32 = 0x0000_0200;
const CREATE_NO_WINDOW: u32 = 0x0800_0000;
cmd.creation_flags(CREATE_NEW_PROCESS_GROUP | CREATE_NO_WINDOW);
}
fn register_child(&self, pid: u32) -> Result<(), io::Error> {
if self.job_handle.is_null() {
warn!(pid, "Job Object not initialized, skipping child registration");
return Ok(());
}
unsafe {
let process_handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid);
if process_handle.is_null() {
return Err(io::Error::last_os_error());
}
let result = AssignProcessToJobObject(self.job_handle, process_handle);
CloseHandle(process_handle);
if result == FALSE {
let err = io::Error::last_os_error();
warn!(pid, error = %err, "Failed to assign process to Job Object (may already be in a job)");
return Err(err);
}
debug!(pid, "Process assigned to Job Object");
Ok(())
}
}
fn send_shutdown_signal(&self, pid: u32) -> Result<(), io::Error> {
unsafe {
if GenerateConsoleCtrlEvent(CTRL_BREAK_EVENT, pid) == FALSE {
return Err(io::Error::last_os_error());
}
}
debug!(pid, "Sent CTRL_BREAK_EVENT");
Ok(())
}
fn send_kill_signal(&self, pid: u32) -> Result<(), io::Error> {
unsafe {
let handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid);
if handle.is_null() {
let err = io::Error::last_os_error();
// ERROR_INVALID_PARAMETER (87) means process already exited
if err.raw_os_error() == Some(87) {
return Ok(());
}
return Err(err);
}
let result = TerminateProcess(handle, 1);
CloseHandle(handle);
if result == FALSE {
let err = io::Error::last_os_error();
// ERROR_ACCESS_DENIED (5) — process may have already exited
if err.raw_os_error() == Some(5) {
return Ok(());
}
return Err(err);
}
}
debug!(pid, "Sent TerminateProcess");
Ok(())
}
}
+149
View File
@@ -0,0 +1,149 @@
use dirigent_process::{create_manager, graceful_shutdown_sync};
use std::process::Command;
use std::time::Duration;
/// Returns a command that stays alive for a while without needing a TTY.
///
/// Windows variant: `timeout /t N /nobreak` refuses to run when stdin is a
/// pipe (no console), so `ping -n N 127.0.0.1` is used instead; it takes
/// roughly N-1 seconds and has no TTY requirement.
///
/// The Unix variant of this helper uses `sleep N`.
#[cfg(windows)]
fn long_sleep_cmd(seconds: u32) -> Command {
    let echo_count = seconds.to_string();
    let mut pinger = Command::new("ping");
    pinger.arg("-n").arg(&echo_count).arg("127.0.0.1");
    pinger
}
/// Unix variant: a plain `sleep <seconds>` invocation.
#[cfg(unix)]
fn long_sleep_cmd(seconds: u32) -> Command {
    let duration_arg = seconds.to_string();
    let mut sleeper = Command::new("sleep");
    sleeper.arg(duration_arg);
    sleeper
}
/// Tokio counterpart of the Windows `long_sleep_cmd`: `ping -n <seconds> 127.0.0.1`.
#[cfg(all(windows, feature = "tokio"))]
fn long_sleep_async_cmd(seconds: u32) -> tokio::process::Command {
    let echo_count = seconds.to_string();
    let mut pinger = tokio::process::Command::new("ping");
    pinger.arg("-n").arg(&echo_count).arg("127.0.0.1");
    pinger
}
/// Tokio counterpart of the Unix `long_sleep_cmd`: `sleep <seconds>`.
#[cfg(all(unix, feature = "tokio"))]
fn long_sleep_async_cmd(seconds: u32) -> tokio::process::Command {
    let duration_arg = seconds.to_string();
    let mut sleeper = tokio::process::Command::new("sleep");
    sleeper.arg(duration_arg);
    sleeper
}
/// `init()` succeeds, and succeeds again when repeated on the same manager.
#[test]
fn test_manager_init() {
    let manager = create_manager();
    // The second round checks that initialisation is idempotent.
    for failure_msg in ["init should succeed", "double init should succeed"] {
        manager.init().expect(failure_msg);
    }
}
/// An initialised manager can hand out a lifecycle handle.
#[test]
fn test_create_lifecycle() {
    let manager = create_manager();
    manager.init().expect("init failed");
    // Only construction is exercised; the handle is dropped at end of scope.
    let _handle = manager.create_lifecycle();
}
/// A configured command spawns, can be registered, and keeps running.
#[test]
fn test_configure_and_spawn() {
    let manager = create_manager();
    manager.init().expect("init failed");
    let lifecycle = manager.create_lifecycle();

    let mut child = {
        let mut command = long_sleep_cmd(30);
        lifecycle.configure_command(&mut command);
        command.spawn().expect("spawn failed")
    };
    let pid = child.id();
    assert!(pid > 0);

    // Registration must succeed for a freshly spawned child.
    lifecycle.register_child(pid).expect("register failed");

    // A 30-second sleeper cannot have finished yet.
    let status = child.try_wait().expect("try_wait failed");
    assert!(status.is_none());

    // Tear the child down so the test leaves nothing behind.
    let _ = child.kill();
    let _ = child.wait();
}
/// `graceful_shutdown_sync` always leaves the child dead and reaped; the
/// reported outcome depends on how the platform's helper command reacts to
/// the shutdown signal.
#[test]
fn test_graceful_shutdown_sync() {
    let mgr = create_manager();
    mgr.init().expect("init failed");
    let lifecycle = mgr.create_lifecycle();
    let mut cmd = long_sleep_cmd(60);
    lifecycle.configure_command(&mut cmd);
    let mut child = cmd.spawn().expect("spawn failed");
    let pid = child.id();
    lifecycle.register_child(pid).expect("register failed");

    // Graceful shutdown with a 3s timeout.
    let exited_gracefully = graceful_shutdown_sync(
        lifecycle.as_ref(),
        &mut child,
        Duration::from_secs(3),
    );

    // Process should be dead now
    assert!(child.try_wait().expect("try_wait failed").is_some());

    // Unix: `sleep` keeps the default SIGTERM disposition, so the SIGTERM
    // sent to its process group terminates it well within the timeout and
    // the shutdown counts as graceful.
    #[cfg(unix)]
    assert!(exited_gracefully);
    // Windows: original expectation retained — `ping` is expected not to
    // exit on CTRL_BREAK, so it ends up force-killed after the timeout.
    #[cfg(windows)]
    assert!(!exited_gracefully);
}
/// `send_kill_signal` terminates a running child with a non-success status.
#[test]
fn test_send_kill_signal() {
    let manager = create_manager();
    manager.init().expect("init failed");
    let lifecycle = manager.create_lifecycle();

    let mut child = {
        let mut command = long_sleep_cmd(60);
        lifecycle.configure_command(&mut command);
        command.spawn().expect("spawn failed")
    };
    let pid = child.id();
    lifecycle.register_child(pid).expect("register failed");

    // Skip the graceful phase and go straight for the kill.
    lifecycle.send_kill_signal(pid).expect("kill failed");

    // Reap the child; a killed process must not report success.
    let exit_status = child.wait().expect("wait failed");
    assert!(!exit_status.success());
}
/// Async counterpart of `test_graceful_shutdown_sync`: the child must be dead
/// afterwards, and the reported outcome depends on how the platform's helper
/// command reacts to the shutdown signal.
#[cfg(feature = "tokio")]
#[tokio::test]
async fn test_async_graceful_shutdown() {
    use dirigent_process::graceful_shutdown_async;
    let mgr = create_manager();
    mgr.init().expect("init failed");
    let lifecycle = mgr.create_lifecycle();
    let mut cmd = long_sleep_async_cmd(60);
    lifecycle.configure_async_command(&mut cmd);
    let mut child = cmd.spawn().expect("spawn failed");
    let pid = child.id().expect("no pid");
    lifecycle.register_child(pid).expect("register failed");

    let exited_gracefully = graceful_shutdown_async(
        lifecycle.as_ref(),
        &mut child,
        Duration::from_secs(3),
    )
    .await;

    assert!(child.try_wait().expect("try_wait failed").is_some());

    // Unix: `sleep` keeps the default SIGTERM disposition, so the SIGTERM
    // sent to its process group terminates it well within the timeout and
    // the shutdown counts as graceful.
    #[cfg(unix)]
    assert!(exited_gracefully);
    // Windows: original expectation retained — `ping` is expected not to
    // exit on CTRL_BREAK, so it ends up force-killed after the timeout.
    #[cfg(windows)]
    assert!(!exited_gracefully);
}