baobab/core/supervisor/src/lifecycle.rs
2025-08-05 15:44:33 +02:00

396 lines
13 KiB
Rust

//! Actor lifecycle management functionality for the Hero Supervisor
//!
//! This module provides actor process lifecycle management using Zinit as the process manager.
//! All functionality is implemented as methods on the Supervisor struct for a clean API.
use log::{debug, error, info, warn};
use serde_json::json;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;
use zinit_client::{Client as ZinitClient, Status};
use hero_job::ScriptType;
use crate::{Supervisor, SupervisorError};
/// Information about an actor: its configuration together with the status
/// most recently obtained from Zinit.
///
/// Values of this type are produced by `Supervisor::get_actors`.
#[derive(Debug, Clone)]
pub struct ActorInfo {
/// Configuration describing the actor (a clone of the caller-supplied config).
pub config: ActorConfig,
/// Status reported by Zinit, or `None` when the status query failed.
pub status: Option<Status>,
/// `true` only when a status was obtained, its state is `"running"`, and its
/// pid is greater than zero.
pub is_running: bool,
}
/// Configuration for an actor binary.
///
/// `Supervisor::create_service_config` translates this into the service
/// definition that is handed to Zinit.
#[derive(Debug, Clone)]
pub struct ActorConfig {
/// Name of the actor service; used as the Zinit service name.
pub name: String,
/// Path to the actor binary; forms the first part of the service `exec` command.
pub binary_path: PathBuf,
/// Script type this actor handles
pub script_type: ScriptType,
/// Command line arguments for the actor, appended to the binary path
/// separated by single spaces.
pub args: Vec<String>,
/// Environment variables for the actor; emitted under the `env` key when non-empty.
pub env: HashMap<String, String>,
/// Whether this actor should restart on exit. The service is marked
/// `oneshot` when this is `false`.
pub restart_on_exit: bool,
/// Health check command (optional); emitted under the `test` key when set.
pub health_check: Option<String>,
/// Dependencies that must be running first; emitted under the `after` key
/// when non-empty.
pub dependencies: Vec<String>,
}
impl ActorConfig {
    /// Create a configuration for the named actor.
    ///
    /// Defaults: no arguments, no environment variables, restart on exit
    /// enabled, no health check, and no dependencies.
    pub fn new(name: String, binary_path: PathBuf, script_type: ScriptType) -> Self {
        let args: Vec<String> = Vec::new();
        let env: HashMap<String, String> = HashMap::new();
        let dependencies: Vec<String> = Vec::new();
        Self {
            name,
            binary_path,
            script_type,
            args,
            env,
            restart_on_exit: true,
            health_check: None,
            dependencies,
        }
    }

    /// Replace the command line arguments.
    pub fn with_args(self, args: Vec<String>) -> Self {
        Self { args, ..self }
    }

    /// Replace the environment variables.
    pub fn with_env(self, env: HashMap<String, String>) -> Self {
        Self { env, ..self }
    }

    /// Set the health check command.
    pub fn with_health_check(self, health_check: String) -> Self {
        Self {
            health_check: Some(health_check),
            ..self
        }
    }

    /// Replace the list of dependencies.
    pub fn with_dependencies(self, dependencies: Vec<String>) -> Self {
        Self { dependencies, ..self }
    }

    /// Disable restarting the actor when it exits.
    pub fn no_restart(self) -> Self {
        Self {
            restart_on_exit: false,
            ..self
        }
    }
}
/// Actor lifecycle management methods for Supervisor
impl Supervisor {
/// Get all actors with their configuration and status - unified method
pub async fn get_actors(&self, actor_configs: &[ActorConfig]) -> Vec<ActorInfo> {
let mut actors = Vec::new();
for config in actor_configs {
let status = self.zinit_client.status(&config.name).await.ok();
let is_running = status.as_ref()
.map(|s| s.state == "running" && s.pid > 0)
.unwrap_or(false);
actors.push(ActorInfo {
config: config.clone(),
status,
is_running,
});
}
actors
}
/// Start an actor using Zinit.
///
/// Performs three Zinit calls in order: create the service from the actor's
/// configuration, monitor it, then start it.
///
/// # Errors
///
/// Returns `SupervisorError::ZinitError` as soon as any of the three calls
/// fails; later steps are not attempted and earlier steps are not rolled back.
pub async fn start_actor(
    &self,
    actor_config: &ActorConfig,
) -> Result<(), SupervisorError> {
    let name = &actor_config.name;
    info!("Starting actor: {}", name);

    // Step 1: register the service definition with Zinit.
    let service_config = self.create_service_config(actor_config);
    if let Err(e) = self.zinit_client.create_service(name, service_config).await {
        return Err(SupervisorError::ZinitError(format!("Failed to create service: {}", e)));
    }

    // Step 2: have Zinit monitor the service so it is managed.
    if let Err(e) = self.zinit_client.monitor(name).await {
        return Err(SupervisorError::ZinitError(format!("Failed to monitor service: {}", e)));
    }

    // Step 3: start the service.
    if let Err(e) = self.zinit_client.start(name).await {
        return Err(SupervisorError::ZinitError(format!("Failed to start actor: {}", e)));
    }

    info!("Successfully started actor: {}", name);
    Ok(())
}
/// Stop an actor using Zinit.
///
/// # Errors
///
/// Returns `SupervisorError::ActorStopFailed` carrying the actor name and the
/// Zinit error text when the stop request fails. The failure is also logged
/// at error level.
pub async fn stop_actor(
    &self,
    actor_name: &str,
) -> Result<(), SupervisorError> {
    info!("Stopping actor: {}", actor_name);

    let outcome = self.zinit_client.stop(actor_name).await;
    if let Err(e) = outcome {
        error!("Failed to stop actor {}: {}", actor_name, e);
        return Err(SupervisorError::ActorStopFailed(actor_name.to_string(), e.to_string()));
    }

    info!("Successfully stopped actor: {}", actor_name);
    Ok(())
}
/// Restart an actor using Zinit.
///
/// # Errors
///
/// Returns `SupervisorError::ActorRestartFailed` carrying the actor name and
/// the Zinit error text when the restart request fails. The failure is also
/// logged at error level.
pub async fn restart_actor(
    &self,
    actor_name: &str,
) -> Result<(), SupervisorError> {
    info!("Restarting actor: {}", actor_name);

    let outcome = self.zinit_client.restart(actor_name).await;
    if let Err(e) = outcome {
        error!("Failed to restart actor {}: {}", actor_name, e);
        return Err(SupervisorError::ActorRestartFailed(actor_name.to_string(), e.to_string()));
    }

    info!("Successfully restarted actor: {}", actor_name);
    Ok(())
}
/// Fetch the Zinit status of a single actor.
///
/// The query goes through the `zinit_client` passed by the caller, not the
/// supervisor's own client.
///
/// # Errors
///
/// Returns `SupervisorError::ActorStatusFailed` carrying the actor name and
/// the Zinit error text when the query fails. The failure is also logged at
/// error level.
pub async fn get_actor_status(
    &self,
    actor_name: &str,
    zinit_client: &ZinitClient,
) -> Result<Status, SupervisorError> {
    zinit_client.status(actor_name).await.map_err(|e| {
        error!("Failed to get status for actor {}: {}", actor_name, e);
        SupervisorError::ActorStatusFailed(actor_name.to_string(), e.to_string())
    })
}
/// Fetch the Zinit status of every supplied actor, keyed by actor name.
///
/// Queries go through the `zinit_client` passed by the caller. Actors whose
/// status cannot be fetched are logged at warn level and left out of the map,
/// so the returned map may hold fewer entries than `actor_configs`.
///
/// As written this function always returns `Ok`.
pub async fn get_all_actor_status(
    &self,
    actor_configs: &[ActorConfig],
    zinit_client: &ZinitClient,
) -> Result<HashMap<String, Status>, SupervisorError> {
    let mut statuses = HashMap::new();
    for cfg in actor_configs.iter() {
        let status = match zinit_client.status(&cfg.name).await {
            Ok(status) => status,
            Err(e) => {
                // Best effort: skip this actor and keep going.
                warn!("Failed to get status for actor {}: {}", cfg.name, e);
                continue;
            }
        };
        statuses.insert(cfg.name.clone(), status);
    }
    Ok(statuses)
}
/// Stop multiple actors, attempting every one even if some fail.
///
/// Actors are stopped sequentially in the order given. A failure to stop one
/// actor does not prevent the remaining actors from being asked to stop;
/// aborting early would leave the rest of the batch running.
///
/// # Errors
///
/// Returns the first error produced by `stop_actor`, after all actors have
/// been attempted. Each individual failure is logged by `stop_actor` itself.
pub async fn stop_actors(
    &self,
    actor_names: &[String],
) -> Result<(), SupervisorError> {
    info!("Stopping {} actors", actor_names.len());

    let mut first_error = None;
    for actor_name in actor_names {
        if let Err(e) = self.stop_actor(actor_name).await {
            // Keep only the earliest failure; later ones were already logged.
            first_error.get_or_insert(e);
        }
    }

    match first_error {
        Some(e) => Err(e),
        None => Ok(()),
    }
}
/// Count the actors of the given script type that Zinit reports as running.
///
/// Only configs whose `script_type` equals `script_type` are queried, using
/// the `zinit_client` passed by the caller. An actor is counted when its
/// reported state is `"running"`; actors whose status query fails are not
/// counted.
///
/// NOTE(review): unlike `get_actors`, this check does not also require a pid
/// greater than zero — confirm whether the two should agree.
pub async fn get_running_actor_count(
    &self,
    actor_configs: &[ActorConfig],
    script_type: &ScriptType,
    zinit_client: &ZinitClient,
) -> usize {
    let mut count = 0;
    for cfg in actor_configs {
        // Skip actors handling a different script type without querying Zinit.
        if !(cfg.script_type == *script_type) {
            continue;
        }
        let running = match zinit_client.status(&cfg.name).await {
            Ok(status) => status.state == "running",
            Err(_) => false,
        };
        if running {
            count += 1;
        }
    }
    count
}
/// Run a ping job for the given script type as a health check.
///
/// Builds a job whose script is `"ping"` with a 30-second timeout, runs it,
/// and waits for the result. The job is addressed by script type; no specific
/// actor name is involved.
///
/// # Errors
///
/// Propagates the error from building the job. If running the job fails,
/// returns `SupervisorError::PingJobFailed` carrying the debug form of the
/// script type and the error text; that failure is also logged at warn level.
pub async fn send_ping_job(
    &self,
    script_type: ScriptType,
) -> Result<(), SupervisorError> {
    let job_timeout = Duration::from_secs(30);

    // The script type is cloned because it is still needed for logging below.
    let ping_job = self
        .new_job()
        .script_type(script_type.clone())
        .script("ping")
        .timeout(job_timeout)
        .build()?;

    if let Err(e) = self.run_job_and_await_result(&ping_job).await {
        warn!("Ping job failed for script type {:?}: {}", script_type, e);
        return Err(SupervisorError::PingJobFailed(format!("{:?}", script_type), e.to_string()));
    }

    debug!("Ping job successful for script type: {:?}", script_type);
    Ok(())
}
/// Create the Zinit service configuration for an actor.
///
/// The resulting map contains:
/// - `exec`: the binary path followed by the arguments, separated by single
///   spaces. With no arguments it is just the binary path, with no trailing
///   space.
/// - `oneshot`: `true` when the actor must not be restarted on exit.
/// - `test`: the health check command, only when one is configured.
/// - `after`: the dependency list, only when non-empty.
/// - `env`: the environment variables, only when non-empty.
///
/// NOTE(review): arguments are joined verbatim without quoting, so an argument
/// containing whitespace will presumably be split by whatever parses `exec` —
/// confirm against Zinit's command parsing before passing such arguments.
fn create_service_config(&self, actor: &ActorConfig) -> serde_json::Map<String, serde_json::Value> {
    use serde_json::{Map, Value};

    // Append each argument with a leading space so that an empty argument
    // list leaves the command as the bare binary path.
    let mut exec = actor.binary_path.display().to_string();
    for arg in &actor.args {
        exec.push(' ');
        exec.push_str(arg);
    }

    let mut config = Map::new();
    config.insert("exec".to_string(), Value::String(exec));
    // `oneshot` is the inverse of "restart on exit".
    config.insert(
        "oneshot".to_string(),
        Value::Bool(!actor.restart_on_exit)
    );
    if let Some(health_check) = &actor.health_check {
        config.insert("test".to_string(), Value::String(health_check.clone()));
    }
    if !actor.dependencies.is_empty() {
        config.insert("after".to_string(), json!(actor.dependencies));
    }
    // Add environment variables if any
    if !actor.env.is_empty() {
        config.insert("env".to_string(), json!(actor.env));
    }
    config
}
/// Launch actors based on SupervisorBuilder configuration
pub(crate) async fn launch_configured_actors(&self, builder: &crate::SupervisorBuilderData) -> Result<(), SupervisorError> {
use hero_job::ScriptType;
use std::path::PathBuf;
let mut errors = Vec::new();
// Launch OSIS actor if configured
if let Some(binary_path) = &builder.osis_actor {
let actor_id = "osis_actor_1";
let mut config = ActorConfig::new(
actor_id.to_string(),
PathBuf::from(binary_path),
ScriptType::OSIS
);
config.env.extend(builder.actor_env_vars.clone());
info!("Launching OSIS actor: {}", actor_id);
if let Err(e) = self.start_actor(&config).await {
let error_msg = format!("Failed to start OSIS actor: {}", e);
warn!("{}", error_msg);
errors.push(error_msg);
}
}
// Launch SAL actor if configured
if let Some(binary_path) = &builder.sal_actor {
let actor_id = "sal_actor_1";
let mut config = ActorConfig::new(
actor_id.to_string(),
PathBuf::from(binary_path),
ScriptType::SAL
);
config.env.extend(builder.actor_env_vars.clone());
info!("Launching SAL actor: {}", actor_id);
if let Err(e) = self.start_actor(&config).await {
let error_msg = format!("Failed to start SAL actor: {}", e);
warn!("{}", error_msg);
errors.push(error_msg);
}
}
// Launch V actor if configured
if let Some(binary_path) = &builder.v_actor {
let actor_id = "v_actor_1";
let mut config = ActorConfig::new(
actor_id.to_string(),
PathBuf::from(binary_path),
ScriptType::V
);
config.env.extend(builder.actor_env_vars.clone());
info!("Launching V actor: {}", actor_id);
if let Err(e) = self.start_actor(&config).await {
let error_msg = format!("Failed to start V actor: {}", e);
warn!("{}", error_msg);
errors.push(error_msg);
}
}
// Launch Python actor if configured
if let Some(binary_path) = &builder.python_actor {
let actor_id = "python_actor_1";
let mut config = ActorConfig::new(
actor_id.to_string(),
PathBuf::from(binary_path),
ScriptType::Python
);
config.env.extend(builder.actor_env_vars.clone());
info!("Launching Python actor: {}", actor_id);
if let Err(e) = self.start_actor(&config).await {
let error_msg = format!("Failed to start Python actor: {}", e);
warn!("{}", error_msg);
errors.push(error_msg);
}
}
// Return result based on whether any actors started successfully
if errors.is_empty() {
info!("All configured actors started successfully");
Ok(())
} else {
let combined_error = format!("Some actors failed to start: {}", errors.join("; "));
warn!("{}", combined_error);
Err(SupervisorError::ZinitError(combined_error))
}
}
}