396 lines
13 KiB
Rust
396 lines
13 KiB
Rust
//! Actor lifecycle management functionality for the Hero Supervisor
|
|
//!
|
|
//! This module provides actor process lifecycle management using Zinit as the process manager.
|
|
//! All functionality is implemented as methods on the Supervisor struct for a clean API.
|
|
|
|
use log::{debug, error, info, warn};
|
|
use serde_json::json;
|
|
use std::collections::HashMap;
|
|
use std::path::PathBuf;
|
|
use std::time::Duration;
|
|
use zinit_client::{Client as ZinitClient, Status};
|
|
use hero_job::ScriptType;
|
|
use crate::{Supervisor, SupervisorError};
|
|
|
|
/// Information about a actor including its configuration and current status
|
|
#[derive(Debug, Clone)]
|
|
pub struct ActorInfo {
|
|
pub config: ActorConfig,
|
|
pub status: Option<Status>,
|
|
pub is_running: bool,
|
|
}
|
|
|
|
/// Configuration for a actor binary
|
|
#[derive(Debug, Clone)]
|
|
pub struct ActorConfig {
|
|
/// Name of the actor service
|
|
pub name: String,
|
|
/// Path to the actor binary
|
|
pub binary_path: PathBuf,
|
|
/// Script type this actor handles
|
|
pub script_type: ScriptType,
|
|
/// Command line arguments for the actor
|
|
pub args: Vec<String>,
|
|
/// Environment variables for the actor
|
|
pub env: HashMap<String, String>,
|
|
/// Whether this actor should restart on exit
|
|
pub restart_on_exit: bool,
|
|
/// Health check command (optional)
|
|
pub health_check: Option<String>,
|
|
/// Dependencies that must be running first
|
|
pub dependencies: Vec<String>,
|
|
}
|
|
|
|
impl ActorConfig {
|
|
pub fn new(name: String, binary_path: PathBuf, script_type: ScriptType) -> Self {
|
|
Self {
|
|
name,
|
|
binary_path,
|
|
script_type,
|
|
args: Vec::new(),
|
|
env: HashMap::new(),
|
|
restart_on_exit: true,
|
|
health_check: None,
|
|
dependencies: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn with_args(mut self, args: Vec<String>) -> Self {
|
|
self.args = args;
|
|
self
|
|
}
|
|
|
|
pub fn with_env(mut self, env: HashMap<String, String>) -> Self {
|
|
self.env = env;
|
|
self
|
|
}
|
|
|
|
pub fn with_health_check(mut self, health_check: String) -> Self {
|
|
self.health_check = Some(health_check);
|
|
self
|
|
}
|
|
|
|
pub fn with_dependencies(mut self, dependencies: Vec<String>) -> Self {
|
|
self.dependencies = dependencies;
|
|
self
|
|
}
|
|
|
|
pub fn no_restart(mut self) -> Self {
|
|
self.restart_on_exit = false;
|
|
self
|
|
}
|
|
}
|
|
|
|
/// Actor lifecycle management methods for Supervisor
|
|
impl Supervisor {
|
|
/// Get all actors with their configuration and status - unified method
|
|
pub async fn get_actors(&self, actor_configs: &[ActorConfig]) -> Vec<ActorInfo> {
|
|
let mut actors = Vec::new();
|
|
|
|
for config in actor_configs {
|
|
let status = self.zinit_client.status(&config.name).await.ok();
|
|
let is_running = status.as_ref()
|
|
.map(|s| s.state == "running" && s.pid > 0)
|
|
.unwrap_or(false);
|
|
|
|
actors.push(ActorInfo {
|
|
config: config.clone(),
|
|
status,
|
|
is_running,
|
|
});
|
|
}
|
|
|
|
actors
|
|
}
|
|
|
|
/// Start a actor using Zinit
|
|
pub async fn start_actor(
|
|
&self,
|
|
actor_config: &ActorConfig,
|
|
) -> Result<(), SupervisorError> {
|
|
info!("Starting actor: {}", actor_config.name);
|
|
|
|
// Create service configuration for Zinit
|
|
let service_config = self.create_service_config(actor_config);
|
|
|
|
// Create the service in Zinit
|
|
self.zinit_client.create_service(&actor_config.name, service_config).await
|
|
.map_err(|e| SupervisorError::ZinitError(format!("Failed to create service: {}", e)))?;
|
|
|
|
// Monitor the service so Zinit starts managing it
|
|
self.zinit_client.monitor(&actor_config.name).await
|
|
.map_err(|e| SupervisorError::ZinitError(format!("Failed to monitor service: {}", e)))?;
|
|
|
|
// Start the service
|
|
self.zinit_client.start(&actor_config.name).await
|
|
.map_err(|e| SupervisorError::ZinitError(format!("Failed to start actor: {}", e)))?;
|
|
|
|
info!("Successfully started actor: {}", actor_config.name);
|
|
Ok(())
|
|
}
|
|
|
|
/// Stop a actor using Zinit
|
|
pub async fn stop_actor(
|
|
&self,
|
|
actor_name: &str,
|
|
) -> Result<(), SupervisorError> {
|
|
info!("Stopping actor: {}", actor_name);
|
|
|
|
match self.zinit_client.stop(actor_name).await {
|
|
Ok(_) => {
|
|
info!("Successfully stopped actor: {}", actor_name);
|
|
Ok(())
|
|
}
|
|
Err(e) => {
|
|
error!("Failed to stop actor {}: {}", actor_name, e);
|
|
Err(SupervisorError::ActorStopFailed(actor_name.to_string(), e.to_string()))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Restart a actor using Zinit
|
|
pub async fn restart_actor(
|
|
&self,
|
|
actor_name: &str,
|
|
) -> Result<(), SupervisorError> {
|
|
info!("Restarting actor: {}", actor_name);
|
|
|
|
match self.zinit_client.restart(actor_name).await {
|
|
Ok(_) => {
|
|
info!("Successfully restarted actor: {}", actor_name);
|
|
Ok(())
|
|
}
|
|
Err(e) => {
|
|
error!("Failed to restart actor {}: {}", actor_name, e);
|
|
Err(SupervisorError::ActorRestartFailed(actor_name.to_string(), e.to_string()))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Get status of a actor using Zinit
|
|
pub async fn get_actor_status(
|
|
&self,
|
|
actor_name: &str,
|
|
zinit_client: &ZinitClient,
|
|
) -> Result<Status, SupervisorError> {
|
|
match zinit_client.status(actor_name).await {
|
|
Ok(status) => Ok(status),
|
|
Err(e) => {
|
|
error!("Failed to get status for actor {}: {}", actor_name, e);
|
|
Err(SupervisorError::ActorStatusFailed(actor_name.to_string(), e.to_string()))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Get status of all actors
|
|
pub async fn get_all_actor_status(
|
|
&self,
|
|
actor_configs: &[ActorConfig],
|
|
zinit_client: &ZinitClient,
|
|
) -> Result<HashMap<String, Status>, SupervisorError> {
|
|
let mut status_map = HashMap::new();
|
|
|
|
for actor in actor_configs {
|
|
match zinit_client.status(&actor.name).await {
|
|
Ok(status) => {
|
|
status_map.insert(actor.name.clone(), status);
|
|
}
|
|
Err(e) => {
|
|
warn!("Failed to get status for actor {}: {}", actor.name, e);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(status_map)
|
|
}
|
|
|
|
|
|
|
|
/// Stop multiple actors
|
|
pub async fn stop_actors(
|
|
&self,
|
|
actor_names: &[String],
|
|
) -> Result<(), SupervisorError> {
|
|
info!("Stopping {} actors", actor_names.len());
|
|
|
|
for actor_name in actor_names {
|
|
self.stop_actor(actor_name).await?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Get count of running actors for a script type
|
|
pub async fn get_running_actor_count(
|
|
&self,
|
|
actor_configs: &[ActorConfig],
|
|
script_type: &ScriptType,
|
|
zinit_client: &ZinitClient,
|
|
) -> usize {
|
|
let mut running_count = 0;
|
|
|
|
for actor in actor_configs {
|
|
if actor.script_type == *script_type {
|
|
if let Ok(status) = zinit_client.status(&actor.name).await {
|
|
if status.state == "running" {
|
|
running_count += 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
running_count
|
|
}
|
|
|
|
/// Send a ping job to a actor for health checking
|
|
pub async fn send_ping_job(
|
|
&self,
|
|
script_type: ScriptType,
|
|
) -> Result<(), SupervisorError> {
|
|
// Create a ping job
|
|
let ping_job = self
|
|
.new_job()
|
|
.script_type(script_type.clone())
|
|
.script("ping") // Simple ping script
|
|
.timeout(Duration::from_secs(30))
|
|
.build()?;
|
|
|
|
// Execute the ping job with a short timeout
|
|
match self.run_job_and_await_result(&ping_job).await {
|
|
Ok(_) => {
|
|
debug!("Ping job successful for script type: {:?}", script_type);
|
|
Ok(())
|
|
}
|
|
Err(e) => {
|
|
warn!("Ping job failed for script type {:?}: {}", script_type, e);
|
|
Err(SupervisorError::PingJobFailed(format!("{:?}", script_type), e.to_string()))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Create Zinit service configuration from actor config
|
|
fn create_service_config(&self, actor: &ActorConfig) -> serde_json::Map<String, serde_json::Value> {
|
|
use serde_json::{Map, Value};
|
|
|
|
let mut config = Map::new();
|
|
|
|
config.insert(
|
|
"exec".to_string(),
|
|
Value::String(format!("{} {}",
|
|
actor.binary_path.display(),
|
|
actor.args.join(" ")
|
|
))
|
|
);
|
|
|
|
config.insert(
|
|
"oneshot".to_string(),
|
|
Value::Bool(!actor.restart_on_exit)
|
|
);
|
|
|
|
if let Some(health_check) = &actor.health_check {
|
|
config.insert("test".to_string(), Value::String(health_check.clone()));
|
|
}
|
|
|
|
if !actor.dependencies.is_empty() {
|
|
config.insert("after".to_string(), json!(actor.dependencies));
|
|
}
|
|
|
|
// Add environment variables if any
|
|
if !actor.env.is_empty() {
|
|
config.insert("env".to_string(), json!(actor.env));
|
|
}
|
|
|
|
config
|
|
}
|
|
|
|
/// Launch actors based on SupervisorBuilder configuration
|
|
pub(crate) async fn launch_configured_actors(&self, builder: &crate::SupervisorBuilderData) -> Result<(), SupervisorError> {
|
|
use hero_job::ScriptType;
|
|
use std::path::PathBuf;
|
|
|
|
let mut errors = Vec::new();
|
|
|
|
// Launch OSIS actor if configured
|
|
if let Some(binary_path) = &builder.osis_actor {
|
|
let actor_id = "osis_actor_1";
|
|
let mut config = ActorConfig::new(
|
|
actor_id.to_string(),
|
|
PathBuf::from(binary_path),
|
|
ScriptType::OSIS
|
|
);
|
|
config.env.extend(builder.actor_env_vars.clone());
|
|
|
|
info!("Launching OSIS actor: {}", actor_id);
|
|
if let Err(e) = self.start_actor(&config).await {
|
|
let error_msg = format!("Failed to start OSIS actor: {}", e);
|
|
warn!("{}", error_msg);
|
|
errors.push(error_msg);
|
|
}
|
|
}
|
|
|
|
// Launch SAL actor if configured
|
|
if let Some(binary_path) = &builder.sal_actor {
|
|
let actor_id = "sal_actor_1";
|
|
let mut config = ActorConfig::new(
|
|
actor_id.to_string(),
|
|
PathBuf::from(binary_path),
|
|
ScriptType::SAL
|
|
);
|
|
config.env.extend(builder.actor_env_vars.clone());
|
|
|
|
info!("Launching SAL actor: {}", actor_id);
|
|
if let Err(e) = self.start_actor(&config).await {
|
|
let error_msg = format!("Failed to start SAL actor: {}", e);
|
|
warn!("{}", error_msg);
|
|
errors.push(error_msg);
|
|
}
|
|
}
|
|
|
|
// Launch V actor if configured
|
|
if let Some(binary_path) = &builder.v_actor {
|
|
let actor_id = "v_actor_1";
|
|
let mut config = ActorConfig::new(
|
|
actor_id.to_string(),
|
|
PathBuf::from(binary_path),
|
|
ScriptType::V
|
|
);
|
|
config.env.extend(builder.actor_env_vars.clone());
|
|
|
|
info!("Launching V actor: {}", actor_id);
|
|
if let Err(e) = self.start_actor(&config).await {
|
|
let error_msg = format!("Failed to start V actor: {}", e);
|
|
warn!("{}", error_msg);
|
|
errors.push(error_msg);
|
|
}
|
|
}
|
|
|
|
// Launch Python actor if configured
|
|
if let Some(binary_path) = &builder.python_actor {
|
|
let actor_id = "python_actor_1";
|
|
let mut config = ActorConfig::new(
|
|
actor_id.to_string(),
|
|
PathBuf::from(binary_path),
|
|
ScriptType::Python
|
|
);
|
|
config.env.extend(builder.actor_env_vars.clone());
|
|
|
|
info!("Launching Python actor: {}", actor_id);
|
|
if let Err(e) = self.start_actor(&config).await {
|
|
let error_msg = format!("Failed to start Python actor: {}", e);
|
|
warn!("{}", error_msg);
|
|
errors.push(error_msg);
|
|
}
|
|
}
|
|
|
|
// Return result based on whether any actors started successfully
|
|
if errors.is_empty() {
|
|
info!("All configured actors started successfully");
|
|
Ok(())
|
|
} else {
|
|
let combined_error = format!("Some actors failed to start: {}", errors.join("; "));
|
|
warn!("{}", combined_error);
|
|
Err(SupervisorError::ZinitError(combined_error))
|
|
}
|
|
}
|
|
}
|