sal/service_manager/src/launchctl.rs
Mahmoud-Emad 95122dffee feat: Improve service manager testing and error handling
- Add comprehensive testing instructions to README.
- Improve error handling in examples to prevent crashes.
- Enhance launchctl error handling for production safety.
- Improve zinit error handling for production safety.
- Remove obsolete plan_to_fix.md file.
- Update Rhai integration tests for improved robustness.
- Improve service manager creation on Linux with systemd fallback.
2025-07-02 12:05:03 +03:00

493 lines
18 KiB
Rust

use crate::{ServiceConfig, ServiceManager, ServiceManagerError, ServiceStatus};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
use tokio::process::Command;
use tokio::runtime::Runtime;
/// Process-wide Tokio runtime shared by every blocking entry point in this module.
///
/// Built lazily on first access. If construction fails, the error text is kept so
/// that `get_runtime` can report it on each call instead of panicking.
static ASYNC_RUNTIME: Lazy<Result<Runtime, String>> =
    Lazy::new(|| Runtime::new().map_err(|e| e.to_string()));

/// Returns the shared runtime used to drive this module's async helpers.
///
/// `Runtime::block_on` takes `&self`, so callers only need a borrow. Handing out the
/// static runtime avoids building and tearing down a fresh multi-threaded runtime
/// on every service-manager call.
///
/// # Errors
///
/// Returns `ServiceManagerError::Other` when the shared runtime could not be created.
fn get_runtime() -> Result<&'static Runtime, ServiceManagerError> {
    Lazy::force(&ASYNC_RUNTIME).as_ref().map_err(|e| {
        ServiceManagerError::Other(format!("Failed to create async runtime: {}", e))
    })
}
/// `ServiceManager` implementation that drives services through the `launchctl`
/// command-line tool and per-user plist files under `$HOME/Library/LaunchAgents`.
#[derive(Debug)]
pub struct LaunchctlServiceManager {
/// Prefix joined to a service name with a `.` to form the label passed to `launchctl`.
service_prefix: String,
}
/// Serde model of the property list written for each managed service.
///
/// Field names are snake_case in Rust and serialized in PascalCase, which yields
/// the keys `Label`, `ProgramArguments`, `WorkingDirectory`,
/// `EnvironmentVariables`, `KeepAlive`, `RunAtLoad`, `StandardOutPath` and
/// `StandardErrorPath`. Optional fields holding `None` are left out of the output.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "PascalCase")]
struct LaunchDaemon {
    /// Full label of the job (prefix plus service name).
    label: String,
    /// Executable path followed by its arguments.
    program_arguments: Vec<String>,
    /// Directory the process is started in, when configured.
    #[serde(skip_serializing_if = "Option::is_none")]
    working_directory: Option<String>,
    /// Extra environment for the process, when any is configured.
    #[serde(skip_serializing_if = "Option::is_none")]
    environment_variables: Option<HashMap<String, String>>,
    /// Set to `Some(true)` when the service should be restarted automatically.
    #[serde(skip_serializing_if = "Option::is_none")]
    keep_alive: Option<bool>,
    /// Whether the job is marked to run as soon as it is loaded.
    run_at_load: bool,
    /// File receiving the process's standard output.
    #[serde(skip_serializing_if = "Option::is_none")]
    standard_out_path: Option<String>,
    /// File receiving the process's standard error.
    #[serde(skip_serializing_if = "Option::is_none")]
    standard_error_path: Option<String>,
}
impl LaunchctlServiceManager {
/// Creates a manager whose service labels are prefixed with `tf.ourworld.circles`.
pub fn new() -> Self {
    let service_prefix = String::from("tf.ourworld.circles");
    Self { service_prefix }
}
/// Builds the label for `service_name`: the manager's prefix, a dot, then the name.
fn get_service_label(&self, service_name: &str) -> String {
    [self.service_prefix.as_str(), service_name].join(".")
}
/// Returns `<home>/Library/LaunchAgents/<label>.plist` for `service_name`.
///
/// `<home>` is the `HOME` environment variable, or `/tmp` when it cannot be read.
fn get_plist_path(&self, service_name: &str) -> PathBuf {
    let file_name = format!("{}.plist", self.get_service_label(service_name));
    let mut path = match std::env::var("HOME") {
        Ok(home) => PathBuf::from(home),
        Err(_) => PathBuf::from("/tmp"),
    };
    path.push("Library");
    path.push("LaunchAgents");
    path.push(file_name);
    path
}
/// Returns `<home>/Library/Logs/circles/<service_name>.log`.
///
/// `<home>` is the `HOME` environment variable, or `/tmp` when it cannot be read.
/// The file name uses the bare service name, not the prefixed label.
fn get_log_path(&self, service_name: &str) -> PathBuf {
    let file_name = format!("{}.log", service_name);
    let mut path = match std::env::var("HOME") {
        Ok(home) => PathBuf::from(home),
        Err(_) => PathBuf::from("/tmp"),
    };
    path.push("Library");
    path.push("Logs");
    path.push("circles");
    path.push(file_name);
    path
}
/// Writes the plist describing `config` to the service's plist path.
///
/// Creates the parent directories of both the plist and the log file first.
/// Standard output and standard error are pointed at the same log file, and
/// `RunAtLoad` is always set.
///
/// # Errors
///
/// Propagates I/O failures from directory creation or the final write, and
/// returns `ServiceManagerError::Other` when the plist cannot be serialized
/// or converted to a UTF-8 string.
async fn create_plist(&self, config: &ServiceConfig) -> Result<(), ServiceManagerError> {
    let plist_path = self.get_plist_path(&config.name);
    let log_path = self.get_log_path(&config.name);

    // LaunchAgents directory first, then the log directory.
    for dir in [plist_path.parent(), log_path.parent()].into_iter().flatten() {
        tokio::fs::create_dir_all(dir).await?;
    }

    let log_file = log_path.to_string_lossy().to_string();
    let program_arguments: Vec<String> = std::iter::once(config.binary_path.clone())
        .chain(config.args.clone())
        .collect();

    let launch_daemon = LaunchDaemon {
        label: self.get_service_label(&config.name),
        program_arguments,
        working_directory: config.working_directory.clone(),
        // An empty map is omitted from the plist entirely.
        environment_variables: (!config.environment.is_empty())
            .then(|| config.environment.clone()),
        // Only emitted when auto-restart is requested.
        keep_alive: config.auto_restart.then_some(true),
        run_at_load: true,
        standard_out_path: Some(log_file.clone()),
        standard_error_path: Some(log_file),
    };

    let mut xml_bytes = Vec::new();
    if let Err(e) = plist::to_writer_xml(&mut xml_bytes, &launch_daemon) {
        return Err(ServiceManagerError::Other(format!(
            "Failed to serialize plist: {}",
            e
        )));
    }
    let xml_text = match String::from_utf8(xml_bytes) {
        Ok(text) => text,
        Err(e) => {
            return Err(ServiceManagerError::Other(format!(
                "Failed to convert plist to string: {}",
                e
            )))
        }
    };

    tokio::fs::write(&plist_path, xml_text).await?;
    Ok(())
}
/// Runs `launchctl` with `args` and returns its standard output as text.
///
/// # Errors
///
/// Propagates the I/O error if the process cannot be run, and returns
/// `ServiceManagerError::Other` carrying the command's stderr when it exits
/// unsuccessfully.
async fn run_launchctl(&self, args: &[&str]) -> Result<String, ServiceManagerError> {
    let output = Command::new("launchctl").args(args).output().await?;
    if output.status.success() {
        return Ok(String::from_utf8_lossy(&output.stdout).into_owned());
    }
    let stderr = String::from_utf8_lossy(&output.stderr);
    Err(ServiceManagerError::Other(format!(
        "launchctl command failed: {}",
        stderr
    )))
}
async fn wait_for_service_status(
&self,
service_name: &str,
timeout_secs: u64,
) -> Result<(), ServiceManagerError> {
use tokio::time::{sleep, timeout, Duration};
let timeout_duration = Duration::from_secs(timeout_secs);
let poll_interval = Duration::from_millis(500);
let result = timeout(timeout_duration, async {
loop {
match self.status(service_name) {
Ok(ServiceStatus::Running) => {
return Ok(());
}
Ok(ServiceStatus::Failed) => {
// Service failed, get error details from logs
let logs = self.logs(service_name, Some(20)).unwrap_or_default();
let error_msg = if logs.is_empty() {
"Service failed to start (no logs available)".to_string()
} else {
// Extract error lines from logs
let error_lines: Vec<&str> = logs
.lines()
.filter(|line| {
line.to_lowercase().contains("error")
|| line.to_lowercase().contains("failed")
})
.take(3)
.collect();
if error_lines.is_empty() {
format!(
"Service failed to start. Recent logs:\n{}",
logs.lines()
.rev()
.take(5)
.collect::<Vec<_>>()
.into_iter()
.rev()
.collect::<Vec<_>>()
.join("\n")
)
} else {
format!(
"Service failed to start. Errors:\n{}",
error_lines.join("\n")
)
}
};
return Err(ServiceManagerError::StartFailed(
service_name.to_string(),
error_msg,
));
}
Ok(ServiceStatus::Stopped) | Ok(ServiceStatus::Unknown) => {
// Still starting, continue polling
sleep(poll_interval).await;
}
Err(ServiceManagerError::ServiceNotFound(_)) => {
return Err(ServiceManagerError::ServiceNotFound(
service_name.to_string(),
));
}
Err(e) => {
return Err(e);
}
}
}
})
.await;
match result {
Ok(Ok(())) => Ok(()),
Ok(Err(e)) => Err(e),
Err(_) => Err(ServiceManagerError::StartFailed(
service_name.to_string(),
format!("Service did not start within {} seconds", timeout_secs),
)),
}
}
}
impl ServiceManager for LaunchctlServiceManager {
/// Reports whether a plist file for `service_name` is present on disk.
///
/// Only the file system is consulted; `launchctl` is not queried.
fn exists(&self, service_name: &str) -> Result<bool, ServiceManagerError> {
    Ok(self.get_plist_path(service_name).exists())
}
/// Writes the plist for `config` and loads it with `launchctl load`.
///
/// The generated plist sets `RunAtLoad`, so no separate start command is issued.
///
/// # Errors
///
/// * `ServiceAlreadyExists` when a job with exactly this service's label is
///   already listed by `launchctl list`.
/// * `StartFailed` when `launchctl load` fails.
/// * Any error from listing jobs or writing the plist.
fn start(&self, config: &ServiceConfig) -> Result<(), ServiceManagerError> {
    let runtime = get_runtime()?;
    runtime.block_on(async {
        let label = self.get_service_label(&config.name);

        // Match the whole label column, not a substring: otherwise a loaded
        // "<prefix>.foobar" would wrongly block starting "<prefix>.foo".
        let list_output = self.run_launchctl(&["list"]).await?;
        let already_loaded = list_output.lines().any(|line| {
            line.trim_end()
                .strip_suffix(label.as_str())
                .map_or(false, |head| {
                    head.is_empty() || head.ends_with(char::is_whitespace)
                })
        });
        if already_loaded {
            return Err(ServiceManagerError::ServiceAlreadyExists(
                config.name.clone(),
            ));
        }

        self.create_plist(config).await?;

        let plist_path = self.get_plist_path(&config.name);
        self.run_launchctl(&["load", &plist_path.to_string_lossy()])
            .await
            .map_err(|e| {
                ServiceManagerError::StartFailed(config.name.clone(), e.to_string())
            })?;
        Ok(())
    })
}
/// Starts a service whose plist already exists on disk.
///
/// If the job is loaded and has a PID, nothing is done. If it is loaded but has
/// no PID, `launchctl start <label>` is issued. If it is not loaded, the plist
/// is loaded with `launchctl load`.
///
/// The running check queries `launchctl` directly instead of calling the blocking
/// `status` method: `status` calls `Runtime::block_on`, which panics when invoked
/// from inside the async block that is already being driven here.
///
/// # Errors
///
/// * `ServiceNotFound` when the plist file is missing.
/// * `StartFailed` when `launchctl start` or `launchctl load` fails.
/// * Any error from listing the loaded jobs.
fn start_existing(&self, service_name: &str) -> Result<(), ServiceManagerError> {
    let runtime = get_runtime()?;
    runtime.block_on(async {
        let label = self.get_service_label(service_name);
        let plist_path = self.get_plist_path(service_name);

        if !plist_path.exists() {
            return Err(ServiceManagerError::ServiceNotFound(
                service_name.to_string(),
            ));
        }

        // Match the whole label column so "<prefix>.foo" is not mistaken for
        // a loaded "<prefix>.foobar".
        let list_output = self.run_launchctl(&["list"]).await?;
        let loaded = list_output.lines().any(|line| {
            line.trim_end()
                .strip_suffix(label.as_str())
                .map_or(false, |head| {
                    head.is_empty() || head.ends_with(char::is_whitespace)
                })
        });

        if loaded {
            // Same criterion `status` uses for Running: the per-label listing
            // contains a PID entry. A failed query counts as not running.
            let running = self
                .run_launchctl(&["list", &label])
                .await
                .map_or(false, |detail| detail.contains("\"PID\" = "));
            if !running {
                self.run_launchctl(&["start", &label]).await.map_err(|e| {
                    ServiceManagerError::StartFailed(service_name.to_string(), e.to_string())
                })?;
            }
            return Ok(());
        }

        // Not loaded yet: load the existing plist.
        self.run_launchctl(&["load", &plist_path.to_string_lossy()])
            .await
            .map_err(|e| {
                ServiceManagerError::StartFailed(service_name.to_string(), e.to_string())
            })?;
        Ok(())
    })
}
/// Starts the service described by `config`, then blocks until
/// `wait_for_service_status` confirms it or gives up after `timeout_secs` seconds.
///
/// # Errors
///
/// Returns whatever `start`, `get_runtime` or the wait step reports.
fn start_and_confirm(
    &self,
    config: &ServiceConfig,
    timeout_secs: u64,
) -> Result<(), ServiceManagerError> {
    self.start(config)?;

    // The future is lazy; nothing is polled until `block_on` drives it.
    let confirmation = self.wait_for_service_status(&config.name, timeout_secs);
    get_runtime()?.block_on(confirmation)
}
/// Starts an already-defined service via `start_existing`, then blocks until
/// `wait_for_service_status` confirms it or gives up after `timeout_secs` seconds.
///
/// # Errors
///
/// Returns whatever `start_existing`, `get_runtime` or the wait step reports.
fn start_existing_and_confirm(
    &self,
    service_name: &str,
    timeout_secs: u64,
) -> Result<(), ServiceManagerError> {
    self.start_existing(service_name)?;

    // The future is lazy; nothing is polled until `block_on` drives it.
    let confirmation = self.wait_for_service_status(service_name, timeout_secs);
    get_runtime()?.block_on(confirmation)
}
/// Stops a service by unloading its plist with `launchctl unload`.
///
/// The plist file itself is left on disk.
///
/// # Errors
///
/// Returns `StopFailed` when the `launchctl unload` command fails.
fn stop(&self, service_name: &str) -> Result<(), ServiceManagerError> {
    let runtime = get_runtime()?;
    let plist_path = self.get_plist_path(service_name);
    runtime.block_on(async {
        let plist_arg = plist_path.to_string_lossy();
        match self.run_launchctl(&["unload", &*plist_arg]).await {
            Ok(_) => Ok(()),
            Err(cause) => Err(ServiceManagerError::StopFailed(
                service_name.to_string(),
                cause.to_string(),
            )),
        }
    })
}
/// Restarts a service by unloading it and loading its existing plist again.
///
/// `stop` leaves the plist on disk, so the stored definition is enough to bring
/// the service back through `start_existing`; no `ServiceConfig` is needed.
///
/// # Errors
///
/// Returns `RestartFailed` when stopping fails for any reason other than
/// `ServiceNotFound`, or when the subsequent `start_existing` fails.
fn restart(&self, service_name: &str) -> Result<(), ServiceManagerError> {
    if let Err(e) = self.stop(service_name) {
        // A missing service is not an obstacle to (re)starting it.
        if !matches!(e, ServiceManagerError::ServiceNotFound(_)) {
            return Err(ServiceManagerError::RestartFailed(
                service_name.to_string(),
                e.to_string(),
            ));
        }
    }

    self.start_existing(service_name).map_err(|e| {
        ServiceManagerError::RestartFailed(service_name.to_string(), e.to_string())
    })
}
/// Classifies the current state of `service_name`.
///
/// * `Stopped` — the label is absent from `launchctl list`, or the per-label
///   query fails.
/// * `Running` — the per-label listing contains a `"PID"` entry.
/// * `Failed` — no PID, but a `"LastExitStatus"` entry is present.
/// * `Unknown` — neither entry is present.
///
/// Blocks the calling thread via `block_on`.
///
/// # Errors
///
/// Returns `ServiceNotFound` when the plist file is missing, and propagates
/// failures of the initial `launchctl list` call.
fn status(&self, service_name: &str) -> Result<ServiceStatus, ServiceManagerError> {
    get_runtime()?.block_on(async {
        if !self.get_plist_path(service_name).exists() {
            return Err(ServiceManagerError::ServiceNotFound(
                service_name.to_string(),
            ));
        }

        let label = self.get_service_label(service_name);
        let loaded_jobs = self.run_launchctl(&["list"]).await?;
        if !loaded_jobs.contains(&label) {
            return Ok(ServiceStatus::Stopped);
        }

        let state = match self.run_launchctl(&["list", &label]).await {
            Err(_) => ServiceStatus::Stopped,
            Ok(detail) if detail.contains("\"PID\" = ") => ServiceStatus::Running,
            Ok(detail) if detail.contains("\"LastExitStatus\" = ") => ServiceStatus::Failed,
            Ok(_) => ServiceStatus::Unknown,
        };
        Ok(state)
    })
}
/// Returns the contents of the service's log file.
///
/// With `lines = Some(n)` the output of `tail -n <n>` on the log file is
/// returned; with `None` the whole file is read. A missing log file yields an
/// empty string.
///
/// Blocks the calling thread via `block_on`.
///
/// # Errors
///
/// Propagates I/O errors from running `tail` or reading the file.
fn logs(
    &self,
    service_name: &str,
    lines: Option<usize>,
) -> Result<String, ServiceManagerError> {
    let runtime = get_runtime()?;
    runtime.block_on(async {
        let log_path = self.get_log_path(service_name);
        if !log_path.exists() {
            return Ok(String::new());
        }

        if let Some(n) = lines {
            let count = n.to_string();
            let path_arg = log_path.to_string_lossy();
            let output = Command::new("tail")
                .args(&["-n", count.as_str(), &*path_arg])
                .output()
                .await?;
            Ok(String::from_utf8_lossy(&output.stdout).into_owned())
        } else {
            let content = tokio::fs::read_to_string(&log_path).await?;
            Ok(content)
        }
    })
}
/// Lists the names of services known to `launchctl` whose label starts with
/// this manager's prefix.
///
/// Each line of `launchctl list` is inspected; the last whitespace-separated
/// token is taken as the label and the `<prefix>.` part is stripped from it.
///
/// # Errors
///
/// Propagates failures of the `launchctl list` call.
fn list(&self) -> Result<Vec<String>, ServiceManagerError> {
    let runtime = get_runtime()?;
    runtime.block_on(async {
        let list_output = self.run_launchctl(&["list"]).await?;
        let label_prefix = format!("{}.", self.service_prefix);

        let mut services = Vec::new();
        for line in list_output.lines() {
            if !line.contains(&self.service_prefix) {
                continue;
            }
            let name = line
                .split_whitespace()
                .last()
                .and_then(|label| label.strip_prefix(label_prefix.as_str()));
            if let Some(name) = name {
                services.push(name.to_string());
            }
        }
        Ok(services)
    })
}
/// Removes a service: tries to stop it, then deletes its plist file if present.
///
/// A failure to stop is logged as a warning and does not abort the removal.
///
/// # Errors
///
/// Propagates the I/O error if deleting the plist file fails.
fn remove(&self, service_name: &str) -> Result<(), ServiceManagerError> {
    match self.stop(service_name) {
        Ok(()) => {}
        Err(stop_err) => {
            // Best effort only: the service may already be stopped or unknown.
            log::warn!(
                "Failed to stop service '{}' before removal: {}",
                service_name,
                stop_err
            );
        }
    }

    get_runtime()?.block_on(async {
        let plist_path = self.get_plist_path(service_name);
        if plist_path.exists() {
            tokio::fs::remove_file(&plist_path).await?;
        }
        Ok(())
    })
}
}