initial commit
This commit is contained in:
166
src/app.rs
Normal file
166
src/app.rs
Normal file
@@ -0,0 +1,166 @@
|
||||
//! # Hero Supervisor Application
|
||||
//!
|
||||
//! Simplified supervisor application that wraps a built Supervisor instance.
|
||||
//! Use SupervisorBuilder to construct the supervisor with all configuration,
|
||||
//! then pass it to SupervisorApp for runtime management.
|
||||
|
||||
use crate::Supervisor;
|
||||
use crate::openrpc::start_openrpc_servers;
|
||||
use log::{info, error, debug};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// Main supervisor application
|
||||
pub struct SupervisorApp {
|
||||
pub supervisor: Supervisor,
|
||||
pub bind_address: String,
|
||||
pub port: u16,
|
||||
}
|
||||
|
||||
impl SupervisorApp {
|
||||
/// Create a new supervisor application with a built supervisor
|
||||
pub fn new(supervisor: Supervisor, bind_address: String, port: u16) -> Self {
|
||||
Self {
|
||||
supervisor,
|
||||
bind_address,
|
||||
port,
|
||||
}
|
||||
}
|
||||
|
||||
/// Start the complete supervisor application
|
||||
/// This method handles the entire application lifecycle:
|
||||
/// - Starts all configured runners
|
||||
/// - Launches the OpenRPC server
|
||||
/// - Sets up graceful shutdown handling
|
||||
/// - Keeps the application running
|
||||
pub async fn start(&mut self) -> Result<(), Box<dyn std::error::Error>> {
|
||||
info!("Starting Hero Supervisor Application");
|
||||
|
||||
// Start all configured runners
|
||||
self.start_all().await?;
|
||||
|
||||
// Start OpenRPC server
|
||||
self.start_openrpc_server().await?;
|
||||
|
||||
// Set up graceful shutdown
|
||||
self.setup_graceful_shutdown().await;
|
||||
|
||||
// Keep the application running
|
||||
info!("Supervisor is running. Press Ctrl+C to shutdown.");
|
||||
self.run_main_loop().await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start the OpenRPC server
|
||||
async fn start_openrpc_server(&self) -> Result<(), Box<dyn std::error::Error>> {
|
||||
info!("Starting OpenRPC server...");
|
||||
|
||||
let supervisor_for_openrpc = Arc::new(Mutex::new(self.supervisor.clone()));
|
||||
let bind_address = self.bind_address.clone();
|
||||
let port = self.port;
|
||||
|
||||
// Start the OpenRPC server in a background task
|
||||
let server_handle = tokio::spawn(async move {
|
||||
if let Err(e) = start_openrpc_servers(supervisor_for_openrpc, &bind_address, port).await {
|
||||
error!("OpenRPC server error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
// Give the server a moment to start
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||
info!("OpenRPC server started successfully");
|
||||
|
||||
// Store the handle for potential cleanup (we could add this to the struct if needed)
|
||||
std::mem::forget(server_handle); // For now, let it run in background
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Set up graceful shutdown handling
|
||||
async fn setup_graceful_shutdown(&self) {
|
||||
tokio::spawn(async move {
|
||||
tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
|
||||
info!("Received shutdown signal");
|
||||
std::process::exit(0);
|
||||
});
|
||||
}
|
||||
|
||||
/// Main application loop
|
||||
async fn run_main_loop(&self) {
|
||||
// Keep the main thread alive
|
||||
loop {
|
||||
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Start all configured runners
|
||||
pub async fn start_all(&mut self) -> Result<(), Box<dyn std::error::Error>> {
|
||||
info!("Starting all runners");
|
||||
|
||||
let results = self.supervisor.start_all().await;
|
||||
let mut failed_count = 0;
|
||||
|
||||
for (runner_id, result) in results {
|
||||
match result {
|
||||
Ok(_) => info!("Runner {} started successfully", runner_id),
|
||||
Err(e) => {
|
||||
error!("Failed to start runner {}: {}", runner_id, e);
|
||||
failed_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if failed_count == 0 {
|
||||
info!("All runners started successfully");
|
||||
} else {
|
||||
error!("Failed to start {} runners", failed_count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stop all configured runners
|
||||
pub async fn stop_all(&mut self, force: bool) -> Result<(), Box<dyn std::error::Error>> {
|
||||
info!("Stopping all runners (force: {})", force);
|
||||
|
||||
let results = self.supervisor.stop_all(force).await;
|
||||
let mut failed_count = 0;
|
||||
|
||||
for (runner_id, result) in results {
|
||||
match result {
|
||||
Ok(_) => info!("Runner {} stopped successfully", runner_id),
|
||||
Err(e) => {
|
||||
error!("Failed to stop runner {}: {}", runner_id, e);
|
||||
failed_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if failed_count == 0 {
|
||||
info!("All runners stopped successfully");
|
||||
} else {
|
||||
error!("Failed to stop {} runners", failed_count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get status of all runners
|
||||
pub async fn get_status(&self) -> Result<Vec<(String, String)>, Box<dyn std::error::Error>> {
|
||||
debug!("Getting status of all runners");
|
||||
|
||||
let statuses = self.supervisor.get_all_runner_status().await
|
||||
.map_err(|e| Box::new(e) as Box<dyn std::error::Error>)?;
|
||||
|
||||
let status_strings: Vec<(String, String)> = statuses
|
||||
.into_iter()
|
||||
.map(|(runner_id, status)| {
|
||||
let status_str = format!("{:?}", status);
|
||||
(runner_id, status_str)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(status_strings)
|
||||
}
|
||||
}
|
||||
327
src/client.rs
Normal file
327
src/client.rs
Normal file
@@ -0,0 +1,327 @@
|
||||
//! Main supervisor implementation for managing multiple actor runners.
|
||||
|
||||
use chrono::Utc;
|
||||
use redis::AsyncCommands;
|
||||
use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::{runner::{LogInfo, Runner, RunnerConfig, RunnerError, RunnerResult, RunnerStatus}, JobError, job::JobStatus};
|
||||
use crate::{job::Job};
|
||||
|
||||
#[cfg(feature = "admin")]
|
||||
use supervisor_admin_server::{AdminSupervisor, RunnerConfigInfo, JobInfo};
|
||||
|
||||
/// Process manager type for a runner
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ProcessManagerType {
|
||||
/// Simple process manager for direct process spawning
|
||||
Simple,
|
||||
/// Tmux process manager for session-based management
|
||||
Tmux(String), // session name
|
||||
}
|
||||
|
||||
/// Main supervisor that manages multiple runners
|
||||
#[derive(Clone)]
|
||||
pub struct Client {
|
||||
redis_client: redis::Client,
|
||||
/// Namespace for queue keys
|
||||
namespace: String,
|
||||
}
|
||||
|
||||
pub struct ClientBuilder {
|
||||
/// Redis URL for connection
|
||||
redis_url: String,
|
||||
/// Namespace for queue keys
|
||||
namespace: String,
|
||||
}
|
||||
|
||||
impl ClientBuilder {
|
||||
/// Create a new supervisor builder
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
redis_url: "redis://localhost:6379".to_string(),
|
||||
namespace: "".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the Redis URL
|
||||
pub fn redis_url<S: Into<String>>(mut self, url: S) -> Self {
|
||||
self.redis_url = url.into();
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the namespace for queue keys
|
||||
pub fn namespace<S: Into<String>>(mut self, namespace: S) -> Self {
|
||||
self.namespace = namespace.into();
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the supervisor
|
||||
pub async fn build(self) -> RunnerResult<Client> {
|
||||
// Create Redis client
|
||||
let redis_client = redis::Client::open(self.redis_url.as_str())
|
||||
.map_err(|e| RunnerError::ConfigError {
|
||||
reason: format!("Invalid Redis URL: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(Client {
|
||||
redis_client,
|
||||
namespace: self.namespace,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Client {
|
||||
fn default() -> Self {
|
||||
// Note: Default implementation creates an empty supervisor
|
||||
// Use Supervisor::builder() for proper initialization
|
||||
Self {
|
||||
redis_client: redis::Client::open("redis://localhost:6379").unwrap(),
|
||||
namespace: "".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Client {
|
||||
/// Create a new supervisor builder
|
||||
pub fn builder() -> ClientBuilder {
|
||||
ClientBuilder::new()
|
||||
}
|
||||
|
||||
/// List all job IDs from Redis
|
||||
pub async fn list_jobs(&self) -> RunnerResult<Vec<String>> {
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| RunnerError::RedisError { source: e })?;
|
||||
|
||||
let keys: Vec<String> = conn.keys(format!("{}:*", &self.jobs_key())).await?;
|
||||
let job_ids: Vec<String> = keys
|
||||
.into_iter()
|
||||
.filter_map(|key| {
|
||||
if key.starts_with(&format!("{}:", self.jobs_key())) {
|
||||
key.strip_prefix(&format!("{}:", self.jobs_key()))
|
||||
.map(|s| s.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(job_ids)
|
||||
}
|
||||
|
||||
fn jobs_key(&self) -> String {
|
||||
if self.namespace.is_empty() {
|
||||
format!("job")
|
||||
} else {
|
||||
format!("{}:job", self.namespace)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn job_key(&self, job_id: &str) -> String {
|
||||
if self.namespace.is_empty() {
|
||||
format!("job:{}", job_id)
|
||||
} else {
|
||||
format!("{}:job:{}", self.namespace, job_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn job_reply_key(&self, job_id: &str) -> String {
|
||||
if self.namespace.is_empty() {
|
||||
format!("reply:{}", job_id)
|
||||
} else {
|
||||
format!("{}:reply:{}", self.namespace, job_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Set job error in Redis
|
||||
pub async fn set_error(&self,
|
||||
job_id: &str,
|
||||
error: &str,
|
||||
) -> Result<(), JobError> {
|
||||
let job_key = self.job_key(job_id);
|
||||
let now = Utc::now();
|
||||
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
|
||||
conn.hset_multiple(&job_key, &[
|
||||
("error", error),
|
||||
("status", JobStatus::Error.as_str()),
|
||||
("updated_at", &now.to_rfc3339()),
|
||||
]).await
|
||||
.map_err(|e| JobError::Redis(e))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Set job status in Redis
|
||||
pub async fn set_job_status(&self,
|
||||
job_id: &str,
|
||||
status: JobStatus,
|
||||
) -> Result<(), JobError> {
|
||||
let job_key = self.job_key(job_id);
|
||||
let now = Utc::now();
|
||||
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
|
||||
conn.hset_multiple(&job_key, &[
|
||||
("status", status.as_str()),
|
||||
("updated_at", &now.to_rfc3339()),
|
||||
]).await
|
||||
.map_err(|e| JobError::Redis(e))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get job status from Redis
|
||||
pub async fn get_status(
|
||||
&self,
|
||||
job_id: &str,
|
||||
) -> Result<JobStatus, JobError> {
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
|
||||
let status_str: Option<String> = conn.hget(&self.job_key(job_id), "status").await?;
|
||||
|
||||
match status_str {
|
||||
Some(s) => JobStatus::from_str(&s).ok_or_else(|| JobError::InvalidStatus(s)),
|
||||
None => Err(JobError::NotFound(job_id.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete job from Redis
|
||||
pub async fn delete_from_redis(
|
||||
&self,
|
||||
job_id: &str,
|
||||
) -> Result<(), JobError> {
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
|
||||
let job_key = self.job_key(job_id);
|
||||
let _: () = conn.del(&job_key).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Store this job in Redis
|
||||
pub async fn store_job_in_redis(&self, job: &Job) -> Result<(), JobError> {
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
|
||||
let job_key = self.job_key(&job.id);
|
||||
|
||||
// Serialize the job data
|
||||
let job_data = serde_json::to_string(job)?;
|
||||
|
||||
// Store job data in Redis hash
|
||||
let _: () = conn.hset_multiple(&job_key, &[
|
||||
("data", job_data),
|
||||
("status", JobStatus::Dispatched.as_str().to_string()),
|
||||
("created_at", job.created_at.to_rfc3339()),
|
||||
("updated_at", job.updated_at.to_rfc3339()),
|
||||
]).await?;
|
||||
|
||||
// Set TTL for the job (24 hours)
|
||||
let _: () = conn.expire(&job_key, 86400).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load a job from Redis by ID
|
||||
pub async fn load_job_from_redis(
|
||||
&self,
|
||||
job_id: &str,
|
||||
) -> Result<Job, JobError> {
|
||||
let job_key = self.job_key(job_id);
|
||||
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
|
||||
// Get job data from Redis
|
||||
let job_data: Option<String> = conn.hget(&job_key, "data").await?;
|
||||
|
||||
match job_data {
|
||||
Some(data) => {
|
||||
let job: Job = serde_json::from_str(&data)?;
|
||||
Ok(job)
|
||||
}
|
||||
None => Err(JobError::NotFound(job_id.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete a job by ID
|
||||
pub async fn delete_job(&mut self, job_id: &str) -> RunnerResult<()> {
|
||||
use redis::AsyncCommands;
|
||||
|
||||
let mut conn = self.redis_client.get_multiplexed_async_connection().await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
|
||||
let job_key = self.job_key(job_id);
|
||||
let deleted_count: i32 = conn.del(&job_key).await
|
||||
.map_err(|e| RunnerError::QueueError {
|
||||
actor_id: job_id.to_string(),
|
||||
reason: format!("Failed to delete job: {}", e),
|
||||
})?;
|
||||
|
||||
if deleted_count == 0 {
|
||||
return Err(RunnerError::QueueError {
|
||||
actor_id: job_id.to_string(),
|
||||
reason: format!("Job '{}' not found or already deleted", job_id),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Set job result in Redis
|
||||
pub async fn set_result(
|
||||
&self,
|
||||
job_id: &str,
|
||||
result: &str,
|
||||
) -> Result<(), JobError> {
|
||||
let job_key = self.job_key(&job_id);
|
||||
let now = Utc::now();
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
let _: () = conn.hset_multiple(&job_key, &[
|
||||
("result", result),
|
||||
("status", JobStatus::Finished.as_str()),
|
||||
("updated_at", &now.to_rfc3339()),
|
||||
]).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get job result from Redis
|
||||
pub async fn get_result(
|
||||
&self,
|
||||
job_id: &str,
|
||||
) -> Result<Option<String>, JobError> {
|
||||
let job_key = self.job_key(job_id);
|
||||
let mut conn = self.redis_client
|
||||
.get_multiplexed_async_connection()
|
||||
.await
|
||||
.map_err(|e| JobError:: Redis(e))?;
|
||||
let result: Option<String> = conn.hget(&job_key, "result").await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
}
|
||||
220
src/job.rs
Normal file
220
src/job.rs
Normal file
@@ -0,0 +1,220 @@
|
||||
use chrono::Utc;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
use uuid::Uuid;
|
||||
use redis::AsyncCommands;
|
||||
use thiserror::Error;
|
||||
|
||||
/// Job status enumeration
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum JobStatus {
|
||||
Dispatched,
|
||||
WaitingForPrerequisites,
|
||||
Started,
|
||||
Error,
|
||||
Stopping,
|
||||
Finished,
|
||||
}
|
||||
|
||||
impl JobStatus {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
JobStatus::Dispatched => "dispatched",
|
||||
JobStatus::WaitingForPrerequisites => "waiting_for_prerequisites",
|
||||
JobStatus::Started => "started",
|
||||
JobStatus::Error => "error",
|
||||
JobStatus::Stopping => "stopping",
|
||||
JobStatus::Finished => "finished",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_str(s: &str) -> Option<Self> {
|
||||
match s {
|
||||
"dispatched" => Some(JobStatus::Dispatched),
|
||||
"waiting_for_prerequisites" => Some(JobStatus::WaitingForPrerequisites),
|
||||
"started" => Some(JobStatus::Started),
|
||||
"error" => Some(JobStatus::Error),
|
||||
"stopping" => Some(JobStatus::Stopping),
|
||||
"finished" => Some(JobStatus::Finished),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Representation of a script execution request.
|
||||
///
|
||||
/// This structure contains all the information needed to execute a script
|
||||
/// on a actor service, including the script content, dependencies, and metadata.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Job {
|
||||
pub id: String,
|
||||
pub caller_id: String,
|
||||
pub context_id: String,
|
||||
pub payload: String,
|
||||
pub runner_name: String, // name of the runner to execute this job
|
||||
pub executor: String, // name of the executor the runner will use to execute this job
|
||||
pub timeout: Duration,
|
||||
pub env_vars: HashMap<String, String>, // environment variables for script execution
|
||||
pub created_at: chrono::DateTime<chrono::Utc>,
|
||||
pub updated_at: chrono::DateTime<chrono::Utc>,
|
||||
}
|
||||
|
||||
/// Error types for job operations
|
||||
#[derive(Error, Debug)]
|
||||
pub enum JobError {
|
||||
#[error("Redis error: {0}")]
|
||||
Redis(#[from] redis::RedisError),
|
||||
#[error("Serialization error: {0}")]
|
||||
Serialization(#[from] serde_json::Error),
|
||||
#[error("Job not found: {0}")]
|
||||
NotFound(String),
|
||||
#[error("Invalid job status: {0}")]
|
||||
InvalidStatus(String),
|
||||
#[error("Timeout error: {0}")]
|
||||
Timeout(String),
|
||||
#[error("Invalid job data: {0}")]
|
||||
InvalidData(String),
|
||||
}
|
||||
|
||||
impl Job {
|
||||
/// Create a new job with the given parameters
|
||||
pub fn new(
|
||||
caller_id: String,
|
||||
context_id: String,
|
||||
payload: String,
|
||||
runner_name: String,
|
||||
executor: String,
|
||||
) -> Self {
|
||||
let now = Utc::now();
|
||||
Self {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
caller_id,
|
||||
context_id,
|
||||
payload,
|
||||
runner_name,
|
||||
executor,
|
||||
timeout: Duration::from_secs(300), // 5 minutes default
|
||||
env_vars: HashMap::new(),
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for constructing job execution requests.
|
||||
pub struct JobBuilder {
|
||||
caller_id: String,
|
||||
context_id: String,
|
||||
payload: String,
|
||||
runner_name: String,
|
||||
executor: String,
|
||||
timeout: Duration,
|
||||
env_vars: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl JobBuilder {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
caller_id: "".to_string(),
|
||||
context_id: "".to_string(),
|
||||
payload: "".to_string(),
|
||||
runner_name: "".to_string(),
|
||||
executor: "".to_string(),
|
||||
timeout: Duration::from_secs(300), // 5 minutes default
|
||||
env_vars: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the caller ID for this job
|
||||
pub fn caller_id(mut self, caller_id: &str) -> Self {
|
||||
self.caller_id = caller_id.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the context ID for this job
|
||||
pub fn context_id(mut self, context_id: &str) -> Self {
|
||||
self.context_id = context_id.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the payload (script content) for this job
|
||||
pub fn payload(mut self, payload: &str) -> Self {
|
||||
self.payload = payload.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the runner name for this job
|
||||
pub fn runner_name(mut self, runner_name: &str) -> Self {
|
||||
self.runner_name = runner_name.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the executor for this job
|
||||
pub fn executor(mut self, executor: &str) -> Self {
|
||||
self.executor = executor.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the timeout for job execution
|
||||
pub fn timeout(mut self, timeout: Duration) -> Self {
|
||||
self.timeout = timeout;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a single environment variable
|
||||
pub fn env_var(mut self, key: &str, value: &str) -> Self {
|
||||
self.env_vars.insert(key.to_string(), value.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple environment variables from a HashMap
|
||||
pub fn env_vars(mut self, env_vars: HashMap<String, String>) -> Self {
|
||||
self.env_vars = env_vars;
|
||||
self
|
||||
}
|
||||
|
||||
/// Clear all environment variables
|
||||
pub fn clear_env_vars(mut self) -> Self {
|
||||
self.env_vars.clear();
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the job
|
||||
pub fn build(self) -> Result<Job, JobError> {
|
||||
if self.caller_id.is_empty() {
|
||||
return Err(JobError::InvalidData("caller_id is required".to_string()));
|
||||
}
|
||||
if self.context_id.is_empty() {
|
||||
return Err(JobError::InvalidData("context_id is required".to_string()));
|
||||
}
|
||||
if self.payload.is_empty() {
|
||||
return Err(JobError::InvalidData("payload is required".to_string()));
|
||||
}
|
||||
if self.runner_name.is_empty() {
|
||||
return Err(JobError::InvalidData("runner_name is required".to_string()));
|
||||
}
|
||||
if self.executor.is_empty() {
|
||||
return Err(JobError::InvalidData("executor is required".to_string()));
|
||||
}
|
||||
|
||||
let mut job = Job::new(
|
||||
self.caller_id,
|
||||
self.context_id,
|
||||
self.payload,
|
||||
self.runner_name,
|
||||
self.executor,
|
||||
);
|
||||
|
||||
job.timeout = self.timeout;
|
||||
job.env_vars = self.env_vars;
|
||||
|
||||
Ok(job)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for JobBuilder {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
21
src/lib.rs
Normal file
21
src/lib.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
//! Hero Supervisor - Actor management for the Hero ecosystem.
|
||||
//!
|
||||
//! See README.md for detailed documentation and usage examples.
|
||||
|
||||
pub mod runner;
|
||||
pub mod supervisor;
|
||||
pub mod job;
|
||||
pub mod client;
|
||||
pub mod app;
|
||||
|
||||
// OpenRPC server module
|
||||
pub mod openrpc;
|
||||
|
||||
// Re-export main types for convenience
|
||||
pub use runner::{
|
||||
LogInfo, Runner, RunnerConfig, RunnerResult, RunnerStatus,
|
||||
};
|
||||
pub use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
|
||||
pub use supervisor::{Supervisor, SupervisorBuilder, ProcessManagerType};
|
||||
pub use job::{Job, JobBuilder, JobStatus, JobError};
|
||||
pub use app::SupervisorApp;
|
||||
829
src/openrpc.rs
Normal file
829
src/openrpc.rs
Normal file
@@ -0,0 +1,829 @@
|
||||
//! OpenRPC server implementation.
|
||||
|
||||
use jsonrpsee::{
|
||||
core::{RpcResult, async_trait},
|
||||
proc_macros::rpc,
|
||||
server::{Server, ServerHandle},
|
||||
types::{ErrorObject, ErrorObjectOwned},
|
||||
};
|
||||
use tower_http::cors::{CorsLayer, Any};
|
||||
|
||||
use anyhow;
|
||||
use log::{debug, info, error};
|
||||
|
||||
use crate::supervisor::Supervisor;
|
||||
use crate::runner::{Runner, RunnerError};
|
||||
use crate::job::Job;
|
||||
use crate::ProcessManagerType;
|
||||
use sal_service_manager::{ProcessStatus, LogInfo};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::fs;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// Load OpenRPC specification from docs/openrpc.json
|
||||
fn load_openrpc_spec() -> Result<serde_json::Value, Box<dyn std::error::Error>> {
|
||||
// Try to find the openrpc.json file relative to the current working directory
|
||||
let possible_paths = [
|
||||
"docs/openrpc.json",
|
||||
"../docs/openrpc.json",
|
||||
"../../docs/openrpc.json",
|
||||
"./supervisor/docs/openrpc.json",
|
||||
];
|
||||
|
||||
for path in &possible_paths {
|
||||
if let Ok(content) = fs::read_to_string(path) {
|
||||
match serde_json::from_str(&content) {
|
||||
Ok(spec) => {
|
||||
debug!("Loaded OpenRPC specification from: {}", path);
|
||||
return Ok(spec);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to parse OpenRPC JSON from {}: {}", path, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err("Could not find or parse docs/openrpc.json".into())
|
||||
}
|
||||
|
||||
/// Helper function to convert RunnerError to RPC error
|
||||
fn runner_error_to_rpc_error(err: RunnerError) -> ErrorObject<'static> {
|
||||
ErrorObject::owned(
|
||||
-32603, // Internal error code
|
||||
format!("Supervisor error: {}", err),
|
||||
None::<()>,
|
||||
)
|
||||
}
|
||||
|
||||
/// Helper function to create invalid params error
|
||||
fn invalid_params_error(msg: &str) -> ErrorObject<'static> {
|
||||
ErrorObject::owned(
|
||||
-32602, // Invalid params error code
|
||||
format!("Invalid parameters: {}", msg),
|
||||
None::<()>,
|
||||
)
|
||||
}
|
||||
|
||||
/// Request parameters for registering a new runner
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct RegisterRunnerParams {
|
||||
pub secret: String,
|
||||
pub name: String,
|
||||
pub queue: String,
|
||||
}
|
||||
|
||||
/// Request parameters for running a job
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct RunJobParams {
|
||||
pub secret: String,
|
||||
pub job: Job,
|
||||
}
|
||||
|
||||
/// Request parameters for adding a new runner
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct AddRunnerParams {
|
||||
pub actor_id: String,
|
||||
pub binary_path: String,
|
||||
pub db_path: String,
|
||||
pub redis_url: String,
|
||||
pub process_manager_type: String, // "simple" or "tmux"
|
||||
pub tmux_session_name: Option<String>, // required if process_manager_type is "tmux"
|
||||
}
|
||||
|
||||
/// Request parameters for queuing a job
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct QueueJobParams {
|
||||
pub runner_name: String,
|
||||
pub job: Job,
|
||||
}
|
||||
|
||||
/// Request parameters for queue and wait operation
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct QueueAndWaitParams {
|
||||
pub runner_name: String,
|
||||
pub job: Job,
|
||||
pub timeout_secs: u64,
|
||||
}
|
||||
|
||||
/// Request parameters for getting runner logs
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct GetLogsParams {
|
||||
pub actor_id: String,
|
||||
pub lines: Option<usize>,
|
||||
pub follow: bool,
|
||||
}
|
||||
|
||||
/// Request parameters for adding secrets
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct AddSecretParams {
|
||||
pub admin_secret: String,
|
||||
pub secret_type: String, // "admin", "user", or "register"
|
||||
pub secret_value: String,
|
||||
}
|
||||
|
||||
/// Request parameters for removing secrets
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct RemoveSecretParams {
|
||||
pub admin_secret: String,
|
||||
pub secret_type: String, // "admin", "user", or "register"
|
||||
pub secret_value: String,
|
||||
}
|
||||
|
||||
/// Request parameters for listing secrets
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct ListSecretsParams {
|
||||
pub admin_secret: String,
|
||||
}
|
||||
|
||||
/// Serializable wrapper for ProcessStatus
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub enum ProcessStatusWrapper {
|
||||
Running,
|
||||
Stopped,
|
||||
Starting,
|
||||
Stopping,
|
||||
Error(String),
|
||||
}
|
||||
|
||||
impl From<ProcessStatus> for ProcessStatusWrapper {
|
||||
fn from(status: ProcessStatus) -> Self {
|
||||
match status {
|
||||
ProcessStatus::Running => ProcessStatusWrapper::Running,
|
||||
ProcessStatus::Stopped => ProcessStatusWrapper::Stopped,
|
||||
ProcessStatus::Starting => ProcessStatusWrapper::Starting,
|
||||
ProcessStatus::Stopping => ProcessStatusWrapper::Stopping,
|
||||
ProcessStatus::Error(msg) => ProcessStatusWrapper::Error(msg),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Note: RunnerStatus is just an alias for ProcessStatus, so we don't need a separate impl
|
||||
|
||||
/// Serializable wrapper for Runner
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct RunnerWrapper {
|
||||
pub id: String,
|
||||
pub name: String,
|
||||
pub command: String,
|
||||
pub redis_url: String,
|
||||
}
|
||||
|
||||
impl From<&Runner> for RunnerWrapper {
|
||||
fn from(runner: &Runner) -> Self {
|
||||
RunnerWrapper {
|
||||
id: runner.id.clone(),
|
||||
name: runner.name.clone(),
|
||||
command: runner.command.to_string_lossy().to_string(),
|
||||
redis_url: runner.redis_url.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializable wrapper for LogInfo
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct LogInfoWrapper {
|
||||
pub timestamp: String,
|
||||
pub level: String,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
impl From<LogInfo> for LogInfoWrapper {
|
||||
fn from(log: LogInfo) -> Self {
|
||||
LogInfoWrapper {
|
||||
timestamp: log.timestamp,
|
||||
level: log.level,
|
||||
message: log.message,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<crate::runner::LogInfo> for LogInfoWrapper {
|
||||
fn from(log: crate::runner::LogInfo) -> Self {
|
||||
LogInfoWrapper {
|
||||
timestamp: log.timestamp,
|
||||
level: log.level,
|
||||
message: log.message,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Response for runner status queries
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct RunnerStatusResponse {
|
||||
pub actor_id: String,
|
||||
pub status: ProcessStatusWrapper,
|
||||
}
|
||||
|
||||
/// Response for supervisor info
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct SupervisorInfoResponse {
|
||||
pub server_url: String,
|
||||
pub admin_secrets_count: usize,
|
||||
pub user_secrets_count: usize,
|
||||
pub register_secrets_count: usize,
|
||||
pub runners_count: usize,
|
||||
}
|
||||
|
||||
/// OpenRPC trait defining all supervisor methods
|
||||
#[rpc(server)]
|
||||
pub trait SupervisorRpc {
|
||||
/// Register a new runner with secret-based authentication
|
||||
#[method(name = "register_runner")]
|
||||
async fn register_runner(&self, params: RegisterRunnerParams) -> RpcResult<String>;
|
||||
|
||||
/// Create a job (fire-and-forget, non-blocking)
|
||||
#[method(name = "create_job")]
|
||||
async fn create_job(&self, params: RunJobParams) -> RpcResult<String>;
|
||||
|
||||
/// Run a job on the appropriate runner (blocking, returns result)
|
||||
#[method(name = "run_job")]
|
||||
async fn run_job(&self, params: RunJobParams) -> RpcResult<Option<String>>;
|
||||
|
||||
/// Remove a runner from the supervisor
|
||||
#[method(name = "remove_runner")]
|
||||
async fn remove_runner(&self, actor_id: String) -> RpcResult<()>;
|
||||
|
||||
/// List all runner IDs
|
||||
#[method(name = "list_runners")]
|
||||
async fn list_runners(&self) -> RpcResult<Vec<String>>;
|
||||
|
||||
/// Start a specific runner
|
||||
#[method(name = "start_runner")]
|
||||
async fn start_runner(&self, actor_id: String) -> RpcResult<()>;
|
||||
|
||||
/// Stop a specific runner
|
||||
#[method(name = "stop_runner")]
|
||||
async fn stop_runner(&self, actor_id: String, force: bool) -> RpcResult<()>;
|
||||
|
||||
/// Get a specific runner by ID
|
||||
#[method(name = "get_runner")]
|
||||
async fn get_runner(&self, actor_id: String) -> RpcResult<RunnerWrapper>;
|
||||
|
||||
/// Get the status of a specific runner
|
||||
#[method(name = "get_runner_status")]
|
||||
async fn get_runner_status(&self, actor_id: String) -> RpcResult<ProcessStatusWrapper>;
|
||||
|
||||
/// Get logs for a specific runner
|
||||
#[method(name = "get_runner_logs")]
|
||||
async fn get_runner_logs(&self, params: GetLogsParams) -> RpcResult<Vec<LogInfoWrapper>>;
|
||||
|
||||
/// Queue a job to a specific runner
|
||||
#[method(name = "queue_job_to_runner")]
|
||||
async fn queue_job_to_runner(&self, params: QueueJobParams) -> RpcResult<()>;
|
||||
|
||||
/// List all job IDs from Redis
|
||||
#[method(name = "list_jobs")]
|
||||
async fn list_jobs(&self) -> RpcResult<Vec<String>>;
|
||||
|
||||
/// Get a job by job ID
|
||||
#[method(name = "get_job")]
|
||||
async fn get_job(&self, job_id: String) -> RpcResult<Job>;
|
||||
|
||||
/// Ping a runner (dispatch a ping job)
|
||||
#[method(name = "ping_runner")]
|
||||
async fn ping_runner(&self, runner_id: String) -> RpcResult<String>;
|
||||
|
||||
/// Stop a job
|
||||
#[method(name = "stop_job")]
|
||||
async fn stop_job(&self, job_id: String) -> RpcResult<()>;
|
||||
|
||||
/// Delete a job
|
||||
#[method(name = "delete_job")]
|
||||
async fn delete_job(&self, job_id: String) -> RpcResult<()>;
|
||||
|
||||
/// Queue a job to a specific runner and wait for the result
|
||||
#[method(name = "queue_and_wait")]
|
||||
async fn queue_and_wait(&self, params: QueueAndWaitParams) -> RpcResult<Option<String>>;
|
||||
|
||||
/// Get status of all runners
|
||||
#[method(name = "get_all_runner_status")]
|
||||
async fn get_all_runner_status(&self) -> RpcResult<Vec<RunnerStatusResponse>>;
|
||||
|
||||
/// Start all runners
|
||||
#[method(name = "start_all")]
|
||||
async fn start_all(&self) -> RpcResult<Vec<(String, String)>>;
|
||||
|
||||
/// Stop all runners
|
||||
#[method(name = "stop_all")]
|
||||
async fn stop_all(&self, force: bool) -> RpcResult<Vec<(String, String)>>;
|
||||
|
||||
/// Get status of all runners (alternative format)
|
||||
#[method(name = "get_all_status")]
|
||||
async fn get_all_status(&self) -> RpcResult<Vec<(String, String)>>;
|
||||
|
||||
/// Add a secret to the supervisor (admin, user, or register)
|
||||
#[method(name = "add_secret")]
|
||||
async fn add_secret(&self, params: AddSecretParams) -> RpcResult<()>;
|
||||
|
||||
/// Remove a secret from the supervisor
|
||||
#[method(name = "remove_secret")]
|
||||
async fn remove_secret(&self, params: RemoveSecretParams) -> RpcResult<()>;
|
||||
|
||||
/// List all secrets (returns counts only for security)
|
||||
#[method(name = "list_secrets")]
|
||||
async fn list_secrets(&self, params: ListSecretsParams) -> RpcResult<SupervisorInfoResponse>;
|
||||
|
||||
/// List admin secrets (returns actual secret values)
|
||||
#[method(name = "list_admin_secrets")]
|
||||
async fn list_admin_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>>;
|
||||
|
||||
/// List user secrets (returns actual secret values)
|
||||
#[method(name = "list_user_secrets")]
|
||||
async fn list_user_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>>;
|
||||
|
||||
/// List register secrets (returns actual secret values)
|
||||
#[method(name = "list_register_secrets")]
|
||||
async fn list_register_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>>;
|
||||
|
||||
/// Get supervisor information and statistics
|
||||
#[method(name = "get_supervisor_info")]
|
||||
async fn get_supervisor_info(&self, admin_secret: String) -> RpcResult<SupervisorInfoResponse>;
|
||||
|
||||
/// OpenRPC discovery method - returns the OpenRPC document describing this API
|
||||
#[method(name = "rpc.discover")]
|
||||
async fn rpc_discover(&self) -> RpcResult<serde_json::Value>;
|
||||
}
|
||||
|
||||
/// Helper function to parse process manager type from string
|
||||
fn parse_process_manager_type(pm_type: &str, session_name: Option<String>) -> Result<ProcessManagerType, ErrorObject<'static>> {
|
||||
match pm_type.to_lowercase().as_str() {
|
||||
"simple" => Ok(ProcessManagerType::Simple),
|
||||
"tmux" => {
|
||||
let session = session_name.unwrap_or_else(|| "default_session".to_string());
|
||||
Ok(ProcessManagerType::Tmux(session))
|
||||
},
|
||||
_ => Err(invalid_params_error(&format!(
|
||||
"Invalid process manager type: {}. Must be 'simple' or 'tmux'",
|
||||
pm_type
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Direct RPC implementation on Arc<Mutex<Supervisor>>
|
||||
/// This eliminates the need for a wrapper struct
|
||||
#[async_trait]
|
||||
impl SupervisorRpcServer for Arc<Mutex<Supervisor>> {
|
||||
async fn register_runner(&self, params: RegisterRunnerParams) -> RpcResult<String> {
|
||||
debug!("OpenRPC request: register_runner with params: {:?}", params);
|
||||
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.register_runner(¶ms.secret, ¶ms.name, ¶ms.queue)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)?;
|
||||
|
||||
// Return the runner name that was registered
|
||||
Ok(params.name)
|
||||
}
|
||||
|
||||
async fn create_job(&self, params: RunJobParams) -> RpcResult<String> {
|
||||
debug!("OpenRPC request: create_job with params: {:?}", params);
|
||||
|
||||
let mut supervisor = self.lock().await;
|
||||
let job_id = supervisor
|
||||
.create_job(¶ms.secret, params.job)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)?;
|
||||
|
||||
Ok(job_id)
|
||||
}
|
||||
|
||||
async fn run_job(&self, params: RunJobParams) -> RpcResult<Option<String>> {
|
||||
debug!("OpenRPC request: run_job with params: {:?}", params);
|
||||
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.run_job(¶ms.secret, params.job)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn remove_runner(&self, actor_id: String) -> RpcResult<()> {
|
||||
debug!("OpenRPC request: remove_runner with actor_id: {}", actor_id);
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.remove_runner(&actor_id)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn list_runners(&self) -> RpcResult<Vec<String>> {
|
||||
debug!("OpenRPC request: list_runners");
|
||||
let supervisor = self.lock().await;
|
||||
Ok(supervisor.list_runners().into_iter().map(|s| s.to_string()).collect())
|
||||
}
|
||||
|
||||
async fn start_runner(&self, actor_id: String) -> RpcResult<()> {
|
||||
debug!("OpenRPC request: start_runner with actor_id: {}", actor_id);
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.start_runner(&actor_id)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn stop_runner(&self, actor_id: String, force: bool) -> RpcResult<()> {
|
||||
debug!("OpenRPC request: stop_runner with actor_id: {}, force: {}", actor_id, force);
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.stop_runner(&actor_id, force)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn get_runner(&self, actor_id: String) -> RpcResult<RunnerWrapper> {
|
||||
debug!("OpenRPC request: get_runner with actor_id: {}", actor_id);
|
||||
let supervisor = self.lock().await;
|
||||
match supervisor.get_runner(&actor_id) {
|
||||
Some(runner) => Ok(RunnerWrapper::from(runner)),
|
||||
None => Err(ErrorObjectOwned::owned(-32000, format!("Runner not found: {}", actor_id), None::<()>)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_runner_status(&self, actor_id: String) -> RpcResult<ProcessStatusWrapper> {
|
||||
debug!("OpenRPC request: get_runner_status with actor_id: {}", actor_id);
|
||||
let supervisor = self.lock().await;
|
||||
let status = supervisor
|
||||
.get_runner_status(&actor_id)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)?;
|
||||
Ok(status.into())
|
||||
}
|
||||
|
||||
async fn get_runner_logs(&self, params: GetLogsParams) -> RpcResult<Vec<LogInfoWrapper>> {
|
||||
debug!("OpenRPC request: get_runner_logs with params: {:?}", params);
|
||||
let supervisor = self.lock().await;
|
||||
let logs = supervisor
|
||||
.get_runner_logs(¶ms.actor_id, params.lines, params.follow)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)?;
|
||||
Ok(logs.into_iter().map(LogInfoWrapper::from).collect())
|
||||
}
|
||||
|
||||
async fn queue_job_to_runner(&self, params: QueueJobParams) -> RpcResult<()> {
|
||||
debug!("OpenRPC request: queue_job_to_runner with params: {:?}", params);
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.queue_job_to_runner(¶ms.runner_name, params.job)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn list_jobs(&self) -> RpcResult<Vec<String>> {
|
||||
debug!("OpenRPC request: list_jobs");
|
||||
let supervisor = self.lock().await;
|
||||
supervisor
|
||||
.list_jobs()
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn get_job(&self, job_id: String) -> RpcResult<Job> {
|
||||
debug!("OpenRPC request: get_job with job_id: {}", job_id);
|
||||
let supervisor = self.lock().await;
|
||||
supervisor
|
||||
.get_job(&job_id)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn ping_runner(&self, runner_id: String) -> RpcResult<String> {
|
||||
debug!("OpenRPC request: ping_runner with runner_id: {}", runner_id);
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.ping_runner(&runner_id)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn stop_job(&self, job_id: String) -> RpcResult<()> {
|
||||
debug!("OpenRPC request: stop_job with job_id: {}", job_id);
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.stop_job(&job_id)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn delete_job(&self, job_id: String) -> RpcResult<()> {
|
||||
debug!("OpenRPC request: delete_job with job_id: {}", job_id);
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.delete_job(&job_id)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn queue_and_wait(&self, params: QueueAndWaitParams) -> RpcResult<Option<String>> {
|
||||
debug!("OpenRPC request: queue_and_wait with params: {:?}", params);
|
||||
let mut supervisor = self.lock().await;
|
||||
supervisor
|
||||
.queue_and_wait(¶ms.runner_name, params.job, params.timeout_secs)
|
||||
.await
|
||||
.map_err(runner_error_to_rpc_error)
|
||||
}
|
||||
|
||||
async fn get_all_runner_status(&self) -> RpcResult<Vec<RunnerStatusResponse>> {
|
||||
debug!("OpenRPC request: get_all_runner_status");
|
||||
let supervisor = self.lock().await;
|
||||
let statuses = supervisor.get_all_runner_status().await
|
||||
.map_err(runner_error_to_rpc_error)?;
|
||||
Ok(statuses
|
||||
.into_iter()
|
||||
.map(|(actor_id, status)| RunnerStatusResponse {
|
||||
actor_id,
|
||||
status: ProcessStatusWrapper::from(status),
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn start_all(&self) -> RpcResult<Vec<(String, String)>> {
|
||||
debug!("OpenRPC request: start_all");
|
||||
let mut supervisor = self.lock().await;
|
||||
let results = supervisor.start_all().await;
|
||||
Ok(results
|
||||
.into_iter()
|
||||
.map(|(actor_id, result)| {
|
||||
let status = match result {
|
||||
Ok(_) => "Success".to_string(),
|
||||
Err(e) => format!("Error: {}", e),
|
||||
};
|
||||
(actor_id, status)
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn stop_all(&self, force: bool) -> RpcResult<Vec<(String, String)>> {
|
||||
debug!("OpenRPC request: stop_all with force: {}", force);
|
||||
let mut supervisor = self.lock().await;
|
||||
let results = supervisor.stop_all(force).await;
|
||||
Ok(results
|
||||
.into_iter()
|
||||
.map(|(actor_id, result)| {
|
||||
let status = match result {
|
||||
Ok(_) => "Success".to_string(),
|
||||
Err(e) => format!("Error: {}", e),
|
||||
};
|
||||
(actor_id, status)
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn get_all_status(&self) -> RpcResult<Vec<(String, String)>> {
|
||||
debug!("OpenRPC request: get_all_status");
|
||||
let supervisor = self.lock().await;
|
||||
let statuses = supervisor.get_all_runner_status().await
|
||||
.map_err(runner_error_to_rpc_error)?;
|
||||
Ok(statuses
|
||||
.into_iter()
|
||||
.map(|(actor_id, status)| {
|
||||
let status_str = format!("{:?}", status);
|
||||
(actor_id, status_str)
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn add_secret(&self, params: AddSecretParams) -> RpcResult<()> {
|
||||
debug!("OpenRPC request: add_secret, type: {}", params.secret_type);
|
||||
let mut supervisor = self.lock().await;
|
||||
|
||||
// Verify admin secret
|
||||
if !supervisor.has_admin_secret(¶ms.admin_secret) {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
|
||||
}
|
||||
|
||||
match params.secret_type.as_str() {
|
||||
"admin" => {
|
||||
supervisor.add_admin_secret(params.secret_value);
|
||||
}
|
||||
"user" => {
|
||||
supervisor.add_user_secret(params.secret_value);
|
||||
}
|
||||
"register" => {
|
||||
supervisor.add_register_secret(params.secret_value);
|
||||
}
|
||||
_ => {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid secret type. Must be 'admin', 'user', or 'register'", None::<()>));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn remove_secret(&self, params: RemoveSecretParams) -> RpcResult<()> {
|
||||
debug!("OpenRPC request: remove_secret, type: {}", params.secret_type);
|
||||
let mut supervisor = self.lock().await;
|
||||
|
||||
// Verify admin secret
|
||||
if !supervisor.has_admin_secret(¶ms.admin_secret) {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
|
||||
}
|
||||
|
||||
match params.secret_type.as_str() {
|
||||
"admin" => {
|
||||
supervisor.remove_admin_secret(¶ms.secret_value);
|
||||
}
|
||||
"user" => {
|
||||
supervisor.remove_user_secret(¶ms.secret_value);
|
||||
}
|
||||
"register" => {
|
||||
supervisor.remove_register_secret(¶ms.secret_value);
|
||||
}
|
||||
_ => {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid secret type. Must be 'admin', 'user', or 'register'", None::<()>));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_secrets(&self, params: ListSecretsParams) -> RpcResult<SupervisorInfoResponse> {
|
||||
debug!("OpenRPC request: list_secrets");
|
||||
let supervisor = self.lock().await;
|
||||
|
||||
// Verify admin secret
|
||||
if !supervisor.has_admin_secret(¶ms.admin_secret) {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
|
||||
}
|
||||
|
||||
Ok(SupervisorInfoResponse {
|
||||
server_url: "http://127.0.0.1:3030".to_string(),
|
||||
admin_secrets_count: supervisor.admin_secrets_count(),
|
||||
user_secrets_count: supervisor.user_secrets_count(),
|
||||
register_secrets_count: supervisor.register_secrets_count(),
|
||||
runners_count: supervisor.runners_count(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn list_admin_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>> {
|
||||
debug!("OpenRPC request: list_admin_secrets");
|
||||
let supervisor = self.lock().await;
|
||||
|
||||
// Verify admin secret
|
||||
if !supervisor.has_admin_secret(&admin_secret) {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
|
||||
}
|
||||
|
||||
Ok(supervisor.get_admin_secrets())
|
||||
}
|
||||
|
||||
async fn list_user_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>> {
|
||||
debug!("OpenRPC request: list_user_secrets");
|
||||
let supervisor = self.lock().await;
|
||||
|
||||
// Verify admin secret
|
||||
if !supervisor.has_admin_secret(&admin_secret) {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
|
||||
}
|
||||
|
||||
Ok(supervisor.get_user_secrets())
|
||||
}
|
||||
|
||||
async fn list_register_secrets(&self, admin_secret: String) -> RpcResult<Vec<String>> {
|
||||
debug!("OpenRPC request: list_register_secrets");
|
||||
let supervisor = self.lock().await;
|
||||
|
||||
// Verify admin secret
|
||||
if !supervisor.has_admin_secret(&admin_secret) {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
|
||||
}
|
||||
|
||||
Ok(supervisor.get_register_secrets())
|
||||
}
|
||||
|
||||
async fn get_supervisor_info(&self, admin_secret: String) -> RpcResult<SupervisorInfoResponse> {
|
||||
debug!("OpenRPC request: get_supervisor_info");
|
||||
let supervisor = self.lock().await;
|
||||
|
||||
// Verify admin secret
|
||||
if !supervisor.has_admin_secret(&admin_secret) {
|
||||
return Err(ErrorObject::owned(-32602, "Invalid admin secret", None::<()>));
|
||||
}
|
||||
|
||||
Ok(SupervisorInfoResponse {
|
||||
server_url: "http://127.0.0.1:3030".to_string(),
|
||||
admin_secrets_count: supervisor.admin_secrets_count(),
|
||||
user_secrets_count: supervisor.user_secrets_count(),
|
||||
register_secrets_count: supervisor.register_secrets_count(),
|
||||
runners_count: supervisor.runners_count(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn rpc_discover(&self) -> RpcResult<serde_json::Value> {
|
||||
debug!("OpenRPC request: rpc.discover");
|
||||
|
||||
// Read OpenRPC specification from docs/openrpc.json
|
||||
match load_openrpc_spec() {
|
||||
Ok(spec) => Ok(spec),
|
||||
Err(e) => {
|
||||
error!("Failed to load OpenRPC specification: {}", e);
|
||||
// Fallback to a minimal spec if file loading fails
|
||||
Ok(serde_json::json!({
|
||||
"openrpc": "1.3.2",
|
||||
"info": {
|
||||
"title": "Hero Supervisor OpenRPC API",
|
||||
"version": "1.0.0",
|
||||
"description": "OpenRPC API for managing Hero Supervisor runners and jobs"
|
||||
},
|
||||
"methods": [],
|
||||
"error": "Failed to load full specification"
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Start the OpenRPC server with a default supervisor
|
||||
pub async fn start_server(addr: SocketAddr) -> anyhow::Result<ServerHandle> {
|
||||
let supervisor = Arc::new(Mutex::new(Supervisor::default()));
|
||||
start_server_with_supervisor(addr, supervisor).await
|
||||
}
|
||||
|
||||
/// Start the OpenRPC server with an existing supervisor instance
|
||||
pub async fn start_server_with_supervisor(
|
||||
addr: SocketAddr,
|
||||
supervisor: Arc<Mutex<Supervisor>>,
|
||||
) -> anyhow::Result<ServerHandle> {
|
||||
let server = Server::builder().build(addr).await?;
|
||||
let handle = server.start(supervisor.into_rpc());
|
||||
Ok(handle)
|
||||
}
|
||||
|
||||
/// Start HTTP OpenRPC server (Unix socket support would require additional dependencies)
|
||||
pub async fn start_http_openrpc_server(
|
||||
supervisor: Arc<Mutex<Supervisor>>,
|
||||
bind_address: &str,
|
||||
port: u16,
|
||||
) -> anyhow::Result<ServerHandle> {
|
||||
let http_addr: SocketAddr = format!("{}:{}", bind_address, port).parse()?;
|
||||
|
||||
// Configure CORS to allow requests from the admin UI
|
||||
let cors = CorsLayer::new()
|
||||
.allow_origin(Any)
|
||||
.allow_headers(Any)
|
||||
.allow_methods(Any);
|
||||
|
||||
// Start HTTP server with CORS
|
||||
let http_server = Server::builder()
|
||||
.set_http_middleware(tower::ServiceBuilder::new().layer(cors))
|
||||
.build(http_addr)
|
||||
.await?;
|
||||
let http_handle = http_server.start(supervisor.into_rpc());
|
||||
|
||||
info!("OpenRPC HTTP server running at http://{} with CORS enabled", http_addr);
|
||||
|
||||
Ok(http_handle)
|
||||
}
|
||||
|
||||
/// Simplified server startup function for supervisor binary
|
||||
pub async fn start_openrpc_servers(
|
||||
supervisor: Arc<Mutex<Supervisor>>,
|
||||
bind_address: &str,
|
||||
port: u16,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let bind_address = bind_address.to_string();
|
||||
tokio::spawn(async move {
|
||||
match start_http_openrpc_server(supervisor, &bind_address, port).await {
|
||||
Ok(http_handle) => {
|
||||
info!("OpenRPC server started successfully");
|
||||
// Keep the server running
|
||||
http_handle.stopped().await;
|
||||
error!("OpenRPC server stopped unexpectedly");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to start OpenRPC server: {}", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Give the server a moment to start up
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_supervisor_rpc_creation() {
|
||||
let _rpc = SupervisorRpcImpl::new();
|
||||
// Just test that we can create the RPC implementation
|
||||
}
|
||||
|
||||
#[cfg(feature = "openrpc")]
|
||||
#[test]
|
||||
fn test_process_manager_type_parsing() {
|
||||
assert!(SupervisorRpcImpl::parse_process_manager_type("simple").is_ok());
|
||||
assert!(SupervisorRpcImpl::parse_process_manager_type("tmux").is_ok());
|
||||
assert!(SupervisorRpcImpl::parse_process_manager_type("Simple").is_ok());
|
||||
assert!(SupervisorRpcImpl::parse_process_manager_type("TMUX").is_ok());
|
||||
assert!(SupervisorRpcImpl::parse_process_manager_type("invalid").is_err());
|
||||
}
|
||||
}
|
||||
234
src/runner.rs
Normal file
234
src/runner.rs
Normal file
@@ -0,0 +1,234 @@
|
||||
//! Runner implementation for actor process management.
|
||||
|
||||
use crate::job::{Job};
|
||||
use log::{debug, info};
|
||||
use redis::AsyncCommands;
|
||||
use sal_service_manager::{ProcessManager, ProcessManagerError as ServiceProcessManagerError, ProcessStatus, ProcessConfig};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// Represents the current status of an actor/runner (alias for ProcessStatus)
|
||||
pub type RunnerStatus = ProcessStatus;
|
||||
|
||||
/// Log information structure
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LogInfo {
|
||||
pub timestamp: String,
|
||||
pub level: String,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// Runner configuration and state (merged from RunnerConfig)
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Runner {
|
||||
/// Unique identifier for the runner
|
||||
pub id: String,
|
||||
pub name: String,
|
||||
pub namespace: String,
|
||||
/// Path to the actor binary
|
||||
pub command: PathBuf, // Command to run runner by, used only if supervisor is used to run runners
|
||||
/// Redis URL for job queue
|
||||
pub redis_url: String,
|
||||
}
|
||||
|
||||
impl Runner {
|
||||
/// Create a new runner from configuration
|
||||
pub fn from_config(config: RunnerConfig) -> Self {
|
||||
Self {
|
||||
id: config.id,
|
||||
name: config.name,
|
||||
namespace: config.namespace,
|
||||
command: config.command,
|
||||
redis_url: config.redis_url,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new runner with the given parameters
|
||||
pub fn new(
|
||||
id: String,
|
||||
name: String,
|
||||
namespace: String,
|
||||
command: PathBuf,
|
||||
redis_url: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
name,
|
||||
namespace,
|
||||
command,
|
||||
redis_url,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the queue key for this runner with the given namespace
|
||||
pub fn get_queue(&self) -> String {
|
||||
if self.namespace == "" {
|
||||
format!("runner:{}", self.name)
|
||||
} else {
|
||||
format!("{}:runner:{}", self.namespace, self.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result type for runner operations
|
||||
pub type RunnerResult<T> = Result<T, RunnerError>;
|
||||
|
||||
/// Errors that can occur during runner operations
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum RunnerError {
|
||||
#[error("Actor '{actor_id}' not found")]
|
||||
ActorNotFound { actor_id: String },
|
||||
|
||||
#[error("Actor '{actor_id}' is already running")]
|
||||
ActorAlreadyRunning { actor_id: String },
|
||||
|
||||
#[error("Actor '{actor_id}' is not running")]
|
||||
ActorNotRunning { actor_id: String },
|
||||
|
||||
#[error("Failed to start actor '{actor_id}': {reason}")]
|
||||
StartupFailed { actor_id: String, reason: String },
|
||||
|
||||
#[error("Failed to stop actor '{actor_id}': {reason}")]
|
||||
StopFailed { actor_id: String, reason: String },
|
||||
|
||||
#[error("Timeout waiting for actor '{actor_id}' to start")]
|
||||
StartupTimeout { actor_id: String },
|
||||
|
||||
#[error("Job queue error for actor '{actor_id}': {reason}")]
|
||||
QueueError { actor_id: String, reason: String },
|
||||
|
||||
#[error("Process manager error: {source}")]
|
||||
ProcessManagerError {
|
||||
#[from]
|
||||
source: ServiceProcessManagerError,
|
||||
},
|
||||
|
||||
#[error("Configuration error: {reason}")]
|
||||
ConfigError { reason: String },
|
||||
|
||||
#[error("Invalid secret: {0}")]
|
||||
InvalidSecret(String),
|
||||
|
||||
#[error("IO error: {source}")]
|
||||
IoError {
|
||||
#[from]
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("Redis error: {source}")]
|
||||
RedisError {
|
||||
#[from]
|
||||
source: redis::RedisError,
|
||||
},
|
||||
|
||||
#[error("Job error: {source}")]
|
||||
JobError {
|
||||
#[from]
|
||||
source: crate::JobError,
|
||||
},
|
||||
}
|
||||
|
||||
/// Convert Runner to ProcessConfig
|
||||
pub fn runner_to_process_config(config: &Runner) -> ProcessConfig {
|
||||
ProcessConfig::new(config.id.clone(), config.command.clone())
|
||||
.with_arg("--id".to_string())
|
||||
.with_arg(config.id.clone())
|
||||
.with_arg("--redis-url".to_string())
|
||||
.with_arg(config.redis_url.clone())
|
||||
}
|
||||
|
||||
// Type alias for backward compatibility
|
||||
pub type RunnerConfig = Runner;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use sal_service_manager::{ProcessManagerError, SimpleProcessManager};
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct MockProcessManager {
|
||||
processes: HashMap<String, ProcessStatus>,
|
||||
}
|
||||
|
||||
impl MockProcessManager {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
processes: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ProcessManager for MockProcessManager {
|
||||
async fn start_process(&mut self, config: &ProcessConfig) -> Result<(), ProcessManagerError> {
|
||||
self.processes.insert(config.id.clone(), ProcessStatus::Running);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn stop_process(&mut self, process_id: &str, _force: bool) -> Result<(), ProcessManagerError> {
|
||||
self.processes.insert(process_id.to_string(), ProcessStatus::Stopped);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_status(&self, process_id: &str) -> Result<ProcessStatus, ProcessManagerError> {
|
||||
Ok(self.processes.get(process_id).cloned().unwrap_or(ProcessStatus::Stopped))
|
||||
}
|
||||
|
||||
async fn process_logs(&self, _process_id: &str, _lines: Option<usize>, _follow: bool) -> Result<Vec<LogInfo>, ProcessManagerError> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn health_check(&self) -> Result<(), ProcessManagerError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_processes(&self) -> Result<Vec<String>, ProcessManagerError> {
|
||||
Ok(self.processes.keys().cloned().collect())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_runner_creation() {
|
||||
let runner = Runner::new(
|
||||
"test_actor".to_string(),
|
||||
"test_runner".to_string(),
|
||||
"".to_string(),
|
||||
PathBuf::from("/path/to/binary"),
|
||||
"redis://localhost:6379".to_string(),
|
||||
);
|
||||
|
||||
assert_eq!(runner.id, "test_actor");
|
||||
assert_eq!(runner.name, "test_runner");
|
||||
assert_eq!(runner.command, PathBuf::from("/path/to/binary"));
|
||||
assert_eq!(runner.redis_url, "redis://localhost:6379");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_runner_get_queue() {
|
||||
let runner = Runner::new(
|
||||
"test_actor".to_string(),
|
||||
"test_runner".to_string(),
|
||||
"".to_string(),
|
||||
PathBuf::from("/path/to/binary"),
|
||||
"redis://localhost:6379".to_string(),
|
||||
);
|
||||
|
||||
let queue_key = runner.get_queue();
|
||||
assert_eq!(queue_key, "runner:test_runner");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_runner_error_types() {
|
||||
let error = RunnerError::ActorNotFound {
|
||||
actor_id: "test".to_string(),
|
||||
};
|
||||
assert!(error.to_string().contains("test"));
|
||||
|
||||
let error = RunnerError::ActorAlreadyRunning {
|
||||
actor_id: "test".to_string(),
|
||||
};
|
||||
assert!(error.to_string().contains("already running"));
|
||||
}
|
||||
}
|
||||
777
src/supervisor.rs
Normal file
777
src/supervisor.rs
Normal file
@@ -0,0 +1,777 @@
|
||||
//! Main supervisor implementation for managing multiple actor runners.
|
||||
|
||||
use chrono::Utc;
|
||||
use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::{client::{Client, ClientBuilder}, job::JobStatus, runner::{LogInfo, Runner, RunnerConfig, RunnerError, RunnerResult, RunnerStatus}, JobError};
|
||||
use crate::{job::Job};
|
||||
|
||||
#[cfg(feature = "admin")]
|
||||
use supervisor_admin_server::{AdminSupervisor, RunnerConfigInfo, JobInfo};
|
||||
|
||||
/// Process manager type for a runner
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ProcessManagerType {
|
||||
/// Simple process manager for direct process spawning
|
||||
Simple,
|
||||
/// Tmux process manager for session-based management
|
||||
Tmux(String), // session name
|
||||
}
|
||||
|
||||
/// Main supervisor that manages multiple runners
|
||||
#[derive(Clone)]
|
||||
pub struct Supervisor {
|
||||
/// Map of runner name to runner configuration
|
||||
runners: HashMap<String, Runner>,
|
||||
/// Shared process manager for all runners
|
||||
process_manager: Arc<Mutex<dyn ProcessManager>>,
|
||||
/// Shared Redis client for all runners
|
||||
redis_client: redis::Client,
|
||||
/// Namespace for queue keys
|
||||
namespace: String,
|
||||
/// Admin secrets for full access
|
||||
admin_secrets: Vec<String>,
|
||||
/// User secrets for limited access
|
||||
user_secrets: Vec<String>,
|
||||
/// Register secrets for runner registration
|
||||
register_secrets: Vec<String>,
|
||||
client: Client,
|
||||
}
|
||||
|
||||
pub struct SupervisorBuilder {
|
||||
/// Map of runner name to runner configuration
|
||||
runners: HashMap<String, Runner>,
|
||||
/// Redis URL for connection
|
||||
redis_url: String,
|
||||
/// Process manager type
|
||||
process_manager_type: ProcessManagerType,
|
||||
/// Namespace for queue keys
|
||||
namespace: String,
|
||||
/// Admin secrets for full access
|
||||
admin_secrets: Vec<String>,
|
||||
/// User secrets for limited access
|
||||
user_secrets: Vec<String>,
|
||||
/// Register secrets for runner registration
|
||||
register_secrets: Vec<String>,
|
||||
client_builder: ClientBuilder,
|
||||
}
|
||||
|
||||
impl SupervisorBuilder {
|
||||
/// Create a new supervisor builder
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
runners: HashMap::new(),
|
||||
redis_url: "redis://localhost:6379".to_string(),
|
||||
process_manager_type: ProcessManagerType::Simple,
|
||||
namespace: "".to_string(),
|
||||
admin_secrets: Vec::new(),
|
||||
user_secrets: Vec::new(),
|
||||
register_secrets: Vec::new(),
|
||||
client_builder: ClientBuilder::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the Redis URL
|
||||
pub fn redis_url<S: Into<String>>(mut self, url: S) -> Self {
|
||||
let url_string = url.into();
|
||||
self.redis_url = url_string.clone();
|
||||
self.client_builder = self.client_builder.redis_url(url_string);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the process manager type
|
||||
pub fn process_manager(mut self, pm_type: ProcessManagerType) -> Self {
|
||||
self.process_manager_type = pm_type;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the namespace for queue keys
|
||||
pub fn namespace<S: Into<String>>(mut self, namespace: S) -> Self {
|
||||
let namespace_string = namespace.into();
|
||||
self.namespace = namespace_string.clone();
|
||||
self.client_builder = self.client_builder.namespace(namespace_string);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an admin secret
|
||||
pub fn add_admin_secret<S: Into<String>>(mut self, secret: S) -> Self {
|
||||
self.admin_secrets.push(secret.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add multiple admin secrets
|
||||
pub fn admin_secrets<I, S>(mut self, secrets: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: Into<String>,
|
||||
{
|
||||
self.admin_secrets.extend(secrets.into_iter().map(|s| s.into()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a user secret
|
||||
pub fn add_user_secret<S: Into<String>>(mut self, secret: S) -> Self {
|
||||
self.user_secrets.push(secret.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add multiple user secrets
|
||||
pub fn user_secrets<I, S>(mut self, secrets: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: Into<String>,
|
||||
{
|
||||
self.user_secrets.extend(secrets.into_iter().map(|s| s.into()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a register secret
|
||||
pub fn add_register_secret<S: Into<String>>(mut self, secret: S) -> Self {
|
||||
self.register_secrets.push(secret.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add multiple register secrets
|
||||
pub fn register_secrets<I, S>(mut self, secrets: I) -> Self
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: Into<String>,
|
||||
{
|
||||
self.register_secrets.extend(secrets.into_iter().map(|s| s.into()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a runner to the supervisor
|
||||
pub fn add_runner(mut self, runner: Runner) -> Self {
|
||||
self.runners.insert(runner.id.clone(), runner);
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the supervisor
|
||||
pub async fn build(self) -> RunnerResult<Supervisor> {
|
||||
// Create process manager based on type
|
||||
let process_manager: Arc<Mutex<dyn ProcessManager>> = match &self.process_manager_type {
|
||||
ProcessManagerType::Simple => {
|
||||
Arc::new(Mutex::new(SimpleProcessManager::new()))
|
||||
}
|
||||
ProcessManagerType::Tmux(session_name) => {
|
||||
Arc::new(Mutex::new(TmuxProcessManager::new(session_name.clone())))
|
||||
}
|
||||
};
|
||||
|
||||
// Create Redis client
|
||||
let redis_client = redis::Client::open(self.redis_url.as_str())
|
||||
.map_err(|e| RunnerError::ConfigError {
|
||||
reason: format!("Invalid Redis URL: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(Supervisor {
|
||||
client: self.client_builder.build().await.unwrap(),
|
||||
runners: self.runners,
|
||||
process_manager,
|
||||
redis_client,
|
||||
namespace: self.namespace,
|
||||
admin_secrets: self.admin_secrets,
|
||||
user_secrets: self.user_secrets,
|
||||
register_secrets: self.register_secrets,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Supervisor {
|
||||
/// Create a new supervisor builder
|
||||
pub fn builder() -> SupervisorBuilder {
|
||||
SupervisorBuilder::new()
|
||||
}
|
||||
|
||||
/// Add a new runner to the supervisor
|
||||
pub async fn add_runner(
|
||||
&mut self,
|
||||
config: RunnerConfig,
|
||||
) -> RunnerResult<()> {
|
||||
// Runner is now just the config
|
||||
let runner = Runner::from_config(config.clone());
|
||||
|
||||
self.runners.insert(config.id.clone(), runner);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register a new runner with secret-based authentication
|
||||
pub async fn register_runner(&mut self, secret: &str, name: &str, queue: &str) -> RunnerResult<()> {
|
||||
// Check if the secret is valid (admin or register secret)
|
||||
if !self.admin_secrets.contains(&secret.to_string()) &&
|
||||
!self.register_secrets.contains(&secret.to_string()) {
|
||||
return Err(RunnerError::InvalidSecret("Invalid secret for runner registration".to_string()));
|
||||
}
|
||||
|
||||
// Create a basic runner config for the named runner
|
||||
let config = RunnerConfig {
|
||||
id: name.to_string(), // Use the provided name as actor_id
|
||||
name: name.to_string(), // Use the provided name as actor_id
|
||||
namespace: self.namespace.clone(),
|
||||
command: PathBuf::from("/tmp/mock_runner"), // Default path
|
||||
redis_url: "redis://localhost:6379".to_string(),
|
||||
};
|
||||
|
||||
// Add the runner using existing logic
|
||||
self.add_runner(config).await
|
||||
}
|
||||
|
||||
/// Create a job (fire-and-forget, non-blocking) with secret-based authentication
|
||||
pub async fn create_job(&mut self, secret: &str, job: crate::job::Job) -> RunnerResult<String> {
|
||||
// Check if the secret is valid (admin or user secret)
|
||||
if !self.admin_secrets.contains(&secret.to_string()) &&
|
||||
!self.user_secrets.contains(&secret.to_string()) {
|
||||
return Err(RunnerError::InvalidSecret("Invalid secret for job creation".to_string()));
|
||||
}
|
||||
|
||||
// Find the runner by name
|
||||
let runner_name = job.runner_name.clone();
|
||||
let job_id = job.id.clone(); // Store job ID before moving job
|
||||
if let Some(_runner) = self.runners.get(&runner_name) {
|
||||
// Use the supervisor's queue_job_to_runner method (fire-and-forget)
|
||||
self.queue_job_to_runner(&runner_name, job).await?;
|
||||
Ok(job_id) // Return the job ID immediately
|
||||
} else {
|
||||
Err(RunnerError::ActorNotFound {
|
||||
actor_id: job.runner_name.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Run a job on the appropriate runner with secret-based authentication
|
||||
/// This is a synchronous operation that queues the job, waits for the result, and returns it
|
||||
pub async fn run_job(&mut self, secret: &str, job: crate::job::Job) -> RunnerResult<Option<String>> {
|
||||
// Check if the secret is valid (admin or user secret)
|
||||
if !self.admin_secrets.contains(&secret.to_string()) &&
|
||||
!self.user_secrets.contains(&secret.to_string()) {
|
||||
return Err(RunnerError::InvalidSecret("Invalid secret for job execution".to_string()));
|
||||
}
|
||||
|
||||
// Find the runner by name
|
||||
let runner_name = job.runner_name.clone();
|
||||
if let Some(_runner) = self.runners.get(&runner_name) {
|
||||
// Use the synchronous queue_and_wait method with a reasonable timeout (30 seconds)
|
||||
self.queue_and_wait(&runner_name, job, 30).await
|
||||
} else {
|
||||
Err(RunnerError::ActorNotFound {
|
||||
actor_id: job.runner_name.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove a runner from the supervisor
|
||||
pub async fn remove_runner(&mut self, actor_id: &str) -> RunnerResult<()> {
|
||||
if let Some(_instance) = self.runners.remove(actor_id) {
|
||||
// Runner is removed from the map, which will drop the Arc
|
||||
// and eventually clean up the runner when no more references exist
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a runner by actor ID
|
||||
pub fn get_runner(&self, actor_id: &str) -> Option<&Runner> {
|
||||
self.runners.get(actor_id)
|
||||
}
|
||||
|
||||
/// Get a job by job ID from Redis
|
||||
pub async fn get_job(&self, job_id: &str) -> RunnerResult<crate::job::Job> {
|
||||
use redis::AsyncCommands;
|
||||
|
||||
let mut conn = self.redis_client.get_multiplexed_async_connection().await
|
||||
.map_err(|e| RunnerError::RedisError {
|
||||
source: e
|
||||
})?;
|
||||
|
||||
self.client.load_job_from_redis(job_id).await
|
||||
.map_err(|e| RunnerError::QueueError {
|
||||
actor_id: job_id.to_string(),
|
||||
reason: format!("Failed to load job: {}", e),
|
||||
})
|
||||
}
|
||||
|
||||
/// Ping a runner by dispatching a ping job to its queue
|
||||
pub async fn ping_runner(&mut self, runner_id: &str) -> RunnerResult<String> {
|
||||
use crate::job::{Job, JobBuilder};
|
||||
use std::time::Duration;
|
||||
|
||||
// Check if runner exists
|
||||
if !self.runners.contains_key(runner_id) {
|
||||
return Err(RunnerError::ActorNotFound {
|
||||
actor_id: runner_id.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Create a ping job
|
||||
let ping_job = JobBuilder::new()
|
||||
.caller_id("supervisor_ping")
|
||||
.context_id("ping_context")
|
||||
.payload("ping")
|
||||
.runner_name(runner_id)
|
||||
.executor("ping")
|
||||
.timeout(Duration::from_secs(10))
|
||||
.build()
|
||||
.map_err(|e| RunnerError::QueueError {
|
||||
actor_id: runner_id.to_string(),
|
||||
reason: format!("Failed to create ping job: {}", e),
|
||||
})?;
|
||||
|
||||
// Queue the ping job
|
||||
let job_id = ping_job.id.clone();
|
||||
self.queue_job_to_runner(runner_id, ping_job).await?;
|
||||
|
||||
Ok(job_id)
|
||||
}
|
||||
|
||||
/// Stop a job by ID
|
||||
pub async fn stop_job(&mut self, job_id: &str) -> RunnerResult<()> {
|
||||
use redis::AsyncCommands;
|
||||
|
||||
// For now, we'll implement a basic stop by removing the job from Redis
|
||||
// In a more sophisticated implementation, you might send a stop signal to the runner
|
||||
let mut conn = self.redis_client.get_multiplexed_async_connection().await
|
||||
.map_err(|e| RunnerError::QueueError {
|
||||
actor_id: job_id.to_string(),
|
||||
reason: format!("Failed to connect to Redis: {}", e),
|
||||
})?;
|
||||
|
||||
let job_key = self.client.set_job_status(job_id, JobStatus::Stopping).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete a job by ID
|
||||
pub async fn delete_job(&mut self, job_id: &str) -> RunnerResult<()> {
|
||||
self.client.delete_job(&job_id).await
|
||||
}
|
||||
|
||||
/// List all managed runners
|
||||
pub fn list_runners(&self) -> Vec<&str> {
|
||||
self.runners.keys().map(|s| s.as_str()).collect()
|
||||
}
|
||||
|
||||
/// Start a specific runner
|
||||
pub async fn start_runner(&mut self, actor_id: &str) -> RunnerResult<()> {
|
||||
use crate::runner::runner_to_process_config;
|
||||
use log::info;
|
||||
|
||||
if let Some(runner) = self.runners.get(actor_id) {
|
||||
info!("Starting actor {}", runner.id);
|
||||
|
||||
let process_config = runner_to_process_config(runner);
|
||||
let mut pm = self.process_manager.lock().await;
|
||||
pm.start_process(&process_config).await?;
|
||||
|
||||
info!("Successfully started actor {}", runner.id);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(RunnerError::ActorNotFound {
|
||||
actor_id: actor_id.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Stop a specific runner
|
||||
pub async fn stop_runner(&mut self, actor_id: &str, force: bool) -> RunnerResult<()> {
|
||||
use log::info;
|
||||
|
||||
if let Some(runner) = self.runners.get(actor_id) {
|
||||
info!("Stopping actor {}", runner.id);
|
||||
|
||||
let mut pm = self.process_manager.lock().await;
|
||||
pm.stop_process(&runner.id, force).await?;
|
||||
|
||||
info!("Successfully stopped actor {}", runner.id);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(RunnerError::ActorNotFound {
|
||||
actor_id: actor_id.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Get status of a specific runner
|
||||
pub async fn get_runner_status(&self, actor_id: &str) -> RunnerResult<RunnerStatus> {
|
||||
if let Some(runner) = self.runners.get(actor_id) {
|
||||
let pm = self.process_manager.lock().await;
|
||||
let status = pm.process_status(&runner.id).await?;
|
||||
Ok(status)
|
||||
} else {
|
||||
Err(RunnerError::ActorNotFound {
|
||||
actor_id: actor_id.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Get logs from a specific runner
|
||||
pub async fn get_runner_logs(
|
||||
&self,
|
||||
actor_id: &str,
|
||||
lines: Option<usize>,
|
||||
follow: bool,
|
||||
) -> RunnerResult<Vec<LogInfo>> {
|
||||
if let Some(runner) = self.runners.get(actor_id) {
|
||||
let pm = self.process_manager.lock().await;
|
||||
let logs = pm.process_logs(&runner.id, lines, follow).await?;
|
||||
|
||||
// Convert sal_service_manager::LogInfo to our LogInfo
|
||||
let converted_logs = logs.into_iter().map(|log| LogInfo {
|
||||
timestamp: log.timestamp,
|
||||
level: log.level,
|
||||
message: log.message,
|
||||
}).collect();
|
||||
|
||||
Ok(converted_logs)
|
||||
} else {
|
||||
Err(RunnerError::ActorNotFound {
|
||||
actor_id: actor_id.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Queue a job to a specific runner by name
|
||||
pub async fn queue_job_to_runner(&mut self, runner_name: &str, job: crate::job::Job) -> RunnerResult<()> {
|
||||
use redis::AsyncCommands;
|
||||
use log::{debug, info};
|
||||
|
||||
if let Some(runner) = self.runners.get(runner_name) {
|
||||
debug!("Queuing job {} for actor {}", job.id, runner.id);
|
||||
|
||||
let mut conn = self.redis_client.get_multiplexed_async_connection().await
|
||||
.map_err(|e| RunnerError::QueueError {
|
||||
actor_id: runner.id.clone(),
|
||||
reason: format!("Failed to connect to Redis: {}", e),
|
||||
})?;
|
||||
|
||||
// Store the job in Redis first
|
||||
self.client.store_job_in_redis(&job).await
|
||||
.map_err(|e| RunnerError::QueueError {
|
||||
actor_id: runner.id.clone(),
|
||||
reason: format!("Failed to store job: {}", e),
|
||||
})?;
|
||||
|
||||
// Use the runner's get_queue method with our namespace
|
||||
let queue_key = runner.get_queue();
|
||||
|
||||
let _: () = conn.lpush(&queue_key, &job.id).await
|
||||
.map_err(|e| RunnerError::QueueError {
|
||||
actor_id: runner.id.clone(),
|
||||
reason: format!("Failed to queue job: {}", e),
|
||||
})?;
|
||||
|
||||
info!("Job {} queued successfully for actor {} on queue {}", job.id, runner.id, queue_key);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(RunnerError::ActorNotFound {
|
||||
actor_id: runner_name.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Queue a job to a specific runner and wait for the result
|
||||
/// This implements the proper Hero job protocol:
|
||||
/// 1. Queue the job to the runner
|
||||
/// 2. BLPOP on the reply queue for this job
|
||||
/// 3. Get the job result from the job hash
|
||||
/// 4. Return the complete result
|
||||
pub async fn queue_and_wait(&mut self, runner_name: &str, job: crate::job::Job, timeout_secs: u64) -> RunnerResult<Option<String>> {
|
||||
use redis::AsyncCommands;
|
||||
|
||||
let job_id = job.id.clone();
|
||||
|
||||
// First queue the job
|
||||
self.queue_job_to_runner(runner_name, job).await?;
|
||||
|
||||
// Get Redis connection from the supervisor (shared Redis client)
|
||||
let _runner = self.runners.get(runner_name)
|
||||
.ok_or_else(|| RunnerError::ActorNotFound {
|
||||
actor_id: runner_name.to_string(),
|
||||
})?;
|
||||
|
||||
let mut conn = self.redis_client.get_multiplexed_async_connection().await
|
||||
.map_err(|e| RunnerError::RedisError {
|
||||
source: e
|
||||
})?;
|
||||
|
||||
// BLPOP on the reply queue for this specific job
|
||||
let reply_key = self.client.job_reply_key(&job_id);
|
||||
let result: Option<Vec<String>> = conn.blpop(&reply_key, timeout_secs as f64).await
|
||||
.map_err(|e| RunnerError::RedisError {
|
||||
source: e
|
||||
})?;
|
||||
|
||||
match result {
|
||||
Some(reply_data) => {
|
||||
// Reply received, now get the job result from the job hash
|
||||
let job_key = self.client.job_key(&job_id);
|
||||
let job_result: Option<String> = conn.hget(&job_key, "result").await
|
||||
.map_err(|e| RunnerError::RedisError {
|
||||
source: e
|
||||
})?;
|
||||
|
||||
Ok(job_result)
|
||||
}
|
||||
None => {
|
||||
// Timeout occurred
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get status of all runners
|
||||
pub async fn get_all_runner_status(&self) -> RunnerResult<Vec<(String, RunnerStatus)>> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for (actor_id, instance) in &self.runners {
|
||||
match self.get_runner_status(actor_id).await {
|
||||
Ok(status) => results.push((actor_id.clone(), status)),
|
||||
Err(_) => {
|
||||
use sal_service_manager::ProcessStatus;
|
||||
results.push((actor_id.clone(), ProcessStatus::Stopped));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Start all runners
|
||||
pub async fn start_all(&mut self) -> Vec<(String, RunnerResult<()>)> {
|
||||
let mut results = Vec::new();
|
||||
let actor_ids: Vec<String> = self.runners.keys().cloned().collect();
|
||||
|
||||
for actor_id in actor_ids {
|
||||
let result = self.start_runner(&actor_id).await;
|
||||
results.push((actor_id, result));
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// Stop all runners
|
||||
pub async fn stop_all(&mut self, force: bool) -> Vec<(String, RunnerResult<()>)> {
|
||||
let mut results = Vec::new();
|
||||
let actor_ids: Vec<String> = self.runners.keys().cloned().collect();
|
||||
|
||||
for actor_id in actor_ids {
|
||||
let result = self.stop_runner(&actor_id, force).await;
|
||||
results.push((actor_id, result));
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// Get status of all runners
|
||||
pub async fn get_all_status(&self) -> Vec<(String, RunnerResult<RunnerStatus>)> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for (actor_id, _instance) in &self.runners {
|
||||
let result = self.get_runner_status(actor_id).await;
|
||||
results.push((actor_id.clone(), result));
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// Add an admin secret
|
||||
pub fn add_admin_secret(&mut self, secret: String) {
|
||||
if !self.admin_secrets.contains(&secret) {
|
||||
self.admin_secrets.push(secret);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove an admin secret
|
||||
pub fn remove_admin_secret(&mut self, secret: &str) -> bool {
|
||||
if let Some(pos) = self.admin_secrets.iter().position(|x| x == secret) {
|
||||
self.admin_secrets.remove(pos);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if admin secret exists
|
||||
pub fn has_admin_secret(&self, secret: &str) -> bool {
|
||||
self.admin_secrets.contains(&secret.to_string())
|
||||
}
|
||||
|
||||
/// Get admin secrets count
|
||||
pub fn admin_secrets_count(&self) -> usize {
|
||||
self.admin_secrets.len()
|
||||
}
|
||||
|
||||
/// Add a user secret
|
||||
pub fn add_user_secret(&mut self, secret: String) {
|
||||
if !self.user_secrets.contains(&secret) {
|
||||
self.user_secrets.push(secret);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove a user secret
|
||||
pub fn remove_user_secret(&mut self, secret: &str) -> bool {
|
||||
if let Some(pos) = self.user_secrets.iter().position(|x| x == secret) {
|
||||
self.user_secrets.remove(pos);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if user secret exists
|
||||
pub fn has_user_secret(&self, secret: &str) -> bool {
|
||||
self.user_secrets.contains(&secret.to_string())
|
||||
}
|
||||
|
||||
/// Get user secrets count
|
||||
pub fn user_secrets_count(&self) -> usize {
|
||||
self.user_secrets.len()
|
||||
}
|
||||
|
||||
/// Add a register secret
|
||||
pub fn add_register_secret(&mut self, secret: String) {
|
||||
if !self.register_secrets.contains(&secret) {
|
||||
self.register_secrets.push(secret);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove a register secret
|
||||
pub fn remove_register_secret(&mut self, secret: &str) -> bool {
|
||||
if let Some(pos) = self.register_secrets.iter().position(|x| x == secret) {
|
||||
self.register_secrets.remove(pos);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if register secret exists
|
||||
pub fn has_register_secret(&self, secret: &str) -> bool {
|
||||
self.register_secrets.contains(&secret.to_string())
|
||||
}
|
||||
|
||||
/// Get register secrets count
|
||||
pub fn register_secrets_count(&self) -> usize {
|
||||
self.register_secrets.len()
|
||||
}
|
||||
|
||||
/// List all job IDs from Redis
|
||||
pub async fn list_jobs(&self) -> RunnerResult<Vec<String>> {
|
||||
self.client.list_jobs().await
|
||||
}
|
||||
|
||||
/// Get runners count
|
||||
pub fn runners_count(&self) -> usize {
|
||||
self.runners.len()
|
||||
}
|
||||
|
||||
/// Get admin secrets (returns cloned vector for security)
|
||||
pub fn get_admin_secrets(&self) -> Vec<String> {
|
||||
self.admin_secrets.clone()
|
||||
}
|
||||
|
||||
/// Get user secrets (returns cloned vector for security)
|
||||
pub fn get_user_secrets(&self) -> Vec<String> {
|
||||
self.user_secrets.clone()
|
||||
}
|
||||
|
||||
/// Get register secrets (returns cloned vector for security)
|
||||
pub fn get_register_secrets(&self) -> Vec<String> {
|
||||
self.register_secrets.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Supervisor {
|
||||
fn default() -> Self {
|
||||
// Note: Default implementation creates an empty supervisor
|
||||
// Use Supervisor::builder() for proper initialization
|
||||
Self {
|
||||
runners: HashMap::new(),
|
||||
process_manager: Arc::new(Mutex::new(SimpleProcessManager::new())),
|
||||
redis_client: redis::Client::open("redis://localhost:6379").unwrap(),
|
||||
namespace: "".to_string(),
|
||||
admin_secrets: Vec::new(),
|
||||
user_secrets: Vec::new(),
|
||||
register_secrets: Vec::new(),
|
||||
client: Client::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::path::PathBuf;
|
||||
use sal_service_manager::SimpleProcessManager;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_supervisor_creation() {
|
||||
let supervisor = Supervisor::builder()
|
||||
.redis_url("redis://localhost:6379")
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(supervisor.list_runners().len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_add_runner() {
|
||||
use std::path::PathBuf;
|
||||
|
||||
let config = RunnerConfig::new(
|
||||
"test_actor".to_string(),
|
||||
"test_actor".to_string(),
|
||||
"".to_string(),
|
||||
PathBuf::from("/usr/bin/test_actor"),
|
||||
"redis://localhost:6379".to_string(),
|
||||
);
|
||||
|
||||
let runner = Runner::from_config(config.clone());
|
||||
let mut supervisor = Supervisor::builder()
|
||||
.redis_url("redis://localhost:6379")
|
||||
.add_runner(runner)
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(supervisor.list_runners().len(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_add_multiple_runners() {
|
||||
use std::path::PathBuf;
|
||||
|
||||
let config1 = RunnerConfig::new(
|
||||
"sal_actor".to_string(),
|
||||
"sal_actor".to_string(),
|
||||
"".to_string(),
|
||||
PathBuf::from("/usr/bin/sal_actor"),
|
||||
"redis://localhost:6379".to_string(),
|
||||
);
|
||||
|
||||
let config2 = RunnerConfig::new(
|
||||
"osis_actor".to_string(),
|
||||
"osis_actor".to_string(),
|
||||
"".to_string(),
|
||||
PathBuf::from("/usr/bin/osis_actor"),
|
||||
"redis://localhost:6379".to_string(),
|
||||
);
|
||||
|
||||
let runner1 = Runner::from_config(config1);
|
||||
let runner2 = Runner::from_config(config2);
|
||||
|
||||
let supervisor = Supervisor::builder()
|
||||
.redis_url("redis://localhost:6379")
|
||||
.add_runner(runner1)
|
||||
.add_runner(runner2)
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(supervisor.list_runners().len(), 2);
|
||||
assert!(supervisor.get_runner("sal_actor").is_some());
|
||||
assert!(supervisor.get_runner("osis_actor").is_some());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user