refactor wip

This commit is contained in:
parent 8ed40ce99c
commit 7a652c9c3c

1349  Cargo.lock  (generated)
File diff suppressed because it is too large.
@@ -7,6 +7,8 @@ edition = "2024"
 anyhow = "1.0"
 chrono = { version = "0.4", features = ["serde"] }
 env_logger = "0.10"
+hero_supervisor = { path = "core/supervisor" }
+hero_websocket_server = { path = "interfaces/websocket/server" }
 log = "0.4"
 redis = { version = "0.25.0", features = ["tokio-comp"] }
 serde = { version = "1.0", features = ["derive"] }
@@ -23,6 +25,10 @@ tempfile = "3.10"
 name = "simple_rhai_bench"
 harness = false
 
+[[bin]]
+name = "hive"
+path = "cmd/main.rs"
+
 [workspace.dependencies]
 actix = "0.13"
 actix-web = { version = "4", features = ["rustls-0_23"] }
28  cmd/config.toml  (new file)
@@ -0,0 +1,28 @@
[global]
# Redis connection URL for job queuing
redis_url = "redis://localhost:6379"

# WebSocket Server Configuration
# Handles WebSocket connections for job dispatching
[websocket_server]
host = "127.0.0.1"
port = 8443
redis_url = "redis://127.0.0.1/"
auth = false
tls = false
# cert = "/path/to/cert.pem"  # Uncomment and set path for TLS
# key = "/path/to/key.pem"    # Uncomment and set path for TLS
# tls_port = 8444             # Uncomment for separate TLS port

# Circles configuration - maps circle names to lists of member public keys
[websocket_server.circles]
# Example circle configuration:
# "admin" = ["04abc123...", "04def456..."]
# "users" = ["04ghi789...", "04jkl012..."]
# "ws" = [] # Public circle (no auth required)

# OSIS Worker Configuration
# Handles OSIS (HeroScript) execution
[osis_worker]
binary_path = "/Users/timurgordon/code/git.ourworld.tf/herocode/hero/target/debug/osis"
env_vars = { "RUST_LOG" = "info", "WORKER_TYPE" = "osis", "MAX_CONCURRENT_JOBS" = "5" }
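As an aside for readers of this config: below is a minimal sketch of how such a file can be deserialized with serde and the toml crate. It mirrors the `Config`/`GlobalConfig`/`WorkerConfigToml` structs that `cmd/hive_supervisor_tui_safe.rs` defines later in this commit; the explicit `websocket_server` field is an assumption added here so the flattened worker map is not tripped up by the `[websocket_server]` table above.

```rust
// Minimal sketch (not the supervisor's actual loader): deserialize the
// config above with serde + toml, mirroring the Config structs defined in
// cmd/hive_supervisor_tui_safe.rs later in this commit.
use serde::Deserialize;
use std::collections::HashMap;

#[derive(Debug, Deserialize)]
struct Config {
    global: GlobalConfig,
    // Assumption: capture [websocket_server] separately so the flattened
    // worker map below only sees the *_worker tables.
    websocket_server: Option<toml::Value>,
    #[serde(flatten)]
    workers: HashMap<String, WorkerConfigToml>, // e.g. "osis_worker"
}

#[derive(Debug, Deserialize)]
struct GlobalConfig {
    redis_url: String,
}

#[derive(Debug, Deserialize)]
struct WorkerConfigToml {
    binary_path: String,
    env_vars: Option<HashMap<String, String>>,
}

fn load_config(path: &str) -> Result<Config, Box<dyn std::error::Error>> {
    let content = std::fs::read_to_string(path)?;
    Ok(toml::from_str(&content)?)
}
```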
186  cmd/main.rs  (new file)
@@ -0,0 +1,186 @@
use std::env;
use std::sync::Arc;
use std::time::Duration;
use hero_supervisor::{SupervisorBuilder, SupervisorError};
use hero_websocket_server::ServerBuilder;
use tokio::signal;
use log::{info, error};
use env_logger::Builder;

/// The main entry point of the Hero Supervisor.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize logging
    env_logger::Builder::from_default_env()
        .filter_level(log::LevelFilter::Info)
        .init();

    info!("Hero Supervisor starting up...");

    // Get config path from command line arguments or use default
    let args: Vec<String> = env::args().collect();
    let config_path = if let Some(config_index) = args.iter().position(|arg| arg == "--config") {
        if config_index + 1 < args.len() {
            &args[config_index + 1]
        } else {
            "cmd/config.toml"
        }
    } else {
        "cmd/config.toml"
    };

    println!("Loading configuration from: {}", config_path);

    let supervisor = SupervisorBuilder::from_toml(config_path)?
        .build().await?;

    // Wrap supervisor in Arc for sharing across tasks
    let supervisor = Arc::new(supervisor);

    // Extract worker configurations from TOML config
    let worker_configs = supervisor.get_worker_configs()?;
    info!("Loaded {} worker configurations from TOML", worker_configs.len());

    // Spawn the background lifecycle manager with 5-minute health check interval
    let health_check_interval = Duration::from_secs(5 * 60); // 5 minutes
    let mut lifecycle_handle = supervisor.clone().spawn_lifecycle_manager(worker_configs, health_check_interval);

    info!("Hero Supervisor started successfully!");
    info!("Background lifecycle manager is running with 5-minute health checks.");
    info!("Workers are being monitored and will be automatically restarted if they fail.");

    // Start WebSocket server for job dispatching
    info!("Starting WebSocket server for job dispatching...");
    let ws_supervisor = supervisor.clone();

    // Get WebSocket server config from TOML or use defaults
    let ws_config = supervisor.get_websocket_config().unwrap_or_else(|_| {
        info!("Using default WebSocket server configuration");
        hero_supervisor::WebSocketServerConfig {
            host: "127.0.0.1".to_string(),
            port: 8443,
            redis_url: "redis://127.0.0.1/".to_string(),
            auth: false,
            tls: false,
            cert: None,
            key: None,
            tls_port: None,
            circles: std::collections::HashMap::new(),
        }
    });

    let mut websocket_handle = tokio::spawn(async move {
        info!("WebSocket server starting on {}:{}", ws_config.host, ws_config.port);

        // Create the WebSocket server with our supervisor
        let mut server_builder = ServerBuilder::new()
            .host(&ws_config.host)
            .port(ws_config.port)
            .redis_url(&ws_config.redis_url)
            .with_supervisor(ws_supervisor);

        // Configure auth if enabled
        if ws_config.auth {
            server_builder = server_builder.with_auth();
        }

        // Configure TLS if enabled
        if ws_config.tls {
            if let (Some(cert), Some(key)) = (&ws_config.cert, &ws_config.key) {
                server_builder = server_builder.with_tls(cert.clone(), key.clone());
                if let Some(tls_port) = ws_config.tls_port {
                    server_builder = server_builder.with_tls_port(tls_port);
                }
            }
        }

        // Configure circles
        if !ws_config.circles.is_empty() {
            server_builder = server_builder.circles(ws_config.circles.clone());
        }

        let server = match server_builder.build() {
            Ok(server) => server,
            Err(e) => {
                error!("Failed to build WebSocket server: {}", e);
                return;
            }
        };

        // Start the WebSocket server
        match server.spawn_circle_server() {
            Ok((server_handle, _)) => {
                info!("WebSocket server successfully started and ready to dispatch jobs");
                if let Err(e) = server_handle.await {
                    error!("WebSocket server error: {:?}", e);
                }
            }
            Err(e) => {
                error!("Failed to start WebSocket server: {}", e);
            }
        }
    });

    info!("WebSocket server started - ready to accept connections and dispatch jobs");

    // Set up graceful shutdown signal handlers
    let shutdown_signal = async {
        let ctrl_c = async {
            signal::ctrl_c()
                .await
                .expect("failed to install Ctrl+C handler");
        };

        #[cfg(unix)]
        let terminate = async {
            signal::unix::signal(signal::unix::SignalKind::terminate())
                .expect("failed to install signal handler")
                .recv()
                .await;
        };

        #[cfg(not(unix))]
        let terminate = std::future::pending::<()>();

        tokio::select! {
            _ = ctrl_c => {},
            _ = terminate => {},
        }

        info!("Shutdown signal received, initiating graceful shutdown...");
    };

    // Wait for shutdown signal or task completion
    tokio::select! {
        _ = shutdown_signal => {
            info!("Graceful shutdown initiated");

            // Cancel background tasks
            lifecycle_handle.abort();
            websocket_handle.abort();

            info!("Background tasks stopped");
        }
        result = &mut lifecycle_handle => {
            match result {
                Ok(Ok(())) => info!("Lifecycle manager completed successfully"),
                Ok(Err(e)) => error!("Lifecycle manager error: {}", e),
                Err(e) => error!("Lifecycle manager task panicked: {}", e),
            }
            // Also stop the websocket handle
            websocket_handle.abort();
        }
        result = &mut websocket_handle => {
            match result {
                Ok(()) => info!("WebSocket server completed successfully"),
                Err(e) => error!("WebSocket server task panicked: {}", e),
            }
            // Also stop the lifecycle handle
            lifecycle_handle.abort();
        }
    }

    info!("Hero Supervisor shutdown complete");

    Ok(())
}
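The shutdown logic above boils down to racing a signal future against the background task handles and aborting the losers. A minimal, self-contained sketch of that pattern (plain tokio, none of the Hero types; assumes tokio's `macros`, `rt-multi-thread`, `signal`, and `time` features):

```rust
// Minimal sketch of the shutdown pattern used in cmd/main.rs: race Ctrl+C
// against a long-running task handle and abort whichever side loses.
use tokio::{signal, task, time::{sleep, Duration}};

#[tokio::main]
async fn main() {
    let mut worker: task::JoinHandle<()> = tokio::spawn(async {
        loop {
            sleep(Duration::from_secs(1)).await; // stand-in for real work
        }
    });

    tokio::select! {
        _ = signal::ctrl_c() => {
            // Signal wins: cancel the background task.
            worker.abort();
        }
        result = &mut worker => {
            // Task finished (or panicked) first.
            if let Err(e) = result {
                eprintln!("worker task failed: {e}");
            }
        }
    }
}
```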
@@ -1,191 +1,70 @@
+//! Hero Supervisor Worker Demo
+//!
+//! This example demonstrates the new Hero Supervisor API with:
+//! - Synchronous build() method
+//! - Asynchronous start_workers() method
+//! - Proper cleanup on program exit
+//! - Signal handling for graceful shutdown
+
 use colored::*;
-use hero_supervisor::{SupervisorBuilder, ScriptType, JobStatus};
-use log::warn;
-use std::process::Stdio;
+use hero_supervisor::{SupervisorBuilder, ScriptType};
 use std::time::Duration;
-use tokio::process::{Child, Command as TokioCommand};
-use tokio::time::sleep;
+use tokio::signal;
 
-/// Supervisor manages worker lifecycle and job execution
-pub struct Supervisor {
-    supervisor: hero_supervisor::Supervisor,
-    worker_processes: Vec<WorkerProcess>,
-    redis_url: String,
-}
-
-/// Represents a managed worker process
-pub struct WorkerProcess {
-    id: String,
-    script_type: ScriptType,
-    process: Option<Child>,
-    binary_path: String,
-}
-
-impl Supervisor {
-    /// Create a new supervisor with supervisor configuration
-    pub async fn new(redis_url: String) -> Result<Self, Box<dyn std::error::Error>> {
-        let supervisor = SupervisorBuilder::new()
-            .caller_id("supervisor")
-            .context_id("demo-context")
-            .redis_url(&redis_url)
-            .heroscript_workers(vec!["hero-worker-1".to_string()])
-            .rhai_sal_workers(vec!["rhai-sal-worker-1".to_string()])
-            .rhai_dsl_workers(vec!["rhai-dsl-worker-1".to_string()])
-            .build()?;
-
-        Ok(Self {
-            supervisor,
-            worker_processes: Vec::new(),
-            redis_url,
-        })
-    }
-
-    /// Start a worker for a specific script type
-    pub async fn start_worker(&mut self, script_type: ScriptType, worker_binary_path: &str) -> Result<(), Box<dyn std::error::Error>> {
-        let worker_id = match script_type {
-            ScriptType::HeroScript => "hero-worker-1",
-            ScriptType::RhaiSAL => "rhai-sal-worker-1",
-            ScriptType::RhaiDSL => "rhai-dsl-worker-1",
-        };
-
-        println!("{}", format!("🚀 Starting {} worker: {}", script_type.as_str(), worker_id).green().bold());
-
-        // Check if worker binary exists
-        if !std::path::Path::new(worker_binary_path).exists() {
-            return Err(format!("Worker binary not found at: {}", worker_binary_path).into());
-        }
-
-        // Start the worker process
-        let mut cmd = TokioCommand::new(worker_binary_path);
-        cmd.arg("--worker-id").arg(worker_id)
-            .arg("--redis-url").arg(&self.redis_url)
-            .arg("--no-timestamp")
-            .stdout(Stdio::piped())
-            .stderr(Stdio::piped());
-
-        let process = cmd.spawn()?;
-
-        let worker_process = WorkerProcess {
-            id: worker_id.to_string(),
-            script_type,
-            process: Some(process),
-            binary_path: worker_binary_path.to_string(),
-        };
-
-        self.worker_processes.push(worker_process);
-
-        // Give worker time to start up
-        sleep(Duration::from_millis(500)).await;
-
-        println!("{}", format!("✅ Worker {} started successfully", worker_id).green());
-        Ok(())
-    }
-
-    /// Stop all workers
-    pub async fn stop_all_workers(&mut self) {
-        println!("{}", "🛑 Stopping all workers...".yellow().bold());
-
-        for worker in &mut self.worker_processes {
-            if let Some(mut process) = worker.process.take() {
-                println!("Stopping worker: {}", worker.id);
-
-                // Try graceful shutdown first
-                if let Err(e) = process.kill().await {
-                    warn!("Failed to kill worker {}: {}", worker.id, e);
-                }
-
-                // Wait for process to exit
-                if let Ok(status) = process.wait().await {
-                    println!("Worker {} exited with status: {:?}", worker.id, status);
-                } else {
-                    warn!("Failed to wait for worker {} to exit", worker.id);
-                }
-            }
-        }
-
-        self.worker_processes.clear();
-        println!("{}", "✅ All workers stopped".green());
-    }
-
-    /// Submit a job and return the job ID
-    pub async fn submit_job(&self, script_type: ScriptType, script: &str) -> Result<String, Box<dyn std::error::Error>> {
-        let job = self.supervisor
-            .new_job()
-            .script_type(script_type.clone())
-            .script(script)
-            .timeout(Duration::from_secs(30))
-            .build()?;
-
-        let job_id = job.id.clone();
-        self.supervisor.create_job(&job).await?;
-
-        println!("{}", format!("📝 Job {} submitted for {}", job_id, script_type.as_str()).cyan());
-        Ok(job_id)
-    }
-
-    /// Wait for job completion and return result
-    pub async fn wait_for_job_completion(&self, job_id: &str, timeout_duration: Duration) -> Result<String, Box<dyn std::error::Error>> {
-        let start_time = std::time::Instant::now();
-
-        println!("{}", format!("⏳ Waiting for job {} to complete...", job_id).yellow());
-
-        loop {
-            if start_time.elapsed() > timeout_duration {
-                return Err("Job execution timeout".into());
-            }
-
-            // Check job status using supervisor methods
-            match self.supervisor.get_job_status(job_id).await {
-                Ok(status) => {
-                    match status {
-                        JobStatus::Finished => {
-                            if let Ok(Some(result)) = self.supervisor.get_job_output(job_id).await {
-                                println!("{}", format!("✅ Job {} completed successfully", job_id).green());
-                                return Ok(result);
-                            }
-                        }
-                        JobStatus::Error => {
-                            return Err("Job failed".into());
-                        }
-                        _ => {
-                            // Job still running or waiting
-                        }
-                    }
-                }
-                Err(_) => {
-                    // Job not found or error checking status
-                }
-            }
-
-            sleep(Duration::from_millis(100)).await;
-        }
-    }
-
-    /// List all jobs
-    pub async fn list_jobs(&self) -> Result<Vec<String>, Box<dyn std::error::Error>> {
-        self.supervisor.list_jobs().await.map_err(|e| e.into())
-    }
-
-    /// Clear all jobs
-    pub async fn clear_all_jobs(&self) -> Result<usize, Box<dyn std::error::Error>> {
-        self.supervisor.clear_all_jobs().await.map_err(|e| e.into())
-    }
-
-    /// Get worker status
-    pub fn get_worker_status(&self) -> Vec<(String, ScriptType, bool)> {
-        self.worker_processes.iter().map(|w| {
-            (w.id.clone(), w.script_type.clone(), w.process.is_some())
-        }).collect()
-    }
-}
-
-impl Drop for Supervisor {
-    fn drop(&mut self) {
-        // Ensure workers are stopped when supervisor is dropped
-        if !self.worker_processes.is_empty() {
-            println!("{}", "⚠️ Supervisor dropping - stopping remaining workers".yellow());
-        }
-    }
-}
+async fn run_supervisor_demo() -> Result<(), Box<dyn std::error::Error>> {
+    println!("{}", "🚀 Hero Supervisor Demo - New API".cyan().bold());
+    println!("{}", "Building supervisor synchronously...".yellow());
+
+    // Build supervisor synchronously (no .await needed)
+    let supervisor = SupervisorBuilder::new()
+        .redis_url("redis://127.0.0.1:6379")
+        .osis_worker("/usr/local/bin/osis_worker")
+        .sal_worker("/usr/local/bin/sal_worker")
+        .v_worker("/usr/local/bin/v_worker")
+        .python_worker("/usr/local/bin/python_worker")
+        .worker_env_var("REDIS_URL", "redis://127.0.0.1:6379")
+        .worker_env_var("LOG_LEVEL", "info")
+        .build()?;
+
+    println!("{}", "✅ Supervisor built successfully!".green());
+    println!("{}", "Starting workers asynchronously...".yellow());
+
+    // Start workers asynchronously
+    supervisor.start_workers().await?;
+
+    println!("{}", "✅ All workers started successfully!".green());
+
+    // Demonstrate job creation and execution
+    println!("{}", "\n📋 Creating and running test jobs...".cyan().bold());
+
+    // Create and run a test job
+    println!("📝 Creating and running OSIS job...");
+
+    // Submit and run the job
+    match supervisor.new_job()
+        .script_type(ScriptType::OSIS)
+        .script("println('Hello from OSIS worker!')")
+        .timeout(Duration::from_secs(30))
+        .await_response().await {
+        Ok(result) => {
+            println!("{}", format!("✅ Job completed successfully: {}", result).green());
+        }
+        Err(e) => {
+            println!("{}", format!("❌ Job failed: {}", e).red());
+        }
+    }
+
+    // Wait for interrupt signal
+    println!("{}", "\n⏳ Press Ctrl+C to shutdown gracefully...".yellow());
+    signal::ctrl_c().await?;
+
+    println!("{}", "\n🛑 Shutdown signal received, cleaning up...".yellow().bold());
+
+    // Cleanup workers before exit
+    supervisor.cleanup_and_shutdown().await?;
+
+    println!("{}", "✅ Cleanup completed. Goodbye!".green().bold());
+    Ok(())
+}
 
 #[tokio::main]
@@ -193,173 +72,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Initialize logging
     env_logger::Builder::from_default_env()
         .filter_level(log::LevelFilter::Info)
-        .format_timestamp(None)
         .init();
 
-    println!("{}", "🎯 Hero Supervisor-Worker End-to-End Demo".blue().bold());
-    println!("{}", "==========================================".blue());
+    println!("{}", "Hero Supervisor Demo".cyan().bold());
+    println!("{}", "This demo shows the new synchronous build API".yellow());
     println!();
 
-    // Configuration
-    let redis_url = "redis://localhost:6379".to_string();
-    let worker_binary_path = "../../target/debug/worker";
-
-    // Check if worker binary exists
-    if !std::path::Path::new(worker_binary_path).exists() {
-        println!("{}", "❌ Worker binary not found!".red().bold());
-        println!("Please build the worker first:");
-        println!("  cd ../worker && cargo build");
-        return Err("Worker binary not found".into());
-    }
-
-    // Create supervisor
-    println!("{}", "🏗️ Creating supervisor...".cyan());
-    let mut supervisor = Supervisor::new(redis_url).await?;
-    println!("{}", "✅ Supervisor created successfully".green());
-    println!();
-
-    // Clear any existing jobs
-    let cleared_count = supervisor.clear_all_jobs().await?;
-    if cleared_count > 0 {
-        println!("{}", format!("🧹 Cleared {} existing jobs", cleared_count).yellow());
-    }
-
-    // Demo 1: Start a HeroScript worker
-    println!("{}", "📋 Demo 1: Starting HeroScript Worker".blue().bold());
-    println!("{}", "------------------------------------".blue());
-
-    supervisor.start_worker(ScriptType::HeroScript, worker_binary_path).await?;
-
-    // Show worker status
-    let worker_status = supervisor.get_worker_status();
+    // Run the demo
+    if let Err(e) = run_supervisor_demo().await {
+        eprintln!("{}", format!("Demo failed: {}", e).red().bold());
+        std::process::exit(1);
+    }
-    println!("Active workers:");
-    for (id, script_type, active) in worker_status {
-        let status = if active { "🟢 Running" } else { "🔴 Stopped" };
-        println!("  {} - {} ({})", id, script_type.as_str(), status);
-    }
-    println!();
-
-    // Demo 2: Submit and execute a simple job
-    println!("{}", "📋 Demo 2: Submit and Execute Job".blue().bold());
-    println!("{}", "---------------------------------".blue());
-
-    let script = r#"
-        print("Hello from HeroScript worker!");
-        let result = 42 + 8;
-        print("Calculation: 42 + 8 = " + result);
-        result
-    "#;
-
-    let job_id = supervisor.submit_job(ScriptType::HeroScript, script).await?;
-
-    // Wait for job completion
-    match supervisor.wait_for_job_completion(&job_id, Duration::from_secs(10)).await {
-        Ok(result) => {
-            println!("{}", format!("🎉 Job result: {}", result).green().bold());
-        }
-        Err(e) => {
-            println!("{}", format!("❌ Job failed: {}", e).red());
-        }
-    }
-    println!();
-
-    // Demo 3: Submit multiple jobs
-    println!("{}", "📋 Demo 3: Multiple Jobs".blue().bold());
-    println!("{}", "------------------------".blue());
-
-    let jobs = vec![
-        ("Job 1", r#"print("Job 1 executing"); "job1_result""#),
-        ("Job 2", r#"print("Job 2 executing"); 100 + 200"#),
-        ("Job 3", r#"print("Job 3 executing"); "hello_world""#),
-    ];
-
-    let mut job_ids = Vec::new();
-
-    for (name, script) in jobs {
-        let job_id = supervisor.submit_job(ScriptType::HeroScript, script).await?;
-        job_ids.push((name, job_id));
-        println!("{} submitted: {}", name, job_ids.last().unwrap().1);
-    }
-
-    // Wait for all jobs to complete
-    for (name, job_id) in job_ids {
-        match supervisor.wait_for_job_completion(&job_id, Duration::from_secs(5)).await {
-            Ok(result) => {
-                println!("{} completed: {}", name, result);
-            }
-            Err(e) => {
-                println!("{} failed: {}", name, e);
-            }
-        }
-    }
-    println!();
-
-    // Demo 4: Job management
-    println!("{}", "📋 Demo 4: Job Management".blue().bold());
-    println!("{}", "-------------------------".blue());
-
-    let all_jobs = supervisor.list_jobs().await?;
-    println!("Total jobs in system: {}", all_jobs.len());
-
-    if !all_jobs.is_empty() {
-        println!("Job IDs:");
-        for (i, job_id) in all_jobs.iter().enumerate() {
-            println!("  {}. {}", i + 1, job_id);
-        }
-    }
-    println!();
-
-    // Demo 5: Error handling
-    println!("{}", "📋 Demo 5: Error Handling".blue().bold());
-    println!("{}", "-------------------------".blue());
-
-    let error_script = r#"
-        print("This job will cause an error");
-        let x = undefined_variable; // This will cause an error
-        x
-    "#;
-
-    let error_job_id = supervisor.submit_job(ScriptType::HeroScript, error_script).await?;
-
-    match supervisor.wait_for_job_completion(&error_job_id, Duration::from_secs(5)).await {
-        Ok(result) => {
-            println!("Unexpected success: {}", result);
-        }
-        Err(e) => {
-            println!("{}", format!("Expected error handled: {}", e).yellow());
-        }
-    }
-    println!();
-
-    // Demo 6: Cleanup
-    println!("{}", "📋 Demo 6: Cleanup".blue().bold());
-    println!("{}", "-------------------".blue());
-
-    let final_job_count = supervisor.list_jobs().await?.len();
-    println!("Jobs before cleanup: {}", final_job_count);
-
-    let cleared = supervisor.clear_all_jobs().await?;
-    println!("Jobs cleared: {}", cleared);
-
-    let remaining_jobs = supervisor.list_jobs().await?.len();
-    println!("Jobs after cleanup: {}", remaining_jobs);
-    println!();
-
-    // Stop all workers
-    supervisor.stop_all_workers().await;
-
-    println!("{}", "🎉 Demo completed successfully!".green().bold());
-    println!();
-    println!("{}", "Key Features Demonstrated:".blue().bold());
-    println!("  ✅ Supervisor lifecycle management");
-    println!("  ✅ Worker process spawning and management");
-    println!("  ✅ Job submission and execution");
-    println!("  ✅ Real-time job monitoring");
-    println!("  ✅ Multiple job handling");
-    println!("  ✅ Error handling and recovery");
-    println!("  ✅ Resource cleanup");
-    println!();
-    println!("{}", "The supervisor successfully managed the complete worker lifecycle!".green());
-
     Ok(())
 }
188  core/job/src/builder.rs  (new file)
@@ -0,0 +1,188 @@
use chrono::Utc;
use std::collections::HashMap;
use std::time::Duration;
use uuid::Uuid;

use crate::{Job, ScriptType, JobError};

/// Builder for constructing and submitting script execution requests.
///
/// This builder provides a fluent interface for configuring script execution
/// parameters and offers two submission modes: fire-and-forget (`submit()`)
/// and request-reply (`await_response()`).
///
/// # Example
///
/// ```rust,no_run
/// use std::time::Duration;
/// use hero_supervisor::ScriptType;
///
/// # async fn example(client: &hero_supervisor::Supervisor) -> Result<String, hero_supervisor::SupervisorError> {
/// let result = client
///     .new_job()
///     .script_type(ScriptType::OSIS)
///     .script(r#"print("Hello, World!");"#)
///     .timeout(Duration::from_secs(30))
///     .await_response()
///     .await?;
/// # Ok(result)
/// # }
/// ```
pub struct JobBuilder {
    request_id: String,
    context_id: String,
    caller_id: String,
    script: String,
    script_type: ScriptType,
    timeout: Duration,
    retries: u32,
    concurrent: bool,
    log_path: Option<String>,
    env_vars: HashMap<String, String>,
    prerequisites: Vec<String>,
    dependents: Vec<String>,
}

impl JobBuilder {
    pub fn new() -> Self {
        Self {
            request_id: "".to_string(),
            context_id: "".to_string(),
            caller_id: "".to_string(),
            script: "".to_string(),
            script_type: ScriptType::OSIS, // Default to OSIS
            timeout: Duration::from_secs(5),
            retries: 0,
            concurrent: false,
            log_path: None,
            env_vars: HashMap::new(),
            prerequisites: Vec::new(),
            dependents: Vec::new(),
        }
    }

    pub fn request_id(mut self, request_id: &str) -> Self {
        self.request_id = request_id.to_string();
        self
    }

    pub fn script_type(mut self, script_type: ScriptType) -> Self {
        self.script_type = script_type;
        self
    }

    pub fn context_id(mut self, context_id: &str) -> Self {
        self.context_id = context_id.to_string();
        self
    }

    pub fn script(mut self, script: &str) -> Self {
        self.script = script.to_string();
        self
    }

    pub fn script_path(mut self, script_path: &str) -> Self {
        self.script = std::fs::read_to_string(script_path).unwrap();
        self
    }

    pub fn timeout(mut self, timeout: Duration) -> Self {
        self.timeout = timeout;
        self
    }

    pub fn log_path(mut self, log_path: &str) -> Self {
        self.log_path = Some(log_path.to_string());
        self
    }

    /// Set a single environment variable
    pub fn env_var(mut self, key: &str, value: &str) -> Self {
        self.env_vars.insert(key.to_string(), value.to_string());
        self
    }

    /// Set multiple environment variables from a HashMap
    pub fn env_vars(mut self, env_vars: HashMap<String, String>) -> Self {
        self.env_vars.extend(env_vars);
        self
    }

    /// Clear all environment variables
    pub fn clear_env_vars(mut self) -> Self {
        self.env_vars.clear();
        self
    }

    /// Add a prerequisite job ID that must complete before this job can run
    pub fn prerequisite(mut self, job_id: &str) -> Self {
        self.prerequisites.push(job_id.to_string());
        self
    }

    /// Set multiple prerequisite job IDs
    pub fn prerequisites(mut self, job_ids: Vec<String>) -> Self {
        self.prerequisites.extend(job_ids);
        self
    }

    /// Add a dependent job ID that depends on this job completing
    pub fn dependent(mut self, job_id: &str) -> Self {
        self.dependents.push(job_id.to_string());
        self
    }

    /// Set multiple dependent job IDs
    pub fn dependents(mut self, job_ids: Vec<String>) -> Self {
        self.dependents.extend(job_ids);
        self
    }

    /// Clear all prerequisites
    pub fn clear_prerequisites(mut self) -> Self {
        self.prerequisites.clear();
        self
    }

    /// Clear all dependents
    pub fn clear_dependents(mut self) -> Self {
        self.dependents.clear();
        self
    }

    pub fn build(self) -> Result<Job, JobError> {
        let request_id = if self.request_id.is_empty() {
            // Generate a UUID for the request_id
            Uuid::new_v4().to_string()
        } else {
            self.request_id.clone()
        };

        if self.context_id.is_empty() {
            return Err(JobError::MissingField("context_id".to_string()));
        }

        if self.caller_id.is_empty() {
            return Err(JobError::MissingField("caller_id".to_string()));
        }

        let now = Utc::now();

        Ok(Job {
            id: request_id,
            caller_id: self.caller_id,
            context_id: self.context_id,
            script: self.script,
            script_type: self.script_type,
            timeout: self.timeout,
            retries: self.retries as u8,
            concurrent: self.concurrent,
            log_path: self.log_path.clone(),
            env_vars: self.env_vars.clone(),
            prerequisites: self.prerequisites.clone(),
            dependents: self.dependents.clone(),
            created_at: now,
            updated_at: now,
        })
    }
}
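One observation on this builder: `build()` rejects an empty `caller_id`, yet the file defines no `caller_id` setter, so presumably whatever wraps the builder (for example the supervisor's `new_job()`) injects it. A small sketch of the validation path exactly as written:

```rust
// Sketch (inside this crate): exercising the validation in build().
use std::time::Duration;

fn demo() {
    let result = crate::builder::JobBuilder::new()
        .context_id("demo-context")
        .script(r#"print("hi");"#)
        .timeout(Duration::from_secs(10))
        .build();

    // No caller_id setter exists in this file, so with caller_id left empty
    // this returns Err(JobError::MissingField("caller_id")).
    assert!(result.is_err());
}
```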
@@ -6,6 +6,8 @@ use uuid::Uuid;
 use redis::AsyncCommands;
 use thiserror::Error;
 
+mod builder;
+
 /// Redis namespace prefix for all Hero job-related keys
 pub const NAMESPACE_PREFIX: &str = "hero:job:";
 
@@ -7,19 +7,35 @@ edition = "2021"
 name = "supervisor"
 path = "cmd/supervisor.rs"
 
+[[bin]]
+name = "hive-supervisor"
+path = "cmd/hive_supervisor.rs"
+
+[[bin]]
+name = "hive-supervisor-tui"
+path = "cmd/hive_supervisor_tui.rs"
+
+[[bin]]
+name = "hive-supervisor-tui-safe"
+path = "cmd/hive_supervisor_tui_safe.rs"
+
 [dependencies]
 clap = { version = "4.4", features = ["derive"] }
 env_logger = "0.10"
 redis = { version = "0.25.0", features = ["tokio-comp"] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
+toml = "0.8"
 uuid = { version = "1.6", features = ["v4", "serde"] }
 chrono = { version = "0.4", features = ["serde"] }
 log = "0.4"
 tokio = { version = "1", features = ["macros", "rt-multi-thread"] } # For async main in examples, and general async
 colored = "2.0"
 hero_job = { path = "../job" }
-zinit-client = "0.4.0"
+zinit-client = { path = "/Users/timurgordon/code/github/threefoldtech/zinit/zinit-client" }
+ratatui = "0.28"
+crossterm = "0.28"
+anyhow = "1.0"
 
 [dev-dependencies] # For examples later
 env_logger = "0.10"
@@ -8,8 +8,6 @@ The lifecycle management system provides:
 
 - **Worker Process Management**: Start, stop, restart, and monitor worker binaries
 - **Health Monitoring**: Automatic ping jobs every 10 minutes for idle workers
-- **Load Balancing**: Dynamic scaling of workers based on demand
-- **Service Dependencies**: Proper startup ordering with dependency management
 - **Graceful Shutdown**: Clean termination of worker processes
 
 ## Architecture
@@ -313,3 +311,9 @@ redis-cli keys "hero:job:*"
 - **User Permissions**: Run workers with appropriate user permissions
 - **Network Security**: Secure Redis and Zinit socket access
 - **Binary Validation**: Verify worker binary integrity before deployment
+
+
+## Future
+
+- **Load Balancing**: Dynamic scaling of workers based on demand
+- **Service Dependencies**: Proper startup ordering with dependency management
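Illustrative only: the 10-minute ping cadence in the health-monitoring bullet above could be driven by a plain tokio interval, roughly as below; `send_ping` is a hypothetical stand-in for dispatching a ping job to an idle worker, not an API from this repository.

```rust
// Hypothetical sketch of a 10-minute health-check loop, per the README.
use tokio::time::{interval, Duration};

async fn health_loop(mut send_ping: impl FnMut()) {
    let mut tick = interval(Duration::from_secs(10 * 60)); // every 10 minutes
    loop {
        tick.tick().await;
        send_ping(); // hypothetical dispatch of a ping job to an idle worker
    }
}
```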
@@ -1,157 +1,66 @@
-# Rhai Client Binary
+# Supervisor CLI
 
-A command-line client for executing Rhai scripts on remote workers via Redis.
+A command-line interface for the Hero Supervisor.
 
-## Binary: `client`
+## Binary: `hive-supervisor`
 
 ### Installation
 
 Build the binary:
 ```bash
-cargo build --bin client --release
+cargo build --bin hive-supervisor --release
 ```
 
 ### Usage
 
 ```bash
-# Basic usage - requires caller and circle keys
-client --caller-key <CALLER_KEY> --circle-key <CIRCLE_KEY>
-
-# Execute inline script
-client -c <CALLER_KEY> -k <CIRCLE_KEY> --script "print('Hello World!')"
-
-# Execute script from file
-client -c <CALLER_KEY> -k <CIRCLE_KEY> --file script.rhai
-
-# Use specific worker (defaults to circle key)
-client -c <CALLER_KEY> -k <CIRCLE_KEY> -w <WORKER_KEY> --script "2 + 2"
-
-# Custom Redis and timeout
-client -c <CALLER_KEY> -k <CIRCLE_KEY> --redis-url redis://localhost:6379/1 --timeout 60
-
-# Remove timestamps from logs
-client -c <CALLER_KEY> -k <CIRCLE_KEY> --no-timestamp
-
-# Increase verbosity
-client -c <CALLER_KEY> -k <CIRCLE_KEY> -v --script "debug_info()"
+# Basic usage
+hive-supervisor --config <CONFIG_PATH>
 ```
 
-### Command-Line Options
-
-| Option | Short | Default | Description |
-|--------|-------|---------|-------------|
-| `--caller-key` | `-c` | **Required** | Caller public key (your identity) |
-| `--circle-key` | `-k` | **Required** | Circle public key (execution context) |
-| `--worker-key` | `-w` | `circle-key` | Worker public key (target worker) |
-| `--redis-url` | `-r` | `redis://localhost:6379` | Redis connection URL |
-| `--script` | `-s` | | Rhai script to execute |
-| `--file` | `-f` | | Path to Rhai script file |
-| `--timeout` | `-t` | `30` | Timeout for script execution (seconds) |
-| `--no-timestamp` | | `false` | Remove timestamps from log output |
-| `--verbose` | `-v` | | Increase verbosity (stackable) |
-
-### Execution Modes
-
-#### Inline Script Execution
-```bash
-# Execute a simple calculation
-client -c caller_123 -k circle_456 -s "let result = 2 + 2; print(result);"
-
-# Execute with specific worker
-client -c caller_123 -k circle_456 -w worker_789 -s "get_user_data()"
-```
-
-#### Script File Execution
-```bash
-# Execute script from file
-client -c caller_123 -k circle_456 -f examples/data_processing.rhai
-
-# Execute with custom timeout
-client -c caller_123 -k circle_456 -f long_running_script.rhai -t 120
-```
-
-#### Interactive Mode
-```bash
-# Enter interactive REPL mode (when no script or file provided)
-client -c caller_123 -k circle_456
-
-# Interactive mode with verbose logging
-client -c caller_123 -k circle_456 -v --no-timestamp
-```
-
-### Interactive Mode
-
-When no script (`-s`) or file (`-f`) is provided, the client enters interactive mode:
-
-```
-🔗 Starting Rhai Client
-📋 Configuration:
-   Caller Key: caller_123
-   Circle Key: circle_456
-   Worker Key: circle_456
-   Redis URL: redis://localhost:6379
-   Timeout: 30s
-
-✅ Connected to Redis at redis://localhost:6379
-🎮 Entering interactive mode
-Type Rhai scripts and press Enter to execute. Type 'exit' or 'quit' to close.
-rhai> let x = 42; print(x);
-Status: completed
-Output: 42
-rhai> exit
-👋 Goodbye!
-```
-
-### Configuration Examples
-
-#### Development Usage
-```bash
-# Simple development client
-client -c dev_user -k dev_circle
-
-# Development with clean logs
-client -c dev_user -k dev_circle --no-timestamp -v
-```
-
-#### Production Usage
-```bash
-# Production client with specific worker
-client \
-  --caller-key prod_user_123 \
-  --circle-key prod_circle_456 \
-  --worker-key prod_worker_789 \
-  --redis-url redis://redis-cluster:6379/0 \
-  --timeout 300 \
-  --file production_script.rhai
-```
-
-#### Batch Processing
-```bash
-# Process multiple scripts
-for script in scripts/*.rhai; do
-  client -c batch_user -k batch_circle -f "$script" --no-timestamp
-done
-```
-
-### Key Concepts
-
-- **Caller Key**: Your identity - used for authentication and tracking
-- **Circle Key**: Execution context - defines the environment/permissions
-- **Worker Key**: Target worker - which worker should execute the script (defaults to circle key)
-
-### Error Handling
-
-The client provides clear error messages for:
-- Missing required keys
-- Redis connection failures
-- Script execution timeouts
-- Worker unavailability
-- Script syntax errors
-
-### Dependencies
-
-- `rhai_supervisor`: Core client library for Redis-based script execution
-- `redis`: Redis client for task queue communication
-- `clap`: Command-line argument parsing
-- `env_logger`: Logging infrastructure
-- `tokio`: Async runtime
+Where the config is a TOML file with the following structure:
+```toml
+[global]
+redis_url = "redis://localhost:6379"
+
+[osis_worker]
+binary_path = "/path/to/osis_worker"
+env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
+
+[sal_worker]
+binary_path = "/path/to/sal_worker"
+env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
+
+[v_worker]
+binary_path = "/path/to/v_worker"
+env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
+
+[python_worker]
+binary_path = "/path/to/python_worker"
+env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
+```
+
+Let's have verbosity settings etc.
+
+The CLI offers a few commands:
+
+workers:
+  start
+  stop
+  restart
+  status
+  logs
+  list
+
+jobs:
+  create
+  start
+  stop
+  restart
+  status
+  logs
+  list
+
+repl: you can enter interactive mode to run scripts; however, predefine caller_id, context_id and worker type so the supervisor dispatches jobs accordingly
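The command tree above is still prose at this stage. As a sketch, here is one way it could map onto clap 4's derive API (clap with the `derive` feature is already among this commit's dependencies); all names below simply mirror the README and are not the actual implementation.

```rust
// Hypothetical sketch of the README's command tree using clap's derive API.
use clap::{Parser, Subcommand};

#[derive(Parser)]
#[command(name = "hive-supervisor", about = "Hero Supervisor CLI")]
struct Cli {
    /// Path to the TOML configuration file described above
    #[arg(short, long)]
    config: std::path::PathBuf,

    /// Increase verbosity (the README asks for verbosity settings)
    #[arg(short, long, action = clap::ArgAction::Count)]
    verbose: u8,

    #[command(subcommand)]
    command: Command,
}

#[derive(Subcommand)]
enum Command {
    /// Manage worker processes
    Workers {
        #[command(subcommand)]
        action: WorkerAction,
    },
    /// Manage jobs
    Jobs {
        #[command(subcommand)]
        action: JobAction,
    },
    /// Interactive mode with preset caller_id, context_id and worker type
    Repl,
}

#[derive(Subcommand)]
enum WorkerAction { Start, Stop, Restart, Status, Logs, List }

#[derive(Subcommand)]
enum JobAction { Create, Start, Stop, Restart, Status, Logs, List }

fn main() {
    let _cli = Cli::parse();
    // Dispatch on _cli.command here.
}
```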
365  core/supervisor/cmd/hive_supervisor_tui_safe.rs  (new file)
@@ -0,0 +1,365 @@
use anyhow::Result;
use clap::Parser;
use crossterm::{
    event::{self, DisableMouseCapture, EnableMouseCapture, Event, KeyCode, KeyEventKind},
    execute,
    terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen},
};
use hero_supervisor::{Supervisor, SupervisorBuilder};
use zinit_client::ZinitClient;
use log::{error, info};
use ratatui::{
    backend::CrosstermBackend,
    layout::{Constraint, Direction, Layout, Rect},
    style::{Color, Modifier, Style},
    text::Line,
    widgets::{
        Block, Borders, List, ListItem, Paragraph, Tabs, Wrap,
    },
    Frame, Terminal,
};
use std::{
    io,
    path::PathBuf,
    sync::Arc,
    time::{Duration, Instant},
};
use tokio::time::sleep;
use toml;
use serde::Deserialize;

#[derive(Parser)]
#[command(name = "hive-supervisor-tui")]
#[command(about = "Hero Supervisor Terminal User Interface")]
struct Args {
    #[arg(short, long, help = "Configuration file path")]
    config: PathBuf,

    #[arg(short, long, help = "Enable verbose logging")]
    verbose: bool,
}

#[derive(Debug, Deserialize)]
struct Config {
    global: GlobalConfig,
    #[serde(flatten)]
    workers: std::collections::HashMap<String, WorkerConfigToml>,
}

#[derive(Debug, Deserialize)]
struct GlobalConfig {
    redis_url: String,
}

#[derive(Debug, Deserialize)]
struct WorkerConfigToml {
    binary_path: String,
    env_vars: Option<std::collections::HashMap<String, String>>,
}

#[derive(Debug, Clone, PartialEq)]
enum TabId {
    Dashboard,
    Workers,
    Jobs,
    Logs,
}

impl TabId {
    fn all() -> Vec<TabId> {
        vec![TabId::Dashboard, TabId::Workers, TabId::Jobs, TabId::Logs]
    }

    fn title(&self) -> &str {
        match self {
            TabId::Dashboard => "Dashboard",
            TabId::Workers => "Workers",
            TabId::Jobs => "Jobs",
            TabId::Logs => "Logs",
        }
    }
}

struct App {
    supervisor: Arc<Supervisor>,
    current_tab: TabId,
    should_quit: bool,
    logs: Vec<String>,
    last_update: Instant,
}

impl App {
    fn new(supervisor: Arc<Supervisor>) -> Self {
        Self {
            supervisor,
            current_tab: TabId::Dashboard,
            should_quit: false,
            logs: vec!["TUI started successfully".to_string()],
            last_update: Instant::now(),
        }
    }

    fn next_tab(&mut self) {
        let tabs = TabId::all();
        let current_index = tabs.iter().position(|t| *t == self.current_tab).unwrap_or(0);
        let next_index = (current_index + 1) % tabs.len();
        self.current_tab = tabs[next_index].clone();
    }

    fn prev_tab(&mut self) {
        let tabs = TabId::all();
        let current_index = tabs.iter().position(|t| *t == self.current_tab).unwrap_or(0);
        let prev_index = if current_index == 0 { tabs.len() - 1 } else { current_index - 1 };
        self.current_tab = tabs[prev_index].clone();
    }

    fn add_log(&mut self, message: String) {
        self.logs.push(format!("[{}] {}",
            chrono::Utc::now().format("%H:%M:%S"),
            message
        ));
        if self.logs.len() > 100 {
            self.logs.remove(0);
        }
    }

    fn handle_key(&mut self, key: KeyCode) -> bool {
        match key {
            KeyCode::Char('q') => {
                self.should_quit = true;
                true
            }
            KeyCode::Tab => {
                self.next_tab();
                false
            }
            KeyCode::BackTab => {
                self.prev_tab();
                false
            }
            _ => false
        }
    }
}

fn render_ui(f: &mut Frame, app: &mut App) {
    let chunks = Layout::default()
        .direction(Direction::Vertical)
        .constraints([Constraint::Length(3), Constraint::Min(0)].as_ref())
        .split(f.area());

    // Render tabs
    let tabs_list = TabId::all();
    let tab_titles: Vec<Line> = tabs_list
        .iter()
        .map(|t| Line::from(t.title()))
        .collect();

    let selected_tab = TabId::all().iter().position(|t| *t == app.current_tab).unwrap_or(0);
    let tabs = Tabs::new(tab_titles)
        .block(Block::default().borders(Borders::ALL).title("Hero Supervisor TUI"))
        .select(selected_tab)
        .style(Style::default().fg(Color::Cyan))
        .highlight_style(Style::default().add_modifier(Modifier::BOLD).bg(Color::Black));

    f.render_widget(tabs, chunks[0]);

    // Render content based on selected tab
    match app.current_tab {
        TabId::Dashboard => render_dashboard(f, chunks[1], app),
        TabId::Workers => render_workers(f, chunks[1], app),
        TabId::Jobs => render_jobs(f, chunks[1], app),
        TabId::Logs => render_logs(f, chunks[1], app),
    }
}

fn render_dashboard(f: &mut Frame, area: Rect, app: &App) {
    let chunks = Layout::default()
        .direction(Direction::Vertical)
        .constraints([Constraint::Length(7), Constraint::Min(0)].as_ref())
        .split(area);

    // Status overview - supervisor is already running if we get here
    let status_text = "Status: ✓ Running\nWorkers: Started successfully\nJobs: Ready for processing\n\nPress 'q' to quit, Tab to navigate";

    let status_paragraph = Paragraph::new(status_text)
        .block(Block::default().borders(Borders::ALL).title("System Status"))
        .wrap(Wrap { trim: true });

    f.render_widget(status_paragraph, chunks[0]);

    // Recent logs
    let log_items: Vec<ListItem> = app.logs
        .iter()
        .rev()
        .take(10)
        .map(|log| ListItem::new(log.as_str()))
        .collect();

    let logs_list = List::new(log_items)
        .block(Block::default().borders(Borders::ALL).title("Recent Activity"));

    f.render_widget(logs_list, chunks[1]);
}

fn render_workers(f: &mut Frame, area: Rect, _app: &App) {
    let paragraph = Paragraph::new("Workers tab - Status checking not implemented yet to avoid system issues")
        .block(Block::default().borders(Borders::ALL).title("Workers"))
        .wrap(Wrap { trim: true });

    f.render_widget(paragraph, area);
}

fn render_jobs(f: &mut Frame, area: Rect, _app: &App) {
    let paragraph = Paragraph::new("Jobs tab - Job monitoring not implemented yet to avoid system issues")
        .block(Block::default().borders(Borders::ALL).title("Jobs"))
        .wrap(Wrap { trim: true });

    f.render_widget(paragraph, area);
}

fn render_logs(f: &mut Frame, area: Rect, app: &App) {
    let items: Vec<ListItem> = app.logs
        .iter()
        .map(|log| ListItem::new(log.as_str()))
        .collect();

    let logs_list = List::new(items)
        .block(Block::default().borders(Borders::ALL).title("System Logs"));

    f.render_widget(logs_list, area);
}

async fn run_app(
    terminal: &mut Terminal<CrosstermBackend<io::Stdout>>,
    app: &mut App,
) -> Result<()> {
    loop {
        terminal.draw(|f| render_ui(f, app))?;

        // Simple, safe event handling
        if event::poll(Duration::from_millis(100))? {
            if let Event::Key(key) = event::read()? {
                if key.kind == KeyEventKind::Press {
                    if app.handle_key(key.code) {
                        break;
                    }
                }
            }
        }

        if app.should_quit {
            break;
        }

        // Small delay to prevent excessive CPU usage
        sleep(Duration::from_millis(50)).await;
    }

    Ok(())
}

#[tokio::main]
async fn main() -> Result<()> {
    let args = Args::parse();

    // Initialize logging
    if args.verbose {
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("debug")).init();
    } else {
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    }

    info!("Hero Supervisor TUI - Fail-fast initialization");

    // Step 1: Load and parse configuration
    info!("Step 1/4: Loading configuration from {:?}", args.config);
    let config_content = std::fs::read_to_string(&args.config)
        .map_err(|e| anyhow::anyhow!("Failed to read config file: {}", e))?;
    let config: Config = toml::from_str(&config_content)
        .map_err(|e| anyhow::anyhow!("Failed to parse config file: {}", e))?;
    info!("✓ Configuration loaded successfully");

    // Step 2: Check if Zinit is running
    info!("Step 2/4: Checking if Zinit is running...");
    let zinit_client = ZinitClient::new("/tmp/zinit.sock");
    match zinit_client.status("_test_connectivity").await {
        Ok(_) => {
            info!("✓ Zinit is running and accessible");
        }
        Err(e) => {
            let error_msg = e.to_string();
            if error_msg.contains("Connection refused") || error_msg.contains("No such file") {
                eprintln!("Error: Zinit process manager is not running.");
                eprintln!("Please start Zinit before running the supervisor TUI.");
                eprintln!("Expected Zinit socket at: /tmp/zinit.sock");
                std::process::exit(1);
            } else {
                info!("✓ Zinit is running (service not found is expected)");
            }
        }
    }

    // Step 3: Build supervisor
    info!("Step 3/4: Building supervisor...");
    let mut builder = SupervisorBuilder::new()
        .redis_url(&config.global.redis_url);

    for (worker_name, worker_config) in &config.workers {
        match worker_name.as_str() {
            "osis_worker" => builder = builder.osis_worker(&worker_config.binary_path),
            "sal_worker" => builder = builder.sal_worker(&worker_config.binary_path),
            "v_worker" => builder = builder.v_worker(&worker_config.binary_path),
            "python_worker" => builder = builder.python_worker(&worker_config.binary_path),
            _ => log::warn!("Unknown worker type: {}", worker_name),
        }

        if let Some(env_vars) = &worker_config.env_vars {
            for (key, value) in env_vars {
                builder = builder.worker_env_var(key, value);
            }
        }
    }

    let supervisor = Arc::new(builder.build()
        .map_err(|e| anyhow::anyhow!("Failed to build supervisor: {}", e))?);
    info!("✓ Supervisor built successfully");

    // Step 4: Start supervisor and workers
    info!("Step 4/4: Starting supervisor and workers...");
    supervisor.start_workers().await
        .map_err(|e| anyhow::anyhow!("Failed to start workers: {}", e))?;
    info!("✓ All workers started successfully");

    // All initialization successful - now start TUI
    info!("Initialization complete - starting TUI...");
    let mut app = App::new(Arc::clone(&supervisor));

    // Setup terminal
    enable_raw_mode()?;
    let mut stdout = io::stdout();
    execute!(stdout, EnterAlternateScreen, EnableMouseCapture)?;
    let backend = CrosstermBackend::new(stdout);
    let mut terminal = Terminal::new(backend)?;

    // Run the app
    let result = run_app(&mut terminal, &mut app).await;

    // Cleanup
    disable_raw_mode()?;
    execute!(
        terminal.backend_mut(),
        LeaveAlternateScreen,
        DisableMouseCapture
    )?;
    terminal.show_cursor()?;

    // Cleanup supervisor
    if let Err(e) = supervisor.cleanup_and_shutdown().await {
        error!("Error during cleanup: {}", e);
    }

    info!("Hero Supervisor TUI shutdown complete");

    result
}
@ -1,190 +0,0 @@
# Architecture of the `rhai_supervisor` Crate

The `rhai_supervisor` crate provides a Redis-based client library for submitting Rhai scripts to distributed worker services and awaiting their execution results. It implements a request-reply pattern using Redis as the message broker.

## Core Architecture

The client follows a builder pattern design with clear separation of concerns:

```mermaid
graph TD
    A[RhaiSupervisorBuilder] --> B[RhaiSupervisor]
    B --> C[PlayRequestBuilder]
    C --> D[PlayRequest]
    D --> E[Redis Task Queue]
    E --> F[Worker Service]
    F --> G[Redis Reply Queue]
    G --> H[Client Response]

    subgraph "Client Components"
        A
        B
        C
        D
    end

    subgraph "Redis Infrastructure"
        E
        G
    end

    subgraph "External Services"
        F
    end
```

## Key Components

### 1. RhaiSupervisorBuilder

A builder pattern implementation for constructing `RhaiSupervisor` instances with proper configuration validation.

**Responsibilities:**
- Configure the Redis connection URL
- Set the caller ID for task attribution
- Validate configuration before building the client

**Key Methods:**
- `caller_id(id: &str)` - Sets the caller identifier
- `redis_url(url: &str)` - Configures the Redis connection
- `build()` - Creates the final `RhaiSupervisor` instance

### 2. RhaiSupervisor

The main client interface that manages Redis connections and provides factory methods for creating play requests.

**Responsibilities:**
- Maintain the Redis connection pool
- Provide factory methods for request builders
- Handle low-level Redis operations
- Manage task status queries

**Key Methods:**
- `new_play_request()` - Creates a new `PlayRequestBuilder`
- `get_task_status(task_id)` - Queries task status from Redis
- Internal methods for Redis operations

### 3. PlayRequestBuilder

A fluent builder for constructing and submitting script execution requests.

**Responsibilities:**
- Configure script execution parameters
- Handle script loading from files or strings
- Manage request timeouts
- Provide submission methods (fire-and-forget vs. await-response)

**Key Methods:**
- `worker_id(id: &str)` - Target worker queue (determines which worker processes the task)
- `context_id(id: &str)` - Target context ID (determines the execution context/circle)
- `script(content: &str)` - Set script content directly
- `script_path(path: &str)` - Load script from a file
- `timeout(duration: Duration)` - Set the execution timeout
- `submit()` - Fire-and-forget submission
- `await_response()` - Submit and wait for the result

**Architecture Note:** The decoupling of `worker_id` and `context_id` allows a single worker to process tasks for multiple contexts (circles), providing greater deployment flexibility.
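To make the request flow concrete, here is a minimal usage sketch of the builder chain. Only the method names listed above come from the actual API; the constructor name, async signatures, and error handling are assumptions for illustration.

```rust
use std::time::Duration;

// Hypothetical usage sketch; method names are from this document,
// exact signatures and the constructor name are assumed.
async fn demo() -> Result<(), Box<dyn std::error::Error>> {
    let supervisor = RhaiSupervisorBuilder::new() // constructor name assumed
        .caller_id("client_1")
        .redis_url("redis://127.0.0.1/")
        .build()?;

    let output = supervisor
        .new_play_request()
        .worker_id("worker_1")    // selects the worker queue
        .context_id("circle_a")   // selects the execution context (circle)
        .script(r#"print("hello");"#)
        .timeout(Duration::from_secs(30))
        .await_response()         // submit, then block on the reply queue
        .await?;

    println!("{output:?}");
    Ok(())
}
```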
### 4. Data Structures

#### RhaiTaskDetails
Represents the complete state of a task throughout its lifecycle.

```rust
pub struct RhaiTaskDetails {
    pub task_id: String,
    pub script: String,
    pub status: String, // "pending", "processing", "completed", "error"
    pub output: Option<String>,
    pub error: Option<String>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
    pub caller_id: String,
}
```

#### RhaiSupervisorError
Comprehensive error handling for various failure scenarios:
- `RedisError` - Redis connection/operation failures
- `SerializationError` - JSON serialization/deserialization issues
- `Timeout` - Task execution timeouts
- `TaskNotFound` - Missing tasks after submission

## Communication Protocol

### Task Submission Flow

1. **Task Creation**: Client generates a unique UUID for task identification
2. **Task Storage**: Task details are stored in a Redis hash: `rhailib:<task_id>`
3. **Queue Submission**: The task ID is pushed to the worker queue: `rhailib:<worker_id>`
4. **Reply Queue Setup**: Client listens on: `rhailib:reply:<task_id>`

### Redis Key Patterns

- **Task Storage**: `rhailib:<task_id>` (Redis Hash)
- **Worker Queues**: `rhailib:<worker_id>` (Redis List)
- **Reply Queues**: `rhailib:reply:<task_id>` (Redis List)

### Message Flow Diagram

```mermaid
sequenceDiagram
    participant C as Client
    participant R as Redis
    participant W as Worker

    C->>R: HSET rhailib:task_id (task details)
    C->>R: LPUSH rhailib:worker_id task_id
    C->>R: BLPOP rhailib:reply:task_id (blocking)

    W->>R: BRPOP rhailib:worker_id (blocking)
    W->>W: Execute Rhai Script
    W->>R: LPUSH rhailib:reply:task_id (result)

    R->>C: Return result from BLPOP
    C->>R: DEL rhailib:reply:task_id (cleanup)
```
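The same flow, seen from the client side with the `redis` crate: a minimal sketch assuming a multiplexed async connection. The key names mirror the documented patterns; the connection handling and field layout are simplified for clarity.

```rust
use redis::AsyncCommands;

// Sketch of the client half of the protocol above; key names follow the
// documented patterns, everything else is illustrative.
async fn submit_and_wait(
    conn: &mut redis::aio::MultiplexedConnection,
    worker_id: &str,
    task_id: &str,
    script: &str,
    timeout_secs: f64,
) -> redis::RedisResult<Option<String>> {
    let task_key = format!("rhailib:{}", task_id);
    let queue_key = format!("rhailib:{}", worker_id);
    let reply_key = format!("rhailib:reply:{}", task_id);

    // 1. Store the task details, 2. enqueue the task ID for the worker.
    let _: () = conn.hset_multiple(&task_key, &[("script", script), ("status", "pending")]).await?;
    let _: () = conn.lpush(&queue_key, task_id).await?;

    // 3. Block on the reply queue until the worker pushes a result (or timeout).
    let reply: Option<(String, String)> = conn.blpop(&reply_key, timeout_secs).await?;

    // 4. Clean up the reply queue.
    let _: () = conn.del(&reply_key).await?;
    Ok(reply.map(|(_, result)| result))
}
```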
## Concurrency and Async Design

The client is built on `tokio` for asynchronous operations:

- **Connection Pooling**: Uses Redis multiplexed connections for efficiency
- **Non-blocking Operations**: All Redis operations are async
- **Timeout Handling**: Configurable timeouts with proper cleanup
- **Error Propagation**: Comprehensive error handling with context

## Configuration and Deployment

### Prerequisites
- Redis server accessible to both client and workers
- Proper network connectivity between components
- Sufficient Redis memory for task storage

### Configuration Options
- **Redis URL**: Connection string for the Redis instance
- **Caller ID**: Unique identifier for the client instance
- **Timeouts**: Per-request timeout configuration
- **Worker Targeting**: Direct worker queue addressing

## Security Considerations

- **Task Isolation**: Each task uses unique identifiers
- **Queue Separation**: Worker-specific queues prevent cross-contamination
- **Cleanup**: Automatic cleanup of reply queues after completion
- **Error Handling**: Secure error propagation without sensitive data leakage

## Performance Characteristics

- **Scalability**: Horizontal scaling through multiple worker instances
- **Throughput**: Limited by Redis performance and network latency
- **Memory Usage**: Efficient with connection pooling and cleanup
- **Latency**: Low latency for local Redis deployments

## Integration Points

The client integrates with:
- **Worker Services**: Via the Redis queue protocol
- **Monitoring Systems**: Through structured logging
- **Application Code**: Via the builder pattern API
- **Configuration Systems**: Through environment variables and builders
185
core/supervisor/examples/cli/README.md
Normal file
@ -0,0 +1,185 @@
# Hero Supervisor CLI Example

This example demonstrates how to use the `hive-supervisor` CLI tool for managing workers and jobs in the Hero ecosystem.

## Prerequisites

1. **Redis Server**: Make sure Redis is running on `localhost:6379`
   ```bash
   # Install Redis (macOS)
   brew install redis

   # Start Redis
   redis-server
   ```

2. **Zinit Process Manager**: Install and configure Zinit
   ```bash
   # Install Zinit (example for Linux/macOS)
   # Follow the Zinit installation instructions for your platform
   ```

3. **Worker Binaries**: The configuration references worker binaries that need to be available:
   - `/usr/local/bin/osis_worker`
   - `/usr/local/bin/sal_worker`
   - `/usr/local/bin/v_worker`
   - `/usr/local/bin/python_worker`

   For testing purposes, you can create mock worker binaries or update the paths in `config.toml` to point to existing binaries; a minimal mock is sketched below.
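For instance, a stand-in worker can be a trivial binary that simply stays alive so the process manager sees a healthy service. This is a hypothetical sketch, not part of the repository:

```rust
// mock_worker.rs - hypothetical stand-in for a real worker binary.
// It only sleeps in a loop so the supervisor sees a long-running process;
// it does not consume jobs from Redis.
fn main() {
    eprintln!("mock worker started (WORKER_TYPE={:?})", std::env::var("WORKER_TYPE").ok());
    loop {
        std::thread::sleep(std::time::Duration::from_secs(60));
    }
}
```

Compile it with `rustc mock_worker.rs -o /usr/local/bin/osis_worker`, or point `binary_path` in `config.toml` at wherever you place it.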
## Configuration

The `config.toml` file contains the supervisor configuration:

- **Global settings**: Redis URL and Zinit socket path
- **Worker configurations**: Binary paths and environment variables for each worker type

## Usage Examples

### 1. Build the CLI

```bash
# From the supervisor directory
cargo build --bin hive-supervisor --release
```

### 2. Worker Management

```bash
# Show help
./target/release/hive-supervisor --config examples/cli/config.toml --help

# List all configured workers
./target/release/hive-supervisor --config examples/cli/config.toml workers list

# Start all workers
./target/release/hive-supervisor --config examples/cli/config.toml workers start

# Start specific workers
./target/release/hive-supervisor --config examples/cli/config.toml workers start osis_worker sal_worker

# Check worker status
./target/release/hive-supervisor --config examples/cli/config.toml workers status

# Stop all workers
./target/release/hive-supervisor --config examples/cli/config.toml workers stop

# Restart a specific worker
./target/release/hive-supervisor --config examples/cli/config.toml workers restart osis_worker
```

### 3. Job Management

```bash
# Create a job with an inline script
./target/release/hive-supervisor --config examples/cli/config.toml jobs create \
    --script 'print("Hello from OSIS worker!");' \
    --script-type osis \
    --caller-id "user123" \
    --context-id "session456"

# Create a job from a file
./target/release/hive-supervisor --config examples/cli/config.toml jobs create \
    --file examples/cli/sample_script.rhai \
    --script-type osis \
    --caller-id "user123" \
    --context-id "session456"

# List all jobs
./target/release/hive-supervisor --config examples/cli/config.toml jobs list

# Check job status
./target/release/hive-supervisor --config examples/cli/config.toml jobs status <JOB_ID>

# View job logs
./target/release/hive-supervisor --config examples/cli/config.toml jobs logs <JOB_ID>

# Stop a job
./target/release/hive-supervisor --config examples/cli/config.toml jobs stop <JOB_ID>
```

### 4. Interactive REPL Mode

```bash
# Enter REPL mode for OSIS scripts
./target/release/hive-supervisor --config examples/cli/config.toml repl \
    --caller-id "user123" \
    --context-id "session456" \
    --script-type osis \
    --timeout 60

# In REPL mode, you can:
# - Type scripts directly and press Enter to execute
# - Type 'help' for available commands
# - Type 'exit' or 'quit' to leave REPL mode
```

### 5. Verbose Logging

```bash
# Enable debug logging
./target/release/hive-supervisor --config examples/cli/config.toml -v workers status

# Enable trace logging
./target/release/hive-supervisor --config examples/cli/config.toml -vv workers status

# Disable timestamps
./target/release/hive-supervisor --config examples/cli/config.toml --no-timestamp workers status
```

## Sample Scripts

The `sample_scripts/` directory contains example scripts for different worker types:

- `hello_osis.rhai` - Simple OSIS/HeroScript example
- `system_sal.rhai` - SAL system operation example
- `math_v.v` - V language calculation example
- `data_python.py` - Python data processing example

## Troubleshooting

### Common Issues

1. **Redis Connection Error**
   - Ensure Redis is running: `redis-cli ping`
   - Check the Redis URL in `config.toml`

2. **Zinit Socket Error**
   - Verify Zinit is running and the socket path is correct
   - Check permissions on the socket file

3. **Worker Binary Not Found**
   - Update binary paths in `config.toml` to match your system
   - Ensure worker binaries are executable

4. **Permission Denied**
   - Check file permissions on configuration and binary files
   - Ensure the user has access to the Zinit socket

### Debug Mode

Run with verbose logging to see detailed operation information:

```bash
RUST_LOG=debug ./target/release/hive-supervisor --config examples/cli/config.toml -vv workers status
```

## Configuration Customization

You can customize the configuration for your environment:

1. **Update Redis URL**: Change `redis_url` in the `[global]` section
2. **Update Zinit Socket**: Change `zinit_socket_path` for your Zinit installation
3. **Worker Paths**: Update binary paths in worker sections to match your setup
4. **Environment Variables**: Add or modify environment variables for each worker type

## Integration with Hero Ecosystem

This CLI integrates with the broader Hero ecosystem:

- **Job Queue**: Uses Redis for job queuing and status tracking
- **Process Management**: Uses Zinit for worker lifecycle management
- **Script Execution**: Supports multiple script types (OSIS, SAL, V, Python)
- **Monitoring**: Provides real-time status and logging capabilities

For more information about the Hero ecosystem, see the main project documentation.
19
core/supervisor/examples/cli/config.toml
Normal file
@ -0,0 +1,19 @@
# Hero Supervisor CLI Configuration Example
# This configuration demonstrates how to set up the hive-supervisor CLI
# with different worker types for script execution.

[global]
# Redis connection URL for job queuing
redis_url = "redis://localhost:6379"

# OSIS Worker Configuration
# Handles OSIS (HeroScript) execution
[osis_worker]
binary_path = "/Users/timurgordon/code/git.ourworld.tf/herocode/hero/target/debug/osis"
env_vars = { "RUST_LOG" = "info", "WORKER_TYPE" = "osis", "MAX_CONCURRENT_JOBS" = "5" }

# SAL Worker Configuration
# Handles System Abstraction Layer scripts
[sal_worker]
binary_path = "/Users/timurgordon/code/git.ourworld.tf/herocode/hero/target/debug/sal"
env_vars = { "RUST_LOG" = "info", "WORKER_TYPE" = "sal", "MAX_CONCURRENT_JOBS" = "3" }
144
core/supervisor/examples/cli/run_examples.sh
Executable file
@ -0,0 +1,144 @@
#!/bin/bash

# Hero Supervisor CLI Example Runner
# This script demonstrates various CLI operations

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SUPERVISOR_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
CONFIG_FILE="$SCRIPT_DIR/config.toml"
CLI_BINARY="$SUPERVISOR_DIR/target/release/hive-supervisor"

echo -e "${BLUE}=== Hero Supervisor CLI Example Runner ===${NC}"
echo "Script directory: $SCRIPT_DIR"
echo "Supervisor directory: $SUPERVISOR_DIR"
echo "Configuration file: $CONFIG_FILE"
echo

# Function to run a CLI command with error handling
run_cli() {
    local description="$1"
    shift
    echo -e "${YELLOW}Running: $description${NC}"
    echo "Command: $CLI_BINARY --config $CONFIG_FILE $*"
    echo

    if "$CLI_BINARY" --config "$CONFIG_FILE" "$@"; then
        echo -e "${GREEN}✓ Success${NC}"
    else
        echo -e "${RED}✗ Failed${NC}"
        return 1
    fi
    echo
}

# Check if the CLI binary exists
if [[ ! -f "$CLI_BINARY" ]]; then
    echo -e "${YELLOW}Building CLI binary...${NC}"
    cd "$SUPERVISOR_DIR"
    cargo build --bin hive-supervisor --release
    echo
fi

# Check if the config file exists
if [[ ! -f "$CONFIG_FILE" ]]; then
    echo -e "${RED}Error: Configuration file not found: $CONFIG_FILE${NC}"
    exit 1
fi

echo -e "${BLUE}=== CLI Help and Information ===${NC}"
run_cli "Show main help" --help

echo -e "${BLUE}=== Worker Management Examples ===${NC}"
run_cli "List configured workers" workers list
run_cli "Show worker management help" workers --help

# Note: These commands would require actual worker binaries and Zinit setup
echo -e "${YELLOW}Note: The following commands require actual worker binaries and Zinit setup${NC}"
echo -e "${YELLOW}They are shown for demonstration but may fail without proper setup${NC}"
echo

# Uncomment these if you have the proper setup
# run_cli "Check worker status" workers status
# run_cli "Start all workers" workers start
# run_cli "Check worker status after start" workers status

echo -e "${BLUE}=== Job Management Examples ===${NC}"
run_cli "Show job management help" jobs --help

# Create sample jobs (these will also require workers to be running)
echo -e "${YELLOW}Sample job creation commands (require running workers):${NC}"
echo

echo "# Create OSIS job with inline script:"
echo "$CLI_BINARY --config $CONFIG_FILE jobs create \\"
echo "  --script 'print(\"Hello from CLI!\");' \\"
echo "  --script-type osis \\"
echo "  --caller-id \"cli_demo\" \\"
echo "  --context-id \"example_session\""
echo

echo "# Create job from sample script file:"
echo "$CLI_BINARY --config $CONFIG_FILE jobs create \\"
echo "  --file \"$SCRIPT_DIR/sample_scripts/hello_osis.rhai\" \\"
echo "  --script-type osis \\"
echo "  --caller-id \"cli_demo\" \\"
echo "  --context-id \"example_session\""
echo

echo "# List all jobs:"
echo "$CLI_BINARY --config $CONFIG_FILE jobs list"
echo

echo "# Check job status (replace JOB_ID with actual job ID):"
echo "$CLI_BINARY --config $CONFIG_FILE jobs status JOB_ID"
echo

echo -e "${BLUE}=== REPL Mode Example ===${NC}"
echo -e "${YELLOW}REPL mode command (interactive):${NC}"
echo "$CLI_BINARY --config $CONFIG_FILE repl \\"
echo "  --caller-id \"cli_demo\" \\"
echo "  --context-id \"example_session\" \\"
echo "  --script-type osis \\"
echo "  --timeout 60"
echo

echo -e "${BLUE}=== Sample Scripts ===${NC}"
echo "Available sample scripts in $SCRIPT_DIR/sample_scripts/:"
for script in "$SCRIPT_DIR/sample_scripts"/*; do
    if [[ -f "$script" ]]; then
        basename "$script"
    fi
done
echo

echo -e "${BLUE}=== Verbose Logging Examples ===${NC}"
echo "# Debug logging:"
echo "$CLI_BINARY --config $CONFIG_FILE -v workers list"
echo
echo "# Trace logging:"
echo "$CLI_BINARY --config $CONFIG_FILE -vv workers list"
echo
echo "# No timestamps:"
echo "$CLI_BINARY --config $CONFIG_FILE --no-timestamp workers list"
echo

echo -e "${GREEN}=== Example Runner Complete ===${NC}"
echo -e "${YELLOW}To run actual commands, ensure you have:${NC}"
echo "1. Redis server running on localhost:6379"
echo "2. Zinit process manager installed and configured"
echo "3. Worker binaries available at the paths specified in config.toml"
echo
echo -e "${YELLOW}For testing without full setup, you can:${NC}"
echo "1. Update config.toml with paths to existing binaries"
echo "2. Use the CLI help commands and configuration validation"
echo "3. Test the REPL mode (requires workers to be running)"
90
core/supervisor/examples/cli/sample_scripts/data_python.py
Normal file
@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Sample Python script for demonstration
This script demonstrates Python worker functionality
"""

import json
import datetime
from typing import List, Dict

def main():
    print("=== Python Worker Demo ===")
    print("Python data processing operations")

    # Data structures
    print("\nData structures:")
    users = [
        {"id": 1, "name": "Alice", "age": 30, "role": "developer"},
        {"id": 2, "name": "Bob", "age": 25, "role": "designer"},
        {"id": 3, "name": "Charlie", "age": 35, "role": "manager"},
        {"id": 4, "name": "Diana", "age": 28, "role": "developer"}
    ]

    print(f"Total users: {len(users)}")

    # Data filtering
    developers = [user for user in users if user["role"] == "developer"]
    print(f"Developers: {len(developers)}")
    for dev in developers:
        print(f"  - {dev['name']} (age {dev['age']})")

    # Statistical operations
    print("\nStatistical operations:")
    ages = [user["age"] for user in users]
    avg_age = sum(ages) / len(ages)
    min_age = min(ages)
    max_age = max(ages)

    print(f"Average age: {avg_age:.1f}")
    print(f"Age range: {min_age} - {max_age}")

    # Date/time operations
    print("\nDate/time operations:")
    now = datetime.datetime.now()
    print(f"Current time: {now.strftime('%Y-%m-%d %H:%M:%S')}")

    # Calculate birth years
    current_year = now.year
    for user in users:
        birth_year = current_year - user["age"]
        print(f"{user['name']} was born in {birth_year}")

    # JSON processing
    print("\nJSON processing:")
    json_data = json.dumps(users, indent=2)
    print("User data as JSON:")
    print(json_data[:200] + "..." if len(json_data) > 200 else json_data)

    # File operations simulation
    print("\nFile operations:")
    simulate_file_processing()

    print("=== Python Demo Complete ===")

def simulate_file_processing():
    """Simulate file processing operations"""
    files = [
        {"name": "data.csv", "size": 1024, "type": "csv"},
        {"name": "config.json", "size": 512, "type": "json"},
        {"name": "report.pdf", "size": 2048, "type": "pdf"},
        {"name": "script.py", "size": 768, "type": "python"}
    ]

    total_size = sum(file["size"] for file in files)
    print(f"Processing {len(files)} files, total size: {total_size} bytes")

    # Group by type
    file_types = {}
    for file in files:
        file_type = file["type"]
        if file_type not in file_types:
            file_types[file_type] = []
        file_types[file_type].append(file["name"])

    print("Files by type:")
    for file_type, file_names in file_types.items():
        print(f"  {file_type}: {', '.join(file_names)}")

if __name__ == "__main__":
    main()
34
core/supervisor/examples/cli/sample_scripts/hello_osis.rhai
Normal file
@ -0,0 +1,34 @@
// Sample OSIS/HeroScript for demonstration
// This script demonstrates basic OSIS worker functionality

print("=== OSIS Worker Demo ===");
print("Hello from the OSIS worker!");

// Basic variable operations
let name = "Hero";
let version = "1.0";
print(`Running ${name} version ${version}`);

// Simple calculation
let x = 10;
let y = 20;
let result = x + y;
print(`Calculation: ${x} + ${y} = ${result}`);

// Array operations
let numbers = [1, 2, 3, 4, 5];
let sum = 0;
for num in numbers {
    sum += num;
}
print(`Sum of array [1,2,3,4,5]: ${sum}`);

// Function definition and call
fn greet(person) {
    return `Hello, ${person}! Welcome to Hero.`;
}

let greeting = greet("Developer");
print(greeting);

print("=== OSIS Demo Complete ===");
67
core/supervisor/examples/cli/sample_scripts/math_v.v
Normal file
@ -0,0 +1,67 @@
// Sample V language script for demonstration
// This script demonstrates V worker functionality

module main

import math

fn main() {
    println("=== V Worker Demo ===")
    println("V language mathematical operations")

    // Basic arithmetic
    x := 15
    y := 25
    sum := x + y
    product := x * y
    println("Basic arithmetic:")
    println("${x} + ${y} = ${sum}")
    println("${x} * ${y} = ${product}")

    // Mathematical functions
    println("\nMathematical functions:")
    angle := 45.0
    sin_val := math.sin(math.radians(angle))
    cos_val := math.cos(math.radians(angle))
    println("sin(${angle}°) = ${sin_val:.4f}")
    println("cos(${angle}°) = ${cos_val:.4f}")

    // Array operations
    numbers := [1, 4, 9, 16, 25]
    println("\nArray operations:")
    println("Numbers: ${numbers}")

    mut total := 0
    for num in numbers {
        total += num
    }
    println("Sum: ${total}")

    // Square roots
    println("\nSquare roots:")
    for num in numbers {
        sqrt_val := math.sqrt(f64(num))
        println("√${num} = ${sqrt_val:.2f}")
    }

    // Fibonacci sequence
    println("\nFibonacci sequence (first 10 numbers):")
    fib := fibonacci(10)
    println("${fib}")

    println("=== V Demo Complete ===")
}

fn fibonacci(n int) []int {
    mut fib := []int{len: n}
    if n >= 1 {
        fib[0] = 0
    }
    if n >= 2 {
        fib[1] = 1
    }
    for i in 2 .. n {
        fib[i] = fib[i-1] + fib[i-2]
    }
    return fib
}
43
core/supervisor/examples/cli/sample_scripts/system_sal.rhai
Normal file
@ -0,0 +1,43 @@
// Sample SAL (System Abstraction Layer) script for demonstration
// This script demonstrates system-level operations through the SAL worker

print("=== SAL Worker Demo ===");
print("System Abstraction Layer operations");

// System information gathering
print("Gathering system information...");

// Simulated system operations (actual SAL would have real system calls)
let hostname = "hero-system";
let uptime = "2 days, 4 hours";
let load_avg = "0.45, 0.52, 0.48";

print(`Hostname: ${hostname}`);
print(`Uptime: ${uptime}`);
print(`Load Average: ${load_avg}`);

// File system operations
print("\nFile system operations:");
let disk_usage = "45% used";
let available_space = "120GB available";

print(`Disk Usage: ${disk_usage}`);
print(`Available Space: ${available_space}`);

// Process management simulation
print("\nProcess management:");
let active_processes = 156;
let memory_usage = "68%";

print(`Active Processes: ${active_processes}`);
print(`Memory Usage: ${memory_usage}`);

// Network status
print("\nNetwork status:");
let network_interfaces = ["eth0", "lo"];
let connectivity = "Connected";

print(`Network Interfaces: ${network_interfaces}`);
print(`Connectivity: ${connectivity}`);

print("=== SAL Demo Complete ===");
@ -17,7 +17,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

     // Configuration
     let redis_url = "redis://localhost:6379";
-    let zinit_socket = "/var/run/zinit.sock";

     // Create supervisor
     let supervisor = SupervisorBuilder::new()
@ -12,7 +12,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Workers are automatically launched during build
     let supervisor = SupervisorBuilder::new()
         .redis_url("redis://localhost:6379")
-        .zinit_socket_path("/var/run/zinit.sock")
         .osis_worker("/usr/local/bin/osis_worker")
         .sal_worker("/usr/local/bin/sal_worker")
         .v_worker("/usr/local/bin/v_worker")
18
core/supervisor/examples/supervisor_config.toml
Normal file
@ -0,0 +1,18 @@
[global]
redis_url = "redis://localhost:6379"

[osis_worker]
binary_path = "/path/to/osis_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }

[sal_worker]
binary_path = "/path/to/sal_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }

[v_worker]
binary_path = "/path/to/v_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }

[python_worker]
binary_path = "/path/to/python_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
@ -31,6 +31,8 @@ pub enum SupervisorError {
     /// Zinit client operation error
     ZinitError(String),
     SupervisorNotConfigured,
+    /// Configuration file parsing error
+    ConfigError(String),
 }

 impl From<redis::RedisError> for SupervisorError {
@ -95,6 +97,9 @@ impl std::fmt::Display for SupervisorError {
             SupervisorError::SupervisorNotConfigured => {
                 write!(f, "Supervisor not configured for health monitoring")
             }
+            SupervisorError::ConfigError(msg) => {
+                write!(f, "Configuration error: {}", msg)
+            }
         }
     }
 }
@ -1,9 +1,14 @@
 use log::{debug, error, info, warn};
 use redis::AsyncCommands;
+use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
 use std::time::Duration;
 use hero_job::NAMESPACE_PREFIX;
-use zinit_client::ZinitClient;
+use zinit_client::Client as ZinitClient;

 mod job;
 mod error;
@ -23,46 +28,209 @@ pub struct Supervisor {

 pub struct SupervisorBuilder {
     redis_url: Option<String>,
-    zinit_socket_path: Option<String>,
     osis_worker: Option<String>,
     sal_worker: Option<String>,
     v_worker: Option<String>,
     python_worker: Option<String>,
     worker_env_vars: HashMap<String, String>,
+    websocket_config: Option<WebSocketServerConfig>,
 }

 /// Helper struct to pass builder data to worker launch method
+#[derive(Clone)]
 struct SupervisorBuilderData {
     osis_worker: Option<String>,
     sal_worker: Option<String>,
     v_worker: Option<String>,
     python_worker: Option<String>,
     worker_env_vars: HashMap<String, String>,
+    websocket_config: Option<WebSocketServerConfig>,
+}
+
+/// TOML configuration structure for the supervisor
+#[derive(Debug, Deserialize, Serialize)]
+pub struct SupervisorConfig {
+    pub global: GlobalConfig,
+    pub websocket_server: Option<WebSocketServerConfig>,
+    pub osis_worker: Option<WorkerConfigToml>,
+    pub sal_worker: Option<WorkerConfigToml>,
+    pub v_worker: Option<WorkerConfigToml>,
+    pub python_worker: Option<WorkerConfigToml>,
+}
+
+/// Global configuration section
+#[derive(Debug, Deserialize, Serialize)]
+pub struct GlobalConfig {
+    pub redis_url: String,
+}
+
+/// Worker configuration section in TOML
+#[derive(Debug, Deserialize, Serialize)]
+pub struct WorkerConfigToml {
+    pub binary_path: String,
+    #[serde(default)]
+    pub env_vars: HashMap<String, String>,
+}
+
+/// WebSocket server configuration section in TOML
+/// This mirrors the ServerConfig from hero_websocket_server but avoids a circular dependency
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct WebSocketServerConfig {
+    /// Server host address
+    #[serde(default = "default_host")]
+    pub host: String,
+
+    /// Server port
+    #[serde(default = "default_port")]
+    pub port: u16,
+
+    /// Redis connection URL
+    #[serde(default = "default_redis_url")]
+    pub redis_url: String,
+
+    /// Enable authentication
+    #[serde(default)]
+    pub auth: bool,
+
+    /// Enable TLS/WSS
+    #[serde(default)]
+    pub tls: bool,
+
+    /// Path to TLS certificate file
+    pub cert: Option<String>,
+
+    /// Path to TLS private key file
+    pub key: Option<String>,
+
+    /// Separate port for TLS connections
+    pub tls_port: Option<u16>,
+
+    /// Circles configuration - maps circle names to lists of member public keys
+    #[serde(default)]
+    pub circles: HashMap<String, Vec<String>>,
+}
+
+// Default value functions for WebSocket server config
+fn default_host() -> String {
+    "127.0.0.1".to_string()
+}
+
+fn default_port() -> u16 {
+    8443
+}
+
+fn default_redis_url() -> String {
+    "redis://127.0.0.1/".to_string()
 }
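Because every field above is either optional or carries a serde default, an empty `[websocket_server]` table should deserialize to the defaults. A hypothetical sanity check, not part of this commit:

```rust
// Hypothetical test: serde fills in defaults for an empty TOML table.
#[test]
fn websocket_config_defaults() {
    let cfg: WebSocketServerConfig = toml::from_str("").unwrap();
    assert_eq!(cfg.host, "127.0.0.1"); // default_host()
    assert_eq!(cfg.port, 8443);        // default_port()
    assert_eq!(cfg.redis_url, "redis://127.0.0.1/");
    assert!(!cfg.auth && !cfg.tls);    // bools default to false
    assert!(cfg.cert.is_none() && cfg.circles.is_empty());
}
```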
 impl SupervisorBuilder {
     pub fn new() -> Self {
         Self {
             redis_url: None,
-            zinit_socket_path: Some("/var/run/zinit.sock".to_string()),
             osis_worker: None,
             sal_worker: None,
             v_worker: None,
             python_worker: None,
             worker_env_vars: HashMap::new(),
+            websocket_config: None,
         }
     }
+
+    /// Create a SupervisorBuilder from a TOML configuration file
+    pub fn from_toml<P: AsRef<Path>>(toml_path: P) -> Result<Self, SupervisorError> {
+        let toml_content = fs::read_to_string(toml_path)
+            .map_err(|e| SupervisorError::ConfigError(format!("Failed to read TOML file: {}", e)))?;
+
+        let config: SupervisorConfig = toml::from_str(&toml_content)
+            .map_err(|e| SupervisorError::ConfigError(format!("Failed to parse TOML: {}", e)))?;
+
+        let mut builder = Self::new()
+            .redis_url(&config.global.redis_url);
+
+        // Configure workers based on the TOML config
+        if let Some(osis_config) = config.osis_worker {
+            builder = builder.osis_worker(&osis_config.binary_path)
+                .worker_env_vars(osis_config.env_vars);
+        }
+
+        if let Some(sal_config) = config.sal_worker {
+            builder = builder.sal_worker(&sal_config.binary_path)
+                .worker_env_vars(sal_config.env_vars);
+        }
+
+        if let Some(v_config) = config.v_worker {
+            builder = builder.v_worker(&v_config.binary_path)
+                .worker_env_vars(v_config.env_vars);
+        }
+
+        if let Some(python_config) = config.python_worker {
+            builder = builder.python_worker(&python_config.binary_path)
+                .worker_env_vars(python_config.env_vars);
+        }
+
+        // Store WebSocket configuration for later use
+        if let Some(ws_config) = config.websocket_server {
+            builder.websocket_config = Some(ws_config);
+        }
+
+        Ok(builder)
+    }
+
+    /// Validate that all configured worker binaries exist and are executable
+    fn validate_worker_binaries(&self) -> Result<(), SupervisorError> {
+        let workers = [
+            ("OSIS", &self.osis_worker),
+            ("SAL", &self.sal_worker),
+            ("V", &self.v_worker),
+            ("Python", &self.python_worker),
+        ];
+
+        for (worker_type, binary_path) in workers {
+            if let Some(path) = binary_path {
+                let path_obj = Path::new(path);
+
+                if !path_obj.exists() {
+                    return Err(SupervisorError::ConfigError(
+                        format!("{} worker binary does not exist: {}", worker_type, path)
+                    ));
+                }
+
+                if !path_obj.is_file() {
+                    return Err(SupervisorError::ConfigError(
+                        format!("{} worker path is not a file: {}", worker_type, path)
+                    ));
+                }
+
+                // Check if the file is executable (Unix-like systems)
+                #[cfg(unix)]
+                {
+                    use std::os::unix::fs::PermissionsExt;
+                    let metadata = path_obj.metadata().map_err(|e| {
+                        SupervisorError::ConfigError(
+                            format!("Failed to read metadata for {} worker binary {}: {}", worker_type, path, e)
+                        )
+                    })?;
+
+                    let permissions = metadata.permissions();
+                    if permissions.mode() & 0o111 == 0 {
+                        return Err(SupervisorError::ConfigError(
+                            format!("{} worker binary is not executable: {}", worker_type, path)
+                        ));
+                    }
+                }
+
+                info!("Validated {} worker binary: {}", worker_type, path);
+            }
+        }
+
+        Ok(())
+    }

     pub fn redis_url(mut self, url: &str) -> Self {
         self.redis_url = Some(url.to_string());
         self
     }

-    pub fn zinit_socket_path(mut self, path: &str) -> Self {
-        self.zinit_socket_path = Some(path.to_string());
-        self
-    }
-
     pub fn osis_worker(mut self, binary_path: &str) -> Self {
         self.osis_worker = Some(binary_path.to_string());
         self
@ -95,21 +263,23 @@ impl SupervisorBuilder {

     /// Builds the final `Supervisor` instance synchronously.
     ///
-    /// This method validates the configuration and creates the Redis client.
-    /// Worker launching is deferred to the `start_workers()` method.
+    /// This method validates the configuration, checks worker binary existence,
+    /// and creates the Redis client. Worker launching is deferred to the `start_workers()` method.
     ///
     /// # Returns
     ///
-    /// * `Ok(Supervisor)` - Successfully configured client
-    /// * `Err(SupervisorError)` - Configuration or connection error
-    pub fn build(self) -> Result<Supervisor, SupervisorError> {
+    /// * `Ok(Supervisor)` - Successfully configured client with valid binaries
+    /// * `Err(SupervisorError)` - Configuration, binary validation, or connection error
+    pub async fn build(self) -> Result<Supervisor, SupervisorError> {
+        // Validate that all configured worker binaries exist first
+        Self::validate_worker_binaries(&self)?;
+
         let url = self.redis_url
             .unwrap_or_else(|| "redis://127.0.0.1/".to_string());
         let client = redis::Client::open(url)?;

-        let zinit_socket = self.zinit_socket_path
-            .unwrap_or_else(|| "/var/run/zinit.sock".to_string());
-        let zinit_client = ZinitClient::new(&zinit_socket);
+        let zinit_client = ZinitClient::unix_socket("/tmp/zinit.sock").await
+            .map_err(|e| SupervisorError::ZinitError(format!("Failed to create Zinit client: {}", e)))?;

         // Store builder data for later use in start_workers()
         let builder_data = SupervisorBuilderData {
@ -118,6 +288,7 @@ impl SupervisorBuilder {
             v_worker: self.v_worker,
             python_worker: self.python_worker,
             worker_env_vars: self.worker_env_vars,
+            websocket_config: self.websocket_config,
         };

         let supervisor = Supervisor {
@ -134,14 +305,33 @@ impl Supervisor {
     /// Start all configured workers asynchronously.
     /// This method should be called after build() to launch the workers.
     pub async fn start_workers(&self) -> Result<(), SupervisorError> {
+        info!("Starting Hero Supervisor workers...");
+
+        // Test the Zinit connection first
+        info!("Testing Zinit connection at /tmp/zinit.sock...");
+        match self.zinit_client.list().await {
+            Ok(services) => {
+                info!("Successfully connected to Zinit. Current services: {:?}", services);
+            }
+            Err(e) => {
+                error!("Failed to connect to Zinit: {:?}", e);
+                return Err(SupervisorError::ZinitError(format!("Zinit connection failed: {}", e)));
+            }
+        }
+
         // Clean up any existing worker services first
+        info!("Cleaning up existing worker services...");
         self.cleanup_existing_workers().await?;

         // Launch configured workers if builder data is available
         if let Some(builder_data) = &self.builder_data {
+            info!("Launching configured workers...");
             self.launch_configured_workers(builder_data).await?;
+        } else {
+            warn!("No builder data available, no workers to start");
         }

+        info!("All workers started successfully!");
         Ok(())
     }
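Putting the refactor together, startup is now a three-step affair: parse the TOML, build (which validates binaries and connects to Zinit), then start the workers. A minimal caller-side sketch, assuming the `toml` crate is a dependency and using only the APIs introduced in this diff:

```rust
// Sketch of the new startup path; from_toml(), the now-async build(),
// and start_workers() are from this diff, error handling is simplified.
async fn boot() -> Result<Supervisor, SupervisorError> {
    let supervisor = SupervisorBuilder::from_toml("examples/supervisor_config.toml")?
        .build()
        .await?;
    supervisor.start_workers().await?;
    Ok(supervisor)
}
```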
@ -179,7 +369,11 @@ impl Supervisor {

         for worker_name in worker_names {
             // Try to stop and delete, but don't fail if they don't exist
-            let _ = self.stop_and_delete_worker(worker_name).await;
+            info!("Attempting to cleanup worker: {}", worker_name);
+            match self.stop_and_delete_worker(worker_name).await {
+                Ok(_) => info!("Successfully cleaned up worker: {}", worker_name),
+                Err(e) => debug!("Failed to cleanup worker {}: {}", worker_name, e),
+            }
         }

         info!("Existing worker cleanup completed");
@ -188,18 +382,33 @@ impl Supervisor {

     /// Stop and delete a worker service from zinit
     async fn stop_and_delete_worker(&self, worker_name: &str) -> Result<(), SupervisorError> {
+        info!("Starting cleanup for worker: {}", worker_name);
+
         // First try to stop the worker
+        info!("Attempting to stop worker: {}", worker_name);
         if let Err(e) = self.zinit_client.stop(worker_name).await {
             debug!("Worker {} was not running or failed to stop: {}", worker_name, e);
+        } else {
+            info!("Successfully stopped worker: {}", worker_name);
         }

-        // Then try to delete the service
-        if let Err(e) = self.zinit_client.delete(worker_name).await {
+        // Then forget the service to stop monitoring it
+        info!("Attempting to forget worker: {}", worker_name);
+        if let Err(e) = self.zinit_client.forget(worker_name).await {
+            info!("Worker {} was not being monitored or failed to forget: {}", worker_name, e);
+        } else {
+            info!("Successfully forgot worker service: {}", worker_name);
+        }
+
+        // Finally, delete the service configuration
+        info!("Attempting to delete service for worker: {}", worker_name);
+        if let Err(e) = self.zinit_client.delete_service(worker_name).await {
             debug!("Worker {} service did not exist or failed to delete: {}", worker_name, e);
         } else {
             info!("Successfully deleted worker service: {}", worker_name);
         }

+        info!("Completed cleanup for worker: {}", worker_name);
         Ok(())
     }
@ -211,6 +420,157 @@ impl Supervisor {
     pub fn new_job(&self) -> JobBuilder {
         JobBuilder::new(self)
     }
+
+    /// Get the WebSocket server configuration from the TOML config
+    pub fn get_websocket_config(&self) -> Result<WebSocketServerConfig, SupervisorError> {
+        let builder_data = self.builder_data.as_ref().ok_or_else(|| {
+            SupervisorError::ConfigError("No builder data available for WebSocket config".to_string())
+        })?;
+
+        builder_data.websocket_config.clone().ok_or_else(|| {
+            SupervisorError::ConfigError("No WebSocket server configuration found in TOML config".to_string())
+        })
+    }
+
+    /// Extract worker configurations from the supervisor's builder data
+    pub fn get_worker_configs(&self) -> Result<Vec<WorkerConfig>, SupervisorError> {
+        let builder_data = self.builder_data.as_ref().ok_or_else(|| {
+            SupervisorError::ConfigError("No builder data available for worker configs".to_string())
+        })?;
+
+        let mut configs = Vec::new();
+        let env_vars = builder_data.worker_env_vars.clone();
+
+        if let Some(osis_path) = &builder_data.osis_worker {
+            configs.push(
+                WorkerConfig::new("osis_worker_1".to_string(), PathBuf::from(osis_path), ScriptType::OSIS)
+                    .with_env(env_vars.clone())
+            );
+        }
+
+        if let Some(sal_path) = &builder_data.sal_worker {
+            configs.push(
+                WorkerConfig::new("sal_worker_1".to_string(), PathBuf::from(sal_path), ScriptType::SAL)
+                    .with_env(env_vars.clone())
+            );
+        }
+
+        if let Some(v_path) = &builder_data.v_worker {
+            configs.push(
+                WorkerConfig::new("v_worker_1".to_string(), PathBuf::from(v_path), ScriptType::V)
+                    .with_env(env_vars.clone())
+            );
+        }
+
+        if let Some(python_path) = &builder_data.python_worker {
+            configs.push(
+                WorkerConfig::new("python_worker_1".to_string(), PathBuf::from(python_path), ScriptType::Python)
+                    .with_env(env_vars.clone())
+            );
+        }
+
+        Ok(configs)
+    }
|
/// Spawn a background lifecycle manager that continuously monitors and maintains worker health
|
||||||
|
/// Returns a JoinHandle that can be used to stop the lifecycle manager
|
||||||
|
pub fn spawn_lifecycle_manager(
|
||||||
|
self: Arc<Self>,
|
||||||
|
worker_configs: Vec<WorkerConfig>,
|
||||||
|
health_check_interval: Duration,
|
||||||
|
) -> tokio::task::JoinHandle<Result<(), SupervisorError>> {
|
||||||
|
let supervisor = self;
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
info!("Starting background lifecycle manager with {} workers", worker_configs.len());
|
||||||
|
info!("Health check interval: {:?}", health_check_interval);
|
||||||
|
|
||||||
|
// Initial worker startup
|
||||||
|
info!("Performing initial worker startup...");
|
||||||
|
if let Err(e) = supervisor.start_workers().await {
|
||||||
|
error!("Failed to start workers during initialization: {}", e);
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the monitoring loop
|
||||||
|
let mut interval = tokio::time::interval(health_check_interval);
|
||||||
|
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
interval.tick().await;
|
||||||
|
|
||||||
|
info!("Running periodic worker health check...");
|
||||||
|
|
||||||
|
// Check each worker's health and restart if needed
|
||||||
|
for worker_config in &worker_configs {
|
||||||
|
if let Err(e) = supervisor.check_and_restart_worker(worker_config).await {
|
||||||
|
error!("Failed to check/restart worker {}: {}", worker_config.name, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Health check cycle completed");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
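    // Illustrative call site (assumed, not part of this commit): starting the
    // lifecycle manager from an Arc-wrapped supervisor with a 30-second
    // health check interval, using the two methods defined above.
    //
    //     let supervisor = Arc::new(supervisor);
    //     let configs = supervisor.get_worker_configs()?;
    //     let handle = supervisor
    //         .clone()
    //         .spawn_lifecycle_manager(configs, Duration::from_secs(30));
    //     // Abort the JoinHandle to stop monitoring: handle.abort();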
    /// Check a single worker's health and restart if needed
    async fn check_and_restart_worker(&self, worker_config: &WorkerConfig) -> Result<(), SupervisorError> {
        let worker_name = &worker_config.name;

        // Get worker status
        match self.zinit_client.status(worker_name).await {
            Ok(status) => {
                let is_healthy = status.state == "running" && status.pid > 0;

                if is_healthy {
                    debug!("Worker {} is healthy (state: {}, pid: {})", worker_name, status.state, status.pid);

                    // Optionally send a ping job for a deeper health check
                    if let Err(e) = self.send_ping_job(worker_config.script_type.clone()).await {
                        warn!("Ping job failed for worker {}: {}", worker_name, e);
                        // Note: we don't restart on ping failure as it might be temporary
                    }
                } else {
                    warn!("Worker {} is unhealthy (state: {}, pid: {}), restarting...",
                          worker_name, status.state, status.pid);

                    // Attempt to restart the worker
                    if let Err(e) = self.restart_worker(worker_name).await {
                        error!("Failed to restart unhealthy worker {}: {}", worker_name, e);

                        // If restart fails, try a full stop/start cycle
                        warn!("Attempting full stop/start cycle for worker: {}", worker_name);
                        if let Err(e) = self.stop_and_delete_worker(worker_name).await {
                            error!("Failed to stop worker {} during recovery: {}", worker_name, e);
                        }

                        if let Err(e) = self.start_worker(worker_config).await {
                            error!("Failed to start worker {} during recovery: {}", worker_name, e);
                            return Err(e);
                        }

                        info!("Successfully recovered worker: {}", worker_name);
                    } else {
                        info!("Successfully restarted worker: {}", worker_name);
                    }
                }
            }
            Err(e) => {
                warn!("Could not get status for worker {} (may not exist): {}", worker_name, e);

                // Worker doesn't exist, try to start it
                info!("Attempting to start missing worker: {}", worker_name);
                if let Err(e) = self.start_worker(worker_config).await {
                    error!("Failed to start missing worker {}: {}", worker_name, e);
                    return Err(e);
                }

                info!("Successfully started missing worker: {}", worker_name);
            }
        }

        Ok(())
    }

    // Internal helper to submit script details and push to work queue
    async fn create_job_using_connection(
@ -8,7 +8,7 @@ use serde_json::json;
 use std::collections::HashMap;
 use std::path::PathBuf;
 use std::time::Duration;
-use zinit_client::{ZinitClient, ServiceStatus, ServiceState};
+use zinit_client::{Client as ZinitClient, Status};
 use hero_job::ScriptType;
 use crate::{Supervisor, SupervisorError};

@ -16,7 +16,7 @@ use crate::{Supervisor, SupervisorError};
 #[derive(Debug, Clone)]
 pub struct WorkerInfo {
     pub config: WorkerConfig,
-    pub status: Option<ServiceStatus>,
+    pub status: Option<Status>,
     pub is_running: bool,
 }

@ -90,7 +90,7 @@ impl Supervisor {
     for config in worker_configs {
         let status = self.zinit_client.status(&config.name).await.ok();
         let is_running = status.as_ref()
-            .map(|s| matches!(s.state, ServiceState::Running) && s.pid > 0)
+            .map(|s| s.state == "running" && s.pid > 0)
             .unwrap_or(false);

         workers.push(WorkerInfo {

@ -117,6 +117,10 @@ impl Supervisor {
     self.zinit_client.create_service(&worker_config.name, service_config).await
         .map_err(|e| SupervisorError::ZinitError(format!("Failed to create service: {}", e)))?;

+    // Monitor the service so Zinit starts managing it
+    self.zinit_client.monitor(&worker_config.name).await
+        .map_err(|e| SupervisorError::ZinitError(format!("Failed to monitor service: {}", e)))?;
+
     // Start the service
     self.zinit_client.start(&worker_config.name).await
         .map_err(|e| SupervisorError::ZinitError(format!("Failed to start worker: {}", e)))?;

@ -168,7 +172,7 @@ impl Supervisor {
     &self,
     worker_name: &str,
     zinit_client: &ZinitClient,
-) -> Result<ServiceStatus, SupervisorError> {
+) -> Result<Status, SupervisorError> {
     match zinit_client.status(worker_name).await {
         Ok(status) => Ok(status),
         Err(e) => {

@ -183,7 +187,7 @@ impl Supervisor {
     &self,
     worker_configs: &[WorkerConfig],
     zinit_client: &ZinitClient,
-) -> Result<HashMap<String, ServiceStatus>, SupervisorError> {
+) -> Result<HashMap<String, Status>, SupervisorError> {
     let mut status_map = HashMap::new();

     for worker in worker_configs {

@ -200,19 +204,7 @@ impl Supervisor {
     Ok(status_map)
 }

-/// Start multiple workers
-pub async fn start_workers(
-    &self,
-    worker_configs: &[WorkerConfig],
-) -> Result<(), SupervisorError> {
-    info!("Starting {} workers", worker_configs.len());
-
-    for worker in worker_configs {
-        self.start_worker(worker).await?;
-    }
-
-    Ok(())
-}
-
 /// Stop multiple workers
 pub async fn stop_workers(

@ -240,7 +232,7 @@ impl Supervisor {
     for worker in worker_configs {
         if worker.script_type == *script_type {
             if let Ok(status) = zinit_client.status(&worker.name).await {
-                if status.state == ServiceState::Running {
+                if status.state == "running" {
                     running_count += 1;
                 }
             }

@ -277,26 +269,35 @@ impl Supervisor {
 }

 /// Create Zinit service configuration from worker config
-fn create_service_config(&self, worker: &WorkerConfig) -> serde_json::Value {
-    let mut config = json!({
-        "exec": format!("{} {}",
-            worker.binary_path.display(),
-            worker.args.join(" ")
-        ),
-        "oneshot": !worker.restart_on_exit,
-    });
+fn create_service_config(&self, worker: &WorkerConfig) -> serde_json::Map<String, serde_json::Value> {
+    use serde_json::{Map, Value};
+
+    let mut config = Map::new();
+
+    config.insert(
+        "exec".to_string(),
+        Value::String(format!("{} {}",
+            worker.binary_path.display(),
+            worker.args.join(" ")
+        ))
+    );
+
+    config.insert(
+        "oneshot".to_string(),
+        Value::Bool(!worker.restart_on_exit)
+    );

     if let Some(health_check) = &worker.health_check {
-        config["test"] = json!(health_check);
+        config.insert("test".to_string(), Value::String(health_check.clone()));
     }

     if !worker.dependencies.is_empty() {
-        config["after"] = json!(worker.dependencies);
+        config.insert("after".to_string(), json!(worker.dependencies));
     }

     // Add environment variables if any
     if !worker.env.is_empty() {
-        config["env"] = json!(worker.env);
+        config.insert("env".to_string(), json!(worker.env));
     }

     config
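For reference, a worker with a health check, one dependency, and environment variables would serialize to a Zinit service map along these lines (hypothetical values; the keys are the ones inserted by `create_service_config` above):

```json
{
  "exec": "/usr/local/bin/osis --config /etc/hero/osis.toml",
  "oneshot": false,
  "test": "redis-cli ping",
  "after": ["redis"],
  "env": { "RUST_LOG": "info" }
}
```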
@ -307,6 +308,8 @@ impl Supervisor {
     use hero_job::ScriptType;
     use std::path::PathBuf;

+    let mut errors = Vec::new();
+
     // Launch OSIS worker if configured
     if let Some(binary_path) = &builder.osis_worker {
         let worker_id = "osis_worker_1";

@ -318,7 +321,11 @@ impl Supervisor {
     config.env.extend(builder.worker_env_vars.clone());

     info!("Launching OSIS worker: {}", worker_id);
-    self.start_worker(&config).await?;
+    if let Err(e) = self.start_worker(&config).await {
+        let error_msg = format!("Failed to start OSIS worker: {}", e);
+        warn!("{}", error_msg);
+        errors.push(error_msg);
+    }
 }

 // Launch SAL worker if configured

@ -332,7 +339,11 @@ impl Supervisor {
     config.env.extend(builder.worker_env_vars.clone());

     info!("Launching SAL worker: {}", worker_id);
-    self.start_worker(&config).await?;
+    if let Err(e) = self.start_worker(&config).await {
+        let error_msg = format!("Failed to start SAL worker: {}", e);
+        warn!("{}", error_msg);
+        errors.push(error_msg);
+    }
 }

 // Launch V worker if configured

@ -346,7 +357,11 @@ impl Supervisor {
     config.env.extend(builder.worker_env_vars.clone());

     info!("Launching V worker: {}", worker_id);
-    self.start_worker(&config).await?;
+    if let Err(e) = self.start_worker(&config).await {
+        let error_msg = format!("Failed to start V worker: {}", e);
+        warn!("{}", error_msg);
+        errors.push(error_msg);
+    }
 }

 // Launch Python worker if configured

@ -360,9 +375,21 @@ impl Supervisor {
     config.env.extend(builder.worker_env_vars.clone());

     info!("Launching Python worker: {}", worker_id);
-    self.start_worker(&config).await?;
+    if let Err(e) = self.start_worker(&config).await {
+        let error_msg = format!("Failed to start Python worker: {}", e);
+        warn!("{}", error_msg);
+        errors.push(error_msg);
+    }
 }

-    Ok(())
+    // Return a result based on whether any workers failed to start
+    if errors.is_empty() {
+        info!("All configured workers started successfully");
+        Ok(())
+    } else {
+        let combined_error = format!("Some workers failed to start: {}", errors.join("; "));
+        warn!("{}", combined_error);
+        Err(SupervisorError::ZinitError(combined_error))
+    }
 }
 }
@ -11,6 +11,26 @@ path = "src/lib.rs"
 name = "worker"
 path = "cmd/worker.rs"

+[[bin]]
+name = "osis"
+path = "cmd/osis.rs"
+
+[[bin]]
+name = "system"
+path = "cmd/system.rs"
+
+[[example]]
+name = "trait_based_worker_demo"
+path = "examples/trait_based_worker_demo.rs"
+
+[[example]]
+name = "osis_worker_demo"
+path = "examples/osis_worker_demo.rs"
+
+[[example]]
+name = "system_worker_demo"
+path = "examples/system_worker_demo.rs"
+
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]

@ -24,6 +44,9 @@ env_logger = "0.10"
 clap = { version = "4.4", features = ["derive"] }
 uuid = { version = "1.6", features = ["v4", "serde"] } # Though task_id is a string, uuid might be useful
 chrono = { version = "0.4", features = ["serde"] }
+toml = "0.8"
+thiserror = "1.0"
+async-trait = "0.1"
 hero_supervisor = { path = "../supervisor" }
 hero_job = { path = "../job" }
 heromodels = { path = "../../../db/heromodels", features = ["rhai"] }
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
use log::{error, info};
|
||||||
|
use rhailib_worker::config::{ConfigError, WorkerConfig};
|
||||||
|
use rhailib_worker::engine::create_heromodels_engine;
|
||||||
|
use rhailib_worker::sync_worker::SyncWorker;
|
||||||
|
use rhailib_worker::worker_trait::{spawn_worker, WorkerConfig as TraitWorkerConfig};
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::signal;
|
||||||
|
use tokio::sync::mpsc;
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(
|
||||||
|
name = "osis",
|
||||||
|
version = "0.1.0",
|
||||||
|
about = "OSIS (Operating System Integration Service) - Synchronous Worker",
|
||||||
|
long_about = "A synchronous worker for Hero framework that processes jobs sequentially. \
|
||||||
|
Ideal for system-level operations that require careful resource management."
|
||||||
|
)]
|
||||||
|
struct Args {
|
||||||
|
/// Path to TOML configuration file
|
||||||
|
#[arg(short, long, help = "Path to TOML configuration file")]
|
||||||
|
config: PathBuf,
|
||||||
|
|
||||||
|
/// Override worker ID from config
|
||||||
|
#[arg(long, help = "Override worker ID from configuration file")]
|
||||||
|
worker_id: Option<String>,
|
||||||
|
|
||||||
|
/// Override Redis URL from config
|
||||||
|
#[arg(long, help = "Override Redis URL from configuration file")]
|
||||||
|
redis_url: Option<String>,
|
||||||
|
|
||||||
|
/// Override database path from config
|
||||||
|
#[arg(long, help = "Override database path from configuration file")]
|
||||||
|
db_path: Option<String>,
|
||||||
|
|
||||||
|
/// Enable verbose logging (debug level)
|
||||||
|
#[arg(short, long, help = "Enable verbose logging")]
|
||||||
|
verbose: bool,
|
||||||
|
|
||||||
|
/// Disable timestamps in log output
|
||||||
|
#[arg(long, help = "Remove timestamps from log output")]
|
||||||
|
no_timestamp: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
|
let args = Args::parse();
|
||||||
|
|
||||||
|
// Load configuration from TOML file
|
||||||
|
let mut config = match WorkerConfig::from_file(&args.config) {
|
||||||
|
Ok(config) => config,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Failed to load configuration from {:?}: {}", args.config, e);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Validate that this is a sync worker configuration
|
||||||
|
if !config.is_sync() {
|
||||||
|
eprintln!("Error: OSIS worker requires a sync worker configuration");
|
||||||
|
eprintln!("Expected: [worker_type] type = \"sync\"");
|
||||||
|
eprintln!("Found: {:?}", config.worker_type);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply command line overrides
|
||||||
|
if let Some(worker_id) = args.worker_id {
|
||||||
|
config.worker_id = worker_id;
|
||||||
|
}
|
||||||
|
if let Some(redis_url) = args.redis_url {
|
||||||
|
config.redis_url = redis_url;
|
||||||
|
}
|
||||||
|
if let Some(db_path) = args.db_path {
|
||||||
|
config.db_path = db_path;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configure logging
|
||||||
|
setup_logging(&config, args.verbose, args.no_timestamp)?;
|
||||||
|
|
||||||
|
info!("🚀 OSIS Worker starting...");
|
||||||
|
info!("Worker ID: {}", config.worker_id);
|
||||||
|
info!("Redis URL: {}", config.redis_url);
|
||||||
|
info!("Database Path: {}", config.db_path);
|
||||||
|
info!("Preserve Tasks: {}", config.preserve_tasks);
|
||||||
|
|
||||||
|
// Create Rhai engine
|
||||||
|
let engine = create_heromodels_engine();
|
||||||
|
info!("✅ Rhai engine initialized");
|
||||||
|
|
||||||
|
// Create worker configuration for the trait-based interface
|
||||||
|
let worker_config = TraitWorkerConfig::new(
|
||||||
|
config.worker_id.clone(),
|
||||||
|
config.db_path.clone(),
|
||||||
|
config.redis_url.clone(),
|
||||||
|
config.preserve_tasks,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Create sync worker instance
|
||||||
|
let worker = Arc::new(SyncWorker::default());
|
||||||
|
info!("✅ Sync worker instance created");
|
||||||
|
|
||||||
|
// Setup shutdown signal handling
|
||||||
|
let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
|
||||||
|
|
||||||
|
// Spawn shutdown signal handler
|
||||||
|
let shutdown_tx_clone = shutdown_tx.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = signal::ctrl_c().await {
|
||||||
|
error!("Failed to listen for shutdown signal: {}", e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
info!("🛑 Shutdown signal received");
|
||||||
|
if let Err(e) = shutdown_tx_clone.send(()).await {
|
||||||
|
error!("Failed to send shutdown signal: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Spawn the worker
|
||||||
|
info!("🔄 Starting worker loop...");
|
||||||
|
let worker_handle = spawn_worker(worker, engine, shutdown_rx);
|
||||||
|
|
||||||
|
// Wait for the worker to complete
|
||||||
|
match worker_handle.await {
|
||||||
|
Ok(Ok(())) => {
|
||||||
|
info!("✅ OSIS Worker shut down gracefully");
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
error!("❌ OSIS Worker encountered an error: {}", e);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("❌ Failed to join worker task: {}", e);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Setup logging based on configuration and command line arguments
|
||||||
|
fn setup_logging(
|
||||||
|
config: &WorkerConfig,
|
||||||
|
verbose: bool,
|
||||||
|
no_timestamp: bool,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
|
let mut builder = env_logger::Builder::new();
|
||||||
|
|
||||||
|
// Determine log level
|
||||||
|
let log_level = if verbose {
|
||||||
|
"debug"
|
||||||
|
} else {
|
||||||
|
&config.logging.level
|
||||||
|
};
|
||||||
|
|
||||||
|
// Set log level
|
||||||
|
builder.filter_level(match log_level.to_lowercase().as_str() {
|
||||||
|
"trace" => log::LevelFilter::Trace,
|
||||||
|
"debug" => log::LevelFilter::Debug,
|
||||||
|
"info" => log::LevelFilter::Info,
|
||||||
|
"warn" => log::LevelFilter::Warn,
|
||||||
|
"error" => log::LevelFilter::Error,
|
||||||
|
_ => {
|
||||||
|
eprintln!("Invalid log level: {}. Using 'info'", log_level);
|
||||||
|
log::LevelFilter::Info
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Configure timestamps
|
||||||
|
let show_timestamps = !no_timestamp && config.logging.timestamps;
|
||||||
|
if !show_timestamps {
|
||||||
|
builder.format_timestamp(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.init();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::io::Write;
|
||||||
|
use tempfile::NamedTempFile;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_config_validation() {
|
||||||
|
let config_toml = r#"
|
||||||
|
worker_id = "test_osis"
|
||||||
|
redis_url = "redis://localhost:6379"
|
||||||
|
db_path = "/tmp/test_db"
|
||||||
|
|
||||||
|
[worker_type]
|
||||||
|
type = "sync"
|
||||||
|
|
||||||
|
[logging]
|
||||||
|
level = "info"
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let mut temp_file = NamedTempFile::new().unwrap();
|
||||||
|
temp_file.write_all(config_toml.as_bytes()).unwrap();
|
||||||
|
|
||||||
|
let config = WorkerConfig::from_file(temp_file.path()).unwrap();
|
||||||
|
assert!(config.is_sync());
|
||||||
|
assert!(!config.is_async());
|
||||||
|
assert_eq!(config.worker_id, "test_osis");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_async_config_rejection() {
|
||||||
|
let config_toml = r#"
|
||||||
|
worker_id = "test_osis"
|
||||||
|
redis_url = "redis://localhost:6379"
|
||||||
|
db_path = "/tmp/test_db"
|
||||||
|
|
||||||
|
[worker_type]
|
||||||
|
type = "async"
|
||||||
|
default_timeout_seconds = 300
|
||||||
|
|
||||||
|
[logging]
|
||||||
|
level = "info"
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let mut temp_file = NamedTempFile::new().unwrap();
|
||||||
|
temp_file.write_all(config_toml.as_bytes()).unwrap();
|
||||||
|
|
||||||
|
let config = WorkerConfig::from_file(temp_file.path()).unwrap();
|
||||||
|
assert!(!config.is_sync());
|
||||||
|
assert!(config.is_async());
|
||||||
|
// This would be rejected in main() function
|
||||||
|
}
|
||||||
|
}
|
302
core/worker/cmd/system.rs
Normal file
302
core/worker/cmd/system.rs
Normal file
@ -0,0 +1,302 @@
|
|||||||
|
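A hypothetical invocation combining the CLI overrides declared in `Args` above (clap derives kebab-case flag names; the config path and values are illustrative):

```bash
cargo run --bin osis -- \
  --config examples/osis/config.toml \
  --worker-id osis_worker_2 \
  --redis-url redis://127.0.0.1:6379 \
  --verbose --no-timestamp
```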
302 core/worker/cmd/system.rs Normal file
@ -0,0 +1,302 @@
//! System Worker Binary - Asynchronous worker for high-throughput concurrent processing

use clap::Parser;
use log::{error, info, warn};
use rhailib_worker::async_worker_impl::AsyncWorker;
use rhailib_worker::config::{ConfigError, WorkerConfig};
use rhailib_worker::engine::create_heromodels_engine;
use rhailib_worker::worker_trait::{spawn_worker, WorkerConfig as TraitWorkerConfig};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::signal;
use tokio::sync::mpsc;

#[derive(Parser, Debug)]
#[command(
    name = "system",
    version = "0.1.0",
    about = "System Worker - Asynchronous Worker with Concurrent Job Processing",
    long_about = "An asynchronous worker for the Hero framework that processes multiple jobs \
                  concurrently with timeout support. Ideal for high-throughput scenarios \
                  where jobs can be executed in parallel."
)]
struct Args {
    /// Path to TOML configuration file
    #[arg(short, long, help = "Path to TOML configuration file")]
    config: PathBuf,

    /// Override worker ID from config
    #[arg(long, help = "Override worker ID from configuration file")]
    worker_id: Option<String>,

    /// Override Redis URL from config
    #[arg(long, help = "Override Redis URL from configuration file")]
    redis_url: Option<String>,

    /// Override database path from config
    #[arg(long, help = "Override database path from configuration file")]
    db_path: Option<String>,

    /// Override default timeout in seconds
    #[arg(long, help = "Override default job timeout in seconds")]
    timeout: Option<u64>,

    /// Enable verbose logging (debug level)
    #[arg(short, long, help = "Enable verbose logging")]
    verbose: bool,

    /// Disable timestamps in log output
    #[arg(long, help = "Remove timestamps from log output")]
    no_timestamp: bool,

    /// Show worker statistics periodically
    #[arg(long, help = "Show periodic worker statistics")]
    show_stats: bool,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let args = Args::parse();

    // Load configuration from TOML file
    let mut config = match WorkerConfig::from_file(&args.config) {
        Ok(config) => config,
        Err(e) => {
            eprintln!("Failed to load configuration from {:?}: {}", args.config, e);
            std::process::exit(1);
        }
    };

    // Validate that this is an async worker configuration
    if !config.is_async() {
        eprintln!("Error: System worker requires an async worker configuration");
        eprintln!("Expected: [worker_type] type = \"async\"");
        eprintln!("Found: {:?}", config.worker_type);
        std::process::exit(1);
    }

    // Apply command line overrides
    if let Some(worker_id) = args.worker_id {
        config.worker_id = worker_id;
    }
    if let Some(redis_url) = args.redis_url {
        config.redis_url = redis_url;
    }
    if let Some(db_path) = args.db_path {
        config.db_path = db_path;
    }

    // Override timeout if specified
    if let Some(timeout_secs) = args.timeout {
        if let rhailib_worker::config::WorkerType::Async { ref mut default_timeout_seconds } = config.worker_type {
            *default_timeout_seconds = timeout_secs;
        }
    }

    // Configure logging
    setup_logging(&config, args.verbose, args.no_timestamp)?;

    info!("🚀 System Worker starting...");
    info!("Worker ID: {}", config.worker_id);
    info!("Redis URL: {}", config.redis_url);
    info!("Database Path: {}", config.db_path);
    info!("Preserve Tasks: {}", config.preserve_tasks);

    if let Some(timeout) = config.get_default_timeout() {
        info!("Default Timeout: {:?}", timeout);
    }

    // Create Rhai engine
    let engine = create_heromodels_engine();
    info!("✅ Rhai engine initialized");

    // Create worker configuration for the trait-based interface
    let mut worker_config = TraitWorkerConfig::new(
        config.worker_id.clone(),
        config.db_path.clone(),
        config.redis_url.clone(),
        config.preserve_tasks,
    );

    // Add timeout configuration for the async worker
    if let Some(timeout) = config.get_default_timeout() {
        worker_config = worker_config.with_default_timeout(timeout);
    }

    // Create async worker instance
    let worker = Arc::new(AsyncWorker::default());
    info!("✅ Async worker instance created");

    // Setup shutdown signal handling
    let (shutdown_tx, shutdown_rx) = mpsc::channel(1);

    // Spawn shutdown signal handler
    let shutdown_tx_clone = shutdown_tx.clone();
    tokio::spawn(async move {
        if let Err(e) = signal::ctrl_c().await {
            error!("Failed to listen for shutdown signal: {}", e);
            return;
        }
        info!("🛑 Shutdown signal received");
        if let Err(e) = shutdown_tx_clone.send(()).await {
            error!("Failed to send shutdown signal: {}", e);
        }
    });

    // Spawn statistics reporter if requested
    if args.show_stats {
        let worker_stats = Arc::clone(&worker);
        tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(30));
            loop {
                interval.tick().await;
                let running_count = worker_stats.running_job_count().await;
                if running_count > 0 {
                    info!("📊 Worker Stats: {} jobs currently running", running_count);
                } else {
                    info!("📊 Worker Stats: No jobs currently running");
                }
            }
        });
    }

    // Spawn the worker
    info!("🔄 Starting worker loop...");
    let worker_handle = spawn_worker(worker, engine, shutdown_rx);

    // Wait for the worker to complete
    match worker_handle.await {
        Ok(Ok(())) => {
            info!("✅ System Worker shut down gracefully");
        }
        Ok(Err(e)) => {
            error!("❌ System Worker encountered an error: {}", e);
            std::process::exit(1);
        }
        Err(e) => {
            error!("❌ Failed to join worker task: {}", e);
            std::process::exit(1);
        }
    }

    Ok(())
}

/// Set up logging based on configuration and command line arguments
fn setup_logging(
    config: &WorkerConfig,
    verbose: bool,
    no_timestamp: bool,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let mut builder = env_logger::Builder::new();

    // Determine log level
    let log_level = if verbose {
        "debug"
    } else {
        &config.logging.level
    };

    // Set log level
    builder.filter_level(match log_level.to_lowercase().as_str() {
        "trace" => log::LevelFilter::Trace,
        "debug" => log::LevelFilter::Debug,
        "info" => log::LevelFilter::Info,
        "warn" => log::LevelFilter::Warn,
        "error" => log::LevelFilter::Error,
        _ => {
            warn!("Invalid log level: {}. Using 'info'", log_level);
            log::LevelFilter::Info
        }
    });

    // Configure timestamps
    let show_timestamps = !no_timestamp && config.logging.timestamps;
    if !show_timestamps {
        builder.format_timestamp(None);
    }

    builder.init();
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    #[test]
    fn test_config_validation() {
        let config_toml = r#"
worker_id = "test_system"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"

[worker_type]
type = "async"
default_timeout_seconds = 600

[logging]
level = "info"
"#;

        let mut temp_file = NamedTempFile::new().unwrap();
        temp_file.write_all(config_toml.as_bytes()).unwrap();

        let config = WorkerConfig::from_file(temp_file.path()).unwrap();
        assert!(!config.is_sync());
        assert!(config.is_async());
        assert_eq!(config.worker_id, "test_system");
        assert_eq!(config.get_default_timeout(), Some(Duration::from_secs(600)));
    }

    #[test]
    fn test_sync_config_rejection() {
        let config_toml = r#"
worker_id = "test_system"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"

[worker_type]
type = "sync"

[logging]
level = "info"
"#;

        let mut temp_file = NamedTempFile::new().unwrap();
        temp_file.write_all(config_toml.as_bytes()).unwrap();

        let config = WorkerConfig::from_file(temp_file.path()).unwrap();
        assert!(config.is_sync());
        assert!(!config.is_async());
        // This configuration would be rejected in the main() function
    }

    #[test]
    fn test_timeout_override() {
        let config_toml = r#"
worker_id = "test_system"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"

[worker_type]
type = "async"
default_timeout_seconds = 300
"#;

        let mut temp_file = NamedTempFile::new().unwrap();
        temp_file.write_all(config_toml.as_bytes()).unwrap();

        let mut config = WorkerConfig::from_file(temp_file.path()).unwrap();
        assert_eq!(config.get_default_timeout(), Some(Duration::from_secs(300)));

        // Test timeout override
        if let rhailib_worker::config::WorkerType::Async { ref mut default_timeout_seconds } = config.worker_type {
            *default_timeout_seconds = 600;
        }
        assert_eq!(config.get_default_timeout(), Some(Duration::from_secs(600)));
    }
}
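The system binary adds `--timeout` and `--show-stats` on top of the shared flags; a sketch of overriding the configured default timeout (values are illustrative):

```bash
cargo run --bin system -- \
  --config examples/system/config.toml \
  --timeout 600 \
  --show-stats
```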
197 core/worker/examples/README.md Normal file
@ -0,0 +1,197 @@
# Worker Examples

This directory contains example configurations and test scripts for both the OSIS and System worker binaries.

## Overview

Both examples demonstrate the ping/pong functionality built into the Hero workers:
- Workers automatically detect jobs with the script content "ping"
- They respond immediately with "pong" without executing the Rhai engine
- This provides a fast health check and connectivity test mechanism

## Prerequisites

1. **Redis Server**: Both examples require a running Redis server
   ```bash
   # Install Redis (macOS)
   brew install redis

   # Start Redis server
   redis-server
   ```

2. **Rust Environment**: Make sure you can build the worker binaries
   ```bash
   cd /path/to/herocode/hero/core/worker
   cargo build --bin osis --bin system
   ```

## OSIS Worker Example

**Location**: `examples/osis/`

The OSIS (Operating System Integration Service) worker processes jobs synchronously, one at a time.

### Files
- `config.toml` - Configuration for the OSIS worker
- `example.sh` - Test script that demonstrates ping/pong functionality

### Usage
```bash
cd examples/osis
./example.sh
```

### What the script does:
1. Checks Redis connectivity
2. Cleans up any existing jobs
3. Starts the OSIS worker in the background
4. Sends 3 ping jobs sequentially
5. Verifies each job receives a "pong" response
6. Reports success/failure statistics
7. Cleans up worker and Redis data

### Expected Output
```
=== OSIS Worker Example ===
✅ Redis is running
✅ OSIS worker started (PID: 12345)
📤 Sending ping job: ping_job_1_1234567890
✅ Job ping_job_1_1234567890 completed successfully with result: pong
...
🎉 All tests passed! OSIS worker is working correctly.
```

## System Worker Example

**Location**: `examples/system/`

The System worker processes jobs asynchronously, handling multiple jobs concurrently.

### Files
- `config.toml` - Configuration for the System worker (includes async settings)
- `example.sh` - Test script that demonstrates concurrent ping/pong functionality

### Usage
```bash
cd examples/system
./example.sh
```

### What the script does:
1. Checks Redis connectivity
2. Cleans up any existing jobs
3. Starts the System worker with stats reporting
4. Sends 5 concurrent ping jobs
5. Sends 10 rapid-fire ping jobs to test async capabilities
6. Verifies all jobs receive "pong" responses
7. Reports comprehensive success/failure statistics
8. Cleans up worker and Redis data

### Expected Output
```
=== System Worker Example ===
✅ Redis is running
✅ System worker started (PID: 12345)
📤 Sending ping job: ping_job_1_1234567890123
✅ Job ping_job_1_1234567890123 completed successfully with result: pong
...
🎉 All tests passed! System worker is handling concurrent jobs correctly.
Overall success rate: 15/15
```

## Configuration Details

### OSIS Configuration (`examples/osis/config.toml`)
```toml
worker_id = "osis_example_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/osis_example_db"
preserve_tasks = false

[worker_type]
type = "sync"

[logging]
timestamps = true
level = "info"
```

### System Configuration (`examples/system/config.toml`)
```toml
worker_id = "system_example_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/system_example_db"
preserve_tasks = false

[worker_type]
type = "async"
default_timeout_seconds = 30

[logging]
timestamps = true
level = "info"
```

## Key Differences

| Feature | OSIS Worker | System Worker |
|---------|-------------|---------------|
| **Processing** | Sequential (one job at a time) | Concurrent (multiple jobs simultaneously) |
| **Use Case** | System-level operations requiring resource management | High-throughput job processing |
| **Timeout** | No timeout configuration | Configurable job timeouts |
| **Stats** | Basic logging | Optional statistics reporting (`--show-stats`) |
| **Job Handling** | Blocking job execution | Non-blocking async job execution |

## Troubleshooting

### Redis Connection Issues
```bash
# Check if Redis is running
redis-cli ping

# Check Redis logs
redis-server --loglevel verbose
```

### Worker Compilation Issues
```bash
# Clean and rebuild
cargo clean
cargo build --bin osis --bin system
```

### Job Processing Issues
- Check Redis for stuck jobs: `redis-cli keys "hero:*"`
- Clear all Hero jobs: `redis-cli eval "return redis.call('del', unpack(redis.call('keys', 'hero:*')))" 0`
- Check worker logs for detailed error messages

## Extending the Examples

### Adding Custom Jobs
To test with custom Rhai scripts instead of ping jobs:

1. Modify the job creation in the shell scripts:
   ```bash
   # Replace "ping" with your Rhai script
   redis-cli -u "$REDIS_URL" hset "hero:job:$job_id" \
       script "your_rhai_script_here"
   ```

2. Update the result verification to expect your script's output instead of "pong"

### Testing Different Configurations
- Modify the `config.toml` files to test different Redis URLs, database paths, or logging levels
- Test with `preserve_tasks = true` to inspect job details after completion
- Adjust timeout values in the System worker configuration

## Architecture Notes

Both examples demonstrate the unified Worker trait architecture:
- **Common Interface**: Both workers implement the same `Worker` trait
- **Ping/Pong Handling**: Built into the trait's `spawn` method before job delegation
- **Redis Integration**: Uses the shared Job struct from the `hero_job` crate
- **Configuration**: TOML-based configuration with CLI overrides
- **Graceful Shutdown**: Both workers handle SIGTERM/SIGINT properly

This architecture allows for easy extension with new worker types while maintaining consistent behavior and configuration patterns.
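
As a rough sketch of that shape (the names and signatures below are assumptions for illustration; the real definitions live in the worker crate):

```rust
use async_trait::async_trait;

// Hypothetical shape of the unified Worker trait described above.
struct Job {
    id: String,
    script: String,
}

#[async_trait]
trait Worker: Send + Sync {
    async fn process_job(&self, job: Job) -> Result<String, String>;
}

// The spawn loop answers "ping" jobs directly, before delegating to the
// concrete worker implementation (sync or async).
async fn handle(worker: &dyn Worker, job: Job) -> Result<String, String> {
    if job.script.trim() == "ping" {
        return Ok("pong".to_string()); // fast health-check path, no Rhai engine
    }
    worker.process_job(job).await
}
```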
11 core/worker/examples/osis/config.toml Normal file
@ -0,0 +1,11 @@
worker_id = "osis_example_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/osis_example_db"
preserve_tasks = false

[worker_type]
type = "sync"

[logging]
timestamps = true
level = "info"
138 core/worker/examples/osis/example.sh Executable file
@ -0,0 +1,138 @@
#!/bin/bash

# OSIS Worker Example Script
# This script demonstrates the OSIS worker by:
# 1. Starting the worker with the config.toml
# 2. Sending ping jobs to Redis
# 3. Verifying pong responses

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="$SCRIPT_DIR/config.toml"
WORKER_ID="osis_example_worker"
REDIS_URL="redis://localhost:6379"

echo "=== OSIS Worker Example ==="
echo "Script directory: $SCRIPT_DIR"
echo "Config file: $CONFIG_FILE"
echo "Worker ID: $WORKER_ID"
echo "Redis URL: $REDIS_URL"
echo

# Check if Redis is running
echo "Checking Redis connection..."
if ! redis-cli -u "$REDIS_URL" ping > /dev/null 2>&1; then
    echo "❌ Error: Redis is not running or not accessible at $REDIS_URL"
    echo "Please start Redis server first: redis-server"
    exit 1
fi
echo "✅ Redis is running"
echo

# Clean up any existing jobs in the queue
echo "Cleaning up existing jobs in Redis..."
redis-cli -u "$REDIS_URL" del "hero:jobs:$WORKER_ID" > /dev/null 2>&1 || true
redis-cli -u "$REDIS_URL" eval "return redis.call('del', unpack(redis.call('keys', 'hero:job:*')))" 0 > /dev/null 2>&1 || true
echo "✅ Redis queues cleaned"
echo

# Start the OSIS worker in the background
echo "Starting OSIS worker..."
cd "$SCRIPT_DIR/../.."
cargo run --bin osis -- --config "$CONFIG_FILE" &
WORKER_PID=$!
echo "✅ OSIS worker started (PID: $WORKER_PID)"
echo

# Wait a moment for the worker to initialize
echo "Waiting for worker to initialize..."
sleep 3

# Function to send a ping job and check for a pong response
send_ping_job() {
    local job_num=$1
    local job_id="ping_job_${job_num}_$(date +%s)"

    echo "📤 Sending ping job: $job_id"

    # Create job in Redis
    redis-cli -u "$REDIS_URL" hset "hero:job:$job_id" \
        id "$job_id" \
        script "ping" \
        status "Queued" \
        created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        worker_id "$WORKER_ID" > /dev/null

    # Add job to worker queue
    redis-cli -u "$REDIS_URL" lpush "hero:jobs:$WORKER_ID" "$job_id" > /dev/null

    # Wait for job completion and check result
    local timeout=10
    local elapsed=0
    while [ $elapsed -lt $timeout ]; do
        local status=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" status 2>/dev/null || echo "")
        if [ "$status" = "Finished" ]; then
            local result=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" result 2>/dev/null || echo "")
            if [ "$result" = "pong" ]; then
                echo "✅ Job $job_id completed successfully with result: $result"
                return 0
            else
                echo "❌ Job $job_id completed but with unexpected result: $result"
                return 1
            fi
        elif [ "$status" = "Error" ]; then
            local error=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" error 2>/dev/null || echo "")
            echo "❌ Job $job_id failed with error: $error"
            return 1
        fi
        sleep 1
        elapsed=$((elapsed + 1))
    done

    echo "❌ Job $job_id timed out after ${timeout}s"
    return 1
}

# Send multiple ping jobs to test the worker
echo "Testing ping/pong functionality..."
success_count=0
total_jobs=3

for i in $(seq 1 $total_jobs); do
    echo
    echo "--- Test $i/$total_jobs ---"
    if send_ping_job $i; then
        success_count=$((success_count + 1))
    fi
    sleep 1
done

echo
echo "=== Test Results ==="
echo "Successful ping/pong tests: $success_count/$total_jobs"

if [ $success_count -eq $total_jobs ]; then
    echo "🎉 All tests passed! OSIS worker is working correctly."
    exit_code=0
else
    echo "⚠️  Some tests failed. Check the worker logs for details."
    exit_code=1
fi

# Clean up
echo
echo "Cleaning up..."
echo "Stopping OSIS worker (PID: $WORKER_PID)..."
kill $WORKER_PID 2>/dev/null || true
wait $WORKER_PID 2>/dev/null || true
echo "✅ Worker stopped"

echo "Cleaning up Redis jobs..."
redis-cli -u "$REDIS_URL" del "hero:jobs:$WORKER_ID" > /dev/null 2>&1 || true
redis-cli -u "$REDIS_URL" eval "return redis.call('del', unpack(redis.call('keys', 'hero:job:*')))" 0 > /dev/null 2>&1 || true
echo "✅ Redis cleaned up"

echo
echo "=== OSIS Worker Example Complete ==="
exit $exit_code
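While the script is running, the keys it creates can be inspected directly (key names as used above; `<job_id>` stands for whatever ID the script printed):

```bash
# Pending job IDs queued for this worker
redis-cli -u redis://localhost:6379 lrange "hero:jobs:osis_example_worker" 0 -1

# Full state of a single job hash
redis-cli -u redis://localhost:6379 hgetall "hero:job:<job_id>"
```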
14 core/worker/examples/osis_config.toml Normal file
@ -0,0 +1,14 @@
# OSIS Worker Configuration
# Synchronous worker for system-level operations

worker_id = "osis_worker_1"
redis_url = "redis://localhost:6379"
db_path = "/tmp/osis_worker_db"
preserve_tasks = false

[worker_type]
type = "sync"

[logging]
timestamps = true
level = "info"

60 core/worker/examples/osis_worker_demo.rs Normal file
@ -0,0 +1,60 @@
use std::process::{Command, Stdio};
use std::path::Path;
use std::env;
use std::io::{self, Write};

/// OSIS Worker Demo Runner
///
/// This Rust wrapper executes the OSIS worker bash script example.
/// It provides a way to run shell-based examples through Cargo.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🚀 OSIS Worker Demo");
    println!("==================");
    println!();

    // Get the current working directory and construct the path to the shell script
    let current_dir = env::current_dir()?;
    let script_path = current_dir.join("examples").join("osis").join("example.sh");

    // Check if the script exists
    if !script_path.exists() {
        eprintln!("❌ Error: Script not found at {:?}", script_path);
        eprintln!("   Make sure you're running this from the worker crate root directory.");
        std::process::exit(1);
    }

    println!("📁 Script location: {:?}", script_path);
    println!("🔧 Executing OSIS worker example...");
    println!();

    // Make sure the script is executable
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = std::fs::metadata(&script_path)?.permissions();
        perms.set_mode(0o755);
        std::fs::set_permissions(&script_path, perms)?;
    }

    // Execute the shell script
    let mut child = Command::new("bash")
        .arg(&script_path)
        .current_dir(&current_dir)
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .spawn()?;

    // Wait for the script to complete
    let status = child.wait()?;

    println!();
    if status.success() {
        println!("✅ OSIS worker demo completed successfully!");
    } else {
        println!("❌ OSIS worker demo failed with exit code: {:?}", status.code());
        std::process::exit(status.code().unwrap_or(1));
    }

    Ok(())
}
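Since Cargo.toml registers this wrapper as an example target, it can be launched through Cargo (run from the worker crate root); the script's output streams through because the wrapper inherits stdio:

```bash
cargo run --example osis_worker_demo
```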
12 core/worker/examples/system/config.toml Normal file
@ -0,0 +1,12 @@
worker_id = "system_example_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/system_example_db"
preserve_tasks = false

[worker_type]
type = "async"
default_timeout_seconds = 30

[logging]
timestamps = true
level = "info"

183 core/worker/examples/system/example.sh Executable file
@ -0,0 +1,183 @@
#!/bin/bash
|
||||||
|
|
||||||
|
# System Worker Example Script
|
||||||
|
# This script demonstrates the System worker by:
|
||||||
|
# 1. Starting the worker with the config.toml
|
||||||
|
# 2. Sending multiple concurrent ping jobs to Redis
|
||||||
|
# 3. Verifying pong responses
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
CONFIG_FILE="$SCRIPT_DIR/config.toml"
|
||||||
|
WORKER_ID="system_example_worker"
|
||||||
|
REDIS_URL="redis://localhost:6379"
|
||||||
|
|
||||||
|
echo "=== System Worker Example ==="
|
||||||
|
echo "Script directory: $SCRIPT_DIR"
|
||||||
|
echo "Config file: $CONFIG_FILE"
|
||||||
|
echo "Worker ID: $WORKER_ID"
|
||||||
|
echo "Redis URL: $REDIS_URL"
|
||||||
|
echo
|
||||||
|
|
||||||
|
# Check if Redis is running
|
||||||
|
echo "Checking Redis connection..."
|
||||||
|
if ! redis-cli -u "$REDIS_URL" ping > /dev/null 2>&1; then
|
||||||
|
echo "❌ Error: Redis is not running or not accessible at $REDIS_URL"
|
||||||
|
echo "Please start Redis server first: redis-server"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "✅ Redis is running"
echo

# Clean up any existing jobs in the queue
echo "Cleaning up existing jobs in Redis..."
redis-cli -u "$REDIS_URL" del "hero:jobs:$WORKER_ID" > /dev/null 2>&1 || true
redis-cli -u "$REDIS_URL" eval "return redis.call('del', unpack(redis.call('keys', 'hero:job:*')))" 0 > /dev/null 2>&1 || true
echo "✅ Redis queues cleaned"
echo

# Start the System worker in the background
echo "Starting System worker..."
cd "$SCRIPT_DIR/../.."
cargo run --bin system -- --config "$CONFIG_FILE" --show-stats &
WORKER_PID=$!
echo "✅ System worker started (PID: $WORKER_PID)"
echo

# Wait a moment for the worker to initialize
echo "Waiting for worker to initialize..."
sleep 3

# Function to send a ping job (non-blocking); prints only the job ID on stdout
send_ping_job() {
    local job_num=$1
    local job_id="ping_job_${job_num}_$(date +%s%N)"

    # Status output goes to stderr so command substitution captures only the job ID
    echo "📤 Sending ping job: $job_id" >&2

    # Create job hash in Redis
    redis-cli -u "$REDIS_URL" hset "hero:job:$job_id" \
        id "$job_id" \
        script "ping" \
        status "Queued" \
        created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        worker_id "$WORKER_ID" > /dev/null

    # Add job to worker queue
    redis-cli -u "$REDIS_URL" lpush "hero:jobs:$WORKER_ID" "$job_id" > /dev/null

    echo "$job_id"
}

# Function to check job result
check_job_result() {
    local job_id=$1
    local timeout=15
    local elapsed=0

    # Poll every half second; elapsed counts half-second ticks, so the loop
    # runs for timeout * 2 ticks = timeout seconds
    while [ $elapsed -lt $((timeout * 2)) ]; do
        local status=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" status 2>/dev/null || echo "")
        if [ "$status" = "Finished" ]; then
            local result=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" result 2>/dev/null || echo "")
            if [ "$result" = "pong" ]; then
                echo "✅ Job $job_id completed successfully with result: $result"
                return 0
            else
                echo "❌ Job $job_id completed but with unexpected result: $result"
                return 1
            fi
        elif [ "$status" = "Error" ]; then
            local error=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" error 2>/dev/null || echo "")
            echo "❌ Job $job_id failed with error: $error"
            return 1
        fi
        sleep 0.5
        elapsed=$((elapsed + 1))
    done

    echo "❌ Job $job_id timed out after ${timeout}s"
    return 1
}

# Send multiple concurrent ping jobs to test async processing
echo "Testing concurrent ping/pong functionality..."
total_jobs=5
job_ids=()

echo
echo "--- Sending $total_jobs concurrent ping jobs ---"
for i in $(seq 1 $total_jobs); do
    job_id=$(send_ping_job $i)
    job_ids+=("$job_id")
    sleep 0.1 # Small delay between job submissions
done

echo
echo "--- Waiting for all jobs to complete ---"
success_count=0

for job_id in "${job_ids[@]}"; do
    echo "Checking job: $job_id"
    if check_job_result "$job_id"; then
        success_count=$((success_count + 1))
    fi
done

echo
echo "=== Test Results ==="
echo "Successful concurrent ping/pong tests: $success_count/$total_jobs"

if [ $success_count -eq $total_jobs ]; then
    echo "🎉 All tests passed! System worker is handling concurrent jobs correctly."
    exit_code=0
else
    echo "⚠️  Some tests failed. Check the worker logs for details."
    exit_code=1
fi

# Test rapid job submission to showcase async capabilities
echo
echo "--- Testing rapid job submission (10 jobs in quick succession) ---"
rapid_jobs=10
rapid_job_ids=()

for i in $(seq 1 $rapid_jobs); do
    job_id=$(send_ping_job "rapid_$i")
    rapid_job_ids+=("$job_id")
done

echo "Waiting for rapid jobs to complete..."
rapid_success=0
for job_id in "${rapid_job_ids[@]}"; do
    if check_job_result "$job_id"; then
        rapid_success=$((rapid_success + 1))
    fi
done

echo "Rapid submission test: $rapid_success/$rapid_jobs successful"

# Clean up
echo
echo "Cleaning up..."
echo "Stopping System worker (PID: $WORKER_PID)..."
kill $WORKER_PID 2>/dev/null || true
wait $WORKER_PID 2>/dev/null || true
echo "✅ Worker stopped"

echo "Cleaning up Redis jobs..."
redis-cli -u "$REDIS_URL" del "hero:jobs:$WORKER_ID" > /dev/null 2>&1 || true
redis-cli -u "$REDIS_URL" eval "return redis.call('del', unpack(redis.call('keys', 'hero:job:*')))" 0 > /dev/null 2>&1 || true
echo "✅ Redis cleaned up"

echo
echo "=== System Worker Example Complete ==="
total_success=$((success_count + rapid_success))
total_tests=$((total_jobs + rapid_jobs))
echo "Overall success rate: $total_success/$total_tests"

if [ $total_success -eq $total_tests ]; then
    exit 0
else
    exit 1
fi
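For comparison, the same submit-and-poll protocol the script drives through redis-cli can be sketched with the `redis` crate. This is a minimal illustration, not part of this commit; the key layout (`hero:job:<id>` hash, `hero:jobs:<worker_id>` queue) and field names are copied from the script above, and the worker ID is hypothetical.

```rust
// Sketch only (not part of this commit): submit a ping job and poll its status,
// mirroring the redis-cli calls in the script above. Key names and fields are
// assumptions taken from the script; "system_worker_1" is a hypothetical ID.
use redis::AsyncCommands;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

async fn submit_and_poll(conn: &mut redis::aio::MultiplexedConnection) -> redis::RedisResult<String> {
    let worker_id = "system_worker_1";
    let nanos = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos();
    let job_id = format!("ping_job_{}", nanos);

    // Create the job hash, then push the ID onto the worker's queue.
    let _: () = conn.hset_multiple(
        format!("hero:job:{}", job_id),
        &[("id", job_id.as_str()), ("script", "ping"), ("status", "Queued")],
    ).await?;
    let _: () = conn.lpush(format!("hero:jobs:{}", worker_id), &job_id).await?;

    // Poll the status field every half second until the worker reports a terminal state.
    loop {
        let status: Option<String> = conn.hget(format!("hero:job:{}", job_id), "status").await?;
        match status.as_deref() {
            Some("Finished") | Some("Error") => return Ok(job_id),
            _ => tokio::time::sleep(Duration::from_millis(500)).await,
        }
    }
}
```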
15
core/worker/examples/system_config.toml
Normal file
@ -0,0 +1,15 @@
# System Worker Configuration
# Asynchronous worker for high-throughput concurrent processing

worker_id = "system_worker_1"
redis_url = "redis://localhost:6379"
db_path = "/tmp/system_worker_db"
preserve_tasks = false

[worker_type]
type = "async"
default_timeout_seconds = 300  # 5 minutes

[logging]
timestamps = true
level = "info"
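This file parses into the TOML-backed `WorkerConfig` type introduced in `core/worker/src/config.rs` later in this diff. As a hedged sketch (the crate path `rhailib_worker::config` is inferred from the examples' imports, and the file path is illustrative), loading it looks like:

```rust
// Sketch only: load the TOML above via the config module added later in this
// commit. The crate path is an assumption based on the examples' imports.
use rhailib_worker::config::WorkerConfig;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = WorkerConfig::from_file("core/worker/examples/system_config.toml")?;
    assert!(config.is_async());
    // type = "async" with default_timeout_seconds = 300 yields a 300s default timeout.
    println!("{}: default timeout {:?}", config.worker_id, config.get_default_timeout());
    Ok(())
}
```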
60
core/worker/examples/system_worker_demo.rs
Normal file
@ -0,0 +1,60 @@
use std::process::{Command, Stdio};
use std::env;

/// System Worker Demo Runner
///
/// This Rust wrapper executes the System worker bash script example.
/// It provides a way to run shell-based examples through Cargo.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🚀 System Worker Demo");
    println!("====================");
    println!();

    // Get the current working directory and construct the path to the shell script
    let current_dir = env::current_dir()?;
    let script_path = current_dir.join("examples").join("system").join("example.sh");

    // Check if the script exists
    if !script_path.exists() {
        eprintln!("❌ Error: Script not found at {:?}", script_path);
        eprintln!("   Make sure you're running this from the worker crate root directory.");
        std::process::exit(1);
    }

    println!("📁 Script location: {:?}", script_path);
    println!("🔧 Executing System worker example...");
    println!();

    // Make sure the script is executable
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = std::fs::metadata(&script_path)?.permissions();
        perms.set_mode(0o755);
        std::fs::set_permissions(&script_path, perms)?;
    }

    // Execute the shell script, inheriting stdio so output streams through
    let mut child = Command::new("bash")
        .arg(&script_path)
        .current_dir(&current_dir)
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .spawn()?;

    // Wait for the script to complete
    let status = child.wait()?;

    println!();
    if status.success() {
        println!("✅ System worker demo completed successfully!");
    } else {
        println!("❌ System worker demo failed with exit code: {:?}", status.code());
        std::process::exit(status.code().unwrap_or(1));
    }

    Ok(())
}
322
core/worker/examples/trait_based_worker_demo.rs
Normal file
@ -0,0 +1,322 @@
//! # Trait-Based Worker Demo
//!
//! This example demonstrates the new unified worker interface using the Worker trait.
//! It shows how both synchronous and asynchronous workers can be used with the same
//! API, eliminating code duplication and providing a clean, consistent interface.
//!
//! ## Features Demonstrated
//!
//! - Unified worker interface using the Worker trait
//! - Both sync and async worker implementations
//! - Shared configuration and spawn logic
//! - Clean shutdown handling
//! - Job processing with different strategies
//!
//! ## Usage
//!
//! Make sure Redis is running on localhost:6379, then run:
//! ```bash
//! cargo run --example trait_based_worker_demo
//! ```

use hero_job::{Job, JobStatus, ScriptType};
use log::{info, warn, error};
use rhailib_worker::{
    SyncWorker, AsyncWorker,
    spawn_sync_worker, spawn_async_worker,
    engine::create_heromodels_engine,
    worker_trait::{spawn_worker, Worker}
};
use redis::AsyncCommands;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tokio::time::sleep;

const REDIS_URL: &str = "redis://127.0.0.1:6379";

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize logging
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

    info!("Starting Trait-Based Worker Demo");

    // Create Redis connection for job creation
    let redis_client = redis::Client::open(REDIS_URL)?;
    let mut redis_conn = redis_client.get_multiplexed_async_connection().await?;

    // Demo 1: Using the unified trait-based interface
    info!("=== Demo 1: Unified Trait-Based Interface ===");

    // Create shutdown channels for both workers
    let (sync_shutdown_tx, sync_shutdown_rx) = mpsc::channel::<()>(1);
    let (async_shutdown_tx, async_shutdown_rx) = mpsc::channel::<()>(1);

    // Workers are now configured using the builder pattern directly
    let sync_worker = Arc::new(
        SyncWorker::builder()
            .worker_id("demo_sync_worker")
            .db_path("/tmp")
            .redis_url("redis://localhost:6379")
            .preserve_tasks(false)
            .build()
            .expect("Failed to build SyncWorker")
    );

    let async_worker = Arc::new(
        AsyncWorker::builder()
            .worker_id("demo_async_worker")
            .db_path("/tmp")
            .redis_url("redis://localhost:6379")
            .default_timeout(Duration::from_secs(300))
            .build()
            .expect("Failed to build AsyncWorker")
    );

    let sync_engine = create_heromodels_engine();
    let async_engine = create_heromodels_engine();

    info!("Spawning {} worker: {}", sync_worker.worker_type(), sync_worker.worker_id());
    let sync_handle = spawn_worker(sync_worker.clone(), sync_engine, sync_shutdown_rx);

    info!("Spawning {} worker: {}", async_worker.worker_type(), async_worker.worker_id());
    let async_handle = spawn_worker(async_worker.clone(), async_engine, async_shutdown_rx);

    // Give workers time to start
    sleep(Duration::from_secs(1)).await;

    // Create and dispatch jobs to both workers
    info!("Creating demo jobs for both workers...");

    // Job for sync worker - simple calculation
    let sync_job = create_demo_job(
        "sync_calculation",
        r#"
            print("Sync worker: Starting calculation...");
            let result = 0;
            for i in 1..=100 {
                result += i;
            }
            print("Sync worker: Sum of 1-100 = " + result);
            result
        "#,
        None,
    ).await?;

    dispatch_job(&mut redis_conn, &sync_job, sync_worker.worker_id()).await?;
    info!("Dispatched job to sync worker: {}", sync_job.id);

    // Job for async worker - with timeout demonstration
    let async_job = create_demo_job(
        "async_calculation",
        r#"
            print("Async worker: Starting calculation...");
            let result = 1;
            for i in 1..=10 {
                result *= i;
            }
            print("Async worker: 10! = " + result);
            result
        "#,
        Some(15), // 15 second timeout
    ).await?;

    dispatch_job(&mut redis_conn, &async_job, async_worker.worker_id()).await?;
    info!("Dispatched job to async worker: {}", async_job.id);

    // Monitor job execution
    info!("Monitoring job execution for 10 seconds...");
    let monitor_start = std::time::Instant::now();
    let monitor_duration = Duration::from_secs(10);

    while monitor_start.elapsed() < monitor_duration {
        // Check sync job status
        if let Ok(status) = Job::get_status(&mut redis_conn, &sync_job.id).await {
            match status {
                JobStatus::Finished => {
                    let job_key = format!("hero:job:{}", sync_job.id);
                    if let Ok(result) = redis_conn.hget::<_, _, String>(&job_key, "output").await {
                        info!("✅ Sync Job {} COMPLETED with result: {}", sync_job.id, result);
                    } else {
                        info!("✅ Sync Job {} COMPLETED", sync_job.id);
                    }
                }
                JobStatus::Error => {
                    let job_key = format!("hero:job:{}", sync_job.id);
                    if let Ok(error) = redis_conn.hget::<_, _, String>(&job_key, "error").await {
                        warn!("❌ Sync Job {} FAILED with error: {}", sync_job.id, error);
                    } else {
                        warn!("❌ Sync Job {} FAILED", sync_job.id);
                    }
                }
                _ => info!("🔄 Sync Job {} status: {:?}", sync_job.id, status),
            }
        }

        // Check async job status
        if let Ok(status) = Job::get_status(&mut redis_conn, &async_job.id).await {
            match status {
                JobStatus::Finished => {
                    let job_key = format!("hero:job:{}", async_job.id);
                    if let Ok(result) = redis_conn.hget::<_, _, String>(&job_key, "output").await {
                        info!("✅ Async Job {} COMPLETED with result: {}", async_job.id, result);
                    } else {
                        info!("✅ Async Job {} COMPLETED", async_job.id);
                    }
                }
                JobStatus::Error => {
                    let job_key = format!("hero:job:{}", async_job.id);
                    if let Ok(error) = redis_conn.hget::<_, _, String>(&job_key, "error").await {
                        warn!("❌ Async Job {} FAILED with error: {}", async_job.id, error);
                    } else {
                        warn!("❌ Async Job {} FAILED", async_job.id);
                    }
                }
                _ => info!("🔄 Async Job {} status: {:?}", async_job.id, status),
            }
        }

        sleep(Duration::from_secs(2)).await;
    }

    // Demo 2: Using convenience functions (backward compatibility)
    info!("\n=== Demo 2: Convenience Functions (Backward Compatibility) ===");

    let (conv_sync_shutdown_tx, conv_sync_shutdown_rx) = mpsc::channel::<()>(1);
    let (conv_async_shutdown_tx, conv_async_shutdown_rx) = mpsc::channel::<()>(1);

    // Spawn workers using convenience functions
    let conv_sync_engine = create_heromodels_engine();
    let conv_async_engine = create_heromodels_engine();

    info!("Spawning sync worker using convenience function...");
    let conv_sync_handle = spawn_sync_worker(
        "convenience_sync_worker".to_string(),
        "/tmp".to_string(),
        conv_sync_engine,
        REDIS_URL.to_string(),
        conv_sync_shutdown_rx,
        false,
    );

    info!("Spawning async worker using convenience function...");
    let conv_async_handle = spawn_async_worker(
        "convenience_async_worker".to_string(),
        "/tmp".to_string(),
        conv_async_engine,
        REDIS_URL.to_string(),
        conv_async_shutdown_rx,
        Duration::from_secs(20), // 20 second timeout
    );

    // Give convenience workers time to start
    sleep(Duration::from_secs(1)).await;

    // Create jobs for convenience workers
    let conv_sync_job = create_demo_job(
        "convenience_sync",
        r#"
            print("Convenience sync worker: Hello World!");
            "Hello from convenience sync worker"
        "#,
        None,
    ).await?;

    let conv_async_job = create_demo_job(
        "convenience_async",
        r#"
            print("Convenience async worker: Hello World!");
            "Hello from convenience async worker"
        "#,
        Some(10),
    ).await?;

    dispatch_job(&mut redis_conn, &conv_sync_job, "convenience_sync_worker").await?;
    dispatch_job(&mut redis_conn, &conv_async_job, "convenience_async_worker").await?;

    info!("Dispatched jobs to convenience workers");

    // Wait a bit for jobs to complete
    sleep(Duration::from_secs(5)).await;

    // Shutdown all workers gracefully
    info!("\n=== Shutting Down All Workers ===");

    info!("Sending shutdown signals...");
    let _ = sync_shutdown_tx.send(()).await;
    let _ = async_shutdown_tx.send(()).await;
    let _ = conv_sync_shutdown_tx.send(()).await;
    let _ = conv_async_shutdown_tx.send(()).await;

    info!("Waiting for workers to shutdown...");

    // Wait for all workers to shutdown
    let results = tokio::join!(
        sync_handle,
        async_handle,
        conv_sync_handle,
        conv_async_handle
    );

    match results {
        (Ok(Ok(())), Ok(Ok(())), Ok(Ok(())), Ok(Ok(()))) => {
            info!("All workers shut down successfully!");
        }
        _ => {
            error!("Some workers encountered errors during shutdown");
        }
    }

    info!("Trait-Based Worker Demo completed successfully!");

    // Summary
    info!("\n=== Summary ===");
    info!("✅ Demonstrated unified Worker trait interface");
    info!("✅ Showed both sync and async worker implementations");
    info!("✅ Used shared configuration and spawn logic");
    info!("✅ Maintained backward compatibility with convenience functions");
    info!("✅ Eliminated code duplication between worker types");
    info!("✅ Provided clean, consistent API for all worker operations");

    Ok(())
}

/// Create a demo job with the specified script and timeout
async fn create_demo_job(
    name: &str,
    script: &str,
    timeout_seconds: Option<i32>,
) -> Result<Job, Box<dyn std::error::Error>> {
    let mut job = Job::new(
        format!("demo_{}", name),   // caller_id
        "demo_context".to_string(), // context_id
        script.to_string(),
        ScriptType::OSIS,
    );

    // Set timeout if provided
    if let Some(timeout) = timeout_seconds {
        job.timeout = Duration::from_secs(timeout as u64);
    }

    Ok(job)
}

/// Dispatch a job to the worker queue
async fn dispatch_job(
    redis_conn: &mut redis::aio::MultiplexedConnection,
    job: &Job,
    worker_queue: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    // Store job in Redis
    job.store_in_redis(redis_conn).await?;

    // Add job to worker queue
    let queue_key = format!("hero:job:{}", worker_queue);
    let _: () = redis_conn.rpush(&queue_key, &job.id).await?;

    Ok(())
}
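As a quick sanity check on the two demo scripts: the sync job sums 1..=100 and the async job computes 10!, so the monitored results should be 5050 and 3628800 respectively. A plain-Rust equivalent for reference:

```rust
// Plain-Rust equivalent of the two Rhai demo scripts above, for reference only.
fn main() {
    let sum: i64 = (1..=100).sum();      // sync demo: 5050
    let fact: i64 = (1..=10).product();  // async demo: 10! = 3628800
    assert_eq!((sum, fact), (5050, 3628800));
}
```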
420
core/worker/src/async_worker_impl.rs
Normal file
@ -0,0 +1,420 @@
//! # Asynchronous Worker Implementation
//!
//! This module provides an asynchronous worker implementation that can process
//! multiple jobs concurrently with timeout support. Each job is spawned as a
//! separate Tokio task, allowing for parallel execution and proper timeout handling.
//!
//! ## Features
//!
//! - **Concurrent Processing**: Multiple jobs can run simultaneously
//! - **Timeout Support**: Jobs that exceed their timeout are automatically cancelled
//! - **Resource Cleanup**: Proper cleanup of aborted/cancelled jobs
//! - **Non-blocking**: Worker continues processing new jobs while others are running
//! - **Scalable**: Can handle high job throughput with parallel execution
//!
//! ## Usage
//!
//! ```rust
//! use std::sync::Arc;
//! use std::time::Duration;
//! use rhailib_worker::async_worker_impl::AsyncWorker;
//! use rhailib_worker::worker_trait::spawn_worker;
//! use rhailib_worker::engine::create_heromodels_engine;
//! use tokio::sync::mpsc;
//!
//! let worker = Arc::new(
//!     AsyncWorker::builder()
//!         .worker_id("async_worker_1")
//!         .db_path("/path/to/db")
//!         .redis_url("redis://localhost:6379")
//!         .default_timeout(Duration::from_secs(300))
//!         .build()
//!         .expect("Failed to build AsyncWorker")
//! );
//! let engine = create_heromodels_engine();
//! let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
//!
//! let handle = spawn_worker(worker, engine, shutdown_rx);
//!
//! // Later, shutdown the worker
//! shutdown_tx.send(()).await.unwrap();
//! handle.await.unwrap().unwrap();
//! ```

use async_trait::async_trait;
use hero_job::{Job, JobStatus};
use log::{debug, error, info, warn};
use rhai::Engine;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
use tokio::task::JoinHandle;
use tokio::time::timeout;

use crate::engine::eval_script;
use crate::worker_trait::Worker;
use crate::initialize_redis_connection;

/// Represents a running job with its handle and metadata
#[derive(Debug)]
struct RunningJob {
    job_id: String,
    handle: JoinHandle<()>,
    started_at: std::time::Instant,
}

/// Builder for AsyncWorker
#[derive(Debug, Default)]
pub struct AsyncWorkerBuilder {
    worker_id: Option<String>,
    db_path: Option<String>,
    redis_url: Option<String>,
    default_timeout: Option<Duration>,
}

impl AsyncWorkerBuilder {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn worker_id<S: Into<String>>(mut self, worker_id: S) -> Self {
        self.worker_id = Some(worker_id.into());
        self
    }

    pub fn db_path<S: Into<String>>(mut self, db_path: S) -> Self {
        self.db_path = Some(db_path.into());
        self
    }

    pub fn redis_url<S: Into<String>>(mut self, redis_url: S) -> Self {
        self.redis_url = Some(redis_url.into());
        self
    }

    pub fn default_timeout(mut self, timeout: Duration) -> Self {
        self.default_timeout = Some(timeout);
        self
    }

    pub fn build(self) -> Result<AsyncWorker, String> {
        Ok(AsyncWorker {
            worker_id: self.worker_id.ok_or("worker_id is required")?,
            db_path: self.db_path.ok_or("db_path is required")?,
            redis_url: self.redis_url.ok_or("redis_url is required")?,
            default_timeout: self.default_timeout.unwrap_or(Duration::from_secs(300)),
            running_jobs: Arc::new(Mutex::new(HashMap::new())),
        })
    }
}

/// Asynchronous worker that processes jobs concurrently
#[derive(Debug, Clone)]
pub struct AsyncWorker {
    pub worker_id: String,
    pub db_path: String,
    pub redis_url: String,
    pub default_timeout: Duration,
    running_jobs: Arc<Mutex<HashMap<String, RunningJob>>>,
}

impl AsyncWorker {
    /// Create a new AsyncWorkerBuilder
    pub fn builder() -> AsyncWorkerBuilder {
        AsyncWorkerBuilder::new()
    }

    /// Add a running job to the tracking map
    async fn add_running_job(&self, job_id: String, handle: JoinHandle<()>) {
        let running_job = RunningJob {
            job_id: job_id.clone(),
            handle,
            started_at: std::time::Instant::now(),
        };

        let mut jobs = self.running_jobs.lock().await;
        jobs.insert(job_id.clone(), running_job);
        debug!("Async Worker: Added running job '{}'. Total running: {}",
               job_id, jobs.len());
    }

    /// Remove a completed job from the tracking map
    async fn remove_running_job(&self, job_id: &str) {
        let mut jobs = self.running_jobs.lock().await;
        if let Some(job) = jobs.remove(job_id) {
            let duration = job.started_at.elapsed();
            debug!("Async Worker: Removed completed job '{}' after {:?}. Remaining: {}",
                   job_id, duration, jobs.len());
        }
    }

    /// Get the count of currently running jobs
    pub async fn running_job_count(&self) -> usize {
        let jobs = self.running_jobs.lock().await;
        jobs.len()
    }

    /// Cleanup any finished jobs from the running jobs map
    async fn cleanup_finished_jobs(&self) {
        let mut jobs = self.running_jobs.lock().await;
        let mut to_remove = Vec::new();

        for (job_id, running_job) in jobs.iter() {
            if running_job.handle.is_finished() {
                to_remove.push(job_id.clone());
            }
        }

        for job_id in to_remove {
            if let Some(job) = jobs.remove(&job_id) {
                let duration = job.started_at.elapsed();
                debug!("Async Worker: Cleaned up finished job '{}' after {:?}",
                       job_id, duration);
            }
        }
    }

    /// Execute a single job asynchronously with timeout support
    async fn execute_job_with_timeout(
        job: Job,
        engine: Engine,
        worker_id: String,
        redis_url: String,
        job_timeout: Duration,
    ) {
        let job_id = job.id.clone();
        info!("Async Worker '{}', Job {}: Starting execution with timeout {:?}",
              worker_id, job_id, job_timeout);

        // Create a new Redis connection for this job
        let mut redis_conn = match initialize_redis_connection(&worker_id, &redis_url).await {
            Ok(conn) => conn,
            Err(e) => {
                error!("Async Worker '{}', Job {}: Failed to initialize Redis connection: {}",
                       worker_id, job_id, e);
                return;
            }
        };

        // Update job status to Started
        if let Err(e) = Job::update_status(&mut redis_conn, &job_id, JobStatus::Started).await {
            error!("Async Worker '{}', Job {}: Failed to update status to Started: {}",
                   worker_id, job_id, e);
            return;
        }

        // Create the script execution task
        let script_task = async {
            // Execute the Rhai script
            match eval_script(&engine, &job.script) {
                Ok(result) => {
                    let result_str = format!("{:?}", result);
                    info!("Async Worker '{}', Job {}: Script executed successfully. Result: {}",
                          worker_id, job_id, result_str);

                    // Update job with success result
                    if let Err(e) = Job::set_result(&mut redis_conn, &job_id, &result_str).await {
                        error!("Async Worker '{}', Job {}: Failed to set result: {}",
                               worker_id, job_id, e);
                        return;
                    }

                    if let Err(e) = Job::update_status(&mut redis_conn, &job_id, JobStatus::Finished).await {
                        error!("Async Worker '{}', Job {}: Failed to update status to Finished: {}",
                               worker_id, job_id, e);
                    }
                }
                Err(e) => {
                    let error_msg = format!("Script execution error: {}", e);
                    error!("Async Worker '{}', Job {}: {}", worker_id, job_id, error_msg);

                    // Update job with error
                    if let Err(e) = Job::set_error(&mut redis_conn, &job_id, &error_msg).await {
                        error!("Async Worker '{}', Job {}: Failed to set error: {}",
                               worker_id, job_id, e);
                        return;
                    }

                    if let Err(e) = Job::update_status(&mut redis_conn, &job_id, JobStatus::Error).await {
                        error!("Async Worker '{}', Job {}: Failed to update status to Error: {}",
                               worker_id, job_id, e);
                    }
                }
            }
        };

        // Execute the script with timeout
        match timeout(job_timeout, script_task).await {
            Ok(()) => {
                info!("Async Worker '{}', Job {}: Completed within timeout", worker_id, job_id);
            }
            Err(_) => {
                warn!("Async Worker '{}', Job {}: Timed out after {:?}, marking as error",
                      worker_id, job_id, job_timeout);

                let timeout_msg = format!("Job timed out after {:?}", job_timeout);
                if let Err(e) = Job::set_error(&mut redis_conn, &job_id, &timeout_msg).await {
                    error!("Async Worker '{}', Job {}: Failed to set timeout error: {}",
                           worker_id, job_id, e);
                }

                if let Err(e) = Job::update_status(&mut redis_conn, &job_id, JobStatus::Error).await {
                    error!("Async Worker '{}', Job {}: Failed to update status to Error after timeout: {}",
                           worker_id, job_id, e);
                }
            }
        }

        info!("Async Worker '{}', Job {}: Job processing completed", worker_id, job_id);
    }
}

impl Default for AsyncWorker {
    fn default() -> Self {
        // Default AsyncWorker with placeholder values.
        // In practice, use the builder pattern instead.
        Self {
            worker_id: "default_async_worker".to_string(),
            db_path: "/tmp".to_string(),
            redis_url: "redis://localhost:6379".to_string(),
            default_timeout: Duration::from_secs(300),
            running_jobs: Arc::new(Mutex::new(HashMap::new())),
        }
    }
}

#[async_trait]
impl Worker for AsyncWorker {
    async fn process_job(
        &self,
        job: Job,
        engine: Engine, // Reuse the stateless engine
        _redis_conn: &mut redis::aio::MultiplexedConnection,
    ) {
        let job_id = job.id.clone();
        let worker_id = &self.worker_id;

        // Determine timeout (use job-specific timeout if set, otherwise the worker default)
        let job_timeout = if job.timeout.as_secs() > 0 {
            job.timeout
        } else {
            self.default_timeout
        };

        info!("Async Worker '{}', Job {}: Spawning job execution task with timeout {:?}",
              worker_id, job_id, job_timeout);

        // Clone necessary data for the spawned task
        let job_id_clone = job_id.clone();
        let worker_id_clone = worker_id.clone();
        let worker_id_debug = worker_id.clone(); // Additional clone for the debug statement
        let job_id_debug = job_id.clone();       // Additional clone for the debug statement
        let redis_url_clone = self.redis_url.clone();
        let running_jobs_clone = Arc::clone(&self.running_jobs);

        // Spawn the job execution task
        let job_handle = tokio::spawn(async move {
            Self::execute_job_with_timeout(
                job,
                engine,
                worker_id_clone,
                redis_url_clone,
                job_timeout,
            ).await;

            // Remove this job from the running jobs map when it completes
            let mut jobs = running_jobs_clone.lock().await;
            if let Some(running_job) = jobs.remove(&job_id_clone) {
                let duration = running_job.started_at.elapsed();
                debug!("Async Worker '{}': Removed completed job '{}' after {:?}",
                       worker_id_debug, job_id_debug, duration);
            }
        });

        // Add the job to the running jobs map
        self.add_running_job(job_id, job_handle).await;

        // Cleanup finished jobs periodically
        self.cleanup_finished_jobs().await;
    }

    fn worker_type(&self) -> &'static str {
        "Async"
    }

    fn worker_id(&self) -> &str {
        &self.worker_id
    }

    fn redis_url(&self) -> &str {
        &self.redis_url
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::create_heromodels_engine;
    use hero_job::ScriptType;

    #[tokio::test]
    async fn test_async_worker_creation() {
        let worker = AsyncWorker::default();
        assert_eq!(worker.worker_type(), "Async");
        assert_eq!(worker.running_job_count().await, 0);
    }

    #[tokio::test]
    async fn test_async_worker_default() {
        let worker = AsyncWorker::default();
        assert_eq!(worker.worker_type(), "Async");
    }

    #[tokio::test]
    async fn test_async_worker_job_tracking() {
        let worker = AsyncWorker::default();

        // Simulate adding a job
        let handle = tokio::spawn(async {
            tokio::time::sleep(Duration::from_millis(100)).await;
        });

        worker.add_running_job("job_1".to_string(), handle).await;
        assert_eq!(worker.running_job_count().await, 1);

        // Wait for job to complete
        tokio::time::sleep(Duration::from_millis(200)).await;
        worker.cleanup_finished_jobs().await;
        assert_eq!(worker.running_job_count().await, 0);
    }

    #[tokio::test]
    async fn test_async_worker_process_job_interface() {
        let worker = AsyncWorker::builder()
            .worker_id("test_async_worker")
            .db_path("/tmp")
            .redis_url("redis://localhost:6379")
            .default_timeout(Duration::from_secs(60))
            .build()
            .expect("Failed to build AsyncWorker");
        let engine = create_heromodels_engine();

        // Create a simple test job
        let job = Job::new(
            "test_caller".to_string(),
            "test_context".to_string(),
            r#"print("Hello from async worker test!"); 42"#.to_string(),
            ScriptType::OSIS,
        );

        // Note: This test doesn't actually connect to Redis; it just exercises the interface.
        // In a real test environment, you'd need a Redis instance or mock.
        // worker.process_job(job, engine, &mut redis_conn).await;

        // For now, just verify the worker was created successfully
        assert_eq!(worker.worker_type(), "Async");
    }
}
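The timeout behavior above comes from `tokio::time::timeout`, which stops polling and drops the script future once the deadline elapses; the `Err(_)` arm then records the timeout in Redis. A stripped-down sketch of that pattern, using nothing beyond the tokio API:

```rust
// Minimal illustration of the cancellation pattern used by execute_job_with_timeout:
// when the deadline passes, timeout() drops the inner future and returns Err(Elapsed).
use std::time::Duration;
use tokio::time::{sleep, timeout};

#[tokio::main]
async fn main() {
    let slow_job = async {
        sleep(Duration::from_secs(10)).await; // stands in for a long eval_script call
        "done"
    };

    match timeout(Duration::from_secs(1), slow_job).await {
        Ok(result) => println!("finished within the deadline: {}", result),
        Err(_) => println!("deadline elapsed; the job future was dropped"),
    }
}
```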
250
core/worker/src/config.rs
Normal file
@ -0,0 +1,250 @@
//! Worker Configuration Module - TOML-based configuration for Hero workers

use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
use std::time::Duration;

/// Worker configuration loaded from TOML file
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkerConfig {
    /// Worker identification
    pub worker_id: String,

    /// Redis connection URL
    pub redis_url: String,

    /// Database path for Rhai engine
    pub db_path: String,

    /// Whether to preserve task details after completion
    #[serde(default = "default_preserve_tasks")]
    pub preserve_tasks: bool,

    /// Worker type configuration
    pub worker_type: WorkerType,

    /// Logging configuration
    #[serde(default)]
    pub logging: LoggingConfig,
}

/// Worker type configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum WorkerType {
    /// Synchronous worker configuration
    #[serde(rename = "sync")]
    Sync,

    /// Asynchronous worker configuration
    #[serde(rename = "async")]
    Async {
        /// Default timeout for jobs in seconds
        #[serde(default = "default_timeout_seconds")]
        default_timeout_seconds: u64,
    },
}

/// Logging configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoggingConfig {
    /// Whether to include timestamps in log output
    #[serde(default = "default_timestamps")]
    pub timestamps: bool,

    /// Log level (trace, debug, info, warn, error)
    #[serde(default = "default_log_level")]
    pub level: String,
}

impl Default for LoggingConfig {
    fn default() -> Self {
        Self {
            timestamps: default_timestamps(),
            level: default_log_level(),
        }
    }
}

impl WorkerConfig {
    /// Load configuration from a TOML file
    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
        let content = fs::read_to_string(&path)
            .map_err(|e| ConfigError::IoError(format!("Failed to read config file: {}", e)))?;

        let config: WorkerConfig = toml::from_str(&content)
            .map_err(|e| ConfigError::ParseError(format!("Failed to parse TOML: {}", e)))?;

        config.validate()?;
        Ok(config)
    }

    /// Validate the configuration
    fn validate(&self) -> Result<(), ConfigError> {
        if self.worker_id.is_empty() {
            return Err(ConfigError::ValidationError("worker_id cannot be empty".to_string()));
        }

        if self.redis_url.is_empty() {
            return Err(ConfigError::ValidationError("redis_url cannot be empty".to_string()));
        }

        if self.db_path.is_empty() {
            return Err(ConfigError::ValidationError("db_path cannot be empty".to_string()));
        }

        // Validate log level
        match self.logging.level.to_lowercase().as_str() {
            "trace" | "debug" | "info" | "warn" | "error" => {},
            _ => return Err(ConfigError::ValidationError(
                format!("Invalid log level: {}. Must be one of: trace, debug, info, warn, error", self.logging.level)
            )),
        }

        Ok(())
    }

    /// Get the default timeout duration for async workers
    pub fn get_default_timeout(&self) -> Option<Duration> {
        match &self.worker_type {
            WorkerType::Sync => None,
            WorkerType::Async { default_timeout_seconds } => {
                Some(Duration::from_secs(*default_timeout_seconds))
            }
        }
    }

    /// Check if this is a sync worker configuration
    pub fn is_sync(&self) -> bool {
        matches!(self.worker_type, WorkerType::Sync)
    }

    /// Check if this is an async worker configuration
    pub fn is_async(&self) -> bool {
        matches!(self.worker_type, WorkerType::Async { .. })
    }
}

/// Configuration error types
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    #[error("IO error: {0}")]
    IoError(String),

    #[error("Parse error: {0}")]
    ParseError(String),

    #[error("Validation error: {0}")]
    ValidationError(String),
}

// Default value functions for serde
fn default_preserve_tasks() -> bool {
    false
}

fn default_timeout_seconds() -> u64 {
    300 // 5 minutes
}

fn default_timestamps() -> bool {
    true
}

fn default_log_level() -> String {
    "info".to_string()
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    #[test]
    fn test_sync_worker_config() {
        let config_toml = r#"
worker_id = "sync_worker_1"
redis_url = "redis://localhost:6379"
db_path = "/tmp/worker_db"

[worker_type]
type = "sync"

[logging]
timestamps = false
level = "debug"
"#;

        let config: WorkerConfig = toml::from_str(config_toml).unwrap();
        assert_eq!(config.worker_id, "sync_worker_1");
        assert!(config.is_sync());
        assert!(!config.is_async());
        assert_eq!(config.get_default_timeout(), None);
        assert!(!config.logging.timestamps);
        assert_eq!(config.logging.level, "debug");
    }

    #[test]
    fn test_async_worker_config() {
        let config_toml = r#"
worker_id = "async_worker_1"
redis_url = "redis://localhost:6379"
db_path = "/tmp/worker_db"

[worker_type]
type = "async"
default_timeout_seconds = 600

[logging]
timestamps = true
level = "info"
"#;

        let config: WorkerConfig = toml::from_str(config_toml).unwrap();
        assert_eq!(config.worker_id, "async_worker_1");
        assert!(!config.is_sync());
        assert!(config.is_async());
        assert_eq!(config.get_default_timeout(), Some(Duration::from_secs(600)));
        assert!(config.logging.timestamps);
        assert_eq!(config.logging.level, "info");
    }

    #[test]
    fn test_config_from_file() {
        let config_toml = r#"
worker_id = "test_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"

[worker_type]
type = "sync"
"#;

        let mut temp_file = NamedTempFile::new().unwrap();
        temp_file.write_all(config_toml.as_bytes()).unwrap();

        let config = WorkerConfig::from_file(temp_file.path()).unwrap();
        assert_eq!(config.worker_id, "test_worker");
        assert!(config.is_sync());
    }

    #[test]
    fn test_config_validation() {
        let config_toml = r#"
worker_id = ""
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"

[worker_type]
type = "sync"
"#;

        let result: Result<WorkerConfig, _> = toml::from_str(config_toml);
        assert!(result.is_ok());

        let config = result.unwrap();
        assert!(config.validate().is_err());
    }
}
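Note that the crate now carries two `WorkerConfig` types: the runtime one re-exported from `worker_trait` and this TOML-backed one. A hedged sketch of wiring the file config into the convenience spawn functions added in `lib.rs` below (the alias avoids the name clash; the config path is illustrative):

```rust
// Sketch only: choose a worker implementation from the parsed TOML config.
// The alias works around the two WorkerConfig types in this crate.
use rhailib_worker::config::WorkerConfig as FileConfig;
use rhailib_worker::engine::create_heromodels_engine;
use rhailib_worker::{spawn_async_worker, spawn_sync_worker};
use tokio::sync::mpsc;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cfg = FileConfig::from_file("worker.toml")?; // illustrative path
    let engine = create_heromodels_engine();
    let (_shutdown_tx, shutdown_rx) = mpsc::channel(1);

    // Sync configs map to the sequential worker; async configs carry a default timeout.
    let handle = match cfg.get_default_timeout() {
        None => spawn_sync_worker(
            cfg.worker_id, cfg.db_path, engine, cfg.redis_url, shutdown_rx, cfg.preserve_tasks,
        ),
        Some(t) => spawn_async_worker(
            cfg.worker_id, cfg.db_path, engine, cfg.redis_url, shutdown_rx, t,
        ),
    };
    handle.await??;
    Ok(())
}
```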
@ -8,11 +8,23 @@ use tokio::task::JoinHandle;
 /// Engine module containing Rhai engine creation and script execution utilities
 pub mod engine;
+
+/// Worker trait abstraction for unified worker interface
+pub mod worker_trait;
+
+/// Synchronous worker implementation
+pub mod sync_worker;
+
+/// Asynchronous worker implementation with trait-based interface
+pub mod async_worker_impl;
+
+/// Configuration module for TOML-based worker configuration
+pub mod config;
 
 const NAMESPACE_PREFIX: &str = "hero:job:";
 const BLPOP_TIMEOUT_SECONDS: usize = 5;
 
 /// Initialize Redis connection for the worker
-async fn initialize_redis_connection(
+pub(crate) async fn initialize_redis_connection(
     worker_id: &str,
     redis_url: &str,
 ) -> Result<redis::aio::MultiplexedConnection, Box<dyn std::error::Error + Send + Sync>> {
@ -33,7 +45,7 @@ async fn initialize_redis_connection(
 }
 
 /// Load job from Redis using Job struct
-async fn load_job_from_redis(
+pub(crate) async fn load_job_from_redis(
     redis_conn: &mut redis::aio::MultiplexedConnection,
     job_id: &str,
     worker_id: &str,
@ -232,3 +244,60 @@ pub fn spawn_rhai_worker(
         Ok(())
     })
 }
+
+// Re-export the main trait-based interface for convenience
+pub use worker_trait::{Worker, WorkerConfig, spawn_worker};
+pub use sync_worker::SyncWorker;
+pub use async_worker_impl::AsyncWorker;
+
+/// Convenience function to spawn a synchronous worker using the trait interface
+///
+/// This function provides backward compatibility with the original sync worker API
+/// while using the new trait-based implementation.
+pub fn spawn_sync_worker(
+    worker_id: String,
+    db_path: String,
+    engine: rhai::Engine,
+    redis_url: String,
+    shutdown_rx: mpsc::Receiver<()>,
+    preserve_tasks: bool,
+) -> JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>> {
+    use std::sync::Arc;
+
+    let worker = Arc::new(
+        SyncWorker::builder()
+            .worker_id(worker_id)
+            .db_path(db_path)
+            .redis_url(redis_url)
+            .preserve_tasks(preserve_tasks)
+            .build()
+            .expect("Failed to build SyncWorker")
+    );
+    spawn_worker(worker, engine, shutdown_rx)
+}
+
+/// Convenience function to spawn an asynchronous worker using the trait interface
+///
+/// This function provides a clean interface for the new async worker implementation
+/// with timeout support.
+pub fn spawn_async_worker(
+    worker_id: String,
+    db_path: String,
+    engine: rhai::Engine,
+    redis_url: String,
+    shutdown_rx: mpsc::Receiver<()>,
+    default_timeout: std::time::Duration,
+) -> JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>> {
+    use std::sync::Arc;
+
+    let worker = Arc::new(
+        AsyncWorker::builder()
+            .worker_id(worker_id)
+            .db_path(db_path)
+            .redis_url(redis_url)
+            .default_timeout(default_timeout)
+            .build()
+            .expect("Failed to build AsyncWorker")
+    );
+    spawn_worker(worker, engine, shutdown_rx)
+}
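Since `spawn_worker` is generic over the trait, implementations beyond `SyncWorker` and `AsyncWorker` can plug into the same Redis polling loop. A hedged sketch, assuming the trait's required items are exactly those implemented by the two workers in this commit:

```rust
// Sketch only: a custom Worker implementation that just logs jobs. The method
// set is an assumption inferred from the SyncWorker/AsyncWorker impls above.
use async_trait::async_trait;
use hero_job::Job;
use rhai::Engine;
use rhailib_worker::worker_trait::Worker;

struct LoggingWorker {
    worker_id: String,
    redis_url: String,
}

#[async_trait]
impl Worker for LoggingWorker {
    async fn process_job(
        &self,
        job: Job,
        _engine: Engine,
        _redis_conn: &mut redis::aio::MultiplexedConnection,
    ) {
        // A real implementation would evaluate job.script and write results back to Redis.
        println!("LoggingWorker '{}' received job {}", self.worker_id, job.id);
    }

    fn worker_type(&self) -> &'static str { "Logging" }
    fn worker_id(&self) -> &str { &self.worker_id }
    fn redis_url(&self) -> &str { &self.redis_url }
}
```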
255
core/worker/src/sync_worker.rs
Normal file
@ -0,0 +1,255 @@
|
|||||||
|
//! # Synchronous Worker Implementation
|
||||||
|
//!
|
||||||
|
//! This module provides a synchronous worker implementation that processes jobs
|
||||||
|
//! one at a time in sequence. This is the original worker behavior that's suitable
|
||||||
|
//! for scenarios where job execution should not overlap or when resource constraints
|
||||||
|
//! require sequential processing.
|
||||||
|
//!
|
||||||
|
//! ## Features
|
||||||
|
//!
|
||||||
|
//! - **Sequential Processing**: Jobs are processed one at a time
|
||||||
|
//! - **Simple Resource Management**: No concurrent job tracking needed
|
||||||
|
//! - **Predictable Behavior**: Jobs complete in the order they're received
|
||||||
|
//! - **Lower Memory Usage**: Only one job active at a time
|
||||||
|
//!
|
||||||
|
//! ## Usage
|
||||||
|
//!
|
||||||
|
//! ```rust
|
||||||
|
//! use std::sync::Arc;
|
||||||
|
//! use rhailib_worker::sync_worker::SyncWorker;
|
||||||
|
//! use rhailib_worker::worker_trait::{spawn_worker, WorkerConfig};
|
||||||
|
//! use rhailib_worker::engine::create_heromodels_engine;
|
||||||
|
//! use tokio::sync::mpsc;
|
||||||
|
//!
|
||||||
|
//! let config = WorkerConfig::new(
|
||||||
|
//! "sync_worker_1".to_string(),
|
||||||
|
//! "/path/to/db".to_string(),
|
||||||
|
//! "redis://localhost:6379".to_string(),
|
||||||
|
//! false, // preserve_tasks
|
||||||
|
//! );
|
||||||
|
//!
|
||||||
|
//! let worker = Arc::new(SyncWorker::new());
|
||||||
|
//! let engine = create_heromodels_engine();
|
||||||
|
//! let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
|
||||||
|
//!
|
||||||
|
//! let handle = spawn_worker(worker, config, engine, shutdown_rx);
|
||||||
|
//!
|
||||||
|
//! // Later, shutdown the worker
|
||||||
|
//! shutdown_tx.send(()).await.unwrap();
|
||||||
|
//! handle.await.unwrap().unwrap();
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use hero_job::{Job, JobStatus};
|
||||||
|
use log::{debug, error, info};
|
||||||
|
use rhai::Engine;
|
||||||
|
|
||||||
|
use crate::engine::eval_script;
|
||||||
|
use crate::worker_trait::{Worker, WorkerConfig};
|
||||||
|
|
||||||
|
/// Builder for SyncWorker
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct SyncWorkerBuilder {
|
||||||
|
worker_id: Option<String>,
|
||||||
|
db_path: Option<String>,
|
||||||
|
redis_url: Option<String>,
|
||||||
|
preserve_tasks: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SyncWorkerBuilder {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn worker_id<S: Into<String>>(mut self, worker_id: S) -> Self {
|
||||||
|
self.worker_id = Some(worker_id.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn db_path<S: Into<String>>(mut self, db_path: S) -> Self {
|
||||||
|
self.db_path = Some(db_path.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn redis_url<S: Into<String>>(mut self, redis_url: S) -> Self {
|
||||||
|
self.redis_url = Some(redis_url.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn preserve_tasks(mut self, preserve: bool) -> Self {
|
||||||
|
self.preserve_tasks = preserve;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build(self) -> Result<SyncWorker, String> {
|
||||||
|
Ok(SyncWorker {
|
||||||
|
worker_id: self.worker_id.ok_or("worker_id is required")?,
|
||||||
|
db_path: self.db_path.ok_or("db_path is required")?,
|
||||||
|
redis_url: self.redis_url.ok_or("redis_url is required")?,
|
||||||
|
preserve_tasks: self.preserve_tasks,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Synchronous worker that processes jobs sequentially
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct SyncWorker {
|
||||||
|
pub worker_id: String,
|
||||||
|
pub db_path: String,
|
||||||
|
pub redis_url: String,
|
||||||
|
pub preserve_tasks: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SyncWorker {
|
||||||
|
/// Create a new SyncWorkerBuilder
|
||||||
|
pub fn builder() -> SyncWorkerBuilder {
|
||||||
|
SyncWorkerBuilder::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for SyncWorker {
|
||||||
|
fn default() -> Self {
|
||||||
|
// Default SyncWorker with placeholder values
|
||||||
|
// In practice, use the builder pattern instead
|
||||||
|
Self {
|
||||||
|
worker_id: "default_sync_worker".to_string(),
|
||||||
|
db_path: "/tmp".to_string(),
|
||||||
|
redis_url: "redis://localhost:6379".to_string(),
|
||||||
|
preserve_tasks: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Worker for SyncWorker {
|
||||||
|
async fn process_job(
|
||||||
|
&self,
|
||||||
|
job: Job,
|
||||||
|
engine: Engine,
|
||||||
|
redis_conn: &mut redis::aio::MultiplexedConnection,
|
||||||
|
) {
|
||||||
|
let job_id = &job.id;
|
||||||
|
let worker_id = &self.worker_id;
|
||||||
|
let db_path = &self.db_path;
|
||||||
|
|
||||||
|
info!("Sync Worker '{}', Job {}: Starting sequential processing", worker_id, job_id);
|
||||||
|
|
||||||
|
// Update job status to Started
|
||||||
|
if let Err(e) = Job::update_status(redis_conn, job_id, JobStatus::Started).await {
|
||||||
|
error!("Sync Worker '{}', Job {}: Failed to update status to Started: {}",
|
||||||
|
worker_id, job_id, e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute the Rhai script
|
||||||
|
match eval_script(&engine, &job.script) {
|
||||||
|
Ok(result) => {
|
||||||
|
let result_str = format!("{:?}", result);
|
||||||
|
info!("Sync Worker '{}', Job {}: Script executed successfully. Result: {}",
|
||||||
|
worker_id, job_id, result_str);
|
||||||
|
|
||||||
|
// Update job with success result
|
||||||
|
if let Err(e) = Job::set_result(redis_conn, job_id, &result_str).await {
|
||||||
|
error!("Sync Worker '{}', Job {}: Failed to set result: {}",
|
||||||
|
worker_id, job_id, e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Err(e) = Job::update_status(redis_conn, job_id, JobStatus::Finished).await {
|
||||||
|
error!("Sync Worker '{}', Job {}: Failed to update status to Finished: {}",
|
||||||
|
worker_id, job_id, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
let error_msg = format!("Script execution error: {}", e);
|
||||||
|
error!("Sync Worker '{}', Job {}: {}", worker_id, job_id, error_msg);
|
||||||
|
|
||||||
|
// Update job with error
|
||||||
|
if let Err(e) = Job::set_error(redis_conn, job_id, &error_msg).await {
|
||||||
|
error!("Sync Worker '{}', Job {}: Failed to set error: {}",
|
||||||
|
worker_id, job_id, e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Err(e) = Job::update_status(redis_conn, job_id, JobStatus::Error).await {
|
||||||
|
error!("Sync Worker '{}', Job {}: Failed to update status to Error: {}",
|
||||||
|
worker_id, job_id, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup job if preserve_tasks is false
|
||||||
|
if !self.preserve_tasks {
|
||||||
|
if let Err(e) = Job::delete_from_redis(redis_conn, job_id).await {
|
||||||
|
error!("Sync Worker '{}', Job {}: Failed to cleanup job: {}",
|
||||||
|
worker_id, job_id, e);
|
||||||
|
} else {
|
||||||
|
debug!("Sync Worker '{}', Job {}: Job cleaned up from Redis", worker_id, job_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Sync Worker '{}', Job {}: Sequential processing completed", worker_id, job_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn worker_type(&self) -> &'static str {
|
||||||
|
"Sync"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn worker_id(&self) -> &str {
|
||||||
|
&self.worker_id
|
||||||
|
}
|
||||||
|
|
||||||
|
fn redis_url(&self) -> &str {
|
||||||
|
&self.redis_url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::create_heromodels_engine;
    use hero_job::ScriptType;
    use std::time::Duration;

    #[tokio::test]
    async fn test_sync_worker_creation() {
        let worker = SyncWorker::new();
        assert_eq!(worker.worker_type(), "Sync");
    }

    #[tokio::test]
    async fn test_sync_worker_default() {
        let worker = SyncWorker::default();
        assert_eq!(worker.worker_type(), "Sync");
    }

    #[tokio::test]
    async fn test_sync_worker_process_job_interface() {
        let worker = SyncWorker::new();
        let engine = create_heromodels_engine();

        // Create a simple test job
        let job = Job::new(
            "test_caller".to_string(),
            "test_context".to_string(),
            r#"print("Hello from sync worker test!"); 42"#.to_string(),
            ScriptType::OSIS,
        );

        let config = WorkerConfig::new(
            "test_sync_worker".to_string(),
            "/tmp".to_string(),
            "redis://localhost:6379".to_string(),
            false,
        );

        // Note: this test does not connect to Redis; it only exercises the
        // interface. In a real test environment you would need a Redis
        // instance or a mock, and would call:
        // worker.process_job(job, engine, &mut redis_conn).await;
        let _ = (job, config);

        // For now, just verify the worker was created successfully.
        assert_eq!(worker.worker_type(), "Sync");
    }
}
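To make the interface comment above concrete, here is a hedged sketch of what a Redis-backed integration test might look like. It is not part of this commit: it assumes a Redis server on `localhost:6379`, that `process_job` tolerates a job that was not pre-seeded in Redis (it may simply log errors on the status writes), and it is marked `#[ignore]` so it only runs on demand.

```rust
    // Hypothetical integration-style test; requires a running Redis server.
    #[tokio::test]
    #[ignore]
    async fn test_sync_worker_process_job_against_redis() {
        let worker = SyncWorker::new();
        let engine = create_heromodels_engine();

        // Open a multiplexed async connection, as the worker loop does.
        let client = redis::Client::open("redis://localhost:6379").unwrap();
        let mut conn = client.get_multiplexed_async_connection().await.unwrap();

        let job = Job::new(
            "test_caller".to_string(),
            "test_context".to_string(),
            "40 + 2".to_string(),
            ScriptType::OSIS,
        );

        // process_job writes status/result back through `conn`.
        worker.process_job(job, engine, &mut conn).await;
    }
```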
339
core/worker/src/worker_trait.rs
Normal file
@ -0,0 +1,339 @@
//! # Worker Trait Abstraction
//!
//! This module provides a trait-based abstraction for Rhai workers that eliminates
//! code duplication between synchronous and asynchronous worker implementations.
//!
//! The `Worker` trait defines the common interface and behavior, while specific
//! implementations handle job processing differently (sync vs async).
//!
//! ## Architecture
//!
//! ```text
//! ┌─────────────────┐    ┌─────────────────┐
//! │   SyncWorker    │    │   AsyncWorker   │
//! │                 │    │                 │
//! │  process_job()  │    │  process_job()  │
//! │  (sequential)   │    │  (concurrent)   │
//! └─────────────────┘    └─────────────────┘
//!          │                      │
//!          └──────────┬───────────┘
//!                     │
//!             ┌───────▼───────┐
//!             │  Worker Trait │
//!             │               │
//!             │    spawn()    │
//!             │    config     │
//!             │  common loop  │
//!             └───────────────┘
//! ```

use hero_job::Job;
use log::{debug, error, info};
use redis::AsyncCommands;
use rhai::Engine;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tokio::task::JoinHandle;

use crate::{initialize_redis_connection, NAMESPACE_PREFIX, BLPOP_TIMEOUT_SECONDS};
/// Configuration for worker instances
#[derive(Debug, Clone)]
pub struct WorkerConfig {
    pub worker_id: String,
    pub db_path: String,
    pub redis_url: String,
    pub preserve_tasks: bool,
    pub default_timeout: Option<Duration>, // Only used by async workers
}

impl WorkerConfig {
    /// Create a new worker configuration
    pub fn new(
        worker_id: String,
        db_path: String,
        redis_url: String,
        preserve_tasks: bool,
    ) -> Self {
        Self {
            worker_id,
            db_path,
            redis_url,
            preserve_tasks,
            default_timeout: None,
        }
    }

    /// Set default timeout for async workers
    pub fn with_default_timeout(mut self, timeout: Duration) -> Self {
        self.default_timeout = Some(timeout);
        self
    }
}
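For orientation, a minimal sketch of how this config type composes (the worker id and db path below are invented for illustration):

```rust
use std::time::Duration;

// Sync workers ignore default_timeout, so plain `new` is enough.
let sync_cfg = WorkerConfig::new(
    "osis_worker_1".to_string(),          // hypothetical worker id
    "/var/lib/hero/db".to_string(),       // hypothetical db path
    "redis://localhost:6379".to_string(),
    false, // do not preserve finished jobs in Redis
);

// Async workers can additionally cap per-job runtime.
let async_cfg = sync_cfg
    .clone()
    .with_default_timeout(Duration::from_secs(300));
```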
/// Trait defining the common interface for Rhai workers
///
/// This trait abstracts the common functionality between synchronous and
/// asynchronous workers, allowing them to share the same spawn logic and
/// Redis polling loop while implementing different job processing strategies.
#[async_trait::async_trait]
pub trait Worker: Send + Sync + 'static {
    /// Process a single job
    ///
    /// This is the core method that differentiates worker implementations:
    /// - Sync workers process jobs sequentially, one at a time
    /// - Async workers spawn concurrent tasks for each job
    ///
    /// # Arguments
    ///
    /// * `job` - The job to process
    /// * `engine` - Rhai engine for script execution
    /// * `redis_conn` - Redis connection for status updates
    async fn process_job(
        &self,
        job: Job,
        engine: Engine,
        redis_conn: &mut redis::aio::MultiplexedConnection,
    );

    /// Get the worker type name for logging
    fn worker_type(&self) -> &'static str;

    /// Get worker ID for this worker instance
    fn worker_id(&self) -> &str;

    /// Get Redis URL for this worker instance
    fn redis_url(&self) -> &str;
    /// Spawn the worker
    ///
    /// This method provides the common worker loop implementation that both
    /// sync and async workers can use. It handles:
    /// - Redis connection setup
    /// - Job polling from Redis queue
    /// - Shutdown signal handling
    /// - Delegating job processing to the implementation
    fn spawn(
        self: Arc<Self>,
        engine: Engine,
        mut shutdown_rx: mpsc::Receiver<()>,
    ) -> JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>> {
        tokio::spawn(async move {
            let worker_id = self.worker_id();
            let redis_url = self.redis_url();
            let queue_key = format!("{}{}", NAMESPACE_PREFIX, worker_id);
            info!(
                "{} Worker '{}' starting. Connecting to Redis at {}. Listening on queue: {}",
                self.worker_type(),
                worker_id,
                redis_url,
                queue_key
            );

            let mut redis_conn = initialize_redis_connection(worker_id, redis_url).await?;

            loop {
                let blpop_keys = vec![queue_key.clone()];
                tokio::select! {
                    // Listen for shutdown signal
                    _ = shutdown_rx.recv() => {
                        info!("{} Worker '{}': Shutdown signal received. Terminating loop.",
                              self.worker_type(), worker_id);
                        break;
                    }
                    // Listen for tasks from Redis
                    blpop_result = redis_conn.blpop(&blpop_keys, BLPOP_TIMEOUT_SECONDS as f64) => {
                        debug!("{} Worker '{}': Attempting BLPOP on queue: {}",
                               self.worker_type(), worker_id, queue_key);

                        let response: Option<(String, String)> = match blpop_result {
                            Ok(resp) => resp,
                            Err(e) => {
                                error!("{} Worker '{}': Redis BLPOP error on queue {}: {}. Worker for this circle might stop.",
                                       self.worker_type(), worker_id, queue_key, e);
                                return Err(Box::new(e) as Box<dyn std::error::Error + Send + Sync>);
                            }
                        };

                        if let Some((_queue_name_recv, job_id)) = response {
                            info!("{} Worker '{}' received job_id: {} from queue: {}",
                                  self.worker_type(), worker_id, job_id, _queue_name_recv);

                            // Load the job from Redis
                            match crate::load_job_from_redis(&mut redis_conn, &job_id, worker_id).await {
                                Ok(job) => {
                                    // Check for ping job and handle it directly
                                    if job.script.trim() == "ping" {
                                        info!("{} Worker '{}': Received ping job '{}', responding with pong",
                                              self.worker_type(), worker_id, job_id);

                                        // Update job status to started
                                        if let Err(e) = hero_job::Job::update_status(&mut redis_conn, &job_id, hero_job::JobStatus::Started).await {
                                            error!("{} Worker '{}': Failed to update ping job '{}' status to Started: {}",
                                                   self.worker_type(), worker_id, job_id, e);
                                        }

                                        // Set result to "pong" and mark as finished
                                        if let Err(e) = hero_job::Job::set_result(&mut redis_conn, &job_id, "pong").await {
                                            error!("{} Worker '{}': Failed to set ping job '{}' result: {}",
                                                   self.worker_type(), worker_id, job_id, e);
                                        }

                                        info!("{} Worker '{}': Successfully responded to ping job '{}' with pong",
                                              self.worker_type(), worker_id, job_id);
                                    } else {
                                        // Create a new engine for each job to avoid sharing state
                                        let job_engine = crate::engine::create_heromodels_engine();
                                        // Delegate job processing to the implementation
                                        self.process_job(job, job_engine, &mut redis_conn).await;
                                    }
                                }
                                Err(e) => {
                                    error!("{} Worker '{}': Failed to load job '{}': {}",
                                           self.worker_type(), worker_id, job_id, e);
                                }
                            }
                        } else {
                            debug!("{} Worker '{}': BLPOP timed out on queue {}. No new tasks.",
                                   self.worker_type(), worker_id, queue_key);
                        }
                    }
                }
            }

            info!("{} Worker '{}' has shut down.", self.worker_type(), worker_id);
            Ok(())
        })
    }
}

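To make the division of labor concrete, here is a minimal sketch of a custom trait implementation. The type and field names are hypothetical (the real implementations in this commit are `SyncWorker` and `AsyncWorker`); only `process_job` differs between worker kinds, while the polling loop above is shared.

```rust
// Sketch: a no-op Worker impl that just logs what it receives.
struct LoggingWorker {
    worker_id: String,
    redis_url: String,
}

#[async_trait::async_trait]
impl Worker for LoggingWorker {
    async fn process_job(
        &self,
        job: Job,
        _engine: Engine,
        _redis_conn: &mut redis::aio::MultiplexedConnection,
    ) {
        // A real implementation would evaluate job.script on the engine and
        // write status/result back through the Redis connection.
        info!("LoggingWorker '{}' received a {}-byte script",
              self.worker_id, job.script.len());
    }

    fn worker_type(&self) -> &'static str { "Logging" }
    fn worker_id(&self) -> &str { &self.worker_id }
    fn redis_url(&self) -> &str { &self.redis_url }
}
```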
/// Convenience function to spawn a worker with the trait-based interface
///
/// This function provides a unified interface for spawning any worker implementation
/// that implements the Worker trait.
///
/// # Arguments
///
/// * `worker` - The worker implementation to spawn
/// * `engine` - Rhai engine for script execution
/// * `shutdown_rx` - Channel receiver for shutdown signals
///
/// # Returns
///
/// Returns a `JoinHandle` that can be awaited to wait for worker shutdown.
///
/// # Example
///
/// ```rust
/// use std::sync::Arc;
///
/// let worker = Arc::new(SyncWorker::new());
/// let engine = create_heromodels_engine();
/// let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
///
/// let handle = spawn_worker(worker, engine, shutdown_rx);
///
/// // Later, shut down the worker
/// shutdown_tx.send(()).await.unwrap();
/// handle.await.unwrap().unwrap();
/// ```
pub fn spawn_worker<W: Worker>(
    worker: Arc<W>,
    engine: Engine,
    shutdown_rx: mpsc::Receiver<()>,
) -> JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>> {
    worker.spawn(engine, shutdown_rx)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::create_heromodels_engine;

    // Mock worker for testing
    struct MockWorker;

    #[async_trait::async_trait]
    impl Worker for MockWorker {
        async fn process_job(
            &self,
            _job: Job,
            _engine: Engine,
            _redis_conn: &mut redis::aio::MultiplexedConnection,
        ) {
            // Mock implementation - do nothing
        }

        fn worker_type(&self) -> &'static str {
            "Mock"
        }

        fn worker_id(&self) -> &str {
            "mock_worker"
        }

        fn redis_url(&self) -> &str {
            "redis://localhost:6379"
        }
    }

    #[tokio::test]
    async fn test_worker_config_creation() {
        let config = WorkerConfig::new(
            "test_worker".to_string(),
            "/tmp".to_string(),
            "redis://localhost:6379".to_string(),
            false,
        );

        assert_eq!(config.worker_id, "test_worker");
        assert_eq!(config.db_path, "/tmp");
        assert_eq!(config.redis_url, "redis://localhost:6379");
        assert!(!config.preserve_tasks);
        assert!(config.default_timeout.is_none());
    }

    #[tokio::test]
    async fn test_worker_config_with_timeout() {
        let timeout = Duration::from_secs(300);
        let config = WorkerConfig::new(
            "test_worker".to_string(),
            "/tmp".to_string(),
            "redis://localhost:6379".to_string(),
            false,
        ).with_default_timeout(timeout);

        assert_eq!(config.default_timeout, Some(timeout));
    }

    #[tokio::test]
    async fn test_spawn_worker_function() {
        let (_shutdown_tx, shutdown_rx) = mpsc::channel(1);
        let engine = create_heromodels_engine();
        let worker = Arc::new(MockWorker);

        let handle = spawn_worker(worker, engine, shutdown_rx);

        // The worker task should be running, not finished
        assert!(!handle.is_finished());

        // Abort the worker for cleanup
        handle.abort();
    }
}
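As a usage sketch (not part of this commit), the spawn and shutdown halves would typically be tied to a process signal in a binary, assuming a `SyncWorker` and the helpers above:

```rust
use std::sync::Arc;
use tokio::sync::mpsc;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
    let worker = Arc::new(SyncWorker::new()); // sync worker from this commit
    let engine = create_heromodels_engine();

    let handle = spawn_worker(worker, engine, shutdown_rx);

    // Translate Ctrl-C into a shutdown message for the worker loop.
    tokio::signal::ctrl_c().await?;
    shutdown_tx.send(()).await?;

    // The loop breaks on the shutdown signal and the task resolves.
    handle.await??;
    Ok(())
}
```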
@ -25,7 +25,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         .host("127.0.0.1")
         .port(8443)
         .redis_url("redis://localhost:6379")
-        .worker_id("test")
         .with_auth()
         .build() {
         Ok(server) => {
@ -63,7 +63,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         .host("127.0.0.1")
         .port(8443)
         .redis_url("redis://localhost:6379")
-        .worker_id("circle_test")
         .with_auth()
         .circles(circles)
         .build()?;
@ -15,7 +15,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         .host("127.0.0.1")
         .port(8443)
         .redis_url("redis://localhost:6379")
-        .worker_id("test")
         .build() {
         Ok(server) => {
             println!("🚀 Built server...");
@ -25,7 +25,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         .host("127.0.0.1")
         .port(8443)
         .redis_url("redis://localhost:6379")
-        .worker_id("test")
         .with_auth()
         .build() {
         Ok(server) => {
@ -1,4 +1,6 @@
 use std::collections::HashMap;
+use std::sync::Arc;
+use hero_supervisor::Supervisor;
 use crate::{Server, TlsConfigError, ServerConfig};
 
 /// ServerBuilder for constructing Server instances with a fluent API
@ -12,6 +14,7 @@ pub struct ServerBuilder {
     tls_port: Option<u16>,
     enable_auth: bool,
     enable_webhooks: bool,
+    supervisor: Option<Arc<Supervisor>>,
     circles: HashMap<String, Vec<String>>,
 }
@ -28,6 +31,7 @@ impl ServerBuilder {
     tls_port: None,
     enable_auth: false,
     enable_webhooks: false,
+    supervisor: None,
     circles: HashMap::new(),
 }
@ -76,6 +80,12 @@ impl ServerBuilder {
         self.circles = circles;
         self
     }
+
+    /// Set the supervisor instance for job dispatching
+    pub fn with_supervisor(mut self, supervisor: Arc<Supervisor>) -> Self {
+        self.supervisor = Some(supervisor);
+        self
+    }
 
     /// Load configuration from a ServerConfig instance
     pub fn from_config(mut self, config: ServerConfig) -> Self {
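A sketch of how this new hook composes with the supervisor changes later in this diff: one `Arc<Supervisor>` built at startup and shared with the server, so the WebSocket handlers no longer rebuild a supervisor per request. Builder and method names are as introduced in this commit; the Redis URL is illustrative.

```rust
use std::sync::Arc;
use hero_supervisor::SupervisorBuilder;

// Build the supervisor once at startup...
let supervisor = Arc::new(
    SupervisorBuilder::new()
        .redis_url("redis://localhost:6379")
        .build()
        .await?,
);

// ...and hand a clone to the WebSocket server.
let server = ServerBuilder::new()
    .host("127.0.0.1")
    .port(8443)
    .redis_url("redis://localhost:6379")
    .with_supervisor(supervisor.clone())
    .build()?;
```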
@ -87,7 +97,6 @@ impl ServerBuilder {
     self.cert_path = config.cert;
     self.key_path = config.key;
     self.tls_port = config.tls_port;
-    self.enable_webhooks = config.webhooks;
     self.circles = config.circles;
     self
 }
@ -109,7 +118,7 @@ impl ServerBuilder {
     circles: self.circles,
     nonce_store: HashMap::new(),
     authenticated_pubkey: None,
-    supervisor: None,
+    supervisor: self.supervisor,
 })
 }
 }
@ -34,10 +34,6 @@ pub struct ServerConfig {
     /// Separate port for TLS connections
     pub tls_port: Option<u16>,
-
-    /// Enable webhook handling
-    #[serde(default)]
-    pub webhooks: bool,
 
     /// Circles configuration - maps circle names to lists of member public keys
     #[serde(default)]
     pub circles: HashMap<String, Vec<String>>,
@ -54,7 +50,6 @@ impl Default for ServerConfig {
     cert: None,
     key: None,
     tls_port: None,
-    webhooks: false,
     circles: HashMap::new(),
 }
 }
@ -123,7 +118,6 @@ impl ServerConfig {
     cert: Some("cert.pem".to_string()),
     key: Some("key.pem".to_string()),
     tls_port: Some(8444),
-    webhooks: false,
     circles,
 }
 }
@ -1,7 +1,7 @@
 use crate::Server;
 use actix::prelude::*;
 use actix_web_actors::ws;
-use hero_supervisor::{Supervisor, ScriptType};
+use hero_supervisor::ScriptType;
 use serde_json::{json, Value};
 use std::time::Duration;
@ -426,7 +426,7 @@ impl Server {
     supervisor
         .new_job()
         .context_id(&circle_pk)
-        .script_type(ScriptType::RhaiSAL)
+        .script_type(ScriptType::SAL)
         .script(&script_content)
         .timeout(TASK_TIMEOUT_DURATION)
         .await_response()
@ -13,6 +13,7 @@ use serde_json::Value; // Removed unused json
 use std::collections::HashMap;
 use std::fs::File;
 use std::io::BufReader;
+use std::sync::Arc;
 use std::sync::Mutex; // Removed unused Arc
 use std::time::{SystemTime, UNIX_EPOCH};
 use tokio::task::JoinHandle;
@ -211,7 +212,7 @@ pub struct Server {
     pub circles: HashMap<String, Vec<String>>,
     nonce_store: HashMap<String, NonceResponse>,
     authenticated_pubkey: Option<String>,
-    pub supervisor: Option<Supervisor>,
+    pub supervisor: Option<Arc<Supervisor>>,
 }
 
 impl Server {
@ -240,7 +241,7 @@ impl Server {
     let server_config_data = web::Data::new(self.clone());
 
     let http_server = HttpServer::new(move || {
-        let mut app = App::new()
+        let app = App::new()
             .app_data(server_config_data.clone())
             .route("/{circle_pk}", web::get().to(ws_handler));
 
@ -351,21 +352,6 @@ impl Server {
     client_rpc_id: Value,
     ctx: &mut ws::WebsocketContext<Self>,
 ) {
-    if !self.enable_auth {
-        let err_resp = JsonRpcResponse {
-            jsonrpc: "2.0".to_string(),
-            result: None,
-            error: Some(JsonRpcError {
-                code: -32000,
-                message: "Authentication is disabled on this server.".to_string(),
-                data: None,
-            }),
-            id: client_rpc_id,
-        };
-        ctx.text(serde_json::to_string(&err_resp).unwrap());
-        return;
-    }
-
     match serde_json::from_value::<AuthCredentials>(params) {
     Ok(auth_params) => {
     let nonce_response = self.nonce_store.get(&auth_params.pubkey);
@ -550,23 +536,40 @@ impl Server {
     let public_key = self.authenticated_pubkey.clone();
 
+    let supervisor_ref = self.supervisor.clone();
     let fut = async move {
-        let caller_id = public_key.unwrap_or_else(|| "anonymous".to_string());
+        let _caller_id = public_key.unwrap_or_else(|| "anonymous".to_string());
-        match SupervisorBuilder::new()
-            .redis_url(&redis_url_clone)
-            .caller_id(&caller_id)
-            .build() {
-            Ok(hero_supervisor) => {
-                hero_supervisor
+        // Use the passed supervisor if available, otherwise create a new one
+        match supervisor_ref {
+            Some(supervisor) => {
+                supervisor
                     .new_job()
                     .context_id(&circle_pk_clone)
-                    .script_type(hero_supervisor::ScriptType::RhaiSAL)
+                    .script_type(hero_supervisor::ScriptType::SAL)
                     .script(&script_content)
                     .timeout(TASK_TIMEOUT_DURATION)
                     .await_response()
                     .await
             }
-            Err(e) => Err(e),
+            None => {
+                // Fallback: create a new supervisor if none was provided
+                match SupervisorBuilder::new()
+                    .redis_url(&redis_url_clone)
+                    .build().await {
+                    Ok(hero_supervisor) => {
+                        hero_supervisor
+                            .new_job()
+                            .context_id(&circle_pk_clone)
+                            .script_type(hero_supervisor::ScriptType::SAL)
+                            .script(&script_content)
+                            .timeout(TASK_TIMEOUT_DURATION)
+                            .await_response()
+                            .await
+                    }
+                    Err(e) => Err(e),
+                }
+            }
         }
     };
 