refactor wip

This commit is contained in:
Timur Gordon 2025-08-05 12:19:38 +02:00
parent 8ed40ce99c
commit 7a652c9c3c
51 changed files with 6183 additions and 840 deletions

1349
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -7,6 +7,8 @@ edition = "2024"
anyhow = "1.0"
chrono = { version = "0.4", features = ["serde"] }
env_logger = "0.10"
hero_supervisor = { path = "core/supervisor" }
hero_websocket_server = { path = "interfaces/websocket/server" }
log = "0.4"
redis = { version = "0.25.0", features = ["tokio-comp"] }
serde = { version = "1.0", features = ["derive"] }
@ -23,6 +25,10 @@ tempfile = "3.10"
name = "simple_rhai_bench"
harness = false
[[bin]]
name = "hive"
path = "cmd/main.rs"
[workspace.dependencies]
actix = "0.13"
actix-web = { version = "4", features = ["rustls-0_23"] }

28
cmd/config.toml Normal file
View File

@ -0,0 +1,28 @@
[global]
# Redis connection URL for job queuing
redis_url = "redis://localhost:6379"
# WebSocket Server Configuration
# Handles WebSocket connections for job dispatching
[websocket_server]
host = "127.0.0.1"
port = 8443
redis_url = "redis://127.0.0.1/"
auth = false
tls = false
# cert = "/path/to/cert.pem" # Uncomment and set path for TLS
# key = "/path/to/key.pem" # Uncomment and set path for TLS
# tls_port = 8444 # Uncomment for separate TLS port
# Circles configuration - maps circle names to lists of member public keys
[websocket_server.circles]
# Example circle configuration:
# "admin" = ["04abc123...", "04def456..."]
# "users" = ["04ghi789...", "04jkl012..."]
# "ws" = [] # Public circle (no auth required)
# OSIS Worker Configuration
# Handles OSIS (HeroScript) execution
[osis_worker]
binary_path = "/Users/timurgordon/code/git.ourworld.tf/herocode/hero/target/debug/osis"
env_vars = { "RUST_LOG" = "info", "WORKER_TYPE" = "osis", "MAX_CONCURRENT_JOBS" = "5" }

186
cmd/main.rs Normal file
View File

@ -0,0 +1,186 @@
use std::env;
use std::sync::Arc;
use std::time::Duration;
use hero_supervisor::{SupervisorBuilder, SupervisorError};
use hero_websocket_server::ServerBuilder;
use tokio::signal;
use log::{info, error};
use env_logger::Builder;
/// The main entry point of the Hero Supervisor.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize logging
env_logger::Builder::from_default_env()
.filter_level(log::LevelFilter::Info)
.init();
info!("Hero Supervisor starting up...");
// Get config path from command line arguments or use default
let args: Vec<String> = env::args().collect();
let config_path = if let Some(config_index) = args.iter().position(|arg| arg == "--config") {
if config_index + 1 < args.len() {
&args[config_index + 1]
} else {
"cmd/config.toml"
}
} else {
"cmd/config.toml"
};
println!("Loading configuration from: {}", config_path);
let supervisor = SupervisorBuilder::from_toml(config_path)?
.build().await?;
// Wrap supervisor in Arc for sharing across tasks
let supervisor = Arc::new(supervisor);
// Extract worker configurations from TOML config
let worker_configs = supervisor.get_worker_configs()?;
info!("Loaded {} worker configurations from TOML", worker_configs.len());
// Spawn the background lifecycle manager with 5-minute health check interval
let health_check_interval = Duration::from_secs(5 * 60); // 5 minutes
let mut lifecycle_handle = supervisor.clone().spawn_lifecycle_manager(worker_configs, health_check_interval);
info!("Hero Supervisor started successfully!");
info!("Background lifecycle manager is running with 5-minute health checks.");
info!("Workers are being monitored and will be automatically restarted if they fail.");
// Start WebSocket server for job dispatching
info!("Starting WebSocket server for job dispatching...");
let ws_supervisor = supervisor.clone();
// Get WebSocket server config from TOML or use defaults
let ws_config = supervisor.get_websocket_config().unwrap_or_else(|_| {
info!("Using default WebSocket server configuration");
hero_supervisor::WebSocketServerConfig {
host: "127.0.0.1".to_string(),
port: 8443,
redis_url: "redis://127.0.0.1/".to_string(),
auth: false,
tls: false,
cert: None,
key: None,
tls_port: None,
circles: std::collections::HashMap::new(),
}
});
let mut websocket_handle = tokio::spawn(async move {
info!("WebSocket server starting on {}:{}", ws_config.host, ws_config.port);
// Create the WebSocket server with our supervisor
let mut server_builder = ServerBuilder::new()
.host(&ws_config.host)
.port(ws_config.port)
.redis_url(&ws_config.redis_url)
.with_supervisor(ws_supervisor);
// Configure auth if enabled
if ws_config.auth {
server_builder = server_builder.with_auth();
}
// Configure TLS if enabled
if ws_config.tls {
if let (Some(cert), Some(key)) = (&ws_config.cert, &ws_config.key) {
server_builder = server_builder.with_tls(cert.clone(), key.clone());
if let Some(tls_port) = ws_config.tls_port {
server_builder = server_builder.with_tls_port(tls_port);
}
}
}
// Configure circles
if !ws_config.circles.is_empty() {
server_builder = server_builder.circles(ws_config.circles.clone());
}
let server = match server_builder.build() {
Ok(server) => server,
Err(e) => {
error!("Failed to build WebSocket server: {}", e);
return;
}
};
// Start the WebSocket server
match server.spawn_circle_server() {
Ok((server_handle, _)) => {
info!("WebSocket server successfully started and ready to dispatch jobs");
if let Err(e) = server_handle.await {
error!("WebSocket server error: {:?}", e);
}
}
Err(e) => {
error!("Failed to start WebSocket server: {}", e);
}
}
});
info!("WebSocket server started - ready to accept connections and dispatch jobs");
// Set up graceful shutdown signal handlers
let shutdown_signal = async {
let ctrl_c = async {
signal::ctrl_c()
.await
.expect("failed to install Ctrl+C handler");
};
#[cfg(unix)]
let terminate = async {
signal::unix::signal(signal::unix::SignalKind::terminate())
.expect("failed to install signal handler")
.recv()
.await;
};
#[cfg(not(unix))]
let terminate = std::future::pending::<()>();
tokio::select! {
_ = ctrl_c => {},
_ = terminate => {},
}
info!("Shutdown signal received, initiating graceful shutdown...");
};
// Wait for shutdown signal or task completion
tokio::select! {
_ = shutdown_signal => {
info!("Graceful shutdown initiated");
// Cancel background tasks
lifecycle_handle.abort();
websocket_handle.abort();
info!("Background tasks stopped");
}
result = &mut lifecycle_handle => {
match result {
Ok(Ok(())) => info!("Lifecycle manager completed successfully"),
Ok(Err(e)) => error!("Lifecycle manager error: {}", e),
Err(e) => error!("Lifecycle manager task panicked: {}", e),
}
// Also stop the websocket handle
websocket_handle.abort();
}
result = &mut websocket_handle => {
match result {
Ok(()) => info!("WebSocket server completed successfully"),
Err(e) => error!("WebSocket server task panicked: {}", e),
}
// Also stop the lifecycle handle
lifecycle_handle.abort();
}
}
info!("Hero Supervisor shutdown complete");
Ok(())
}

View File

@ -1,191 +1,70 @@
//! Hero Supervisor Worker Demo
//!
//! This example demonstrates the new Hero Supervisor API with:
//! - Synchronous build() method
//! - Asynchronous start_workers() method
//! - Proper cleanup on program exit
//! - Signal handling for graceful shutdown
use colored::*;
use hero_supervisor::{SupervisorBuilder, ScriptType, JobStatus};
use log::warn;
use std::process::Stdio;
use hero_supervisor::{SupervisorBuilder, ScriptType};
use std::time::Duration;
use tokio::process::{Child, Command as TokioCommand};
use tokio::time::sleep;
use tokio::signal;
/// Supervisor manages worker lifecycle and job execution
pub struct Supervisor {
supervisor: hero_supervisor::Supervisor,
worker_processes: Vec<WorkerProcess>,
redis_url: String,
}
async fn run_supervisor_demo() -> Result<(), Box<dyn std::error::Error>> {
println!("{}", "🚀 Hero Supervisor Demo - New API".cyan().bold());
println!("{}", "Building supervisor synchronously...".yellow());
/// Represents a managed worker process
pub struct WorkerProcess {
id: String,
script_type: ScriptType,
process: Option<Child>,
binary_path: String,
}
impl Supervisor {
/// Create a new supervisor with supervisor configuration
pub async fn new(redis_url: String) -> Result<Self, Box<dyn std::error::Error>> {
// Build supervisor synchronously (no .await needed)
let supervisor = SupervisorBuilder::new()
.caller_id("supervisor")
.context_id("demo-context")
.redis_url(&redis_url)
.heroscript_workers(vec!["hero-worker-1".to_string()])
.rhai_sal_workers(vec!["rhai-sal-worker-1".to_string()])
.rhai_dsl_workers(vec!["rhai-dsl-worker-1".to_string()])
.redis_url("redis://127.0.0.1:6379")
.osis_worker("/usr/local/bin/osis_worker")
.sal_worker("/usr/local/bin/sal_worker")
.v_worker("/usr/local/bin/v_worker")
.python_worker("/usr/local/bin/python_worker")
.worker_env_var("REDIS_URL", "redis://127.0.0.1:6379")
.worker_env_var("LOG_LEVEL", "info")
.build()?;
Ok(Self {
supervisor,
worker_processes: Vec::new(),
redis_url,
})
}
println!("{}", "✅ Supervisor built successfully!".green());
println!("{}", "Starting workers asynchronously...".yellow());
/// Start a worker for a specific script type
pub async fn start_worker(&mut self, script_type: ScriptType, worker_binary_path: &str) -> Result<(), Box<dyn std::error::Error>> {
let worker_id = match script_type {
ScriptType::HeroScript => "hero-worker-1",
ScriptType::RhaiSAL => "rhai-sal-worker-1",
ScriptType::RhaiDSL => "rhai-dsl-worker-1",
};
// Start workers asynchronously
supervisor.start_workers().await?;
println!("{}", format!("🚀 Starting {} worker: {}", script_type.as_str(), worker_id).green().bold());
println!("{}", "✅ All workers started successfully!".green());
// Check if worker binary exists
if !std::path::Path::new(worker_binary_path).exists() {
return Err(format!("Worker binary not found at: {}", worker_binary_path).into());
}
// Demonstrate job creation and execution
println!("{}", "\n📋 Creating and running test jobs...".cyan().bold());
// Start the worker process
let mut cmd = TokioCommand::new(worker_binary_path);
cmd.arg("--worker-id").arg(worker_id)
.arg("--redis-url").arg(&self.redis_url)
.arg("--no-timestamp")
.stdout(Stdio::piped())
.stderr(Stdio::piped());
// Create and run a test job
println!("📝 Creating and running OSIS job...");
let process = cmd.spawn()?;
let worker_process = WorkerProcess {
id: worker_id.to_string(),
script_type,
process: Some(process),
binary_path: worker_binary_path.to_string(),
};
self.worker_processes.push(worker_process);
// Give worker time to start up
sleep(Duration::from_millis(500)).await;
println!("{}", format!("✅ Worker {} started successfully", worker_id).green());
Ok(())
}
/// Stop all workers
pub async fn stop_all_workers(&mut self) {
println!("{}", "🛑 Stopping all workers...".yellow().bold());
for worker in &mut self.worker_processes {
if let Some(mut process) = worker.process.take() {
println!("Stopping worker: {}", worker.id);
// Try graceful shutdown first
if let Err(e) = process.kill().await {
warn!("Failed to kill worker {}: {}", worker.id, e);
}
// Wait for process to exit
if let Ok(status) = process.wait().await {
println!("Worker {} exited with status: {:?}", worker.id, status);
} else {
warn!("Failed to wait for worker {} to exit", worker.id);
}
}
}
self.worker_processes.clear();
println!("{}", "✅ All workers stopped".green());
}
/// Submit a job and return the job ID
pub async fn submit_job(&self, script_type: ScriptType, script: &str) -> Result<String, Box<dyn std::error::Error>> {
let job = self.supervisor
.new_job()
.script_type(script_type.clone())
.script(script)
// Submit and run the job
match supervisor.new_job()
.script_type(ScriptType::OSIS)
.script("println('Hello from OSIS worker!')")
.timeout(Duration::from_secs(30))
.build()?;
let job_id = job.id.clone();
self.supervisor.create_job(&job).await?;
println!("{}", format!("📝 Job {} submitted for {}", job_id, script_type.as_str()).cyan());
Ok(job_id)
.await_response().await {
Ok(result) => {
println!("{}", format!("✅ Job completed successfully: {}", result).green());
}
/// Wait for job completion and return result
pub async fn wait_for_job_completion(&self, job_id: &str, timeout_duration: Duration) -> Result<String, Box<dyn std::error::Error>> {
let start_time = std::time::Instant::now();
println!("{}", format!("⏳ Waiting for job {} to complete...", job_id).yellow());
loop {
if start_time.elapsed() > timeout_duration {
return Err("Job execution timeout".into());
}
// Check job status using supervisor methods
match self.supervisor.get_job_status(job_id).await {
Ok(status) => {
match status {
JobStatus::Finished => {
if let Ok(Some(result)) = self.supervisor.get_job_output(job_id).await {
println!("{}", format!("✅ Job {} completed successfully", job_id).green());
return Ok(result);
}
}
JobStatus::Error => {
return Err("Job failed".into());
}
_ => {
// Job still running or waiting
}
}
}
Err(_) => {
// Job not found or error checking status
Err(e) => {
println!("{}", format!("❌ Job failed: {}", e).red());
}
}
sleep(Duration::from_millis(100)).await;
}
}
// Wait for interrupt signal
println!("{}", "\n⏳ Press Ctrl+C to shutdown gracefully...".yellow());
signal::ctrl_c().await?;
/// List all jobs
pub async fn list_jobs(&self) -> Result<Vec<String>, Box<dyn std::error::Error>> {
self.supervisor.list_jobs().await.map_err(|e| e.into())
}
println!("{}", "\n🛑 Shutdown signal received, cleaning up...".yellow().bold());
/// Clear all jobs
pub async fn clear_all_jobs(&self) -> Result<usize, Box<dyn std::error::Error>> {
self.supervisor.clear_all_jobs().await.map_err(|e| e.into())
}
// Cleanup workers before exit
supervisor.cleanup_and_shutdown().await?;
/// Get worker status
pub fn get_worker_status(&self) -> Vec<(String, ScriptType, bool)> {
self.worker_processes.iter().map(|w| {
(w.id.clone(), w.script_type.clone(), w.process.is_some())
}).collect()
}
}
impl Drop for Supervisor {
fn drop(&mut self) {
// Ensure workers are stopped when supervisor is dropped
if !self.worker_processes.is_empty() {
println!("{}", "⚠️ Supervisor dropping - stopping remaining workers".yellow());
}
}
println!("{}", "✅ Cleanup completed. Goodbye!".green().bold());
Ok(())
}
#[tokio::main]
@ -193,173 +72,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize logging
env_logger::Builder::from_default_env()
.filter_level(log::LevelFilter::Info)
.format_timestamp(None)
.init();
println!("{}", "🎯 Hero Supervisor-Worker End-to-End Demo".blue().bold());
println!("{}", "==========================================".blue());
println!("{}", "Hero Supervisor Demo".cyan().bold());
println!("{}", "This demo shows the new synchronous build API".yellow());
println!();
// Configuration
let redis_url = "redis://localhost:6379".to_string();
let worker_binary_path = "../../target/debug/worker";
// Check if worker binary exists
if !std::path::Path::new(worker_binary_path).exists() {
println!("{}", "❌ Worker binary not found!".red().bold());
println!("Please build the worker first:");
println!(" cd ../worker && cargo build");
return Err("Worker binary not found".into());
// Run the demo
if let Err(e) = run_supervisor_demo().await {
eprintln!("{}", format!("Demo failed: {}", e).red().bold());
std::process::exit(1);
}
// Create supervisor
println!("{}", "🏗️ Creating supervisor...".cyan());
let mut supervisor = Supervisor::new(redis_url).await?;
println!("{}", "✅ Supervisor created successfully".green());
println!();
// Clear any existing jobs
let cleared_count = supervisor.clear_all_jobs().await?;
if cleared_count > 0 {
println!("{}", format!("🧹 Cleared {} existing jobs", cleared_count).yellow());
}
// Demo 1: Start a HeroScript worker
println!("{}", "📋 Demo 1: Starting HeroScript Worker".blue().bold());
println!("{}", "------------------------------------".blue());
supervisor.start_worker(ScriptType::HeroScript, worker_binary_path).await?;
// Show worker status
let worker_status = supervisor.get_worker_status();
println!("Active workers:");
for (id, script_type, active) in worker_status {
let status = if active { "🟢 Running" } else { "🔴 Stopped" };
println!(" {} - {} ({})", id, script_type.as_str(), status);
}
println!();
// Demo 2: Submit and execute a simple job
println!("{}", "📋 Demo 2: Submit and Execute Job".blue().bold());
println!("{}", "---------------------------------".blue());
let script = r#"
print("Hello from HeroScript worker!");
let result = 42 + 8;
print("Calculation: 42 + 8 = " + result);
result
"#;
let job_id = supervisor.submit_job(ScriptType::HeroScript, script).await?;
// Wait for job completion
match supervisor.wait_for_job_completion(&job_id, Duration::from_secs(10)).await {
Ok(result) => {
println!("{}", format!("🎉 Job result: {}", result).green().bold());
}
Err(e) => {
println!("{}", format!("❌ Job failed: {}", e).red());
}
}
println!();
// Demo 3: Submit multiple jobs
println!("{}", "📋 Demo 3: Multiple Jobs".blue().bold());
println!("{}", "------------------------".blue());
let jobs = vec![
("Job 1", r#"print("Job 1 executing"); "job1_result""#),
("Job 2", r#"print("Job 2 executing"); 100 + 200"#),
("Job 3", r#"print("Job 3 executing"); "hello_world""#),
];
let mut job_ids = Vec::new();
for (name, script) in jobs {
let job_id = supervisor.submit_job(ScriptType::HeroScript, script).await?;
job_ids.push((name, job_id));
println!("{} submitted: {}", name, job_ids.last().unwrap().1);
}
// Wait for all jobs to complete
for (name, job_id) in job_ids {
match supervisor.wait_for_job_completion(&job_id, Duration::from_secs(5)).await {
Ok(result) => {
println!("{} completed: {}", name, result);
}
Err(e) => {
println!("{} failed: {}", name, e);
}
}
}
println!();
// Demo 4: Job management
println!("{}", "📋 Demo 4: Job Management".blue().bold());
println!("{}", "-------------------------".blue());
let all_jobs = supervisor.list_jobs().await?;
println!("Total jobs in system: {}", all_jobs.len());
if !all_jobs.is_empty() {
println!("Job IDs:");
for (i, job_id) in all_jobs.iter().enumerate() {
println!(" {}. {}", i + 1, job_id);
}
}
println!();
// Demo 5: Error handling
println!("{}", "📋 Demo 5: Error Handling".blue().bold());
println!("{}", "-------------------------".blue());
let error_script = r#"
print("This job will cause an error");
let x = undefined_variable; // This will cause an error
x
"#;
let error_job_id = supervisor.submit_job(ScriptType::HeroScript, error_script).await?;
match supervisor.wait_for_job_completion(&error_job_id, Duration::from_secs(5)).await {
Ok(result) => {
println!("Unexpected success: {}", result);
}
Err(e) => {
println!("{}", format!("Expected error handled: {}", e).yellow());
}
}
println!();
// Demo 6: Cleanup
println!("{}", "📋 Demo 6: Cleanup".blue().bold());
println!("{}", "-------------------".blue());
let final_job_count = supervisor.list_jobs().await?.len();
println!("Jobs before cleanup: {}", final_job_count);
let cleared = supervisor.clear_all_jobs().await?;
println!("Jobs cleared: {}", cleared);
let remaining_jobs = supervisor.list_jobs().await?.len();
println!("Jobs after cleanup: {}", remaining_jobs);
println!();
// Stop all workers
supervisor.stop_all_workers().await;
println!("{}", "🎉 Demo completed successfully!".green().bold());
println!();
println!("{}", "Key Features Demonstrated:".blue().bold());
println!(" ✅ Supervisor lifecycle management");
println!(" ✅ Worker process spawning and management");
println!(" ✅ Job submission and execution");
println!(" ✅ Real-time job monitoring");
println!(" ✅ Multiple job handling");
println!(" ✅ Error handling and recovery");
println!(" ✅ Resource cleanup");
println!();
println!("{}", "The supervisor successfully managed the complete worker lifecycle!".green());
Ok(())
}

188
core/job/src/builder.rs Normal file
View File

@ -0,0 +1,188 @@
use chrono::Utc;
use std::collections::HashMap;
use std::time::Duration;
use uuid::Uuid;
use crate::{Job, ScriptType, JobError};
/// Builder for constructing [`Job`] values.
///
/// This builder provides a fluent interface for configuring the parameters of
/// a script execution request. Call [`JobBuilder::build`] to validate the
/// configuration and obtain the [`Job`].
///
/// `context_id` and `caller_id` are required; `build` fails with
/// [`JobError::MissingField`] when either is empty. A request id is generated
/// automatically when none was set.
///
/// Submitting the job (fire-and-forget or request-reply) is not implemented on
/// this type in this file.
///
/// # Example
///
/// ```rust,ignore
/// use std::time::Duration;
///
/// let job = JobBuilder::new()
///     .caller_id("my-caller")
///     .context_id("my-context")
///     .script_type(ScriptType::OSIS)
///     .script(r#"print("Hello, World!");"#)
///     .timeout(Duration::from_secs(30))
///     .build()?;
/// ```
pub struct JobBuilder {
    /// Job id; a UUID v4 is generated by `build` when left empty.
    request_id: String,
    /// Required context identifier.
    context_id: String,
    /// Required caller identifier.
    caller_id: String,
    /// Script source text.
    script: String,
    /// Kind of script; defaults to `ScriptType::OSIS`.
    script_type: ScriptType,
    /// Execution timeout; defaults to 5 seconds.
    timeout: Duration,
    /// Retry count copied into the job (saturated to `u8::MAX`).
    retries: u32,
    /// Copied verbatim into `Job::concurrent`.
    concurrent: bool,
    /// Optional log path copied into the job.
    log_path: Option<String>,
    /// Environment variables copied into the job.
    env_vars: HashMap<String, String>,
    /// Ids of jobs that must complete before this job can run.
    prerequisites: Vec<String>,
    /// Ids of jobs that depend on this job completing.
    dependents: Vec<String>,
}

impl JobBuilder {
    /// Create a builder with empty ids and script, `ScriptType::OSIS`,
    /// a 5-second timeout, no retries and no environment variables.
    pub fn new() -> Self {
        Self {
            request_id: String::new(),
            context_id: String::new(),
            caller_id: String::new(),
            script: String::new(),
            script_type: ScriptType::OSIS, // Default to OSIS
            timeout: Duration::from_secs(5),
            retries: 0,
            concurrent: false,
            log_path: None,
            env_vars: HashMap::new(),
            prerequisites: Vec::new(),
            dependents: Vec::new(),
        }
    }

    /// Set an explicit request (job) id instead of a generated UUID.
    pub fn request_id(mut self, request_id: &str) -> Self {
        self.request_id = request_id.to_string();
        self
    }

    /// Set the type of script to execute.
    pub fn script_type(mut self, script_type: ScriptType) -> Self {
        self.script_type = script_type;
        self
    }

    /// Set the context id (required by `build`).
    pub fn context_id(mut self, context_id: &str) -> Self {
        self.context_id = context_id.to_string();
        self
    }

    /// Set the caller id (required by `build`).
    ///
    /// Without this setter `caller_id` could never be populated and `build`
    /// would always fail with `MissingField("caller_id")`.
    pub fn caller_id(mut self, caller_id: &str) -> Self {
        self.caller_id = caller_id.to_string();
        self
    }

    /// Set the script source text.
    pub fn script(mut self, script: &str) -> Self {
        self.script = script.to_string();
        self
    }

    /// Load the script source from a file.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be read. The signature returns `Self`, so the
    /// I/O error cannot be propagated without breaking callers; the panic
    /// message names the offending path.
    pub fn script_path(mut self, script_path: &str) -> Self {
        self.script = std::fs::read_to_string(script_path).unwrap_or_else(|err| {
            panic!("failed to read script file `{}`: {}", script_path, err)
        });
        self
    }

    /// Set the execution timeout.
    pub fn timeout(mut self, timeout: Duration) -> Self {
        self.timeout = timeout;
        self
    }

    /// Set the log path.
    pub fn log_path(mut self, log_path: &str) -> Self {
        self.log_path = Some(log_path.to_string());
        self
    }

    /// Set a single environment variable
    pub fn env_var(mut self, key: &str, value: &str) -> Self {
        self.env_vars.insert(key.to_string(), value.to_string());
        self
    }

    /// Add multiple environment variables from a HashMap (existing keys are overwritten)
    pub fn env_vars(mut self, env_vars: HashMap<String, String>) -> Self {
        self.env_vars.extend(env_vars);
        self
    }

    /// Clear all environment variables
    pub fn clear_env_vars(mut self) -> Self {
        self.env_vars.clear();
        self
    }

    /// Add a prerequisite job ID that must complete before this job can run
    pub fn prerequisite(mut self, job_id: &str) -> Self {
        self.prerequisites.push(job_id.to_string());
        self
    }

    /// Append multiple prerequisite job IDs
    pub fn prerequisites(mut self, job_ids: Vec<String>) -> Self {
        self.prerequisites.extend(job_ids);
        self
    }

    /// Add a dependent job ID that depends on this job completing
    pub fn dependent(mut self, job_id: &str) -> Self {
        self.dependents.push(job_id.to_string());
        self
    }

    /// Append multiple dependent job IDs
    pub fn dependents(mut self, job_ids: Vec<String>) -> Self {
        self.dependents.extend(job_ids);
        self
    }

    /// Clear all prerequisites
    pub fn clear_prerequisites(mut self) -> Self {
        self.prerequisites.clear();
        self
    }

    /// Clear all dependents
    pub fn clear_dependents(mut self) -> Self {
        self.dependents.clear();
        self
    }

    /// Validate the configuration and produce the [`Job`].
    ///
    /// `created_at` and `updated_at` are both set to the current UTC time.
    ///
    /// # Errors
    ///
    /// Returns [`JobError::MissingField`] when `context_id` or `caller_id`
    /// is empty.
    pub fn build(self) -> Result<Job, JobError> {
        if self.context_id.is_empty() {
            return Err(JobError::MissingField("context_id".to_string()));
        }
        if self.caller_id.is_empty() {
            return Err(JobError::MissingField("caller_id".to_string()));
        }

        let request_id = if self.request_id.is_empty() {
            // Generate a UUID for the request_id
            Uuid::new_v4().to_string()
        } else {
            self.request_id
        };

        let now = Utc::now();

        // `self` is consumed, so fields are moved rather than cloned.
        Ok(Job {
            id: request_id,
            caller_id: self.caller_id,
            context_id: self.context_id,
            script: self.script,
            script_type: self.script_type,
            timeout: self.timeout,
            // Saturate instead of silently wrapping on values above 255.
            retries: u8::try_from(self.retries).unwrap_or(u8::MAX),
            concurrent: self.concurrent,
            log_path: self.log_path,
            env_vars: self.env_vars,
            prerequisites: self.prerequisites,
            dependents: self.dependents,
            created_at: now,
            updated_at: now,
        })
    }
}

View File

@ -6,6 +6,8 @@ use uuid::Uuid;
use redis::AsyncCommands;
use thiserror::Error;
mod builder;
/// Redis namespace prefix for all Hero job-related keys
pub const NAMESPACE_PREFIX: &str = "hero:job:";

View File

@ -7,19 +7,35 @@ edition = "2021"
name = "supervisor"
path = "cmd/supervisor.rs"
[[bin]]
name = "hive-supervisor"
path = "cmd/hive_supervisor.rs"
[[bin]]
name = "hive-supervisor-tui"
path = "cmd/hive_supervisor_tui.rs"
[[bin]]
name = "hive-supervisor-tui-safe"
path = "cmd/hive_supervisor_tui_safe.rs"
[dependencies]
clap = { version = "4.4", features = ["derive"] }
env_logger = "0.10"
redis = { version = "0.25.0", features = ["tokio-comp"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.8"
uuid = { version = "1.6", features = ["v4", "serde"] }
chrono = { version = "0.4", features = ["serde"] }
log = "0.4"
tokio = { version = "1", features = ["macros", "rt-multi-thread"] } # For async main in examples, and general async
colored = "2.0"
hero_job = { path = "../job" }
zinit-client = "0.4.0"
zinit-client = { path = "/Users/timurgordon/code/github/threefoldtech/zinit/zinit-client" }
ratatui = "0.28"
crossterm = "0.28"
anyhow = "1.0"
[dev-dependencies] # For examples later
env_logger = "0.10"

View File

@ -8,8 +8,6 @@ The lifecycle management system provides:
- **Worker Process Management**: Start, stop, restart, and monitor worker binaries
- **Health Monitoring**: Automatic ping jobs every 10 minutes for idle workers
- **Load Balancing**: Dynamic scaling of workers based on demand
- **Service Dependencies**: Proper startup ordering with dependency management
- **Graceful Shutdown**: Clean termination of worker processes
## Architecture
@ -313,3 +311,9 @@ redis-cli keys "hero:job:*"
- **User Permissions**: Run workers with appropriate user permissions
- **Network Security**: Secure Redis and Zinit socket access
- **Binary Validation**: Verify worker binary integrity before deployment
## Future
- **Load Balancing**: Dynamic scaling of workers based on demand
- **Service Dependencies**: Proper startup ordering with dependency management

View File

@ -1,157 +1,66 @@
# Rhai Client Binary
# Supervisor CLI
A command-line client for executing Rhai scripts on remote workers via Redis.
A command-line interface for the Hero Supervisor.
## Binary: `client`
## Binary: `hive-supervisor`
### Installation
Build the binary:
```bash
cargo build --bin client --release
cargo build --bin hive-supervisor --release
```
### Usage
```bash
# Basic usage - requires caller and circle keys
client --caller-key <CALLER_KEY> --circle-key <CIRCLE_KEY>
# Execute inline script
client -c <CALLER_KEY> -k <CIRCLE_KEY> --script "print('Hello World!')"
# Execute script from file
client -c <CALLER_KEY> -k <CIRCLE_KEY> --file script.rhai
# Use specific worker (defaults to circle key)
client -c <CALLER_KEY> -k <CIRCLE_KEY> -w <WORKER_KEY> --script "2 + 2"
# Custom Redis and timeout
client -c <CALLER_KEY> -k <CIRCLE_KEY> --redis-url redis://localhost:6379/1 --timeout 60
# Remove timestamps from logs
client -c <CALLER_KEY> -k <CIRCLE_KEY> --no-timestamp
# Increase verbosity
client -c <CALLER_KEY> -k <CIRCLE_KEY> -v --script "debug_info()"
```
### Command-Line Options
| Option | Short | Default | Description |
|--------|-------|---------|-------------|
| `--caller-key` | `-c` | **Required** | Caller public key (your identity) |
| `--circle-key` | `-k` | **Required** | Circle public key (execution context) |
| `--worker-key` | `-w` | `circle-key` | Worker public key (target worker) |
| `--redis-url` | `-r` | `redis://localhost:6379` | Redis connection URL |
| `--script` | `-s` | | Rhai script to execute |
| `--file` | `-f` | | Path to Rhai script file |
| `--timeout` | `-t` | `30` | Timeout for script execution (seconds) |
| `--no-timestamp` | | `false` | Remove timestamps from log output |
| `--verbose` | `-v` | | Increase verbosity (stackable) |
### Execution Modes
#### Inline Script Execution
```bash
# Execute a simple calculation
client -c caller_123 -k circle_456 -s "let result = 2 + 2; print(result);"
# Execute with specific worker
client -c caller_123 -k circle_456 -w worker_789 -s "get_user_data()"
```
#### Script File Execution
```bash
# Execute script from file
client -c caller_123 -k circle_456 -f examples/data_processing.rhai
# Execute with custom timeout
client -c caller_123 -k circle_456 -f long_running_script.rhai -t 120
```
#### Interactive Mode
```bash
# Enter interactive REPL mode (when no script or file provided)
client -c caller_123 -k circle_456
# Interactive mode with verbose logging
client -c caller_123 -k circle_456 -v --no-timestamp
```
### Interactive Mode
When no script (`-s`) or file (`-f`) is provided, the client enters interactive mode:
# Basic usage
hive-supervisor --config <CONFIG_PATH>
```
🔗 Starting Rhai Client
📋 Configuration:
Caller Key: caller_123
Circle Key: circle_456
Worker Key: circle_456
Redis URL: redis://localhost:6379
Timeout: 30s
✅ Connected to Redis at redis://localhost:6379
🎮 Entering interactive mode
Type Rhai scripts and press Enter to execute. Type 'exit' or 'quit' to close.
rhai> let x = 42; print(x);
Status: completed
Output: 42
rhai> exit
👋 Goodbye!
Where `<CONFIG_PATH>` points to a TOML file with the following structure:
```toml
[global]
redis_url = "redis://localhost:6379"
[osis_worker]
binary_path = "/path/to/osis_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
[sal_worker]
binary_path = "/path/to/sal_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
[v_worker]
binary_path = "/path/to/v_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
[python_worker]
binary_path = "/path/to/python_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
```
### Configuration Examples
#### Development Usage
```bash
# Simple development client
client -c dev_user -k dev_circle
TODO: add verbosity settings and similar options.
The CLI offers a few commands:
# Development with clean logs
client -c dev_user -k dev_circle --no-timestamp -v
```
workers:
start
stop
restart
status
logs
list
#### Production Usage
```bash
# Production client with specific worker
client \
--caller-key prod_user_123 \
--circle-key prod_circle_456 \
--worker-key prod_worker_789 \
--redis-url redis://redis-cluster:6379/0 \
--timeout 300 \
--file production_script.rhai
```
jobs:
create
start
stop
restart
status
logs
list
#### Batch Processing
```bash
# Process multiple scripts
for script in scripts/*.rhai; do
client -c batch_user -k batch_circle -f "$script" --no-timestamp
done
```
### Key Concepts
- **Caller Key**: Your identity - used for authentication and tracking
- **Circle Key**: Execution context - defines the environment/permissions
- **Worker Key**: Target worker - which worker should execute the script (defaults to circle key)
### Error Handling
The client provides clear error messages for:
- Missing required keys
- Redis connection failures
- Script execution timeouts
- Worker unavailability
- Script syntax errors
### Dependencies
- `rhai_supervisor`: Core client library for Redis-based script execution
- `redis`: Redis client for task queue communication
- `clap`: Command-line argument parsing
- `env_logger`: Logging infrastructure
- `tokio`: Async runtime
repl: enter interactive mode to run scripts; predefine the caller_id, context_id and worker type first so that the supervisor dispatches jobs accordingly

View File

@ -0,0 +1,365 @@
use anyhow::Result;
use clap::Parser;
use crossterm::{
event::{self, DisableMouseCapture, EnableMouseCapture, Event, KeyCode, KeyEventKind},
execute,
terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen},
};
use hero_supervisor::{Supervisor, SupervisorBuilder};
use zinit_client::ZinitClient;
use log::{error, info};
use ratatui::{
backend::CrosstermBackend,
layout::{Constraint, Direction, Layout, Rect},
style::{Color, Modifier, Style},
text::Line,
widgets::{
Block, Borders, List, ListItem, Paragraph, Tabs, Wrap,
},
Frame, Terminal,
};
use std::{
io,
path::PathBuf,
sync::Arc,
time::{Duration, Instant},
};
use tokio::time::sleep;
use toml;
use serde::Deserialize;
// Command-line arguments for the supervisor TUI, parsed with clap's derive API.
// NOTE(review): plain `//` comments are used here on purpose; clap derive turns
// `///` doc comments into help text, which could interact with the explicit
// `about` / `help` values below.
#[derive(Parser)]
#[command(name = "hive-supervisor-tui")]
#[command(about = "Hero Supervisor Terminal User Interface")]
struct Args {
// Path of the TOML configuration file that `main` reads and parses into `Config`.
#[arg(short, long, help = "Configuration file path")]
config: PathBuf,
// When set, `main` uses "debug" instead of "info" as the default log filter.
#[arg(short, long, help = "Enable verbose logging")]
verbose: bool,
}
/// Top-level layout of the TOML configuration file.
#[derive(Debug, Deserialize)]
struct Config {
/// Contents of the `[global]` table.
global: GlobalConfig,
/// Every other top-level table, keyed by its table name (for example
/// `osis_worker`). `main` matches the key against the known worker types.
/// NOTE(review): because of `flatten`, each remaining table must deserialize
/// as a `WorkerConfigToml`; a non-worker table presumably makes parsing fail — confirm.
#[serde(flatten)]
workers: std::collections::HashMap<String, WorkerConfigToml>,
}
/// Settings from the `[global]` table of the configuration file.
#[derive(Debug, Deserialize)]
struct GlobalConfig {
/// Redis connection URL; `main` passes it to `SupervisorBuilder::redis_url`.
redis_url: String,
}
/// Settings of a single worker table in the configuration file.
#[derive(Debug, Deserialize)]
struct WorkerConfigToml {
/// Path of the worker executable; `main` hands it to the matching
/// `SupervisorBuilder` method (`osis_worker`, `sal_worker`, ...).
binary_path: String,
/// Optional environment variables; `main` forwards each key/value pair to
/// `SupervisorBuilder::worker_env_var`.
env_vars: Option<std::collections::HashMap<String, String>>,
}
/// Identifies one tab of the TUI. Variants are declared in tab-bar order.
#[derive(Debug, Clone, PartialEq)]
enum TabId {
    Dashboard,
    Workers,
    Jobs,
    Logs,
}

impl TabId {
    /// Returns every tab in the order it appears in the tab bar.
    fn all() -> Vec<TabId> {
        Vec::from([Self::Dashboard, Self::Workers, Self::Jobs, Self::Logs])
    }

    /// Returns the label shown for this tab in the tab bar.
    fn title(&self) -> &str {
        match *self {
            Self::Dashboard => "Dashboard",
            Self::Workers => "Workers",
            Self::Jobs => "Jobs",
            Self::Logs => "Logs",
        }
    }
}
/// Mutable state of the TUI, used by the key handler and the render functions.
struct App {
/// Handle to the supervisor.
/// NOTE(review): stored by `App::new` but not read by any function visible in this file.
supervisor: Arc<Supervisor>,
/// Tab whose content is currently drawn.
current_tab: TabId,
/// Set when the user presses `q`; the event loop exits once it is true.
should_quit: bool,
/// Log lines shown on the Dashboard and Logs tabs; `add_log` keeps at most 100 entries.
logs: Vec<String>,
/// Initialised to the creation time by `App::new`.
/// NOTE(review): never updated or read in the visible code.
last_update: Instant,
}
impl App {
    /// Creates the initial UI state: Dashboard tab selected, one start-up log line.
    fn new(supervisor: Arc<Supervisor>) -> Self {
        let logs = vec!["TUI started successfully".to_string()];
        Self {
            supervisor,
            current_tab: TabId::Dashboard,
            should_quit: false,
            logs,
            last_update: Instant::now(),
        }
    }

    /// Selects the tab to the right of the current one, wrapping to the first.
    fn next_tab(&mut self) {
        let tabs = TabId::all();
        let here = tabs
            .iter()
            .position(|tab| *tab == self.current_tab)
            .unwrap_or(0);
        self.current_tab = tabs[(here + 1) % tabs.len()].clone();
    }

    /// Selects the tab to the left of the current one, wrapping to the last.
    fn prev_tab(&mut self) {
        let tabs = TabId::all();
        let here = tabs
            .iter()
            .position(|tab| *tab == self.current_tab)
            .unwrap_or(0);
        // Stepping left from index 0 wraps around to the final tab.
        let target = match here {
            0 => tabs.len() - 1,
            n => n - 1,
        };
        self.current_tab = tabs[target].clone();
    }

    /// Appends `message` prefixed with the current UTC time (`HH:MM:SS`) and
    /// drops the oldest entry once more than 100 lines are stored.
    fn add_log(&mut self, message: String) {
        let stamp = chrono::Utc::now().format("%H:%M:%S");
        self.logs.push(format!("[{}] {}", stamp, message));
        if self.logs.len() > 100 {
            self.logs.remove(0);
        }
    }

    /// Applies a key press to the UI state.
    ///
    /// Returns `true` only when the key requests quitting (`q`); Tab and
    /// Shift-Tab switch tabs, every other key is ignored.
    fn handle_key(&mut self, key: KeyCode) -> bool {
        match key {
            KeyCode::Char('q') => {
                self.should_quit = true;
                return true;
            }
            KeyCode::Tab => self.next_tab(),
            KeyCode::BackTab => self.prev_tab(),
            _ => {}
        }
        false
    }
}
/// Draws one full frame: the tab bar on top and the selected tab's content below.
fn render_ui(f: &mut Frame, app: &mut App) {
    // Three rows for the bordered tab bar, the remainder for the tab content.
    let layout = Layout::default()
        .direction(Direction::Vertical)
        .constraints([Constraint::Length(3), Constraint::Min(0)].as_ref())
        .split(f.area());
    let (tab_bar_area, content_area) = (layout[0], layout[1]);

    // Tab bar
    let all_tabs = TabId::all();
    let active = all_tabs
        .iter()
        .position(|tab| *tab == app.current_tab)
        .unwrap_or(0);
    let titles: Vec<Line> = all_tabs.iter().map(|tab| Line::from(tab.title())).collect();
    let frame_block = Block::default()
        .borders(Borders::ALL)
        .title("Hero Supervisor TUI");
    let tab_bar = Tabs::new(titles)
        .block(frame_block)
        .select(active)
        .style(Style::default().fg(Color::Cyan))
        .highlight_style(Style::default().add_modifier(Modifier::BOLD).bg(Color::Black));
    f.render_widget(tab_bar, tab_bar_area);

    // Content of the selected tab
    match app.current_tab {
        TabId::Dashboard => render_dashboard(f, content_area, app),
        TabId::Workers => render_workers(f, content_area, app),
        TabId::Jobs => render_jobs(f, content_area, app),
        TabId::Logs => render_logs(f, content_area, app),
    }
}
/// Draws the Dashboard tab: a fixed status panel above the ten newest log lines.
fn render_dashboard(f: &mut Frame, area: Rect, app: &App) {
    // The text is static: the TUI only starts after the supervisor came up.
    const STATUS_TEXT: &str = "Status: ✓ Running\nWorkers: Started successfully\nJobs: Ready for processing\n\nPress 'q' to quit, Tab to navigate";

    let sections = Layout::default()
        .direction(Direction::Vertical)
        .constraints([Constraint::Length(7), Constraint::Min(0)].as_ref())
        .split(area);

    let status_panel = Paragraph::new(STATUS_TEXT)
        .block(Block::default().borders(Borders::ALL).title("System Status"))
        .wrap(Wrap { trim: true });
    f.render_widget(status_panel, sections[0]);

    // Newest entries first, at most ten of them.
    let mut recent: Vec<ListItem> = Vec::new();
    for entry in app.logs.iter().rev().take(10) {
        recent.push(ListItem::new(entry.as_str()));
    }
    let activity_panel =
        List::new(recent).block(Block::default().borders(Borders::ALL).title("Recent Activity"));
    f.render_widget(activity_panel, sections[1]);
}
/// Draws the Workers tab, which currently shows only a placeholder message.
fn render_workers(f: &mut Frame, area: Rect, _app: &App) {
    let frame_block = Block::default().borders(Borders::ALL).title("Workers");
    let placeholder = Paragraph::new(
        "Workers tab - Status checking not implemented yet to avoid system issues",
    )
    .block(frame_block)
    .wrap(Wrap { trim: true });
    f.render_widget(placeholder, area);
}
/// Draws the Jobs tab, which currently shows only a placeholder message.
fn render_jobs(f: &mut Frame, area: Rect, _app: &App) {
    let frame_block = Block::default().borders(Borders::ALL).title("Jobs");
    let placeholder = Paragraph::new(
        "Jobs tab - Job monitoring not implemented yet to avoid system issues",
    )
    .block(frame_block)
    .wrap(Wrap { trim: true });
    f.render_widget(placeholder, area);
}
/// Draws the Logs tab: every stored log line, oldest first.
fn render_logs(f: &mut Frame, area: Rect, app: &App) {
    let mut entries: Vec<ListItem> = Vec::new();
    for line in &app.logs {
        entries.push(ListItem::new(line.as_str()));
    }
    let frame_block = Block::default().borders(Borders::ALL).title("System Logs");
    f.render_widget(List::new(entries).block(frame_block), area);
}
/// Runs the draw/input loop until the user quits.
///
/// Each iteration redraws the UI, waits up to 100 ms for a terminal event and
/// forwards key presses to [`App::handle_key`]. Returns an error if drawing or
/// reading an event fails.
async fn run_app(
    terminal: &mut Terminal<CrosstermBackend<io::Stdout>>,
    app: &mut App,
) -> Result<()> {
    loop {
        terminal.draw(|f| render_ui(f, app))?;

        let event_ready = event::poll(Duration::from_millis(100))?;
        if event_ready {
            match event::read()? {
                // Only key presses are handled; other key event kinds and
                // non-key events are ignored.
                Event::Key(key) if key.kind == KeyEventKind::Press => {
                    if app.handle_key(key.code) {
                        return Ok(());
                    }
                }
                _ => {}
            }
        }

        if app.should_quit {
            return Ok(());
        }

        // Short pause between iterations to keep CPU usage low.
        sleep(Duration::from_millis(50)).await;
    }
}
#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();
// Initialize logging
if args.verbose {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("debug")).init();
} else {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
}
info!("Hero Supervisor TUI - Fail-fast initialization");
// Step 1: Load and parse configuration
info!("Step 1/4: Loading configuration from {:?}", args.config);
let config_content = std::fs::read_to_string(&args.config)
.map_err(|e| anyhow::anyhow!("Failed to read config file: {}", e))?;
let config: Config = toml::from_str(&config_content)
.map_err(|e| anyhow::anyhow!("Failed to parse config file: {}", e))?;
info!("✓ Configuration loaded successfully");
// Step 2: Check if Zinit is running
info!("Step 2/4: Checking if Zinit is running...");
let zinit_client = ZinitClient::new("/tmp/zinit.sock");
match zinit_client.status("_test_connectivity").await {
Ok(_) => {
info!("✓ Zinit is running and accessible");
}
Err(e) => {
let error_msg = e.to_string();
if error_msg.contains("Connection refused") || error_msg.contains("No such file") {
eprintln!("Error: Zinit process manager is not running.");
eprintln!("Please start Zinit before running the supervisor TUI.");
eprintln!("Expected Zinit socket at: /tmp/zinit.sock");
std::process::exit(1);
} else {
info!("✓ Zinit is running (service not found is expected)");
}
}
}
// Step 3: Build supervisor
info!("Step 3/4: Building supervisor...");
let mut builder = SupervisorBuilder::new()
.redis_url(&config.global.redis_url);
for (worker_name, worker_config) in &config.workers {
match worker_name.as_str() {
"osis_worker" => builder = builder.osis_worker(&worker_config.binary_path),
"sal_worker" => builder = builder.sal_worker(&worker_config.binary_path),
"v_worker" => builder = builder.v_worker(&worker_config.binary_path),
"python_worker" => builder = builder.python_worker(&worker_config.binary_path),
_ => log::warn!("Unknown worker type: {}", worker_name),
}
if let Some(env_vars) = &worker_config.env_vars {
for (key, value) in env_vars {
builder = builder.worker_env_var(key, value);
}
}
}
let supervisor = Arc::new(builder.build()
.map_err(|e| anyhow::anyhow!("Failed to build supervisor: {}", e))?);
info!("✓ Supervisor built successfully");
// Step 4: Start supervisor and workers
info!("Step 4/4: Starting supervisor and workers...");
supervisor.start_workers().await
.map_err(|e| anyhow::anyhow!("Failed to start workers: {}", e))?;
info!("✓ All workers started successfully");
// All initialization successful - now start TUI
info!("Initialization complete - starting TUI...");
let mut app = App::new(Arc::clone(&supervisor));
// Setup terminal
enable_raw_mode()?;
let mut stdout = io::stdout();
execute!(stdout, EnterAlternateScreen, EnableMouseCapture)?;
let backend = CrosstermBackend::new(stdout);
let mut terminal = Terminal::new(backend)?;
// Run the app
let result = run_app(&mut terminal, &mut app).await;
// Cleanup
disable_raw_mode()?;
execute!(
terminal.backend_mut(),
LeaveAlternateScreen,
DisableMouseCapture
)?;
terminal.show_cursor()?;
// Cleanup supervisor
if let Err(e) = supervisor.cleanup_and_shutdown().await {
error!("Error during cleanup: {}", e);
}
info!("Hero Supervisor TUI shutdown complete");
result
}

View File

@ -1,190 +0,0 @@
# Architecture of the `rhai_supervisor` Crate
The `rhai_supervisor` crate provides a Redis-based client library for submitting Rhai scripts to distributed worker services and awaiting their execution results. It implements a request-reply pattern using Redis as the message broker.
## Core Architecture
The client follows a builder pattern design with clear separation of concerns:
```mermaid
graph TD
A[RhaiSupervisorBuilder] --> B[RhaiSupervisor]
B --> C[PlayRequestBuilder]
C --> D[PlayRequest]
D --> E[Redis Task Queue]
E --> F[Worker Service]
F --> G[Redis Reply Queue]
G --> H[Client Response]
subgraph "Client Components"
A
B
C
D
end
subgraph "Redis Infrastructure"
E
G
end
subgraph "External Services"
F
end
```
## Key Components
### 1. RhaiSupervisorBuilder
A builder pattern implementation for constructing `RhaiSupervisor` instances with proper configuration validation.
**Responsibilities:**
- Configure Redis connection URL
- Set caller ID for task attribution
- Validate configuration before building client
**Key Methods:**
- `caller_id(id: &str)` - Sets the caller identifier
- `redis_url(url: &str)` - Configures Redis connection
- `build()` - Creates the final `RhaiSupervisor` instance
### 2. RhaiSupervisor
The main client interface that manages Redis connections and provides factory methods for creating play requests.
**Responsibilities:**
- Maintain Redis connection pool
- Provide factory methods for request builders
- Handle low-level Redis operations
- Manage task status queries
**Key Methods:**
- `new_play_request()` - Creates a new `PlayRequestBuilder`
- `get_task_status(task_id)` - Queries task status from Redis
- Internal methods for Redis operations
### 3. PlayRequestBuilder
A fluent builder for constructing and submitting script execution requests.
**Responsibilities:**
- Configure script execution parameters
- Handle script loading from files or strings
- Manage request timeouts
- Provide submission methods (fire-and-forget vs await-response)
**Key Methods:**
- `worker_id(id: &str)` - Target worker queue (determines which worker processes the task)
- `context_id(id: &str)` - Target context ID (determines execution context/circle)
- `script(content: &str)` - Set script content directly
- `script_path(path: &str)` - Load script from file
- `timeout(duration: Duration)` - Set execution timeout
- `submit()` - Fire-and-forget submission
- `await_response()` - Submit and wait for result
**Architecture Note:** The decoupling of `worker_id` and `context_id` allows a single worker to process tasks for multiple contexts (circles), providing greater deployment flexibility.
### 4. Data Structures
#### RhaiTaskDetails
Represents the complete state of a task throughout its lifecycle.
```rust
pub struct RhaiTaskDetails {
pub task_id: String,
pub script: String,
pub status: String, // "pending", "processing", "completed", "error"
pub output: Option<String>,
pub error: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub caller_id: String,
}
```
#### RhaiSupervisorError
Comprehensive error handling for various failure scenarios:
- `RedisError` - Redis connection/operation failures
- `SerializationError` - JSON serialization/deserialization issues
- `Timeout` - Task execution timeouts
- `TaskNotFound` - Missing tasks after submission
## Communication Protocol
### Task Submission Flow
1. **Task Creation**: Client generates unique UUID for task identification
2. **Task Storage**: Task details stored in Redis hash: `rhailib:<task_id>`
3. **Queue Submission**: Task ID pushed to worker queue: `rhailib:<worker_id>`
4. **Reply Queue Setup**: Client listens on: `rhailib:reply:<task_id>`
### Redis Key Patterns
- **Task Storage**: `rhailib:<task_id>` (Redis Hash)
- **Worker Queues**: `rhailib:<worker_id>` (Redis List)
- **Reply Queues**: `rhailib:reply:<task_id>` (Redis List)
### Message Flow Diagram
```mermaid
sequenceDiagram
participant C as Client
participant R as Redis
participant W as Worker
C->>R: HSET rhailib:task_id (task details)
C->>R: LPUSH rhailib:worker_id task_id
C->>R: BLPOP rhailib:reply:task_id (blocking)
W->>R: BRPOP rhailib:worker_id (blocking)
W->>W: Execute Rhai Script
W->>R: LPUSH rhailib:reply:task_id (result)
R->>C: Return result from BLPOP
C->>R: DEL rhailib:reply:task_id (cleanup)
```
## Concurrency and Async Design
The client is built on `tokio` for asynchronous operations:
- **Connection Pooling**: Uses Redis multiplexed connections for efficiency
- **Non-blocking Operations**: All Redis operations are async
- **Timeout Handling**: Configurable timeouts with proper cleanup
- **Error Propagation**: Comprehensive error handling with context
## Configuration and Deployment
### Prerequisites
- Redis server accessible to both client and workers
- Proper network connectivity between components
- Sufficient Redis memory for task storage
### Configuration Options
- **Redis URL**: Connection string for Redis instance
- **Caller ID**: Unique identifier for client instance
- **Timeouts**: Per-request timeout configuration
- **Worker Targeting**: Direct worker queue addressing
## Security Considerations
- **Task Isolation**: Each task uses unique identifiers
- **Queue Separation**: Worker-specific queues prevent cross-contamination
- **Cleanup**: Automatic cleanup of reply queues after completion
- **Error Handling**: Secure error propagation without sensitive data leakage
## Performance Characteristics
- **Scalability**: Horizontal scaling through multiple worker instances
- **Throughput**: Limited by Redis performance and network latency
- **Memory Usage**: Efficient with connection pooling and cleanup
- **Latency**: Low latency for local Redis deployments
## Integration Points
The client integrates with:
- **Worker Services**: Via Redis queue protocol
- **Monitoring Systems**: Through structured logging
- **Application Code**: Via builder pattern API
- **Configuration Systems**: Through environment variables and builders

View File

@ -0,0 +1,185 @@
# Hero Supervisor CLI Example
This example demonstrates how to use the `hive-supervisor` CLI tool for managing workers and jobs in the Hero ecosystem.
## Prerequisites
1. **Redis Server**: Make sure Redis is running on `localhost:6379`
```bash
# Install Redis (macOS)
brew install redis
# Start Redis
redis-server
```
2. **Zinit Process Manager**: Install and configure Zinit
```bash
# Install Zinit (example for Linux/macOS)
# Follow Zinit installation instructions for your platform
```
3. **Worker Binaries**: The configuration references worker binaries that need to be available:
- `/usr/local/bin/osis_worker`
- `/usr/local/bin/sal_worker`
- `/usr/local/bin/v_worker`
- `/usr/local/bin/python_worker`
For testing purposes, you can create mock worker binaries or update the paths in `config.toml` to point to existing binaries.
## Configuration
The `config.toml` file contains the supervisor configuration:
- **Global settings**: Redis URL
- **Worker configurations**: Binary paths and environment variables for each worker type
## Usage Examples
### 1. Build the CLI
```bash
# From the supervisor directory
cargo build --bin hive-supervisor --release
```
### 2. Worker Management
```bash
# Show help
./target/release/hive-supervisor --config examples/cli/config.toml --help
# List all configured workers
./target/release/hive-supervisor --config examples/cli/config.toml workers list
# Start all workers
./target/release/hive-supervisor --config examples/cli/config.toml workers start
# Start specific workers
./target/release/hive-supervisor --config examples/cli/config.toml workers start osis_worker sal_worker
# Check worker status
./target/release/hive-supervisor --config examples/cli/config.toml workers status
# Stop all workers
./target/release/hive-supervisor --config examples/cli/config.toml workers stop
# Restart specific worker
./target/release/hive-supervisor --config examples/cli/config.toml workers restart osis_worker
```
### 3. Job Management
```bash
# Create a job with inline script
./target/release/hive-supervisor --config examples/cli/config.toml jobs create \
--script 'print("Hello from OSIS worker!");' \
--script-type osis \
--caller-id "user123" \
--context-id "session456"
# Create a job from file
./target/release/hive-supervisor --config examples/cli/config.toml jobs create \
--file examples/cli/sample_script.rhai \
--script-type osis \
--caller-id "user123" \
--context-id "session456"
# List all jobs
./target/release/hive-supervisor --config examples/cli/config.toml jobs list
# Check job status
./target/release/hive-supervisor --config examples/cli/config.toml jobs status <JOB_ID>
# View job logs
./target/release/hive-supervisor --config examples/cli/config.toml jobs logs <JOB_ID>
# Stop a job
./target/release/hive-supervisor --config examples/cli/config.toml jobs stop <JOB_ID>
```
### 4. Interactive REPL Mode
```bash
# Enter REPL mode for OSIS scripts
./target/release/hive-supervisor --config examples/cli/config.toml repl \
--caller-id "user123" \
--context-id "session456" \
--script-type osis \
--timeout 60
# In REPL mode, you can:
# - Type scripts directly and press Enter to execute
# - Type 'help' for available commands
# - Type 'exit' or 'quit' to leave REPL mode
```
### 5. Verbose Logging
```bash
# Enable debug logging
./target/release/hive-supervisor --config examples/cli/config.toml -v workers status
# Enable trace logging
./target/release/hive-supervisor --config examples/cli/config.toml -vv workers status
# Disable timestamps
./target/release/hive-supervisor --config examples/cli/config.toml --no-timestamp workers status
```
## Sample Scripts
The `sample_scripts/` directory contains example scripts for different worker types:
- `hello_osis.rhai` - Simple OSIS/HeroScript example
- `system_sal.rhai` - SAL system operation example
- `math_v.v` - V language calculation example
- `data_python.py` - Python data processing example
## Troubleshooting
### Common Issues
1. **Redis Connection Error**
- Ensure Redis is running: `redis-cli ping`
- Check the Redis URL in `config.toml`
2. **Zinit Socket Error**
- Verify Zinit is running and the socket path is correct
- Check permissions on the socket file
3. **Worker Binary Not Found**
- Update binary paths in `config.toml` to match your system
- Ensure worker binaries are executable
4. **Permission Denied**
- Check file permissions on configuration and binary files
- Ensure the user has access to the Zinit socket
### Debug Mode
Run with verbose logging to see detailed operation information:
```bash
RUST_LOG=debug ./target/release/hive-supervisor --config examples/cli/config.toml -vv workers status
```
## Configuration Customization
You can customize the configuration for your environment:
1. **Update Redis URL**: Change `redis_url` in the `[global]` section
2. **Zinit Socket**: The example `config.toml` does not define a Zinit socket path; make sure Zinit is running and listening on the socket the supervisor expects
3. **Worker Paths**: Update binary paths in worker sections to match your setup
4. **Environment Variables**: Add or modify environment variables for each worker type
## Integration with Hero Ecosystem
This CLI integrates with the broader Hero ecosystem:
- **Job Queue**: Uses Redis for job queuing and status tracking
- **Process Management**: Uses Zinit for worker lifecycle management
- **Script Execution**: Supports multiple script types (OSIS, SAL, V, Python)
- **Monitoring**: Provides real-time status and logging capabilities
For more information about the Hero ecosystem, see the main project documentation.

View File

@ -0,0 +1,19 @@
# Hero Supervisor CLI Configuration Example
# This configuration demonstrates how to set up the hive-supervisor CLI
# with different worker types for script execution.
[global]
# Redis connection URL for job queuing
redis_url = "redis://localhost:6379"
# OSIS Worker Configuration
# Handles OSIS (HeroScript) execution
[osis_worker]
binary_path = "/Users/timurgordon/code/git.ourworld.tf/herocode/hero/target/debug/osis"
env_vars = { "RUST_LOG" = "info", "WORKER_TYPE" = "osis", "MAX_CONCURRENT_JOBS" = "5" }
# SAL Worker Configuration
# Handles System Abstraction Layer scripts
[sal_worker]
binary_path = "/Users/timurgordon/code/git.ourworld.tf/herocode/hero/target/debug/sal"
env_vars = { "RUST_LOG" = "info", "WORKER_TYPE" = "sal", "MAX_CONCURRENT_JOBS" = "3" }

View File

@ -0,0 +1,144 @@
#!/bin/bash
# Hero Supervisor CLI Example Runner
# This script demonstrates various CLI operations
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SUPERVISOR_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
CONFIG_FILE="$SCRIPT_DIR/config.toml"
CLI_BINARY="$SUPERVISOR_DIR/target/release/hive-supervisor"
echo -e "${BLUE}=== Hero Supervisor CLI Example Runner ===${NC}"
echo "Script directory: $SCRIPT_DIR"
echo "Supervisor directory: $SUPERVISOR_DIR"
echo "Configuration file: $CONFIG_FILE"
echo
# Function to run CLI command with error handling
# Run one CLI command against the example configuration.
#   $1   - human-readable description printed before the command
#   $2.. - arguments passed to the CLI after `--config <file>`
# Prints the command line, then a success marker; on failure prints a failure
# marker and returns 1.
run_cli() {
    local description="$1"
    shift

    echo -e "${YELLOW}Running: $description${NC}"
    echo "Command: $CLI_BINARY --config $CONFIG_FILE $*"
    echo

    if ! "$CLI_BINARY" --config "$CONFIG_FILE" "$@"; then
        echo -e "${RED}✗ Failed${NC}"
        return 1
    fi

    echo -e "${GREEN}✓ Success${NC}"
    echo
}
# Build the CLI binary if it does not exist yet. The build runs in a subshell
# so the `cd` does not change the working directory for the rest of the script.
if [[ ! -f "$CLI_BINARY" ]]; then
    echo -e "${YELLOW}Building CLI binary...${NC}"
    (cd "$SUPERVISOR_DIR" && cargo build --bin hive-supervisor --release)
    echo
fi
# Check if config file exists
if [[ ! -f "$CONFIG_FILE" ]]; then
echo -e "${RED}Error: Configuration file not found: $CONFIG_FILE${NC}"
exit 1
fi
echo -e "${BLUE}=== CLI Help and Information ===${NC}"
run_cli "Show main help" --help
echo -e "${BLUE}=== Worker Management Examples ===${NC}"
run_cli "List configured workers" workers list
run_cli "Show worker management help" workers --help
# Note: These commands would require actual worker binaries and Zinit setup
echo -e "${YELLOW}Note: The following commands require actual worker binaries and Zinit setup${NC}"
echo -e "${YELLOW}They are shown for demonstration but may fail without proper setup${NC}"
echo
# Uncomment these if you have the proper setup
# run_cli "Check worker status" workers status
# run_cli "Start all workers" workers start
# run_cli "Check worker status after start" workers status
echo -e "${BLUE}=== Job Management Examples ===${NC}"
run_cli "Show job management help" jobs --help
# Create sample jobs (these will also require workers to be running)
echo -e "${YELLOW}Sample job creation commands (require running workers):${NC}"
echo
echo "# Create OSIS job with inline script:"
echo "$CLI_BINARY --config $CONFIG_FILE jobs create \\"
echo " --script 'print(\"Hello from CLI!\");' \\"
echo " --script-type osis \\"
echo " --caller-id \"cli_demo\" \\"
echo " --context-id \"example_session\""
echo
echo "# Create job from sample script file:"
echo "$CLI_BINARY --config $CONFIG_FILE jobs create \\"
echo " --file \"$SCRIPT_DIR/sample_scripts/hello_osis.rhai\" \\"
echo " --script-type osis \\"
echo " --caller-id \"cli_demo\" \\"
echo " --context-id \"example_session\""
echo
echo "# List all jobs:"
echo "$CLI_BINARY --config $CONFIG_FILE jobs list"
echo
echo "# Check job status (replace JOB_ID with actual job ID):"
echo "$CLI_BINARY --config $CONFIG_FILE jobs status JOB_ID"
echo
echo -e "${BLUE}=== REPL Mode Example ===${NC}"
echo -e "${YELLOW}REPL mode command (interactive):${NC}"
echo "$CLI_BINARY --config $CONFIG_FILE repl \\"
echo " --caller-id \"cli_demo\" \\"
echo " --context-id \"example_session\" \\"
echo " --script-type osis \\"
echo " --timeout 60"
echo
echo -e "${BLUE}=== Sample Scripts ===${NC}"
echo "Available sample scripts in $SCRIPT_DIR/sample_scripts/:"
for script in "$SCRIPT_DIR/sample_scripts"/*; do
if [[ -f "$script" ]]; then
basename "$script"
fi
done
echo
echo -e "${BLUE}=== Verbose Logging Examples ===${NC}"
echo "# Debug logging:"
echo "$CLI_BINARY --config $CONFIG_FILE -v workers list"
echo
echo "# Trace logging:"
echo "$CLI_BINARY --config $CONFIG_FILE -vv workers list"
echo
echo "# No timestamps:"
echo "$CLI_BINARY --config $CONFIG_FILE --no-timestamp workers list"
echo
echo -e "${GREEN}=== Example Runner Complete ===${NC}"
echo -e "${YELLOW}To run actual commands, ensure you have:${NC}"
echo "1. Redis server running on localhost:6379"
echo "2. Zinit process manager installed and configured"
echo "3. Worker binaries available at the paths specified in config.toml"
echo
echo -e "${YELLOW}For testing without full setup, you can:${NC}"
echo "1. Update config.toml with paths to existing binaries"
echo "2. Use the CLI help commands and configuration validation"
echo "3. Test the REPL mode (requires workers to be running)"

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Sample Python script for demonstration
This script demonstrates Python worker functionality
"""
import json
import datetime
from typing import List, Dict
def main():
    """Run the Python worker demo and print each step's result to stdout.

    Covers list filtering, simple statistics, date/time arithmetic, JSON
    serialization and a simulated file-processing summary.
    """
    print("=== Python Worker Demo ===")
    print("Python data processing operations")

    # Data structures
    print("\nData structures:")
    users = [
        {"id": 1, "name": "Alice", "age": 30, "role": "developer"},
        {"id": 2, "name": "Bob", "age": 25, "role": "designer"},
        {"id": 3, "name": "Charlie", "age": 35, "role": "manager"},
        {"id": 4, "name": "Diana", "age": 28, "role": "developer"}
    ]
    print(f"Total users: {len(users)}")

    # Data filtering
    developers = [user for user in users if user["role"] == "developer"]
    print(f"Developers: {len(developers)}")
    for dev in developers:
        print(f" - {dev['name']} (age {dev['age']})")

    # Statistical operations
    print("\nStatistical operations:")
    ages = [user["age"] for user in users]
    avg_age = sum(ages) / len(ages)
    min_age = min(ages)
    max_age = max(ages)
    print(f"Average age: {avg_age:.1f}")
    print(f"Age range: {min_age} - {max_age}")

    # Date/time operations
    print("\nDate/time operations:")
    now = datetime.datetime.now()
    print(f"Current time: {now.strftime('%Y-%m-%d %H:%M:%S')}")

    # Calculate birth years (approximate: ignores whether the birthday has passed)
    current_year = now.year
    for user in users:
        birth_year = current_year - user["age"]
        print(f"{user['name']} was born in {birth_year}")

    # JSON processing
    print("\nJSON processing:")
    json_data = json.dumps(users, indent=2)
    print("User data as JSON:")
    # Show at most the first 200 characters, marking truncation with "...".
    preview = json_data[:200] + "..." if len(json_data) > 200 else json_data
    print(preview)

    # File operations simulation
    print("\nFile operations:")
    simulate_file_processing()

    print("=== Python Demo Complete ===")
def simulate_file_processing():
    """Simulate file processing operations.

    Prints the number and total size of a fixed list of file records, then
    the file names grouped by their type (in first-seen order).
    """
    files = [
        {"name": "data.csv", "size": 1024, "type": "csv"},
        {"name": "config.json", "size": 512, "type": "json"},
        {"name": "report.pdf", "size": 2048, "type": "pdf"},
        {"name": "script.py", "size": 768, "type": "python"}
    ]

    total_size = sum(file["size"] for file in files)
    print(f"Processing {len(files)} files, total size: {total_size} bytes")

    # Group by type
    file_types = {}
    for file in files:
        file_types.setdefault(file["type"], []).append(file["name"])

    print("Files by type:")
    for file_type, file_names in file_types.items():
        print(f" {file_type}: {', '.join(file_names)}")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,34 @@
// Sample OSIS/HeroScript for demonstration
// This script demonstrates basic OSIS worker functionality
print("=== OSIS Worker Demo ===");
print("Hello from the OSIS worker!");
// Basic variable operations
let name = "Hero";
let version = "1.0";
print(`Running ${name} version ${version}`);
// Simple calculation
let x = 10;
let y = 20;
let result = x + y;
print(`Calculation: ${x} + ${y} = ${result}`);
// Array operations
let numbers = [1, 2, 3, 4, 5];
let sum = 0;
for num in numbers {
sum += num;
}
print(`Sum of array [1,2,3,4,5]: ${sum}`);
// Function definition and call
fn greet(person) {
return `Hello, ${person}! Welcome to Hero.`;
}
let greeting = greet("Developer");
print(greeting);
print("=== OSIS Demo Complete ===");

View File

@ -0,0 +1,67 @@
// Sample V language script for demonstration
// This script demonstrates V worker functionality
module main
import math
fn main() {
println("=== V Worker Demo ===")
println("V language mathematical operations")
// Basic arithmetic
x := 15
y := 25
sum := x + y
product := x * y
println("Basic arithmetic:")
println("${x} + ${y} = ${sum}")
println("${x} * ${y} = ${product}")
// Mathematical functions
println("\nMathematical functions:")
angle := 45.0
sin_val := math.sin(math.radians(angle))
cos_val := math.cos(math.radians(angle))
println("sin(${angle}°) = ${sin_val:.4f}")
println("cos(${angle}°) = ${cos_val:.4f}")
// Array operations
numbers := [1, 4, 9, 16, 25]
println("\nArray operations:")
println("Numbers: ${numbers}")
mut total := 0
for num in numbers {
total += num
}
println("Sum: ${total}")
// Square roots
println("\nSquare roots:")
for num in numbers {
sqrt_val := math.sqrt(f64(num))
println("√${num} = ${sqrt_val:.2f}")
}
// Fibonacci sequence
println("\nFibonacci sequence (first 10 numbers):")
fib := fibonacci(10)
println("${fib}")
println("=== V Demo Complete ===")
}
// fibonacci returns the first `n` Fibonacci numbers, starting with 0 and 1.
// An `n` of 0 yields an empty array.
fn fibonacci(n int) []int {
	mut seq := []int{len: n}
	for i in 0 .. n {
		// The first two entries are the seeds 0 and 1; every later entry is
		// the sum of the two before it.
		seq[i] = if i < 2 { i } else { seq[i - 1] + seq[i - 2] }
	}
	return seq
}

View File

@ -0,0 +1,43 @@
// Sample SAL (System Abstraction Layer) script for demonstration
// This script demonstrates system-level operations through SAL worker
print("=== SAL Worker Demo ===");
print("System Abstraction Layer operations");
// System information gathering
print("Gathering system information...");
// Simulated system operations (actual SAL would have real system calls)
let hostname = "hero-system";
let uptime = "2 days, 4 hours";
let load_avg = "0.45, 0.52, 0.48";
print(`Hostname: ${hostname}`);
print(`Uptime: ${uptime}`);
print(`Load Average: ${load_avg}`);
// File system operations
print("\nFile system operations:");
let disk_usage = "45% used";
let available_space = "120GB available";
print(`Disk Usage: ${disk_usage}`);
print(`Available Space: ${available_space}`);
// Process management simulation
print("\nProcess management:");
let active_processes = 156;
let memory_usage = "68%";
print(`Active Processes: ${active_processes}`);
print(`Memory Usage: ${memory_usage}`);
// Network status
print("\nNetwork status:");
let network_interfaces = ["eth0", "lo"];
let connectivity = "Connected";
print(`Network Interfaces: ${network_interfaces}`);
print(`Connectivity: ${connectivity}`);
print("=== SAL Demo Complete ===");

View File

@ -17,7 +17,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Configuration
let redis_url = "redis://localhost:6379";
let zinit_socket = "/var/run/zinit.sock";
// Create supervisor
let supervisor = SupervisorBuilder::new()

View File

@ -12,7 +12,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Workers are automatically launched during build
let supervisor = SupervisorBuilder::new()
.redis_url("redis://localhost:6379")
.zinit_socket_path("/var/run/zinit.sock")
.osis_worker("/usr/local/bin/osis_worker")
.sal_worker("/usr/local/bin/sal_worker")
.v_worker("/usr/local/bin/v_worker")

View File

@ -0,0 +1,18 @@
[global]
redis_url = "redis://localhost:6379"
[osis_worker]
binary_path = "/path/to/osis_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
[sal_worker]
binary_path = "/path/to/sal_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
[v_worker]
binary_path = "/path/to/v_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }
[python_worker]
binary_path = "/path/to/python_worker"
env_vars = { "VAR1" = "value1", "VAR2" = "value2" }

View File

@ -31,6 +31,8 @@ pub enum SupervisorError {
/// Zinit client operation error
ZinitError(String),
SupervisorNotConfigured,
/// Configuration file parsing error
ConfigError(String),
}
impl From<redis::RedisError> for SupervisorError {
@ -95,6 +97,9 @@ impl std::fmt::Display for SupervisorError {
SupervisorError::SupervisorNotConfigured => {
write!(f, "Supervisor not configured for health monitoring")
}
SupervisorError::ConfigError(msg) => {
write!(f, "Configuration error: {}", msg)
}
}
}
}

View File

@ -1,9 +1,14 @@
use log::{debug, error, info, warn};
use redis::AsyncCommands;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use hero_job::NAMESPACE_PREFIX;
use zinit_client::ZinitClient;
use zinit_client::Client as ZinitClient;
mod job;
mod error;
@ -23,46 +28,209 @@ pub struct Supervisor {
pub struct SupervisorBuilder {
redis_url: Option<String>,
zinit_socket_path: Option<String>,
osis_worker: Option<String>,
sal_worker: Option<String>,
v_worker: Option<String>,
python_worker: Option<String>,
worker_env_vars: HashMap<String, String>,
websocket_config: Option<WebSocketServerConfig>,
}
/// Helper struct to pass builder data to worker launch method
///
/// Populated in `SupervisorBuilder::build()` from the builder's own fields and
/// read back by `Supervisor::get_worker_configs()` and
/// `Supervisor::get_websocket_config()`.
#[derive(Clone)]
struct SupervisorBuilderData {
/// Path to the OSIS worker binary, if one was configured.
osis_worker: Option<String>,
/// Path to the SAL worker binary, if one was configured.
sal_worker: Option<String>,
/// Path to the V worker binary, if one was configured.
v_worker: Option<String>,
/// Path to the Python worker binary, if one was configured.
python_worker: Option<String>,
/// Environment variables; this single map is applied to every configured worker.
worker_env_vars: HashMap<String, String>,
/// WebSocket server settings, if any were stored on the builder.
websocket_config: Option<WebSocketServerConfig>,
}
/// TOML configuration structure for the supervisor
///
/// Deserialized by `SupervisorBuilder::from_toml`. Each field corresponds to a
/// top-level TOML table of the same name; only `[global]` is required.
#[derive(Debug, Deserialize, Serialize)]
pub struct SupervisorConfig {
/// Required `[global]` table.
pub global: GlobalConfig,
/// Optional `[websocket_server]` table.
pub websocket_server: Option<WebSocketServerConfig>,
/// Optional `[osis_worker]` table.
pub osis_worker: Option<WorkerConfigToml>,
/// Optional `[sal_worker]` table.
pub sal_worker: Option<WorkerConfigToml>,
/// Optional `[v_worker]` table.
pub v_worker: Option<WorkerConfigToml>,
/// Optional `[python_worker]` table.
pub python_worker: Option<WorkerConfigToml>,
}
/// Global configuration section
///
/// Maps to the `[global]` table of the supervisor TOML file.
#[derive(Debug, Deserialize, Serialize)]
pub struct GlobalConfig {
/// Redis connection URL; `SupervisorBuilder::from_toml` passes it to `redis_url()`.
pub redis_url: String,
}
/// Worker configuration section in TOML
///
/// The same shape is used for the `[osis_worker]`, `[sal_worker]`,
/// `[v_worker]` and `[python_worker]` tables.
#[derive(Debug, Deserialize, Serialize)]
pub struct WorkerConfigToml {
/// Path to the worker binary (required).
pub binary_path: String,
/// Environment variables for the worker; an empty map when the key is omitted.
#[serde(default)]
pub env_vars: HashMap<String, String>,
}
/// WebSocket server configuration section in TOML
/// This mirrors the ServerConfig from hero_websocket_server but avoids circular dependency
///
/// Every key is optional in the TOML file: fields marked `#[serde(default…)]`
/// fall back to the default noted on each field, and the `Option` fields are
/// `None` when omitted.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct WebSocketServerConfig {
/// Server host address (default: "127.0.0.1")
#[serde(default = "default_host")]
pub host: String,
/// Server port (default: 8443)
#[serde(default = "default_port")]
pub port: u16,
/// Redis connection URL (default: "redis://127.0.0.1/")
#[serde(default = "default_redis_url")]
pub redis_url: String,
/// Enable authentication (default: false)
#[serde(default)]
pub auth: bool,
/// Enable TLS/WSS (default: false)
#[serde(default)]
pub tls: bool,
/// Path to TLS certificate file
pub cert: Option<String>,
/// Path to TLS private key file
pub key: Option<String>,
/// Separate port for TLS connections
pub tls_port: Option<u16>,
/// Circles configuration - maps circle names to lists of member public keys
/// (default: empty map)
#[serde(default)]
pub circles: HashMap<String, Vec<String>>,
}
// Default value functions for WebSocket server config
/// Serde fallback for `WebSocketServerConfig::host`: the IPv4 loopback address.
fn default_host() -> String {
    String::from("127.0.0.1")
}
/// Serde fallback for `WebSocketServerConfig::port`.
fn default_port() -> u16 {
    const FALLBACK_PORT: u16 = 8443;
    FALLBACK_PORT
}
/// Serde fallback for `WebSocketServerConfig::redis_url`: a Redis instance on loopback.
fn default_redis_url() -> String {
    String::from("redis://127.0.0.1/")
}
impl SupervisorBuilder {
pub fn new() -> Self {
Self {
redis_url: None,
zinit_socket_path: Some("/var/run/zinit.sock".to_string()),
osis_worker: None,
sal_worker: None,
v_worker: None,
python_worker: None,
worker_env_vars: HashMap::new(),
websocket_config: None,
}
}
/// Create a SupervisorBuilder from a TOML configuration file
pub fn from_toml<P: AsRef<Path>>(toml_path: P) -> Result<Self, SupervisorError> {
let toml_content = fs::read_to_string(toml_path)
.map_err(|e| SupervisorError::ConfigError(format!("Failed to read TOML file: {}", e)))?;
let config: SupervisorConfig = toml::from_str(&toml_content)
.map_err(|e| SupervisorError::ConfigError(format!("Failed to parse TOML: {}", e)))?;
let mut builder = Self::new()
.redis_url(&config.global.redis_url);
// Configure workers based on TOML config
if let Some(osis_config) = config.osis_worker {
builder = builder.osis_worker(&osis_config.binary_path)
.worker_env_vars(osis_config.env_vars);
}
if let Some(sal_config) = config.sal_worker {
builder = builder.sal_worker(&sal_config.binary_path)
.worker_env_vars(sal_config.env_vars);
}
if let Some(v_config) = config.v_worker {
builder = builder.v_worker(&v_config.binary_path)
.worker_env_vars(v_config.env_vars);
}
if let Some(python_config) = config.python_worker {
builder = builder.python_worker(&python_config.binary_path)
.worker_env_vars(python_config.env_vars);
}
// Store WebSocket configuration for later use
if let Some(ws_config) = config.websocket_server {
builder.websocket_config = Some(ws_config);
}
Ok(builder)
}
/// Check every configured worker binary before the supervisor is built.
///
/// Workers with no configured path are skipped. For each configured path the
/// checks run in this order: the path exists, it is a file, and (on Unix
/// only) at least one execute permission bit is set. Each path that passes is
/// logged at info level.
///
/// # Errors
///
/// Returns `SupervisorError::ConfigError` describing the first check that fails.
fn validate_worker_binaries(&self) -> Result<(), SupervisorError> {
    let candidates = [
        ("OSIS", &self.osis_worker),
        ("SAL", &self.sal_worker),
        ("V", &self.v_worker),
        ("Python", &self.python_worker),
    ];

    for (label, configured) in candidates {
        // Nothing to validate for a worker that was never configured.
        let Some(raw_path) = configured else {
            continue;
        };
        let binary = Path::new(raw_path);

        if !binary.exists() {
            let msg = format!("{} worker binary does not exist: {}", label, raw_path);
            return Err(SupervisorError::ConfigError(msg));
        }
        if !binary.is_file() {
            let msg = format!("{} worker path is not a file: {}", label, raw_path);
            return Err(SupervisorError::ConfigError(msg));
        }

        // Execute-permission check (Unix-like systems only)
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;

            let mode = match binary.metadata() {
                Ok(meta) => meta.permissions().mode(),
                Err(e) => {
                    return Err(SupervisorError::ConfigError(
                        format!("Failed to read metadata for {} worker binary {}: {}", label, raw_path, e)
                    ));
                }
            };
            // 0o111 covers the execute bit for owner, group and others.
            if mode & 0o111 == 0 {
                let msg = format!("{} worker binary is not executable: {}", label, raw_path);
                return Err(SupervisorError::ConfigError(msg));
            }
        }

        info!("Validated {} worker binary: {}", label, raw_path);
    }

    Ok(())
}
pub fn redis_url(mut self, url: &str) -> Self {
self.redis_url = Some(url.to_string());
self
}
pub fn zinit_socket_path(mut self, path: &str) -> Self {
self.zinit_socket_path = Some(path.to_string());
self
}
pub fn osis_worker(mut self, binary_path: &str) -> Self {
self.osis_worker = Some(binary_path.to_string());
self
@ -95,21 +263,23 @@ impl SupervisorBuilder {
/// Builds the final `Supervisor` instance synchronously.
///
/// This method validates the configuration and creates the Redis client.
/// Worker launching is deferred to the `start_workers()` method.
/// This method validates the configuration, checks worker binary existence,
/// and creates the Redis client. Worker launching is deferred to the `start_workers()` method.
///
/// # Returns
///
/// * `Ok(Supervisor)` - Successfully configured client
/// * `Err(SupervisorError)` - Configuration or connection error
pub fn build(self) -> Result<Supervisor, SupervisorError> {
/// * `Ok(Supervisor)` - Successfully configured client with valid binaries
/// * `Err(SupervisorError)` - Configuration, binary validation, or connection error
pub async fn build(self) -> Result<Supervisor, SupervisorError> {
// Validate that all configured worker binaries exist first
Self::validate_worker_binaries(&self)?;
let url = self.redis_url
.unwrap_or_else(|| "redis://127.0.0.1/".to_string());
let client = redis::Client::open(url)?;
let zinit_socket = self.zinit_socket_path
.unwrap_or_else(|| "/var/run/zinit.sock".to_string());
let zinit_client = ZinitClient::new(&zinit_socket);
let zinit_client = ZinitClient::unix_socket("/tmp/zinit.sock").await
.map_err(|e| SupervisorError::ZinitError(format!("Failed to create Zinit client: {}", e)))?;
// Store builder data for later use in start_workers()
let builder_data = SupervisorBuilderData {
@ -118,6 +288,7 @@ impl SupervisorBuilder {
v_worker: self.v_worker,
python_worker: self.python_worker,
worker_env_vars: self.worker_env_vars,
websocket_config: self.websocket_config,
};
let supervisor = Supervisor {
@ -134,14 +305,33 @@ impl Supervisor {
/// Bring up every worker configured on this supervisor.
///
/// Intended to be called after `build()`. The steps are:
/// 1. list Zinit services as a connectivity check,
/// 2. clean up any existing worker services,
/// 3. launch the configured workers, if builder data is present.
///
/// # Errors
///
/// Returns `SupervisorError::ZinitError` if the Zinit listing fails, and
/// propagates any error from the cleanup or launch steps.
pub async fn start_workers(&self) -> Result<(), SupervisorError> {
    info!("Starting Hero Supervisor workers...");

    // Connectivity probe: listing services fails fast if Zinit is unreachable.
    info!("Testing Zinit connection at /tmp/zinit.sock...");
    match self.zinit_client.list().await {
        Err(e) => {
            error!("Failed to connect to Zinit: {:?}", e);
            return Err(SupervisorError::ZinitError(format!("Zinit connection failed: {}", e)));
        }
        Ok(services) => {
            info!("Successfully connected to Zinit. Current services: {:?}", services);
        }
    }

    // Remove leftovers from a previous run before launching anything.
    info!("Cleaning up existing worker services...");
    self.cleanup_existing_workers().await?;

    match &self.builder_data {
        Some(data) => {
            info!("Launching configured workers...");
            self.launch_configured_workers(data).await?;
        }
        None => {
            warn!("No builder data available, no workers to start");
        }
    }

    info!("All workers started successfully!");
    Ok(())
}
@ -179,7 +369,11 @@ impl Supervisor {
for worker_name in worker_names {
// Try to stop and delete, but don't fail if they don't exist
let _ = self.stop_and_delete_worker(worker_name).await;
info!("Attempting to cleanup worker: {}", worker_name);
match self.stop_and_delete_worker(worker_name).await {
Ok(_) => info!("Successfully cleaned up worker: {}", worker_name),
Err(e) => debug!("Failed to cleanup worker {}: {}", worker_name, e),
}
}
info!("Existing worker cleanup completed");
@ -188,18 +382,33 @@ impl Supervisor {
/// Stop and delete a worker service from zinit
async fn stop_and_delete_worker(&self, worker_name: &str) -> Result<(), SupervisorError> {
info!("Starting cleanup for worker: {}", worker_name);
// First try to stop the worker
info!("Attempting to stop worker: {}", worker_name);
if let Err(e) = self.zinit_client.stop(worker_name).await {
debug!("Worker {} was not running or failed to stop: {}", worker_name, e);
} else {
info!("Successfully stopped worker: {}", worker_name);
}
// Then try to delete the service
if let Err(e) = self.zinit_client.delete(worker_name).await {
// Then forget the service to stop monitoring it
info!("Attempting to forget worker: {}", worker_name);
if let Err(e) = self.zinit_client.forget(worker_name).await {
info!("Worker {} was not being monitored or failed to forget: {}", worker_name, e);
} else {
info!("Successfully forgot worker service: {}", worker_name);
}
// Finally, delete the service configuration
info!("Attempting to delete service for worker: {}", worker_name);
if let Err(e) = self.zinit_client.delete_service(worker_name).await {
debug!("Worker {} service did not exist or failed to delete: {}", worker_name, e);
} else {
info!("Successfully deleted worker service: {}", worker_name);
}
info!("Completed cleanup for worker: {}", worker_name);
Ok(())
}
@ -212,6 +421,157 @@ impl Supervisor {
JobBuilder::new(self)
}
/// Return a clone of the WebSocket server configuration held in the builder data.
///
/// # Errors
///
/// Returns `SupervisorError::ConfigError` when the supervisor has no builder
/// data, or when the builder data carries no WebSocket configuration.
pub fn get_websocket_config(&self) -> Result<WebSocketServerConfig, SupervisorError> {
    let Some(data) = self.builder_data.as_ref() else {
        return Err(SupervisorError::ConfigError(
            "No builder data available for WebSocket config".to_string(),
        ));
    };

    match &data.websocket_config {
        Some(ws) => Ok(ws.clone()),
        None => Err(SupervisorError::ConfigError(
            "No WebSocket server configuration found in TOML config".to_string(),
        )),
    }
}
/// Build one `WorkerConfig` per worker binary recorded in the builder data.
///
/// Workers appear in the order OSIS, SAL, V, Python; a worker with no
/// configured binary is omitted. Every returned config receives a copy of the
/// same `worker_env_vars` map.
///
/// # Errors
///
/// Returns `SupervisorError::ConfigError` when the supervisor has no builder data.
pub fn get_worker_configs(&self) -> Result<Vec<WorkerConfig>, SupervisorError> {
    let Some(data) = self.builder_data.as_ref() else {
        return Err(SupervisorError::ConfigError(
            "No builder data available for worker configs".to_string(),
        ));
    };

    let shared_env = data.worker_env_vars.clone();

    // (service name, configured binary, script type) for each supported worker.
    let slots = [
        ("osis_worker_1", &data.osis_worker, ScriptType::OSIS),
        ("sal_worker_1", &data.sal_worker, ScriptType::SAL),
        ("v_worker_1", &data.v_worker, ScriptType::V),
        ("python_worker_1", &data.python_worker, ScriptType::Python),
    ];

    let mut configs = Vec::new();
    for (worker_name, binary, script_type) in slots {
        if let Some(path) = binary {
            configs.push(
                WorkerConfig::new(worker_name.to_string(), PathBuf::from(path), script_type)
                    .with_env(shared_env.clone()),
            );
        }
    }

    Ok(configs)
}
/// Spawn a background lifecycle manager that continuously monitors and maintains worker health
/// Returns a JoinHandle that can be used to stop the lifecycle manager
pub fn spawn_lifecycle_manager(
self: Arc<Self>,
worker_configs: Vec<WorkerConfig>,
health_check_interval: Duration,
) -> tokio::task::JoinHandle<Result<(), SupervisorError>> {
let supervisor = self;
tokio::spawn(async move {
info!("Starting background lifecycle manager with {} workers", worker_configs.len());
info!("Health check interval: {:?}", health_check_interval);
// Initial worker startup
info!("Performing initial worker startup...");
if let Err(e) = supervisor.start_workers().await {
error!("Failed to start workers during initialization: {}", e);
return Err(e);
}
// Start the monitoring loop
let mut interval = tokio::time::interval(health_check_interval);
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
loop {
interval.tick().await;
info!("Running periodic worker health check...");
// Check each worker's health and restart if needed
for worker_config in &worker_configs {
if let Err(e) = supervisor.check_and_restart_worker(worker_config).await {
error!("Failed to check/restart worker {}: {}", worker_config.name, e);
}
}
info!("Health check cycle completed");
}
})
}
/// Check a single worker's health and restart if needed
async fn check_and_restart_worker(&self, worker_config: &WorkerConfig) -> Result<(), SupervisorError> {
let worker_name = &worker_config.name;
// Get worker status
match self.zinit_client.status(worker_name).await {
Ok(status) => {
let is_healthy = status.state == "running" && status.pid > 0;
if is_healthy {
debug!("Worker {} is healthy (state: {}, pid: {})", worker_name, status.state, status.pid);
// Optionally send a ping job for deeper health check
if let Err(e) = self.send_ping_job(worker_config.script_type.clone()).await {
warn!("Ping job failed for worker {}: {}", worker_name, e);
// Note: We don't restart on ping failure as it might be temporary
}
} else {
warn!("Worker {} is unhealthy (state: {}, pid: {}), restarting...",
worker_name, status.state, status.pid);
// Attempt to restart the worker
if let Err(e) = self.restart_worker(worker_name).await {
error!("Failed to restart unhealthy worker {}: {}", worker_name, e);
// If restart fails, try a full stop/start cycle
warn!("Attempting full stop/start cycle for worker: {}", worker_name);
if let Err(e) = self.stop_and_delete_worker(worker_name).await {
error!("Failed to stop worker {} during recovery: {}", worker_name, e);
}
if let Err(e) = self.start_worker(worker_config).await {
error!("Failed to start worker {} during recovery: {}", worker_name, e);
return Err(e);
}
info!("Successfully recovered worker: {}", worker_name);
} else {
info!("Successfully restarted worker: {}", worker_name);
}
}
}
Err(e) => {
warn!("Could not get status for worker {} (may not exist): {}", worker_name, e);
// Worker doesn't exist, try to start it
info!("Attempting to start missing worker: {}", worker_name);
if let Err(e) = self.start_worker(worker_config).await {
error!("Failed to start missing worker {}: {}", worker_name, e);
return Err(e);
}
info!("Successfully started missing worker: {}", worker_name);
}
}
Ok(())
}
// Internal helper to submit script details and push to work queue
async fn create_job_using_connection(
&self,

View File

@ -8,7 +8,7 @@ use serde_json::json;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;
use zinit_client::{ZinitClient, ServiceStatus, ServiceState};
use zinit_client::{Client as ZinitClient, Status};
use hero_job::ScriptType;
use crate::{Supervisor, SupervisorError};
@ -16,7 +16,7 @@ use crate::{Supervisor, SupervisorError};
#[derive(Debug, Clone)]
pub struct WorkerInfo {
pub config: WorkerConfig,
pub status: Option<ServiceStatus>,
pub status: Option<Status>,
pub is_running: bool,
}
@ -90,7 +90,7 @@ impl Supervisor {
for config in worker_configs {
let status = self.zinit_client.status(&config.name).await.ok();
let is_running = status.as_ref()
.map(|s| matches!(s.state, ServiceState::Running) && s.pid > 0)
.map(|s| s.state == "running" && s.pid > 0)
.unwrap_or(false);
workers.push(WorkerInfo {
@ -117,6 +117,10 @@ impl Supervisor {
self.zinit_client.create_service(&worker_config.name, service_config).await
.map_err(|e| SupervisorError::ZinitError(format!("Failed to create service: {}", e)))?;
// Monitor the service so Zinit starts managing it
self.zinit_client.monitor(&worker_config.name).await
.map_err(|e| SupervisorError::ZinitError(format!("Failed to monitor service: {}", e)))?;
// Start the service
self.zinit_client.start(&worker_config.name).await
.map_err(|e| SupervisorError::ZinitError(format!("Failed to start worker: {}", e)))?;
@ -168,7 +172,7 @@ impl Supervisor {
&self,
worker_name: &str,
zinit_client: &ZinitClient,
) -> Result<ServiceStatus, SupervisorError> {
) -> Result<Status, SupervisorError> {
match zinit_client.status(worker_name).await {
Ok(status) => Ok(status),
Err(e) => {
@ -183,7 +187,7 @@ impl Supervisor {
&self,
worker_configs: &[WorkerConfig],
zinit_client: &ZinitClient,
) -> Result<HashMap<String, ServiceStatus>, SupervisorError> {
) -> Result<HashMap<String, Status>, SupervisorError> {
let mut status_map = HashMap::new();
for worker in worker_configs {
@ -200,19 +204,7 @@ impl Supervisor {
Ok(status_map)
}
/// Start multiple workers
pub async fn start_workers(
&self,
worker_configs: &[WorkerConfig],
) -> Result<(), SupervisorError> {
info!("Starting {} workers", worker_configs.len());
for worker in worker_configs {
self.start_worker(worker).await?;
}
Ok(())
}
/// Stop multiple workers
pub async fn stop_workers(
@ -240,7 +232,7 @@ impl Supervisor {
for worker in worker_configs {
if worker.script_type == *script_type {
if let Ok(status) = zinit_client.status(&worker.name).await {
if status.state == ServiceState::Running {
if status.state == "running" {
running_count += 1;
}
}
@ -277,26 +269,35 @@ impl Supervisor {
}
/// Create Zinit service configuration from worker config
fn create_service_config(&self, worker: &WorkerConfig) -> serde_json::Value {
let mut config = json!({
"exec": format!("{} {}",
fn create_service_config(&self, worker: &WorkerConfig) -> serde_json::Map<String, serde_json::Value> {
use serde_json::{Map, Value};
let mut config = Map::new();
config.insert(
"exec".to_string(),
Value::String(format!("{} {}",
worker.binary_path.display(),
worker.args.join(" ")
),
"oneshot": !worker.restart_on_exit,
});
))
);
config.insert(
"oneshot".to_string(),
Value::Bool(!worker.restart_on_exit)
);
if let Some(health_check) = &worker.health_check {
config["test"] = json!(health_check);
config.insert("test".to_string(), Value::String(health_check.clone()));
}
if !worker.dependencies.is_empty() {
config["after"] = json!(worker.dependencies);
config.insert("after".to_string(), json!(worker.dependencies));
}
// Add environment variables if any
if !worker.env.is_empty() {
config["env"] = json!(worker.env);
config.insert("env".to_string(), json!(worker.env));
}
config
@ -307,6 +308,8 @@ impl Supervisor {
use hero_job::ScriptType;
use std::path::PathBuf;
let mut errors = Vec::new();
// Launch OSIS worker if configured
if let Some(binary_path) = &builder.osis_worker {
let worker_id = "osis_worker_1";
@ -318,7 +321,11 @@ impl Supervisor {
config.env.extend(builder.worker_env_vars.clone());
info!("Launching OSIS worker: {}", worker_id);
self.start_worker(&config).await?;
if let Err(e) = self.start_worker(&config).await {
let error_msg = format!("Failed to start OSIS worker: {}", e);
warn!("{}", error_msg);
errors.push(error_msg);
}
}
// Launch SAL worker if configured
@ -332,7 +339,11 @@ impl Supervisor {
config.env.extend(builder.worker_env_vars.clone());
info!("Launching SAL worker: {}", worker_id);
self.start_worker(&config).await?;
if let Err(e) = self.start_worker(&config).await {
let error_msg = format!("Failed to start SAL worker: {}", e);
warn!("{}", error_msg);
errors.push(error_msg);
}
}
// Launch V worker if configured
@ -346,7 +357,11 @@ impl Supervisor {
config.env.extend(builder.worker_env_vars.clone());
info!("Launching V worker: {}", worker_id);
self.start_worker(&config).await?;
if let Err(e) = self.start_worker(&config).await {
let error_msg = format!("Failed to start V worker: {}", e);
warn!("{}", error_msg);
errors.push(error_msg);
}
}
// Launch Python worker if configured
@ -360,9 +375,21 @@ impl Supervisor {
config.env.extend(builder.worker_env_vars.clone());
info!("Launching Python worker: {}", worker_id);
self.start_worker(&config).await?;
if let Err(e) = self.start_worker(&config).await {
let error_msg = format!("Failed to start Python worker: {}", e);
warn!("{}", error_msg);
errors.push(error_msg);
}
}
// Return result based on whether any workers started successfully
if errors.is_empty() {
info!("All configured workers started successfully");
Ok(())
} else {
let combined_error = format!("Some workers failed to start: {}", errors.join("; "));
warn!("{}", combined_error);
Err(SupervisorError::ZinitError(combined_error))
}
}
}

View File

@ -11,6 +11,26 @@ path = "src/lib.rs"
name = "worker"
path = "cmd/worker.rs"
[[bin]]
name = "osis"
path = "cmd/osis.rs"
[[bin]]
name = "system"
path = "cmd/system.rs"
[[example]]
name = "trait_based_worker_demo"
path = "examples/trait_based_worker_demo.rs"
[[example]]
name = "osis_worker_demo"
path = "examples/osis_worker_demo.rs"
[[example]]
name = "system_worker_demo"
path = "examples/system_worker_demo.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
@ -24,6 +44,9 @@ env_logger = "0.10"
clap = { version = "4.4", features = ["derive"] }
uuid = { version = "1.6", features = ["v4", "serde"] } # Though task_id is string, uuid might be useful
chrono = { version = "0.4", features = ["serde"] }
toml = "0.8"
thiserror = "1.0"
async-trait = "0.1"
hero_supervisor = { path = "../supervisor" }
hero_job = { path = "../job" }
heromodels = { path = "../../../db/heromodels", features = ["rhai"] }

233
core/worker/cmd/osis.rs Normal file
View File

@ -0,0 +1,233 @@
//! OSIS Worker Binary - Synchronous worker for system-level operations
use clap::Parser;
use log::{error, info};
use rhailib_worker::config::{ConfigError, WorkerConfig};
use rhailib_worker::engine::create_heromodels_engine;
use rhailib_worker::sync_worker::SyncWorker;
use rhailib_worker::worker_trait::{spawn_worker, WorkerConfig as TraitWorkerConfig};
use std::path::PathBuf;
use std::sync::Arc;
use tokio::signal;
use tokio::sync::mpsc;
// Command-line arguments of the `osis` binary, parsed with clap's derive API.
// `config` is the only required argument. `worker_id`, `redis_url` and
// `db_path` replace the corresponding values loaded from the config file when
// given; `verbose` and `no_timestamp` are passed to `setup_logging`.
#[derive(Parser, Debug)]
#[command(
name = "osis",
version = "0.1.0",
about = "OSIS (Operating System Integration Service) - Synchronous Worker",
long_about = "A synchronous worker for Hero framework that processes jobs sequentially. \
Ideal for system-level operations that require careful resource management."
)]
struct Args {
/// Path to TOML configuration file
#[arg(short, long, help = "Path to TOML configuration file")]
config: PathBuf,
/// Override worker ID from config
#[arg(long, help = "Override worker ID from configuration file")]
worker_id: Option<String>,
/// Override Redis URL from config
#[arg(long, help = "Override Redis URL from configuration file")]
redis_url: Option<String>,
/// Override database path from config
#[arg(long, help = "Override database path from configuration file")]
db_path: Option<String>,
/// Enable verbose logging (debug level)
#[arg(short, long, help = "Enable verbose logging")]
verbose: bool,
/// Disable timestamps in log output
#[arg(long, help = "Remove timestamps from log output")]
no_timestamp: bool,
}
/// Entry point of the OSIS worker binary.
///
/// Sequence: parse CLI arguments, load the TOML config, reject non-sync
/// configurations, apply CLI overrides, initialise logging, create the Rhai
/// engine and the sync worker, install a Ctrl-C handler, then run the worker
/// until it finishes.
///
/// Exits the process with status 1 when the config cannot be loaded, when the
/// config is not a sync worker config, or when the worker task fails or
/// cannot be joined.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let args = Args::parse();
// Load configuration from TOML file
// (logging is not set up yet, so failures here are reported with eprintln!)
let mut config = match WorkerConfig::from_file(&args.config) {
Ok(config) => config,
Err(e) => {
eprintln!("Failed to load configuration from {:?}: {}", args.config, e);
std::process::exit(1);
}
};
// Validate that this is a sync worker configuration
if !config.is_sync() {
eprintln!("Error: OSIS worker requires a sync worker configuration");
eprintln!("Expected: [worker_type] type = \"sync\"");
eprintln!("Found: {:?}", config.worker_type);
std::process::exit(1);
}
// Apply command line overrides
if let Some(worker_id) = args.worker_id {
config.worker_id = worker_id;
}
if let Some(redis_url) = args.redis_url {
config.redis_url = redis_url;
}
if let Some(db_path) = args.db_path {
config.db_path = db_path;
}
// Configure logging
// (done after the overrides; from here on the log macros are used)
setup_logging(&config, args.verbose, args.no_timestamp)?;
info!("🚀 OSIS Worker starting...");
info!("Worker ID: {}", config.worker_id);
info!("Redis URL: {}", config.redis_url);
info!("Database Path: {}", config.db_path);
info!("Preserve Tasks: {}", config.preserve_tasks);
// Create Rhai engine
let engine = create_heromodels_engine();
info!("✅ Rhai engine initialized");
// Create worker configuration for the trait-based interface
// NOTE(review): `worker_config` is not referenced again in this function;
// `spawn_worker` below receives only the worker, the engine and the shutdown
// receiver. Confirm whether the worker is meant to be built from this config
// instead of `SyncWorker::default()`.
let worker_config = TraitWorkerConfig::new(
config.worker_id.clone(),
config.db_path.clone(),
config.redis_url.clone(),
config.preserve_tasks,
);
// Create sync worker instance
let worker = Arc::new(SyncWorker::default());
info!("✅ Sync worker instance created");
// Setup shutdown signal handling
let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
// Spawn shutdown signal handler
// Only the clone moves into the task; the original `shutdown_tx` stays alive
// in `main`, so the channel is not closed if the signal task returns early.
let shutdown_tx_clone = shutdown_tx.clone();
tokio::spawn(async move {
if let Err(e) = signal::ctrl_c().await {
error!("Failed to listen for shutdown signal: {}", e);
return;
}
info!("🛑 Shutdown signal received");
if let Err(e) = shutdown_tx_clone.send(()).await {
error!("Failed to send shutdown signal: {}", e);
}
});
// Spawn the worker
info!("🔄 Starting worker loop...");
let worker_handle = spawn_worker(worker, engine, shutdown_rx);
// Wait for the worker to complete
// Outer Result: joining the task; inner Result: the worker's own outcome.
match worker_handle.await {
Ok(Ok(())) => {
info!("✅ OSIS Worker shut down gracefully");
}
Ok(Err(e)) => {
error!("❌ OSIS Worker encountered an error: {}", e);
std::process::exit(1);
}
Err(e) => {
error!("❌ Failed to join worker task: {}", e);
std::process::exit(1);
}
}
Ok(())
}
/// Initialise the process-wide `env_logger` logger.
///
/// * Level: `debug` when `verbose` is set, otherwise `config.logging.level`
///   (matched case-insensitively). An unrecognised level is reported on stderr
///   and replaced by `info`.
/// * Timestamps: kept only when `no_timestamp` is false and
///   `config.logging.timestamps` is true.
///
/// This function has no failing path of its own and always returns `Ok(())`.
fn setup_logging(
    config: &WorkerConfig,
    verbose: bool,
    no_timestamp: bool,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // The CLI flag wins over the configured level.
    let requested = if verbose { "debug" } else { &config.logging.level };

    let filter = match requested.to_lowercase().as_str() {
        "trace" => log::LevelFilter::Trace,
        "debug" => log::LevelFilter::Debug,
        "info" => log::LevelFilter::Info,
        "warn" => log::LevelFilter::Warn,
        "error" => log::LevelFilter::Error,
        _ => {
            eprintln!("Invalid log level: {}. Using 'info'", requested);
            log::LevelFilter::Info
        }
    };

    let mut logger = env_logger::Builder::new();
    logger.filter_level(filter);

    // Either the CLI flag or the config can switch timestamps off.
    if no_timestamp || !config.logging.timestamps {
        logger.format_timestamp(None);
    }

    logger.init();
    Ok(())
}
// Unit tests for the configuration handling this binary relies on.
// They write a TOML document to a temporary file, load it with
// `WorkerConfig::from_file` and check the `is_sync` / `is_async` predicates.
// `main()` itself is not executed by these tests.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
// A config with `type = "sync"` loads and is classified as sync.
#[test]
fn test_config_validation() {
let config_toml = r#"
worker_id = "test_osis"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"
[worker_type]
type = "sync"
[logging]
level = "info"
"#;
let mut temp_file = NamedTempFile::new().unwrap();
temp_file.write_all(config_toml.as_bytes()).unwrap();
let config = WorkerConfig::from_file(temp_file.path()).unwrap();
assert!(config.is_sync());
assert!(!config.is_async());
assert_eq!(config.worker_id, "test_osis");
}
// A config with `type = "async"` still loads, but is classified as async.
// Only the classification is asserted here; the rejection itself (the
// `is_sync` check followed by process exit in `main`) is not exercised.
#[test]
fn test_async_config_rejection() {
let config_toml = r#"
worker_id = "test_osis"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"
[worker_type]
type = "async"
default_timeout_seconds = 300
[logging]
level = "info"
"#;
let mut temp_file = NamedTempFile::new().unwrap();
temp_file.write_all(config_toml.as_bytes()).unwrap();
let config = WorkerConfig::from_file(temp_file.path()).unwrap();
assert!(!config.is_sync());
assert!(config.is_async());
// This would be rejected in main() function
}
}

302
core/worker/cmd/system.rs Normal file
View File

@ -0,0 +1,302 @@
//! System Worker Binary - Asynchronous worker for high-throughput concurrent processing
use clap::Parser;
use log::{error, info, warn};
use rhailib_worker::async_worker_impl::AsyncWorker;
use rhailib_worker::config::{ConfigError, WorkerConfig};
use rhailib_worker::engine::create_heromodels_engine;
use rhailib_worker::worker_trait::{spawn_worker, WorkerConfig as TraitWorkerConfig};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::signal;
use tokio::sync::mpsc;
// Command-line arguments of the `system` binary, parsed with clap's derive API.
// `config` is the only required argument. `worker_id`, `redis_url`, `db_path`
// and `timeout` replace the corresponding values loaded from the config file
// when given; `show_stats` turns on a periodic running-job report.
#[derive(Parser, Debug)]
#[command(
name = "system",
version = "0.1.0",
about = "System Worker - Asynchronous Worker with Concurrent Job Processing",
long_about = "An asynchronous worker for Hero framework that processes multiple jobs \
concurrently with timeout support. Ideal for high-throughput scenarios \
where jobs can be executed in parallel."
)]
struct Args {
/// Path to TOML configuration file
#[arg(short, long, help = "Path to TOML configuration file")]
config: PathBuf,
/// Override worker ID from config
#[arg(long, help = "Override worker ID from configuration file")]
worker_id: Option<String>,
/// Override Redis URL from config
#[arg(long, help = "Override Redis URL from configuration file")]
redis_url: Option<String>,
/// Override database path from config
#[arg(long, help = "Override database path from configuration file")]
db_path: Option<String>,
/// Override default timeout in seconds
#[arg(long, help = "Override default job timeout in seconds")]
timeout: Option<u64>,
/// Enable verbose logging (debug level)
#[arg(short, long, help = "Enable verbose logging")]
verbose: bool,
/// Disable timestamps in log output
#[arg(long, help = "Remove timestamps from log output")]
no_timestamp: bool,
/// Show worker statistics periodically
#[arg(long, help = "Show periodic worker statistics")]
show_stats: bool,
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let args = Args::parse();
// Load configuration from TOML file
let mut config = match WorkerConfig::from_file(&args.config) {
Ok(config) => config,
Err(e) => {
eprintln!("Failed to load configuration from {:?}: {}", args.config, e);
std::process::exit(1);
}
};
// Validate that this is an async worker configuration
if !config.is_async() {
eprintln!("Error: System worker requires an async worker configuration");
eprintln!("Expected: [worker_type] type = \"async\"");
eprintln!("Found: {:?}", config.worker_type);
std::process::exit(1);
}
// Apply command line overrides
if let Some(worker_id) = args.worker_id {
config.worker_id = worker_id;
}
if let Some(redis_url) = args.redis_url {
config.redis_url = redis_url;
}
if let Some(db_path) = args.db_path {
config.db_path = db_path;
}
// Override timeout if specified
if let Some(timeout_secs) = args.timeout {
if let rhailib_worker::config::WorkerType::Async { ref mut default_timeout_seconds } = config.worker_type {
*default_timeout_seconds = timeout_secs;
}
}
// Configure logging
setup_logging(&config, args.verbose, args.no_timestamp)?;
info!("🚀 System Worker starting...");
info!("Worker ID: {}", config.worker_id);
info!("Redis URL: {}", config.redis_url);
info!("Database Path: {}", config.db_path);
info!("Preserve Tasks: {}", config.preserve_tasks);
if let Some(timeout) = config.get_default_timeout() {
info!("Default Timeout: {:?}", timeout);
}
// Create Rhai engine
let engine = create_heromodels_engine();
info!("✅ Rhai engine initialized");
// Create worker configuration for the trait-based interface
let mut worker_config = TraitWorkerConfig::new(
config.worker_id.clone(),
config.db_path.clone(),
config.redis_url.clone(),
config.preserve_tasks,
);
// Add timeout configuration for async worker
if let Some(timeout) = config.get_default_timeout() {
worker_config = worker_config.with_default_timeout(timeout);
}
// Create async worker instance
let worker = Arc::new(AsyncWorker::default());
info!("✅ Async worker instance created");
// Setup shutdown signal handling
let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
// Spawn shutdown signal handler
let shutdown_tx_clone = shutdown_tx.clone();
tokio::spawn(async move {
if let Err(e) = signal::ctrl_c().await {
error!("Failed to listen for shutdown signal: {}", e);
return;
}
info!("🛑 Shutdown signal received");
if let Err(e) = shutdown_tx_clone.send(()).await {
error!("Failed to send shutdown signal: {}", e);
}
});
// Spawn statistics reporter if requested
if args.show_stats {
let worker_stats = Arc::clone(&worker);
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(30));
loop {
interval.tick().await;
let running_count = worker_stats.running_job_count().await;
if running_count > 0 {
info!("📊 Worker Stats: {} jobs currently running", running_count);
} else {
info!("📊 Worker Stats: No jobs currently running");
}
}
});
}
// Spawn the worker
info!("🔄 Starting worker loop...");
let worker_handle = spawn_worker(worker, engine, shutdown_rx);
// Wait for the worker to complete
match worker_handle.await {
Ok(Ok(())) => {
info!("✅ System Worker shut down gracefully");
}
Ok(Err(e)) => {
error!("❌ System Worker encountered an error: {}", e);
std::process::exit(1);
}
Err(e) => {
error!("❌ Failed to join worker task: {}", e);
std::process::exit(1);
}
}
Ok(())
}
/// Setup logging based on configuration and command line arguments
fn setup_logging(
config: &WorkerConfig,
verbose: bool,
no_timestamp: bool,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let mut builder = env_logger::Builder::new();
// Determine log level
let log_level = if verbose {
"debug"
} else {
&config.logging.level
};
// Set log level
builder.filter_level(match log_level.to_lowercase().as_str() {
"trace" => log::LevelFilter::Trace,
"debug" => log::LevelFilter::Debug,
"info" => log::LevelFilter::Info,
"warn" => log::LevelFilter::Warn,
"error" => log::LevelFilter::Error,
_ => {
warn!("Invalid log level: {}. Using 'info'", log_level);
log::LevelFilter::Info
}
});
// Configure timestamps
let show_timestamps = !no_timestamp && config.logging.timestamps;
if !show_timestamps {
builder.format_timestamp(None);
}
builder.init();
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `toml` to a fresh temporary file and loads it through
    /// `WorkerConfig::from_file`, panicking on any failure.
    fn load_config(toml: &str) -> WorkerConfig {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(toml.as_bytes()).unwrap();
        WorkerConfig::from_file(file.path()).unwrap()
    }

    /// An async configuration is recognised as async and exposes its timeout.
    #[test]
    fn test_config_validation() {
        let config = load_config(
            r#"
worker_id = "test_system"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"
[worker_type]
type = "async"
default_timeout_seconds = 600
[logging]
level = "info"
"#,
        );
        assert!(!config.is_sync());
        assert!(config.is_async());
        assert_eq!(config.worker_id, "test_system");
        assert_eq!(config.get_default_timeout(), Some(Duration::from_secs(600)));
    }

    /// A sync configuration parses fine and is reported as sync.
    #[test]
    fn test_sync_config_rejection() {
        let config = load_config(
            r#"
worker_id = "test_system"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"
[worker_type]
type = "sync"
[logging]
level = "info"
"#,
        );
        assert!(config.is_sync());
        assert!(!config.is_async());
        // This would be rejected in main() function
    }

    /// Mutating the async variant's timeout is reflected by `get_default_timeout`.
    #[test]
    fn test_timeout_override() {
        let mut config = load_config(
            r#"
worker_id = "test_system"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"
[worker_type]
type = "async"
default_timeout_seconds = 300
"#,
        );
        assert_eq!(config.get_default_timeout(), Some(Duration::from_secs(300)));

        // Test timeout override
        if let rhailib_worker::config::WorkerType::Async { default_timeout_seconds } =
            &mut config.worker_type
        {
            *default_timeout_seconds = 600;
        }
        assert_eq!(config.get_default_timeout(), Some(Duration::from_secs(600)));
    }
}

View File

@ -0,0 +1,197 @@
# Worker Examples
This directory contains example configurations and test scripts for both OSIS and System worker binaries.
## Overview
Both examples demonstrate the ping/pong functionality built into the Hero workers:
- Workers automatically detect jobs with script content "ping"
- They respond immediately with "pong" without executing the Rhai engine
- This provides a fast health check and connectivity test mechanism
## Prerequisites
1. **Redis Server**: Both examples require a running Redis server
```bash
# Install Redis (macOS)
brew install redis
# Start Redis server
redis-server
```
2. **Rust Environment**: Make sure you can build the worker binaries
```bash
cd /path/to/herocode/hero/core/worker
cargo build --bin osis --bin system
```
## OSIS Worker Example
**Location**: `examples/osis/`
The OSIS (Operating System Integration Service) worker processes jobs synchronously, one at a time.
### Files
- `config.toml` - Configuration for the OSIS worker
- `example.sh` - Test script that demonstrates ping/pong functionality
### Usage
```bash
cd examples/osis
./example.sh
```
### What the script does:
1. Checks Redis connectivity
2. Cleans up any existing jobs
3. Starts the OSIS worker in the background
4. Sends 3 ping jobs sequentially
5. Verifies each job receives a "pong" response
6. Reports success/failure statistics
7. Cleans up worker and Redis data
### Expected Output
```
=== OSIS Worker Example ===
✅ Redis is running
✅ OSIS worker started (PID: 12345)
📤 Sending ping job: ping_job_1_1234567890
✅ Job ping_job_1_1234567890 completed successfully with result: pong
...
🎉 All tests passed! OSIS worker is working correctly.
```
## System Worker Example
**Location**: `examples/system/`
The System worker processes jobs asynchronously, handling multiple jobs concurrently.
### Files
- `config.toml` - Configuration for the System worker (includes async settings)
- `example.sh` - Test script that demonstrates concurrent ping/pong functionality
### Usage
```bash
cd examples/system
./example.sh
```
### What the script does:
1. Checks Redis connectivity
2. Cleans up any existing jobs
3. Starts the System worker with stats reporting
4. Sends 5 concurrent ping jobs
5. Sends 10 rapid-fire ping jobs to test async capabilities
6. Verifies all jobs receive "pong" responses
7. Reports comprehensive success/failure statistics
8. Cleans up worker and Redis data
### Expected Output
```
=== System Worker Example ===
✅ Redis is running
✅ System worker started (PID: 12345)
📤 Sending ping job: ping_job_1_1234567890123
✅ Job ping_job_1_1234567890123 completed successfully with result: pong
...
🎉 All tests passed! System worker is handling concurrent jobs correctly.
Overall success rate: 15/15
```
## Configuration Details
### OSIS Configuration (`examples/osis/config.toml`)
```toml
worker_id = "osis_example_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/osis_example_db"
preserve_tasks = false
[worker_type]
type = "sync"
[logging]
timestamps = true
level = "info"
```
### System Configuration (`examples/system/config.toml`)
```toml
worker_id = "system_example_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/system_example_db"
preserve_tasks = false
[worker_type]
type = "async"
default_timeout_seconds = 30
[logging]
timestamps = true
level = "info"
```
## Key Differences
| Feature | OSIS Worker | System Worker |
|---------|-------------|---------------|
| **Processing** | Sequential (one job at a time) | Concurrent (multiple jobs simultaneously) |
| **Use Case** | System-level operations requiring resource management | High-throughput job processing |
| **Timeout** | No timeout configuration | Configurable job timeouts |
| **Stats** | Basic logging | Optional statistics reporting (`--show-stats`) |
| **Job Handling** | Blocking job execution | Non-blocking async job execution |
## Troubleshooting
### Redis Connection Issues
```bash
# Check if Redis is running
redis-cli ping
# Check Redis logs
redis-server --loglevel verbose
```
### Worker Compilation Issues
```bash
# Clean and rebuild
cargo clean
cargo build --bin osis --bin system
```
### Job Processing Issues
- Check Redis for stuck jobs: `redis-cli keys "hero:*"`
- Clear all Hero jobs: `redis-cli eval "return redis.call('del', unpack(redis.call('keys', 'hero:*')))" 0` (Redis reports an error when no `hero:*` keys exist; that error is harmless)
- Check worker logs for detailed error messages
## Extending the Examples
### Adding Custom Jobs
To test with custom Rhai scripts instead of ping jobs:
1. Modify the job creation in the shell scripts:
```bash
# Replace "ping" with your Rhai script
redis-cli -u "$REDIS_URL" hset "hero:job:$job_id" \
script "your_rhai_script_here"
```
2. Update result verification to expect your script's output instead of "pong"
### Testing Different Configurations
- Modify `config.toml` files to test different Redis URLs, database paths, or logging levels
- Test with `preserve_tasks = true` to inspect job details after completion
- Adjust timeout values in the System worker configuration
## Architecture Notes
Both examples demonstrate the unified Worker trait architecture:
- **Common Interface**: Both workers implement the same `Worker` trait
- **Ping/Pong Handling**: Built into the trait's `spawn` method before job delegation
- **Redis Integration**: Uses the shared `Job` struct from the `hero_job` crate
- **Configuration**: TOML-based configuration with CLI overrides
- **Graceful Shutdown**: Both workers handle SIGTERM/SIGINT properly
This architecture allows for easy extension with new worker types while maintaining consistent behavior and configuration patterns.

View File

@ -0,0 +1,11 @@
worker_id = "osis_example_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/osis_example_db"
preserve_tasks = false
[worker_type]
type = "sync"
[logging]
timestamps = true
level = "info"

View File

@ -0,0 +1,138 @@
#!/bin/bash
# OSIS Worker Example Script
# This script demonstrates the OSIS worker by:
# 1. Starting the worker with the config.toml
# 2. Sending ping jobs to Redis
# 3. Verifying pong responses
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="$SCRIPT_DIR/config.toml"
WORKER_ID="osis_example_worker"
REDIS_URL="redis://localhost:6379"

echo "=== OSIS Worker Example ==="
echo "Script directory: $SCRIPT_DIR"
echo "Config file: $CONFIG_FILE"
echo "Worker ID: $WORKER_ID"
echo "Redis URL: $REDIS_URL"
echo

# Check if Redis is running
echo "Checking Redis connection..."
if ! redis-cli -u "$REDIS_URL" ping > /dev/null 2>&1; then
    echo "❌ Error: Redis is not running or not accessible at $REDIS_URL"
    echo "Please start Redis server first: redis-server"
    exit 1
fi
echo "✅ Redis is running"
echo

# Clean up any existing jobs in the queue.
# The eval fails when no keys match (DEL without arguments), hence "|| true".
echo "Cleaning up existing jobs in Redis..."
redis-cli -u "$REDIS_URL" del "hero:jobs:$WORKER_ID" > /dev/null 2>&1 || true
redis-cli -u "$REDIS_URL" eval "return redis.call('del', unpack(redis.call('keys', 'hero:job:*')))" 0 > /dev/null 2>&1 || true
echo "✅ Redis queues cleaned"
echo

# Start the OSIS worker in the background
echo "Starting OSIS worker..."
cd "$SCRIPT_DIR/../.."
cargo run --bin osis -- --config "$CONFIG_FILE" &
WORKER_PID=$!

# Never leave the background worker running if this script stops early
# (a failing command under "set -e", Ctrl+C, or SIGTERM). Stopping an
# already-stopped worker is a no-op, so the regular cleanup remains valid.
stop_worker() {
    kill "$WORKER_PID" 2>/dev/null || true
    wait "$WORKER_PID" 2>/dev/null || true
}
trap stop_worker EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

echo "✅ OSIS worker started (PID: $WORKER_PID)"
echo

# Wait a moment for the worker to initialize
echo "Waiting for worker to initialize..."
sleep 3
# Submit one "ping" job for the worker and wait for its "pong".
#   $1 - sequence number used to build the job id
# Returns 0 when the job finishes with result "pong"; returns 1 on an
# unexpected result, a job error, or after 10 one-second polls.
send_ping_job() {
    local job_num=$1
    local job_id="ping_job_${job_num}_$(date +%s)"
    local timeout=10
    local elapsed status result error

    echo "📤 Sending ping job: $job_id"

    # Create job in Redis
    redis-cli -u "$REDIS_URL" hset "hero:job:$job_id" \
        id "$job_id" \
        script "ping" \
        status "Queued" \
        created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        worker_id "$WORKER_ID" > /dev/null

    # Add job to worker queue
    redis-cli -u "$REDIS_URL" lpush "hero:jobs:$WORKER_ID" "$job_id" > /dev/null

    # Poll the job hash once per second until it reaches a terminal status
    for ((elapsed = 0; elapsed < timeout; elapsed++)); do
        status=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" status 2>/dev/null || echo "")
        case "$status" in
            Finished)
                result=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" result 2>/dev/null || echo "")
                if [ "$result" != "pong" ]; then
                    echo "❌ Job $job_id completed but with unexpected result: $result"
                    return 1
                fi
                echo "✅ Job $job_id completed successfully with result: $result"
                return 0
                ;;
            Error)
                error=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" error 2>/dev/null || echo "")
                echo "❌ Job $job_id failed with error: $error"
                return 1
                ;;
        esac
        sleep 1
    done

    echo "❌ Job $job_id timed out after ${timeout}s"
    return 1
}
# Run the ping/pong round trip several times and tally the successes
echo "Testing ping/pong functionality..."
success_count=0
total_jobs=3
for ((i = 1; i <= total_jobs; i++)); do
    echo
    echo "--- Test $i/$total_jobs ---"
    if send_ping_job "$i"; then
        success_count=$((success_count + 1))
    fi
    sleep 1
done

echo
echo "=== Test Results ==="
echo "Successful ping/pong tests: $success_count/$total_jobs"
# The exit code is decided here but only used after cleanup has run
if ((success_count == total_jobs)); then
    echo "🎉 All tests passed! OSIS worker is working correctly."
    exit_code=0
else
    echo "⚠️ Some tests failed. Check the worker logs for details."
    exit_code=1
fi

# Clean up: stop the worker first, then remove the Redis keys used above
echo
echo "Cleaning up..."
echo "Stopping OSIS worker (PID: $WORKER_PID)..."
kill "$WORKER_PID" 2>/dev/null || true
wait "$WORKER_PID" 2>/dev/null || true
echo "✅ Worker stopped"

echo "Cleaning up Redis jobs..."
redis-cli -u "$REDIS_URL" del "hero:jobs:$WORKER_ID" > /dev/null 2>&1 || true
redis-cli -u "$REDIS_URL" eval "return redis.call('del', unpack(redis.call('keys', 'hero:job:*')))" 0 > /dev/null 2>&1 || true
echo "✅ Redis cleaned up"

echo
echo "=== OSIS Worker Example Complete ==="
exit "$exit_code"

View File

@ -0,0 +1,14 @@
# OSIS Worker Configuration
# Synchronous worker for system-level operations
worker_id = "osis_worker_1"
redis_url = "redis://localhost:6379"
db_path = "/tmp/osis_worker_db"
preserve_tasks = false
[worker_type]
type = "sync"
[logging]
timestamps = true
level = "info"

View File

@ -0,0 +1,60 @@
use std::process::{Command, Stdio};
use std::path::Path;
use std::env;
use std::io::{self, Write};
/// OSIS Worker Demo Runner
///
/// Thin launcher that lets Cargo run the shell-based OSIS example: it looks
/// for `examples/osis/example.sh` under the current working directory, marks
/// it executable on Unix, runs it with `bash` using inherited stdio, and
/// exits with the script's exit code when the script fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🚀 OSIS Worker Demo");
    println!("==================");
    println!();

    // The script is resolved relative to the directory the demo is run from.
    let workdir = env::current_dir()?;
    let mut script = workdir.clone();
    script.extend(["examples", "osis", "example.sh"]);

    // Bail out early with a hint when started from the wrong directory
    if !script.exists() {
        eprintln!("❌ Error: Script not found at {:?}", script);
        eprintln!(" Make sure you're running this from the worker crate root directory.");
        std::process::exit(1);
    }

    println!("📁 Script location: {:?}", script);
    println!("🔧 Executing OSIS worker example...");
    println!();

    // Make sure the script is executable
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut mode = std::fs::metadata(&script)?.permissions();
        mode.set_mode(0o755);
        std::fs::set_permissions(&script, mode)?;
    }

    // Run the script to completion, sharing this process's stdio with it
    let status = Command::new("bash")
        .arg(&script)
        .current_dir(&workdir)
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .status()?;

    println!();
    if !status.success() {
        println!("❌ OSIS worker demo failed with exit code: {:?}", status.code());
        // A missing code (e.g. killed by a signal) is reported as 1
        std::process::exit(status.code().unwrap_or(1));
    }
    println!("✅ OSIS worker demo completed successfully!");
    Ok(())
}

View File

@ -0,0 +1,12 @@
worker_id = "system_example_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/system_example_db"
preserve_tasks = false
[worker_type]
type = "async"
default_timeout_seconds = 30
[logging]
timestamps = true
level = "info"

View File

@ -0,0 +1,183 @@
#!/bin/bash
# System Worker Example Script
# This script demonstrates the System worker by:
# 1. Starting the worker with the config.toml
# 2. Sending multiple concurrent ping jobs to Redis
# 3. Verifying pong responses
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="$SCRIPT_DIR/config.toml"
WORKER_ID="system_example_worker"
REDIS_URL="redis://localhost:6379"

echo "=== System Worker Example ==="
echo "Script directory: $SCRIPT_DIR"
echo "Config file: $CONFIG_FILE"
echo "Worker ID: $WORKER_ID"
echo "Redis URL: $REDIS_URL"
echo

# Check if Redis is running
echo "Checking Redis connection..."
if ! redis-cli -u "$REDIS_URL" ping > /dev/null 2>&1; then
    echo "❌ Error: Redis is not running or not accessible at $REDIS_URL"
    echo "Please start Redis server first: redis-server"
    exit 1
fi
echo "✅ Redis is running"
echo

# Clean up any existing jobs in the queue.
# The eval fails when no keys match (DEL without arguments), hence "|| true".
echo "Cleaning up existing jobs in Redis..."
redis-cli -u "$REDIS_URL" del "hero:jobs:$WORKER_ID" > /dev/null 2>&1 || true
redis-cli -u "$REDIS_URL" eval "return redis.call('del', unpack(redis.call('keys', 'hero:job:*')))" 0 > /dev/null 2>&1 || true
echo "✅ Redis queues cleaned"
echo

# Start the System worker in the background
echo "Starting System worker..."
cd "$SCRIPT_DIR/../.."
cargo run --bin system -- --config "$CONFIG_FILE" --show-stats &
WORKER_PID=$!

# Never leave the background worker running if this script stops early
# (a failing job submission under "set -e", Ctrl+C, or SIGTERM). Stopping an
# already-stopped worker is a no-op, so the regular cleanup remains valid.
stop_worker() {
    kill "$WORKER_PID" 2>/dev/null || true
    wait "$WORKER_PID" 2>/dev/null || true
}
trap stop_worker EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

echo "✅ System worker started (PID: $WORKER_PID)"
echo

# Wait a moment for the worker to initialize
echo "Waiting for worker to initialize..."
sleep 3
# Function to send a ping job (non-blocking).
#   $1 - label used to build the job id
# Prints ONLY the generated job id on stdout, because callers capture it with
# $(send_ping_job ...); the progress message therefore goes to stderr so it
# cannot end up inside the captured id.
send_ping_job() {
    local job_num=$1
    local job_id="ping_job_${job_num}_$(date +%s%N)"
    echo "📤 Sending ping job: $job_id" >&2

    # Create job in Redis
    redis-cli -u "$REDIS_URL" hset "hero:job:$job_id" \
        id "$job_id" \
        script "ping" \
        status "Queued" \
        created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        worker_id "$WORKER_ID" > /dev/null

    # Add job to worker queue
    redis-cli -u "$REDIS_URL" lpush "hero:jobs:$WORKER_ID" "$job_id" > /dev/null

    echo "$job_id"
}
# Function to check job result.
#   $1 - job id as printed by send_ping_job
# Polls the job hash every 0.5s for up to $timeout seconds.
# Returns 0 when the job finished with result "pong"; returns 1 on an
# unexpected result, a job error, or a timeout.
check_job_result() {
    local job_id=$1
    local timeout=15
    # Two polls per second, so the loop really lasts $timeout seconds and the
    # timeout message below reports the time actually waited.
    local polls_left=$((timeout * 2))
    local status result error

    while [ "$polls_left" -gt 0 ]; do
        status=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" status 2>/dev/null || echo "")
        if [ "$status" = "Finished" ]; then
            result=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" result 2>/dev/null || echo "")
            if [ "$result" = "pong" ]; then
                echo "✅ Job $job_id completed successfully with result: $result"
                return 0
            else
                echo "❌ Job $job_id completed but with unexpected result: $result"
                return 1
            fi
        elif [ "$status" = "Error" ]; then
            error=$(redis-cli -u "$REDIS_URL" hget "hero:job:$job_id" error 2>/dev/null || echo "")
            echo "❌ Job $job_id failed with error: $error"
            return 1
        fi
        sleep 0.5
        polls_left=$((polls_left - 1))
    done

    echo "❌ Job $job_id timed out after ${timeout}s"
    return 1
}
# Phase 1: submit a batch of ping jobs back to back, then collect the results
echo "Testing concurrent ping/pong functionality..."
total_jobs=5
job_ids=()
echo
echo "--- Sending $total_jobs concurrent ping jobs ---"
for ((i = 1; i <= total_jobs; i++)); do
    job_id=$(send_ping_job "$i")
    job_ids+=("$job_id")
    sleep 0.1 # Small delay between job submissions
done

echo
echo "--- Waiting for all jobs to complete ---"
success_count=0
for job_id in "${job_ids[@]}"; do
    echo "Checking job: $job_id"
    if check_job_result "$job_id"; then
        success_count=$((success_count + 1))
    fi
done

echo
echo "=== Test Results ==="
echo "Successful concurrent ping/pong tests: $success_count/$total_jobs"
if ((success_count == total_jobs)); then
    echo "🎉 All tests passed! System worker is handling concurrent jobs correctly."
    exit_code=0
else
    echo "⚠️ Some tests failed. Check the worker logs for details."
    exit_code=1
fi

# Phase 2: submit jobs with no delay at all to showcase async capabilities
echo
echo "--- Testing rapid job submission (10 jobs in quick succession) ---"
rapid_jobs=10
rapid_job_ids=()
for ((i = 1; i <= rapid_jobs; i++)); do
    job_id=$(send_ping_job "rapid_$i")
    rapid_job_ids+=("$job_id")
done

echo "Waiting for rapid jobs to complete..."
rapid_success=0
for job_id in "${rapid_job_ids[@]}"; do
    if check_job_result "$job_id"; then
        rapid_success=$((rapid_success + 1))
    fi
done
echo "Rapid submission test: $rapid_success/$rapid_jobs successful"

# Clean up: stop the worker first, then remove the Redis keys used above
echo
echo "Cleaning up..."
echo "Stopping System worker (PID: $WORKER_PID)..."
kill "$WORKER_PID" 2>/dev/null || true
wait "$WORKER_PID" 2>/dev/null || true
echo "✅ Worker stopped"

echo "Cleaning up Redis jobs..."
redis-cli -u "$REDIS_URL" del "hero:jobs:$WORKER_ID" > /dev/null 2>&1 || true
redis-cli -u "$REDIS_URL" eval "return redis.call('del', unpack(redis.call('keys', 'hero:job:*')))" 0 > /dev/null 2>&1 || true
echo "✅ Redis cleaned up"

echo
echo "=== System Worker Example Complete ==="
# The script's exit status reflects both phases combined
total_success=$((success_count + rapid_success))
total_tests=$((total_jobs + rapid_jobs))
echo "Overall success rate: $total_success/$total_tests"
if ((total_success == total_tests)); then
    exit 0
fi
exit 1

View File

@ -0,0 +1,15 @@
# System Worker Configuration
# Asynchronous worker for high-throughput concurrent processing
worker_id = "system_worker_1"
redis_url = "redis://localhost:6379"
db_path = "/tmp/system_worker_db"
preserve_tasks = false
[worker_type]
type = "async"
default_timeout_seconds = 300 # 5 minutes
[logging]
timestamps = true
level = "info"

View File

@ -0,0 +1,60 @@
use std::process::{Command, Stdio};
use std::path::Path;
use std::env;
use std::io::{self, Write};
/// System Worker Demo Runner
///
/// Thin launcher that lets Cargo run the shell-based System worker example:
/// it looks for `examples/system/example.sh` under the current working
/// directory, marks it executable on Unix, runs it with `bash` using
/// inherited stdio, and exits with the script's exit code when the script
/// fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🚀 System Worker Demo");
    println!("====================");
    println!();

    // The script is resolved relative to the directory the demo is run from.
    let workdir = env::current_dir()?;
    let mut script = workdir.clone();
    script.extend(["examples", "system", "example.sh"]);

    // Bail out early with a hint when started from the wrong directory
    if !script.exists() {
        eprintln!("❌ Error: Script not found at {:?}", script);
        eprintln!(" Make sure you're running this from the worker crate root directory.");
        std::process::exit(1);
    }

    println!("📁 Script location: {:?}", script);
    println!("🔧 Executing System worker example...");
    println!();

    // Make sure the script is executable
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut mode = std::fs::metadata(&script)?.permissions();
        mode.set_mode(0o755);
        std::fs::set_permissions(&script, mode)?;
    }

    // Run the script to completion, sharing this process's stdio with it
    let status = Command::new("bash")
        .arg(&script)
        .current_dir(&workdir)
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .status()?;

    println!();
    if !status.success() {
        println!("❌ System worker demo failed with exit code: {:?}", status.code());
        // A missing code (e.g. killed by a signal) is reported as 1
        std::process::exit(status.code().unwrap_or(1));
    }
    println!("✅ System worker demo completed successfully!");
    Ok(())
}

View File

@ -0,0 +1,322 @@
//! # Trait-Based Worker Demo
//!
//! This example demonstrates the new unified worker interface using the Worker trait.
//! It shows how both synchronous and asynchronous workers can be used with the same
//! API, eliminating code duplication and providing a clean, consistent interface.
//!
//! ## Features Demonstrated
//!
//! - Unified worker interface using the Worker trait
//! - Both sync and async worker implementations
//! - Shared configuration and spawn logic
//! - Clean shutdown handling
//! - Job processing with different strategies
//!
//! ## Usage
//!
//! Make sure Redis is running on localhost:6379, then run:
//! ```bash
//! cargo run --example trait_based_worker_demo
//! ```
use hero_job::{Job, JobStatus, ScriptType};
use log::{info, warn, error};
use rhailib_worker::{
SyncWorker, AsyncWorker,
spawn_sync_worker, spawn_async_worker,
engine::create_heromodels_engine,
worker_trait::{spawn_worker, Worker}
};
use redis::AsyncCommands;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tokio::time::sleep;
const REDIS_URL: &str = "redis://127.0.0.1:6379";
/// Runs the trait-based worker demo end to end.
///
/// Demo 1 builds one `SyncWorker` and one `AsyncWorker` with their builders,
/// spawns both through `spawn_worker`, dispatches one Rhai job to each and
/// polls the job status every 2 seconds for 10 seconds.
/// Demo 2 spawns two more workers through `spawn_sync_worker` /
/// `spawn_async_worker` and dispatches one job to each; those jobs are not
/// monitored, the demo only sleeps 5 seconds before shutting down.
/// Finally a shutdown message is sent to all four workers and their task
/// handles are joined.
///
/// # Errors
///
/// Returns an error if the Redis client or connection cannot be created, or
/// if creating or dispatching a job fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize logging
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
info!("Starting Trait-Based Worker Demo");
// Create Redis connection for job creation
let redis_client = redis::Client::open(REDIS_URL)?;
let mut redis_conn = redis_client.get_multiplexed_async_connection().await?;
// Demo 1: Using the unified trait-based interface
info!("=== Demo 1: Unified Trait-Based Interface ===");
// Create shutdown channels for both workers
let (sync_shutdown_tx, sync_shutdown_rx) = mpsc::channel::<()>(1);
let (async_shutdown_tx, async_shutdown_rx) = mpsc::channel::<()>(1);
// Workers are now configured using builder pattern directly
// Create worker instances using builder pattern
let sync_worker = Arc::new(
SyncWorker::builder()
.worker_id("demo_sync_worker")
.db_path("/tmp")
.redis_url("redis://localhost:6379")
.preserve_tasks(false)
.build()
.expect("Failed to build SyncWorker")
);
let async_worker = Arc::new(
AsyncWorker::builder()
.worker_id("demo_async_worker")
.db_path("/tmp")
.redis_url("redis://localhost:6379")
.default_timeout(Duration::from_secs(300))
.build()
.expect("Failed to build AsyncWorker")
);
let sync_engine = create_heromodels_engine();
let async_engine = create_heromodels_engine();
// Each worker gets its own engine instance and its own shutdown receiver.
info!("Spawning {} worker: {}", sync_worker.worker_type(), sync_worker.worker_id());
let sync_handle = spawn_worker(sync_worker.clone(), sync_engine, sync_shutdown_rx);
info!("Spawning {} worker: {}", async_worker.worker_type(), async_worker.worker_id());
let async_handle = spawn_worker(async_worker.clone(), async_engine, async_shutdown_rx);
// Give workers time to start
sleep(Duration::from_secs(1)).await;
// Create and dispatch jobs to both workers
info!("Creating demo jobs for both workers...");
// Job for sync worker - simple calculation
let sync_job = create_demo_job(
"sync_calculation",
r#"
print("Sync worker: Starting calculation...");
let result = 0;
for i in 1..=100 {
result += i;
}
print("Sync worker: Sum of 1-100 = " + result);
result
"#,
None,
).await?;
dispatch_job(&mut redis_conn, &sync_job, sync_worker.worker_id()).await?;
info!("Dispatched job to sync worker: {}", sync_job.id);
// Job for async worker - with timeout demonstration
let async_job = create_demo_job(
"async_calculation",
r#"
print("Async worker: Starting calculation...");
let result = 1;
for i in 1..=10 {
result *= i;
}
print("Async worker: 10! = " + result);
result
"#,
Some(15), // 15 second timeout
).await?;
dispatch_job(&mut redis_conn, &async_job, async_worker.worker_id()).await?;
info!("Dispatched job to async worker: {}", async_job.id);
// Monitor job execution
info!("Monitoring job execution for 10 seconds...");
let monitor_start = std::time::Instant::now();
let monitor_duration = Duration::from_secs(10);
// The loop has no early exit: it always runs for the full monitoring window,
// so a job that has already finished is reported again on every pass.
while monitor_start.elapsed() < monitor_duration {
// Check sync job status
if let Ok(status) = Job::get_status(&mut redis_conn, &sync_job.id).await {
match status {
JobStatus::Finished => {
// The result is read from the "output" field of the job hash.
let job_key = format!("hero:job:{}", sync_job.id);
if let Ok(result) = redis_conn.hget::<_, _, String>(&job_key, "output").await {
info!("✅ Sync Job {} COMPLETED with result: {}", sync_job.id, result);
} else {
info!("✅ Sync Job {} COMPLETED", sync_job.id);
}
}
JobStatus::Error => {
let job_key = format!("hero:job:{}", sync_job.id);
if let Ok(error) = redis_conn.hget::<_, _, String>(&job_key, "error").await {
warn!("❌ Sync Job {} FAILED with error: {}", sync_job.id, error);
} else {
warn!("❌ Sync Job {} FAILED", sync_job.id);
}
}
_ => info!("🔄 Sync Job {} status: {:?}", sync_job.id, status),
}
}
// Check async job status
if let Ok(status) = Job::get_status(&mut redis_conn, &async_job.id).await {
match status {
JobStatus::Finished => {
let job_key = format!("hero:job:{}", async_job.id);
if let Ok(result) = redis_conn.hget::<_, _, String>(&job_key, "output").await {
info!("✅ Async Job {} COMPLETED with result: {}", async_job.id, result);
} else {
info!("✅ Async Job {} COMPLETED", async_job.id);
}
}
JobStatus::Error => {
let job_key = format!("hero:job:{}", async_job.id);
if let Ok(error) = redis_conn.hget::<_, _, String>(&job_key, "error").await {
warn!("❌ Async Job {} FAILED with error: {}", async_job.id, error);
} else {
warn!("❌ Async Job {} FAILED", async_job.id);
}
}
_ => info!("🔄 Async Job {} status: {:?}", async_job.id, status),
}
}
sleep(Duration::from_secs(2)).await;
}
// Demo 2: Using convenience functions (backward compatibility)
info!("\n=== Demo 2: Convenience Functions (Backward Compatibility) ===");
let (conv_sync_shutdown_tx, conv_sync_shutdown_rx) = mpsc::channel::<()>(1);
let (conv_async_shutdown_tx, conv_async_shutdown_rx) = mpsc::channel::<()>(1);
// Spawn workers using convenience functions
let conv_sync_engine = create_heromodels_engine();
let conv_async_engine = create_heromodels_engine();
info!("Spawning sync worker using convenience function...");
let conv_sync_handle = spawn_sync_worker(
"convenience_sync_worker".to_string(),
"/tmp".to_string(),
conv_sync_engine,
REDIS_URL.to_string(),
conv_sync_shutdown_rx,
false,
);
info!("Spawning async worker using convenience function...");
let conv_async_handle = spawn_async_worker(
"convenience_async_worker".to_string(),
"/tmp".to_string(),
conv_async_engine,
REDIS_URL.to_string(),
conv_async_shutdown_rx,
Duration::from_secs(20), // 20 second timeout
);
// Give convenience workers time to start
sleep(Duration::from_secs(1)).await;
// Create jobs for convenience workers
let conv_sync_job = create_demo_job(
"convenience_sync",
r#"
print("Convenience sync worker: Hello World!");
"Hello from convenience sync worker"
"#,
None,
).await?;
let conv_async_job = create_demo_job(
"convenience_async",
r#"
print("Convenience async worker: Hello World!");
"Hello from convenience async worker"
"#,
Some(10),
).await?;
dispatch_job(&mut redis_conn, &conv_sync_job, "convenience_sync_worker").await?;
dispatch_job(&mut redis_conn, &conv_async_job, "convenience_async_worker").await?;
info!("Dispatched jobs to convenience workers");
// Wait a bit for jobs to complete (their status is not checked in this demo)
sleep(Duration::from_secs(5)).await;
// Shutdown all workers gracefully
info!("\n=== Shutting Down All Workers ===");
info!("Sending shutdown signals...");
// Send errors are deliberately ignored here.
let _ = sync_shutdown_tx.send(()).await;
let _ = async_shutdown_tx.send(()).await;
let _ = conv_sync_shutdown_tx.send(()).await;
let _ = conv_async_shutdown_tx.send(()).await;
info!("Waiting for workers to shutdown...");
// Wait for all workers to shutdown
let results = tokio::join!(
sync_handle,
async_handle,
conv_sync_handle,
conv_async_handle
);
// For each handle the outer Result is the task join result and the inner one
// is the worker's own outcome; anything but four clean exits is an error.
match results {
(Ok(Ok(())), Ok(Ok(())), Ok(Ok(())), Ok(Ok(()))) => {
info!("All workers shut down successfully!");
}
_ => {
error!("Some workers encountered errors during shutdown");
}
}
// NOTE(review): this message and the summary below are logged even when the
// match above reported shutdown errors.
info!("Trait-Based Worker Demo completed successfully!");
// Summary
info!("\n=== Summary ===");
info!("✅ Demonstrated unified Worker trait interface");
info!("✅ Showed both sync and async worker implementations");
info!("✅ Used shared configuration and spawn logic");
info!("✅ Maintained backward compatibility with convenience functions");
info!("✅ Eliminated code duplication between worker types");
info!("✅ Provided clean, consistent API for all worker operations");
Ok(())
}
/// Create a demo job with the specified script and timeout.
///
/// The job is created with caller id `demo_<name>`, the fixed context id
/// `demo_context` and script type [`ScriptType::OSIS`].
///
/// # Arguments
///
/// * `name` - Suffix used to build the caller id.
/// * `script` - Script source to execute.
/// * `timeout_seconds` - Optional timeout in seconds; when `None` the timeout
///   chosen by `Job::new` is left untouched.
///
/// # Errors
///
/// Returns an error when `timeout_seconds` is negative. A plain `as u64` cast
/// would silently turn such a value into an enormous duration.
async fn create_demo_job(
    name: &str,
    script: &str,
    timeout_seconds: Option<i32>,
) -> Result<Job, Box<dyn std::error::Error>> {
    let mut job = Job::new(
        format!("demo_{}", name),   // caller_id
        "demo_context".to_string(), // context_id
        script.to_string(),
        ScriptType::OSIS,
    );

    // Set timeout if provided
    if let Some(timeout) = timeout_seconds {
        if timeout < 0 {
            return Err(format!(
                "timeout_seconds must be non-negative, got {}",
                timeout
            )
            .into());
        }
        // Non-negative at this point, so `unsigned_abs` is a lossless conversion.
        job.timeout = Duration::from_secs(u64::from(timeout.unsigned_abs()));
    }

    Ok(job)
}
/// Persist `job` in Redis and enqueue its id for the given worker.
///
/// The job is stored first and only then pushed onto the list
/// `hero:job:<worker_queue>`, so the payload already exists by the time the id
/// becomes visible on the queue.
///
/// # Errors
///
/// Propagates any error from storing the job or from the `RPUSH` command.
async fn dispatch_job(
    redis_conn: &mut redis::aio::MultiplexedConnection,
    job: &Job,
    worker_queue: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    job.store_in_redis(redis_conn).await?;

    let queue_key = format!("hero:job:{worker_queue}");
    let _: () = redis_conn.rpush(&queue_key, &job.id).await?;

    Ok(())
}

View File

@ -0,0 +1,420 @@
//! # Asynchronous Worker Implementation
//!
//! This module provides an asynchronous worker implementation that can process
//! multiple jobs concurrently with timeout support. Each job is spawned as a
//! separate Tokio task, allowing for parallel execution and proper timeout handling.
//!
//! ## Features
//!
//! - **Concurrent Processing**: Multiple jobs can run simultaneously
//! - **Timeout Support**: Jobs that exceed their timeout are automatically cancelled
//! - **Resource Cleanup**: Proper cleanup of aborted/cancelled jobs
//! - **Non-blocking**: Worker continues processing new jobs while others are running
//! - **Scalable**: Can handle high job throughput with parallel execution
//!
//! ## Usage
//!
//! ```rust
//! use std::sync::Arc;
//! use std::time::Duration;
//! use rhailib_worker::async_worker_impl::AsyncWorker;
//! use rhailib_worker::worker_trait::{spawn_worker, WorkerConfig};
//! use rhailib_worker::engine::create_heromodels_engine;
//! use tokio::sync::mpsc;
//!
//! let worker = Arc::new(
//!     AsyncWorker::builder()
//!         .worker_id("async_worker_1")
//!         .db_path("/path/to/db")
//!         .redis_url("redis://localhost:6379")
//!         .default_timeout(Duration::from_secs(300))
//!         .build()
//!         .expect("all required builder fields are set"),
//! );
//! let engine = create_heromodels_engine();
//! let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
//!
//! let handle = spawn_worker(worker, engine, shutdown_rx);
//!
//! // Later, shutdown the worker
//! shutdown_tx.send(()).await.unwrap();
//! handle.await.unwrap().unwrap();
//! ```
use async_trait::async_trait;
use hero_job::{Job, JobStatus};
use log::{debug, error, info, warn};
use rhai::Engine;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
use tokio::task::JoinHandle;
use tokio::time::timeout;
use crate::engine::eval_script;
use crate::worker_trait::{Worker, WorkerConfig};
use crate::initialize_redis_connection;
/// Represents a running job with its handle and metadata
#[derive(Debug)]
struct RunningJob {
    /// Identifier of the job; the same value is used as the key of the tracking map.
    job_id: String,
    /// Handle of the Tokio task executing the job; checked with `is_finished()` during cleanup.
    handle: JoinHandle<()>,
    /// Instant at which the job was added to the tracking map, used to log its duration.
    started_at: std::time::Instant,
}
/// Builder for AsyncWorker
///
/// Every field starts out unset. `worker_id`, `db_path` and `redis_url` must be
/// provided before `build` succeeds; `default_timeout` is optional.
#[derive(Debug, Default)]
pub struct AsyncWorkerBuilder {
    /// Worker identifier (required).
    worker_id: Option<String>,
    /// Database path (required).
    db_path: Option<String>,
    /// Redis connection URL (required).
    redis_url: Option<String>,
    /// Timeout for jobs that carry none of their own; `build` falls back to 300 seconds.
    default_timeout: Option<Duration>,
}
impl AsyncWorkerBuilder {
    /// Start a builder with every field unset.
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the worker identifier (required).
    pub fn worker_id<S: Into<String>>(self, worker_id: S) -> Self {
        Self {
            worker_id: Some(worker_id.into()),
            ..self
        }
    }

    /// Set the database path (required).
    pub fn db_path<S: Into<String>>(self, db_path: S) -> Self {
        Self {
            db_path: Some(db_path.into()),
            ..self
        }
    }

    /// Set the Redis connection URL (required).
    pub fn redis_url<S: Into<String>>(self, redis_url: S) -> Self {
        Self {
            redis_url: Some(redis_url.into()),
            ..self
        }
    }

    /// Set the timeout applied to jobs that do not carry their own.
    pub fn default_timeout(self, timeout: Duration) -> Self {
        Self {
            default_timeout: Some(timeout),
            ..self
        }
    }

    /// Assemble the worker.
    ///
    /// # Errors
    ///
    /// Returns a message naming the first missing required field, checked in
    /// the order `worker_id`, `db_path`, `redis_url`.
    pub fn build(self) -> Result<AsyncWorker, String> {
        let worker_id = self.worker_id.ok_or("worker_id is required")?;
        let db_path = self.db_path.ok_or("db_path is required")?;
        let redis_url = self.redis_url.ok_or("redis_url is required")?;
        // Five minutes when the caller did not choose a timeout.
        let default_timeout = self.default_timeout.unwrap_or(Duration::from_secs(300));

        Ok(AsyncWorker {
            worker_id,
            db_path,
            redis_url,
            default_timeout,
            running_jobs: Arc::new(Mutex::new(HashMap::new())),
        })
    }
}
/// Asynchronous worker that processes jobs concurrently
///
/// Cloning the worker clones the `Arc` around the tracking map, so clones share
/// the same set of running jobs.
#[derive(Debug, Clone)]
pub struct AsyncWorker {
    /// Identifier of this worker, used in log messages and returned by `worker_id()`.
    pub worker_id: String,
    /// Database path supplied at construction; not read by the code in this module.
    pub db_path: String,
    /// Redis URL used to open a dedicated connection for every job.
    pub redis_url: String,
    /// Timeout applied when a job's own timeout is less than one second.
    pub default_timeout: Duration,
    /// Jobs currently tracked as running, keyed by job id.
    running_jobs: Arc<Mutex<HashMap<String, RunningJob>>>,
}
impl AsyncWorker {
/// Create a new AsyncWorkerBuilder
pub fn builder() -> AsyncWorkerBuilder {
AsyncWorkerBuilder::new()
}
/// Add a running job to the tracking map
async fn add_running_job(&self, job_id: String, handle: JoinHandle<()>) {
let running_job = RunningJob {
job_id: job_id.clone(),
handle,
started_at: std::time::Instant::now(),
};
let mut jobs = self.running_jobs.lock().await;
jobs.insert(job_id.clone(), running_job);
debug!("Async Worker: Added running job '{}'. Total running: {}",
job_id, jobs.len());
}
/// Remove a completed job from the tracking map
async fn remove_running_job(&self, job_id: &str) {
let mut jobs = self.running_jobs.lock().await;
if let Some(job) = jobs.remove(job_id) {
let duration = job.started_at.elapsed();
debug!("Async Worker: Removed completed job '{}' after {:?}. Remaining: {}",
job_id, duration, jobs.len());
}
}
/// Get the count of currently running jobs
pub async fn running_job_count(&self) -> usize {
let jobs = self.running_jobs.lock().await;
jobs.len()
}
/// Cleanup any finished jobs from the running jobs map
async fn cleanup_finished_jobs(&self) {
let mut jobs = self.running_jobs.lock().await;
let mut to_remove = Vec::new();
for (job_id, running_job) in jobs.iter() {
if running_job.handle.is_finished() {
to_remove.push(job_id.clone());
}
}
for job_id in to_remove {
if let Some(job) = jobs.remove(&job_id) {
let duration = job.started_at.elapsed();
debug!("Async Worker: Cleaned up finished job '{}' after {:?}",
job_id, duration);
}
}
}
/// Execute a single job asynchronously with timeout support
async fn execute_job_with_timeout(
job: Job,
engine: Engine,
worker_id: String,
redis_url: String,
job_timeout: Duration,
) {
let job_id = job.id.clone();
info!("Async Worker '{}', Job {}: Starting execution with timeout {:?}",
worker_id, job_id, job_timeout);
// Create a new Redis connection for this job
let mut redis_conn = match initialize_redis_connection(&worker_id, &redis_url).await {
Ok(conn) => conn,
Err(e) => {
error!("Async Worker '{}', Job {}: Failed to initialize Redis connection: {}",
worker_id, job_id, e);
return;
}
};
// Update job status to Started
if let Err(e) = Job::update_status(&mut redis_conn, &job_id, JobStatus::Started).await {
error!("Async Worker '{}', Job {}: Failed to update status to Started: {}",
worker_id, job_id, e);
return;
}
// Create the script execution task
let script_task = async {
// Execute the Rhai script
match eval_script(&engine, &job.script) {
Ok(result) => {
let result_str = format!("{:?}", result);
info!("Async Worker '{}', Job {}: Script executed successfully. Result: {}",
worker_id, job_id, result_str);
// Update job with success result
if let Err(e) = Job::set_result(&mut redis_conn, &job_id, &result_str).await {
error!("Async Worker '{}', Job {}: Failed to set result: {}",
worker_id, job_id, e);
return;
}
if let Err(e) = Job::update_status(&mut redis_conn, &job_id, JobStatus::Finished).await {
error!("Async Worker '{}', Job {}: Failed to update status to Finished: {}",
worker_id, job_id, e);
}
}
Err(e) => {
let error_msg = format!("Script execution error: {}", e);
error!("Async Worker '{}', Job {}: {}", worker_id, job_id, error_msg);
// Update job with error
if let Err(e) = Job::set_error(&mut redis_conn, &job_id, &error_msg).await {
error!("Async Worker '{}', Job {}: Failed to set error: {}",
worker_id, job_id, e);
return;
}
if let Err(e) = Job::update_status(&mut redis_conn, &job_id, JobStatus::Error).await {
error!("Async Worker '{}', Job {}: Failed to update status to Error: {}",
worker_id, job_id, e);
}
}
}
};
// Execute the script with timeout
match timeout(job_timeout, script_task).await {
Ok(()) => {
info!("Async Worker '{}', Job {}: Completed within timeout", worker_id, job_id);
}
Err(_) => {
warn!("Async Worker '{}', Job {}: Timed out after {:?}, marking as error",
worker_id, job_id, job_timeout);
let timeout_msg = format!("Job timed out after {:?}", job_timeout);
if let Err(e) = Job::set_error(&mut redis_conn, &job_id, &timeout_msg).await {
error!("Async Worker '{}', Job {}: Failed to set timeout error: {}",
worker_id, job_id, e);
}
if let Err(e) = Job::update_status(&mut redis_conn, &job_id, JobStatus::Error).await {
error!("Async Worker '{}', Job {}: Failed to update status to Error after timeout: {}",
worker_id, job_id, e);
}
}
}
info!("Async Worker '{}', Job {}: Job processing completed", worker_id, job_id);
}
}
impl Default for AsyncWorker {
    /// Build a worker from fixed placeholder settings with an empty job map.
    ///
    /// Intended for tests and examples; use the builder to supply real values.
    fn default() -> Self {
        let running_jobs = Arc::new(Mutex::new(HashMap::new()));
        let default_timeout = Duration::from_secs(300);

        Self {
            worker_id: String::from("default_async_worker"),
            db_path: String::from("/tmp"),
            redis_url: String::from("redis://localhost:6379"),
            default_timeout,
            running_jobs,
        }
    }
}
#[async_trait]
impl Worker for AsyncWorker {
    /// Start `job` on its own Tokio task and return without waiting for it.
    ///
    /// The spawned task opens its own Redis connection, which is why the
    /// connection passed in here is unused. A job timeout of less than one
    /// second is treated as unset and replaced by the worker's default.
    async fn process_job(
        &self,
        job: Job,
        engine: Engine, // Reuse the stateless engine
        _redis_conn: &mut redis::aio::MultiplexedConnection,
    ) {
        // Job-specific timeout when present, otherwise the worker default.
        let job_timeout = match job.timeout.as_secs() {
            0 => self.default_timeout,
            _ => job.timeout,
        };

        let job_id = job.id.clone();
        let worker_id = self.worker_id.clone();

        info!("Async Worker '{}', Job {}: Spawning job execution task with timeout {:?}",
            worker_id, job_id, job_timeout);

        // Owned copies that move into the spawned task.
        let task_job_id = job_id.clone();
        let exec_worker_id = worker_id.clone();
        let redis_url = self.redis_url.clone();
        let running_jobs = Arc::clone(&self.running_jobs);

        let job_handle = tokio::spawn(async move {
            Self::execute_job_with_timeout(job, engine, exec_worker_id, redis_url, job_timeout)
                .await;

            // The task removes its own entry from the tracking map once it is done.
            let mut jobs = running_jobs.lock().await;
            if let Some(running_job) = jobs.remove(&task_job_id) {
                let duration = running_job.started_at.elapsed();
                debug!("Async Worker '{}': Removed completed job '{}' after {:?}",
                    worker_id, task_job_id, duration);
            }
        });

        // Track the task, then sweep out entries whose tasks have already finished.
        self.add_running_job(job_id, job_handle).await;
        self.cleanup_finished_jobs().await;
    }

    /// Label used in log messages for this kind of worker.
    fn worker_type(&self) -> &'static str {
        "Async"
    }

    /// Identifier of this worker instance.
    fn worker_id(&self) -> &str {
        self.worker_id.as_str()
    }

    /// Redis URL this worker connects to.
    fn redis_url(&self) -> &str {
        self.redis_url.as_str()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::create_heromodels_engine;
    use hero_job::ScriptType;

    /// A freshly created worker reports its type and tracks no jobs.
    #[tokio::test]
    async fn test_async_worker_creation() {
        let async_worker = AsyncWorker::new();

        assert_eq!(async_worker.worker_type(), "Async");
        assert_eq!(async_worker.running_job_count().await, 0);
    }

    /// The `Default` implementation yields an async worker.
    #[tokio::test]
    async fn test_async_worker_default() {
        let async_worker = AsyncWorker::default();

        assert_eq!(async_worker.worker_type(), "Async");
    }

    /// A tracked task is counted while running and swept away once finished.
    #[tokio::test]
    async fn test_async_worker_job_tracking() {
        let async_worker = AsyncWorker::new();

        // Stand-in for a job: a task that sleeps briefly.
        let short_task = tokio::spawn(async {
            tokio::time::sleep(Duration::from_millis(100)).await;
        });
        async_worker
            .add_running_job("job_1".to_string(), short_task)
            .await;
        assert_eq!(async_worker.running_job_count().await, 1);

        // Give the task time to complete before cleaning up.
        tokio::time::sleep(Duration::from_millis(200)).await;
        async_worker.cleanup_finished_jobs().await;
        assert_eq!(async_worker.running_job_count().await, 0);
    }

    /// Builds the inputs `process_job` needs without calling it, because no
    /// Redis instance or mock is available in this test.
    #[tokio::test]
    async fn test_async_worker_process_job_interface() {
        let async_worker = AsyncWorker::new();
        let _engine = create_heromodels_engine();

        // A simple job that would print and evaluate to 42.
        let _job = Job::new(
            "test_caller".to_string(),
            "test_context".to_string(),
            r#"print("Hello from async worker test!"); 42"#.to_string(),
            ScriptType::OSIS,
        );

        let _config = WorkerConfig::new(
            "test_async_worker".to_string(),
            "/tmp".to_string(),
            "redis://localhost:6379".to_string(),
            false,
        )
        .with_default_timeout(Duration::from_secs(60));

        // With a Redis connection available, the call would be:
        // async_worker.process_job(_job, _engine, &mut redis_conn).await;

        // Until then, only check that the worker itself was created.
        assert_eq!(async_worker.worker_type(), "Async");
    }
}

250
core/worker/src/config.rs Normal file
View File

@ -0,0 +1,250 @@
//! Worker Configuration Module - TOML-based configuration for Hero workers
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
use std::time::Duration;
/// Worker configuration loaded from TOML file
///
/// `worker_id`, `redis_url`, `db_path` and `worker_type` have no serde default
/// and must therefore be present in the file. `preserve_tasks` and `logging`
/// fall back to their defaults when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkerConfig {
    /// Worker identification
    pub worker_id: String,

    /// Redis connection URL
    pub redis_url: String,

    /// Database path for Rhai engine
    pub db_path: String,

    /// Whether to preserve task details after completion
    /// (defaults to `false` when omitted)
    #[serde(default = "default_preserve_tasks")]
    pub preserve_tasks: bool,

    /// Worker type configuration
    pub worker_type: WorkerType,

    /// Logging configuration
    /// (defaults to `LoggingConfig::default()` when omitted)
    #[serde(default)]
    pub logging: LoggingConfig,
}
/// Worker type configuration
///
/// Serialized as an internally tagged enum: the variant is selected by a
/// `type` key whose value is `"sync"` or `"async"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum WorkerType {
    /// Synchronous worker configuration
    #[serde(rename = "sync")]
    Sync,

    /// Asynchronous worker configuration
    #[serde(rename = "async")]
    Async {
        /// Default timeout for jobs in seconds
        /// (defaults to 300 when omitted)
        #[serde(default = "default_timeout_seconds")]
        default_timeout_seconds: u64,
    },
}
/// Logging configuration
///
/// Both fields are optional in the TOML file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoggingConfig {
    /// Whether to include timestamps in log output
    /// (defaults to `true` when omitted)
    #[serde(default = "default_timestamps")]
    pub timestamps: bool,

    /// Log level (trace, debug, info, warn, error)
    /// (defaults to `"info"` when omitted; checked case-insensitively by
    /// `WorkerConfig::validate`)
    #[serde(default = "default_log_level")]
    pub level: String,
}
impl Default for LoggingConfig {
    /// Use the same values serde falls back to for omitted fields.
    fn default() -> Self {
        let timestamps = default_timestamps();
        let level = default_log_level();

        Self { timestamps, level }
    }
}
impl WorkerConfig {
    /// Read, parse and validate a worker configuration from a TOML file.
    ///
    /// # Errors
    ///
    /// * [`ConfigError::IoError`] when the file cannot be read.
    /// * [`ConfigError::ParseError`] when the content is not valid TOML for
    ///   this structure.
    /// * [`ConfigError::ValidationError`] when the parsed values fail validation.
    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
        let content = match fs::read_to_string(&path) {
            Ok(text) => text,
            Err(e) => {
                return Err(ConfigError::IoError(format!(
                    "Failed to read config file: {}",
                    e
                )))
            }
        };

        let config = match toml::from_str::<WorkerConfig>(&content) {
            Ok(parsed) => parsed,
            Err(e) => {
                return Err(ConfigError::ParseError(format!(
                    "Failed to parse TOML: {}",
                    e
                )))
            }
        };

        config.validate()?;
        Ok(config)
    }

    /// Check that the required strings are non-empty and the log level is known.
    ///
    /// Fields are checked in the order `worker_id`, `redis_url`, `db_path`,
    /// then the log level; the first failure is reported.
    fn validate(&self) -> Result<(), ConfigError> {
        const KNOWN_LEVELS: [&str; 5] = ["trace", "debug", "info", "warn", "error"];

        let required = [
            (&self.worker_id, "worker_id cannot be empty"),
            (&self.redis_url, "redis_url cannot be empty"),
            (&self.db_path, "db_path cannot be empty"),
        ];
        if let Some((_, message)) = required.iter().find(|(value, _)| value.is_empty()) {
            return Err(ConfigError::ValidationError(message.to_string()));
        }

        // Log levels are accepted in any letter case.
        let level = self.logging.level.to_lowercase();
        if !KNOWN_LEVELS.contains(&level.as_str()) {
            return Err(ConfigError::ValidationError(
                format!("Invalid log level: {}. Must be one of: trace, debug, info, warn, error", self.logging.level)
            ));
        }

        Ok(())
    }

    /// Default job timeout of an async worker; `None` for a sync worker.
    pub fn get_default_timeout(&self) -> Option<Duration> {
        match self.worker_type {
            WorkerType::Async { default_timeout_seconds } => {
                Some(Duration::from_secs(default_timeout_seconds))
            }
            WorkerType::Sync => None,
        }
    }

    /// Whether this configuration describes a sync worker.
    pub fn is_sync(&self) -> bool {
        matches!(&self.worker_type, WorkerType::Sync)
    }

    /// Whether this configuration describes an async worker.
    pub fn is_async(&self) -> bool {
        matches!(&self.worker_type, WorkerType::Async { .. })
    }
}
/// Configuration error types
///
/// Each variant carries a ready-to-display message string.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// The configuration file could not be read.
    #[error("IO error: {0}")]
    IoError(String),

    /// The file content could not be parsed as a worker configuration.
    #[error("Parse error: {0}")]
    ParseError(String),

    /// The parsed configuration contains an empty required field or an
    /// unknown log level.
    #[error("Validation error: {0}")]
    ValidationError(String),
}
// Default value functions for serde

/// Task details are not preserved unless the configuration asks for it.
fn default_preserve_tasks() -> bool {
    false
}

/// Async jobs time out after five minutes by default.
fn default_timeout_seconds() -> u64 {
    5 * 60
}

/// Timestamps are included in log output by default.
fn default_timestamps() -> bool {
    true
}

/// The default log level is `info`.
fn default_log_level() -> String {
    String::from("info")
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// A sync configuration parses, has no default timeout and keeps the
    /// explicit logging settings.
    #[test]
    fn test_sync_worker_config() {
        let config_toml = r#"
worker_id = "sync_worker_1"
redis_url = "redis://localhost:6379"
db_path = "/tmp/worker_db"
[worker_type]
type = "sync"
[logging]
timestamps = false
level = "debug"
"#;

        let parsed: WorkerConfig = toml::from_str(config_toml).unwrap();

        assert_eq!(parsed.worker_id, "sync_worker_1");
        assert!(parsed.is_sync());
        assert!(!parsed.is_async());
        assert_eq!(parsed.get_default_timeout(), None);
        assert!(!parsed.logging.timestamps);
        assert_eq!(parsed.logging.level, "debug");
    }

    /// An async configuration exposes its timeout as a `Duration`.
    #[test]
    fn test_async_worker_config() {
        let config_toml = r#"
worker_id = "async_worker_1"
redis_url = "redis://localhost:6379"
db_path = "/tmp/worker_db"
[worker_type]
type = "async"
default_timeout_seconds = 600
[logging]
timestamps = true
level = "info"
"#;

        let parsed: WorkerConfig = toml::from_str(config_toml).unwrap();

        assert_eq!(parsed.worker_id, "async_worker_1");
        assert!(!parsed.is_sync());
        assert!(parsed.is_async());
        assert_eq!(parsed.get_default_timeout(), Some(Duration::from_secs(600)));
        assert!(parsed.logging.timestamps);
        assert_eq!(parsed.logging.level, "info");
    }

    /// `from_file` loads a configuration written to a temporary file.
    #[test]
    fn test_config_from_file() {
        let config_toml = r#"
worker_id = "test_worker"
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"
[worker_type]
type = "sync"
"#;

        let mut config_file = NamedTempFile::new().unwrap();
        config_file.write_all(config_toml.as_bytes()).unwrap();

        let loaded = WorkerConfig::from_file(config_file.path()).unwrap();

        assert_eq!(loaded.worker_id, "test_worker");
        assert!(loaded.is_sync());
    }

    /// An empty `worker_id` parses successfully but is rejected by `validate`.
    #[test]
    fn test_config_validation() {
        let config_toml = r#"
worker_id = ""
redis_url = "redis://localhost:6379"
db_path = "/tmp/test_db"
[worker_type]
type = "sync"
"#;

        let parse_result: Result<WorkerConfig, _> = toml::from_str(config_toml);
        assert!(parse_result.is_ok());

        let parsed = parse_result.unwrap();
        assert!(parsed.validate().is_err());
    }
}

View File

@ -8,11 +8,23 @@ use tokio::task::JoinHandle;
/// Engine module containing Rhai engine creation and script execution utilities
pub mod engine;
/// Worker trait abstraction for unified worker interface
pub mod worker_trait;
/// Synchronous worker implementation
pub mod sync_worker;
/// Asynchronous worker implementation with trait-based interface
pub mod async_worker_impl;
/// Configuration module for TOML-based worker configuration
pub mod config;
const NAMESPACE_PREFIX: &str = "hero:job:";
const BLPOP_TIMEOUT_SECONDS: usize = 5;
/// Initialize Redis connection for the worker
async fn initialize_redis_connection(
pub(crate) async fn initialize_redis_connection(
worker_id: &str,
redis_url: &str,
) -> Result<redis::aio::MultiplexedConnection, Box<dyn std::error::Error + Send + Sync>> {
@ -33,7 +45,7 @@ async fn initialize_redis_connection(
}
/// Load job from Redis using Job struct
async fn load_job_from_redis(
pub(crate) async fn load_job_from_redis(
redis_conn: &mut redis::aio::MultiplexedConnection,
job_id: &str,
worker_id: &str,
@ -232,3 +244,60 @@ pub fn spawn_rhai_worker(
Ok(())
})
}
// Re-export the main trait-based interface for convenience
pub use worker_trait::{Worker, WorkerConfig, spawn_worker};
pub use sync_worker::SyncWorker;
pub use async_worker_impl::AsyncWorker;
/// Build a [`SyncWorker`] from plain arguments and spawn it through the
/// trait-based interface.
///
/// Keeps the argument list of the original sync worker API so existing callers
/// continue to work.
///
/// # Panics
///
/// Panics with "Failed to build SyncWorker" if the builder rejects the
/// configuration. Every required builder field is supplied here.
pub fn spawn_sync_worker(
    worker_id: String,
    db_path: String,
    engine: rhai::Engine,
    redis_url: String,
    shutdown_rx: mpsc::Receiver<()>,
    preserve_tasks: bool,
) -> JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>> {
    let sync_worker = SyncWorker::builder()
        .worker_id(worker_id)
        .db_path(db_path)
        .redis_url(redis_url)
        .preserve_tasks(preserve_tasks)
        .build()
        .expect("Failed to build SyncWorker");

    spawn_worker(std::sync::Arc::new(sync_worker), engine, shutdown_rx)
}
/// Build an [`AsyncWorker`] from plain arguments and spawn it through the
/// trait-based interface.
///
/// `default_timeout` is applied to jobs that do not carry a timeout of their own.
///
/// # Panics
///
/// Panics with "Failed to build AsyncWorker" if the builder rejects the
/// configuration. Every required builder field is supplied here.
pub fn spawn_async_worker(
    worker_id: String,
    db_path: String,
    engine: rhai::Engine,
    redis_url: String,
    shutdown_rx: mpsc::Receiver<()>,
    default_timeout: std::time::Duration,
) -> JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>> {
    let async_worker = AsyncWorker::builder()
        .worker_id(worker_id)
        .db_path(db_path)
        .redis_url(redis_url)
        .default_timeout(default_timeout)
        .build()
        .expect("Failed to build AsyncWorker");

    spawn_worker(std::sync::Arc::new(async_worker), engine, shutdown_rx)
}

View File

@ -0,0 +1,255 @@
//! # Synchronous Worker Implementation
//!
//! This module provides a synchronous worker implementation that processes jobs
//! one at a time in sequence. This is the original worker behavior that's suitable
//! for scenarios where job execution should not overlap or when resource constraints
//! require sequential processing.
//!
//! ## Features
//!
//! - **Sequential Processing**: Jobs are processed one at a time
//! - **Simple Resource Management**: No concurrent job tracking needed
//! - **Predictable Behavior**: Jobs complete in the order they're received
//! - **Lower Memory Usage**: Only one job active at a time
//!
//! ## Usage
//!
//! ```rust
//! use std::sync::Arc;
//! use rhailib_worker::sync_worker::SyncWorker;
//! use rhailib_worker::worker_trait::{spawn_worker, WorkerConfig};
//! use rhailib_worker::engine::create_heromodels_engine;
//! use tokio::sync::mpsc;
//!
//! let worker = Arc::new(
//!     SyncWorker::builder()
//!         .worker_id("sync_worker_1")
//!         .db_path("/path/to/db")
//!         .redis_url("redis://localhost:6379")
//!         .preserve_tasks(false)
//!         .build()
//!         .expect("all required builder fields are set"),
//! );
//! let engine = create_heromodels_engine();
//! let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
//!
//! let handle = spawn_worker(worker, engine, shutdown_rx);
//!
//! // Later, shutdown the worker
//! shutdown_tx.send(()).await.unwrap();
//! handle.await.unwrap().unwrap();
//! ```
use async_trait::async_trait;
use hero_job::{Job, JobStatus};
use log::{debug, error, info};
use rhai::Engine;
use crate::engine::eval_script;
use crate::worker_trait::{Worker, WorkerConfig};
/// Builder for SyncWorker
///
/// `worker_id`, `db_path` and `redis_url` must be provided before `build`
/// succeeds; `preserve_tasks` defaults to `false`.
#[derive(Debug, Default)]
pub struct SyncWorkerBuilder {
    /// Worker identifier (required).
    worker_id: Option<String>,
    /// Database path (required).
    db_path: Option<String>,
    /// Redis connection URL (required).
    redis_url: Option<String>,
    /// Whether processed jobs are kept in Redis.
    preserve_tasks: bool,
}

impl SyncWorkerBuilder {
    /// Start a builder with every field unset.
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the worker identifier (required).
    pub fn worker_id<S: Into<String>>(mut self, worker_id: S) -> Self {
        self.worker_id = Some(worker_id.into());
        self
    }

    /// Set the database path (required).
    pub fn db_path<S: Into<String>>(mut self, db_path: S) -> Self {
        self.db_path = Some(db_path.into());
        self
    }

    /// Set the Redis connection URL (required).
    pub fn redis_url<S: Into<String>>(mut self, redis_url: S) -> Self {
        self.redis_url = Some(redis_url.into());
        self
    }

    /// Choose whether processed jobs are kept in Redis.
    pub fn preserve_tasks(mut self, preserve: bool) -> Self {
        self.preserve_tasks = preserve;
        self
    }

    /// Assemble the worker.
    ///
    /// # Errors
    ///
    /// Returns a message naming the first missing required field, checked in
    /// the order `worker_id`, `db_path`, `redis_url`.
    pub fn build(self) -> Result<SyncWorker, String> {
        Ok(SyncWorker {
            worker_id: self.worker_id.ok_or("worker_id is required")?,
            db_path: self.db_path.ok_or("db_path is required")?,
            redis_url: self.redis_url.ok_or("redis_url is required")?,
            preserve_tasks: self.preserve_tasks,
        })
    }
}

/// Synchronous worker that processes jobs sequentially
#[derive(Debug, Clone)]
pub struct SyncWorker {
    /// Identifier of this worker, used in log messages and returned by `worker_id()`.
    pub worker_id: String,
    /// Database path supplied at construction.
    pub db_path: String,
    /// Redis URL returned by `redis_url()`.
    pub redis_url: String,
    /// When `false`, a job is deleted from Redis after it has been processed.
    pub preserve_tasks: bool,
}

impl SyncWorker {
    /// Create a worker with the placeholder settings of [`Default`].
    ///
    /// Provided because the module documentation and the unit tests construct
    /// workers this way; real deployments should configure the worker through
    /// [`SyncWorker::builder`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Create a new SyncWorkerBuilder
    pub fn builder() -> SyncWorkerBuilder {
        SyncWorkerBuilder::new()
    }
}

impl Default for SyncWorker {
    fn default() -> Self {
        // Default SyncWorker with placeholder values
        // In practice, use the builder pattern instead
        Self {
            worker_id: "default_sync_worker".to_string(),
            db_path: "/tmp".to_string(),
            redis_url: "redis://localhost:6379".to_string(),
            preserve_tasks: false,
        }
    }
}
#[async_trait]
impl Worker for SyncWorker {
    /// Process one job to completion before returning.
    ///
    /// Marks the job `Started`, evaluates its script and records either the
    /// result (`Finished`) or the error message (`Error`). Unless
    /// `preserve_tasks` is set, the job is deleted from Redis afterwards.
    ///
    /// Failures are logged rather than returned. If the `Started` update, the
    /// result write or the error write fails, the method returns early and the
    /// cleanup step is skipped.
    async fn process_job(
        &self,
        job: Job,
        engine: Engine,
        redis_conn: &mut redis::aio::MultiplexedConnection,
    ) {
        let job_id = &job.id;
        let worker_id = &self.worker_id;

        info!("Sync Worker '{}', Job {}: Starting sequential processing", worker_id, job_id);

        // Update job status to Started
        if let Err(e) = Job::update_status(redis_conn, job_id, JobStatus::Started).await {
            error!("Sync Worker '{}', Job {}: Failed to update status to Started: {}",
                worker_id, job_id, e);
            return;
        }

        // Execute the Rhai script
        match eval_script(&engine, &job.script) {
            Ok(result) => {
                let result_str = format!("{:?}", result);
                info!("Sync Worker '{}', Job {}: Script executed successfully. Result: {}",
                    worker_id, job_id, result_str);

                // Update job with success result
                if let Err(e) = Job::set_result(redis_conn, job_id, &result_str).await {
                    error!("Sync Worker '{}', Job {}: Failed to set result: {}",
                        worker_id, job_id, e);
                    return;
                }

                if let Err(e) = Job::update_status(redis_conn, job_id, JobStatus::Finished).await {
                    error!("Sync Worker '{}', Job {}: Failed to update status to Finished: {}",
                        worker_id, job_id, e);
                }
            }
            Err(e) => {
                let error_msg = format!("Script execution error: {}", e);
                error!("Sync Worker '{}', Job {}: {}", worker_id, job_id, error_msg);

                // Update job with error
                if let Err(e) = Job::set_error(redis_conn, job_id, &error_msg).await {
                    error!("Sync Worker '{}', Job {}: Failed to set error: {}",
                        worker_id, job_id, e);
                    return;
                }

                if let Err(e) = Job::update_status(redis_conn, job_id, JobStatus::Error).await {
                    error!("Sync Worker '{}', Job {}: Failed to update status to Error: {}",
                        worker_id, job_id, e);
                }
            }
        }

        // Cleanup job if preserve_tasks is false
        if !self.preserve_tasks {
            if let Err(e) = Job::delete_from_redis(redis_conn, job_id).await {
                error!("Sync Worker '{}', Job {}: Failed to cleanup job: {}",
                    worker_id, job_id, e);
            } else {
                debug!("Sync Worker '{}', Job {}: Job cleaned up from Redis", worker_id, job_id);
            }
        }

        info!("Sync Worker '{}', Job {}: Sequential processing completed", worker_id, job_id);
    }

    /// Label used in log messages for this kind of worker.
    fn worker_type(&self) -> &'static str {
        "Sync"
    }

    /// Identifier of this worker instance.
    fn worker_id(&self) -> &str {
        &self.worker_id
    }

    /// Redis URL this worker connects to.
    fn redis_url(&self) -> &str {
        &self.redis_url
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::create_heromodels_engine;
    use hero_job::ScriptType;
    use std::time::Duration;

    /// A freshly created worker reports the sync worker type.
    #[tokio::test]
    async fn test_sync_worker_creation() {
        let sync_worker = SyncWorker::new();

        assert_eq!(sync_worker.worker_type(), "Sync");
    }

    /// The `Default` implementation yields a sync worker.
    #[tokio::test]
    async fn test_sync_worker_default() {
        let sync_worker = SyncWorker::default();

        assert_eq!(sync_worker.worker_type(), "Sync");
    }

    /// Builds the inputs `process_job` needs without calling it, because no
    /// Redis instance or mock is available in this test.
    #[tokio::test]
    async fn test_sync_worker_process_job_interface() {
        let sync_worker = SyncWorker::new();
        let _engine = create_heromodels_engine();

        // A simple job that would print and evaluate to 42.
        let _job = Job::new(
            "test_caller".to_string(),
            "test_context".to_string(),
            r#"print("Hello from sync worker test!"); 42"#.to_string(),
            ScriptType::OSIS,
        );

        let _config = WorkerConfig::new(
            "test_sync_worker".to_string(),
            "/tmp".to_string(),
            "redis://localhost:6379".to_string(),
            false,
        );

        // With a Redis connection available, the call would be:
        // sync_worker.process_job(_job, _engine, &mut redis_conn).await;

        // Until then, only check that the worker itself was created.
        assert_eq!(sync_worker.worker_type(), "Sync");
    }
}

View File

@ -0,0 +1,339 @@
//! # Worker Trait Abstraction
//!
//! This module provides a trait-based abstraction for Rhai workers that eliminates
//! code duplication between synchronous and asynchronous worker implementations.
//!
//! The `Worker` trait defines the common interface and behavior, while specific
//! implementations handle job processing differently (sync vs async).
//!
//! ## Architecture
//!
//! ```text
//! ┌─────────────────┐     ┌─────────────────┐
//! │   SyncWorker    │     │   AsyncWorker   │
//! │                 │     │                 │
//! │  process_job()  │     │  process_job()  │
//! │  (sequential)   │     │  (concurrent)   │
//! └─────────────────┘     └─────────────────┘
//!          │                       │
//!          └───────┬───────────────┘
//!                  │
//!          ┌───────▼───────┐
//!          │ Worker Trait  │
//!          │               │
//!          │    spawn()    │
//!          │    config     │
//!          │  common loop  │
//!          └───────────────┘
//! ```
use hero_job::Job;
use log::{debug, error, info};
use redis::AsyncCommands;
use rhai::Engine;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tokio::task::JoinHandle;
use crate::{initialize_redis_connection, NAMESPACE_PREFIX, BLPOP_TIMEOUT_SECONDS};
/// Settings shared by worker instances.
#[derive(Debug, Clone)]
pub struct WorkerConfig {
    /// Identifier of the worker.
    pub worker_id: String,
    /// Database path.
    pub db_path: String,
    /// Redis connection URL.
    pub redis_url: String,
    /// Whether task details are kept after completion.
    pub preserve_tasks: bool,
    /// Default job timeout; only used by async workers.
    pub default_timeout: Option<Duration>,
}

impl WorkerConfig {
    /// Build a configuration without a default timeout.
    pub fn new(
        worker_id: String,
        db_path: String,
        redis_url: String,
        preserve_tasks: bool,
    ) -> Self {
        let default_timeout = None;

        Self {
            worker_id,
            db_path,
            redis_url,
            preserve_tasks,
            default_timeout,
        }
    }

    /// Return the configuration with `timeout` as the default for async workers.
    pub fn with_default_timeout(self, timeout: Duration) -> Self {
        Self {
            default_timeout: Some(timeout),
            ..self
        }
    }
}
/// Trait defining the common interface for Rhai workers
///
/// This trait abstracts the common functionality between synchronous and
/// asynchronous workers, allowing them to share the same spawn logic and
/// Redis polling loop while implementing different job processing strategies.
#[async_trait::async_trait]
pub trait Worker: Send + Sync + 'static {
/// Process a single job
///
/// This is the core method that differentiates worker implementations:
/// - Sync workers process jobs sequentially, one at a time
/// - Async workers spawn concurrent tasks for each job
///
/// # Arguments
///
/// * `job` - The job to process
/// * `engine` - Rhai engine for script execution
/// * `redis_conn` - Redis connection for status updates
async fn process_job(
&self,
job: Job,
engine: Engine,
redis_conn: &mut redis::aio::MultiplexedConnection,
);
/// Get the worker type name for logging
fn worker_type(&self) -> &'static str;
/// Get worker ID for this worker instance
fn worker_id(&self) -> &str;
/// Get Redis URL for this worker instance
fn redis_url(&self) -> &str;
/// Spawn the worker
///
/// This method provides the common worker loop implementation that both
/// sync and async workers can use. It handles:
/// - Redis connection setup
/// - Job polling from Redis queue
/// - Shutdown signal handling
/// - Delegating job processing to the implementation
fn spawn(
self: Arc<Self>,
engine: Engine,
mut shutdown_rx: mpsc::Receiver<()>,
) -> JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>> {
tokio::spawn(async move {
let worker_id = self.worker_id();
let redis_url = self.redis_url();
let queue_key = format!("{}{}", NAMESPACE_PREFIX, worker_id);
info!(
"{} Worker '{}' starting. Connecting to Redis at {}. Listening on queue: {}",
self.worker_type(),
worker_id,
redis_url,
queue_key
);
let mut redis_conn = initialize_redis_connection(worker_id, redis_url).await?;
loop {
let blpop_keys = vec![queue_key.clone()];
tokio::select! {
// Listen for shutdown signal
_ = shutdown_rx.recv() => {
info!("{} Worker '{}': Shutdown signal received. Terminating loop.",
self.worker_type(), worker_id);
break;
}
// Listen for tasks from Redis
blpop_result = redis_conn.blpop(&blpop_keys, BLPOP_TIMEOUT_SECONDS as f64) => {
debug!("{} Worker '{}': Attempting BLPOP on queue: {}",
self.worker_type(), worker_id, queue_key);
let response: Option<(String, String)> = match blpop_result {
Ok(resp) => resp,
Err(e) => {
error!("{} Worker '{}': Redis BLPOP error on queue {}: {}. Worker for this circle might stop.",
self.worker_type(), worker_id, queue_key, e);
return Err(Box::new(e) as Box<dyn std::error::Error + Send + Sync>);
}
};
if let Some((_queue_name_recv, job_id)) = response {
info!("{} Worker '{}' received job_id: {} from queue: {}",
self.worker_type(), worker_id, job_id, _queue_name_recv);
// Load the job from Redis
match crate::load_job_from_redis(&mut redis_conn, &job_id, worker_id).await {
Ok(mut job) => {
// Check for ping job and handle it directly
if job.script.trim() == "ping" {
info!("{} Worker '{}': Received ping job '{}', responding with pong",
self.worker_type(), worker_id, job_id);
// Update job status to started
if let Err(e) = hero_job::Job::update_status(&mut redis_conn, &job_id, hero_job::JobStatus::Started).await {
error!("{} Worker '{}': Failed to update ping job '{}' status to Started: {}",
self.worker_type(), worker_id, job_id, e);
}
// Set result to "pong" and mark as finished
if let Err(e) = hero_job::Job::set_result(&mut redis_conn, &job_id, "pong").await {
error!("{} Worker '{}': Failed to set ping job '{}' result: {}",
self.worker_type(), worker_id, job_id, e);
}
info!("{} Worker '{}': Successfully responded to ping job '{}' with pong",
self.worker_type(), worker_id, job_id);
} else {
// Create a new engine for each job to avoid sharing state
let job_engine = crate::engine::create_heromodels_engine();
// Delegate job processing to the implementation
self.process_job(job, job_engine, &mut redis_conn).await;
}
}
Err(e) => {
error!("{} Worker '{}': Failed to load job '{}': {}",
self.worker_type(), worker_id, job_id, e);
}
}
} else {
debug!("{} Worker '{}': BLPOP timed out on queue {}. No new tasks.",
self.worker_type(), worker_id, queue_key);
}
}
}
}
info!("{} Worker '{}' has shut down.", self.worker_type(), worker_id);
Ok(())
})
}
}
/// Convenience function to spawn a worker with the trait-based interface
///
/// This function provides a unified interface for spawning any worker implementation
/// that implements the Worker trait. It simply forwards to [`Worker::spawn`].
///
/// # Arguments
///
/// * `worker` - The worker implementation to spawn
/// * `engine` - Rhai engine for script execution
/// * `shutdown_rx` - Channel receiver for shutdown signals
///
/// # Returns
///
/// Returns a `JoinHandle` that can be awaited to wait for worker shutdown.
///
/// # Example
///
/// The snippet is marked `ignore` because it needs an async context and
/// project types that are not declared inside the example itself.
///
/// ```rust,ignore
/// use std::sync::Arc;
///
/// // `SyncWorker` stands in for any type that implements `Worker`.
/// let worker = Arc::new(SyncWorker::new());
/// let engine = create_heromodels_engine();
/// let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
///
/// let handle = spawn_worker(worker, engine, shutdown_rx);
///
/// // Later, shutdown the worker
/// shutdown_tx.send(()).await.unwrap();
/// handle.await.unwrap().unwrap();
/// ```
pub fn spawn_worker<W: Worker>(
worker: Arc<W>,
engine: Engine,
shutdown_rx: mpsc::Receiver<()>,
) -> JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>> {
worker.spawn(engine, shutdown_rx)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::create_heromodels_engine;

    /// Minimal `Worker` implementation used to exercise the spawn path.
    /// It ignores every job it is handed.
    struct MockWorker;

    #[async_trait::async_trait]
    impl Worker for MockWorker {
        async fn process_job(
            &self,
            _job: Job,
            _engine: Engine,
            _redis_conn: &mut redis::aio::MultiplexedConnection,
        ) {
            // Mock implementation - do nothing
        }

        fn worker_type(&self) -> &'static str {
            "Mock"
        }

        fn worker_id(&self) -> &str {
            "mock_worker"
        }

        fn redis_url(&self) -> &str {
            "redis://localhost:6379"
        }
    }

    /// A freshly built config stores its arguments and has no default timeout.
    #[tokio::test]
    async fn test_worker_config_creation() {
        let config = WorkerConfig::new(
            "test_worker".to_string(),
            "/tmp".to_string(),
            "redis://localhost:6379".to_string(),
            false,
        );

        assert_eq!(config.worker_id, "test_worker");
        assert_eq!(config.db_path, "/tmp");
        assert_eq!(config.redis_url, "redis://localhost:6379");
        assert!(!config.preserve_tasks);
        assert!(config.default_timeout.is_none());
    }

    /// `with_default_timeout` records the supplied duration.
    #[tokio::test]
    async fn test_worker_config_with_timeout() {
        let timeout = Duration::from_secs(300);
        let config = WorkerConfig::new(
            "test_worker".to_string(),
            "/tmp".to_string(),
            "redis://localhost:6379".to_string(),
            false,
        )
        .with_default_timeout(timeout);

        assert_eq!(config.default_timeout, Some(timeout));
    }

    /// `spawn_worker` hands back a live task handle.
    #[tokio::test]
    async fn test_spawn_worker_function() {
        // Keep the sender alive so the channel is not closed during the test.
        let (_shutdown_tx, shutdown_rx) = mpsc::channel(1);
        let engine = create_heromodels_engine();
        let worker = Arc::new(MockWorker);

        // `spawn_worker` takes (worker, engine, shutdown_rx); it has no
        // config parameter.
        let handle = spawn_worker(worker, engine, shutdown_rx);

        // The worker should be created successfully
        assert!(!handle.is_finished());

        // Abort the worker for cleanup
        handle.abort();
    }
}

View File

@ -25,7 +25,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.host("127.0.0.1")
.port(8443)
.redis_url("redis://localhost:6379")
.worker_id("test")
.with_auth()
.build() {
Ok(server) => {

View File

@ -63,7 +63,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.host("127.0.0.1")
.port(8443)
.redis_url("redis://localhost:6379")
.worker_id("circle_test")
.with_auth()
.circles(circles)
.build()?;

View File

@ -15,7 +15,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.host("127.0.0.1")
.port(8443)
.redis_url("redis://localhost:6379")
.worker_id("test")
.build() {
Ok(server) => {
println!("🚀 Built server...");

View File

@ -25,7 +25,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.host("127.0.0.1")
.port(8443)
.redis_url("redis://localhost:6379")
.worker_id("test")
.with_auth()
.build() {
Ok(server) => {

View File

@ -1,4 +1,6 @@
use std::collections::HashMap;
use std::sync::Arc;
use hero_supervisor::Supervisor;
use crate::{Server, TlsConfigError, ServerConfig};
/// ServerBuilder for constructing Server instances with a fluent API
@ -12,6 +14,7 @@ pub struct ServerBuilder {
tls_port: Option<u16>,
enable_auth: bool,
enable_webhooks: bool,
supervisor: Option<Arc<Supervisor>>,
circles: HashMap<String, Vec<String>>,
}
@ -28,6 +31,7 @@ impl ServerBuilder {
tls_port: None,
enable_auth: false,
enable_webhooks: false,
supervisor: None,
circles: HashMap::new(),
}
@ -77,6 +81,12 @@ impl ServerBuilder {
self
}
/// Set the supervisor instance for job dispatching
pub fn with_supervisor(mut self, supervisor: Arc<Supervisor>) -> Self {
self.supervisor = Some(supervisor);
self
}
/// Load configuration from a ServerConfig instance
pub fn from_config(mut self, config: ServerConfig) -> Self {
self.host = config.host;
@ -87,7 +97,6 @@ impl ServerBuilder {
self.cert_path = config.cert;
self.key_path = config.key;
self.tls_port = config.tls_port;
self.enable_webhooks = config.webhooks;
self.circles = config.circles;
self
}
@ -109,7 +118,7 @@ impl ServerBuilder {
circles: self.circles,
nonce_store: HashMap::new(),
authenticated_pubkey: None,
supervisor: None,
supervisor: self.supervisor,
})
}
}

View File

@ -34,10 +34,6 @@ pub struct ServerConfig {
/// Separate port for TLS connections
pub tls_port: Option<u16>,
/// Enable webhook handling
#[serde(default)]
pub webhooks: bool,
/// Circles configuration - maps circle names to lists of member public keys
#[serde(default)]
pub circles: HashMap<String, Vec<String>>,
@ -54,7 +50,6 @@ impl Default for ServerConfig {
cert: None,
key: None,
tls_port: None,
webhooks: false,
circles: HashMap::new(),
}
}
@ -123,7 +118,6 @@ impl ServerConfig {
cert: Some("cert.pem".to_string()),
key: Some("key.pem".to_string()),
tls_port: Some(8444),
webhooks: false,
circles,
}
}

View File

@ -1,7 +1,7 @@
use crate::Server;
use actix::prelude::*;
use actix_web_actors::ws;
use hero_supervisor::{Supervisor, ScriptType};
use hero_supervisor::ScriptType;
use serde_json::{json, Value};
use std::time::Duration;
@ -426,7 +426,7 @@ impl Server {
supervisor
.new_job()
.context_id(&circle_pk)
.script_type(ScriptType::RhaiSAL)
.script_type(ScriptType::SAL)
.script(&script_content)
.timeout(TASK_TIMEOUT_DURATION)
.await_response()

View File

@ -13,6 +13,7 @@ use serde_json::Value; // Removed unused json
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
use std::sync::Arc;
use std::sync::Mutex; // Removed unused Arc
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::task::JoinHandle;
@ -211,7 +212,7 @@ pub struct Server {
pub circles: HashMap<String, Vec<String>>,
nonce_store: HashMap<String, NonceResponse>,
authenticated_pubkey: Option<String>,
pub supervisor: Option<Supervisor>,
pub supervisor: Option<Arc<Supervisor>>,
}
impl Server {
@ -240,7 +241,7 @@ impl Server {
let server_config_data = web::Data::new(self.clone());
let http_server = HttpServer::new(move || {
let mut app = App::new()
let app = App::new()
.app_data(server_config_data.clone())
.route("/{circle_pk}", web::get().to(ws_handler));
@ -351,21 +352,6 @@ impl Server {
client_rpc_id: Value,
ctx: &mut ws::WebsocketContext<Self>,
) {
if !self.enable_auth {
let err_resp = JsonRpcResponse {
jsonrpc: "2.0".to_string(),
result: None,
error: Some(JsonRpcError {
code: -32000,
message: "Authentication is disabled on this server.".to_string(),
data: None,
}),
id: client_rpc_id,
};
ctx.text(serde_json::to_string(&err_resp).unwrap());
return;
}
match serde_json::from_value::<AuthCredentials>(params) {
Ok(auth_params) => {
let nonce_response = self.nonce_store.get(&auth_params.pubkey);
@ -550,17 +536,32 @@ impl Server {
let public_key = self.authenticated_pubkey.clone();
let supervisor_ref = self.supervisor.clone();
let fut = async move {
let caller_id = public_key.unwrap_or_else(|| "anonymous".to_string());
let _caller_id = public_key.unwrap_or_else(|| "anonymous".to_string());
// Use the passed supervisor if available, otherwise create a new one
match supervisor_ref {
Some(supervisor) => {
supervisor
.new_job()
.context_id(&circle_pk_clone)
.script_type(hero_supervisor::ScriptType::SAL)
.script(&script_content)
.timeout(TASK_TIMEOUT_DURATION)
.await_response()
.await
}
None => {
// Fallback: create a new supervisor if none was provided
match SupervisorBuilder::new()
.redis_url(&redis_url_clone)
.caller_id(&caller_id)
.build() {
.build().await {
Ok(hero_supervisor) => {
hero_supervisor
.new_job()
.context_id(&circle_pk_clone)
.script_type(hero_supervisor::ScriptType::RhaiSAL)
.script_type(hero_supervisor::ScriptType::SAL)
.script(&script_content)
.timeout(TASK_TIMEOUT_DURATION)
.await_response()
@ -568,6 +569,8 @@ impl Server {
}
Err(e) => Err(e),
}
}
}
};
ctx.spawn(