rename worker to actor

Author: Timur Gordon
Date: 2025-08-05 15:44:33 +02:00
parent 5283f383b3
commit 89e953ca1d
67 changed files with 1629 additions and 1737 deletions


@@ -16,7 +16,7 @@ mod lifecycle;
pub use crate::error::SupervisorError;
pub use crate::job::JobBuilder;
pub use crate::lifecycle::WorkerConfig;
pub use crate::lifecycle::ActorConfig;
// Re-export types from hero_job for public API
pub use hero_job::{Job, JobStatus, ScriptType};
@@ -28,22 +28,22 @@ pub struct Supervisor {
pub struct SupervisorBuilder {
redis_url: Option<String>,
osis_worker: Option<String>,
sal_worker: Option<String>,
v_worker: Option<String>,
python_worker: Option<String>,
worker_env_vars: HashMap<String, String>,
osis_actor: Option<String>,
sal_actor: Option<String>,
v_actor: Option<String>,
python_actor: Option<String>,
actor_env_vars: HashMap<String, String>,
websocket_config: Option<WebSocketServerConfig>,
}
/// Helper struct to pass builder data to worker launch method
/// Helper struct to pass builder data to actor launch method
#[derive(Clone)]
struct SupervisorBuilderData {
osis_worker: Option<String>,
sal_worker: Option<String>,
v_worker: Option<String>,
python_worker: Option<String>,
worker_env_vars: HashMap<String, String>,
osis_actor: Option<String>,
sal_actor: Option<String>,
v_actor: Option<String>,
python_actor: Option<String>,
actor_env_vars: HashMap<String, String>,
websocket_config: Option<WebSocketServerConfig>,
}
@@ -52,10 +52,10 @@ struct SupervisorBuilderData {
pub struct SupervisorConfig {
pub global: GlobalConfig,
pub websocket_server: Option<WebSocketServerConfig>,
pub osis_worker: Option<WorkerConfigToml>,
pub sal_worker: Option<WorkerConfigToml>,
pub v_worker: Option<WorkerConfigToml>,
pub python_worker: Option<WorkerConfigToml>,
pub osis_actor: Option<ActorConfigToml>,
pub sal_actor: Option<ActorConfigToml>,
pub v_actor: Option<ActorConfigToml>,
pub python_actor: Option<ActorConfigToml>,
}
/// Global configuration section
@@ -64,12 +64,10 @@ pub struct GlobalConfig {
pub redis_url: String,
}
/// Worker configuration section in TOML
/// Actor configuration section in TOML
#[derive(Debug, Deserialize, Serialize)]
pub struct WorkerConfigToml {
pub struct ActorConfigToml {
pub binary_path: String,
#[serde(default)]
pub env_vars: HashMap<String, String>,
}
/// WebSocket server configuration section in TOML
@@ -127,11 +125,11 @@ impl SupervisorBuilder {
pub fn new() -> Self {
Self {
redis_url: None,
osis_worker: None,
sal_worker: None,
v_worker: None,
python_worker: None,
worker_env_vars: HashMap::new(),
osis_actor: None,
sal_actor: None,
v_actor: None,
python_actor: None,
actor_env_vars: HashMap::new(),
websocket_config: None,
}
}
@@ -147,25 +145,21 @@ impl SupervisorBuilder {
let mut builder = Self::new()
.redis_url(&config.global.redis_url);
// Configure workers based on TOML config
if let Some(osis_config) = config.osis_worker {
builder = builder.osis_worker(&osis_config.binary_path)
.worker_env_vars(osis_config.env_vars);
// Configure actors based on TOML config
if let Some(osis_config) = config.osis_actor {
builder = builder.osis_actor(&osis_config.binary_path);
}
if let Some(sal_config) = config.sal_worker {
builder = builder.sal_worker(&sal_config.binary_path)
.worker_env_vars(sal_config.env_vars);
if let Some(sal_config) = config.sal_actor {
builder = builder.sal_actor(&sal_config.binary_path);
}
if let Some(v_config) = config.v_worker {
builder = builder.v_worker(&v_config.binary_path)
.worker_env_vars(v_config.env_vars);
if let Some(v_config) = config.v_actor {
builder = builder.v_actor(&v_config.binary_path);
}
if let Some(python_config) = config.python_worker {
builder = builder.python_worker(&python_config.binary_path)
.worker_env_vars(python_config.env_vars);
if let Some(python_config) = config.python_actor {
builder = builder.python_actor(&python_config.binary_path);
}
// Store WebSocket configuration for later use
@@ -176,28 +170,28 @@ impl SupervisorBuilder {
Ok(builder)
}
/// Validate that all configured worker binaries exist and are executable
fn validate_worker_binaries(&self) -> Result<(), SupervisorError> {
let workers = [
("OSIS", &self.osis_worker),
("SAL", &self.sal_worker),
("V", &self.v_worker),
("Python", &self.python_worker),
/// Validate that all configured actor binaries exist and are executable
fn validate_actor_binaries(&self) -> Result<(), SupervisorError> {
let actors = [
("OSIS", &self.osis_actor),
("SAL", &self.sal_actor),
("V", &self.v_actor),
("Python", &self.python_actor),
];
for (worker_type, binary_path) in workers {
for (actor_type, binary_path) in actors {
if let Some(path) = binary_path {
let path_obj = Path::new(path);
if !path_obj.exists() {
return Err(SupervisorError::ConfigError(
format!("{} worker binary does not exist: {}", worker_type, path)
format!("{} actor binary does not exist: {}", actor_type, path)
));
}
if !path_obj.is_file() {
return Err(SupervisorError::ConfigError(
format!("{} worker path is not a file: {}", worker_type, path)
format!("{} actor path is not a file: {}", actor_type, path)
));
}
@@ -207,19 +201,19 @@ impl SupervisorBuilder {
use std::os::unix::fs::PermissionsExt;
let metadata = path_obj.metadata().map_err(|e| {
SupervisorError::ConfigError(
format!("Failed to read metadata for {} worker binary {}: {}", worker_type, path, e)
format!("Failed to read metadata for {} actor binary {}: {}", actor_type, path, e)
)
})?;
let permissions = metadata.permissions();
if permissions.mode() & 0o111 == 0 {
return Err(SupervisorError::ConfigError(
format!("{} worker binary is not executable: {}", worker_type, path)
format!("{} actor binary is not executable: {}", actor_type, path)
));
}
}
info!("Validated {} worker binary: {}", worker_type, path);
info!("Validated {} actor binary: {}", actor_type, path);
}
}
@@ -231,48 +225,48 @@ impl SupervisorBuilder {
self
}
pub fn osis_worker(mut self, binary_path: &str) -> Self {
self.osis_worker = Some(binary_path.to_string());
pub fn osis_actor(mut self, binary_path: &str) -> Self {
self.osis_actor = Some(binary_path.to_string());
self
}
pub fn sal_worker(mut self, binary_path: &str) -> Self {
self.sal_worker = Some(binary_path.to_string());
pub fn sal_actor(mut self, binary_path: &str) -> Self {
self.sal_actor = Some(binary_path.to_string());
self
}
pub fn v_worker(mut self, binary_path: &str) -> Self {
self.v_worker = Some(binary_path.to_string());
pub fn v_actor(mut self, binary_path: &str) -> Self {
self.v_actor = Some(binary_path.to_string());
self
}
pub fn python_worker(mut self, binary_path: &str) -> Self {
self.python_worker = Some(binary_path.to_string());
pub fn python_actor(mut self, binary_path: &str) -> Self {
self.python_actor = Some(binary_path.to_string());
self
}
pub fn worker_env_var(mut self, key: &str, value: &str) -> Self {
self.worker_env_vars.insert(key.to_string(), value.to_string());
pub fn actor_env_var(mut self, key: &str, value: &str) -> Self {
self.actor_env_vars.insert(key.to_string(), value.to_string());
self
}
pub fn worker_env_vars(mut self, env_vars: HashMap<String, String>) -> Self {
self.worker_env_vars.extend(env_vars);
pub fn actor_env_vars(mut self, env_vars: HashMap<String, String>) -> Self {
self.actor_env_vars.extend(env_vars);
self
}
/// Builds the final `Supervisor` instance synchronously.
///
/// This method validates the configuration, checks worker binary existence,
/// and creates the Redis client. Worker launching is deferred to the `start_workers()` method.
/// This method validates the configuration, checks actor binary existence,
/// and creates the Redis client. Actor launching is deferred to the `start_actors()` method.
///
/// # Returns
///
/// * `Ok(Supervisor)` - Successfully configured client with valid binaries
/// * `Err(SupervisorError)` - Configuration, binary validation, or connection error
pub async fn build(self) -> Result<Supervisor, SupervisorError> {
// Validate that all configured worker binaries exist first
Self::validate_worker_binaries(&self)?;
// Validate that all configured actor binaries exist first
Self::validate_actor_binaries(&self)?;
let url = self.redis_url
.unwrap_or_else(|| "redis://127.0.0.1/".to_string());
@@ -281,13 +275,13 @@ impl SupervisorBuilder {
let zinit_client = ZinitClient::unix_socket("/tmp/zinit.sock").await
.map_err(|e| SupervisorError::ZinitError(format!("Failed to create Zinit client: {}", e)))?;
// Store builder data for later use in start_workers()
// Store builder data for later use in start_actors()
let builder_data = SupervisorBuilderData {
osis_worker: self.osis_worker,
sal_worker: self.sal_worker,
v_worker: self.v_worker,
python_worker: self.python_worker,
worker_env_vars: self.worker_env_vars,
osis_actor: self.osis_actor,
sal_actor: self.sal_actor,
v_actor: self.v_actor,
python_actor: self.python_actor,
actor_env_vars: self.actor_env_vars,
websocket_config: self.websocket_config,
};
@@ -302,10 +296,10 @@ impl SupervisorBuilder {
}
impl Supervisor {
/// Start all configured workers asynchronously.
/// This method should be called after build() to launch the workers.
pub async fn start_workers(&self) -> Result<(), SupervisorError> {
info!("Starting Hero Supervisor workers...");
/// Start all configured actors asynchronously.
/// This method should be called after build() to launch the actors.
pub async fn start_actors(&self) -> Result<(), SupervisorError> {
info!("Starting Hero Supervisor actors...");
// Test Zinit connection first
info!("Testing Zinit connection at /tmp/zinit.sock...");
@@ -319,102 +313,102 @@ impl Supervisor {
}
}
// Clean up any existing worker services first
info!("Cleaning up existing worker services...");
self.cleanup_existing_workers().await?;
// Clean up any existing actor services first
info!("Cleaning up existing actor services...");
self.cleanup_existing_actors().await?;
// Launch configured workers if builder data is available
// Launch configured actors if builder data is available
if let Some(builder_data) = &self.builder_data {
info!("Launching configured workers...");
self.launch_configured_workers(builder_data).await?;
info!("Launching configured actors...");
self.launch_configured_actors(builder_data).await?;
} else {
warn!("No builder data available, no workers to start");
warn!("No builder data available, no actors to start");
}
info!("All workers started successfully!");
info!("All actors started successfully!");
Ok(())
}
/// Clean up all worker services from zinit on program exit
/// Clean up all actor services from zinit on program exit
pub async fn cleanup_and_shutdown(&self) -> Result<(), SupervisorError> {
info!("Cleaning up worker services before shutdown...");
info!("Cleaning up actor services before shutdown...");
let worker_names = vec![
"osis_worker_1",
"sal_worker_1",
"v_worker_1",
"python_worker_1"
let actor_names = vec![
"osis_actor_1",
"sal_actor_1",
"v_actor_1",
"python_actor_1"
];
for worker_name in worker_names {
if let Err(e) = self.stop_and_delete_worker(worker_name).await {
warn!("Failed to cleanup worker {}: {}", worker_name, e);
for actor_name in actor_names {
if let Err(e) = self.stop_and_delete_actor(actor_name).await {
warn!("Failed to cleanup actor {}: {}", actor_name, e);
}
}
info!("Worker cleanup completed");
info!("Actor cleanup completed");
Ok(())
}
/// Clean up any existing worker services on startup
async fn cleanup_existing_workers(&self) -> Result<(), SupervisorError> {
info!("Cleaning up any existing worker services...");
/// Clean up any existing actor services on startup
async fn cleanup_existing_actors(&self) -> Result<(), SupervisorError> {
info!("Cleaning up any existing actor services...");
let worker_names = vec![
"osis_worker_1",
"sal_worker_1",
"v_worker_1",
"python_worker_1"
let actor_names = vec![
"osis_actor_1",
"sal_actor_1",
"v_actor_1",
"python_actor_1"
];
for worker_name in worker_names {
for actor_name in actor_names {
// Try to stop and delete, but don't fail if they don't exist
info!("Attempting to cleanup worker: {}", worker_name);
match self.stop_and_delete_worker(worker_name).await {
Ok(_) => info!("Successfully cleaned up worker: {}", worker_name),
Err(e) => debug!("Failed to cleanup worker {}: {}", worker_name, e),
info!("Attempting to cleanup actor: {}", actor_name);
match self.stop_and_delete_actor(actor_name).await {
Ok(_) => info!("Successfully cleaned up actor: {}", actor_name),
Err(e) => debug!("Failed to cleanup actor {}: {}", actor_name, e),
}
}
info!("Existing worker cleanup completed");
info!("Existing actor cleanup completed");
Ok(())
}
/// Stop and delete a worker service from zinit
async fn stop_and_delete_worker(&self, worker_name: &str) -> Result<(), SupervisorError> {
info!("Starting cleanup for worker: {}", worker_name);
/// Stop and delete an actor service from zinit
async fn stop_and_delete_actor(&self, actor_name: &str) -> Result<(), SupervisorError> {
info!("Starting cleanup for actor: {}", actor_name);
// First try to stop the worker
info!("Attempting to stop worker: {}", worker_name);
if let Err(e) = self.zinit_client.stop(worker_name).await {
debug!("Worker {} was not running or failed to stop: {}", worker_name, e);
// First try to stop the actor
info!("Attempting to stop actor: {}", actor_name);
if let Err(e) = self.zinit_client.stop(actor_name).await {
debug!("Actor {} was not running or failed to stop: {}", actor_name, e);
} else {
info!("Successfully stopped worker: {}", worker_name);
info!("Successfully stopped actor: {}", actor_name);
}
// Then forget the service to stop monitoring it
info!("Attempting to forget worker: {}", worker_name);
if let Err(e) = self.zinit_client.forget(worker_name).await {
info!("Worker {} was not being monitored or failed to forget: {}", worker_name, e);
info!("Attempting to forget actor: {}", actor_name);
if let Err(e) = self.zinit_client.forget(actor_name).await {
info!("Actor {} was not being monitored or failed to forget: {}", actor_name, e);
} else {
info!("Successfully forgot worker service: {}", worker_name);
info!("Successfully forgot actor service: {}", actor_name);
}
// Finally, delete the service configuration
info!("Attempting to delete service for worker: {}", worker_name);
if let Err(e) = self.zinit_client.delete_service(worker_name).await {
debug!("Worker {} service did not exist or failed to delete: {}", worker_name, e);
info!("Attempting to delete service for actor: {}", actor_name);
if let Err(e) = self.zinit_client.delete_service(actor_name).await {
debug!("Actor {} service did not exist or failed to delete: {}", actor_name, e);
} else {
info!("Successfully deleted worker service: {}", worker_name);
info!("Successfully deleted actor service: {}", actor_name);
}
info!("Completed cleanup for worker: {}", worker_name);
info!("Completed cleanup for actor: {}", actor_name);
Ok(())
}
/// Get the hardcoded worker queue key for the script type
fn get_worker_queue_key(&self, script_type: &ScriptType) -> String {
format!("{}worker_queue:{}", NAMESPACE_PREFIX, script_type.worker_queue_suffix())
/// Get the hardcoded actor queue key for the script type
fn get_actor_queue_key(&self, script_type: &ScriptType) -> String {
format!("{}actor_queue:{}", NAMESPACE_PREFIX, script_type.actor_queue_suffix())
}
pub fn new_job(&self) -> JobBuilder {
@@ -432,63 +426,58 @@ impl Supervisor {
})
}
/// Extract worker configurations from the supervisor's builder data
pub fn get_worker_configs(&self) -> Result<Vec<WorkerConfig>, SupervisorError> {
/// Extract actor configurations from the supervisor's builder data
pub fn get_actor_configs(&self) -> Result<Vec<ActorConfig>, SupervisorError> {
let builder_data = self.builder_data.as_ref().ok_or_else(|| {
SupervisorError::ConfigError("No builder data available for worker configs".to_string())
SupervisorError::ConfigError("No builder data available for actor configs".to_string())
})?;
let mut configs = Vec::new();
let env_vars = builder_data.worker_env_vars.clone();
if let Some(osis_path) = &builder_data.osis_worker {
if let Some(osis_path) = &builder_data.osis_actor {
configs.push(
WorkerConfig::new("osis_worker_1".to_string(), PathBuf::from(osis_path), ScriptType::OSIS)
.with_env(env_vars.clone())
ActorConfig::new("osis_actor_1".to_string(), PathBuf::from(osis_path), ScriptType::OSIS)
);
}
if let Some(sal_path) = &builder_data.sal_worker {
if let Some(sal_path) = &builder_data.sal_actor {
configs.push(
WorkerConfig::new("sal_worker_1".to_string(), PathBuf::from(sal_path), ScriptType::SAL)
.with_env(env_vars.clone())
ActorConfig::new("sal_actor_1".to_string(), PathBuf::from(sal_path), ScriptType::SAL)
);
}
if let Some(v_path) = &builder_data.v_worker {
if let Some(v_path) = &builder_data.v_actor {
configs.push(
WorkerConfig::new("v_worker_1".to_string(), PathBuf::from(v_path), ScriptType::V)
.with_env(env_vars.clone())
ActorConfig::new("v_actor_1".to_string(), PathBuf::from(v_path), ScriptType::V)
);
}
if let Some(python_path) = &builder_data.python_worker {
if let Some(python_path) = &builder_data.python_actor {
configs.push(
WorkerConfig::new("python_worker_1".to_string(), PathBuf::from(python_path), ScriptType::Python)
.with_env(env_vars.clone())
ActorConfig::new("python_actor_1".to_string(), PathBuf::from(python_path), ScriptType::Python)
);
}
Ok(configs)
}
/// Spawn a background lifecycle manager that continuously monitors and maintains worker health
/// Spawn a background lifecycle manager that continuously monitors and maintains actor health
/// Returns a JoinHandle that can be used to stop the lifecycle manager
pub fn spawn_lifecycle_manager(
self: Arc<Self>,
worker_configs: Vec<WorkerConfig>,
actor_configs: Vec<ActorConfig>,
health_check_interval: Duration,
) -> tokio::task::JoinHandle<Result<(), SupervisorError>> {
let supervisor = self;
tokio::spawn(async move {
info!("Starting background lifecycle manager with {} workers", worker_configs.len());
info!("Starting background lifecycle manager with {} actors", actor_configs.len());
info!("Health check interval: {:?}", health_check_interval);
// Initial worker startup
info!("Performing initial worker startup...");
if let Err(e) = supervisor.start_workers().await {
error!("Failed to start workers during initialization: {}", e);
// Initial actor startup
info!("Performing initial actor startup...");
if let Err(e) = supervisor.start_actors().await {
error!("Failed to start actors during initialization: {}", e);
return Err(e);
}
@@ -499,12 +488,12 @@ impl Supervisor {
loop {
interval.tick().await;
info!("Running periodic worker health check...");
info!("Running periodic actor health check...");
// Check each worker's health and restart if needed
for worker_config in &worker_configs {
if let Err(e) = supervisor.check_and_restart_worker(worker_config).await {
error!("Failed to check/restart worker {}: {}", worker_config.name, e);
// Check each actor's health and restart if needed
for actor_config in &actor_configs {
if let Err(e) = supervisor.check_and_restart_actor(actor_config).await {
error!("Failed to check/restart actor {}: {}", actor_config.name, e);
}
}
@@ -513,59 +502,59 @@ impl Supervisor {
})
}
/// Check a single worker's health and restart if needed
async fn check_and_restart_worker(&self, worker_config: &WorkerConfig) -> Result<(), SupervisorError> {
let worker_name = &worker_config.name;
/// Check a single actor's health and restart if needed
async fn check_and_restart_actor(&self, actor_config: &ActorConfig) -> Result<(), SupervisorError> {
let actor_name = &actor_config.name;
// Get worker status
match self.zinit_client.status(worker_name).await {
// Get actor status
match self.zinit_client.status(actor_name).await {
Ok(status) => {
let is_healthy = status.state == "running" && status.pid > 0;
if is_healthy {
debug!("Worker {} is healthy (state: {}, pid: {})", worker_name, status.state, status.pid);
debug!("Actor {} is healthy (state: {}, pid: {})", actor_name, status.state, status.pid);
// Optionally send a ping job for deeper health check
if let Err(e) = self.send_ping_job(worker_config.script_type.clone()).await {
warn!("Ping job failed for worker {}: {}", worker_name, e);
if let Err(e) = self.send_ping_job(actor_config.script_type.clone()).await {
warn!("Ping job failed for actor {}: {}", actor_name, e);
// Note: We don't restart on ping failure as it might be temporary
}
} else {
warn!("Worker {} is unhealthy (state: {}, pid: {}), restarting...",
worker_name, status.state, status.pid);
warn!("Actor {} is unhealthy (state: {}, pid: {}), restarting...",
actor_name, status.state, status.pid);
// Attempt to restart the worker
if let Err(e) = self.restart_worker(worker_name).await {
error!("Failed to restart unhealthy worker {}: {}", worker_name, e);
// Attempt to restart the actor
if let Err(e) = self.restart_actor(actor_name).await {
error!("Failed to restart unhealthy actor {}: {}", actor_name, e);
// If restart fails, try a full stop/start cycle
warn!("Attempting full stop/start cycle for worker: {}", worker_name);
if let Err(e) = self.stop_and_delete_worker(worker_name).await {
error!("Failed to stop worker {} during recovery: {}", worker_name, e);
warn!("Attempting full stop/start cycle for actor: {}", actor_name);
if let Err(e) = self.stop_and_delete_actor(actor_name).await {
error!("Failed to stop actor {} during recovery: {}", actor_name, e);
}
if let Err(e) = self.start_worker(worker_config).await {
error!("Failed to start worker {} during recovery: {}", worker_name, e);
if let Err(e) = self.start_actor(actor_config).await {
error!("Failed to start actor {} during recovery: {}", actor_name, e);
return Err(e);
}
info!("Successfully recovered worker: {}", worker_name);
info!("Successfully recovered actor: {}", actor_name);
} else {
info!("Successfully restarted worker: {}", worker_name);
info!("Successfully restarted actor: {}", actor_name);
}
}
}
Err(e) => {
warn!("Could not get status for worker {} (may not exist): {}", worker_name, e);
warn!("Could not get status for actor {} (may not exist): {}", actor_name, e);
// Worker doesn't exist, try to start it
info!("Attempting to start missing worker: {}", worker_name);
if let Err(e) = self.start_worker(worker_config).await {
error!("Failed to start missing worker {}: {}", worker_name, e);
// Actor doesn't exist, try to start it
info!("Attempting to start missing actor: {}", actor_name);
if let Err(e) = self.start_actor(actor_config).await {
error!("Failed to start missing actor {}: {}", actor_name, e);
return Err(e);
}
info!("Successfully started missing worker: {}", worker_name);
info!("Successfully started missing actor: {}", actor_name);
}
}
@@ -597,18 +586,18 @@ impl Supervisor {
job_id: String,
script_type: &ScriptType
) -> Result<(), SupervisorError> {
let worker_queue_key = self.get_worker_queue_key(script_type);
let actor_queue_key = self.get_actor_queue_key(script_type);
// lpush also infers its types, RV is typically i64 (length of list) or () depending on exact command variant
// For `redis::AsyncCommands::lpush`, it's `RedisResult<R>` where R: FromRedisValue
// Often this is the length of the list. Let's allow inference or specify if needed.
let _: redis::RedisResult<i64> =
conn.lpush(&worker_queue_key, job_id.clone()).await;
conn.lpush(&actor_queue_key, job_id.clone()).await;
Ok(())
}
// Internal helper to await response from worker
// Internal helper to await response from actor
async fn await_response_from_connection(
&self,
conn: &mut redis::aio::MultiplexedConnection,
@@ -679,7 +668,7 @@ impl Supervisor {
Ok(())
}
// New method using dedicated reply queue with automatic worker selection
// New method using dedicated reply queue with automatic actor selection
pub async fn run_job_and_await_result(
&self,
job: &Job
@@ -782,7 +771,7 @@ impl Supervisor {
pub async fn stop_job(&self, job_id: &str) -> Result<(), SupervisorError> {
let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
// Get job details to determine script type and appropriate worker
// Get job details to determine script type and appropriate actor
let job_key = format!("{}job:{}", NAMESPACE_PREFIX, job_id);
let job_data: std::collections::HashMap<String, String> = conn.hgetall(&job_key).await?;
@@ -798,7 +787,7 @@ impl Supervisor {
.map_err(|e| SupervisorError::InvalidInput(format!("Invalid script type: {}", e)))?;
// Use hardcoded stop queue key for this script type
let stop_queue_key = format!("{}stop_queue:{}", NAMESPACE_PREFIX, script_type.worker_queue_suffix());
let stop_queue_key = format!("{}stop_queue:{}", NAMESPACE_PREFIX, script_type.actor_queue_suffix());
// Push job ID to the stop queue
conn.lpush::<_, _, ()>(&stop_queue_key, job_id).await?;
@@ -931,7 +920,7 @@ impl Supervisor {
/// Dispatch jobs that are ready (have all prerequisites completed)
pub async fn dispatch_ready_jobs(&self, ready_job_ids: Vec<String>) -> Result<(), SupervisorError> {
for job_id in ready_job_ids {
// Get job data to determine script type and select worker
// Get job data to determine script type and select actor
let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
let job_key = format!("{}job:{}", NAMESPACE_PREFIX, job_id);
let job_data: std::collections::HashMap<String, String> = conn.hgetall(&job_key).await?;
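For completeness, a hedged sketch of wiring the renamed lifecycle APIs (`get_actor_configs`, `spawn_lifecycle_manager`, and the background `start_actors` call shown above). The crate path `hero_supervisor`, the 30-second health-check interval, and the error handling are assumptions, not part of this commit.

```rust
// Illustrative wiring of the renamed lifecycle APIs; method names come from the
// diff above, while the crate name and interval are assumptions.
use std::sync::Arc;
use std::time::Duration;

use hero_supervisor::{Supervisor, SupervisorError};

async fn run_lifecycle(supervisor: Supervisor) -> Result<(), SupervisorError> {
    let supervisor = Arc::new(supervisor);

    // Formerly get_worker_configs(); yields ActorConfig entries such as "osis_actor_1".
    let actor_configs = supervisor.get_actor_configs()?;

    // The spawned task performs the initial start_actors() call, then periodically
    // checks each actor via zinit and restarts unhealthy or missing ones.
    let handle = Arc::clone(&supervisor)
        .spawn_lifecycle_manager(actor_configs, Duration::from_secs(30));

    // Propagate either a panic from the background task or its SupervisorError.
    handle.await.expect("lifecycle manager task panicked")
}
```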