This commit is contained in:
Maxime Van Hees
2025-08-14 14:14:34 +02:00
parent 04a1af2423
commit 0ebda7c1aa
59 changed files with 6950 additions and 354 deletions

View File

@@ -408,7 +408,8 @@ impl Supervisor {
/// Get the hardcoded actor queue key for the script type
fn get_actor_queue_key(&self, script_type: &ScriptType) -> String {
format!("{}actor_queue:{}", NAMESPACE_PREFIX, script_type.actor_queue_suffix())
// Canonical type queue
hero_job::keys::work_type(script_type)
}
pub fn new_job(&self) -> JobBuilder {
@@ -586,14 +587,9 @@ impl Supervisor {
job_id: String,
script_type: &ScriptType
) -> Result<(), SupervisorError> {
let actor_queue_key = self.get_actor_queue_key(script_type);
// lpush also infers its types, RV is typically i64 (length of list) or () depending on exact command variant
// For `redis::AsyncCommands::lpush`, it's `RedisResult<R>` where R: FromRedisValue
// Often this is the length of the list. Let's allow inference or specify if needed.
let _: redis::RedisResult<i64> =
conn.lpush(&actor_queue_key, job_id.clone()).await;
// Canonical dispatch to type queue
let actor_queue_key = hero_job::keys::work_type(script_type);
let _: redis::RedisResult<i64> = conn.lpush(&actor_queue_key, job_id.clone()).await;
Ok(())
}
@@ -675,7 +671,8 @@ impl Supervisor {
) -> Result<String, SupervisorError> {
let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
let reply_queue_key = format!("{}:reply:{}", NAMESPACE_PREFIX, job.id); // Derived from the passed job_id
// Canonical reply queue
let reply_queue_key = hero_job::keys::reply(&job.id);
self.create_job_using_connection(
&mut conn,
@@ -692,13 +689,48 @@ impl Supervisor {
job.timeout
);
self.await_response_from_connection(
&mut conn,
&job.id,
&reply_queue_key,
job.timeout,
)
.await
// Some actors update the job hash directly and do not use reply queues.
// Poll the job hash for output until timeout to support both models.
let start_time = std::time::Instant::now();
loop {
// If output is present in the job hash, return it immediately
match self.get_job_output(&job.id).await {
Ok(Some(output)) => {
// Optional: cleanup reply queue in case it was created
let _: redis::RedisResult<i32> = conn.del(&reply_queue_key).await;
return Ok(output);
}
Ok(None) => {
// Check for error state
match self.get_job_status(&job.id).await {
Ok(JobStatus::Error) => {
// Try to read the error field for context
let mut conn2 = self.redis_client.get_multiplexed_async_connection().await?;
let job_key = format!("{}{}", NAMESPACE_PREFIX, job.id);
let err: Option<String> = conn2.hget(&job_key, "error").await.ok();
return Err(SupervisorError::InvalidInput(
err.unwrap_or_else(|| "Job failed".to_string())
));
}
_ => {
// keep polling
}
}
}
Err(_) => {
// Ignore transient read errors and continue polling
}
}
if start_time.elapsed() >= job.timeout {
// On timeout, ensure any reply queue is cleaned up and return a Timeout error
let _: redis::RedisResult<i32> = conn.del(&reply_queue_key).await;
return Err(SupervisorError::Timeout(job.id.clone()));
}
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
}
}
// Method to get job status
@@ -772,7 +804,7 @@ impl Supervisor {
let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
// Get job details to determine script type and appropriate actor
let job_key = format!("{}job:{}", NAMESPACE_PREFIX, job_id);
let job_key = format!("{}{}", NAMESPACE_PREFIX, job_id);
let job_data: std::collections::HashMap<String, String> = conn.hgetall(&job_key).await?;
if job_data.is_empty() {
@@ -787,7 +819,8 @@ impl Supervisor {
.map_err(|e| SupervisorError::InvalidInput(format!("Invalid script type: {}", e)))?;
// Use hardcoded stop queue key for this script type
let stop_queue_key = format!("{}stop_queue:{}", NAMESPACE_PREFIX, script_type.actor_queue_suffix());
// Stop queue per protocol: hero:stop_queue:{suffix}
let stop_queue_key = format!("hero:stop_queue:{}", script_type.actor_queue_suffix());
// Push job ID to the stop queue
conn.lpush::<_, _, ()>(&stop_queue_key, job_id).await?;
@@ -799,7 +832,7 @@ impl Supervisor {
/// Get logs for a job by reading from its log file
pub async fn get_job_logs(&self, job_id: &str) -> Result<Option<String>, SupervisorError> {
let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
let job_key = format!("{}job:{}", NAMESPACE_PREFIX, job_id);
let job_key = format!("{}{}", NAMESPACE_PREFIX, job_id);
// Get the job data to find the log path
let result_map: Option<std::collections::HashMap<String, String>> =
@@ -922,7 +955,7 @@ impl Supervisor {
for job_id in ready_job_ids {
// Get job data to determine script type and select actor
let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
let job_key = format!("{}job:{}", NAMESPACE_PREFIX, job_id);
let job_key = format!("{}{}", NAMESPACE_PREFIX, job_id);
let job_data: std::collections::HashMap<String, String> = conn.hgetall(&job_key).await?;
if let Some(script_type_str) = job_data.get("script_type") {