cleanup and refactor
@@ -13,8 +13,8 @@ path = "src/bin/supervisor.rs"

[dependencies]
# Job types
hero-job = { git = "https://git.ourworld.tf/herocode/job.git" }
hero-job-client = { git = "https://git.ourworld.tf/herocode/job.git" }
hero-job = { path = "../../job/rust" }
hero-job-client = { path = "../../job/rust/client" }

# Async runtime
tokio = { version = "1.0", features = ["full"] }
@@ -23,37 +23,37 @@ tokio = { version = "1.0", features = ["full"] }
async-trait = "0.1"

# Redis client
redis = { version = "0.25", features = ["aio", "tokio-comp"] }
redis = { version = "0.25", features = ["tokio-comp", "connection-manager"] }

# Job module dependencies (now integrated)
uuid = { version = "1.0", features = ["v4"] }
uuid = { version = "1.6", features = ["v4", "serde"] }

# Logging
log = "0.4"
thiserror = "1.0"
chrono = "0.4"
chrono = { version = "0.4", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
env_logger = "0.10"

# CLI argument parsing
clap = { version = "4.0", features = ["derive"] }
clap = { version = "4.4", features = ["derive"] }
toml = "0.8"

# OpenRPC dependencies (now always included)
jsonrpsee = { version = "0.24", features = ["server", "macros"] }
jsonrpsee = { version = "0.26", features = ["server", "macros"] }
anyhow = "1.0"
futures = "0.3"

# CORS support for OpenRPC server
tower-http = { version = "0.5", features = ["cors"] }
tower = "0.4"
tower = "0.5"
hyper = { version = "1.0", features = ["full"] }
hyper-util = { version = "0.1", features = ["tokio"] }
http-body-util = "0.1"

# Mycelium integration (optional)
base64 = { version = "0.22", optional = true }
rand = { version = "0.8", optional = true }
reqwest = { version = "0.12", features = ["json"], optional = true }
# Osiris client for persistent storage
osiris-client = { path = "../../osiris/client" }

[dev-dependencies]
tokio-test = "0.4"
@@ -63,7 +63,6 @@ escargot = "0.5"
[features]
default = ["cli"]
cli = []
mycelium = ["base64", "rand", "reqwest"]

# Examples
[[example]]

core/src/app.rs
@@ -1,190 +0,0 @@
//! # Hero Supervisor Application
//!
//! Simplified supervisor application that wraps a built Supervisor instance.
//! Use SupervisorBuilder to construct the supervisor with all configuration,
//! then pass it to SupervisorApp for runtime management.

use crate::Supervisor;
#[cfg(feature = "mycelium")]
use crate::mycelium::MyceliumIntegration;
use log::{info, error, debug};
#[cfg(feature = "mycelium")]
use std::sync::Arc;
#[cfg(feature = "mycelium")]
use tokio::sync::Mutex;

/// Main supervisor application
pub struct SupervisorApp {
    pub supervisor: Supervisor,
    pub mycelium_url: String,
    pub topic: String,
}

impl SupervisorApp {
    /// Create a new supervisor application with a built supervisor
    pub fn new(supervisor: Supervisor, mycelium_url: String, topic: String) -> Self {
        Self {
            supervisor,
            mycelium_url,
            topic,
        }
    }

    /// Start the complete supervisor application
    /// This method handles the entire application lifecycle:
    /// - Starts all configured runners
    /// - Connects to Mycelium daemon for message transport
    /// - Sets up graceful shutdown handling
    /// - Keeps the application running
    pub async fn start(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        info!("Starting Hero Supervisor Application");

        // Start all configured runners
        self.start_all().await?;

        // Start Mycelium integration
        self.start_mycelium_integration().await?;

        // Set up graceful shutdown
        self.setup_graceful_shutdown().await;

        // Keep the application running
        info!("Supervisor is running. Press Ctrl+C to shutdown.");
        self.run_main_loop().await;

        Ok(())
    }

    /// Start the Mycelium integration
    async fn start_mycelium_integration(&self) -> Result<(), Box<dyn std::error::Error>> {
        #[cfg(feature = "mycelium")]
        {
            // Skip Mycelium if URL is empty
            if self.mycelium_url.is_empty() {
                info!("Mycelium integration disabled (no URL provided)");
                return Ok(());
            }

            info!("Starting Mycelium integration...");

            let supervisor_for_mycelium = Arc::new(Mutex::new(self.supervisor.clone()));
            let mycelium_url = self.mycelium_url.clone();
            let topic = self.topic.clone();

            let mycelium_integration = MyceliumIntegration::new(
                supervisor_for_mycelium,
                mycelium_url,
                topic,
            );

            // Start the Mycelium integration in a background task
            let integration_handle = tokio::spawn(async move {
                if let Err(e) = mycelium_integration.start().await {
                    error!("Mycelium integration error: {}", e);
                }
            });

            // Give the integration a moment to start
            tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
            info!("Mycelium integration started successfully");

            // Store the handle for potential cleanup
            std::mem::forget(integration_handle); // For now, let it run in background
        }

        #[cfg(not(feature = "mycelium"))]
        {
            info!("Mycelium integration not enabled (compile with --features mycelium)");
        }

        Ok(())
    }

    /// Set up graceful shutdown handling
    async fn setup_graceful_shutdown(&self) {
        tokio::spawn(async move {
            tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
            info!("Received shutdown signal");
            std::process::exit(0);
        });
    }

    /// Main application loop
    async fn run_main_loop(&self) {
        // Keep the main thread alive
        loop {
            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
        }
    }

    /// Start all configured runners
    pub async fn start_all(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        info!("Starting all runners");

        let results = self.supervisor.start_all().await;
        let mut failed_count = 0;

        for (runner_id, result) in results {
            match result {
                Ok(_) => info!("Runner {} started successfully", runner_id),
                Err(e) => {
                    error!("Failed to start runner {}: {}", runner_id, e);
                    failed_count += 1;
                }
            }
        }

        if failed_count == 0 {
            info!("All runners started successfully");
        } else {
            error!("Failed to start {} runners", failed_count);
        }

        Ok(())
    }

    /// Stop all configured runners
    pub async fn stop_all(&mut self, force: bool) -> Result<(), Box<dyn std::error::Error>> {
        info!("Stopping all runners (force: {})", force);

        let results = self.supervisor.stop_all(force).await;
        let mut failed_count = 0;

        for (runner_id, result) in results {
            match result {
                Ok(_) => info!("Runner {} stopped successfully", runner_id),
                Err(e) => {
                    error!("Failed to stop runner {}: {}", runner_id, e);
                    failed_count += 1;
                }
            }
        }

        if failed_count == 0 {
            info!("All runners stopped successfully");
        } else {
            error!("Failed to stop {} runners", failed_count);
        }

        Ok(())
    }


    /// Get status of all runners
    pub async fn get_status(&self) -> Result<Vec<(String, String)>, Box<dyn std::error::Error>> {
        debug!("Getting status of all runners");

        let statuses = self.supervisor.get_all_runner_status().await
            .map_err(|e| Box::new(e) as Box<dyn std::error::Error>)?;

        let status_strings: Vec<(String, String)> = statuses
            .into_iter()
            .map(|(runner_id, status)| {
                let status_str = format!("{:?}", status);
                (runner_id, status_str)
            })
            .collect();

        Ok(status_strings)
    }
}
@@ -65,66 +65,6 @@ impl ApiKey {
    }
}

/// API key store
#[derive(Debug, Clone, Default)]
pub struct ApiKeyStore {
    /// Map of key -> ApiKey
    keys: HashMap<String, ApiKey>,
}

impl ApiKeyStore {
    pub fn new() -> Self {
        Self {
            keys: HashMap::new(),
        }
    }

    /// Add a new API key
    pub fn add_key(&mut self, key: ApiKey) {
        self.keys.insert(key.key.clone(), key);
    }

    /// Remove an API key by its key value
    pub fn remove_key(&mut self, key: &str) -> Option<ApiKey> {
        self.keys.remove(key)
    }

    /// Get an API key by its key value
    pub fn get_key(&self, key: &str) -> Option<&ApiKey> {
        self.keys.get(key)
    }

    /// Verify a key and return its metadata if valid
    pub fn verify_key(&self, key: &str) -> Option<&ApiKey> {
        self.get_key(key)
    }

    /// List all keys with a specific scope
    pub fn list_keys_by_scope(&self, scope: ApiKeyScope) -> Vec<&ApiKey> {
        self.keys
            .values()
            .filter(|k| k.scope == scope)
            .collect()
    }

    /// List all keys
    pub fn list_all_keys(&self) -> Vec<&ApiKey> {
        self.keys.values().collect()
    }

    /// Count keys by scope
    pub fn count_by_scope(&self, scope: ApiKeyScope) -> usize {
        self.keys.values().filter(|k| k.scope == scope).count()
    }

    /// Bootstrap with an initial admin key
    pub fn bootstrap_admin_key(&mut self, name: String) -> ApiKey {
        let key = ApiKey::new(name, ApiKeyScope::Admin);
        self.add_key(key.clone());
        key
    }
}

/// Response for auth verification
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthVerifyResponse {
@@ -132,3 +72,35 @@ pub struct AuthVerifyResponse {
    pub name: String,
    pub scope: String,
}

/// Method authorization requirements
/// Maps RPC method names to required scopes
pub fn get_method_required_scopes(method: &str) -> Option<Vec<ApiKeyScope>> {
    use ApiKeyScope::*;

    match method {
        // Admin-only methods
        "key.add" | "key.remove" | "key.list" |
        "auth.create_key" | "auth.remove_key" | "auth.list_keys" |
        "supervisor.info" |
        "secrets.list_admin" | "secrets.list_user" | "secrets.list_register" => {
            Some(vec![Admin])
        }

        // Admin or Registrar methods
        "runner.register" | "runner.add" | "runner.remove" => {
            Some(vec![Admin, Registrar])
        }

        // Admin or User methods
        "jobs.create" | "job.run" | "job.start" | "job.stop" | "job.delete" => {
            Some(vec![Admin, User])
        }

        // Public methods (no auth required)
        "rpc.discover" => None,

        // Any authenticated user
        _ => Some(vec![Admin, Registrar, User]),
    }
}

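For orientation only, a minimal sketch (not part of this commit) of how an RPC handler could combine ApiKeyStore::verify_key with get_method_required_scopes to gate an incoming call; the authorize helper and its error strings are assumptions, not existing API:

fn authorize(store: &ApiKeyStore, method: &str, key: &str) -> Result<(), String> {
    // Public methods such as "rpc.discover" require no key at all.
    let Some(required) = get_method_required_scopes(method) else {
        return Ok(());
    };
    // Look the key up and check that its scope is one of the accepted scopes.
    match store.verify_key(key) {
        Some(api_key) if required.contains(&api_key.scope) => Ok(()),
        Some(_) => Err(format!("key lacks required scope for '{}'", method)),
        None => Err("unknown API key".to_string()),
    }
}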
@@ -1,8 +1,10 @@
//! Hero Supervisor Binary

use hero_supervisor::{SupervisorApp, SupervisorBuilder};
use hero_supervisor::SupervisorBuilder;
use clap::Parser;
use log::error;
use log::{error, info};
use std::sync::Arc;
use tokio::sync::Mutex;

/// Hero Supervisor - manages actors and dispatches jobs
#[derive(Parser, Debug)]
@@ -37,14 +39,6 @@ struct Args {
    #[arg(long, default_value = "127.0.0.1")]
    bind_address: String,

    /// Mycelium daemon URL (optional)
    #[arg(long, default_value = "")]
    mycelium_url: String,

    /// Mycelium topic for supervisor RPC messages
    #[arg(long, default_value = "supervisor.rpc")]
    topic: String,

    /// Pre-configured runner names (comma-separated)
    #[arg(long, value_name = "NAMES", value_delimiter = ',')]
    runners: Vec<String>,
@@ -55,13 +49,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();
    let args = Args::parse();

    // Store first admin secret for runner registration
    let admin_secret = args.admin_secrets[0].clone();

    // Build supervisor
    let mut builder = SupervisorBuilder::new()
        .redis_url(&args.redis_url)
        .namespace(&args.namespace)
        .admin_secrets(args.admin_secrets);

    if !args.user_secrets.is_empty() {
@@ -74,10 +63,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

    let mut supervisor = builder.build().await?;

    // Register pre-configured runners using first admin secret
    // Register pre-configured runners
    if !args.runners.is_empty() {
        for runner_name in &args.runners {
            match supervisor.register_runner(&admin_secret, runner_name, &format!("queue:{}", runner_name)).await {
            match supervisor.runner_create(runner_name.clone()).await {
                Ok(_) => {},
                Err(e) => error!("Failed to register runner '{}': {}", runner_name, e),
            }
@@ -85,16 +74,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    }

    // Start OpenRPC server
    use std::sync::Arc;
    use tokio::sync::Mutex;
    use hero_supervisor::openrpc::start_http_openrpc_server;

    let supervisor_arc = Arc::new(Mutex::new(supervisor.clone()));
    let supervisor_clone = supervisor.clone();
    let bind_addr = args.bind_address.clone();
    let port = args.port;

    tokio::spawn(async move {
        match start_http_openrpc_server(supervisor_arc, &bind_addr, port).await {
        match start_http_openrpc_server(supervisor_clone, &bind_addr, port).await {
            Ok(handle) => {
                handle.stopped().await;
                error!("OpenRPC server stopped unexpectedly");
@@ -107,15 +94,19 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;

    // Print minimal startup info
    // Print startup info
    println!("📡 http://{}:{}", args.bind_address, args.port);
    #[cfg(feature = "mycelium")]
    if !args.mycelium_url.is_empty() {
        println!("🌐 {}", args.mycelium_url);
    }
    info!("Hero Supervisor is running. Press Ctrl+C to shutdown.");

    let mut app = SupervisorApp::new(supervisor, args.mycelium_url, args.topic);
    app.start().await?;

    Ok(())
    // Set up graceful shutdown
    tokio::spawn(async move {
        tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
        info!("Received shutdown signal");
        std::process::exit(0);
    });

    // Keep the application running
    loop {
        tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
    }
}

core/src/builder.rs (new file)
@@ -0,0 +1,198 @@
//! Supervisor builder for configuration and initialization.

use crate::error::{SupervisorError, SupervisorResult};
use crate::Supervisor;
use hero_job_client::ClientBuilder;

/// Builder for constructing a Supervisor instance
pub struct SupervisorBuilder {
    /// Set of registered runner IDs
    runners: std::collections::HashSet<String>,
    /// Redis URL for connection
    redis_url: String,
    /// Admin secrets for bootstrapping API keys
    admin_secrets: Vec<String>,
    /// User secrets for bootstrapping API keys
    user_secrets: Vec<String>,
    /// Register secrets for bootstrapping API keys
    register_secrets: Vec<String>,
    client_builder: ClientBuilder,
    /// Osiris URL for queries (optional)
    osiris_url: Option<String>,
    /// Supervisor URL for commands via Osiris (optional)
    supervisor_url: Option<String>,
    /// Supervisor secret for Osiris commands (optional)
    supervisor_secret: Option<String>,
    /// Runner name for Osiris operations (optional)
    osiris_runner_name: Option<String>,
}

impl SupervisorBuilder {
    /// Create a new supervisor builder
    pub fn new() -> Self {
        Self {
            runners: std::collections::HashSet::new(),
            redis_url: "redis://localhost:6379".to_string(),
            admin_secrets: Vec::new(),
            user_secrets: Vec::new(),
            register_secrets: Vec::new(),
            client_builder: ClientBuilder::new(),
            osiris_url: None,
            supervisor_url: None,
            supervisor_secret: None,
            osiris_runner_name: None,
        }
    }

    /// Set the Osiris URL for queries
    pub fn osiris_url<S: Into<String>>(mut self, url: S) -> Self {
        self.osiris_url = Some(url.into());
        self
    }

    /// Set the Supervisor URL for Osiris commands
    pub fn supervisor_url_for_osiris<S: Into<String>>(mut self, url: S) -> Self {
        self.supervisor_url = Some(url.into());
        self
    }

    /// Set the Supervisor secret for Osiris commands
    pub fn supervisor_secret<S: Into<String>>(mut self, secret: S) -> Self {
        self.supervisor_secret = Some(secret.into());
        self
    }

    /// Set the runner name for Osiris operations
    pub fn osiris_runner_name<S: Into<String>>(mut self, name: S) -> Self {
        self.osiris_runner_name = Some(name.into());
        self
    }

    /// Add an admin secret
    pub fn add_admin_secret<S: Into<String>>(mut self, secret: S) -> Self {
        self.admin_secrets.push(secret.into());
        self
    }

    /// Add multiple admin secrets
    pub fn admin_secrets<I, S>(mut self, secrets: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        self.admin_secrets.extend(secrets.into_iter().map(|s| s.into()));
        self
    }

    /// Add a user secret
    pub fn add_user_secret<S: Into<String>>(mut self, secret: S) -> Self {
        self.user_secrets.push(secret.into());
        self
    }

    /// Add multiple user secrets
    pub fn user_secrets<I, S>(mut self, secrets: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        self.user_secrets.extend(secrets.into_iter().map(|s| s.into()));
        self
    }

    /// Add a register secret
    pub fn add_register_secret<S: Into<String>>(mut self, secret: S) -> Self {
        self.register_secrets.push(secret.into());
        self
    }

    /// Add multiple register secrets
    pub fn register_secrets<I, S>(mut self, secrets: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        self.register_secrets.extend(secrets.into_iter().map(|s| s.into()));
        self
    }

    /// Add a runner to the supervisor
    pub fn add_runner(mut self, runner_id: String) -> Self {
        self.runners.insert(runner_id);
        self
    }

    /// Build the supervisor
    pub async fn build(self) -> SupervisorResult<Supervisor> {
        // Create Redis client
        let redis_client = redis::Client::open(self.redis_url.as_str())
            .map_err(|e| SupervisorError::ConfigError {
                reason: format!("Invalid Redis URL: {}", e),
            })?;

        // Create the store
        let mut store = crate::store::Store::new();

        // Add admin secrets as API keys
        for secret in &self.admin_secrets {
            store.key_create(
                crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::Admin),
            );
        }

        // Add user secrets as API keys
        for secret in &self.user_secrets {
            store.key_create(
                crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::User),
            );
        }

        // Add register secrets as API keys
        for secret in &self.register_secrets {
            store.key_create(
                crate::auth::ApiKey::new(secret.clone(), crate::auth::ApiKeyScope::Registrar),
            );
        }

        // Build the client
        let client = self.client_builder.build().await?;

        // Build Osiris client if configured
        let osiris_client = if let (Some(osiris_url), Some(supervisor_url)) =
            (self.osiris_url, self.supervisor_url) {
            let mut builder = osiris_client::OsirisClient::builder()
                .osiris_url(osiris_url)
                .supervisor_url(supervisor_url)
                .runner_name(self.osiris_runner_name.unwrap_or_else(|| "osiris-runner".to_string()));

            if let Some(secret) = self.supervisor_secret {
                builder = builder.supervisor_secret(secret);
            }

            let client = builder.build().map_err(|e| SupervisorError::ConfigError {
                reason: format!("Failed to build Osiris client: {}", e),
            })?;
            Some(client)
        } else {
            None
        };

        // Add pre-configured runners to the store
        for runner_id in self.runners {
            let _ = store.runner_add(runner_id);
        }

        Ok(Supervisor {
            store: std::sync::Arc::new(tokio::sync::Mutex::new(store)),
            job_client: client,
            redis_client,
            osiris_client,
        })
    }
}

impl Default for SupervisorBuilder {
    fn default() -> Self {
        Self::new()
    }
}
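A brief usage sketch, assuming Redis is reachable at the builder's default URL; the secrets and URLs below are placeholders rather than values from this commit, and the call shape mirrors how src/bin/supervisor.rs drives the builder:

let supervisor = SupervisorBuilder::new()
    .add_admin_secret("admin-secret")          // bootstraps an Admin API key in the store
    .add_runner("example-runner".to_string())  // pre-registers a runner
    .osiris_url("http://localhost:8081")       // optional Osiris wiring
    .supervisor_url_for_osiris("http://localhost:3030")
    .build()
    .await?;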
core/src/error.rs (new file)
@@ -0,0 +1,73 @@
//! Error types for supervisor operations.

use thiserror::Error;
use jsonrpsee::types::{ErrorObject, ErrorObjectOwned};

/// Result type for supervisor operations
pub type SupervisorResult<T> = Result<T, SupervisorError>;

/// Errors that can occur during supervisor operations
#[derive(Debug, Error)]
pub enum SupervisorError {
    #[error("Runner '{runner_id}' not found")]
    RunnerNotFound { runner_id: String },

    #[error("Runner '{runner_id}' is already registered")]
    RunnerAlreadyRegistered { runner_id: String },

    #[error("Job '{job_id}' not found")]
    JobNotFound { job_id: String },

    #[error("Failed to queue job for runner '{runner_id}': {reason}")]
    QueueError { runner_id: String, reason: String },

    #[error("Configuration error: {reason}")]
    ConfigError { reason: String },

    #[error("Invalid secret or API key: {0}")]
    InvalidSecret(String),

    #[error("Authentication error: {message}")]
    AuthenticationError { message: String },

    #[error("Insufficient permissions: {message}")]
    PermissionDenied { message: String },

    #[error("Redis error: {source}")]
    RedisError {
        #[from]
        source: redis::RedisError,
    },

    #[error("Job error: {source}")]
    JobError {
        #[from]
        source: hero_job::JobError,
    },

    #[error("Job client error: {source}")]
    JobClientError {
        #[from]
        source: hero_job_client::ClientError,
    },

    #[error("IO error: {source}")]
    IoError {
        #[from]
        source: std::io::Error,
    },

    #[error("Osiris client error: {0}")]
    OsirisError(String),
}

/// Implement conversion from SupervisorError → RPC ErrorObject
impl From<SupervisorError> for ErrorObject<'static> {
    fn from(err: SupervisorError) -> Self {
        ErrorObject::owned(
            -32603, // Internal error code
            format!("Supervisor error: {err}"),
            None::<()>,
        )
    }
}
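Because the wrapper variants use #[from], library errors convert into SupervisorError automatically through the ? operator; a minimal sketch under that assumption (the ping helper is illustrative and not part of the crate):

async fn ping(redis_url: &str) -> SupervisorResult<()> {
    // redis::RedisError is lifted into SupervisorError::RedisError via #[from].
    let client = redis::Client::open(redis_url)?;
    let mut conn = client.get_multiplexed_async_connection().await?;
    let _: String = redis::cmd("PING").query_async(&mut conn).await?;
    Ok(())
}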
@@ -1,3 +0,0 @@
// Re-export job types from the hero-job crate
pub use hero_job::{Job, JobBuilder, JobStatus, JobError};
use hero_job_client::{Client, ClientBuilder};
@@ -2,24 +2,15 @@
//!
//! See README.md for detailed documentation and usage examples.

pub mod runner;
pub mod job;
pub mod supervisor;
pub mod app;
pub mod builder;
pub mod error;
pub mod openrpc;
pub mod auth;
pub mod services;

#[cfg(feature = "mycelium")]
pub mod mycelium;
pub mod store;

// Re-export main types for convenience
pub use runner::{Runner, RunnerConfig, RunnerResult, RunnerStatus};
// pub use sal_service_manager::{ProcessManager, SimpleProcessManager, TmuxProcessManager};
pub use supervisor::{Supervisor, SupervisorBuilder, ProcessManagerType};
pub use supervisor::Supervisor;
pub use builder::SupervisorBuilder;
pub use error::{SupervisorError, SupervisorResult};
pub use hero_job::{Job, JobBuilder, JobStatus, JobError};
use hero_job_client::{Client, ClientBuilder};
pub use app::SupervisorApp;

#[cfg(feature = "mycelium")]
pub use mycelium::{MyceliumIntegration, MyceliumServer};

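With these re-exports in place, downstream code can import the main types from the crate root; a short, hypothetical consumer sketch:

use hero_supervisor::{Job, JobStatus, Supervisor, SupervisorBuilder, SupervisorResult};

async fn make_supervisor() -> SupervisorResult<Supervisor> {
    // Only root-level paths are needed; the internal module layout stays an implementation detail.
    SupervisorBuilder::new().add_admin_secret("admin").build().await
}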
@@ -1,519 +0,0 @@
|
||||
//! # Mycelium Integration for Hero Supervisor
|
||||
//!
|
||||
//! This module integrates the supervisor with Mycelium's message transport system.
|
||||
//! Instead of running its own server, it connects to an existing Mycelium daemon
|
||||
//! and listens for incoming supervisor RPC messages via HTTP REST API.
|
||||
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use serde_json::{Value, json};
|
||||
use log::{info, error, debug, trace};
|
||||
use base64::Engine;
|
||||
use reqwest::Client as HttpClient;
|
||||
use crate::Supervisor;
|
||||
use tokio::time::{sleep, Duration};
|
||||
|
||||
/// Mycelium integration that connects to a Mycelium daemon and handles supervisor RPC messages
|
||||
pub struct MyceliumIntegration {
|
||||
supervisor: Arc<Mutex<Supervisor>>,
|
||||
mycelium_url: String,
|
||||
http_client: HttpClient,
|
||||
topic: String,
|
||||
running: Arc<Mutex<bool>>,
|
||||
}
|
||||
|
||||
impl MyceliumIntegration {
|
||||
pub fn new(supervisor: Arc<Mutex<Supervisor>>, mycelium_url: String, topic: String) -> Self {
|
||||
Self {
|
||||
supervisor,
|
||||
mycelium_url,
|
||||
http_client: HttpClient::new(),
|
||||
topic,
|
||||
running: Arc::new(Mutex::new(false)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Start listening for messages on the Mycelium network
|
||||
pub async fn start(&self) -> Result<(), Box<dyn std::error::Error>> {
|
||||
info!("Starting Mycelium integration with daemon at {}", self.mycelium_url);
|
||||
|
||||
// Skip connection test for now due to API compatibility issues
|
||||
// TODO: Fix Mycelium API compatibility
|
||||
info!("Skipping connection test - assuming Mycelium daemon is running");
|
||||
|
||||
// Set running flag
|
||||
{
|
||||
let mut running = self.running.lock().await;
|
||||
*running = true;
|
||||
}
|
||||
|
||||
info!("Mycelium integration started successfully, listening on topic: {}", self.topic);
|
||||
|
||||
// Start message polling loop
|
||||
let supervisor = Arc::clone(&self.supervisor);
|
||||
let http_client = self.http_client.clone();
|
||||
let mycelium_url = self.mycelium_url.clone();
|
||||
let topic = self.topic.clone();
|
||||
let running = Arc::clone(&self.running);
|
||||
|
||||
tokio::spawn(async move {
|
||||
Self::message_loop(supervisor, http_client, mycelium_url, topic, running).await;
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Test connection to Mycelium daemon using JSON-RPC
|
||||
async fn test_connection(&self) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let test_request = json!({
|
||||
"jsonrpc": "2.0",
|
||||
"method": "getInfo",
|
||||
"params": [],
|
||||
"id": 1
|
||||
});
|
||||
|
||||
let response = self.http_client
|
||||
.post(&self.mycelium_url)
|
||||
.json(&test_request)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if response.status().is_success() {
|
||||
let result: Value = response.json().await?;
|
||||
if result.get("result").is_some() {
|
||||
info!("Successfully connected to Mycelium daemon at {}", self.mycelium_url);
|
||||
Ok(())
|
||||
} else {
|
||||
error!("Mycelium daemon returned error: {}", result);
|
||||
Err("Mycelium daemon returned error".into())
|
||||
}
|
||||
} else {
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
error!("Failed to connect to Mycelium daemon: {} - {}", status, text);
|
||||
Err(format!("Mycelium connection failed: {}", status).into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle incoming supervisor RPC message (called by Mycelium daemon via pushMessage)
|
||||
pub async fn handle_supervisor_message(
|
||||
&self,
|
||||
payload_b64: &str,
|
||||
reply_info: Option<(String, String)>,
|
||||
) -> Result<Option<String>, Box<dyn std::error::Error + Send + Sync>> {
|
||||
// Decode the base64 payload
|
||||
let payload_bytes = base64::engine::general_purpose::STANDARD
|
||||
.decode(payload_b64.as_bytes())?;
|
||||
let payload_str = String::from_utf8(payload_bytes)?;
|
||||
|
||||
info!("Received supervisor message: {}", payload_str);
|
||||
|
||||
// Parse the JSON-RPC request
|
||||
let request: Value = serde_json::from_str(&payload_str)?;
|
||||
|
||||
debug!("Decoded supervisor RPC: {}", request);
|
||||
|
||||
// Extract method and params from supervisor JSON-RPC
|
||||
let method = request.get("method")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing method")?;
|
||||
|
||||
let rpc_params = request.get("params")
|
||||
.cloned()
|
||||
.unwrap_or(json!([]));
|
||||
|
||||
let rpc_id = request.get("id").cloned();
|
||||
|
||||
// Route to appropriate supervisor method
|
||||
let result = self.route_supervisor_call(method, rpc_params).await?;
|
||||
|
||||
// If we have reply info, send the response back via Mycelium
|
||||
if let Some((src_ip, _msg_id)) = reply_info {
|
||||
let supervisor_response = json!({
|
||||
"jsonrpc": "2.0",
|
||||
"id": rpc_id,
|
||||
"result": result
|
||||
});
|
||||
|
||||
let response_b64 = base64::engine::general_purpose::STANDARD
|
||||
.encode(serde_json::to_string(&supervisor_response)?.as_bytes());
|
||||
|
||||
info!("Sending response back to client at {}: {}", src_ip, supervisor_response);
|
||||
|
||||
// Send reply back to the client
|
||||
match self.send_reply(&src_ip, &response_b64).await {
|
||||
Ok(()) => info!("✅ Response sent successfully to {}", src_ip),
|
||||
Err(e) => error!("❌ Failed to send response to {}: {}", src_ip, e),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some("handled".to_string()))
|
||||
}
|
||||
|
||||
/// Send a reply message back to a client using Mycelium JSON-RPC
|
||||
async fn send_reply(
|
||||
&self,
|
||||
dst_ip: &str,
|
||||
payload_b64: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
// Send response to a dedicated response topic
|
||||
let response_topic = "supervisor.response";
|
||||
let topic_b64 = base64::engine::general_purpose::STANDARD.encode(response_topic.as_bytes());
|
||||
|
||||
let message_info = json!({
|
||||
"dst": { "ip": dst_ip },
|
||||
"topic": topic_b64,
|
||||
"payload": payload_b64 // payload_b64 is already base64 encoded
|
||||
});
|
||||
|
||||
let push_request = json!({
|
||||
"jsonrpc": "2.0",
|
||||
"method": "pushMessage",
|
||||
"params": [message_info, null],
|
||||
"id": 1
|
||||
});
|
||||
|
||||
let response = self.http_client
|
||||
.post(&self.mycelium_url)
|
||||
.json(&push_request)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if response.status().is_success() {
|
||||
let result: Value = response.json().await?;
|
||||
if result.get("result").is_some() {
|
||||
debug!("Sent reply to {}", dst_ip);
|
||||
Ok(())
|
||||
} else {
|
||||
error!("Failed to send reply, Mycelium error: {}", result);
|
||||
Err("Mycelium pushMessage failed".into())
|
||||
}
|
||||
} else {
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
error!("Failed to send reply: {} - {}", status, text);
|
||||
Err(format!("Failed to send reply: {}", status).into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Route supervisor method calls to the appropriate supervisor functions
|
||||
async fn route_supervisor_call(
|
||||
&self,
|
||||
method: &str,
|
||||
params: Value,
|
||||
) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let mut supervisor_guard = self.supervisor.lock().await;
|
||||
|
||||
match method {
|
||||
"list_runners" => {
|
||||
// list_runners doesn't require parameters
|
||||
let runners = supervisor_guard.list_runners();
|
||||
Ok(json!(runners))
|
||||
}
|
||||
|
||||
"register_runner" => {
|
||||
if let Some(param_obj) = params.as_array().and_then(|arr| arr.get(0)) {
|
||||
let secret = param_obj.get("secret")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing secret")?;
|
||||
let name = param_obj.get("name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing name")?;
|
||||
let queue = param_obj.get("queue")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing queue")?;
|
||||
|
||||
supervisor_guard.register_runner(secret, name, queue).await?;
|
||||
Ok(json!("success"))
|
||||
} else {
|
||||
Err("invalid register_runner params".into())
|
||||
}
|
||||
}
|
||||
|
||||
"start_runner" => {
|
||||
if let Some(actor_id) = params.as_array().and_then(|arr| arr.get(0)).and_then(|v| v.as_str()) {
|
||||
supervisor_guard.start_runner(actor_id).await?;
|
||||
Ok(json!("success"))
|
||||
} else {
|
||||
Err("invalid start_runner params".into())
|
||||
}
|
||||
}
|
||||
|
||||
"stop_runner" => {
|
||||
if let Some(arr) = params.as_array() {
|
||||
let actor_id = arr.get(0).and_then(|v| v.as_str()).ok_or("missing actor_id")?;
|
||||
let force = arr.get(1).and_then(|v| v.as_bool()).unwrap_or(false);
|
||||
supervisor_guard.stop_runner(actor_id, force).await?;
|
||||
Ok(json!("success"))
|
||||
} else {
|
||||
Err("invalid stop_runner params".into())
|
||||
}
|
||||
}
|
||||
|
||||
"get_runner_status" => {
|
||||
if let Some(actor_id) = params.as_array().and_then(|arr| arr.get(0)).and_then(|v| v.as_str()) {
|
||||
let status = supervisor_guard.get_runner_status(actor_id).await?;
|
||||
Ok(json!(format!("{:?}", status)))
|
||||
} else {
|
||||
Err("invalid get_runner_status params".into())
|
||||
}
|
||||
}
|
||||
|
||||
"get_all_runner_status" => {
|
||||
let statuses = supervisor_guard.get_all_runner_status().await?;
|
||||
let status_map: std::collections::HashMap<String, String> = statuses
|
||||
.into_iter()
|
||||
.map(|(id, status)| (id, format!("{:?}", status)))
|
||||
.collect();
|
||||
Ok(json!(status_map))
|
||||
}
|
||||
|
||||
"start_all" => {
|
||||
let results = supervisor_guard.start_all().await;
|
||||
let status_results: Vec<(String, String)> = results
|
||||
.into_iter()
|
||||
.map(|(id, result)| {
|
||||
let status = match result {
|
||||
Ok(_) => "started".to_string(),
|
||||
Err(e) => format!("error: {}", e),
|
||||
};
|
||||
(id, status)
|
||||
})
|
||||
.collect();
|
||||
Ok(json!(status_results))
|
||||
}
|
||||
|
||||
"stop_all" => {
|
||||
let force = params.as_array()
|
||||
.and_then(|arr| arr.get(0))
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
let results = supervisor_guard.stop_all(force).await;
|
||||
let status_results: Vec<(String, String)> = results
|
||||
.into_iter()
|
||||
.map(|(id, result)| {
|
||||
let status = match result {
|
||||
Ok(_) => "stopped".to_string(),
|
||||
Err(e) => format!("error: {}", e),
|
||||
};
|
||||
(id, status)
|
||||
})
|
||||
.collect();
|
||||
Ok(json!(status_results))
|
||||
}
|
||||
|
||||
"job.run" => {
|
||||
// Run job and wait for result (blocking)
|
||||
if let Some(param_obj) = params.as_array().and_then(|arr| arr.get(0)) {
|
||||
let _secret = param_obj.get("secret")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing secret")?;
|
||||
|
||||
let job_value = param_obj.get("job")
|
||||
.ok_or("missing job")?;
|
||||
|
||||
let timeout = param_obj.get("timeout")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(60);
|
||||
|
||||
// Deserialize the job
|
||||
let job: hero_job::Job = serde_json::from_value(job_value.clone())
|
||||
.map_err(|e| format!("invalid job format: {}", e))?;
|
||||
|
||||
let job_id = job.id.clone();
|
||||
let runner_name = job.runner.clone();
|
||||
|
||||
// Verify signatures
|
||||
job.verify_signatures()
|
||||
.map_err(|e| format!("signature verification failed: {}", e))?;
|
||||
|
||||
info!("Job {} signature verification passed for signatories: {:?}",
|
||||
job_id, job.signatories());
|
||||
|
||||
// Queue and wait for result
|
||||
let mut supervisor_guard = self.supervisor.lock().await;
|
||||
let result = supervisor_guard.queue_and_wait(&runner_name, job, timeout)
|
||||
.await
|
||||
.map_err(|e| format!("job execution failed: {}", e))?;
|
||||
|
||||
Ok(json!({
|
||||
"job_id": job_id,
|
||||
"status": "completed",
|
||||
"result": result
|
||||
}))
|
||||
} else {
|
||||
Err("invalid job.run params".into())
|
||||
}
|
||||
}
|
||||
|
||||
"job.start" => {
|
||||
// Start job without waiting (non-blocking)
|
||||
if let Some(param_obj) = params.as_array().and_then(|arr| arr.get(0)) {
|
||||
let _secret = param_obj.get("secret")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing secret")?;
|
||||
|
||||
let job_value = param_obj.get("job")
|
||||
.ok_or("missing job")?;
|
||||
|
||||
// Deserialize the job
|
||||
let job: hero_job::Job = serde_json::from_value(job_value.clone())
|
||||
.map_err(|e| format!("invalid job format: {}", e))?;
|
||||
|
||||
let job_id = job.id.clone();
|
||||
let runner_name = job.runner.clone();
|
||||
|
||||
// Verify signatures
|
||||
job.verify_signatures()
|
||||
.map_err(|e| format!("signature verification failed: {}", e))?;
|
||||
|
||||
info!("Job {} signature verification passed for signatories: {:?}",
|
||||
job_id, job.signatories());
|
||||
|
||||
// Queue the job without waiting
|
||||
let mut supervisor_guard = self.supervisor.lock().await;
|
||||
supervisor_guard.queue_job_to_runner(&runner_name, job)
|
||||
.await
|
||||
.map_err(|e| format!("failed to queue job: {}", e))?;
|
||||
|
||||
Ok(json!({
|
||||
"job_id": job_id,
|
||||
"status": "queued"
|
||||
}))
|
||||
} else {
|
||||
Err("invalid job.start params".into())
|
||||
}
|
||||
}
|
||||
|
||||
"job.status" => {
|
||||
if let Some(_job_id) = params.as_array().and_then(|arr| arr.get(0)).and_then(|v| v.as_str()) {
|
||||
// TODO: Implement actual job status lookup
|
||||
Ok(json!({"status": "completed"}))
|
||||
} else {
|
||||
Err("invalid job.status params".into())
|
||||
}
|
||||
}
|
||||
|
||||
"job.result" => {
|
||||
if let Some(_job_id) = params.as_array().and_then(|arr| arr.get(0)).and_then(|v| v.as_str()) {
|
||||
// TODO: Implement actual job result lookup
|
||||
Ok(json!({"success": "job completed successfully"}))
|
||||
} else {
|
||||
Err("invalid job.result params".into())
|
||||
}
|
||||
}
|
||||
|
||||
"rpc.discover" => {
|
||||
let methods = vec![
|
||||
"list_runners", "register_runner", "start_runner", "stop_runner",
|
||||
"get_runner_status", "get_all_runner_status", "start_all", "stop_all",
|
||||
"job.run", "job.start", "job.status", "job.result", "rpc.discover"
|
||||
];
|
||||
Ok(json!(methods))
|
||||
}
|
||||
|
||||
_ => {
|
||||
error!("Unknown method: {}", method);
|
||||
Err(format!("unknown method: {}", method).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Message polling loop that listens for incoming messages
|
||||
async fn message_loop(
|
||||
supervisor: Arc<Mutex<Supervisor>>,
|
||||
http_client: HttpClient,
|
||||
mycelium_url: String,
|
||||
topic: String,
|
||||
running: Arc<Mutex<bool>>,
|
||||
) {
|
||||
info!("Starting message polling loop for topic: {} (base64: {})", topic, base64::engine::general_purpose::STANDARD.encode(topic.as_bytes()));
|
||||
|
||||
while {
|
||||
let running_guard = running.lock().await;
|
||||
*running_guard
|
||||
} {
|
||||
// Poll for messages using Mycelium JSON-RPC API
|
||||
// Topic needs to be base64 encoded for the RPC API
|
||||
let topic_b64 = base64::engine::general_purpose::STANDARD.encode(topic.as_bytes());
|
||||
let poll_request = json!({
|
||||
"jsonrpc": "2.0",
|
||||
"method": "popMessage",
|
||||
"params": [null, 1, topic_b64], // Reduced timeout to 1 second
|
||||
"id": 1
|
||||
});
|
||||
|
||||
debug!("Polling for messages with request: {}", poll_request);
|
||||
match tokio::time::timeout(
|
||||
Duration::from_secs(10),
|
||||
http_client.post(&mycelium_url).json(&poll_request).send()
|
||||
).await {
|
||||
Ok(Ok(response)) => {
|
||||
if response.status().is_success() {
|
||||
match response.json::<Value>().await {
|
||||
Ok(rpc_response) => {
|
||||
if let Some(message) = rpc_response.get("result") {
|
||||
debug!("Received message: {}", message);
|
||||
|
||||
// Extract message details
|
||||
if let (Some(payload), Some(src_ip), Some(msg_id)) = (
|
||||
message.get("payload").and_then(|v| v.as_str()),
|
||||
message.get("srcIp").and_then(|v| v.as_str()),
|
||||
message.get("id").and_then(|v| v.as_str()),
|
||||
) {
|
||||
// Create a temporary integration instance to handle the message
|
||||
let integration = MyceliumIntegration {
|
||||
supervisor: Arc::clone(&supervisor),
|
||||
mycelium_url: mycelium_url.clone(),
|
||||
http_client: http_client.clone(),
|
||||
topic: topic.clone(),
|
||||
running: Arc::clone(&running),
|
||||
};
|
||||
|
||||
let reply_info = Some((src_ip.to_string(), msg_id.to_string()));
|
||||
|
||||
if let Err(e) = integration.handle_supervisor_message(payload, reply_info).await {
|
||||
error!("Error handling supervisor message: {}", e);
|
||||
}
|
||||
}
|
||||
} else if let Some(error) = rpc_response.get("error") {
|
||||
let error_code = error.get("code").and_then(|c| c.as_i64()).unwrap_or(0);
|
||||
if error_code == -32014 {
|
||||
// Timeout - no message available, continue polling
|
||||
trace!("No messages available (timeout)");
|
||||
} else {
|
||||
error!("Mycelium RPC error: {}", error);
|
||||
sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
} else {
|
||||
trace!("No messages available");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to parse RPC response JSON: {}", e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
error!("Message polling error: {} - {}", status, text);
|
||||
sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
error!("HTTP request failed: {}", e);
|
||||
sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
Err(_) => {
|
||||
error!("Polling request timed out after 10 seconds");
|
||||
sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("Message polling loop stopped");
|
||||
}
|
||||
}
|
||||
|
||||
// Legacy type alias for backward compatibility
|
||||
pub type MyceliumServer = MyceliumIntegration;
|
||||
core/src/openrpc.rs (file diff suppressed because it is too large)
@@ -1,230 +0,0 @@
|
||||
//! Tests for the new job API methods
|
||||
|
||||
#[cfg(test)]
|
||||
mod job_api_tests {
|
||||
use super::super::*;
|
||||
use crate::supervisor::{Supervisor, SupervisorBuilder};
|
||||
use crate::job::{Job, JobBuilder};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use serde_json::json;
|
||||
|
||||
async fn create_test_supervisor() -> Arc<Mutex<Supervisor>> {
|
||||
let supervisor = SupervisorBuilder::new()
|
||||
.redis_url("redis://localhost:6379")
|
||||
.namespace("test_job_api")
|
||||
.build()
|
||||
.await
|
||||
.unwrap_or_else(|_| Supervisor::default());
|
||||
|
||||
let mut supervisor = supervisor;
|
||||
supervisor.add_admin_secret("test-admin-secret".to_string());
|
||||
supervisor.add_user_secret("test-user-secret".to_string());
|
||||
|
||||
Arc::new(Mutex::new(supervisor))
|
||||
}
|
||||
|
||||
fn create_test_job() -> Job {
|
||||
JobBuilder::new()
|
||||
.id("test-job-123".to_string())
|
||||
.caller_id("test-client".to_string())
|
||||
.context_id("test-context".to_string())
|
||||
.script("print('Hello World')".to_string())
|
||||
.script_type(crate::job::ScriptType::Osis)
|
||||
.timeout(30)
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_jobs_create() {
|
||||
let supervisor = create_test_supervisor().await;
|
||||
let job = create_test_job();
|
||||
|
||||
let params = RunJobParams {
|
||||
secret: "test-user-secret".to_string(),
|
||||
job: job.clone(),
|
||||
};
|
||||
|
||||
let result = supervisor.jobs_create(params).await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
let job_id = result.unwrap();
|
||||
assert_eq!(job_id, job.id);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_jobs_create_invalid_secret() {
|
||||
let supervisor = create_test_supervisor().await;
|
||||
let job = create_test_job();
|
||||
|
||||
let params = RunJobParams {
|
||||
secret: "invalid-secret".to_string(),
|
||||
job,
|
||||
};
|
||||
|
||||
let result = supervisor.jobs_create(params).await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_jobs_list() {
|
||||
let supervisor = create_test_supervisor().await;
|
||||
|
||||
let result = supervisor.jobs_list().await;
|
||||
// Should not error even if Redis is not available (will return empty list or error)
|
||||
// The important thing is that the method signature works
|
||||
assert!(result.is_ok() || result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_job_run_success_format() {
|
||||
let supervisor = create_test_supervisor().await;
|
||||
let job = create_test_job();
|
||||
|
||||
let params = RunJobParams {
|
||||
secret: "test-user-secret".to_string(),
|
||||
job,
|
||||
};
|
||||
|
||||
let result = supervisor.job_run(params).await;
|
||||
|
||||
// The result should be a JobResult enum
|
||||
match result {
|
||||
Ok(JobResult::Success { success: _ }) => {
|
||||
// Success case - job executed and returned output
|
||||
},
|
||||
Ok(JobResult::Error { error: _ }) => {
|
||||
// Error case - job failed but method worked
|
||||
},
|
||||
Err(_) => {
|
||||
// Method error (authentication, etc.)
|
||||
// This is acceptable for testing without actual runners
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_job_start() {
|
||||
let supervisor = create_test_supervisor().await;
|
||||
|
||||
let params = StartJobParams {
|
||||
secret: "test-user-secret".to_string(),
|
||||
job_id: "test-job-123".to_string(),
|
||||
};
|
||||
|
||||
let result = supervisor.job_start(params).await;
|
||||
|
||||
// Should fail gracefully if job doesn't exist
|
||||
assert!(result.is_err() || result.is_ok());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_job_start_invalid_secret() {
|
||||
let supervisor = create_test_supervisor().await;
|
||||
|
||||
let params = StartJobParams {
|
||||
secret: "invalid-secret".to_string(),
|
||||
job_id: "test-job-123".to_string(),
|
||||
};
|
||||
|
||||
let result = supervisor.job_start(params).await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_job_status() {
|
||||
let supervisor = create_test_supervisor().await;
|
||||
|
||||
let result = supervisor.job_status("test-job-123".to_string()).await;
|
||||
|
||||
// Should return error for non-existent job
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_job_result() {
|
||||
let supervisor = create_test_supervisor().await;
|
||||
|
||||
let result = supervisor.job_result("test-job-123".to_string()).await;
|
||||
|
||||
// Should return error for non-existent job
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_job_result_enum_serialization() {
|
||||
let success_result = JobResult::Success {
|
||||
success: "Job completed successfully".to_string(),
|
||||
};
|
||||
|
||||
let serialized = serde_json::to_string(&success_result).unwrap();
|
||||
assert!(serialized.contains("success"));
|
||||
assert!(serialized.contains("Job completed successfully"));
|
||||
|
||||
let error_result = JobResult::Error {
|
||||
error: "Job failed with error".to_string(),
|
||||
};
|
||||
|
||||
let serialized = serde_json::to_string(&error_result).unwrap();
|
||||
assert!(serialized.contains("error"));
|
||||
assert!(serialized.contains("Job failed with error"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_job_status_response_serialization() {
|
||||
let status_response = JobStatusResponse {
|
||||
job_id: "test-job-123".to_string(),
|
||||
status: "running".to_string(),
|
||||
created_at: "2023-01-01T00:00:00Z".to_string(),
|
||||
started_at: Some("2023-01-01T00:00:05Z".to_string()),
|
||||
completed_at: None,
|
||||
};
|
||||
|
||||
let serialized = serde_json::to_string(&status_response).unwrap();
|
||||
assert!(serialized.contains("test-job-123"));
|
||||
assert!(serialized.contains("running"));
|
||||
assert!(serialized.contains("2023-01-01T00:00:00Z"));
|
||||
assert!(serialized.contains("2023-01-01T00:00:05Z"));
|
||||
|
||||
let deserialized: JobStatusResponse = serde_json::from_str(&serialized).unwrap();
|
||||
assert_eq!(deserialized.job_id, "test-job-123");
|
||||
assert_eq!(deserialized.status, "running");
|
||||
assert_eq!(deserialized.started_at, Some("2023-01-01T00:00:05Z".to_string()));
|
||||
assert_eq!(deserialized.completed_at, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_start_job_params_serialization() {
|
||||
let params = StartJobParams {
|
||||
secret: "test-secret".to_string(),
|
||||
job_id: "job-123".to_string(),
|
||||
};
|
||||
|
||||
let serialized = serde_json::to_string(¶ms).unwrap();
|
||||
assert!(serialized.contains("test-secret"));
|
||||
assert!(serialized.contains("job-123"));
|
||||
|
||||
let deserialized: StartJobParams = serde_json::from_str(&serialized).unwrap();
|
||||
assert_eq!(deserialized.secret, "test-secret");
|
||||
assert_eq!(deserialized.job_id, "job-123");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_method_naming_convention() {
|
||||
// Test that method names follow the jobs./job. convention
|
||||
|
||||
// These should be the actual method names in the trait
|
||||
let jobs_methods = vec!["jobs.create", "jobs.list"];
|
||||
let job_methods = vec!["job.run", "job.start", "job.status", "job.result"];
|
||||
|
||||
// Verify naming convention
|
||||
for method in jobs_methods {
|
||||
assert!(method.starts_with("jobs."));
|
||||
}
|
||||
|
||||
for method in job_methods {
|
||||
assert!(method.starts_with("job."));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,207 +0,0 @@
|
||||
//! Runner implementation for actor process management.
|
||||
|
||||
// use sal_service_manager::{ProcessManagerError as ServiceProcessManagerError, ProcessStatus, ProcessConfig};
|
||||
|
||||
/// Simple process status enum to replace sal_service_manager dependency
|
||||
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
|
||||
pub enum ProcessStatus {
|
||||
NotStarted,
|
||||
Starting,
|
||||
Running,
|
||||
Stopping,
|
||||
Stopped,
|
||||
Failed,
|
||||
Error(String),
|
||||
}
|
||||
|
||||
/// Simple process config to replace sal_service_manager dependency
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ProcessConfig {
|
||||
pub command: String,
|
||||
pub args: Vec<String>,
|
||||
pub working_dir: Option<String>,
|
||||
pub env_vars: Vec<(String, String)>,
|
||||
}
|
||||
|
||||
impl ProcessConfig {
|
||||
pub fn new(command: String, args: Vec<String>, working_dir: Option<String>, env_vars: Vec<(String, String)>) -> Self {
|
||||
Self {
|
||||
command,
|
||||
args,
|
||||
working_dir,
|
||||
env_vars,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple process manager error to replace sal_service_manager dependency
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ProcessManagerError {
|
||||
#[error("Process execution failed: {0}")]
|
||||
ExecutionFailed(String),
|
||||
#[error("Process not found: {0}")]
|
||||
ProcessNotFound(String),
|
||||
#[error("IO error: {0}")]
|
||||
IoError(String),
|
||||
}
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Represents the current status of an actor/runner (alias for ProcessStatus)
|
||||
pub type RunnerStatus = ProcessStatus;
|
||||
|
||||
/// Log information structure with serialization support
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct LogInfo {
|
||||
pub timestamp: String,
|
||||
pub level: String,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// Runner configuration and state (merged from RunnerConfig)
#[derive(Debug, Clone)]
pub struct Runner {
    /// Unique identifier for the runner
    pub id: String,
    pub name: String,
    pub namespace: String,
    /// Path to the actor binary
    pub command: PathBuf, // Only used when the supervisor launches runner processes itself
    /// Redis URL for job queue
    pub redis_url: String,
    /// Additional command-line arguments
    pub extra_args: Vec<String>,
}

impl Runner {
    /// Create a new runner from configuration
    pub fn from_config(config: RunnerConfig) -> Self {
        Self {
            id: config.id,
            name: config.name,
            namespace: config.namespace,
            command: config.command,
            redis_url: config.redis_url,
            extra_args: config.extra_args,
        }
    }

    /// Create a new runner with extra arguments
    pub fn with_args(
        id: String,
        name: String,
        namespace: String,
        command: PathBuf,
        redis_url: String,
        extra_args: Vec<String>,
    ) -> Self {
        Self {
            id,
            name,
            namespace,
            command,
            redis_url,
            extra_args,
        }
    }

    /// Get the Redis queue key for this runner, prefixed with the namespace if one is set
    pub fn get_queue(&self) -> String {
        if self.namespace.is_empty() {
            format!("runner:{}", self.name)
        } else {
            format!("{}:runner:{}", self.namespace, self.name)
        }
    }
}

/// Result type for runner operations
pub type RunnerResult<T> = Result<T, RunnerError>;

/// Errors that can occur during runner operations
#[derive(Debug, thiserror::Error)]
pub enum RunnerError {
    #[error("Actor '{actor_id}' not found")]
    ActorNotFound { actor_id: String },

    #[error("Actor '{actor_id}' is already running")]
    ActorAlreadyRunning { actor_id: String },

    #[error("Actor '{actor_id}' is not running")]
    ActorNotRunning { actor_id: String },

    #[error("Failed to start actor '{actor_id}': {reason}")]
    StartupFailed { actor_id: String, reason: String },

    #[error("Failed to stop actor '{actor_id}': {reason}")]
    StopFailed { actor_id: String, reason: String },

    #[error("Timeout waiting for actor '{actor_id}' to start")]
    StartupTimeout { actor_id: String },

    #[error("Job queue error for actor '{actor_id}': {reason}")]
    QueueError { actor_id: String, reason: String },

    #[error("Process manager error: {source}")]
    ProcessManagerError {
        #[from]
        source: ProcessManagerError,
    },

    #[error("Configuration error: {reason}")]
    ConfigError { reason: String },

    #[error("Invalid secret: {0}")]
    InvalidSecret(String),

    #[error("IO error: {source}")]
    IoError {
        #[from]
        source: std::io::Error,
    },

    #[error("Redis error: {source}")]
    RedisError {
        #[from]
        source: redis::RedisError,
    },

    #[error("Job error: {source}")]
    JobError {
        #[from]
        source: hero_job::JobError,
    },

    #[error("Job client error: {source}")]
    JobClientError {
        #[from]
        source: hero_job_client::ClientError,
    },

    #[error("Job '{job_id}' not found")]
    JobNotFound { job_id: String },

    #[error("Authentication error: {message}")]
    AuthenticationError { message: String },
}

// Type alias for backward compatibility
pub type RunnerConfig = Runner;

/// Convert Runner to ProcessConfig
pub fn runner_to_process_config(config: &Runner) -> ProcessConfig {
    let mut args = vec![
        config.id.clone(), // First positional argument is the runner ID
        "--redis-url".to_string(),
        config.redis_url.clone(),
    ];

    // Add extra arguments (e.g., context configurations)
    args.extend(config.extra_args.clone());

    ProcessConfig::new(
        config.command.to_string_lossy().to_string(),
        args,
        Some("/tmp".to_string()), // Default working directory since Runner doesn't have working_dir field
        vec![]
    )
}
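For reviewers, a minimal sketch of how these helpers compose. The runner id, binary path, and Redis URL below are illustrative, not taken from the commit; `Runner` and `runner_to_process_config` are assumed to be in scope from the module above.

```rust
use std::path::PathBuf;

// Illustrative only: shows the queue key and process invocation derived from a Runner.
fn example_runner_wiring() {
    let runner = Runner::with_args(
        "example-runner".to_string(),           // id (hypothetical)
        "example-runner".to_string(),           // name
        "hero".to_string(),                     // namespace
        PathBuf::from("/usr/local/bin/runner"), // command (hypothetical path)
        "redis://127.0.0.1:6379".to_string(),
        vec!["--context".to_string(), "default".to_string()],
    );

    // Namespaced queue key; with an empty namespace this would be "runner:example-runner".
    assert_eq!(runner.get_queue(), "hero:runner:example-runner");

    // ProcessConfig receives: <command> <id> --redis-url <url> <extra args...>
    let _process = runner_to_process_config(&runner);
}
```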
@@ -1,269 +0,0 @@
//! Service layer for persistent storage of keys, runners, and jobs
//!
//! This module provides database/storage services for the supervisor.
//! Currently uses in-memory storage, but designed to be easily extended
//! to use Redis, PostgreSQL, or other persistent storage backends.

use crate::auth::{ApiKey, ApiKeyScope};
use crate::job::Job;
use crate::runner::Runner;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;
use serde::{Deserialize, Serialize};

/// Service for managing API keys
#[derive(Debug, Clone)]
pub struct ApiKeyService {
    store: Arc<Mutex<HashMap<String, ApiKey>>>,
}

impl ApiKeyService {
    /// Create a new API key service
    pub fn new() -> Self {
        Self {
            store: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// Store an API key
    pub async fn store(&self, key: ApiKey) -> Result<(), String> {
        let mut store = self.store.lock().await;
        store.insert(key.key.clone(), key);
        Ok(())
    }

    /// Get an API key by its key string
    pub async fn get(&self, key: &str) -> Option<ApiKey> {
        let store = self.store.lock().await;
        store.get(key).cloned()
    }

    /// List all API keys
    pub async fn list(&self) -> Vec<ApiKey> {
        let store = self.store.lock().await;
        store.values().cloned().collect()
    }

    /// Remove an API key
    pub async fn remove(&self, key: &str) -> Option<ApiKey> {
        let mut store = self.store.lock().await;
        store.remove(key)
    }

    /// Count API keys by scope
    pub async fn count_by_scope(&self, scope: ApiKeyScope) -> usize {
        let store = self.store.lock().await;
        store.values().filter(|k| k.scope == scope).count()
    }

    /// Clear all API keys (for testing)
    pub async fn clear(&self) {
        let mut store = self.store.lock().await;
        store.clear();
    }
}

impl Default for ApiKeyService {
    fn default() -> Self {
        Self::new()
    }
}

/// Service for managing runners
#[derive(Debug, Clone)]
pub struct RunnerService {
    store: Arc<Mutex<HashMap<String, RunnerMetadata>>>,
}

/// Metadata about a runner for storage
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunnerMetadata {
    pub id: String,
    pub name: String,
    pub queue: String,
    pub registered_at: String,
    pub registered_by: String, // API key name that registered this runner
}

impl RunnerService {
    /// Create a new runner service
    pub fn new() -> Self {
        Self {
            store: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// Store runner metadata
    pub async fn store(&self, metadata: RunnerMetadata) -> Result<(), String> {
        let mut store = self.store.lock().await;
        store.insert(metadata.id.clone(), metadata);
        Ok(())
    }

    /// Get runner metadata by ID
    pub async fn get(&self, id: &str) -> Option<RunnerMetadata> {
        let store = self.store.lock().await;
        store.get(id).cloned()
    }

    /// List all runners
    pub async fn list(&self) -> Vec<RunnerMetadata> {
        let store = self.store.lock().await;
        store.values().cloned().collect()
    }

    /// Remove a runner
    pub async fn remove(&self, id: &str) -> Option<RunnerMetadata> {
        let mut store = self.store.lock().await;
        store.remove(id)
    }

    /// Count total runners
    pub async fn count(&self) -> usize {
        let store = self.store.lock().await;
        store.len()
    }

    /// Clear all runners (for testing)
    pub async fn clear(&self) {
        let mut store = self.store.lock().await;
        store.clear();
    }
}

impl Default for RunnerService {
    fn default() -> Self {
        Self::new()
    }
}

/// Service for managing jobs
#[derive(Debug, Clone)]
pub struct JobService {
    store: Arc<Mutex<HashMap<String, JobMetadata>>>,
}

/// Metadata about a job for storage
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobMetadata {
    pub job_id: String,
    pub runner: String,
    pub created_at: String,
    pub created_by: String, // API key name that created this job
    pub status: String,
    pub job: Job,
}

impl JobService {
    /// Create a new job service
    pub fn new() -> Self {
        Self {
            store: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// Store job metadata
    pub async fn store(&self, metadata: JobMetadata) -> Result<(), String> {
        let mut store = self.store.lock().await;
        store.insert(metadata.job_id.clone(), metadata);
        Ok(())
    }

    /// Get job metadata by ID
    pub async fn get(&self, job_id: &str) -> Option<JobMetadata> {
        let store = self.store.lock().await;
        store.get(job_id).cloned()
    }

    /// List all jobs
    pub async fn list(&self) -> Vec<JobMetadata> {
        let store = self.store.lock().await;
        store.values().cloned().collect()
    }

    /// List jobs by runner
    pub async fn list_by_runner(&self, runner: &str) -> Vec<JobMetadata> {
        let store = self.store.lock().await;
        store.values()
            .filter(|j| j.runner == runner)
            .cloned()
            .collect()
    }

    /// List jobs by creator (API key name)
    pub async fn list_by_creator(&self, creator: &str) -> Vec<JobMetadata> {
        let store = self.store.lock().await;
        store.values()
            .filter(|j| j.created_by == creator)
            .cloned()
            .collect()
    }

    /// Update job status
    pub async fn update_status(&self, job_id: &str, status: String) -> Result<(), String> {
        let mut store = self.store.lock().await;
        if let Some(metadata) = store.get_mut(job_id) {
            metadata.status = status;
            Ok(())
        } else {
            Err(format!("Job not found: {}", job_id))
        }
    }

    /// Remove a job
    pub async fn remove(&self, job_id: &str) -> Option<JobMetadata> {
        let mut store = self.store.lock().await;
        store.remove(job_id)
    }

    /// Count total jobs
    pub async fn count(&self) -> usize {
        let store = self.store.lock().await;
        store.len()
    }

    /// Clear all jobs (for testing)
    pub async fn clear(&self) {
        let mut store = self.store.lock().await;
        store.clear();
    }
}

impl Default for JobService {
    fn default() -> Self {
        Self::new()
    }
}

/// Combined service container for all storage services
#[derive(Debug, Clone)]
pub struct Services {
    pub api_keys: ApiKeyService,
    pub runners: RunnerService,
    pub jobs: JobService,
}

impl Services {
    /// Create a new services container
    pub fn new() -> Self {
        Self {
            api_keys: ApiKeyService::new(),
            runners: RunnerService::new(),
            jobs: JobService::new(),
        }
    }

    /// Clear all data (for testing)
    pub async fn clear_all(&self) {
        self.api_keys.clear().await;
        self.runners.clear().await;
        self.jobs.clear().await;
    }
}

impl Default for Services {
    fn default() -> Self {
        Self::new()
    }
}
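Before this change, callers went through the per-type services above. A minimal usage sketch of the removed API, for contrast with the new `Store` below; the field values are illustrative, not taken from the commit.

```rust
// Illustrative only: how the removed Services container was driven.
async fn example_services_usage() {
    let services = Services::new();

    services
        .runners
        .store(RunnerMetadata {
            id: "runner-1".to_string(),
            name: "example-runner".to_string(),
            queue: "hero:runner:example-runner".to_string(),
            registered_at: "2024-01-01T00:00:00Z".to_string(),
            registered_by: "admin-key".to_string(),
        })
        .await
        .expect("store runner metadata");

    assert_eq!(services.runners.count().await, 1);
    assert!(services.runners.get("runner-1").await.is_some());

    // Wipe everything between tests.
    services.clear_all().await;
}
```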
286  core/src/store.rs  Normal file
@@ -0,0 +1,286 @@
//! In-memory storage layer for Supervisor
//!
//! Provides CRUD operations for:
//! - API Keys
//! - Runners
//! - Jobs

use crate::auth::{ApiKey, ApiKeyScope};
use crate::error::{SupervisorError, SupervisorResult};
use hero_job::Job;
use std::collections::{HashMap, HashSet};

/// In-memory storage for all supervisor data
pub struct Store {
    /// API keys (key_value -> ApiKey)
    api_keys: HashMap<String, ApiKey>,
    /// Registered runner IDs
    runners: HashSet<String>,
    /// In-memory job storage (job_id -> Job)
    jobs: HashMap<String, Job>,
}

impl Store {
    /// Create a new store
    pub fn new() -> Self {
        Self {
            api_keys: HashMap::new(),
            runners: HashSet::new(),
            jobs: HashMap::new(),
        }
    }

    // ==================== API Key Operations ====================
    /// Create an API key with a specific value
    pub fn key_create(&mut self, key: ApiKey) -> ApiKey {
        // Keyed by the key value (not the name), matching the map's documented layout
        // and the lookups done by key_get/key_delete.
        self.api_keys.insert(key.key.clone(), key.clone());
        key
    }

    /// Create a new API key with generated UUID
    pub fn key_create_new(&mut self, name: String, scope: ApiKeyScope) -> ApiKey {
        let key = ApiKey::new(name, scope);
        self.api_keys.insert(key.key.clone(), key.clone());
        key
    }

    /// Get an API key by its value
    pub fn key_get(&self, key_value: &str) -> Option<&ApiKey> {
        self.api_keys.get(key_value)
    }

    /// Delete an API key
    pub fn key_delete(&mut self, key_value: &str) -> Option<ApiKey> {
        self.api_keys.remove(key_value)
    }

    /// List all API keys
    pub fn key_list(&self) -> Vec<ApiKey> {
        self.api_keys.values().cloned().collect()
    }

    /// List API keys by scope
    pub fn key_list_by_scope(&self, scope: ApiKeyScope) -> Vec<ApiKey> {
        self.api_keys
            .values()
            .filter(|k| k.scope == scope)
            .cloned()
            .collect()
    }
    // ==================== Runner Operations ====================

    /// Add a runner
    pub fn runner_add(&mut self, runner_id: String) -> SupervisorResult<()> {
        self.runners.insert(runner_id);
        Ok(())
    }

    /// Remove a runner
    pub fn runner_remove(&mut self, runner_id: &str) -> SupervisorResult<()> {
        self.runners.remove(runner_id);
        Ok(())
    }

    /// Check if a runner exists
    pub fn runner_exists(&self, runner_id: &str) -> bool {
        self.runners.contains(runner_id)
    }

    /// List all runner IDs
    pub fn runner_list_all(&self) -> Vec<String> {
        self.runners.iter().cloned().collect()
    }

    // ==================== Job Operations ====================

    /// Store a job in memory
    pub fn job_store(&mut self, job: Job) -> SupervisorResult<()> {
        self.jobs.insert(job.id.clone(), job);
        Ok(())
    }

    /// Get a job from memory
    pub fn job_get(&self, job_id: &str) -> SupervisorResult<Job> {
        self.jobs
            .get(job_id)
            .cloned()
            .ok_or_else(|| SupervisorError::JobNotFound {
                job_id: job_id.to_string(),
            })
    }

    /// Delete a job from memory
    pub fn job_delete(&mut self, job_id: &str) -> SupervisorResult<()> {
        self.jobs
            .remove(job_id)
            .ok_or_else(|| SupervisorError::JobNotFound {
                job_id: job_id.to_string(),
            })?;
        Ok(())
    }

    /// List all job IDs
    pub fn job_list(&self) -> Vec<String> {
        self.jobs.keys().cloned().collect()
    }

    /// Check if a job exists
    pub fn job_exists(&self, job_id: &str) -> bool {
        self.jobs.contains_key(job_id)
    }
}

impl Clone for Store {
    fn clone(&self) -> Self {
        Self {
            api_keys: self.api_keys.clone(),
            runners: self.runners.clone(),
            jobs: self.jobs.clone(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use hero_job::JobBuilder;

    fn create_test_store() -> Store {
        Store::new()
    }

    fn create_test_job(id: &str, runner: &str) -> Job {
        let mut job = JobBuilder::new()
            .caller_id("test_caller")
            .context_id("test_context")
            .runner(runner)
            .executor("test")
            .payload("test payload")
            .build()
            .unwrap();
        job.id = id.to_string(); // Set ID manually
        job
    }

    #[test]
    fn test_api_key_operations() {
        let mut store = create_test_store();

        // Create key
        let key = store.key_create_new("test_key".to_string(), ApiKeyScope::Admin);
        assert_eq!(key.name, "test_key");
        assert_eq!(key.scope, ApiKeyScope::Admin);

        // Get key
        let retrieved = store.key_get(&key.key);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().name, "test_key");

        // List keys
        let keys = store.key_list();
        assert_eq!(keys.len(), 1);

        // List by scope
        let admin_keys = store.key_list_by_scope(ApiKeyScope::Admin);
        assert_eq!(admin_keys.len(), 1);

        // Delete key
        let removed = store.key_delete(&key.key);
        assert!(removed.is_some());
        assert!(store.key_get(&key.key).is_none());
    }

    #[test]
    fn test_runner_operations() {
        let mut store = create_test_store();

        // Add runner
        assert!(store.runner_add("runner1".to_string()).is_ok());
        assert!(store.runner_exists("runner1"));

        // List runners
        let runners = store.runner_list_all();
        assert_eq!(runners.len(), 1);
        assert!(runners.contains(&"runner1".to_string()));

        // List all runners
        let all_runners = store.runner_list_all();
        assert_eq!(all_runners.len(), 1);

        // Remove runner
        assert!(store.runner_remove("runner1").is_ok());
        assert!(!store.runner_exists("runner1"));
    }

    #[test]
    fn test_job_operations() {
        let mut store = create_test_store();
        let job = create_test_job("job1", "runner1");

        // Store job
        assert!(store.job_store(job.clone()).is_ok());
        assert!(store.job_exists("job1"));

        // Get job
        let retrieved = store.job_get("job1");
        assert!(retrieved.is_ok());
        assert_eq!(retrieved.unwrap().id, "job1");

        // List jobs
        let jobs = store.job_list();
        assert_eq!(jobs.len(), 1);
        assert!(jobs.contains(&"job1".to_string()));

        // Delete job
        assert!(store.job_delete("job1").is_ok());
        assert!(!store.job_exists("job1"));
        assert!(store.job_get("job1").is_err());
    }

    #[test]
    fn test_job_not_found() {
        let store = create_test_store();
        let result = store.job_get("nonexistent");
        assert!(result.is_err());
    }

    #[test]
    fn test_multiple_jobs() {
        let mut store = create_test_store();

        // Add multiple jobs
        for i in 1..=3 {
            let job = create_test_job(&format!("job{}", i), "runner1");
            assert!(store.job_store(job).is_ok());
        }

        // Verify all exist
        assert_eq!(store.job_list().len(), 3);
        assert!(store.job_exists("job1"));
        assert!(store.job_exists("job2"));
        assert!(store.job_exists("job3"));

        // Delete one
        assert!(store.job_delete("job2").is_ok());
        assert_eq!(store.job_list().len(), 2);
        assert!(!store.job_exists("job2"));
    }

    #[test]
    fn test_store_clone() {
        let mut store = create_test_store();
        store.runner_add("runner1".to_string()).unwrap();

        let job = create_test_job("job1", "runner1");
        store.job_store(job).unwrap();

        // Clone the store
        let cloned = store.clone();

        // Verify cloned data
        assert!(cloned.runner_exists("runner1"));
        assert!(cloned.job_exists("job1"));
    }
}
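Note that `Store` drops the per-map `Arc<Mutex<...>>` wrapping the old services used, so a caller that needs shared mutable access would wrap the whole store instead. A minimal sketch, assuming it lives alongside `Store` (so `SupervisorResult` and `Job` are in scope) and using tokio's `Mutex` as elsewhere in the crate; the `SharedStore` name is illustrative.

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

// Illustrative only: one shared Store behind a single async lock.
#[derive(Clone)]
pub struct SharedStore {
    inner: Arc<Mutex<Store>>,
}

impl SharedStore {
    pub fn new() -> Self {
        Self { inner: Arc::new(Mutex::new(Store::new())) }
    }

    pub async fn runner_add(&self, runner_id: String) -> SupervisorResult<()> {
        self.inner.lock().await.runner_add(runner_id)
    }

    pub async fn job_get(&self, job_id: &str) -> SupervisorResult<Job> {
        self.inner.lock().await.job_get(job_id)
    }
}
```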
File diff suppressed because it is too large.

195  core/tests/README.md  Normal file
@@ -0,0 +1,195 @@
# Supervisor End-to-End Tests

Comprehensive integration tests for all Hero Supervisor OpenRPC client methods.

## Prerequisites

1. **Redis Server Running:**
   ```bash
   redis-server
   ```

2. **Supervisor Running:**
   ```bash
   cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
   ./scripts/run.sh
   ```

## Running Tests

### Run All Tests
```bash
cargo test --test end_to_end
```

### Run Specific Test
```bash
cargo test --test end_to_end test_01_rpc_discover
```

### Run with Output
```bash
cargo test --test end_to_end -- --nocapture
```

### Run in Order (Sequential)
```bash
cargo test --test end_to_end -- --test-threads=1 --nocapture
```

## Test Coverage

### ✅ Discovery & Info
- `test_01_rpc_discover` - OpenRPC specification discovery
- `test_15_supervisor_info` - Supervisor information

### ✅ Runner Management
- `test_02_runner_register` - Register a new runner
- `test_03_runner_list` - List all runners
- `test_14_runner_remove` - Remove a runner

### ✅ Job Management
- `test_04_jobs_create` - Create a job without running it
- `test_05_jobs_list` - List all jobs
- `test_06_job_run_simple` - Run a job and wait for the result
- `test_07_job_status` - Get job status
- `test_08_job_get` - Get job by ID
- `test_09_job_delete` - Delete a job

### ✅ Authentication & API Keys
- `test_10_auth_verify` - Verify the current API key
- `test_11_auth_key_create` - Create a new API key
- `test_12_auth_key_list` - List all API keys
- `test_13_auth_key_remove` - Remove an API key

### ✅ Complete Workflow
- `test_99_complete_workflow` - End-to-end integration test

## Test Configuration

Tests use the following defaults:
- **Supervisor URL:** `http://127.0.0.1:3030`
- **Admin Secret:** `807470fd1e1ccc3fb997a1d4177cceb31a68cb355a4412c8fd6e66e517e902be`
- **Test Runner:** `test-runner` (all tests use this runner name)

**Important:** All tests use the same runner name (`test-runner`), so you only need to start one runner with that name to run all tests.

## Expected Behavior

### Successful Tests
All tests should pass when:
- The supervisor is running on port 3030
- The admin secret matches the configuration
- Redis is accessible

### Expected Warnings
Some tests may show warnings if:
- `job.run` times out (no actual runner connected to Redis)
- Runners already exist from previous test runs

These are expected and don't indicate test failure.

## Troubleshooting

### Connection Refused
```
Error: tcp connect error, 127.0.0.1:3030, Connection refused
```
**Solution:** Start the supervisor with `./scripts/run.sh`

### Method Not Found
```
Error: Method not found
```
**Solution:** Rebuild the supervisor with the latest code:
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
cargo build
```

### Authorization Failed
```
Error: Missing Authorization header
```
**Solution:** Check that `ADMIN_SECRET` in the test matches the supervisor configuration

### Job Tests Timeout
```
Error: JsonRpc(RequestTimeout)
```
**Solution:** Make sure you have a runner connected with the name `test-runner`:
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner/rust
cargo run --bin runner_osiris -- test-runner
```

## Continuous Integration

To run tests in CI:

```bash
#!/bin/bash
# Start Redis
redis-server --daemonize yes

# Start Supervisor
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
./scripts/run.sh &
SUPERVISOR_PID=$!

# Wait for supervisor to be ready
sleep 2

# Run tests
cargo test --test end_to_end

# Cleanup
kill $SUPERVISOR_PID
redis-cli shutdown
```

## Adding New Tests

1. Create a new test function:
   ```rust
   #[tokio::test]
   async fn test_XX_my_new_test() {
       println!("\n🧪 Test: my.new.method");
       let client = create_client().await;
       // ... test code ...
       println!("✅ my.new.method works");
   }
   ```

2. Run it:
   ```bash
   cargo test --test end_to_end test_XX_my_new_test -- --nocapture
   ```

## Test Output Example

```
🧪 Test: rpc.discover
✅ rpc.discover works

🧪 Test: runner.register
✅ runner.register works - registered: test-runner-e2e

🧪 Test: runner.list
✅ runner.list works - found 3 runners
   - osiris
   - freezone
   - test-runner-e2e

🧪 Test: jobs.create
✅ jobs.create works - created job: 550e8400-e29b-41d4-a716-446655440000

...
```

## Notes

- Tests are designed to be idempotent (they can run multiple times)
- Tests clean up after themselves when possible
- Some tests depend on previous test state (use `--test-threads=1` for strict ordering)
- Job execution tests may time out if no runner is connected to Redis (this is expected)
408  core/tests/end_to_end.rs  Normal file
@@ -0,0 +1,408 @@
//! End-to-End Integration Tests for Hero Supervisor
//!
//! Tests all OpenRPC client methods against a running supervisor instance.

use hero_supervisor_openrpc_client::SupervisorClient;
use hero_job::{Job, JobBuilder};

/// Test configuration
const SUPERVISOR_URL: &str = "http://127.0.0.1:3030";
const ADMIN_SECRET: &str = "807470fd1e1ccc3fb997a1d4177cceb31a68cb355a4412c8fd6e66e517e902be";
const TEST_RUNNER_NAME: &str = "test-runner";

/// Helper to create a test client
async fn create_client() -> SupervisorClient {
    SupervisorClient::builder()
        .url(SUPERVISOR_URL)
        .secret(ADMIN_SECRET)
        .build()
        .expect("Failed to create supervisor client")
}

/// Helper to create a test job (always uses TEST_RUNNER_NAME)
fn create_test_job(payload: &str) -> Job {
    JobBuilder::new()
        .caller_id("e2e-test")
        .context_id("test-context")
        .runner(TEST_RUNNER_NAME)
        .payload(payload)
        .executor("rhai")
        .timeout(30)
        .build()
        .expect("Failed to build test job")
}

#[tokio::test]
async fn test_01_rpc_discover() {
    println!("\n🧪 Test: rpc.discover");

    let client = create_client().await;
    let result = client.discover().await;

    assert!(result.is_ok(), "rpc.discover should succeed");
    let spec = result.unwrap();

    // Verify it's a valid OpenRPC spec
    assert!(spec.get("openrpc").is_some(), "Should have openrpc field");
    assert!(spec.get("methods").is_some(), "Should have methods field");

    println!("✅ rpc.discover works");
}

#[tokio::test]
async fn test_02_runner_register() {
    println!("\n🧪 Test: runner.register");

    let client = create_client().await;

    // Register a test runner
    let result = client.register_runner(TEST_RUNNER_NAME).await;

    // Should succeed or already exist
    match result {
        Ok(name) => {
            assert_eq!(name, TEST_RUNNER_NAME);
            println!("✅ runner.register works - registered: {}", name);
        }
        Err(e) => {
            // If it fails, it might already exist, which is okay
            println!("⚠️  runner.register: {:?} (may already exist)", e);
        }
    }
}

#[tokio::test]
async fn test_03_runner_list() {
    println!("\n🧪 Test: runner.list");

    let client = create_client().await;

    // First ensure our test runner exists
    let _ = client.register_runner(TEST_RUNNER_NAME).await;

    // List all runners
    let result = client.list_runners().await;

    assert!(result.is_ok(), "runner.list should succeed");
    let runners = result.unwrap();

    assert!(!runners.is_empty(), "Should have at least one runner");
    assert!(runners.contains(&TEST_RUNNER_NAME.to_string()),
        "Should contain our test runner");

    println!("✅ runner.list works - found {} runners", runners.len());
    for runner in &runners {
        println!("   - {}", runner);
    }
}

#[tokio::test]
async fn test_04_jobs_create() {
    println!("\n🧪 Test: jobs.create");

    let client = create_client().await;

    // Ensure runner exists
    let _ = client.register_runner(TEST_RUNNER_NAME).await;

    // Create a job without running it
    let job = create_test_job("print('test job');");
    let result = client.jobs_create(job).await;

    match &result {
        Ok(_) => {},
        Err(e) => println!("   Error: {:?}", e),
    }
    assert!(result.is_ok(), "jobs.create should succeed");
    let job_id = result.unwrap();

    assert!(!job_id.is_empty(), "Should return a job ID");
    println!("✅ jobs.create works - created job: {}", job_id);
}

#[tokio::test]
async fn test_05_jobs_list() {
    println!("\n🧪 Test: jobs.list");

    let client = create_client().await;

    // Create a job first
    let _ = client.register_runner(TEST_RUNNER_NAME).await;
    let job = create_test_job("print('list test');");
    let _ = client.jobs_create(job).await;

    // List all jobs
    let result = client.jobs_list().await;

    assert!(result.is_ok(), "jobs.list should succeed");
    let jobs = result.unwrap();

    println!("✅ jobs.list works - found {} jobs", jobs.len());
}

#[tokio::test]
async fn test_06_job_run_simple() {
    println!("\n🧪 Test: job.run (simple script)");

    let client = create_client().await;

    // Ensure runner exists
    let _ = client.register_runner(TEST_RUNNER_NAME).await;

    // Run a simple job
    let job = create_test_job(r#"
        print("Hello from test!");
        42
    "#);

    let result = client.job_run(job, Some(30)).await;

    // Note: This will timeout if no runner is actually connected to Redis,
    // but we're testing the API call itself
    match result {
        Ok(response) => {
            println!("✅ job.run works - job_id: {}, status: {}",
                response.job_id, response.status);
        }
        Err(e) => {
            println!("⚠️  job.run: {:?} (runner may not be connected)", e);
            // This is expected if no actual runner is listening
        }
    }
}

#[tokio::test]
async fn test_07_job_status() {
    println!("\n🧪 Test: job.status");

    let client = create_client().await;

    // Create a job first
    let _ = client.register_runner(TEST_RUNNER_NAME).await;
    let job = create_test_job("print('status test');");
    let job_id = client.jobs_create(job).await.expect("Failed to create job");

    // Get job status
    let result = client.job_status(&job_id).await;

    assert!(result.is_ok(), "job.status should succeed");
    let status = result.unwrap();

    assert_eq!(status.job_id, job_id);
    println!("✅ job.status works - job: {}, status: {}",
        status.job_id, status.status);
}

#[tokio::test]
async fn test_08_job_get() {
    println!("\n🧪 Test: job.get");

    let client = create_client().await;

    // Create a job first
    let _ = client.register_runner(TEST_RUNNER_NAME).await;
    let original_job = create_test_job("print('get test');");
    let job_id = client.jobs_create(original_job.clone()).await
        .expect("Failed to create job");

    // Get the job
    let result = client.get_job(&job_id).await;

    assert!(result.is_ok(), "job.get should succeed");
    let job = result.unwrap();

    assert_eq!(job.id, job_id);
    println!("✅ job.get works - retrieved job: {}", job.id);
}

#[tokio::test]
async fn test_09_job_delete() {
    println!("\n🧪 Test: job.delete");

    let client = create_client().await;

    // Create a job first
    let _ = client.register_runner(TEST_RUNNER_NAME).await;
    let job = create_test_job("print('delete test');");
    let job_id = client.jobs_create(job).await.expect("Failed to create job");

    // Delete the job
    let result = client.job_delete(&job_id).await;

    assert!(result.is_ok(), "job.delete should succeed");
    println!("✅ job.delete works - deleted job: {}", job_id);

    // Verify it's gone
    let get_result = client.get_job(&job_id).await;
    assert!(get_result.is_err(), "Job should not exist after deletion");
}

#[tokio::test]
async fn test_10_auth_verify() {
    println!("\n🧪 Test: auth.verify");

    let client = create_client().await;

    let result = client.auth_verify().await;

    assert!(result.is_ok(), "auth.verify should succeed with valid key");
    let auth_info = result.unwrap();

    println!("✅ auth.verify works");
    println!("   Scope: {}", auth_info.scope);
    println!("   Name: {}", auth_info.name.unwrap_or_else(|| "N/A".to_string()));
}

#[tokio::test]
async fn test_11_auth_key_create() {
    println!("\n🧪 Test: auth.key.create");

    let client = create_client().await;

    let result = client.auth_create_key("test-key".to_string(), "user".to_string()).await;

    assert!(result.is_ok(), "auth.key.create should succeed");
    let api_key = result.unwrap();

    assert!(!api_key.key.is_empty(), "Should return a key");
    assert_eq!(api_key.name, "test-key");
    assert_eq!(api_key.scope, "user");

    println!("✅ auth.key.create works - created key: {}...",
        &api_key.key[..api_key.key.len().min(8)]);
}

#[tokio::test]
async fn test_12_auth_key_list() {
    println!("\n🧪 Test: auth.key.list");

    let client = create_client().await;

    // Create a key first
    let _ = client.auth_create_key("list-test-key".to_string(), "user".to_string()).await;

    let result = client.auth_list_keys().await;

    assert!(result.is_ok(), "auth.key.list should succeed");
    let keys = result.unwrap();

    println!("✅ auth.key.list works - found {} keys", keys.len());
    for key in &keys {
        println!("   - {} ({}): {}...", key.name, key.scope,
            &key.key[..key.key.len().min(8)]);
    }
}

#[tokio::test]
async fn test_13_auth_key_remove() {
    println!("\n🧪 Test: auth.key.remove");

    let client = create_client().await;

    // Create a key first
    let api_key = client.auth_create_key("remove-test-key".to_string(), "user".to_string())
        .await
        .expect("Failed to create key");

    // Remove it
    let result = client.auth_remove_key(api_key.key.clone()).await;

    assert!(result.is_ok(), "auth.key.remove should succeed");
    let removed = result.unwrap();

    assert!(removed, "Should return true when key is removed");
    println!("✅ auth.key.remove works - removed key: {}...",
        &api_key.key[..api_key.key.len().min(8)]);
}

#[tokio::test]
async fn test_14_runner_remove() {
    println!("\n🧪 Test: runner.remove");

    let client = create_client().await;

    // Register a runner to remove
    let runner_name = "test-runner-to-remove";
    let _ = client.register_runner(runner_name).await;

    // Remove it
    let result = client.remove_runner(runner_name).await;

    assert!(result.is_ok(), "runner.remove should succeed");
    println!("✅ runner.remove works - removed: {}", runner_name);

    // Verify it's gone
    let runners = client.list_runners().await.unwrap();
    assert!(!runners.contains(&runner_name.to_string()),
        "Runner should not exist after removal");
}

#[tokio::test]
async fn test_15_supervisor_info() {
    println!("\n🧪 Test: supervisor.info");

    let client = create_client().await;

    let result = client.get_supervisor_info().await;

    assert!(result.is_ok(), "supervisor.info should succeed");
    let info = result.unwrap();

    println!("✅ supervisor.info works");
    println!("   Server URL: {}", info.server_url);
}

/// Integration test that runs a complete workflow
#[tokio::test]
async fn test_99_complete_workflow() {
    println!("\n🧪 Test: Complete Workflow");

    let client = create_client().await;

    // 1. Register runner
    println!("  1. Registering runner...");
    let _ = client.register_runner("workflow-runner").await;

    // 2. List runners
    println!("  2. Listing runners...");
    let runners = client.list_runners().await.unwrap();
    assert!(runners.contains(&"workflow-runner".to_string()));

    // 3. Create API key
    println!("  3. Creating API key...");
    let api_key = client.auth_create_key("workflow-key".to_string(), "user".to_string())
        .await.unwrap();

    // 4. Verify auth
    println!("  4. Verifying auth...");
    let _ = client.auth_verify().await.unwrap();

    // 5. Create job
    println!("  5. Creating job...");
    let job = create_test_job("print('workflow test');");
    let job_id = client.jobs_create(job).await.unwrap();

    // 6. Get job status
    println!("  6. Getting job status...");
    let status = client.job_status(&job_id).await.unwrap();
    assert_eq!(status.job_id, job_id);

    // 7. List all jobs
    println!("  7. Listing all jobs...");
    let jobs = client.jobs_list().await.unwrap();
    assert!(!jobs.is_empty());

    // 8. Delete job
    println!("  8. Deleting job...");
    let _ = client.job_delete(&job_id).await.unwrap();

    // 9. Remove API key
    println!("  9. Removing API key...");
    let _ = client.auth_remove_key(api_key.key).await.unwrap();

    // 10. Remove runner
    println!("  10. Removing runner...");
    let _ = client.remove_runner("workflow-runner").await.unwrap();

    println!("✅ Complete workflow test passed!");
}