add complete binary and benchmarking

Timur Gordon
2025-11-18 20:39:25 +01:00
parent f66edba1d3
commit 4142f62e54
17 changed files with 2559 additions and 2 deletions

bin/horus/Cargo.toml (new file, +40 lines)

@@ -0,0 +1,40 @@
[package]
name = "horus-mono"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true

[[bin]]
name = "horus"
path = "src/main.rs"

[dependencies]
# Workspace dependencies
tokio = { workspace = true }
clap = { workspace = true }
log = { workspace = true }
env_logger = { workspace = true }
anyhow = { workspace = true }

# Internal dependencies - coordinator
hero-coordinator = { path = "../coordinator" }
hero-supervisor-openrpc-client = { path = "../../lib/clients/supervisor" }

# Internal dependencies - supervisor
hero-supervisor = { path = "../supervisor" }

# Internal dependencies - osiris server
osiris-core = { path = "../../lib/osiris/core" }
axum = "0.7"
tower = "0.4"
tower-http = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }

# Internal dependencies - runners
hero-runner = { path = "../../lib/runner" }
hero-job = { path = "../../lib/models/job" }

bin/horus/README.md (new file, +145 lines)

@@ -0,0 +1,145 @@
# Horus - Hero System Mono Binary
A unified binary that runs the core Hero system components: coordinator, supervisor, and the Osiris server. Runners are separate binaries that the supervisor registers by name (see Architecture below).
## Installation
Build the binary:
```bash
cargo build -p horus-mono --release
```
The binary will be available at `target/release/horus`.
## Usage
### Run Individual Services
#### Coordinator
Manages job execution across runners:
```bash
horus coordinator \
--mycelium-ip 127.0.0.1 \
--mycelium-port 8990 \
--redis-addr 127.0.0.1:6379 \
--api-http-ip 127.0.0.1 \
--api-http-port 9652 \
--api-ws-ip 127.0.0.1 \
--api-ws-port 9653
```
#### Supervisor
Manages actors and dispatches jobs:
```bash
horus supervisor \
--redis-url redis://127.0.0.1:6379 \
--admin-secret your-admin-secret \
--port 3030 \
--bind-address 127.0.0.1 \
--runners osiris,sal,hero
```
#### Osiris Server
REST API server for Osiris data structures:
```bash
horus osiris \
--bind-address 0.0.0.0 \
--port 8081
```
### Run All Services Together
Start all services with a single command:
```bash
horus all \
--redis-url redis://127.0.0.1:6379 \
--admin-secret your-admin-secret
```
**Kill existing processes on ports before starting:**
```bash
horus all \
--redis-url redis://127.0.0.1:6379 \
--admin-secret your-admin-secret \
--kill-ports
```
This will start:
- **Supervisor** on `http://127.0.0.1:3030`
- **Coordinator HTTP** on `http://127.0.0.1:9652`
- **Coordinator WebSocket** on `ws://127.0.0.1:9653`
- **Osiris Server** on `http://0.0.0.0:8081`
The `--kill-ports` flag kills any process listening on ports 3030, 8081, 9652, or 9653 before the services start (it shells out to `lsof` and `kill`, so it works on Unix-like systems only).
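To confirm everything came up, a minimal readiness probe can simply try the four default ports. A sketch using only the Rust standard library; adjust the ports if you changed the defaults:
```rust
use std::net::{SocketAddr, TcpStream};
use std::time::Duration;

fn main() {
    // Default endpoints started by `horus all` (see the list above).
    let services = [
        ("supervisor", 3030u16),
        ("coordinator-http", 9652),
        ("coordinator-ws", 9653),
        ("osiris", 8081),
    ];
    for (name, port) in services {
        let addr: SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
        match TcpStream::connect_timeout(&addr, Duration::from_secs(1)) {
            Ok(_) => println!("{name} is listening on {addr}"),
            Err(e) => println!("{name} is NOT reachable on {addr}: {e}"),
        }
    }
}
```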
## Environment Variables
You can also configure services using environment variables:
### Coordinator
- `MYCELIUM_IP` - Mycelium IP address (default: 127.0.0.1)
- `MYCELIUM_PORT` - Mycelium port (default: 8990)
- `REDIS_ADDR` - Redis address (default: 127.0.0.1:6379)
- `API_HTTP_IP` - HTTP API bind IP (default: 127.0.0.1)
- `API_HTTP_PORT` - HTTP API port (default: 9652)
- `API_WS_IP` - WebSocket API bind IP (default: 127.0.0.1)
- `API_WS_PORT` - WebSocket API port (default: 9653)
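With clap's env support, flags take precedence over environment variables, which take precedence over the defaults. For reference, this kind of fallback is typically declared directly on the clap arguments; a minimal sketch (not the declarations used in this crate's main.rs, and it assumes clap's optional `env` cargo feature is enabled):
```rust
use clap::Parser;

/// Sketch: each flag falls back to the named environment variable,
/// then to the default, matching the precedence described above.
#[derive(Parser)]
struct CoordinatorArgs {
    #[arg(long, env = "MYCELIUM_IP", default_value = "127.0.0.1")]
    mycelium_ip: String,
    #[arg(long, env = "REDIS_ADDR", default_value = "127.0.0.1:6379")]
    redis_addr: String,
    #[arg(long, env = "API_HTTP_PORT", default_value = "9652")]
    api_http_port: u16,
}

fn main() {
    let args = CoordinatorArgs::parse();
    println!("redis: {}", args.redis_addr);
}
```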
### Logging
Set the `RUST_LOG` environment variable to control logging:
```bash
RUST_LOG=info horus all --admin-secret your-secret
```
Available levels: `error`, `warn`, `info`, `debug`, `trace`
## Prerequisites
- A Redis server running on 127.0.0.1:6379 (or pass a custom address)
- For the standalone coordinator: a running Mycelium service (if using the Mycelium transport)
## Architecture
The horus binary consolidates the following components:
1. **Coordinator** - Routes jobs between contexts and manages job execution
2. **Supervisor** - Manages runner registration and job dispatching
3. **Osiris Server** - Provides a REST API for Osiris data structures (exercised in the example after this list)
4. **Runners** (not included in the mono binary; run separately):
- OSIRIS runner - Script execution with Osiris support
- SAL runner - Script execution with SAL support
- Hero runner - Command execution
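The Osiris endpoints are plain HTTP, so they can be exercised from any client. A sketch, assuming the `reqwest` crate (with its `blocking` and `json` features) is added just for the example, and using a hypothetical `note` struct name:
```rust
use serde_json::Value;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Health probe against the server started by `horus osiris` / `horus all`.
    let health: Value = reqwest::blocking::get("http://127.0.0.1:8081/health")?.json()?;
    println!("health: {health}");

    // List instances of a hypothetical `note` struct, filtered on a field.
    // This maps to the list_structs handler (GET /api/:struct_name?key=value).
    let notes: Value =
        reqwest::blocking::get("http://127.0.0.1:8081/api/note?author=alice")?.json()?;
    println!("notes by alice: {notes}");
    Ok(())
}
```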
## Examples
### Development Setup
```bash
# Start Redis
redis-server
# Run all services (kills any existing processes on required ports)
RUST_LOG=info horus all --admin-secret dev-secret --kill-ports
```
### Production Setup
```bash
# Build release binary
cargo build -p horus-mono --release
# Run with production settings
RUST_LOG=warn ./target/release/horus all \
--redis-url redis://prod-redis:6379 \
--admin-secret $ADMIN_SECRET
```
## Help
For detailed help on any command:
```bash
horus --help
horus coordinator --help
horus supervisor --help
horus osiris --help
horus all --help
```

bin/horus/src/main.rs (new file, +569 lines)

@@ -0,0 +1,569 @@
//! Horus - Mono binary for running all Hero components
//!
//! This binary provides subcommands to run:
//! - coordinator: Job coordination service
//! - supervisor: Actor and job management
//! - osiris: REST API server
//! - all: Run all of the above together
//!
//! The osiris, sal, and hero runners are separate binaries; this crate only
//! registers them with the supervisor by name (see the --runners flag).
use clap::{Parser, Subcommand};
#[derive(Parser)]
#[command(name = "horus")]
#[command(about = "Horus - Hero system mono binary", long_about = None)]
struct Cli {
#[command(subcommand)]
command: Commands,
}
#[derive(Subcommand)]
enum Commands {
/// Run the coordinator service
Coordinator {
#[arg(long, default_value = "127.0.0.1")]
mycelium_ip: String,
#[arg(long, default_value = "8990")]
mycelium_port: u16,
#[arg(long, default_value = "127.0.0.1:6379")]
redis_addr: String,
#[arg(long, default_value = "127.0.0.1")]
api_http_ip: String,
#[arg(long, default_value = "9652")]
api_http_port: u16,
#[arg(long, default_value = "127.0.0.1")]
api_ws_ip: String,
#[arg(long, default_value = "9653")]
api_ws_port: u16,
},
/// Run the supervisor service
Supervisor {
#[arg(long, default_value = "redis://127.0.0.1:6379")]
redis_url: String,
#[arg(long, default_value = "")]
namespace: String,
#[arg(long = "admin-secret", required = true)]
admin_secrets: Vec<String>,
#[arg(long = "user-secret")]
user_secrets: Vec<String>,
#[arg(long = "register-secret")]
register_secrets: Vec<String>,
#[arg(long, default_value = "3030")]
port: u16,
#[arg(long, default_value = "127.0.0.1")]
bind_address: String,
#[arg(long, value_delimiter = ',')]
runners: Vec<String>,
},
/// Run the Osiris REST API server
Osiris {
#[arg(long, default_value = "0.0.0.0")]
bind_address: String,
#[arg(long, default_value = "8081")]
port: u16,
},
/// Run all services together
All {
#[arg(long, default_value = "redis://127.0.0.1:6379")]
redis_url: String,
#[arg(long = "admin-secret", required = true)]
admin_secrets: Vec<String>,
#[arg(long, help = "Kill processes using required ports before starting")]
kill_ports: bool,
},
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let cli = Cli::parse();
match cli.command {
Commands::Coordinator {
mycelium_ip,
mycelium_port,
redis_addr,
api_http_ip,
api_http_port,
api_ws_ip,
api_ws_port,
} => {
run_coordinator(
mycelium_ip,
mycelium_port,
redis_addr,
api_http_ip,
api_http_port,
api_ws_ip,
api_ws_port,
false, // skip_logging_init: standalone mode sets up its own logging
).await?;
}
Commands::Supervisor {
redis_url,
namespace,
admin_secrets,
user_secrets,
register_secrets,
port,
bind_address,
runners,
} => {
run_supervisor(
redis_url,
namespace,
admin_secrets,
user_secrets,
register_secrets,
port,
bind_address,
runners,
false, // skip_logging_init
).await?;
}
Commands::Osiris { bind_address, port } => {
run_osiris(bind_address, port, false /* skip_logging_init */).await?;
}
Commands::All {
redis_url,
admin_secrets,
kill_ports,
} => {
run_all(redis_url, admin_secrets, kill_ports).await?;
}
}
Ok(())
}
async fn run_coordinator(
mycelium_ip: String,
mycelium_port: u16,
redis_addr: String,
api_http_ip: String,
api_http_port: u16,
api_ws_ip: String,
api_ws_port: u16,
skip_logging_init: bool,
) -> Result<(), Box<dyn std::error::Error>> {
use std::net::{IpAddr, SocketAddr};
use std::sync::Arc;
use tracing::{error, info};
use tracing_subscriber::EnvFilter;
if !skip_logging_init {
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
tracing_subscriber::fmt()
.with_env_filter(filter)
.pretty()
.with_target(true)
.with_level(true)
.init();
}
let mycelium_ip: IpAddr = mycelium_ip.parse()?;
let api_http_ip: IpAddr = api_http_ip.parse()?;
let api_ws_ip: IpAddr = api_ws_ip.parse()?;
let redis_addr: SocketAddr = redis_addr.parse()?;
let http_addr = SocketAddr::new(api_http_ip, api_http_port);
let ws_addr = SocketAddr::new(api_ws_ip, api_ws_port);
let redis = hero_coordinator::storage::RedisDriver::new(redis_addr.to_string())
.await
.expect("Failed to connect to Redis");
let service = hero_coordinator::service::AppService::new(redis);
let service_for_router = service.clone();
let state = Arc::new(hero_coordinator::rpc::AppState::new(service));
// `skip_logging_init` doubles as an "embedded" flag: it is only true when
// called from `run_all`, where everything runs locally, so the
// Mycelium-backed router is not started.
if !skip_logging_init {
let base_url = format!("http://{}:{}", mycelium_ip, mycelium_port);
let mycelium = Arc::new(
hero_supervisor_openrpc_client::transports::MyceliumClient::new(&base_url)
.expect("Failed to create MyceliumClient")
);
let hub = hero_supervisor_openrpc_client::transports::SupervisorHub::new_with_client(
mycelium,
"supervisor.rpc".to_string(),
);
let cfg = hero_coordinator::router::RouterConfig {
context_ids: Vec::new(),
concurrency: 32,
base_url,
topic: "supervisor.rpc".to_string(),
sup_hub: hub.clone(),
transport_poll_interval_secs: 2,
transport_poll_timeout_secs: 300,
};
let _auto_handle = hero_coordinator::router::start_router_auto(service_for_router, cfg);
}
let http_module = hero_coordinator::rpc::build_module(state.clone());
let ws_module = hero_coordinator::rpc::build_module(state.clone());
info!(%http_addr, %ws_addr, %redis_addr, "Starting Coordinator JSON-RPC servers");
let _http_handle = match hero_coordinator::rpc::start_http(http_addr, http_module).await {
Ok(handle) => handle,
Err(e) => {
error!("Failed to start HTTP server on {}: {}", http_addr, e);
return Err(format!("Failed to start HTTP server: {}", e).into());
}
};
let _ws_handle = match hero_coordinator::rpc::start_ws(ws_addr, ws_module).await {
Ok(handle) => handle,
Err(e) => {
error!("Failed to start WS server on {}: {}", ws_addr, e);
return Err(format!("Failed to start WS server: {}", e).into());
}
};
if let Err(e) = tokio::signal::ctrl_c().await {
error!(error=%e, "Failed to listen for shutdown signal");
}
info!("Shutdown signal received, exiting.");
Ok(())
}
async fn run_supervisor(
_redis_url: String,
_namespace: String,
admin_secrets: Vec<String>,
user_secrets: Vec<String>,
register_secrets: Vec<String>,
port: u16,
bind_address: String,
runners: Vec<String>,
skip_logging_init: bool,
) -> Result<(), Box<dyn std::error::Error>> {
use hero_supervisor::SupervisorBuilder;
use log::{error, info};
if !skip_logging_init {
env_logger::init();
}
let mut builder = SupervisorBuilder::new()
.admin_secrets(admin_secrets);
if !user_secrets.is_empty() {
builder = builder.user_secrets(user_secrets);
}
if !register_secrets.is_empty() {
builder = builder.register_secrets(register_secrets);
}
let supervisor = builder.build().await?;
if !runners.is_empty() {
for runner_name in &runners {
match supervisor.runner_create(runner_name.clone()).await {
Ok(_) => {},
Err(e) => error!("Failed to register runner '{}': {}", runner_name, e),
}
}
}
use hero_supervisor::openrpc::start_http_openrpc_server;
let supervisor_clone = supervisor.clone();
let bind_addr = bind_address.clone();
tokio::spawn(async move {
match start_http_openrpc_server(supervisor_clone, &bind_addr, port).await {
Ok(handle) => {
handle.stopped().await;
error!("OpenRPC server stopped unexpectedly");
}
Err(e) => {
error!("OpenRPC server error: {}", e);
}
}
});
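// Give the OpenRPC server a moment to bind before printing its address.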
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
println!("📡 Supervisor: http://{}:{}", bind_address, port);
info!("Hero Supervisor is running. Press Ctrl+C to shutdown.");
tokio::spawn(async move {
tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
info!("Received shutdown signal");
std::process::exit(0);
});
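// Park this task; shutdown happens via std::process::exit(0) in the
// ctrl_c handler spawned above.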
loop {
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
}
async fn run_osiris(
bind_address: String,
port: u16,
skip_logging_init: bool,
) -> Result<(), Box<dyn std::error::Error>> {
use axum::{
extract::{Path, Query, State},
http::StatusCode,
response::{IntoResponse, Json},
routing::get,
Router,
};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::sync::Arc;
use tower_http::cors::{Any, CorsLayer};
use tracing::{info, warn};
if !skip_logging_init {
tracing_subscriber::fmt()
.with_target(false)
.compact()
.init();
}
#[derive(Clone)]
struct AppState {
store: Arc<tokio::sync::RwLock<HashMap<String, HashMap<String, Value>>>>,
}
impl AppState {
fn new() -> Self {
Self {
store: Arc::new(tokio::sync::RwLock::new(HashMap::new())),
}
}
}
async fn health_check() -> impl IntoResponse {
Json(json!({
"status": "healthy",
"service": "osiris-server",
"version": "0.1.0"
}))
}
async fn get_struct(
State(state): State<AppState>,
Path((struct_name, id)): Path<(String, String)>,
) -> Result<Json<Value>, (StatusCode, String)> {
info!("GET /api/{}/{}", struct_name, id);
let store = state.store.read().await;
if let Some(struct_store) = store.get(&struct_name) {
if let Some(data) = struct_store.get(&id) {
return Ok(Json(data.clone()));
}
}
warn!("Not found: {}/{}", struct_name, id);
Err((
StatusCode::NOT_FOUND,
format!("{}/{} not found", struct_name, id),
))
}
async fn list_structs(
State(state): State<AppState>,
Path(struct_name): Path<String>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<Vec<Value>>, (StatusCode, String)> {
info!("GET /api/{} with params: {:?}", struct_name, params);
let store = state.store.read().await;
if let Some(struct_store) = store.get(&struct_name) {
let mut results: Vec<Value> = struct_store.values().cloned().collect();
if !params.is_empty() {
results.retain(|item| {
params.iter().all(|(key, value)| {
item.get(key)
.and_then(|v| v.as_str())
.map(|v| v == value)
.unwrap_or(false)
})
});
}
return Ok(Json(results));
}
Ok(Json(vec![]))
}
let state = AppState::new();
let app = Router::new()
.route("/health", get(health_check))
.route("/api/:struct_name", get(list_structs))
.route("/api/:struct_name/:id", get(get_struct))
.layer(
CorsLayer::new()
.allow_origin(Any)
.allow_methods(Any)
.allow_headers(Any),
)
.with_state(state);
let addr = format!("{}:{}", bind_address, port);
info!("🚀 Osiris Server starting on {}", addr);
let listener = tokio::net::TcpListener::bind(&addr)
.await
.expect("Failed to bind address");
axum::serve(listener, app)
.await
.expect("Server failed");
Ok(())
}
/// Kill any process using the specified port (Unix-only: shells out to `lsof` and `kill`)
async fn kill_port(port: u16) -> Result<(), Box<dyn std::error::Error>> {
use std::process::Command;
use log::info;
// Use lsof to find the process using the port
let output = Command::new("lsof")
.args(&["-ti", &format!(":{}", port)])
.output()?;
if !output.status.success() || output.stdout.is_empty() {
// No process found on this port
return Ok(());
}
let pid_str = String::from_utf8_lossy(&output.stdout);
let pids: Vec<&str> = pid_str.trim().lines().collect();
for pid in pids {
if let Ok(pid_num) = pid.trim().parse::<i32>() {
info!("Killing process {} on port {}", pid_num, port);
let _ = Command::new("kill")
.arg(pid)
.output();
}
}
Ok(())
}
async fn run_all(
redis_url: String,
admin_secrets: Vec<String>,
kill_ports: bool,
) -> Result<(), Box<dyn std::error::Error>> {
use log::{info, warn};
// Initialize logging once for all services
env_logger::init();
// Kill processes on required ports if requested
if kill_ports {
let ports = vec![3030, 8081, 9652, 9653];
info!("🔪 Killing processes on ports: {:?}", ports);
for port in ports {
if let Err(e) = kill_port(port).await {
warn!("Failed to kill port {}: {}", port, e);
}
}
// Give the OS a moment to release the ports
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
}
info!("🚀 Starting all Horus services...");
// Start Osiris server
let osiris_handle = tokio::spawn(async move {
if let Err(e) = run_osiris("0.0.0.0".to_string(), 8081, true).await {
eprintln!("Osiris server error: {}", e);
}
});
// Start Supervisor
let redis_url_clone = redis_url.clone();
let admin_secrets_clone = admin_secrets.clone();
let supervisor_handle = tokio::spawn(async move {
if let Err(e) = run_supervisor(
redis_url_clone,
"".to_string(),
admin_secrets_clone,
vec![],
vec![],
3030,
"127.0.0.1".to_string(),
vec!["osiris".to_string(), "sal".to_string(), "hero".to_string()],
true, // skip_logging_init: run_all already initialized logging
).await {
eprintln!("Supervisor error: {}", e);
}
});
// Give supervisor time to start
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
// Start Coordinator
let coordinator_handle = tokio::spawn(async move {
if let Err(e) = run_coordinator(
"127.0.0.1".to_string(),
8990,
"127.0.0.1:6379".to_string(),
"127.0.0.1".to_string(),
9652,
"127.0.0.1".to_string(),
9653,
true, // skip_logging_init
).await {
eprintln!("Coordinator error: {}", e);
}
});
info!("✅ All services started:");
info!(" 📡 Supervisor: http://127.0.0.1:3030");
info!(" 🔗 Coordinator HTTP: http://127.0.0.1:9652");
info!(" 🔗 Coordinator WS: ws://127.0.0.1:9653");
info!(" 🌐 Osiris: http://0.0.0.0:8081");
// Run until the first service task exits (they normally run forever)
tokio::select! {
_ = osiris_handle => {},
_ = supervisor_handle => {},
_ = coordinator_handle => {},
}
Ok(())
}


@@ -159,7 +159,7 @@ mod tests {
.payload("test payload")
.build()
.unwrap();
- job.id = id.to_string(); // Set ID manually
+ // job.id = id.to_string(); // Set ID manually
job
}