move repos into monorepo
This commit is contained in:
36  .gitignore  vendored  Normal file
@@ -0,0 +1,36 @@
# Rust
/target
**/*.rs.bk
*.pdb
Cargo.lock

# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store

# Environment
.env
.env.local

# Logs
*.log

# Testing
/test-data

# Build artifacts
/dist
/pkg
*.wasm
wasm-pack.log

# Documentation
/book

# Temporary files
*.tmp
*.temp
99  Cargo.toml  Normal file
@@ -0,0 +1,99 @@
[workspace]
resolver = "2"
members = [
    "bin/coordinator",
    "bin/osiris",
    "bin/runners/osiris",
    "bin/runners/sal",
    "bin/supervisor",
    "lib/clients/job",
    "lib/clients/osiris",
    "lib/clients/supervisor",
    "lib/models/job",
    "lib/osiris/core",
    "lib/osiris/derive",
    "lib/runner",
]

[workspace.package]
version = "0.1.0"
edition = "2024"
authors = ["Hero Team"]
license = "MIT OR Apache-2.0"
repository = "https://git.ourworld.tf/herocode/horus"

[workspace.dependencies]
# Async runtime
tokio = { version = "1.0", features = ["full"] }
async-trait = "0.1"

# Serialization
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

# Error handling
thiserror = "1.0"
anyhow = "1.0"

# Logging
log = "0.4"
env_logger = "0.11"

# Time
chrono = { version = "0.4", features = ["serde"] }

# UUID
uuid = { version = "1.6", features = ["v4", "serde"] }

# Redis
redis = { version = "0.25", features = ["tokio-comp", "connection-manager"] }

# JSON-RPC
jsonrpsee = { version = "0.26", features = ["server", "macros", "http-client"] }

# HTTP/Web
tower = "0.5"
tower-http = { version = "0.5", features = ["cors", "trace"] }
hyper = { version = "1.0", features = ["full"] }
hyper-util = { version = "0.1", features = ["tokio"] }
http = "1.0"
http-body-util = "0.1"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

# CLI
clap = { version = "4.4", features = ["derive", "env"] }
toml = "0.8"

# WASM
wasm-bindgen = "0.2"
wasm-bindgen-futures = "0.4"
js-sys = "0.3"
web-sys = "0.3"
serde-wasm-bindgen = "0.6"
console_log = "1.0"
getrandom = { version = "0.2", features = ["js"] }

# Crypto
secp256k1 = { version = "0.29", features = ["rand", "global-context"] }
sha2 = "0.10"
hex = "0.4"

# Collections
indexmap = "2.0"
dashmap = "6.0"
lazy_static = "1.4"

# Utilities
futures = "0.3"

# Scripting
rhai = { version = "1.21.0", features = ["std", "sync", "serde"] }

# Testing
tempfile = "3.8"

[profile.release]
opt-level = 3
lto = true
codegen-units = 1
102  README.md  Normal file
@@ -0,0 +1,102 @@
# Horus

Horus is a comprehensive workspace for Hero infrastructure components.

## Structure

```
horus/
├── bin/
│   └── supervisor/        # Hero Supervisor - job orchestration and runner management
└── lib/
    └── clients/
        └── supervisor/    # OpenRPC client for Hero Supervisor (native + WASM)
```

## Components

### Hero Supervisor (`bin/supervisor`)

The Hero Supervisor manages job execution across distributed runners with:
- Job lifecycle management (create, start, stop, delete)
- Runner registration and management
- Redis-based job queuing
- Osiris integration for persistent storage
- OpenRPC JSON-RPC API with authentication
- CORS-enabled HTTP server

### Supervisor Client (`lib/clients/supervisor`)

OpenRPC client library for Hero Supervisor with dual-target support:
- **Native**: Full async Rust client using `jsonrpsee`
- **WASM**: Browser-compatible client for web applications

## Building

### Build everything
```bash
cargo build --workspace
```

### Build supervisor binary
```bash
cargo build -p hero-supervisor
```

### Build client library
```bash
cargo build -p hero-supervisor-openrpc-client
```

### Build WASM client
```bash
cd lib/clients/supervisor
wasm-pack build --target web
```

## Running

### Start the supervisor
```bash
cargo run -p hero-supervisor -- \
    --bind-address 127.0.0.1 \
    --port 3030 \
    --redis-url redis://127.0.0.1:6379
```

### With configuration file
```bash
cargo run -p hero-supervisor -- --config config.toml
```

## Development

### Run tests
```bash
cargo test --workspace
```

### Check all code
```bash
cargo check --workspace
```

### Format code
```bash
cargo fmt --all
```

### Lint
```bash
cargo clippy --workspace -- -D warnings
```

## Dependencies

- **Rust**: 1.70+
- **Redis**: Required for job queuing
- **Osiris**: Optional, for persistent storage

## License

MIT OR Apache-2.0
164  WORKSPACE_STRUCTURE.md  Normal file
@@ -0,0 +1,164 @@
# Horus Workspace Structure

The Horus workspace consolidates all Hero ecosystem components into a single, well-organized monorepo.

## Workspace Members

### Binaries (`bin/`)

#### `bin/supervisor/`
- **Package**: `hero-supervisor`
- **Description**: Main supervisor for managing actor runners
- **Binaries**: `supervisor`
- **Library**: `hero_supervisor`

#### `bin/osiris/`
- **Package**: `osiris-server`
- **Description**: Osiris HTTP server for object storage
- **Binaries**: `osiris`

#### `bin/runners/sal/`
- **Package**: `runner-sal`
- **Description**: System Abstraction Layer (SAL) runner
- **Binaries**: `runner_sal`

#### `bin/runners/osiris/`
- **Package**: `runner-osiris`
- **Description**: Osiris-backed runner with database support
- **Binaries**: `runner_osiris`

### Libraries (`lib/`)

#### Models (`lib/models/`)

##### `lib/models/job/`
- **Package**: `hero-job`
- **Description**: Job model types and builders
- **Library**: `hero_job`

#### Clients (`lib/clients/`)

##### `lib/clients/job/`
- **Package**: `hero-job-client`
- **Description**: Redis-based job client
- **Library**: `hero_job_client`

##### `lib/clients/supervisor/`
- **Package**: `hero-supervisor-openrpc-client`
- **Description**: OpenRPC client for supervisor (native + WASM)
- **Library**: `hero_supervisor_openrpc_client`

##### `lib/clients/osiris/`
- **Package**: `osiris-client`
- **Description**: Client library for Osiris
- **Library**: `osiris_client`

#### Core Libraries

##### `lib/runner/`
- **Package**: `hero-runner`
- **Description**: Core runner library for executing jobs
- **Library**: `hero_runner`

##### `lib/osiris/core/`
- **Package**: `osiris-core`
- **Description**: Osiris core - object storage and indexing
- **Library**: `osiris`

##### `lib/osiris/derive/`
- **Package**: `osiris-derive`
- **Description**: Derive macros for Osiris
- **Type**: Procedural macro crate

## Dependency Graph

```
bin/supervisor
├── lib/models/job
├── lib/clients/job
└── (jsonrpsee, redis, tokio, etc.)

bin/osiris
└── lib/osiris/core
    └── lib/osiris/derive

bin/runners/sal
├── lib/runner
│   ├── lib/models/job
│   └── lib/clients/job
└── (SAL modules from herolib_rust)

bin/runners/osiris
├── lib/runner
│   ├── lib/models/job
│   └── lib/clients/job
└── lib/osiris/core

lib/clients/supervisor
├── lib/models/job
└── (jsonrpsee, WASM support)

lib/clients/osiris
├── lib/models/job
└── lib/clients/supervisor

lib/clients/job
└── lib/models/job
```

## Build Commands

```bash
# Check entire workspace
cargo check --workspace

# Build entire workspace
cargo build --workspace

# Build specific package
cargo build -p hero-supervisor
cargo build -p osiris-core
cargo build -p runner-sal

# Run binaries
cargo run -p hero-supervisor --bin supervisor
cargo run -p osiris-server --bin osiris
cargo run -p runner-sal --bin runner_sal
cargo run -p runner-osiris --bin runner_osiris
```

## Migration Notes

### From External Repos

The following components were migrated into this workspace:

1. **Job** (`/herocode/job/rust`) → `lib/models/job` + `lib/clients/job`
2. **Runner** (`/herocode/runner/rust`) → `lib/runner` + `bin/runners/*`
3. **Osiris** (`/herocode/osiris`) → `lib/osiris/*` + `bin/osiris` + `lib/clients/osiris`
4. **Supervisor** (already in workspace) → `bin/supervisor` + `lib/clients/supervisor`

### Path Dependencies

All internal dependencies now use path-based references:
- `hero-job = { path = "../../lib/models/job" }`
- `osiris-core = { path = "../../lib/osiris/core" }`
- etc.

External dependencies (SAL modules, heromodels, etc.) remain as git dependencies.

## Workspace Configuration

Shared dependencies are defined in the root `Cargo.toml` under `[workspace.dependencies]`:
- tokio, async-trait
- serde, serde_json
- redis, uuid, chrono
- jsonrpsee, tower, hyper
- And more...

Individual crates reference these with `.workspace = true`:
```toml
[dependencies]
tokio.workspace = true
serde.workspace = true
```
42  bin/coordinator/Cargo.toml  Normal file
@@ -0,0 +1,42 @@
[package]
name = "hero-coordinator"
version.workspace = true
edition.workspace = true
description = "Hero Coordinator - Manages job execution across runners"
license = "MIT OR Apache-2.0"

[lib]
name = "hero_coordinator"
path = "src/lib.rs"

[[bin]]
name = "coordinator"
path = "src/main.rs"

[dependencies]
# Core dependencies
tokio.workspace = true
async-trait.workspace = true
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
clap.workspace = true

# Redis
redis.workspace = true

# JSON-RPC
jsonrpsee.workspace = true

# HTTP client
reqwest = { version = "0.12.7", features = ["json", "rustls-tls"] }

# Base64 encoding
base64 = "0.22.1"

# Tracing
tracing.workspace = true
tracing-subscriber.workspace = true

# Hero dependencies
hero-job = { path = "../../lib/models/job" }
28  bin/coordinator/README.md  Normal file
@@ -0,0 +1,28 @@
# herocoordinator

## Demo setup

A Python script is provided in the [scripts directory](./scripts/supervisor_flow_demo.py). This script
generates some demo jobs to be run by [a supervisor](https://git.ourworld.tf/herocode/supervisor).
Communication happens over [mycelium](https://github.com/threefoldtech/mycelium). To run the demo, a
supervisor must be running, which uses a mycelium instance to read and write messages. A __different__
mycelium instance needs to run for the coordinator (the supervisor can run on a different host than
the coordinator, as long as the two mycelium instances can reach each other).

An example of a local setup:

```bash
# Run a redis docker
docker run -it -d -p 6379:6379 --name redis redis
# Spawn mycelium node 1 with default settings. This also creates a TUN interface, though that is not
# necessary for the messages
mycelium
# Spawn mycelium node 2, connect to the first node
mycelium --key-file key.bin --peers tcp://127.0.0.1:9651 --disable-quic --disable-peer-discovery --api-addr 127.0.0.1:9989 --jsonrpc-addr 127.0.0.1:9990 --no-tun -t 8651
# Start the supervisor
supervisor --admin-secret admin123 --user-secret user123 --register-secret register123 --mycelium-url http://127.0.0.1:9990 --topic supervisor.rpc
# Start the coordinator
cargo run # (alternatively, run a compiled binary if one is present)
# Finally, invoke the demo script
python3 scripts/supervisor_flow_demo.py
```
142  bin/coordinator/main.rs  Normal file
@@ -0,0 +1,142 @@
use clap::Parser;
use std::net::{IpAddr, SocketAddr};
use std::sync::Arc;

use tracing::{error, info};
use tracing_subscriber::EnvFilter;

#[derive(Debug, Clone, Parser)]
#[command(
    name = "herocoordinator",
    version,
    about = "Hero Coordinator CLI",
    long_about = None
)]
struct Cli {
    #[arg(
        long = "mycelium-ip",
        short = 'i',
        env = "MYCELIUM_IP",
        default_value = "127.0.0.1",
        help = "IP address where Mycelium JSON-RPC is listening (default: 127.0.0.1)"
    )]
    mycelium_ip: IpAddr,

    #[arg(
        long = "mycelium-port",
        short = 'p',
        env = "MYCELIUM_PORT",
        default_value_t = 8990u16,
        help = "Port for Mycelium JSON-RPC (default: 8990)"
    )]
    mycelium_port: u16,

    #[arg(
        long = "redis-addr",
        short = 'r',
        env = "REDIS_ADDR",
        default_value = "127.0.0.1:6379",
        help = "Socket address of Redis instance (default: 127.0.0.1:6379)"
    )]
    redis_addr: SocketAddr,

    #[arg(
        long = "api-http-ip",
        env = "API_HTTP_IP",
        default_value = "127.0.0.1",
        help = "Bind IP for HTTP JSON-RPC server (default: 127.0.0.1)"
    )]
    api_http_ip: IpAddr,

    #[arg(
        long = "api-http-port",
        env = "API_HTTP_PORT",
        default_value_t = 9652u16,
        help = "Bind port for HTTP JSON-RPC server (default: 9652)"
    )]
    api_http_port: u16,

    #[arg(
        long = "api-ws-ip",
        env = "API_WS_IP",
        default_value = "127.0.0.1",
        help = "Bind IP for WebSocket JSON-RPC server (default: 127.0.0.1)"
    )]
    api_ws_ip: IpAddr,

    #[arg(
        long = "api-ws-port",
        env = "API_WS_PORT",
        default_value_t = 9653u16,
        help = "Bind port for WebSocket JSON-RPC server (default: 9653)"
    )]
    api_ws_port: u16,
}

#[tokio::main]
async fn main() {
    let cli = Cli::parse();

    // Initialize tracing subscriber (pretty formatter; controlled by RUST_LOG)
    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
    tracing_subscriber::fmt()
        .with_env_filter(filter)
        .pretty()
        .with_target(true)
        .with_level(true)
        .init();

    let http_addr = SocketAddr::new(cli.api_http_ip, cli.api_http_port);
    let ws_addr = SocketAddr::new(cli.api_ws_ip, cli.api_ws_port);

    // Initialize Redis driver
    let redis = herocoordinator::storage::RedisDriver::new(cli.redis_addr.to_string())
        .await
        .expect("Failed to connect to Redis");

    // Initialize Service
    let service = herocoordinator::service::AppService::new(redis);
    let service_for_router = service.clone();

    // Shared application state
    let state = Arc::new(herocoordinator::rpc::AppState::new(service));

    // Start router workers (auto-discovered contexts) using a single global SupervisorHub (no separate inbound listener)
    {
        let base_url = format!("http://{}:{}", cli.mycelium_ip, cli.mycelium_port);
        let hub = herocoordinator::clients::SupervisorHub::new(
            base_url.clone(),
            "supervisor.rpc".to_string(),
        )
        .expect("Failed to initialize SupervisorHub");
        let cfg = herocoordinator::router::RouterConfig {
            context_ids: Vec::new(), // ignored by start_router_auto
            concurrency: 32,
            base_url,
            topic: "supervisor.rpc".to_string(),
            sup_hub: hub.clone(),
            transport_poll_interval_secs: 2,
            transport_poll_timeout_secs: 300,
        };
        // Per-context outbound delivery loops (replies handled by SupervisorHub)
        let _auto_handle = herocoordinator::router::start_router_auto(service_for_router, cfg);
    }

    // Build RPC modules for both servers
    let http_module = herocoordinator::rpc::build_module(state.clone());
    let ws_module = herocoordinator::rpc::build_module(state.clone());

    info!(%http_addr, %ws_addr, redis_addr=%cli.redis_addr, "Starting JSON-RPC servers");

    // Start servers
    let _http_handle = herocoordinator::rpc::start_http(http_addr, http_module)
        .await
        .expect("Failed to start HTTP server");
    let _ws_handle = herocoordinator::rpc::start_ws(ws_addr, ws_module)
        .await
        .expect("Failed to start WS server");

    // Wait for Ctrl+C to terminate
    if let Err(e) = tokio::signal::ctrl_c().await {
        error!(error=%e, "Failed to listen for shutdown signal");
    }
    info!("Shutdown signal received, exiting.");
}
77  bin/coordinator/specs/architecture.md  Normal file
@@ -0,0 +1,77 @@
## per user

runs in a container or VM, one per user

- zinit
- herocoordinator
    - think of it like a DAG workflow manager
    - manages jobs which are sent around to different nodes
    - mycelium address range (part of mycelium on host)
- herodb
    - state manager
    - redis protocol / primitives
    - fs backend (mem, and always-append in future)
    - encryption & decryption primitives
    - key mgmt for encryption (creation, deletion)
    - openrpc admin features: user management, role-based access control
- postgresql + postgrest
- AI Agent TBD

```mermaid
%%{init: {"theme":"dark"}}%%
graph TD
    subgraph Per Node System
        N[Node] --> OS(Run on top of ZOS4 or Ubuntu or in a VM)

        subgraph On Node
            OS --> SV(Supervisors)
            OS --> ZN(Zinit)
            OS --> R(Runners)
            OS --> PGN(Some Nodes: PostgreSQL + Postgrest)
            OS --> HDN(Each Node: Herodb)

            subgraph Supervisors Responsibilities
                SV --> SV_MR(Manage runners & scheduling for the node)
                SV --> SV_MJ(Monitor & schedule jobs)
                SV --> SV_RU(Check resource usage)
                SV --> SV_TO(Checks on timeout)
            end

            subgraph Runners Characteristics
                R --> R_LV(V/Python & Rust)
                R --> R_FORK(Uses fork per runner for scalability)
                R --> R_COUNT(Some runners can only run 1, others more)
                R --> R_CONTEXT(Some runners are per context)
            end
        end

        SV -- "Manage" --> R
        SV -- "Schedule jobs via" --> ZN
        ZN -- "Starts" --> R
        R -- "Interacts with" --> PGN
        R -- "Interacts with" --> HDN
    end
```

## per node

- runs on top of ZOS4 or Ubuntu or in a VM
- supervisors
    - manage the runners and the scheduling of these runners for the node
    - monitor & schedule jobs, check resource usage, check on timeouts
- zinit
- runners (are scheduled in zinit by the supervisor)
    - V/Python & Rust
    - uses a fork per runner (process) for scalability
    - some runners can only run 1 job at a time, others more
    - some runners are per context
- some nodes will have postgresql + postgrest
- each node has herodb

REMARK

- each rhai or heroscript running on a node can use herodb if needed (careful: contents can and will be lost), but cannot communicate with anyone else outside of the node
16  bin/coordinator/specs/hercoordinator.md  Normal file
@@ -0,0 +1,16 @@
will have an openrpc interface (see the client sketch below)

- start, stop, delete, list a DAG
- query the DAG and its status
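A minimal client sketch, assuming hypothetical `flow.start` / `flow.status` methods exposed on the coordinator's default HTTP JSON-RPC endpoint from `main.rs` (port 9652); the method names are assumptions, not a confirmed API:

```rust
// Hypothetical sketch: `flow.start` / `flow.status` are assumed method
// names, not the coordinator's confirmed OpenRPC surface.
use jsonrpsee::core::client::ClientT;
use jsonrpsee::http_client::HttpClientBuilder;
use jsonrpsee::rpc_params;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Default HTTP JSON-RPC bind address from main.rs.
    let client = HttpClientBuilder::default().build("http://127.0.0.1:9652")?;

    // Start a DAG (flow) by id, then query its status.
    let _: serde_json::Value = client.request("flow.start", rpc_params![33u32]).await?;
    let status: serde_json::Value = client.request("flow.status", rpc_params![33u32]).await?;
    println!("flow 33 status: {status}");
    Ok(())
}
```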
## remarks for supervisor

- no retry
- no dependencies

## inspiration

- DAGU
18  bin/coordinator/specs/model/actor.v  Normal file
@@ -0,0 +1,18 @@
module model

// an actor is a participant in the new internet, the one who can ask for work
// a user can have more than one actor operating for them; an actor always operates in a context which is hosted by the hero of the user
// stored in the context db at actor:<id> (actor is an hset)
@[heap]
pub struct Actor {
pub mut:
	id         u32
	pubkey     string
	address    []Address // address (to reach the actor back), normally mycelium but doesn't have to be
	created_at u32 // epoch
	updated_at u32 // epoch
}

pub fn (self Actor) redis_key() string {
	return 'actor:${self.id}'
}
20  bin/coordinator/specs/model/context.v  Normal file
@@ -0,0 +1,20 @@
module model

// each job is run in a context; this corresponds to a DB in redis and grants specific rights to actors
// a context is a redis db and also a location on a filesystem which can be used for e.g. logs, temporary files, etc.
// actors create contexts for others to work in
// stored in the context db at context:<id> (context is an hset)
@[heap]
pub struct Context {
pub mut:
	id         u32   // corresponds with the redis db (in our ourdb or other redis)
	admins     []u32 // actors which have admin rights on this context (means they can do everything)
	readers    []u32 // actors which can read the context info
	executors  []u32 // actors which can execute jobs in this context
	created_at u32 // epoch
	updated_at u32 // epoch
}

pub fn (self Context) redis_key() string {
	return 'context:${self.id}'
}
41  bin/coordinator/specs/model/flow.v  Normal file
@@ -0,0 +1,41 @@
module model

// what gets executed by an actor and needs to be tracked as a whole; can be represented as a DAG graph
// this is the high level representation of a workflow to execute on work; it is fully decentralized and distributed
// only the actor who created the flow can modify it and holds it in DB
// stored in the context db at flow:<id> (flow is an hset)
@[heap]
pub struct Flow {
pub mut:
	id         u32 // this flow id is given by the actor who called for it
	caller_id  u32 // the actor which called for this flow
	context_id u32 // each flow is executed in a context
	jobs       []u32 // links to all jobs which make up this flow; this can be dynamically modified
	env_vars   map[string]string // copied to every job of the flow
	result     map[string]string // the result of the flow
	created_at u32 // epoch
	updated_at u32 // epoch
	status     FlowStatus
}

pub fn (self Flow) redis_key() string {
	return 'flow:${self.id}'
}

// FlowStatus represents the status of a flow
pub enum FlowStatus {
	dispatched
	started
	error
	finished
}

// str returns the string representation of FlowStatus
pub fn (self FlowStatus) str() string {
	return match self {
		.dispatched { 'dispatched' }
		.started { 'started' }
		.error { 'error' }
		.finished { 'finished' }
	}
}
68  bin/coordinator/specs/model/message.v  Normal file
@@ -0,0 +1,68 @@
module model

// Messages are what goes over mycelium (which is our messaging system); they can have a job inside
// stored in the context db at message:<callerid>:<id> (message is an hset)
// there are 2 generic queues in the context db, msg_in and msg_out: all messages from mycelium arrive in msg_in, and the ones which need to be sent are put on msg_out
@[heap]
pub struct Message {
pub mut:
	id                  u32 // unique id for the message, given by the caller
	caller_id           u32 // the actor who sent this message
	context_id          u32 // each message is for a specific context
	message             string
	message_type        ScriptType
	message_format_type MessageFormatType
	timeout             u32 // in sec, to arrive at destination
	timeout_ack         u32 // in sec, to acknowledge receipt
	timeout_result      u32 // in sec, to process result and have it back
	job                 []Job
	logs                []Log // e.g. for streaming logs back to the originator
	created_at          u32 // epoch
	updated_at          u32 // epoch
	status              MessageStatus
}

// MessageType represents the type of message
pub enum MessageType {
	job
	chat
	mail
}

// MessageFormatType represents the format of a message
pub enum MessageFormatType {
	html
	text
	md
}

pub fn (self Message) redis_key() string {
	return 'message:${self.caller_id}:${self.id}'
}

// queue_suffix returns the queue suffix for the message type
pub fn (mt MessageType) queue_suffix() string {
	return match mt {
		.job { 'job' }
		.chat { 'chat' }
		.mail { 'mail' }
	}
}

// MessageStatus represents the status of a message
pub enum MessageStatus {
	dispatched
	acknowledged
	error
	processed // e.g. can be something which comes back
}

// str returns the string representation of MessageStatus
pub fn (ms MessageStatus) str() string {
	return match ms {
		.dispatched { 'dispatched' }
		.acknowledged { 'acknowledged' }
		.error { 'error' }
		.processed { 'processed' }
	}
}
27  bin/coordinator/specs/model/runner.v  Normal file
@@ -0,0 +1,27 @@
module model

// a runner executes a job; this can be in a VM, in a container, or just some processes running somewhere
// the messages always come in over a topic
// stored in the context db at runner:<id> (runner is an hset)
@[heap]
pub struct Runner {
pub mut:
	id         u32
	pubkey     string // from mycelium
	address    string // mycelium address
	topic      string // needs to be set by the runner, but often runner<runnerid> e.g. runner20
	local      bool // if local then jobs go over redis using the id
	created_at u32 // epoch
	updated_at u32 // epoch
}

pub enum RunnerType {
	v
	python
	osis
	rust
}

pub fn (self Runner) redis_key() string {
	return 'runner:${self.id}'
}
64  bin/coordinator/specs/model/runnerjob.v  Normal file
@@ -0,0 +1,64 @@
module model

// RunnerJob represents a job; a job is only usable in the context of a runner (which is part of a hero)
// stored in the context db at job:<callerid>:<id> (job is an hset)
@[heap]
pub struct RunnerJob {
pub mut:
	id            u32 // this job id is given by the actor who called for it
	caller_id     u32 // the actor which called for this job
	context_id    u32 // each job is executed in a context
	script        string
	script_type   ScriptType
	timeout       u32 // in sec
	retries       u8
	env_vars      map[string]string
	result        map[string]string
	prerequisites []string
	dependends    []u32
	created_at    u32 // epoch
	updated_at    u32 // epoch
	status        JobStatus
}

// ScriptType represents the type of script
pub enum ScriptType {
	osis
	sal
	v
	python
}

pub fn (self RunnerJob) redis_key() string {
	return 'job:${self.caller_id}:${self.id}'
}

// queue_suffix returns the queue suffix for the script type
pub fn (st ScriptType) queue_suffix() string {
	return match st {
		.osis { 'osis' }
		.sal { 'sal' }
		.v { 'v' }
		.python { 'python' }
	}
}

// JobStatus represents the status of a job
pub enum JobStatus {
	dispatched
	waiting_for_prerequisites
	started
	error
	finished
}

// str returns the string representation of JobStatus
pub fn (js JobStatus) str() string {
	return match js {
		.dispatched { 'dispatched' }
		.waiting_for_prerequisites { 'waiting_for_prerequisites' }
		.started { 'started' }
		.error { 'error' }
		.finished { 'finished' }
	}
}
314  bin/coordinator/specs/models.md  Normal file
@@ -0,0 +1,314 @@
# Models Specification
*Freeflow Universe – mycojobs*

This document gathers **all data-models** that exist in the `lib/mycojobs/model/` package, together with a concise purpose description, field semantics, Redis storage layout and the role each model plays in the overall *decentralised workflow* architecture.

## Table of Contents
1. [Actor](#actor)
2. [Context](#context)
3. [Flow](#flow)
4. [Message](#message)
5. [Runner](#runner)
6. [RunnerJob](#runnerjob)
7. [Enums & Shared Types](#enums-shared-types)
8. [Key-generation helpers](#key-generation-helpers)

---

## <a name="actor"></a>1️⃣ `Actor` – Identity & entry-point

| Field | Type | Description |
|------|------|-------------|
| `id` | `u32` | Sequential identifier **unique per tenant**. Used as part of the Redis key `actor:<id>`. |
| `pubkey` | `string` | Public key (Mycelium-compatible) that authenticates the actor when it sends/receives messages. |
| `address` | `[]Address` | One or more reachable addresses (normally Mycelium topics) that other participants can use to contact the actor. |
| `created_at` | `u32` | Unix-epoch time when the record was created. |
| `updated_at` | `u32` | Unix-epoch time of the last mutation. |

### Purpose
* An **Actor** is the *human-or-service* that **requests work**, receives results and can be an administrator of a **Context**.
* It is the *security principal* – every operation in a context is authorised against the actor's ID and its public key signature.

### Redis representation

| Key | Example | Storage type | Fields |
|-----|---------|--------------|--------|
| `actor:${id}` | `actor:12` | **hash** (`HSET`) | `id`, `pubkey`, `address` (list), `created_at`, `updated_at` |
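Because every model is a plain Redis hash, reading one back is a single `HGETALL`. A minimal sketch with the `redis` crate (how the `address` list is encoded inside the hash is not pinned down by this spec):

```rust
// Sketch only: assumes the hash fields are stored as plain strings; the
// encoding of the `address` list is not fixed by the spec.
use redis::AsyncCommands;
use std::collections::HashMap;

async fn load_actor(
    conn: &mut redis::aio::MultiplexedConnection,
    id: u32,
) -> redis::RedisResult<HashMap<String, String>> {
    // Canonical key naming: actor:<id>
    conn.hgetall(format!("actor:{id}")).await
}

#[tokio::main]
async fn main() -> redis::RedisResult<()> {
    let client = redis::Client::open("redis://127.0.0.1:6379/")?;
    let mut conn = client.get_multiplexed_async_connection().await?;
    let actor = load_actor(&mut conn, 12).await?;
    println!("pubkey = {:?}", actor.get("pubkey"));
    Ok(())
}
```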

---

## <a name="context"></a>2️⃣ `Context` – Tenant & permission container

| Field | Type | Description |
|------|------|-------------|
| `id` | `u32` | Identifier that also selects the underlying **Redis DB** for this tenant. |
| `admins` | `[]u32` | Actor IDs that have **full control** (create/delete any object, manage permissions). |
| `readers` | `[]u32` | Actor IDs that may **read** any object in the context but cannot modify. |
| `executors` | `[]u32` | Actor IDs allowed to **run** `RunnerJob`s and update their status. |
| `created_at` | `u32` | Unix-epoch of creation. |
| `updated_at` | `u32` | Unix-epoch of last modification. |

### Purpose
* A **Context** isolates a *tenant* – each tenant gets its own Redis database and a dedicated filesystem area (for logs, temporary files, …).
* It stores **permission lists** that the system consults before any operation (e.g., creating a `Flow`, enqueuing a `RunnerJob`).

### Redis representation

| Key | Example | Storage type | Fields |
|-----|---------|--------------|--------|
| `context:${id}` | `context:7` | **hash** | `id`, `admins`, `readers`, `executors`, `created_at`, `updated_at` |

---

## <a name="flow"></a>3️⃣ `Flow` – High-level workflow (DAG)

| Field | Type | Description |
|------|------|-------------|
| `id` | `u32` | Flow identifier – *unique inside the creator's actor space*. |
| `caller_id` | `u32` | Actor that **created** the flow (owner). |
| `context_id` | `u32` | Context in which the flow lives. |
| `jobs` | `[]u32` | List of **RunnerJob** IDs that belong to this flow (the DAG edges are stored in each job's `dependends`). |
| `env_vars` | `map[string]string` | Global environment variables injected into **every** job of the flow. |
| `result` | `map[string]string` | Aggregated output produced by the flow (filled by the orchestrator when the flow finishes). |
| `created_at` | `u32` | Creation timestamp. |
| `updated_at` | `u32` | Last update timestamp. |
| `status` | `FlowStatus` | Current lifecycle stage (`dispatched`, `started`, `error`, `finished`). |

### Purpose
* A **Flow** is the *public-facing* representation of a **workflow**.
* It groups many `RunnerJob`s, supplies common env-vars, tracks overall status and collects the final result.
* Only the *creator* (the `caller_id`) may mutate the flow definition.

### Redis representation

| Key | Example | Storage type | Fields |
|-----|---------|--------------|--------|
| `flow:${id}` | `flow:33` | **hash** | `id`, `caller_id`, `context_id`, `jobs`, `env_vars`, `result`, `created_at`, `updated_at`, `status` |

### `FlowStatus` enum

| Value | Meaning |
|-------|---------|
| `dispatched` | Flow has been stored but not yet started. |
| `started` | At least one job is running. |
| `error` | One or more jobs failed; flow aborted. |
| `finished` | All jobs succeeded, `result` is final. |

---

## <a name="message"></a>4️⃣ `Message` – Transport unit (Mycelium)

| Field | Type | Description |
|------|------|-------------|
| `id` | `u32` | Unique message identifier, assigned by the caller. |
| `caller_id` | `u32` | Actor that sent the message. |
| `context_id` | `u32` | Context the message belongs to. |
| `message` | `string` | The message body / payload. |
| `message_type` | `ScriptType` | *Kind* of the message – currently re-used for job payloads (`osis`, `sal`, `v`, `python`). |
| `message_format_type` | `MessageFormatType` | Formatting of `message` (`html`, `text`, `md`). |
| `timeout` | `u32` | Seconds before the message is considered *lost* if not delivered. |
| `timeout_ack` | `u32` | Seconds allowed for the receiver to acknowledge. |
| `timeout_result` | `u32` | Seconds allowed for the receiver to send back a result. |
| `job` | `[]Job` | Embedded **RunnerJob** objects (normally a single job). |
| `logs` | `[]Log` | Optional streaming logs attached to the message. |
| `created_at` | `u32` | Timestamp of creation. |
| `updated_at` | `u32` | Timestamp of latest update. |
| `status` | `MessageStatus` | Current lifecycle (`dispatched`, `acknowledged`, `error`, `processed`). |

### Purpose
* `Message` is the **payload carrier** that travels over **Mycelium** (the pub/sub system).
* It can be a **job request**, a **chat line**, an **email**, or any generic data that needs to be routed between actors, runners, or services.
* Every message is persisted as a Redis hash; the system also maintains two *generic* queues:

  * `msg_out` – outbound messages waiting to be handed to Mycelium.
  * `msg_in` – inbound messages that have already arrived and are awaiting local processing.

### Redis representation

| Key | Example | Storage type | Fields |
|-----|---------|--------------|--------|
| `message:${caller_id}:${id}` | `message:12:101` | **hash** | All fields above (`id`, `caller_id`, `context_id`, …, `status`). |

### `MessageType` enum (legacy – not used in current code but documented)

| Value | Meaning |
|-------|---------|
| `job` | Payload carries a `RunnerJob`. |
| `chat` | Human-to-human communication. |
| `mail` | Email-like message. |

### `MessageFormatType` enum

| Value | Meaning |
|-------|---------|
| `html` | HTML formatted body. |
| `text` | Plain-text. |
| `md` | Markdown. |

### `MessageStatus` enum

| Value | Meaning |
|-------|---------|
| `dispatched` | Stored, not yet processed. |
| `acknowledged` | Receiver has confirmed receipt. |
| `error` | Delivery or processing failed. |
| `processed` | Message handled (e.g., job result returned). |

---

## <a name="runner"></a>5️⃣ `Runner` – Worker that executes jobs

| Field | Type | Description |
|------|------|-------------|
| `id` | `u32` | Unique runner identifier. |
| `pubkey` | `string` | Public key of the runner (used by Mycelium for auth). |
| `address` | `string` | Mycelium address (e.g., `mycelium://…`). |
| `topic` | `string` | Pub/Sub topic the runner subscribes to; defaults to `runner${id}`. |
| `local` | `bool` | If `true`, the runner also consumes jobs directly from **Redis queues** (e.g., `queue:v`). |
| `created_at` | `u32` | Creation timestamp. |
| `updated_at` | `u32` | Last modification timestamp. |

### Purpose
* A **Runner** is the *execution engine* – it could be a VM, a container, or a process that knows how to run a specific script type (`v`, `python`, `osis`, `rust`).
* It **subscribes** to a Mycelium topic to receive job-related messages, and, when `local==true`, it also **polls** a Redis list named after the script-type (`queue:<suffix>`).

### Redis representation

| Key | Example | Storage type |
|-----|---------|--------------|
| `runner:${id}` | `runner:20` | **hash** *(all fields above)* |

### `RunnerType` enum

| Value | Intended runtime |
|-------|------------------|
| `v` | V language VM |
| `python` | CPython / PyPy |
| `osis` | OSIS-specific runtime |
| `rust` | Native Rust binary |

---

## <a name="runnerjob"></a>6️⃣ `RunnerJob` – Executable unit

| Field | Type | Description |
|------|------|-------------|
| `id` | `u32` | Job identifier **provided by the caller**. |
| `caller_id` | `u32` | Actor that created the job. |
| `context_id` | `u32` | Context in which the job will run. |
| `script` | `string` | Source code / command to be executed. |
| `script_type` | `ScriptType` | Language or runtime of the script (`osis`, `sal`, `v`, `python`). |
| `timeout` | `u32` | Maximum execution time (seconds). |
| `retries` | `u8` | Number of automatic retries on failure. |
| `env_vars` | `map[string]string` | Job-specific environment variables (merged with `Flow.env_vars`). |
| `result` | `map[string]string` | Key-value map that the job writes back upon completion. |
| `prerequisites` | `[]string` | Human-readable IDs of **external** prerequisites (e.g., files, other services). |
| `dependends` | `[]u32` | IDs of **other RunnerJob** objects that must finish before this job can start. |
| `created_at` | `u32` | Creation timestamp. |
| `updated_at` | `u32` | Last update timestamp. |
| `status` | `JobStatus` | Lifecycle status (`dispatched`, `waiting_for_prerequisites`, `started`, `error`, `finished`). |

### Purpose
* A **RunnerJob** is the *atomic piece of work* that a `Runner` executes.
* It lives inside a **Context**, is queued according to its `script_type`, and moves through a well-defined **state machine**.
* The `dependends` field enables the *DAG* behaviour that the `Flow` model represents at a higher level (see the sketch below).
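The scheduling rule implied by `dependends` is worth spelling out: a job may leave `waiting_for_prerequisites` only once every job it depends on is `finished`. A pure-logic sketch (the types are illustrative, not the workspace's `hero-job` API):

```rust
// Illustrative sketch of the DAG rule: a job becomes runnable once all
// jobs listed in `dependends` have finished.
use std::collections::HashMap;

#[allow(dead_code)]
enum JobStatus {
    Dispatched,
    WaitingForPrerequisites,
    Started,
    Error,
    Finished,
}

struct RunnerJob {
    dependends: Vec<u32>,
    status: JobStatus,
}

fn is_runnable(job: &RunnerJob, all: &HashMap<u32, RunnerJob>) -> bool {
    job.dependends
        .iter()
        .all(|dep| matches!(all.get(dep).map(|j| &j.status), Some(JobStatus::Finished)))
}

fn main() {
    let mut all = HashMap::new();
    all.insert(1, RunnerJob { dependends: vec![], status: JobStatus::Finished });
    let job = RunnerJob { dependends: vec![1], status: JobStatus::WaitingForPrerequisites };
    assert!(is_runnable(&job, &all));
}
```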

### Redis representation

| Key | Example | Storage type |
|-----|---------|--------------|
| `job:${caller_id}:${id}` | `job:12:2001` | **hash** *(all fields above)* |

### `ScriptType` enum

| Value | Runtime |
|-------|---------|
| `osis` | OSIS interpreter |
| `sal` | SAL DSL (custom) |
| `v` | V language |
| `python` | CPython / PyPy |

*The enum provides a **`queue_suffix()`** helper that maps a script type to the name of the Redis list used for local job dispatch (`queue:python`, `queue:v`, …).*
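On the consuming side, a *local* runner's loop can be a blocking pop on its script-type queue followed by status updates on the job hash. A minimal sketch with the `redis` crate (queue and field names follow the tables above; execution itself is stubbed):

```rust
// Sketch of a local runner loop: BRPOP a job key from queue:<suffix>,
// mark the job started, execute it (stubbed here), then mark it finished.
use redis::AsyncCommands;

async fn run_local_worker(
    conn: &mut redis::aio::MultiplexedConnection,
    suffix: &str, // e.g. "v" or "python", from ScriptType.queue_suffix()
) -> redis::RedisResult<()> {
    loop {
        // BRPOP blocks up to 5 seconds and returns (queue_name, job_key).
        let popped: Option<(String, String)> = redis::cmd("BRPOP")
            .arg(format!("queue:{suffix}"))
            .arg(5)
            .query_async(conn)
            .await?;
        let Some((_queue, job_key)) = popped else { continue };

        let _: () = conn.hset(&job_key, "status", "started").await?;
        let _script: String = conn.hget(&job_key, "script").await?;
        // ... run the script in the matching runtime (out of scope here) ...
        let _: () = conn.hset(&job_key, "status", "finished").await?;
    }
}
```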

### `JobStatus` enum

| Value | Meaning |
|-------|---------|
| `dispatched` | Stored, waiting to be examined for prerequisites. |
| `waiting_for_prerequisites` | Has `dependends` that are not yet finished. |
| `started` | Currently executing on a runner. |
| `error` | Execution failed (or exceeded retries). |
| `finished` | Successfully completed, `result` populated. |

---

## <a name="enums-shared-types"></a>7️⃣ Other Enums & Shared Types

| Enum | Location | Values | Note |
|------|----------|--------|------|
| `MessageType` | `message.v` | `job`, `chat`, `mail` | Determines how a `Message` is interpreted. |
| `MessageFormatType` | `message.v` | `html`, `text`, `md` | UI-layer rendering hint. |
| `MessageStatus` | `message.v` | `dispatched`, `acknowledged`, `error`, `processed` | Life-cycle of a `Message`. |
| `FlowStatus` | `flow.v` | `dispatched`, `started`, `error`, `finished` | High-level flow progress. |
| `RunnerType` | `runner.v` | `v`, `python`, `osis`, `rust` | Not currently stored; used by the orchestration layer to pick a runner implementation. |
| `ScriptType` | `runnerjob.v` | `osis`, `sal`, `v`, `python` | Determines queue suffix & runtime. |
| `JobStatus` | `runnerjob.v` | `dispatched`, `waiting_for_prerequisites`, `started`, `error`, `finished` | Per-job state machine. |

---

## <a name="key-generation-helpers"></a>8️⃣ Key-generation helpers (methods)

| Model | Method | Returns | Example |
|-------|--------|---------|---------|
| `Actor` | `redis_key()` | `"actor:${self.id}"` | `actor:12` |
| `Context` | `redis_key()` | `"context:${self.id}"` | `context:7` |
| `Flow` | `redis_key()` | `"flow:${self.id}"` | `flow:33` |
| `Message` | `redis_key()` | `"message:${self.caller_id}:${self.id}"` | `message:12:101` |
| `Runner` | `redis_key()` | `"runner:${self.id}"` | `runner:20` |
| `RunnerJob` | `redis_key()` | `"job:${self.caller_id}:${self.id}"` | `job:12:2001` |
| `MessageType` | `queue_suffix()` | `"job"` / `"chat"` / `"mail"` | `MessageType.job.queue_suffix() → "job"` |
| `ScriptType` | `queue_suffix()` | `"osis"` / `"sal"` / `"v"` / `"python"` | `ScriptType.python.queue_suffix() → "python"` |

These helpers guarantee **canonical key naming** throughout the code base and simplify Redis interactions.
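The same convention can be mirrored on the Rust side of the monorepo with a tiny trait, so every client builds keys identically; a sketch (the trait and structs are illustrative, not an existing workspace API):

```rust
// Illustrative only: mirrors the V `redis_key()` helpers in Rust.
trait RedisKey {
    fn redis_key(&self) -> String;
}

struct Actor { id: u32 }
struct RunnerJob { id: u32, caller_id: u32 }

impl RedisKey for Actor {
    fn redis_key(&self) -> String {
        format!("actor:{}", self.id)
    }
}

impl RedisKey for RunnerJob {
    // Jobs are namespaced by the caller: job:<caller_id>:<id>
    fn redis_key(&self) -> String {
        format!("job:{}:{}", self.caller_id, self.id)
    }
}

fn main() {
    assert_eq!(Actor { id: 12 }.redis_key(), "actor:12");
    assert_eq!(RunnerJob { id: 2001, caller_id: 12 }.redis_key(), "job:12:2001");
}
```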

---

## 📌 Summary Diagram (quick reference)

```mermaid
%%{init: {"theme":"dark"}}%%
graph TD
    %% Actors and what they can create
    A[Actor] -->|creates| Ctx[Context]
    A -->|creates| Fl[Flow]
    A -->|creates| Msg[Message]
    A -->|creates| Rnr[Runner]
    A -->|creates| Job[RunnerJob]

    %% All objects live inside one Redis DB that belongs to a Context
    subgraph "Redis DB (per Context)"
        Ctx
        A
        Fl
        Msg
        Rnr
        Job
    end

    %% Messaging queues (global, outside the Context DB)
    Msg -->|pushes key onto| OutQ[msg_out]
    OutQ -->|transport via Mycelium| InQ[msg_in]
    InQ -->|pulled by| Rnr

    %% Local runner queues (only when runner.local == true)
    Rnr -->|BRPOP from| QueueV["queue:v"]
    Rnr -->|BRPOP from| QueuePy["queue:python"]
    Rnr -->|BRPOP from| QueueOSIS["queue:osis"]
```

## context based

* Inside a Context, an **Actor** can create a **Flow** that references many **RunnerJob** IDs (the DAG).
* To *initiate* execution, the Actor packages a **RunnerJob** (or a full Flow) inside a **Message**, pushes it onto `msg_out`, and the system routes it via **Mycelium** to the target Context.
* The remote **Runner** receives the Message, materialises the **RunnerJob**, queues it on a script-type list, executes it, writes back `result` and status, and optionally sends a *result Message* back to the originator.

All state is persisted as **Redis hashes**, guaranteeing durability and enabling *idempotent* retries. The uniform naming conventions (`actor:<id>`, `job:<caller_id>:<id>`, …) make it trivial to locate any object given its identifiers.
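For completeness, the dispatch step described above amounts, on the sending side, to persisting the Message hash and pushing its key onto `msg_out`. A minimal sketch with the `redis` crate (the field layout written here is a simplified subset of the table above):

```rust
// Sketch of the sender side: store the Message hash, then enqueue its key
// on msg_out so Mycelium can pick it up. Field layout is illustrative.
use redis::AsyncCommands;

async fn dispatch_message(
    conn: &mut redis::aio::MultiplexedConnection,
    caller_id: u32,
    msg_id: u32,
    payload: &str,
) -> redis::RedisResult<()> {
    let key = format!("message:{caller_id}:{msg_id}");
    // Persist the message as a hash (subset of the documented fields).
    let _: () = conn
        .hset_multiple(
            &key,
            &[
                ("id", msg_id.to_string()),
                ("caller_id", caller_id.to_string()),
                ("message", payload.to_string()),
                ("status", "dispatched".to_string()),
            ],
        )
        .await?;
    // Hand the key to the outbound queue; Mycelium transports it to msg_in
    // on the target context.
    let _: () = conn.lpush("msg_out", &key).await?;
    Ok(())
}
```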
1399  bin/coordinator/specs/openrpc.json  Normal file
File diff suppressed because it is too large
263  bin/coordinator/specs/specs.md  Normal file
@@ -0,0 +1,263 @@
## Objects Used

| Component | What it **stores** | Where it lives (Redis key) | Main responsibilities |
|-----------|--------------------|----------------------------|-----------------------|
| **Actor** | Public key, reachable addresses, timestamps | `actor:<id>` (hash) | An identity that can request work, receive results and act as an administrator of a *Context*. |
| **Context** | Permission lists (`admins`, `readers`, `executors`), timestamps | `context:<id>` (hash) | An isolated "tenant" – a separate Redis DB and filesystem area. All objects (flows, messages, jobs, runners) belonging to a given workflow are stored under this context. The permission lists control who may read, execute or administer the context. |
| **Flow** | DAG of job IDs, env-vars, result map, status, timestamps | `flow:<id>` (hash) | A high-level workflow created by a single **Actor**. It groups many **RunnerJob** objects, records their execution order, supplies common environment variables and aggregates the final result. |
| **Message** | Payload, type (`job\|chat\|mail`), format (`html\|text\|md`), time-outs, embedded **Job** objects, log stream, status, timestamps | `message:<caller_id>:<id>` (hash) | The transport unit that travels over **Mycelium** (the pub/sub/message bus). A message can contain a **RunnerJob** (or a list of jobs) and is queued in two generic Redis lists: `msg_out` (to be sent) and `msg_in` (already received). |
| **Runner** | Public key, Mycelium address, topic name, type (`v\|python\|osis\|rust`), local flag, timestamps | `runner:<id>` (hash) | The *worker* that actually executes **RunnerJob** scripts. It subscribes to a Mycelium topic (normally `runner<id>`). If `local == true` the runner also consumes jobs directly from a Redis queue that is named after the script-type suffix (`v`, `python`, …). |
| **RunnerJob** | Script source, type (`osis\|sal\|v\|python`), env-vars, prerequisites, dependencies, status, timestamps, result map | `job:<caller_id>:<id>` (hash) | A single executable unit. It lives inside a **Context**, belongs to a **Runner**, and is queued according to its `script_type` (e.g. `queue:python`). Its status moves through the lifecycle `dispatched → waiting_for_prerequisites → started → finished\|error`. |

> **Key idea:** All objects are persisted as *hashes*. Context-scoped objects (**Context**, **Flow**, **Message**, **Runner**, **RunnerJob**) live in a **Redis** database dedicated to that context. **Actors are global** and are stored in Redis DB 0 under `actor:<id>`. The system is completely **decentralised** – each actor owns its own context and can spin up as many runners as needed. Communication between actors, runners and the rest of the system happens over **Mycelium**, a message-bus that uses Redis lists as queues.
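Since a context's `id` doubles as the Redis logical database number (with actors global in DB 0), "entering" a context is just a `SELECT`. A minimal sketch, assuming a stock multi-database Redis:

```rust
// Sketch: context id 7 doubles as the Redis logical database number,
// while actors stay in DB 0. Assumes a stock multi-database Redis.
use redis::AsyncCommands;

#[tokio::main]
async fn main() -> redis::RedisResult<()> {
    let client = redis::Client::open("redis://127.0.0.1:6379/")?;
    let mut conn = client.get_multiplexed_async_connection().await?;

    // Global actor lookup (DB 0).
    let pubkey: Option<String> = conn.hget("actor:12", "pubkey").await?;
    println!("actor 12 pubkey: {pubkey:?}");

    // Switch to the context's dedicated database, then read a flow hash.
    let _: () = redis::cmd("SELECT").arg(7).query_async(&mut conn).await?;
    let status: Option<String> = conn.hget("flow:33", "status").await?;
    println!("flow 33 status: {status:?}");
    Ok(())
}
```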
|
||||||
|
---
|
||||||
|
|
||||||
|
## Interaction diagram (who talks to who)
|
||||||
|
|
||||||
|
### Sequence diagram – “Submit a flow and run it”
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
%%{init: {"theme":"dark"}}%%
|
||||||
|
sequenceDiagram
|
||||||
|
participant A as Actor
|
||||||
|
participant L as Local‑Context (Redis)
|
||||||
|
participant M as Mycelium (msg_out / msg_in)
|
||||||
|
participant R as Remote‑Context (Redis)
|
||||||
|
participant W as Runner (worker)
|
||||||
|
|
||||||
|
%% 1. Actor creates everything locally
|
||||||
|
A->>L: create Flow + RunnerJob (J)
|
||||||
|
A->>L: LPUSH msg_out Message{type=job, payload=J, target=Remote}
|
||||||
|
|
||||||
|
%% 2. Mycelium transports the message
|
||||||
|
M->>R: LPUSH msg_in (Message key)
|
||||||
|
|
||||||
|
%% 3. Remote context materialises the job
|
||||||
|
R->>R: HSET Message hash
|
||||||
|
R->>R: HSET RunnerJob (J') // copy of payload
|
||||||
|
R->>R: LPUSH queue:v (job key)
|
||||||
|
|
||||||
|
%% 4. Runner consumes and executes
|
||||||
|
W->>R: BRPOP queue:v (job key)
|
||||||
|
W->>R: HSET job status = started
|
||||||
|
W->>W: execute script
|
||||||
|
W->>R: HSET job result + status = finished
|
||||||
|
|
||||||
|
%% 5. Result is sent back
|
||||||
|
W->>M: LPUSH msg_out Message{type=result, payload=result, target=Local}
|
||||||
|
M->>L: LPUSH msg_in (result Message key)
|
||||||
|
|
||||||
|
%% 6. Actor receives the result
|
||||||
|
A->>L: RPOP msg_in → read result
|
||||||
|
```
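
The runner side of steps 4 above, as a short sketch. It uses the raw `BRPOP` command via the workspace's `redis` crate; `execute_script` is a hypothetical stand-in for the real per-`script_type` dispatch.

```rust
// Sketch: block on the script-type queue, mark the job started, run it, record the result.
use redis::AsyncCommands;

async fn runner_loop(conn: &mut redis::aio::MultiplexedConnection) -> redis::RedisResult<()> {
    loop {
        // BRPOP with timeout 0 blocks until a job key arrives; returns (queue, key).
        let (_queue, job_key): (String, String) = redis::cmd("BRPOP")
            .arg("queue:v")
            .arg(0)
            .query_async(conn)
            .await?;
        let _: () = conn.hset(&job_key, "status", "started").await?;
        let script: String = conn.hget(&job_key, "script").await?;
        let result = execute_script(&script); // hypothetical
        let _: () = conn
            .hset_multiple(&job_key, &[("result", result.as_str()), ("status", "finished")])
            .await?;
    }
}

fn execute_script(script: &str) -> String {
    // Placeholder: a real runner would dispatch on script_type here.
    format!("ran {} bytes of script", script.len())
}
```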

### 2.2 Component diagram – “Static view of objects & links”

```mermaid
%%{init: {"theme":"dark"}}%%
graph LR
    subgraph Redis["Redis (per Context)"]
        A[Actor] -->|stores| Ctx[Context]
        Ctx -->|stores| Fl[Flow]
        Ctx -->|stores| Msg[Message]
        Ctx -->|stores| Rnr[Runner]
        Ctx -->|stores| Job[RunnerJob]
    end

    subgraph Mycelium["Mycelium (Pub/Sub)"]
        MsgOut["queue:msg_out"] -->|outgoing| Mcel[Mycelium Bus]
        Mcel -->|incoming| MsgIn["queue:msg_in"]
        RnrTopic["topic:runnerX"] -->|subscribed by| Rnr
        queueV["queue:v"] -->|local jobs| Rnr
        queuePython["queue:python"] -->|local jobs| Rnr
    end

    A -->|creates / reads| Fl
    A -->|creates / reads| Msg
    A -->|creates / reads| Rnr
    A -->|creates / reads| Job
    Fl -->|references| Job
    Msg -->|may embed| Job
    Rnr -->|executes| Job
    Job -->|updates| Fl
    Msg -->|carries result back to| A
```

### 2.3 Flow‑status life‑cycle (state diagram)

```mermaid
%%{init: {"theme":"dark"}}%%
stateDiagram-v2
    [*] --> dispatched
    dispatched --> waiting_for_prerequisites : has prereqs
    waiting_for_prerequisites --> started : prereqs met
    dispatched --> started : no prereqs
    started --> finished : success
    started --> error : failure
    waiting_for_prerequisites --> error : timeout / impossible
    error --> [*]
    finished --> [*]
```
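
The same life-cycle, expressed as a Rust enum with an explicit transition check. This is a sketch only; the real status type lives in the models crate and may differ in naming and shape.

```rust
// Sketch: the legal transitions from the state diagram above.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Status {
    Dispatched,
    WaitingForPrerequisites,
    Started,
    Finished,
    Error,
}

fn can_transition(from: Status, to: Status) -> bool {
    use Status::*;
    matches!(
        (from, to),
        (Dispatched, WaitingForPrerequisites)
            | (Dispatched, Started)
            | (WaitingForPrerequisites, Started)
            | (WaitingForPrerequisites, Error)
            | (Started, Finished)
            | (Started, Error)
    )
}
```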

---

## 3️⃣ Redis objects – concrete key & data layout

All objects are stored as **hashes** (`HSET`). Below is a concise catalog that can be copied into a design doc.

| Key pattern | Example | Fields (type) | Comments |
|-------------|---------|---------------|----------|
| `actor:${id}` | `actor:12` | `id` u32, `pubkey` str, `address` list\<Address\>, `created_at` u32, `updated_at` u32 | One hash per actor. |
| `context:${id}` | `context:7` | `id` u32, `admins` list\<u32\>, `readers` list\<u32\>, `executors` list\<u32\>, `created_at` u32, `updated_at` u32 | Holds permission lists for a tenant. |
| `flow:${id}` | `flow:33` | `id` u32, `caller_id` u32, `context_id` u32, `jobs` list\<u32\>, `env_vars` map\<str,str\>, `result` map\<str,str\>, `created_at` u32, `updated_at` u32, `status` str (`dispatched\|started\|error\|finished`) | High‑level workflow (DAG of jobs). |
| `message:${caller_id}:${id}` | `message:12:101` | `id` u32, `caller_id` u32, `context_id` u32, `message` str, `message_type` str (`job\|chat\|mail`), `message_format_type` str (`html\|text\|md`), `timeout` u32, `timeout_ack` u32, `timeout_result` u32, `job` list\<RunnerJob\> (serialized), `logs` list\<Log\>, `created_at` u32, `updated_at` u32, `status` str (`dispatched\|acknowledged\|error\|processed`) | Transport unit over Mycelium. |
| `runner:${id}` | `runner:20` | `id` u32, `pubkey` str, `address` str, `topic` str, `local` bool, `created_at` u32, `updated_at` u32 | Worker that executes jobs. |
| `job:${caller_id}:${id}` | `job:12:2001` | `id` u32, `caller_id` u32, `context_id` u32, `script` str, `script_type` str (`osis\|sal\|v\|python`), `timeout` u32, `retries` u8, `env_vars` map\<str,str\>, `result` map\<str,str\>, `prerequisites` list\<str\>, `dependends` list\<u32\>, `created_at` u32, `updated_at` u32, `status` str (`dispatched\|waiting_for_prerequisites\|started\|error\|finished`) | Single executable unit. |

#### Queue objects (lists)

| Queue name | Purpose |
|------------|---------|
| `msg_out` | **Outbound** generic queue – every `Message` that an actor wants to send is pushed here. |
| `msg_in` | **Inbound** generic queue – every message received from Mycelium is placed here for the local consumer to process. |
| `queue:${suffix}` (e.g. `queue:v`, `queue:python`) | Local job queues used by a **Runner** when `local == true`. The suffix comes from `ScriptType.queue_suffix()`. |
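
A sketch of the two generic queues, again assuming the workspace's `redis` crate: the sender `LPUSH`es a message key onto `msg_out`, the consumer drains `msg_in`.

```rust
// Sketch: producing to msg_out and consuming from msg_in.
use redis::AsyncCommands;

async fn send_message(
    conn: &mut redis::aio::MultiplexedConnection,
    msg_key: &str,
) -> redis::RedisResult<()> {
    let _: () = conn.lpush("msg_out", msg_key).await?;
    Ok(())
}

async fn next_inbound(
    conn: &mut redis::aio::MultiplexedConnection,
) -> redis::RedisResult<Option<String>> {
    // RPOP returns None when the inbound queue is empty.
    conn.rpop("msg_in", None).await
}
```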

---

## 4️⃣ System specification (as a concise “specs” section)

### 4.1 Naming conventions

* All Redis **hashes** are prefixed with the object name (`actor:`, `context:`, …).
* All **queues** are simple Redis lists (`LPUSH` / `RPOP`).
* **Message** keys embed both the *caller* and a locally unique *message id* – this guarantees global uniqueness across contexts. A key‑builder sketch follows this list.
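
```rust
// Key-builder sketch for the conventions above; hypothetical helpers, not
// types from the workspace.
fn actor_key(id: u32) -> String {
    format!("actor:{id}")
}

fn message_key(caller_id: u32, id: u32) -> String {
    // Caller id + local message id together are globally unique.
    format!("message:{caller_id}:{id}")
}

fn job_key(caller_id: u32, id: u32) -> String {
    format!("job:{caller_id}:{id}")
}
```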

### 4.2 Permissions & security

* Only IDs present in `Context.admins` may **create** or **delete** any object inside that context.
* `Context.readers` can **GET** any hash but not modify it.
* `Context.executors` are allowed to **update** `RunnerJob.status` and `result`, and to **pop** from local job queues.
* Every `Actor` must present a `pubkey` that can be verified by the receiving side (Mycelium uses asymmetric crypto). A permission‑check sketch follows this list.
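
A sketch of the permission check implied by these rules, with a hypothetical `Context` shape of plain id lists. That admins implicitly hold the weaker permissions is an assumption; the spec only defines the three lists.

```rust
// Sketch only: maps the rules above to a simple membership check.
struct Context {
    admins: Vec<u32>,
    readers: Vec<u32>,
    executors: Vec<u32>,
}

enum Action {
    Create,
    Read,
    Execute,
}

fn is_allowed(ctx: &Context, actor_id: u32, action: Action) -> bool {
    match action {
        Action::Create => ctx.admins.contains(&actor_id),
        // Assumption: admins also hold read/execute rights.
        Action::Read => ctx.readers.contains(&actor_id) || ctx.admins.contains(&actor_id),
        Action::Execute => ctx.executors.contains(&actor_id) || ctx.admins.contains(&actor_id),
    }
}
```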

### 4.3 Message flow (publish / consume)

The message flow below reflects the actual runtime components:

* **Supervisor daemon** – runs on the node that owns the **Flow** (the *actor’s* side). It is the only process that ever **RPOP**s from the global `msg_out` queue; it adds the proper routing information and hands the message to **Mycelium**.

* **Mycelium** – the pure pub/sub message bus. It never touches Redis directly; it only receives a *payload key* from the supervisor daemon and delivers that key to the remote tenant’s `msg_in` list.

* **Remote‑side runner / service** – consumes from its own `msg_in`, materialises the job and executes it.

The table uses the exact component names and notes the permission check that the supervisor daemon performs before it releases a message.

| # | Action (what the system does) | Component that performs it | Redis interaction (exact commands) |
|---|-------------------------------|----------------------------|------------------------------------|
| **1️⃣ Publish** | Actor creates a `Message` hash and **LPUSH**es its key onto the *outbound* queue. | **Actor** (client code) | `HSET message:12:101 …` <br/> `LPUSH msg_out message:12:101` |
| **2️⃣ Coordinate & route** | The **Supervisor daemon** (running at the source) **RPOP**s the key, checks the actor’s permissions, adds the *target‑context* and *topic* fields, then forwards the key to Mycelium. | **Supervisor daemon** (per‑actor) | `RPOP msg_out` → (in‑process) → `LPUSH msg_out_coordinator <key>` (internal buffer) |
| **3️⃣ Transport** | Mycelium receives the key, looks at `Message.message_type` (or the explicit `topic`) and pushes the key onto the *inbound* queue of the **remote** tenant. | **Mycelium bus** (network layer) | `LPUSH msg_in:<remote‑ctx> <key>` |
| **4️⃣ Consume** | The **remote side** (runner or service) **RPOP**s from its `msg_in`, loads the full hash, verifies the actor’s signature and decides what to do based on `message_type`. | **Remote consumer** (runner / service) | `RPOP msg_in:<remote‑ctx>` → `HGETALL message:<key>` |
| **5️⃣ Job materialisation** | If `message_type == "job"` the consumer creates a **RunnerJob** entry inside the **remote** context and adds the job **key** to the proper *script‑type* queue (`queue:v`, `queue:python`, …). | **Remote consumer** | `HSET job:<caller_id>:<job_id> …` <br/> `LPUSH queue:<script_type> job:<caller_id>:<job_id>` |
| **6️⃣ Runner execution loop** | A **Runner** attached to that remote context **BRPOP**s from its script‑type queue, sets `status = started`, runs the script, writes `result` and the final `status`. | **Runner** | `BRPOP queue:<script_type>` → `HSET job:<…> status started` → … → `HSET job:<…> result … status finished` |
| **7️⃣ Result notification** | The runner builds a new `Message` (type `chat`, `result`, …) and pushes it onto **msg_out** again. The **Supervisor daemon** on the *remote* side picks it up and routes it back to the original actor. | **Runner** → **Supervisor (remote side)** → **Mycelium** → **Supervisor (origin side)** → **Actor** | `HSET message:<res_key> …` <br/> `LPUSH msg_out message:<res_key>` (steps 2‑3 repeat in the reverse direction) |
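
Step 2️⃣ as a sketch: the supervisor daemon's routing loop, assuming the workspace's `redis` crate. `check_permissions` and `forward_to_mycelium` are hypothetical stand-ins for the real logic.

```rust
// Sketch: RPOP msg_out, check permissions, hand the key to Mycelium.
use redis::AsyncCommands;

async fn route_outbound(conn: &mut redis::aio::MultiplexedConnection) -> redis::RedisResult<()> {
    loop {
        let msg_key: Option<String> = conn.rpop("msg_out", None).await?;
        let Some(msg_key) = msg_key else {
            // Queue empty; poll again shortly.
            tokio::time::sleep(std::time::Duration::from_millis(100)).await;
            continue;
        };
        let caller_id: u32 = conn.hget(&msg_key, "caller_id").await?;
        if !check_permissions(caller_id) {
            let _: () = conn.hset(&msg_key, "status", "error").await?;
            continue;
        }
        forward_to_mycelium(&msg_key);
    }
}

fn check_permissions(_caller_id: u32) -> bool {
    true // placeholder
}

fn forward_to_mycelium(_msg_key: &str) {
    // placeholder: hand the key to the Mycelium bus
}
```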

---

## Tiny end‑to‑end sequence (still simple enough to render)

```mermaid
%%{init: {"theme":"dark"}}%%
sequenceDiagram
    participant A as Actor
    participant L as Local‑Redis (Flow ctx)
    participant C as Supervisor daemon (local)
    participant M as Mycelium bus
    participant R as Remote‑Redis (target ctx)
    participant W as Runner (remote)

    %% 1️⃣ publish
    A->>L: HSET message:12:101 …
    A->>L: LPUSH msg_out message:12:101

    %% 2️⃣ coordinate
    C->>L: RPOP msg_out
    C->>C: check permissions / add routing info
    C->>M: push key to Mycelium (msg_out_coordinator)

    %% 3️⃣ transport
    M->>R: LPUSH msg_in message:12:101

    %% 4️⃣ consume
    W->>R: RPOP msg_in
    W->>R: HGETALL message:12:101
    W->>W: verify signature
    alt message_type == job
        W->>R: HSET job:12:2001 …
        W->>R: LPUSH queue:v job:12:2001
    end

    %% 5️⃣ runner loop
    W->>R: BRPOP queue:v (job:12:2001)
    W->>R: HSET job:12:2001 status started
    W->>W: execute script
    W->>R: HSET job:12:2001 result … status finished

    %% 6️⃣ result back
    W->>R: HSET message:12:900 result …
    W->>R: LPUSH msg_out message:12:900
    C->>M: (supervisor on remote side) routes back
    M->>L: LPUSH msg_in message:12:900
    A->>L: RPOP msg_in → read result
```

## 5️⃣ What the **system** is trying to achieve

| Goal | How it is realized |
|------|--------------------|
| **Decentralised execution** | Every *actor* owns a **Context**; any number of **Runners** can be attached to that context, possibly on different machines, and they all talk over the same Mycelium/Redis backend. |
| **Fine‑grained permissions** | `Context.admins/readers/executors` enforce who can create, view or run jobs. |
| **Loose coupling via messages** | All actions (job submission, result propagation, chat, mail, …) use the generic `Message` object; the same transport pipeline handles all of them. |
| **Workflow orchestration** | The **Flow** object models a DAG of jobs, tracks collective status and aggregates results, without needing a central scheduler. |
| **Pluggable runtimes** | `ScriptType` and `RunnerType` let a runner choose the proper execution environment (V, Python, OSIS, Rust, …) – adding a new language only means adding a new `ScriptType` and a corresponding worker. |
| **Observability** | `Log` arrays attached to a `Message` and the timestamps on every hash give a complete audit trail. |
| **Resilience** | Jobs are idempotent hash entries; queues are persisted in Redis, and status changes are atomic (`HSET`). Retries and time‑outs guarantee eventual consistency. |

---

## 6️⃣ Diagram summary (quick visual cheat‑sheet)

```mermaid
%%{init: {"theme":"dark"}}%%
graph TD
    A[Actor] -->|creates| Ctx[Context]
    A -->|creates| Flow
    A -->|creates| Msg
    A -->|creates| Rnr[Runner]
    A -->|creates| Job[RunnerJob]

    subgraph Redis["Redis (per Context)"]
        Ctx --> Flow
        Ctx --> Msg
        Ctx --> Rnr
        Ctx --> Job
    end

    Msg -->|push to| OutQ[msg_out]
    OutQ --> Myc[Mycelium Bus]
    Myc -->|deliver| InQ[msg_in]
    InQ --> Rnr
    Rnr -->|pop from| Qv["queue:v"]
    Rnr -->|pop from| Qpy["queue:python"]

    Rnr -->|updates| Job
    Job -->|updates| Flow
    Flow -->|result Message| Msg
```
9
bin/coordinator/src/clients/mod.rs
Normal file
@@ -0,0 +1,9 @@
pub mod mycelium_client;
pub mod supervisor_client;
pub mod supervisor_hub;
pub mod types;

pub use mycelium_client::{MyceliumClient, MyceliumClientError};
pub use supervisor_client::{SupervisorClient, SupervisorClientError};
pub use supervisor_hub::SupervisorHub;
pub use types::Destination;
319
bin/coordinator/src/clients/mycelium_client.rs
Normal file
@@ -0,0 +1,319 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

use reqwest::Client as HttpClient;

use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde_json::{Value, json};
use thiserror::Error;

use crate::clients::Destination;
use crate::models::TransportStatus;

/// Lightweight client for Mycelium JSON-RPC (send + query status)
#[derive(Clone)]
pub struct MyceliumClient {
    base_url: String, // e.g. http://127.0.0.1:8990
    http: HttpClient,
    id_counter: Arc<AtomicU64>,
}

#[derive(Debug, Error)]
pub enum MyceliumClientError {
    #[error("HTTP error: {0}")]
    Http(#[from] reqwest::Error),
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),
    #[error("Transport timed out waiting for a reply (408)")]
    TransportTimeout,
    #[error("JSON-RPC error: {0}")]
    RpcError(String),
    #[error("Invalid response: {0}")]
    InvalidResponse(String),
}

impl MyceliumClient {
    pub fn new(base_url: impl Into<String>) -> Result<Self, MyceliumClientError> {
        let url = base_url.into();
        let http = HttpClient::builder().build()?;
        Ok(Self {
            base_url: url,
            http,
            id_counter: Arc::new(AtomicU64::new(1)),
        })
    }

    fn next_id(&self) -> u64 {
        self.id_counter.fetch_add(1, Ordering::Relaxed)
    }

    async fn jsonrpc(&self, method: &str, params: Value) -> Result<Value, MyceliumClientError> {
        let req = json!({
            "jsonrpc": "2.0",
            "id": self.next_id(),
            "method": method,
            "params": [ params ]
        });

        tracing::info!(%req, "jsonrpc");
        let resp = self.http.post(&self.base_url).json(&req).send().await?;
        let status = resp.status();
        let body: Value = resp.json().await?;
        if let Some(err) = body.get("error") {
            let code = err.get("code").and_then(|v| v.as_i64()).unwrap_or(0);
            let msg = err
                .get("message")
                .and_then(|v| v.as_str())
                .unwrap_or("unknown error");
            if code == 408 {
                return Err(MyceliumClientError::TransportTimeout);
            }
            return Err(MyceliumClientError::RpcError(format!(
                "code={code} msg={msg}"
            )));
        }
        if !status.is_success() {
            return Err(MyceliumClientError::RpcError(format!(
                "HTTP {status}, body {body}"
            )));
        }
        Ok(body)
    }

    /// Call messageStatus with an outbound message id (hex string)
    pub async fn message_status(
        &self,
        id_hex: &str,
    ) -> Result<TransportStatus, MyceliumClientError> {
        let params = json!(id_hex);
        let body = self.jsonrpc("getMessageInfo", params).await?;
        let result = body.get("result").ok_or_else(|| {
            MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
        })?;
        // Accept both { state: "..." } and bare "..."
        let status_str = if let Some(s) = result.get("state").and_then(|v| v.as_str()) {
            s.to_string()
        } else if let Some(s) = result.as_str() {
            s.to_string()
        } else {
            return Err(MyceliumClientError::InvalidResponse(format!(
                "unexpected result shape: {result}"
            )));
        };
        // Propagate an unknown status as an error before logging, so logging can
        // no longer panic on the Err variant.
        let status = Self::map_status(&status_str).ok_or_else(|| {
            MyceliumClientError::InvalidResponse(format!("unknown status: {status_str}"))
        })?;
        tracing::info!(%id_hex, status = %status, "queried message status");
        Ok(status)
    }

    fn map_status(s: &str) -> Option<TransportStatus> {
        match s {
            "pending" => Some(TransportStatus::Queued),
            "received" => Some(TransportStatus::Delivered),
            "read" => Some(TransportStatus::Read),
            "aborted" => Some(TransportStatus::Failed),
            _ => None,
        }
    }

    /// Build params object for pushMessage without performing any network call.
    /// Exposed for serializer-only tests and reuse.
    pub(crate) fn build_push_params(
        dst: &Destination,
        topic: &str,
        payload_b64: &str,
        reply_timeout: Option<u64>,
    ) -> Value {
        let dst_v = match dst {
            Destination::Ip(ip) => json!({ "ip": ip.to_string() }),
            Destination::Pk(pk) => json!({ "pk": pk }),
        };
        // Nest the message under a "message" key so the params shape matches the
        // serializer tests below ({ "message": {...}, "reply_timeout": n }).
        let mut params = json!({
            "message": {
                "dst": dst_v,
                "topic": topic,
                "payload": payload_b64,
            }
        });
        if let Some(rt) = reply_timeout {
            params["reply_timeout"] = json!(rt);
        }
        params
    }

    /// pushMessage: send a message with dst/topic/payload. Optional reply_timeout for sync replies.
    pub async fn push_message(
        &self,
        dst: &Destination,
        topic: &str,
        payload_b64: &str,
        reply_timeout: Option<u64>,
    ) -> Result<Value, MyceliumClientError> {
        let params = Self::build_push_params(dst, topic, payload_b64, reply_timeout);
        let body = self.jsonrpc("pushMessage", params).await?;
        let result = body.get("result").ok_or_else(|| {
            MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
        })?;
        Ok(result.clone())
    }

    /// Helper to extract outbound message id from pushMessage result (InboundMessage or PushMessageResponseId)
    pub fn extract_message_id_from_result(result: &Value) -> Option<String> {
        result
            .get("id")
            .and_then(|v| v.as_str())
            .map(|s| s.to_string())
    }

    /// popMessage: retrieve an inbound message if available (optionally filtered by topic).
    /// - peek: if true, do not remove the message from the queue
    /// - timeout_secs: seconds to wait for a message (0 returns immediately)
    /// - topic_plain: optional plain-text topic which will be base64-encoded per Mycelium spec
    /// Returns:
    /// - Ok(Some(result_json)) on success, where result_json matches the InboundMessage schema
    /// - Ok(None) when there is no message ready (Mycelium returns error code 204)
    pub async fn pop_message(
        &self,
        peek: Option<bool>,
        timeout_secs: Option<u64>,
        topic_plain: Option<&str>,
    ) -> Result<Option<Value>, MyceliumClientError> {
        // Build params array
        let mut params_array = vec![];
        if let Some(p) = peek {
            params_array.push(serde_json::Value::Bool(p));
        } else {
            params_array.push(serde_json::Value::Null);
        }
        if let Some(t) = timeout_secs {
            params_array.push(serde_json::Value::Number(t.into()));
        } else {
            params_array.push(serde_json::Value::Null);
        }
        if let Some(tp) = topic_plain {
            let topic_b64 = BASE64_STANDARD.encode(tp.as_bytes());
            params_array.push(serde_json::Value::String(topic_b64));
        } else {
            params_array.push(serde_json::Value::Null);
        }

        let req = json!({
            "jsonrpc": "2.0",
            "id": self.next_id(),
            "method": "popMessage",
            "params": serde_json::Value::Array(params_array),
        });

        tracing::info!(%req, "calling popMessage");

        let resp = self.http.post(&self.base_url).json(&req).send().await?;
        let status = resp.status();
        let body: Value = resp.json().await?;

        // Handle JSON-RPC error envelope specially for code 204 (no message ready)
        if let Some(err) = body.get("error") {
            let code = err.get("code").and_then(|v| v.as_i64()).unwrap_or(0);
            let msg = err
                .get("message")
                .and_then(|v| v.as_str())
                .unwrap_or("unknown error");

            if code == 204 {
                // No message ready
                return Ok(None);
            }
            if code == 408 {
                // Align with other transport timeout mapping
                return Err(MyceliumClientError::TransportTimeout);
            }
            return Err(MyceliumClientError::RpcError(format!(
                "code={code} msg={msg}"
            )));
        }

        if !status.is_success() {
            return Err(MyceliumClientError::RpcError(format!(
                "HTTP {status}, body {body}"
            )));
        }

        let result = body.get("result").ok_or_else(|| {
            MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
        })?;
        Ok(Some(result.clone()))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::clients::Destination;

    #[test]
    fn build_push_params_shapes_ip_pk_and_timeout() {
        // IP destination
        let p1 = MyceliumClient::build_push_params(
            &Destination::Ip("2001:db8::1".parse().unwrap()),
            "supervisor.rpc",
            "Zm9vYmFy", // "foobar"
            Some(10),
        );
        let msg1 = p1.get("message").unwrap();
        assert_eq!(
            msg1.get("topic").unwrap().as_str().unwrap(),
            "supervisor.rpc"
        );
        assert_eq!(msg1.get("payload").unwrap().as_str().unwrap(), "Zm9vYmFy");
        assert_eq!(
            msg1.get("dst")
                .unwrap()
                .get("ip")
                .unwrap()
                .as_str()
                .unwrap(),
            "2001:db8::1"
        );
        assert_eq!(p1.get("reply_timeout").unwrap().as_u64().unwrap(), 10);

        // PK destination without timeout
        let p2 = MyceliumClient::build_push_params(
            &Destination::Pk(
                "bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".into(),
            ),
            "supervisor.rpc",
            "YmF6", // "baz"
            None,
        );
        let msg2 = p2.get("message").unwrap();
        assert_eq!(
            msg2.get("dst")
                .unwrap()
                .get("pk")
                .unwrap()
                .as_str()
                .unwrap(),
            "bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32"
        );
        assert!(p2.get("reply_timeout").is_none());
    }

    #[test]
    fn extract_message_id_variants() {
        // PushMessageResponseId
        let r1 = json!({"id":"0123456789abcdef"});
        assert_eq!(
            MyceliumClient::extract_message_id_from_result(&r1).unwrap(),
            "0123456789abcdef"
        );

        // InboundMessage-like
        let r2 = json!({
            "id":"fedcba9876543210",
            "srcIp":"449:abcd:0123:defa::1",
            "payload":"hpV+"
        });
        assert_eq!(
            MyceliumClient::extract_message_id_from_result(&r2).unwrap(),
            "fedcba9876543210"
        );
    }
}
588
bin/coordinator/src/clients/supervisor_client.rs
Normal file
@@ -0,0 +1,588 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;

use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde_json::{Value, json};
use thiserror::Error;
use tokio::time::timeout;

use crate::clients::{Destination, MyceliumClient, MyceliumClientError, SupervisorHub};

#[derive(Clone)]
pub struct SupervisorClient {
    hub: Arc<SupervisorHub>,  // Global hub with background pop loop and shared id generator
    destination: Destination, // ip or pk
    secret: Option<String>,   // optional, required by several supervisor methods
}

#[derive(Debug, Error)]
pub enum SupervisorClientError {
    #[error("HTTP error: {0}")]
    Http(#[from] reqwest::Error),
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),
    #[error("Transport timed out waiting for a reply (408)")]
    TransportTimeout,
    #[error("JSON-RPC error: {0}")]
    RpcError(String),
    #[error("Invalid response: {0}")]
    InvalidResponse(String),
    #[error("Missing secret for method requiring authentication")]
    MissingSecret,
}

impl From<MyceliumClientError> for SupervisorClientError {
    fn from(e: MyceliumClientError) -> Self {
        match e {
            MyceliumClientError::TransportTimeout => SupervisorClientError::TransportTimeout,
            MyceliumClientError::RpcError(m) => SupervisorClientError::RpcError(m),
            MyceliumClientError::InvalidResponse(m) => SupervisorClientError::InvalidResponse(m),
            MyceliumClientError::Http(err) => SupervisorClientError::Http(err),
            MyceliumClientError::Json(err) => SupervisorClientError::Json(err),
        }
    }
}

impl SupervisorClient {
    /// Preferred constructor using a shared SupervisorHub (single global listener).
    pub fn new_with_hub(
        hub: Arc<SupervisorHub>,
        destination: Destination,
        secret: Option<String>,
    ) -> Self {
        Self {
            hub,
            destination,
            secret,
        }
    }

    /// Backward-compatible constructor that builds a new Hub from base_url/topic.
    /// NOTE: This spawns a background popMessage listener for the given topic.
    /// Prefer `new_with_hub` so the process has a single global hub.
    pub fn new(
        base_url: impl Into<String>,
        destination: Destination,
        topic: impl Into<String>,
        secret: Option<String>,
    ) -> Result<Self, SupervisorClientError> {
        let mut url = base_url.into();
        if url.is_empty() {
            url = "http://127.0.0.1:8990".to_string();
        }
        let mycelium = Arc::new(MyceliumClient::new(url)?);
        Ok(Self::new_with_client(mycelium, destination, topic, secret))
    }

    /// Backward-compatible constructor that reuses an existing Mycelium client.
    /// NOTE: This creates a new hub and its own background listener. Prefer `new_with_hub`.
    pub fn new_with_client(
        mycelium: Arc<MyceliumClient>,
        destination: Destination,
        topic: impl Into<String>,
        secret: Option<String>,
    ) -> Self {
        let hub = SupervisorHub::new_with_client(mycelium, topic);
        Self::new_with_hub(hub, destination, secret)
    }

    /// Internal helper used by tests to inspect dst JSON shape.
    fn build_dst(&self) -> Value {
        match &self.destination {
            Destination::Ip(ip) => json!({ "ip": ip.to_string() }),
            Destination::Pk(pk) => json!({ "pk": pk }),
        }
    }

    fn build_supervisor_payload(&self, method: &str, params: Value) -> Value {
        json!({
            "jsonrpc": "2.0",
            "id": self.hub.next_id(),
            "method": method,
            "params": params,
        })
    }

    /// Build a supervisor JSON-RPC payload but force a specific id (used for correlation).
    fn build_supervisor_payload_with_id(&self, method: &str, params: Value, id: u64) -> Value {
        json!({
            "jsonrpc": "2.0",
            "id": id,
            "method": method,
            "params": params,
        })
    }

    fn encode_payload(payload: &Value) -> Result<String, SupervisorClientError> {
        let s = serde_json::to_string(payload)?;
        Ok(BASE64_STANDARD.encode(s.as_bytes()))
    }

    fn encode_topic(topic: &[u8]) -> String {
        BASE64_STANDARD.encode(topic)
    }

    fn extract_message_id_from_result(result: &Value) -> Option<String> {
        // Two possibilities per Mycelium spec oneOf:
        // - PushMessageResponseId: { "id": "0123456789abcdef" }
        // - InboundMessage: object containing "id" plus srcIp, ...; we still return id.
        result
            .get("id")
            .and_then(|v| v.as_str())
            .map(|s| s.to_string())
    }

    fn need_secret(&self) -> Result<&str, SupervisorClientError> {
        self.secret
            .as_deref()
            .ok_or(SupervisorClientError::MissingSecret)
    }

    // -----------------------------
    // Core: request-reply call via Hub with default 60s timeout
    // -----------------------------

    /// Send a supervisor JSON-RPC request and await its reply via the Hub.
    /// Returns (outbound_message_id, reply_envelope_json).
    pub async fn call_with_reply_timeout(
        &self,
        method: &str,
        params: Value,
        timeout_secs: u64,
    ) -> Result<(String, Value), SupervisorClientError> {
        let inner_id = self.hub.next_id();
        // Register waiter before sending to avoid race
        let rx = self.hub.register_waiter(inner_id).await;

        let inner = self.build_supervisor_payload_with_id(method, params, inner_id);
        let payload_b64 = Self::encode_payload(&inner)?;

        let result = self
            .hub
            .mycelium()
            .push_message(
                &self.destination,
                &Self::encode_topic(self.hub.topic().as_bytes()),
                &payload_b64,
                None,
            )
            .await?;

        let out_id = if let Some(id) = MyceliumClient::extract_message_id_from_result(&result) {
            id
        } else if let Some(arr) = result.as_array()
            && arr.len() == 1
            && let Some(id) = MyceliumClient::extract_message_id_from_result(&arr[0])
        {
            id
        } else {
            // Clean pending entry to avoid leak
            let _ = self.hub.remove_waiter(inner_id).await;
            return Err(SupervisorClientError::InvalidResponse(format!(
                "result did not contain message id: {result}"
            )));
        };

        let d = Duration::from_secs(timeout_secs);
        match timeout(d, rx).await {
            Ok(Ok(reply)) => Ok((out_id, reply)),
            Ok(Err(_canceled)) => Err(SupervisorClientError::InvalidResponse(
                "oneshot canceled before receiving reply".into(),
            )),
            Err(_elapsed) => {
                // Cleanup on timeout
                let _ = self.hub.remove_waiter(inner_id).await;
                Err(SupervisorClientError::TransportTimeout)
            }
        }
    }

    /// Send and await with the default 60s timeout.
    pub async fn call_with_reply(
        &self,
        method: &str,
        params: Value,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply_timeout(method, params, 60).await
    }

    /// Back-compat: Send and await a reply but return only the outbound id (discard reply).
    /// This keeps existing call sites working while the system migrates to reply-aware paths.
    pub async fn call(&self, method: &str, params: Value) -> Result<String, SupervisorClientError> {
        let (out_id, _reply) = self.call_with_reply(method, params).await?;
        Ok(out_id)
    }

    // -----------------------------
    // Typed wrappers for Supervisor API (await replies)
    // -----------------------------

    // Runners
    pub async fn list_runners_wait(&self) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("list_runners", json!([])).await
    }

    pub async fn register_runner_wait(
        &self,
        name: impl Into<String>,
        queue: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        let secret = self.need_secret()?;
        let params = json!([{
            "secret": secret,
            "name": name.into(),
            "queue": queue.into()
        }]);
        self.call_with_reply("register_runner", params).await
    }

    pub async fn remove_runner_wait(
        &self,
        actor_id: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("remove_runner", json!([actor_id.into()]))
            .await
    }

    pub async fn start_runner_wait(
        &self,
        actor_id: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("start_runner", json!([actor_id.into()]))
            .await
    }

    pub async fn stop_runner_wait(
        &self,
        actor_id: impl Into<String>,
        force: bool,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("stop_runner", json!([actor_id.into(), force]))
            .await
    }

    pub async fn get_runner_status_wait(
        &self,
        actor_id: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("get_runner_status", json!([actor_id.into()]))
            .await
    }

    pub async fn get_all_runner_status_wait(
        &self,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("get_all_runner_status", json!([]))
            .await
    }

    pub async fn start_all_wait(&self) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("start_all", json!([])).await
    }

    pub async fn stop_all_wait(
        &self,
        force: bool,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("stop_all", json!([force])).await
    }

    pub async fn get_all_status_wait(&self) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("get_all_status", json!([])).await
    }

    // Jobs (await)
    pub async fn jobs_create_wait(
        &self,
        job: Value,
    ) -> Result<(String, Value), SupervisorClientError> {
        let secret = self.need_secret()?;
        let params = json!([{
            "secret": secret,
            "job": job
        }]);
        self.call_with_reply("jobs.create", params).await
    }

    pub async fn jobs_list_wait(&self) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("jobs.list", json!([])).await
    }

    pub async fn job_run_wait(&self, job: Value) -> Result<(String, Value), SupervisorClientError> {
        let secret = self.need_secret()?;
        let params = json!([{
            "secret": secret,
            "job": job
        }]);
        self.call_with_reply("job.run", params).await
    }

    pub async fn job_start_wait(
        &self,
        job_id: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        let secret = self.need_secret()?;
        let params = json!([{
            "secret": secret,
            "job_id": job_id.into()
        }]);
        self.call_with_reply("job.start", params).await
    }

    pub async fn job_status_wait(
        &self,
        job_id: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("job.status", json!([job_id.into()]))
            .await
    }

    pub async fn job_result_wait(
        &self,
        job_id: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("job.result", json!([job_id.into()]))
            .await
    }

    pub async fn job_stop_wait(
        &self,
        job_id: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        let secret = self.need_secret()?;
        let params = json!([{
            "secret": secret,
            "job_id": job_id.into()
        }]);
        self.call_with_reply("job.stop", params).await
    }

    pub async fn job_delete_wait(
        &self,
        job_id: impl Into<String>,
    ) -> Result<(String, Value), SupervisorClientError> {
        let secret = self.need_secret()?;
        let params = json!([{
            "secret": secret,
            "job_id": job_id.into()
        }]);
        self.call_with_reply("job.delete", params).await
    }

    pub async fn rpc_discover_wait(&self) -> Result<(String, Value), SupervisorClientError> {
        self.call_with_reply("rpc.discover", json!([])).await
    }

    // -----------------------------
    // Backward-compatible variants returning only outbound id (discarding reply)
    // -----------------------------

    pub async fn list_runners(&self) -> Result<String, SupervisorClientError> {
        let (id, _) = self.list_runners_wait().await?;
        Ok(id)
    }

    pub async fn register_runner(
        &self,
        name: impl Into<String>,
        queue: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.register_runner_wait(name, queue).await?;
        Ok(id)
    }

    pub async fn remove_runner(
        &self,
        actor_id: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.remove_runner_wait(actor_id).await?;
        Ok(id)
    }

    pub async fn start_runner(
        &self,
        actor_id: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.start_runner_wait(actor_id).await?;
        Ok(id)
    }

    pub async fn stop_runner(
        &self,
        actor_id: impl Into<String>,
        force: bool,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.stop_runner_wait(actor_id, force).await?;
        Ok(id)
    }

    pub async fn get_runner_status(
        &self,
        actor_id: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.get_runner_status_wait(actor_id).await?;
        Ok(id)
    }

    pub async fn get_all_runner_status(&self) -> Result<String, SupervisorClientError> {
        let (id, _) = self.get_all_runner_status_wait().await?;
        Ok(id)
    }

    pub async fn start_all(&self) -> Result<String, SupervisorClientError> {
        let (id, _) = self.start_all_wait().await?;
        Ok(id)
    }

    pub async fn stop_all(&self, force: bool) -> Result<String, SupervisorClientError> {
        let (id, _) = self.stop_all_wait(force).await?;
        Ok(id)
    }

    pub async fn get_all_status(&self) -> Result<String, SupervisorClientError> {
        let (id, _) = self.get_all_status_wait().await?;
        Ok(id)
    }

    pub async fn jobs_create(&self, job: Value) -> Result<String, SupervisorClientError> {
        let (id, _) = self.jobs_create_wait(job).await?;
        Ok(id)
    }

    pub async fn jobs_list(&self) -> Result<String, SupervisorClientError> {
        let (id, _) = self.jobs_list_wait().await?;
        Ok(id)
    }

    pub async fn job_run(&self, job: Value) -> Result<String, SupervisorClientError> {
        let (id, _) = self.job_run_wait(job).await?;
        Ok(id)
    }

    pub async fn job_start(
        &self,
        job_id: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.job_start_wait(job_id).await?;
        Ok(id)
    }

    pub async fn job_status(
        &self,
        job_id: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.job_status_wait(job_id).await?;
        Ok(id)
    }

    pub async fn job_result(
        &self,
        job_id: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.job_result_wait(job_id).await?;
        Ok(id)
    }

    pub async fn job_stop(
        &self,
        job_id: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.job_stop_wait(job_id).await?;
        Ok(id)
    }

    pub async fn job_delete(
        &self,
        job_id: impl Into<String>,
    ) -> Result<String, SupervisorClientError> {
        let (id, _) = self.job_delete_wait(job_id).await?;
        Ok(id)
    }

    pub async fn rpc_discover(&self) -> Result<String, SupervisorClientError> {
        let (id, _) = self.rpc_discover_wait().await?;
        Ok(id)
    }
}

// -----------------------------
// Tests (serialization-only)
// -----------------------------
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: the hub spawns its background listener with tokio::spawn, so the
    // serializer-only tests that construct a hub still need a Tokio runtime.
    fn mk_client() -> SupervisorClient {
        // Build a hub, but it won't issue real network calls in these serializer-only tests.
        let mycelium = Arc::new(MyceliumClient::new("http://127.0.0.1:8990").unwrap());
        let hub = SupervisorHub::new_with_client(mycelium, "supervisor.rpc");
        SupervisorClient::new_with_hub(
            hub,
            Destination::Pk(
                "bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".to_string(),
            ),
            Some("secret".to_string()),
        )
    }

    #[tokio::test]
    async fn builds_dst_ip_and_pk() {
        let mycelium = Arc::new(MyceliumClient::new("http://127.0.0.1:8990").unwrap());
        let hub_ip = SupervisorHub::new_with_client(mycelium.clone(), "supervisor.rpc");
        let c_ip = SupervisorClient::new_with_hub(
            hub_ip,
            Destination::Ip("2001:db8::1".parse().unwrap()),
            None,
        );
        let v_ip = c_ip.build_dst();
        assert_eq!(v_ip.get("ip").unwrap().as_str().unwrap(), "2001:db8::1");

        let c_pk = mk_client();
        let v_pk = c_pk.build_dst();
        assert_eq!(
            v_pk.get("pk").unwrap().as_str().unwrap(),
            "bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32"
        );
    }

    #[tokio::test]
    async fn encodes_supervisor_payload_b64() {
        let c = mk_client();
        let payload = c.build_supervisor_payload("list_runners", json!([]));
        let b64 = SupervisorClient::encode_payload(&payload).unwrap();

        // decode and compare round-trip JSON
        let raw = base64::engine::general_purpose::STANDARD
            .decode(b64.as_bytes())
            .unwrap();
        let decoded: Value = serde_json::from_slice(&raw).unwrap();
        assert_eq!(
            decoded.get("method").unwrap().as_str().unwrap(),
            "list_runners"
        );
        assert_eq!(decoded.get("jsonrpc").unwrap().as_str().unwrap(), "2.0");
    }

    #[test]
    fn extract_message_id_works_for_both_variants() {
        // PushMessageResponseId
        let r1 = json!({"id":"0123456789abcdef"});
        assert_eq!(
            SupervisorClient::extract_message_id_from_result(&r1).unwrap(),
            "0123456789abcdef"
        );
        // InboundMessage-like
        let r2 = json!({
            "id":"fedcba9876543210",
            "srcIp":"449:abcd:0123:defa::1",
            "payload":"hpV+"
        });
        assert_eq!(
            SupervisorClient::extract_message_id_from_result(&r2).unwrap(),
            "fedcba9876543210"
        );
    }
}
143
bin/coordinator/src/clients/supervisor_hub.rs
Normal file
@@ -0,0 +1,143 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde_json::Value;
use tokio::sync::{Mutex, oneshot};

use crate::clients::mycelium_client::MyceliumClient;

/// Global hub that:
/// - Owns a single MyceliumClient
/// - Spawns a background popMessage loop filtered by topic
/// - Correlates supervisor JSON-RPC replies by inner id to waiting callers via oneshot channels
#[derive(Clone)]
pub struct SupervisorHub {
    mycelium: Arc<MyceliumClient>,
    topic: String,
    pending: Arc<Mutex<HashMap<u64, oneshot::Sender<Value>>>>,
    id_counter: Arc<AtomicU64>,
}

impl SupervisorHub {
    /// Create a new hub and start the background popMessage task.
    /// - base_url: Mycelium JSON-RPC endpoint, e.g. "http://127.0.0.1:8990"
    /// - topic: plain-text topic (e.g., "supervisor.rpc")
    pub fn new(
        base_url: impl Into<String>,
        topic: impl Into<String>,
    ) -> Result<Arc<Self>, crate::clients::MyceliumClientError> {
        let myc = Arc::new(MyceliumClient::new(base_url)?);
        Ok(Self::new_with_client(myc, topic))
    }

    /// Variant that reuses an existing Mycelium client.
    pub fn new_with_client(mycelium: Arc<MyceliumClient>, topic: impl Into<String>) -> Arc<Self> {
        let hub = Arc::new(Self {
            mycelium,
            topic: topic.into(),
            pending: Arc::new(Mutex::new(HashMap::new())),
            id_counter: Arc::new(AtomicU64::new(1)),
        });
        Self::spawn_pop_loop(hub.clone());
        hub
    }

    fn spawn_pop_loop(hub: Arc<Self>) {
        tokio::spawn(async move {
            loop {
                match hub.mycelium.pop_message(Some(false), Some(20), None).await {
                    Ok(Some(inb)) => {
                        // Extract and decode payload
                        let Some(payload_b64) = inb.get("payload").and_then(|v| v.as_str()) else {
                            // Not a payload-bearing message; ignore
                            continue;
                        };
                        let Ok(raw) = BASE64_STANDARD.decode(payload_b64.as_bytes()) else {
                            tracing::warn!(target: "supervisor_hub", "Failed to decode inbound payload base64");
                            continue;
                        };
                        let Ok(rpc) = serde_json::from_slice::<Value>(&raw) else {
                            tracing::warn!(target: "supervisor_hub", "Failed to parse inbound payload JSON");
                            continue;
                        };

                        // Extract inner JSON-RPC id
                        let inner_id_u64 = match rpc.get("id") {
                            Some(Value::Number(n)) => n.as_u64(),
                            Some(Value::String(s)) => s.parse::<u64>().ok(),
                            _ => None,
                        };

                        if let Some(inner_id) = inner_id_u64 {
                            // Try to deliver to a pending waiter
                            let sender_opt = {
                                let mut guard = hub.pending.lock().await;
                                guard.remove(&inner_id)
                            };
                            if let Some(tx) = sender_opt {
                                let _ = tx.send(rpc);
                            } else {
                                tracing::warn!(
                                    target: "supervisor_hub",
                                    inner_id,
                                    payload = %String::from_utf8_lossy(&raw),
                                    "Unmatched supervisor reply; no waiter registered"
                                );
                            }
                        } else {
                            tracing::warn!(target: "supervisor_hub", "Inbound supervisor reply missing id; dropping");
                        }
                    }
                    Ok(None) => {
                        // No message; continue polling
                        continue;
                    }
                    Err(e) => {
                        tracing::warn!(target: "supervisor_hub", error = %e, "popMessage error; backing off");
                        tokio::time::sleep(std::time::Duration::from_millis(200)).await;
                    }
                }
            }
        });
    }

    /// Allocate a new inner supervisor JSON-RPC id.
    pub fn next_id(&self) -> u64 {
        self.id_counter.fetch_add(1, Ordering::Relaxed)
    }

    /// Register a oneshot sender for the given inner id and return the receiver side.
    pub async fn register_waiter(&self, inner_id: u64) -> oneshot::Receiver<Value> {
        let (tx, rx) = oneshot::channel();
        let mut guard = self.pending.lock().await;
        guard.insert(inner_id, tx);
        rx
    }

    /// Remove a pending waiter for a given id (used to cleanup on timeout).
    pub async fn remove_waiter(&self, inner_id: u64) -> Option<oneshot::Sender<Value>> {
        let mut guard = self.pending.lock().await;
        guard.remove(&inner_id)
    }

    /// Access to underlying Mycelium client (for pushMessage).
    pub fn mycelium(&self) -> Arc<MyceliumClient> {
        self.mycelium.clone()
    }

    /// Access configured topic.
    pub fn topic(&self) -> &str {
        &self.topic
    }
}

impl std::fmt::Debug for SupervisorHub {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SupervisorHub")
            .field("topic", &self.topic)
            .finish()
    }
}
9
bin/coordinator/src/clients/types.rs
Normal file
@@ -0,0 +1,9 @@
use std::net::IpAddr;

/// Destination for Mycelium messages (shared by clients)
#[derive(Clone, Debug)]
pub enum Destination {
    Ip(IpAddr),
    /// 64-hex public key of the receiver node
    Pk(String),
}
381
bin/coordinator/src/dag.rs
Normal file
@@ -0,0 +1,381 @@
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
use std::fmt;

use crate::{
    models::{Flow, Job, JobStatus, ScriptType},
    storage::RedisDriver,
};

pub type DagResult<T> = Result<T, DagError>;

#[derive(Debug)]
pub enum DagError {
    Storage(Box<dyn std::error::Error + Send + Sync>),
    MissingDependency { job: u32, depends_on: u32 },
    CycleDetected { remaining: Vec<u32> },
    UnknownJob { job: u32 },
    DependenciesIncomplete { job: u32, missing: Vec<u32> },
    FlowFailed { failed_job: u32 },
    JobNotStarted { job: u32 },
}

impl fmt::Display for DagError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            DagError::Storage(e) => write!(f, "Storage error: {}", e),
            DagError::MissingDependency { job, depends_on } => write!(
                f,
                "Job {} depends on {}, which is not part of the flow.jobs list",
                job, depends_on
            ),
            DagError::CycleDetected { remaining } => {
                write!(f, "Cycle detected; unresolved nodes: {:?}", remaining)
            }
            DagError::UnknownJob { job } => write!(f, "Unknown job id: {}", job),
            DagError::DependenciesIncomplete { job, missing } => write!(
                f,
                "Job {} cannot start; missing completed deps: {:?}",
                job, missing
            ),
            DagError::FlowFailed { failed_job } => {
                write!(f, "Flow failed due to job {}", failed_job)
            }
            DagError::JobNotStarted { job } => write!(
                f,
                "Job {} cannot be completed because it is not marked as started",
                job
            ),
        }
    }
}

impl std::error::Error for DagError {}

impl From<Box<dyn std::error::Error + Send + Sync>> for DagError {
    fn from(e: Box<dyn std::error::Error + Send + Sync>) -> Self {
        DagError::Storage(e)
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobSummary {
    pub id: u32,
    pub depends: Vec<u32>,
    pub prerequisites: Vec<String>,
    pub script_type: ScriptType,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlowDag {
    pub flow_id: u32,
    pub caller_id: u32,
    pub context_id: u32,
    pub nodes: HashMap<u32, JobSummary>,
    pub edges: Vec<(u32, u32)>,         // (from prerequisite, to job)
    pub reverse_edges: Vec<(u32, u32)>, // (from job, to prerequisite)
    pub roots: Vec<u32>,                // in_degree == 0
    pub leaves: Vec<u32>,               // out_degree == 0
    pub levels: Vec<Vec<u32>>,          // topological layers for parallel execution
    // Runtime execution state
    pub started: HashSet<u32>,
    pub completed: HashSet<u32>,
    pub failed_job: Option<u32>,
}

pub async fn build_flow_dag(
    redis: &RedisDriver,
    context_id: u32,
    flow_id: u32,
) -> DagResult<FlowDag> {
    // Load flow
    let flow: Flow = redis
        .load_flow(context_id, flow_id)
        .await
        .map_err(DagError::from)?;
    let caller_id = flow.caller_id();
    let flow_job_ids = flow.jobs();

    // Build a set for faster membership tests
    let job_id_set: HashSet<u32> = flow_job_ids.iter().copied().collect();

    // Load all jobs
    let mut jobs: HashMap<u32, Job> = HashMap::with_capacity(flow_job_ids.len());
    for jid in flow_job_ids {
        let job = redis
            .load_job(context_id, caller_id, *jid)
            .await
            .map_err(DagError::from)?;
        jobs.insert(*jid, job);
    }

    // Validate dependencies and construct adjacency
    let mut edges: Vec<(u32, u32)> = Vec::new();
    let mut reverse_edges: Vec<(u32, u32)> = Vec::new();
    let mut adj: HashMap<u32, Vec<u32>> = HashMap::with_capacity(jobs.len());
    let mut rev_adj: HashMap<u32, Vec<u32>> = HashMap::with_capacity(jobs.len());
    let mut in_degree: HashMap<u32, usize> = HashMap::with_capacity(jobs.len());

    for &jid in flow_job_ids {
        adj.entry(jid).or_default();
        rev_adj.entry(jid).or_default();
        in_degree.entry(jid).or_insert(0);
    }

    for (&jid, job) in &jobs {
        for &dep in job.depends() {
            if !job_id_set.contains(&dep) {
                return Err(DagError::MissingDependency {
                    job: jid,
                    depends_on: dep,
                });
            }
            // edge: dep -> jid
            edges.push((dep, jid));
            reverse_edges.push((jid, dep));
            adj.get_mut(&dep).unwrap().push(jid);
            rev_adj.get_mut(&jid).unwrap().push(dep);
            *in_degree.get_mut(&jid).unwrap() += 1;
        }
    }

    // Kahn's algorithm for topological sorting, with level construction
    let mut zero_in: VecDeque<u32> = in_degree
        .iter()
        .filter_map(|(k, v)| if *v == 0 { Some(*k) } else { None })
        .collect();

    let mut processed_count = 0usize;
||||||
|
let mut levels: Vec<Vec<u32>> = Vec::new();
|
||||||
|
|
||||||
|
// To make deterministic, sort initial zero_in
|
||||||
|
{
|
||||||
|
let mut tmp: Vec<u32> = zero_in.iter().copied().collect();
|
||||||
|
tmp.sort_unstable();
|
||||||
|
zero_in = tmp.into_iter().collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
while !zero_in.is_empty() {
|
||||||
|
let mut level: Vec<u32> = Vec::new();
|
||||||
|
// drain current frontier
|
||||||
|
let mut next_zero: Vec<u32> = Vec::new();
|
||||||
|
let mut current_frontier: Vec<u32> = zero_in.drain(..).collect();
|
||||||
|
current_frontier.sort_unstable();
|
||||||
|
for u in current_frontier {
|
||||||
|
level.push(u);
|
||||||
|
processed_count += 1;
|
||||||
|
if let Some(children) = adj.get(&u) {
|
||||||
|
let mut sorted_children = children.clone();
|
||||||
|
sorted_children.sort_unstable();
|
||||||
|
for &v in &sorted_children {
|
||||||
|
let d = in_degree.get_mut(&v).unwrap();
|
||||||
|
*d -= 1;
|
||||||
|
if *d == 0 {
|
||||||
|
next_zero.push(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
next_zero.sort_unstable();
|
||||||
|
zero_in = next_zero.into_iter().collect();
|
||||||
|
levels.push(level);
|
||||||
|
}
|
||||||
|
|
||||||
|
if processed_count != jobs.len() {
|
||||||
|
let remaining: Vec<u32> = in_degree
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|(k, v)| if v > 0 { Some(k) } else { None })
|
||||||
|
.collect();
|
||||||
|
return Err(DagError::CycleDetected { remaining });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Roots and leaves
|
||||||
|
let roots: Vec<u32> = levels.first().cloned().unwrap_or_default();
|
||||||
|
let leaves: Vec<u32> = adj
|
||||||
|
.iter()
|
||||||
|
.filter_map(|(k, v)| if v.is_empty() { Some(*k) } else { None })
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Nodes map (JobSummary)
|
||||||
|
let mut nodes: HashMap<u32, JobSummary> = HashMap::with_capacity(jobs.len());
|
||||||
|
for (&jid, job) in &jobs {
|
||||||
|
let summary = JobSummary {
|
||||||
|
id: jid,
|
||||||
|
depends: job.depends().to_vec(),
|
||||||
|
prerequisites: job.prerequisites().to_vec(),
|
||||||
|
script_type: job.script_type(),
|
||||||
|
};
|
||||||
|
nodes.insert(jid, summary);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort edges deterministically
|
||||||
|
edges.sort_unstable();
|
||||||
|
reverse_edges.sort_unstable();
|
||||||
|
|
||||||
|
// Populate runtime execution state from persisted Job.status()
|
||||||
|
let mut started_set: HashSet<u32> = HashSet::new();
|
||||||
|
let mut completed_set: HashSet<u32> = HashSet::new();
|
||||||
|
let mut error_ids: Vec<u32> = Vec::new();
|
||||||
|
|
||||||
|
for (&jid, job) in &jobs {
|
||||||
|
match job.status() {
|
||||||
|
JobStatus::Finished => {
|
||||||
|
completed_set.insert(jid);
|
||||||
|
}
|
||||||
|
JobStatus::Started => {
|
||||||
|
started_set.insert(jid);
|
||||||
|
}
|
||||||
|
JobStatus::Dispatched => {
|
||||||
|
// Consider Dispatched as "in-flight" for DAG runtime started set,
|
||||||
|
// so queued/running work is visible in periodic snapshots.
|
||||||
|
started_set.insert(jid);
|
||||||
|
}
|
||||||
|
JobStatus::Error => {
|
||||||
|
error_ids.push(jid);
|
||||||
|
}
|
||||||
|
JobStatus::WaitingForPrerequisites => {
|
||||||
|
// Neither started nor completed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Choose a deterministic failed job if any errors exist (smallest job id)
|
||||||
|
let failed_job = if error_ids.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
error_ids.sort_unstable();
|
||||||
|
Some(error_ids[0])
|
||||||
|
};
|
||||||
|
|
||||||
|
let dag = FlowDag {
|
||||||
|
flow_id,
|
||||||
|
caller_id,
|
||||||
|
context_id,
|
||||||
|
nodes,
|
||||||
|
edges,
|
||||||
|
reverse_edges,
|
||||||
|
roots,
|
||||||
|
leaves,
|
||||||
|
levels,
|
||||||
|
started: started_set,
|
||||||
|
completed: completed_set,
|
||||||
|
failed_job,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(dag)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FlowDag {
|
||||||
|
/// Return all jobs that are ready to be processed.
|
||||||
|
/// A job is ready if:
|
||||||
|
/// - it exists in the DAG
|
||||||
|
/// - it is not already started or completed
|
||||||
|
/// - it has no dependencies, or all dependencies are completed
|
||||||
|
///
|
||||||
|
/// If any job has failed, the entire flow is considered failed and an error is returned.
|
||||||
|
pub fn ready_jobs(&self) -> DagResult<Vec<u32>> {
|
||||||
|
if let Some(failed_job) = self.failed_job {
|
||||||
|
return Err(DagError::FlowFailed { failed_job });
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut ready: Vec<u32> = Vec::new();
|
||||||
|
for (&jid, summary) in &self.nodes {
|
||||||
|
if self.completed.contains(&jid) || self.started.contains(&jid) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let mut deps_ok = true;
|
||||||
|
for dep in &summary.depends {
|
||||||
|
if !self.completed.contains(dep) {
|
||||||
|
deps_ok = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if deps_ok {
|
||||||
|
ready.push(jid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ready.sort_unstable();
|
||||||
|
Ok(ready)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mark a job as started.
|
||||||
|
/// Strict validation rules:
|
||||||
|
/// - Unknown jobs are rejected with UnknownJob
|
||||||
|
/// - If the flow has already failed, return FlowFailed
|
||||||
|
/// - If the job is already started or completed, this is a no-op (idempotent)
|
||||||
|
/// - If any dependency is not completed, return DependenciesIncomplete with the missing deps
|
||||||
|
pub fn mark_job_started(&mut self, job: u32) -> DagResult<()> {
|
||||||
|
if !self.nodes.contains_key(&job) {
|
||||||
|
return Err(DagError::UnknownJob { job });
|
||||||
|
}
|
||||||
|
if self.completed.contains(&job) || self.started.contains(&job) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if let Some(failed_job) = self.failed_job {
|
||||||
|
return Err(DagError::FlowFailed { failed_job });
|
||||||
|
}
|
||||||
|
|
||||||
|
let summary = self.nodes.get(&job).expect("checked contains_key");
|
||||||
|
let missing: Vec<u32> = summary
|
||||||
|
.depends
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.filter(|d| !self.completed.contains(d))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if !missing.is_empty() {
|
||||||
|
return Err(DagError::DependenciesIncomplete { job, missing });
|
||||||
|
}
|
||||||
|
|
||||||
|
self.started.insert(job);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mark a job as completed.
|
||||||
|
/// Strict validation rules:
|
||||||
|
/// - Unknown jobs are rejected with UnknownJob
|
||||||
|
/// - If the job is already completed, this is a no-op (idempotent)
|
||||||
|
/// - If the flow has already failed, return FlowFailed
|
||||||
|
/// - If the job was not previously started, return JobNotStarted
|
||||||
|
pub fn mark_job_completed(&mut self, job: u32) -> DagResult<()> {
|
||||||
|
if !self.nodes.contains_key(&job) {
|
||||||
|
return Err(DagError::UnknownJob { job });
|
||||||
|
}
|
||||||
|
if self.completed.contains(&job) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if let Some(failed_job) = self.failed_job {
|
||||||
|
return Err(DagError::FlowFailed { failed_job });
|
||||||
|
}
|
||||||
|
if !self.started.contains(&job) {
|
||||||
|
return Err(DagError::JobNotStarted { job });
|
||||||
|
}
|
||||||
|
|
||||||
|
self.started.remove(&job);
|
||||||
|
self.completed.insert(job);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mark a job as failed.
|
||||||
|
/// Behavior:
|
||||||
|
/// - Unknown jobs are rejected with UnknownJob
|
||||||
|
/// - If a failure is already recorded:
|
||||||
|
/// - If it is the same job, no-op (idempotent)
|
||||||
|
/// - If it is a different job, return FlowFailed with the already-failed job
|
||||||
|
/// - Otherwise record this job as the failed job
|
||||||
|
pub fn mark_job_failed(&mut self, job: u32) -> DagResult<()> {
|
||||||
|
if !self.nodes.contains_key(&job) {
|
||||||
|
return Err(DagError::UnknownJob { job });
|
||||||
|
}
|
||||||
|
match self.failed_job {
|
||||||
|
Some(existing) if existing == job => Ok(()),
|
||||||
|
Some(existing) => Err(DagError::FlowFailed {
|
||||||
|
failed_job: existing,
|
||||||
|
}),
|
||||||
|
None => {
|
||||||
|
self.failed_job = Some(job);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
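The DAG above is driven by the router and flow logic elsewhere in this commit. As a standalone illustration, a minimal hypothetical scheduling loop against this API could look like the following, with the actual dispatch to runners elided (in reality a job is only marked completed after its runner reports success):

async fn drive_flow(redis: &RedisDriver, context_id: u32, flow_id: u32) -> DagResult<()> {
    let mut dag = build_flow_dag(redis, context_id, flow_id).await?;
    while dag.completed.len() < dag.nodes.len() {
        // ready_jobs() returns Err(FlowFailed) as soon as any job has failed.
        for jid in dag.ready_jobs()? {
            dag.mark_job_started(jid)?;
            // ... dispatch job `jid` to a runner here ...
            dag.mark_job_completed(jid)?;
        }
    }
    Ok(())
}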
8
bin/coordinator/src/lib.rs
Normal file
@@ -0,0 +1,8 @@
pub mod clients;
pub mod dag;
pub mod models;
pub mod router;
pub mod rpc;
pub mod service;
pub mod storage;
mod time;
142
bin/coordinator/src/main.rs
Normal file
@@ -0,0 +1,142 @@
use clap::Parser;
use std::net::{IpAddr, SocketAddr};
use std::sync::Arc;

use tracing::{error, info};
use tracing_subscriber::EnvFilter;

#[derive(Debug, Clone, Parser)]
#[command(
    name = "herocoordinator",
    version,
    about = "Hero Coordinator CLI",
    long_about = None
)]
struct Cli {
    #[arg(
        long = "mycelium-ip",
        short = 'i',
        env = "MYCELIUM_IP",
        default_value = "127.0.0.1",
        help = "IP address where Mycelium JSON-RPC is listening (default: 127.0.0.1)"
    )]
    mycelium_ip: IpAddr,

    #[arg(
        long = "mycelium-port",
        short = 'p',
        env = "MYCELIUM_PORT",
        default_value_t = 8990u16,
        help = "Port for Mycelium JSON-RPC (default: 8990)"
    )]
    mycelium_port: u16,

    #[arg(
        long = "redis-addr",
        short = 'r',
        env = "REDIS_ADDR",
        default_value = "127.0.0.1:6379",
        help = "Socket address of Redis instance (default: 127.0.0.1:6379)"
    )]
    redis_addr: SocketAddr,

    #[arg(
        long = "api-http-ip",
        env = "API_HTTP_IP",
        default_value = "127.0.0.1",
        help = "Bind IP for HTTP JSON-RPC server (default: 127.0.0.1)"
    )]
    api_http_ip: IpAddr,

    #[arg(
        long = "api-http-port",
        env = "API_HTTP_PORT",
        default_value_t = 9652u16,
        help = "Bind port for HTTP JSON-RPC server (default: 9652)"
    )]
    api_http_port: u16,

    #[arg(
        long = "api-ws-ip",
        env = "API_WS_IP",
        default_value = "127.0.0.1",
        help = "Bind IP for WebSocket JSON-RPC server (default: 127.0.0.1)"
    )]
    api_ws_ip: IpAddr,

    #[arg(
        long = "api-ws-port",
        env = "API_WS_PORT",
        default_value_t = 9653u16,
        help = "Bind port for WebSocket JSON-RPC server (default: 9653)"
    )]
    api_ws_port: u16,
}

#[tokio::main]
async fn main() {
    let cli = Cli::parse();
    // Initialize tracing subscriber (pretty formatter; controlled by RUST_LOG)
    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
    tracing_subscriber::fmt()
        .with_env_filter(filter)
        .pretty()
        .with_target(true)
        .with_level(true)
        .init();

    let http_addr = SocketAddr::new(cli.api_http_ip, cli.api_http_port);
    let ws_addr = SocketAddr::new(cli.api_ws_ip, cli.api_ws_port);

    // Initialize Redis driver
    let redis = hero_coordinator::storage::RedisDriver::new(cli.redis_addr.to_string())
        .await
        .expect("Failed to connect to Redis");

    // Initialize Service
    let service = hero_coordinator::service::AppService::new(redis);
    let service_for_router = service.clone();

    // Shared application state
    let state = Arc::new(hero_coordinator::rpc::AppState::new(service));

    // Start router workers (auto-discovered contexts) using a single global SupervisorHub (no separate inbound listener)
    {
        let base_url = format!("http://{}:{}", cli.mycelium_ip, cli.mycelium_port);
        let hub = hero_coordinator::clients::SupervisorHub::new(
            base_url.clone(),
            "supervisor.rpc".to_string(),
        )
        .expect("Failed to initialize SupervisorHub");
        let cfg = hero_coordinator::router::RouterConfig {
            context_ids: Vec::new(), // ignored by start_router_auto
            concurrency: 32,
            base_url,
            topic: "supervisor.rpc".to_string(),
            sup_hub: hub.clone(),
            transport_poll_interval_secs: 2,
            transport_poll_timeout_secs: 300,
        };
        // Per-context outbound delivery loops (replies handled by SupervisorHub)
        let _auto_handle = hero_coordinator::router::start_router_auto(service_for_router, cfg);
    }

    // Build RPC modules for both servers
    let http_module = hero_coordinator::rpc::build_module(state.clone());
    let ws_module = hero_coordinator::rpc::build_module(state.clone());

    info!(%http_addr, %ws_addr, redis_addr=%cli.redis_addr, "Starting JSON-RPC servers");

    // Start servers
    let _http_handle = hero_coordinator::rpc::start_http(http_addr, http_module)
        .await
        .expect("Failed to start HTTP server");
    let _ws_handle = hero_coordinator::rpc::start_ws(ws_addr, ws_module)
        .await
        .expect("Failed to start WS server");

    // Wait for Ctrl+C to terminate
    if let Err(e) = tokio::signal::ctrl_c().await {
        error!(error=%e, "Failed to listen for shutdown signal");
    }
    info!("Shutdown signal received, exiting.");
}
15
bin/coordinator/src/models.rs
Normal file
@@ -0,0 +1,15 @@
mod actor;
mod context;
mod flow;
mod job;
mod message;
mod runner;
mod script_type;

pub use actor::Actor;
pub use context::Context;
pub use flow::{Flow, FlowStatus};
pub use job::{Job, JobStatus};
pub use message::{Message, MessageFormatType, MessageStatus, MessageType, TransportStatus};
pub use runner::Runner;
pub use script_type::ScriptType;
15
bin/coordinator/src/models/actor.rs
Normal file
@@ -0,0 +1,15 @@
use std::net::IpAddr;

use serde::{Deserialize, Serialize};

use crate::time::Timestamp;

#[derive(Serialize, Deserialize, Clone)]
pub struct Actor {
    id: u32,
    pubkey: String,
    /// IP addresses where the actor is reachable; these can be Mycelium addresses, but that is not mandatory
    address: Vec<IpAddr>,
    created_at: Timestamp,
    updated_at: Timestamp,
}
17
bin/coordinator/src/models/context.rs
Normal file
@@ -0,0 +1,17 @@
use serde::{Deserialize, Serialize};

use crate::time::Timestamp;

#[derive(Serialize, Deserialize, Clone)]
pub struct Context {
    /// Redis DB to use
    pub id: u32,
    /// Actor ids which have admin rights on this context
    pub admins: Vec<u32>,
    /// Actor ids which can read the context info
    pub readers: Vec<u32>,
    /// Actor ids which can execute jobs in this context
    pub executors: Vec<u32>,
    pub created_at: Timestamp,
    pub updated_at: Timestamp,
}
49
bin/coordinator/src/models/flow.rs
Normal file
@@ -0,0 +1,49 @@
use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::time::Timestamp;

#[derive(Serialize, Deserialize, Clone)]
pub struct Flow {
    /// Flow Id, set by the actor which created it
    pub id: u32,
    /// Actor Id who created this flow
    pub caller_id: u32,
    /// The context in which this flow is executed
    pub context_id: u32,
    /// List of jobs which make up the flow
    pub jobs: Vec<u32>,
    /// Environment variables, passed to every job when executed
    pub env_vars: HashMap<String, String>,
    /// The result of the flow
    pub result: HashMap<String, String>,
    pub created_at: Timestamp,
    pub updated_at: Timestamp,
    pub status: FlowStatus,
}

/// The status of a flow
#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Debug)]
pub enum FlowStatus {
    Created,
    Dispatched,
    Started,
    Error,
    Finished,
}

impl Flow {
    pub fn id(&self) -> u32 {
        self.id
    }
    pub fn caller_id(&self) -> u32 {
        self.caller_id
    }
    pub fn context_id(&self) -> u32 {
        self.context_id
    }
    pub fn jobs(&self) -> &[u32] {
        &self.jobs
    }
}
62
bin/coordinator/src/models/job.rs
Normal file
@@ -0,0 +1,62 @@
use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::{models::ScriptType, time::Timestamp};

#[derive(Clone, Serialize, Deserialize)]
pub struct Job {
    /// Job Id, this is given by the actor who created the job
    pub id: u32,
    /// Actor ID which created this job
    pub caller_id: u32,
    /// Context in which the job is executed
    pub context_id: u32,
    pub script: String,
    pub script_type: ScriptType,
    /// Timeout in seconds for this job
    pub timeout: u32,
    /// Max amount of times to retry this job
    pub retries: u8,
    pub env_vars: HashMap<String, String>,
    pub result: HashMap<String, String>,
    pub prerequisites: Vec<String>,
    /// Ids of jobs this job depends on, i.e. this job can't start until those have finished
    pub depends: Vec<u32>,
    pub created_at: Timestamp,
    pub updated_at: Timestamp,
    pub status: JobStatus,
}

#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
pub enum JobStatus {
    Dispatched,
    WaitingForPrerequisites,
    Started,
    Error,
    Finished,
}

impl Job {
    pub fn id(&self) -> u32 {
        self.id
    }
    pub fn caller_id(&self) -> u32 {
        self.caller_id
    }
    pub fn context_id(&self) -> u32 {
        self.context_id
    }
    pub fn depends(&self) -> &[u32] {
        &self.depends
    }
    pub fn prerequisites(&self) -> &[String] {
        &self.prerequisites
    }
    pub fn script_type(&self) -> ScriptType {
        self.script_type.clone()
    }
    pub fn status(&self) -> JobStatus {
        self.status.clone()
    }
}
81
bin/coordinator/src/models/message.rs
Normal file
@@ -0,0 +1,81 @@
use serde::{Deserialize, Serialize};

use crate::{
    models::{Job, ScriptType},
    time::Timestamp,
};

#[derive(Clone, Serialize, Deserialize)]
pub struct Message {
    /// Unique ID for the message, set by the caller
    pub id: u32,
    /// Id of the actor who sent this message
    pub caller_id: u32,
    /// Id of the context in which this message was sent
    pub context_id: u32,
    pub message: String,
    pub message_type: ScriptType,
    pub message_format_type: MessageFormatType,
    /// Seconds for the message to arrive at the destination
    pub timeout: u32,
    /// Seconds for the receiver to acknowledge receipt of the message
    pub timeout_ack: u32,
    /// Seconds for the receiver to send us a reply
    pub timeout_result: u32,

    /// Outbound transport id returned by Mycelium on push
    pub transport_id: Option<String>,
    /// Latest transport status as reported by Mycelium
    pub transport_status: Option<TransportStatus>,

    pub job: Vec<Job>,
    pub logs: Vec<Log>,
    pub created_at: Timestamp,
    pub updated_at: Timestamp,
    pub status: MessageStatus,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MessageType {
    Job,
    Chat,
    Mail,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum MessageStatus {
    Dispatched,
    Acknowledged,
    Error,
    Processed,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum TransportStatus {
    Queued,
    Sent,
    Delivered,
    Read,
    Failed,
}

impl std::fmt::Display for TransportStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TransportStatus::Queued => f.write_str("queued"),
            TransportStatus::Sent => f.write_str("sent"),
            TransportStatus::Delivered => f.write_str("delivered"),
            TransportStatus::Read => f.write_str("read"),
            TransportStatus::Failed => f.write_str("failed"),
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MessageFormatType {
    Html,
    Text,
    Md,
}

type Log = String;
25
bin/coordinator/src/models/runner.rs
Normal file
@@ -0,0 +1,25 @@
use std::net::IpAddr;

use serde::{Deserialize, Serialize};

use crate::models::ScriptType;
use crate::time::Timestamp;

#[derive(Serialize, Deserialize, Clone)]
pub struct Runner {
    pub id: u32,
    /// Mycelium public key
    pub pubkey: String,
    /// Mycelium address
    pub address: IpAddr,
    /// Needs to be set by the runner, usually `runner<runnerid>`
    pub topic: String,
    /// The script type this runner can execute; used for routing
    pub script_type: ScriptType,
    /// If this is true, the runner also listens on a local redis queue
    pub local: bool,
    /// Optional secret used for authenticated supervisor calls (if required)
    pub secret: Option<String>,
    pub created_at: Timestamp,
    pub updated_at: Timestamp,
}
9
bin/coordinator/src/models/script_type.rs
Normal file
@@ -0,0 +1,9 @@
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub enum ScriptType {
    Osis,
    Sal,
    V,
    Python,
}
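For reference, with serde's default enum representation these unit variants travel as bare strings; a small sketch (this is standard serde behavior, not code from this commit):

let s = serde_json::to_string(&ScriptType::Python).unwrap();
assert_eq!(s, "\"Python\"");
let back: ScriptType = serde_json::from_str("\"Sal\"").unwrap();
assert_eq!(back, ScriptType::Sal);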
972
bin/coordinator/src/router.rs
Normal file
@@ -0,0 +1,972 @@
use std::{
    collections::{HashMap, HashSet},
    sync::Arc,
};

use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde_json::{Value, json};
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use tokio::sync::{Mutex, Semaphore};

use crate::{
    clients::{Destination, MyceliumClient, SupervisorClient, SupervisorHub},
    models::{Job, JobStatus, Message, MessageStatus, ScriptType, TransportStatus},
    service::AppService,
};
use tracing::{error, info};

#[derive(Clone, Debug)]
pub struct RouterConfig {
    pub context_ids: Vec<u32>,
    pub concurrency: usize,
    pub base_url: String,            // e.g. http://127.0.0.1:8990
    pub topic: String,               // e.g. "supervisor.rpc"
    pub sup_hub: Arc<SupervisorHub>, // global supervisor hub for replies
    // Transport status polling configuration
    pub transport_poll_interval_secs: u64, // e.g. 2
    pub transport_poll_timeout_secs: u64,  // e.g. 300 (5 minutes)
}

/*
SupervisorClient reuse cache (Router-local):

Rationale:
- SupervisorClient maintains an internal JSON-RPC id_counter per instance.
- Rebuilding a client for each message resets this counter, causing inner JSON-RPC ids to restart at 1.
- We reuse one SupervisorClient per (destination, topic, secret) to preserve monotonically increasing ids.

Scope:
- Cache is per Router loop (and a separate one for the inbound listener).
- If cross-loop/process reuse becomes necessary later, promote to a process-global cache.

Keying:
- Key: destination + topic + secret-presence (secret content hashed; not stored in plaintext).

Concurrency:
- tokio::Mutex protects a HashMap<String, Arc<SupervisorClient>>.
- Values are Arc so call sites clone cheaply and share the same id_counter.
*/
#[derive(Clone)]
struct SupervisorClientCache {
    map: Arc<Mutex<HashMap<String, Arc<SupervisorClient>>>>,
}

impl SupervisorClientCache {
    fn new() -> Self {
        Self {
            map: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    fn make_key(dest: &Destination, topic: &str, secret: &Option<String>) -> String {
        let dst = match dest {
            Destination::Ip(ip) => format!("ip:{ip}"),
            Destination::Pk(pk) => format!("pk:{pk}"),
        };
        // Hash the secret to avoid storing plaintext in keys while still differentiating values
        let sec_hash = match secret {
            Some(s) if !s.is_empty() => {
                let mut hasher = DefaultHasher::new();
                s.hash(&mut hasher);
                format!("s:{}", hasher.finish())
            }
            _ => "s:none".to_string(),
        };
        format!("{dst}|t:{topic}|{sec_hash}")
    }

    async fn get_or_create(
        &self,
        hub: Arc<SupervisorHub>,
        dest: Destination,
        topic: String,
        secret: Option<String>,
    ) -> Arc<SupervisorClient> {
        let key = Self::make_key(&dest, &topic, &secret);

        {
            let guard = self.map.lock().await;
            if let Some(existing) = guard.get(&key) {
                tracing::debug!(target: "router", cache="supervisor", hit=true, %topic, secret = %if secret.is_some() { "set" } else { "none" }, "SupervisorClient cache lookup");
                return existing.clone();
            }
        }

        let mut guard = self.map.lock().await;
        if let Some(existing) = guard.get(&key) {
            tracing::debug!(target: "router", cache="supervisor", hit=true, %topic, secret = %if secret.is_some() { "set" } else { "none" }, "SupervisorClient cache lookup (double-checked)");
            return existing.clone();
        }
        let client = Arc::new(SupervisorClient::new_with_hub(hub, dest, secret.clone()));
        guard.insert(key, client.clone());
        tracing::debug!(target: "router", cache="supervisor", hit=false, %topic, secret = %if secret.is_some() { "set" } else { "none" }, "SupervisorClient cache insert");
        client
    }
}

/// Start background router loops, one per context.
/// Each loop:
/// - BRPOP msg_out with 1s timeout
/// - Loads the Message by key, selects a Runner by script_type
/// - Sends supervisor JSON-RPC via Mycelium
/// - On success: Message.status = Acknowledged
/// - On error: Message.status = Error and append a log
pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::JoinHandle<()>> {
    let mut handles = Vec::new();
    for ctx_id in cfg.context_ids.clone() {
        let service_cloned = service.clone();
        let cfg_cloned = cfg.clone();
        let handle = tokio::spawn(async move {
            let sem = Arc::new(Semaphore::new(cfg_cloned.concurrency));

            // Use the global SupervisorHub and its Mycelium client
            let sup_hub = cfg_cloned.sup_hub.clone();
            let mycelium = sup_hub.mycelium();

            let cache = Arc::new(SupervisorClientCache::new());

            loop {
                // Pop next message key (blocking with timeout)
                match service_cloned.brpop_msg_out(ctx_id, 1).await {
                    Ok(Some(key)) => {
                        let permit = {
                            // acquire a concurrency permit (non-fair is fine)
                            let sem = sem.clone();
                            // if the semaphore is exhausted, await until a slot becomes available
                            match sem.acquire_owned().await {
                                Ok(p) => p,
                                Err(_) => {
                                    // Semaphore closed; exit loop
                                    break;
                                }
                            }
                        };
                        let service_task = service_cloned.clone();
                        let cfg_task = cfg_cloned.clone();
                        tokio::spawn({
                            let mycelium = mycelium.clone();
                            let cache = cache.clone();
                            let sup_hub = sup_hub.clone();
                            async move {
                                // Ensure permit is dropped at end of task
                                let _permit = permit;
                                if let Err(e) = deliver_one(
                                    &service_task,
                                    &cfg_task,
                                    ctx_id,
                                    &key,
                                    mycelium,
                                    sup_hub,
                                    cache.clone(),
                                )
                                .await
                                {
                                    error!(context_id=ctx_id, key=%key, error=%e, "Delivery error");
                                }
                            }
                        });
                    }
                    Ok(None) => {
                        // timeout: just tick
                        continue;
                    }
                    Err(e) => {
                        error!(context_id=ctx_id, error=%e, "BRPOP error");
                        // small backoff to avoid busy-looping on persistent errors
                        tokio::time::sleep(std::time::Duration::from_millis(200)).await;
                    }
                }
            }
        });
        handles.push(handle);
    }
    handles
}

async fn deliver_one(
    service: &AppService,
    cfg: &RouterConfig,
    context_id: u32,
    msg_key: &str,
    mycelium: Arc<MyceliumClient>,
    sup_hub: Arc<SupervisorHub>,
    cache: Arc<SupervisorClientCache>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Parse "message:{caller_id}:{id}"
    let (caller_id, id) = parse_message_key(msg_key)
        .ok_or_else(|| format!("invalid message key format: {}", msg_key))?;

    // Load message
    let msg: Message = service.load_message(context_id, caller_id, id).await?;
    // Embedded job id (if any)
    let job_id_opt: Option<u32> = msg.job.first().map(|j| j.id);

    // Determine routing script_type
    let desired: ScriptType = determine_script_type(&msg);

    // Discover runners and select a matching one
    let runners = service.scan_runners(context_id).await?;
    let Some(runner) = runners.into_iter().find(|r| r.script_type == desired) else {
        let log = format!(
            "No runner with script_type {:?} available in context {} for message {}",
            desired, context_id, msg_key
        );
        let _ = service
            .append_message_logs(context_id, caller_id, id, vec![log.clone()])
            .await;
        let _ = service
            .update_message_status(context_id, caller_id, id, MessageStatus::Error)
            .await;
        return Err(log.into());
    };

    // Build SupervisorClient
    let dest = if !runner.pubkey.trim().is_empty() {
        Destination::Pk(runner.pubkey.clone())
    } else {
        Destination::Ip(runner.address)
    };
    // Keep clones for poller usage
    let dest_for_poller = dest.clone();
    let topic_for_poller = cfg.topic.clone();
    let secret_for_poller = runner.secret.clone();
    let client = cache
        .get_or_create(
            sup_hub.clone(),
            dest.clone(),
            cfg.topic.clone(),
            runner.secret.clone(),
        )
        .await;

    // Build supervisor method and params from Message
    let method = msg.message.clone();
    let params = build_params(&msg)?;

    // Send
    // If this is a job.run and we have a secret configured on the client,
    // prefer the typed wrapper that injects the secret into the inner supervisor params,
    // and await the reply to capture job_queued immediately.
    let (out_id, reply_opt) = if method == "job.run" {
        if let Some(j) = msg.job.first() {
            let jv = job_to_json(j)?;
            // Returns (outbound message id, reply envelope)
            let (out, reply) = client.job_run_wait(jv).await?;
            (out, Some(reply))
        } else {
            // Fallback: no embedded job, use the generic call (await reply, discard)
            let out = client.call(&method, params).await?;
            (out, None)
        }
    } else {
        let out = client.call(&method, params).await?;
        (out, None)
    };

    // Store transport id and initial Sent status
    let _ = service
        .update_message_transport(
            context_id,
            caller_id,
            id,
            Some(out_id.clone()),
            Some(TransportStatus::Sent),
        )
        .await;

    // Mark as acknowledged on success
    service
        .update_message_status(context_id, caller_id, id, MessageStatus::Acknowledged)
        .await?;

    // If we got a job.run reply, interpret job_queued immediately
    if let (Some(reply), Some(job_id)) = (reply_opt, msg.job.first().map(|j| j.id)) {
        let result_opt = reply.get("result");
        let error_opt = reply.get("error");

        // Handle job.run success (job_queued)
        let is_job_queued = result_opt
            .and_then(|res| {
                if res.get("job_queued").is_some() {
                    Some(true)
                } else if let Some(s) = res.as_str() {
                    Some(s == "job_queued")
                } else {
                    None
                }
            })
            .unwrap_or(false);

        if is_job_queued {
            let _ = service
                .update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Dispatched)
                .await;
            let _ = service
                .append_message_logs(
                    context_id,
                    caller_id,
                    id,
                    vec![format!(
                        "Supervisor reply for job {}: job_queued (processed synchronously)",
                        job_id
                    )],
                )
                .await;
        } else if let Some(err_obj) = error_opt {
            let _ = service
                .update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Error)
                .await;
            let _ = service
                .append_message_logs(
                    context_id,
                    caller_id,
                    id,
                    vec![format!(
                        "Supervisor error for job {}: {} (processed synchronously)",
                        job_id, err_obj
                    )],
                )
                .await;
        }
    }

    // No correlation map needed; replies are handled synchronously via SupervisorHub

    // Spawn transport-status poller
    {
        let service_poll = service.clone();
        let poll_interval = std::time::Duration::from_secs(cfg.transport_poll_interval_secs);
        let poll_timeout = std::time::Duration::from_secs(cfg.transport_poll_timeout_secs);
        let out_id_cloned = out_id.clone();
        let mycelium = mycelium.clone();

        tokio::spawn(async move {
            let start = std::time::Instant::now();
            let client = mycelium;

            // Supervisor call context captured for sync status checks
            let sup_dest = dest_for_poller;
            let sup_topic = topic_for_poller;
            let job_id_opt = job_id_opt;

            let mut last_status: Option<TransportStatus> = Some(TransportStatus::Sent);
            // Ensure we only request supervisor job.status or job.result once per outbound message
            let mut requested_job_check: bool = false;

            loop {
                if start.elapsed() >= poll_timeout {
                    let _ = service_poll
                        .append_message_logs(
                            context_id,
                            caller_id,
                            id,
                            vec!["Transport-status polling timed out".to_string()],
                        )
                        .await;
                    // leave last known status; do not override
                    break;
                }

                match client.message_status(&out_id_cloned).await {
                    Ok(s) => {
                        if last_status.as_ref() != Some(&s) {
                            let _ = service_poll
                                .update_message_transport(context_id, caller_id, id, None, Some(s.clone()))
                                .await;
                            last_status = Some(s.clone());
                        }

                        // Stop on terminal states
                        if matches!(s, TransportStatus::Delivered | TransportStatus::Read) {
                            if let Some(job_id) = job_id_opt {
                                // First consult Redis for the latest job state in case we already have a terminal update
                                match service_poll.load_job(context_id, caller_id, job_id).await {
                                    Ok(job) => {
                                        // Promote to Started as soon as transport is delivered/read,
                                        // if currently Dispatched or WaitingForPrerequisites.
                                        // This makes DAG.started reflect "in-flight" work even when jobs
                                        // complete too quickly to observe an intermediate supervisor "running" status.
                                        if matches!(
                                            job.status(),
                                            JobStatus::Dispatched | JobStatus::WaitingForPrerequisites
                                        ) {
                                            let _ = service_poll
                                                .update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Started)
                                                .await;
                                        }
                                        match job.status() {
                                            JobStatus::Finished | JobStatus::Error => {
                                                // Local job is already terminal; skip supervisor job.status
                                                let _ = service_poll
                                                    .append_message_logs(
                                                        context_id,
                                                        caller_id,
                                                        id,
                                                        vec![format!(
                                                            "Local job {} status is terminal ({:?}); skipping supervisor job.status",
                                                            job_id,
                                                            job.status()
                                                        )],
                                                    )
                                                    .await;

                                                // If result is still empty, immediately request supervisor job.result
                                                if job.result.is_empty() {
                                                    let sup = cache
                                                        .get_or_create(
                                                            sup_hub.clone(),
                                                            sup_dest.clone(),
                                                            sup_topic.clone(),
                                                            secret_for_poller.clone(),
                                                        )
                                                        .await;
                                                    match sup.job_result_wait(job_id.to_string()).await {
                                                        Ok((_out2, reply2)) => {
                                                            // Interpret reply synchronously: success/error/bare string
                                                            let res = reply2.get("result");
                                                            if let Some(obj) = res.and_then(|v| v.as_object()) {
                                                                if let Some(s) = obj.get("success").and_then(|v| v.as_str()) {
                                                                    let mut patch = std::collections::HashMap::new();
                                                                    patch.insert("success".to_string(), s.to_string());
                                                                    let _ = service_poll
                                                                        .update_job_result_merge_unchecked(context_id, caller_id, job_id, patch)
                                                                        .await;
                                                                    let _ = service_poll
                                                                        .update_message_status(context_id, caller_id, id, MessageStatus::Processed)
                                                                        .await;
                                                                    // Also mark job as Finished so the flow can progress (ignore invalid transitions)
                                                                    let _ = service_poll
                                                                        .update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Finished)
                                                                        .await;
                                                                    let _ = service_poll
                                                                        .append_message_logs(
                                                                            context_id,
                                                                            caller_id,
                                                                            id,
                                                                            vec![format!("Updated job {} status to Finished (sync)", job_id)],
                                                                        )
                                                                        .await;
                                                                    // Existing log about storing result
                                                                    let _ = service_poll
                                                                        .append_message_logs(
                                                                            context_id,
                                                                            caller_id,
                                                                            id,
                                                                            vec![format!(
                                                                                "Stored supervisor job.result for job {} (success, sync)",
                                                                                job_id
                                                                            )],
                                                                        )
                                                                        .await;
                                                                } else if let Some(s) = obj.get("error").and_then(|v| v.as_str()) {
                                                                    let mut patch = std::collections::HashMap::new();
                                                                    patch.insert("error".to_string(), s.to_string());
                                                                    let _ = service_poll
                                                                        .update_job_result_merge_unchecked(context_id, caller_id, job_id, patch)
                                                                        .await;
                                                                    let _ = service_poll
                                                                        .update_message_status(context_id, caller_id, id, MessageStatus::Processed)
                                                                        .await;
                                                                    // Also mark job as Error so the flow can handle failure (ignore invalid transitions)
                                                                    let _ = service_poll
                                                                        .update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Error)
                                                                        .await;
                                                                    let _ = service_poll
                                                                        .append_message_logs(
                                                                            context_id,
                                                                            caller_id,
                                                                            id,
                                                                            vec![format!("Updated job {} status to Error (sync)", job_id)],
                                                                        )
                                                                        .await;
                                                                    // Existing log about storing result
                                                                    let _ = service_poll
                                                                        .append_message_logs(
                                                                            context_id,
                                                                            caller_id,
                                                                            id,
                                                                            vec![format!(
                                                                                "Stored supervisor job.result for job {} (error, sync)",
                                                                                job_id
                                                                            )],
                                                                        )
                                                                        .await;
                                                                }
                                                            } else if let Some(s) = res.and_then(|v| v.as_str()) {
                                                                let mut patch = std::collections::HashMap::new();
                                                                patch.insert("success".to_string(), s.to_string());
                                                                let _ = service_poll
                                                                    .update_job_result_merge_unchecked(context_id, caller_id, job_id, patch)
                                                                    .await;
                                                                let _ = service_poll
                                                                    .update_message_status(context_id, caller_id, id, MessageStatus::Processed)
                                                                    .await;
                                                                // Also mark job as Finished so the flow can progress (ignore invalid transitions)
                                                                let _ = service_poll
                                                                    .update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Finished)
                                                                    .await;
                                                                let _ = service_poll
                                                                    .append_message_logs(
                                                                        context_id,
                                                                        caller_id,
                                                                        id,
                                                                        vec![format!("Updated job {} status to Finished (sync)", job_id)],
                                                                    )
                                                                    .await;
                                                                // Existing log about storing result
                                                                let _ = service_poll
                                                                    .append_message_logs(
                                                                        context_id,
                                                                        caller_id,
                                                                        id,
                                                                        vec![format!(
                                                                            "Stored supervisor job.result for job {} (success, sync)",
                                                                            job_id
                                                                        )],
                                                                    )
                                                                    .await;
                                                            } else {
                                                                let _ = service_poll
                                                                    .append_message_logs(
                                                                        context_id,
                                                                        caller_id,
                                                                        id,
                                                                        vec!["Supervisor job.result reply missing recognizable fields".to_string()],
                                                                    )
                                                                    .await;
                                                            }
                                                        }
                                                        Err(e) => {
                                                            let _ = service_poll
                                                                .append_message_logs(
                                                                    context_id,
                                                                    caller_id,
                                                                    id,
                                                                    vec![format!("job.result request error for job {}: {}", job_id, e)],
                                                                )
                                                                .await;
                                                        }
                                                    }
                                                } else {
                                                    // Result already present; nothing to fetch
                                                    let _ = service_poll
                                                        .append_message_logs(
                                                            context_id,
                                                            caller_id,
                                                            id,
                                                            vec![format!(
                                                                "Job {} already has result; no supervisor calls needed",
                                                                job_id
                                                            )],
                                                        )
                                                        .await;
                                                }

                                                // Mark processed and stop polling for this message
                                                let _ = service_poll
                                                    .update_message_status(context_id, caller_id, id, MessageStatus::Processed)
                                                    .await;
                                                let _ = service_poll
                                                    .append_message_logs(
                                                        context_id,
                                                        caller_id,
                                                        id,
                                                        vec![format!(
                                                            "Terminal job {} detected; stopping transport polling",
                                                            job_id
                                                        )],
                                                    )
                                                    .await;
                                                break;
                                            }
                                            // Not terminal yet -> request supervisor job.status as before
                                            _ => {
                                                let sup = cache
                                                    .get_or_create(
                                                        sup_hub.clone(),
                                                        sup_dest.clone(),
                                                        sup_topic.clone(),
                                                        secret_for_poller.clone(),
                                                    )
                                                    .await;
                                                match sup.job_status_wait(job_id.to_string()).await {
                                                    Ok((_out_id, reply_status)) => {
                                                        // Interpret status reply synchronously
                                                        let result_opt = reply_status.get("result");
                                                        let error_opt = reply_status.get("error");
                                                        if let Some(err_obj) = error_opt {
                                                            let _ = service_poll
                                                                .update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Error)
                                                                .await;
                                                            let _ = service_poll
                                                                .append_message_logs(
                                                                    context_id,
                                                                    caller_id,
                                                                    id,
                                                                    vec![format!(
                                                                        "Supervisor error for job {}: {} (sync)",
                                                                        job_id, err_obj
                                                                    )],
                                                                )
                                                                .await;
                                                        } else if let Some(res) = result_opt {
                                                            let status_candidate = res
                                                                .get("status")
                                                                .and_then(|v| v.as_str())
                                                                .or_else(|| res.as_str());
                                                            if let Some(remote_status) = status_candidate {
                                                                if let Some((mapped, terminal)) =
                                                                    map_supervisor_job_status(remote_status)
                                                                {
                                                                    let _ = service_poll
                                                                        .update_job_status_unchecked(context_id, caller_id, job_id, mapped.clone())
                                                                        .await;
                                                                    let _ = service_poll
                                                                        .append_message_logs(
                                                                            context_id,
                                                                            caller_id,
                                                                            id,
                                                                            vec![format!(
                                                                                "Supervisor job.status for job {} -> {} (mapped to {:?}, sync)",
                                                                                job_id, remote_status, mapped
                                                                            )],
                                                                        )
                                                                        .await;

                                                                    // If terminal, request job.result now (handled above for the local terminal case)
                                                                    if terminal {
                                                                        // trigger job.result only if the result is empty, to avoid spam
                                                                        if let Ok(j_after) =
                                                                            service_poll.load_job(context_id, caller_id, job_id).await
                                                                        {
                                                                            if j_after.result.is_empty() {
                                                                                let sup2 = cache
                                                                                    .get_or_create(
                                                                                        sup_hub.clone(),
                                                                                        sup_dest.clone(),
                                                                                        sup_topic.clone(),
                                                                                        secret_for_poller.clone(),
                                                                                    )
                                                                                    .await;
                                                                                let _ = sup2
                                                                                    .job_result_wait(job_id.to_string())
                                                                                    .await
                                                                                    .and_then(|(_oid, reply2)| {
                                                                                        // Minimal parse and store
                                                                                        let res2 = reply2.get("result");
                                                                                        if let Some(obj) = res2.and_then(|v| v.as_object()) {
                                                                                            if let Some(s) = obj.get("success").and_then(|v| v.as_str()) {
                                                                                                let mut patch = std::collections::HashMap::new();
                                                                                                patch.insert("success".to_string(), s.to_string());
                                                                                                tokio::spawn({
                                                                                                    let service_poll = service_poll.clone();
                                                                                                    async move {
                                                                                                        let _ = service_poll
                                                                                                            .update_job_result_merge_unchecked(context_id, caller_id, job_id, patch)
                                                                                                            .await;
                                                                                                    }
                                                                                                });
                                                                                            }
                                                                                        }
                                                                                        Ok((String::new(), Value::Null))
                                                                                    });
                                                                            }
                                                                        }

                                                                        // Mark processed and stop polling for this message
                                                                        let _ = service_poll
                                                                            .update_message_status(context_id, caller_id, id, MessageStatus::Processed)
                                                                            .await;
                                                                        let _ = service_poll
                                                                            .append_message_logs(
                                                                                context_id,
                                                                                caller_id,
                                                                                id,
                                                                                vec![format!(
                                                                                    "Terminal job {} detected from supervisor status; stopping transport polling",
                                                                                    job_id
                                                                                )],
                                                                            )
                                                                            .await;
                                                                        break;
                                                                    }
                                                                }
                                                            }
                                                        }
                                                    }
                                                    Err(e) => {
                                                        let _ = service_poll
                                                            .append_message_logs(
                                                                context_id,
                                                                caller_id,
                                                                id,
                                                                vec![format!("job.status request error: {}", e)],
                                                            )
                                                            .await;
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    // If we cannot load the job, fall back to requesting job.status
                                    Err(_) => {
                                        let sup = cache
                                            .get_or_create(
                                                sup_hub.clone(),
                                                sup_dest.clone(),
                                                sup_topic.clone(),
                                                secret_for_poller.clone(),
                                            )
                                            .await;
                                        match sup.job_status_wait(job_id.to_string()).await {
                                            Ok((_out_id, _reply_status)) => {
                                                let _ = service_poll
                                                    .append_message_logs(
                                                        context_id,
                                                        caller_id,
                                                        id,
                                                        vec![format!(
                                                            "Requested supervisor job.status for job {} (fallback; load_job failed, sync)",
                                                            job_id
                                                        )],
                                                    )
                                                    .await;
                                            }
                                            Err(e) => {
                                                let _ = service_poll
                                                    .append_message_logs(
                                                        context_id,
                                                        caller_id,
                                                        id,
                                                        vec![format!("job.status request error: {}", e)],
                                                    )
                                                    .await;
                                            }
                                        }
                                    }
                                }
                                // Ensure we only do this once
                                requested_job_check = true;
                            }
                            // break;
                        }
                        if matches!(s, TransportStatus::Failed) {
                            let _ = service_poll
                                .append_message_logs(
                                    context_id,
                                    caller_id,
                                    id,
                                    vec![format!("Transport failed for outbound id {out_id_cloned}")],
                                )
                                .await;
                            break;
                        }
                    }
                    Err(e) => {
                        // Log and continue polling
                        let _ = service_poll
                            .append_message_logs(
                                context_id,
                                caller_id,
                                id,
                                vec![format!("messageStatus query error: {e}")],
                            )
                            .await;
                    }
                }

                tokio::time::sleep(poll_interval).await;
            }
        });
    }

    Ok(())
}

fn determine_script_type(msg: &Message) -> ScriptType {
    // Prefer the embedded job's script_type if available, else fall back to message.message_type
    match msg.job.first() {
        Some(j) => j.script_type.clone(),
        None => msg.message_type.clone(),
    }
}

fn build_params(msg: &Message) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
    // Minimal mapping:
    // - "job.run" with exactly one embedded job: [{ "job": <job> }]
    // - otherwise: []
    if msg.message == "job.run"
        && let Some(j) = msg.job.first()
    {
        let jv = job_to_json(j)?;
        return Ok(json!([ { "job": jv } ]));
    }

    Ok(json!([]))
}

fn job_to_json(job: &Job) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
    Ok(serde_json::to_value(job)?)
}

fn parse_message_key(s: &str) -> Option<(u32, u32)> {
    // Expect "message:{caller_id}:{id}"
    let mut it = s.split(':');
    match (it.next(), it.next(), it.next(), it.next()) {
        (Some("message"), Some(caller), Some(id), None) => {
            let caller_id = caller.parse::<u32>().ok()?;
            let msg_id = id.parse::<u32>().ok()?;
            Some((caller_id, msg_id))
        }
        _ => None,
    }
}

/// Map supervisor job.status -> (local JobStatus, terminal)
fn map_supervisor_job_status(s: &str) -> Option<(JobStatus, bool)> {
    match s {
        "created" | "queued" => Some((JobStatus::Dispatched, false)),
        "running" => Some((JobStatus::Started, false)),
        "completed" => Some((JobStatus::Finished, true)),
        "failed" | "timeout" => Some((JobStatus::Error, true)),
        _ => None,
    }
}
|
|
||||||
|
/// Auto-discover contexts periodically and ensure a router loop exists for each.
|
||||||
|
/// Returns a JoinHandle of the discovery task (router loops are detached).
|
||||||
|
pub fn start_router_auto(service: AppService, cfg: RouterConfig) -> tokio::task::JoinHandle<()> {
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut active: HashSet<u32> = HashSet::new();
|
||||||
|
loop {
|
||||||
|
match service.list_context_ids().await {
|
||||||
|
Ok(ids) => {
|
||||||
|
for ctx_id in ids {
|
||||||
|
if !active.contains(&ctx_id) {
|
||||||
|
// Spawn a loop for this new context
|
||||||
|
let cfg_ctx = RouterConfig {
|
||||||
|
context_ids: vec![ctx_id],
|
||||||
|
..cfg.clone()
|
||||||
|
};
|
||||||
|
let _ = start_router(service.clone(), cfg_ctx);
|
||||||
|
active.insert(ctx_id);
|
||||||
|
info!(context_id = ctx_id, "Started loop for context");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!(error=%e, "list_context_ids error");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
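The canonical key format parsed by parse_message_key above is easy to sanity-check in isolation. A minimal standalone sketch (the function body is copied from the file above; the asserts are illustrative, not part of the commit):

// Sketch: exercises the "message:{caller_id}:{id}" format parsed above.
fn parse_message_key(s: &str) -> Option<(u32, u32)> {
    let mut it = s.split(':');
    match (it.next(), it.next(), it.next(), it.next()) {
        (Some("message"), Some(caller), Some(id), None) => {
            Some((caller.parse().ok()?, id.parse().ok()?))
        }
        _ => None,
    }
}

fn main() {
    assert_eq!(parse_message_key("message:7:42"), Some((7, 42)));
    // Wrong prefix or extra segments are rejected.
    assert_eq!(parse_message_key("job:7:42"), None);
    assert_eq!(parse_message_key("message:7:42:extra"), None);
}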
676
bin/coordinator/src/rpc.rs
Normal file
@@ -0,0 +1,676 @@
use std::{
    collections::HashMap,
    net::{IpAddr, SocketAddr},
    sync::Arc,
};

use jsonrpsee::{
    RpcModule,
    server::{ServerBuilder, ServerHandle},
    types::error::ErrorObjectOwned,
};
use serde::Deserialize;
use serde_json::{Value, json};

use crate::{
    dag::{DagError, FlowDag},
    models::{
        Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageFormatType,
        MessageStatus, Runner, ScriptType,
    },
    service::AppService,
    time::current_timestamp,
};

/// The OpenRPC specification for the HeroCoordinator JSON-RPC API
const OPENRPC_SPEC: &str = include_str!("../specs/openrpc.json");

pub struct AppState {
    pub service: AppService,
}

impl AppState {
    pub fn new(service: AppService) -> Self {
        Self { service }
    }
}

// -----------------------------
// Error helpers
// -----------------------------

fn invalid_params_err<E: std::fmt::Display>(e: E) -> ErrorObjectOwned {
    ErrorObjectOwned::owned(-32602, "Invalid params", Some(Value::String(e.to_string())))
}

fn storage_err(e: Box<dyn std::error::Error + Send + Sync>) -> ErrorObjectOwned {
    let msg = e.to_string();
    if msg.contains("Key not found") {
        ErrorObjectOwned::owned(-32001, "Not Found", Some(Value::String(msg)))
    } else {
        ErrorObjectOwned::owned(-32010, "Storage Error", Some(Value::String(msg)))
    }
}

fn dag_err(e: DagError) -> ErrorObjectOwned {
    match e {
        DagError::Storage(inner) => storage_err(inner),
        DagError::MissingDependency { .. } => ErrorObjectOwned::owned(
            -32020,
            "DAG Missing Dependency",
            Some(Value::String(e.to_string())),
        ),
        DagError::CycleDetected { .. } => ErrorObjectOwned::owned(
            -32021,
            "DAG Cycle Detected",
            Some(Value::String(e.to_string())),
        ),
        DagError::UnknownJob { .. } => ErrorObjectOwned::owned(
            -32022,
            "DAG Unknown Job",
            Some(Value::String(e.to_string())),
        ),
        DagError::DependenciesIncomplete { .. } => ErrorObjectOwned::owned(
            -32023,
            "DAG Dependencies Incomplete",
            Some(Value::String(e.to_string())),
        ),
        DagError::FlowFailed { .. } => ErrorObjectOwned::owned(
            -32024,
            "DAG Flow Failed",
            Some(Value::String(e.to_string())),
        ),
        DagError::JobNotStarted { .. } => ErrorObjectOwned::owned(
            -32025,
            "DAG Job Not Started",
            Some(Value::String(e.to_string())),
        ),
    }
}

// -----------------------------
// Create DTOs and Param wrappers
// -----------------------------

#[derive(Debug, Deserialize)]
pub struct ActorCreate {
    pub id: u32,
    pub pubkey: String,
    pub address: Vec<IpAddr>,
}
impl ActorCreate {
    pub fn into_domain(self) -> Result<Actor, String> {
        let ts = current_timestamp();
        let v = json!({
            "id": self.id,
            "pubkey": self.pubkey,
            "address": self.address,
            "created_at": ts,
            "updated_at": ts,
        });
        serde_json::from_value(v).map_err(|e| e.to_string())
    }
}

#[derive(Debug, Deserialize)]
pub struct ContextCreate {
    pub id: u32,
    pub admins: Vec<u32>,
    pub readers: Vec<u32>,
    pub executors: Vec<u32>,
}
impl ContextCreate {
    pub fn into_domain(self) -> Context {
        let ts = current_timestamp();

        let ContextCreate {
            id,
            admins,
            readers,
            executors,
        } = self;

        Context {
            id,
            admins,
            readers,
            executors,
            created_at: ts,
            updated_at: ts,
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct RunnerCreate {
    pub id: u32,
    pub pubkey: String,
    pub address: IpAddr,
    pub topic: String,
    /// The script type this runner executes (used for routing)
    pub script_type: ScriptType,
    pub local: bool,
    /// Optional secret used for authenticated supervisor calls (if required)
    pub secret: Option<String>,
}
impl RunnerCreate {
    pub fn into_domain(self) -> Runner {
        let ts = current_timestamp();

        let RunnerCreate {
            id,
            pubkey,
            address,
            topic,
            script_type,
            local,
            secret,
        } = self;

        Runner {
            id,
            pubkey,
            address,
            topic,
            script_type,
            local,
            secret,
            created_at: ts,
            updated_at: ts,
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct FlowCreate {
    pub id: u32,
    pub caller_id: u32,
    pub context_id: u32,
    pub jobs: Vec<u32>,
    pub env_vars: HashMap<String, String>,
}

impl FlowCreate {
    pub fn into_domain(self) -> Flow {
        let ts = current_timestamp();

        let FlowCreate {
            id,
            caller_id,
            context_id,
            jobs,
            env_vars,
        } = self;

        Flow {
            id,
            caller_id,
            context_id,
            jobs,
            env_vars,
            result: HashMap::new(),
            created_at: ts,
            updated_at: ts,
            status: FlowStatus::Created,
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct JobCreate {
    pub id: u32,
    pub caller_id: u32,
    pub context_id: u32,
    pub script: String,
    pub script_type: ScriptType,
    pub timeout: u32,
    pub retries: u8,
    pub env_vars: HashMap<String, String>,
    pub prerequisites: Vec<String>,
    pub depends: Vec<u32>,
}

impl JobCreate {
    pub fn into_domain(self) -> Job {
        let ts = current_timestamp();

        let JobCreate {
            id,
            caller_id,
            context_id,
            script,
            script_type,
            timeout,
            retries,
            env_vars,
            prerequisites,
            depends,
        } = self;

        Job {
            id,
            caller_id,
            context_id,
            script,
            script_type,
            timeout,
            retries,
            env_vars,
            result: HashMap::new(),
            prerequisites,
            depends,
            created_at: ts,
            updated_at: ts,
            status: JobStatus::WaitingForPrerequisites,
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct MessageCreate {
    pub id: u32,
    pub caller_id: u32,
    pub context_id: u32,
    pub message: String,
    pub message_type: ScriptType,
    pub message_format_type: MessageFormatType,
    pub timeout: u32,
    pub timeout_ack: u32,
    pub timeout_result: u32,
    pub job: Vec<JobCreate>,
}
impl MessageCreate {
    pub fn into_domain(self) -> Message {
        let ts = current_timestamp();

        let MessageCreate {
            id,
            caller_id,
            context_id,
            message,
            message_type,
            message_format_type,
            timeout,
            timeout_ack,
            timeout_result,
            job,
        } = self;

        Message {
            id,
            caller_id,
            context_id,
            message,
            message_type,
            message_format_type,
            timeout,
            timeout_ack,
            timeout_result,
            transport_id: None,
            transport_status: None,
            job: job.into_iter().map(JobCreate::into_domain).collect(),
            logs: Vec::new(),
            created_at: ts,
            updated_at: ts,
            status: MessageStatus::Dispatched,
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct ActorCreateParams {
    pub actor: ActorCreate,
}
#[derive(Debug, Deserialize)]
pub struct ActorLoadParams {
    pub id: u32,
}

#[derive(Debug, Deserialize)]
pub struct ContextCreateParams {
    pub context: ContextCreate,
}
#[derive(Debug, Deserialize)]
pub struct ContextLoadParams {
    pub id: u32,
}

#[derive(Debug, Deserialize)]
pub struct RunnerCreateParams {
    pub context_id: u32,
    pub runner: RunnerCreate,
}
#[derive(Debug, Deserialize)]
pub struct RunnerLoadParams {
    pub context_id: u32,
    pub id: u32,
}

#[derive(Debug, Deserialize)]
pub struct FlowCreateParams {
    pub context_id: u32,
    pub flow: FlowCreate,
}
#[derive(Debug, Deserialize)]
pub struct FlowLoadParams {
    pub context_id: u32,
    pub id: u32,
}

#[derive(Debug, Deserialize)]
pub struct JobCreateParams {
    pub context_id: u32,
    pub job: JobCreate,
}
#[derive(Debug, Deserialize)]
pub struct JobLoadParams {
    pub context_id: u32,
    pub caller_id: u32,
    pub id: u32,
}

#[derive(Debug, Deserialize)]
pub struct MessageCreateParams {
    pub context_id: u32,
    pub message: MessageCreate,
}
#[derive(Debug, Deserialize)]
pub struct MessageLoadParams {
    pub context_id: u32,
    pub caller_id: u32,
    pub id: u32,
}

// -----------------------------
// Rpc module builder (manual registration)
// -----------------------------

pub fn build_module(state: Arc<AppState>) -> RpcModule<()> {
    let mut module: RpcModule<()> = RpcModule::new(());

    // Actor
    {
        let state = state.clone();
        module
            .register_async_method("actor.create", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: ActorCreateParams = params.parse().map_err(invalid_params_err)?;
                    let actor = p.actor.into_domain().map_err(invalid_params_err)?;
                    let actor = state
                        .service
                        .create_actor(actor)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(actor)
                }
            })
            .expect("register actor.create");
    }
    {
        let state = state.clone();
        module
            .register_async_method("actor.load", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: ActorLoadParams = params.parse().map_err(invalid_params_err)?;
                    let actor = state.service.load_actor(p.id).await.map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(actor)
                }
            })
            .expect("register actor.load");
    }

    // Context
    {
        let state = state.clone();
        module
            .register_async_method("context.create", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: ContextCreateParams = params.parse().map_err(invalid_params_err)?;
                    let ctx = p.context.into_domain();
                    let ctx = state
                        .service
                        .create_context(ctx)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(ctx)
                }
            })
            .expect("register context.create");
    }
    {
        let state = state.clone();
        module
            .register_async_method("context.load", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: ContextLoadParams = params.parse().map_err(invalid_params_err)?;
                    let ctx = state
                        .service
                        .load_context(p.id)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(ctx)
                }
            })
            .expect("register context.load");
    }

    // Runner
    {
        let state = state.clone();
        module
            .register_async_method("runner.create", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: RunnerCreateParams = params.parse().map_err(invalid_params_err)?;
                    let runner = p.runner.into_domain();
                    let runner = state
                        .service
                        .create_runner(p.context_id, runner)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(runner)
                }
            })
            .expect("register runner.create");
    }
    {
        let state = state.clone();
        module
            .register_async_method("runner.load", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: RunnerLoadParams = params.parse().map_err(invalid_params_err)?;
                    let runner = state
                        .service
                        .load_runner(p.context_id, p.id)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(runner)
                }
            })
            .expect("register runner.load");
    }

    // Flow
    {
        let state = state.clone();
        module
            .register_async_method("flow.create", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: FlowCreateParams = params.parse().map_err(invalid_params_err)?;
                    let flow = p.flow.into_domain();
                    let flow = state
                        .service
                        .create_flow(p.context_id, flow)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(flow)
                }
            })
            .expect("register flow.create");
    }
    {
        let state = state.clone();
        module
            .register_async_method("flow.load", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: FlowLoadParams = params.parse().map_err(invalid_params_err)?;
                    let flow = state
                        .service
                        .load_flow(p.context_id, p.id)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(flow)
                }
            })
            .expect("register flow.load");
    }
    {
        let state = state.clone();
        module
            .register_async_method("flow.dag", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: FlowLoadParams = params.parse().map_err(invalid_params_err)?;
                    let dag: FlowDag = state
                        .service
                        .flow_dag(p.context_id, p.id)
                        .await
                        .map_err(dag_err)?;
                    Ok::<_, ErrorObjectOwned>(dag)
                }
            })
            .expect("register flow.dag");
    }
    {
        let state = state.clone();
        module
            .register_async_method("flow.start", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: FlowLoadParams = params.parse().map_err(invalid_params_err)?;
                    let started: bool = state
                        .service
                        .flow_start(p.context_id, p.id)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(started)
                }
            })
            .expect("register flow.start");
    }

    // Job
    {
        let state = state.clone();
        module
            .register_async_method("job.create", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: JobCreateParams = params.parse().map_err(invalid_params_err)?;
                    let job = p.job.into_domain();
                    let job = state
                        .service
                        .create_job(p.context_id, job)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(job)
                }
            })
            .expect("register job.create");
    }
    {
        let state = state.clone();
        module
            .register_async_method("job.load", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: JobLoadParams = params.parse().map_err(invalid_params_err)?;
                    let job = state
                        .service
                        .load_job(p.context_id, p.caller_id, p.id)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(job)
                }
            })
            .expect("register job.load");
    }

    // Message
    {
        let state = state.clone();
        module
            .register_async_method("message.create", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: MessageCreateParams = params.parse().map_err(invalid_params_err)?;
                    let message = p.message.into_domain();
                    let message = state
                        .service
                        .create_message(p.context_id, message)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(message)
                }
            })
            .expect("register message.create");
    }
    {
        let state = state;
        module
            .register_async_method("message.load", move |params, _caller, _ctx| {
                let state = state.clone();
                async move {
                    let p: MessageLoadParams = params.parse().map_err(invalid_params_err)?;
                    let msg = state
                        .service
                        .load_message(p.context_id, p.caller_id, p.id)
                        .await
                        .map_err(storage_err)?;
                    Ok::<_, ErrorObjectOwned>(msg)
                }
            })
            .expect("register message.load");
    }
    {
        module
            .register_async_method("rpc.discover", move |_params, _caller, _ctx| async move {
                let spec = serde_json::from_str::<serde_json::Value>(OPENRPC_SPEC)
                    .expect("Failed to parse OpenRPC spec");
                Ok::<_, ErrorObjectOwned>(spec)
            })
            .expect("register rpc.discover");
    }

    module
}

// -----------------------------
// Server runners (HTTP/WS on separate listeners)
// -----------------------------

pub async fn start_http<C>(
    addr: SocketAddr,
    module: RpcModule<C>,
) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> {
    let server = ServerBuilder::default().build(addr).await?;
    let handle = server.start(module);
    Ok(handle)
}

pub async fn start_ws<C>(
    addr: SocketAddr,
    module: RpcModule<C>,
) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> {
    // jsonrpsee server supports both HTTP and WS; using a second listener gives us a dedicated WS port.
    let server = ServerBuilder::default().build(addr).await?;
    let handle = server.start(module);
    Ok(handle)
}
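Every handler above parses its params as a single named-parameter object (see the *Params structs). A minimal client sketch using jsonrpsee's HTTP client; the listen address is an assumption, not part of this diff:

// Sketch only: assumes the coordinator is serving HTTP JSON-RPC on 127.0.0.1:9652.
use jsonrpsee::core::client::ClientT;
use jsonrpsee::core::params::ObjectParams;
use jsonrpsee::http_client::HttpClientBuilder;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = HttpClientBuilder::default().build("http://127.0.0.1:9652")?;

    // context.load takes { "id": <u32> }, mirroring ContextLoadParams above.
    let mut params = ObjectParams::new();
    params.insert("id", 1u32)?;
    let ctx: serde_json::Value = client.request("context.load", params).await?;
    println!("{ctx}");
    Ok(())
}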
1211
bin/coordinator/src/service.rs
Normal file
File diff suppressed because it is too large
3
bin/coordinator/src/storage.rs
Normal file
@@ -0,0 +1,3 @@
pub mod redis;

pub use redis::RedisDriver;
827
bin/coordinator/src/storage/redis.rs
Normal file
@@ -0,0 +1,827 @@
use std::collections::HashMap as StdHashMap;

use redis::{AsyncCommands, aio::ConnectionManager};
use serde::Serialize;
use serde::de::DeserializeOwned;
use serde_json::{Map as JsonMap, Value};
use tokio::sync::Mutex;

use crate::models::{
    Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageStatus, Runner,
    TransportStatus,
};
use tracing::{error, warn};

type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;

/// Async Redis driver that saves/loads every model as a Redis hash (HSET),
/// using canonical keys as specified in the specs.
/// - Complex fields (arrays, maps, nested structs) are JSON-encoded per field
/// - Scalars are written as plain strings (numbers/bools as their string representation)
/// - On load, each field value is first attempted to parse as JSON; if that fails it is treated as a plain string
pub struct RedisDriver {
    /// Base address, e.g. "127.0.0.1:6379" or "redis://127.0.0.1:6379"
    base_addr: String,
    /// Cache of connection managers per DB index
    managers: Mutex<StdHashMap<u32, ConnectionManager>>,
}

impl RedisDriver {
    /// Create a new driver for the given Redis address.
    /// Accepts either "host:port" or "redis://host:port"
    pub async fn new(addr: impl Into<String>) -> Result<Self> {
        let raw = addr.into();
        let base_addr = if raw.starts_with("redis://") {
            raw
        } else {
            format!("redis://{}", raw)
        };
        Ok(Self {
            base_addr,
            managers: Mutex::new(StdHashMap::new()),
        })
    }

    /// Get or create a ConnectionManager for the given DB index.
    async fn manager_for_db(&self, db: u32) -> Result<ConnectionManager> {
        {
            // Fast path: check existing
            let guard = self.managers.lock().await;
            if let Some(cm) = guard.get(&db) {
                return Ok(cm.clone());
            }
        }

        // Slow path: create a new manager and cache it
        let url = format!("{}/{}", self.base_addr.trim_end_matches('/'), db);
        let client = redis::Client::open(url.as_str()).map_err(|e| {
            error!(%url, db=%db, error=%e, "Redis client open failed");
            e
        })?;
        let cm = client.get_connection_manager().await.map_err(|e| {
            error!(%url, db=%db, error=%e, "Redis connection manager init failed");
            e
        })?;

        let mut guard = self.managers.lock().await;
        let entry = guard.entry(db).or_insert(cm);
        Ok(entry.clone())
    }

    // -----------------------------
    // Generic helpers (serde <-> HSET)
    // -----------------------------

    fn struct_to_hset_pairs<T: Serialize>(value: &T) -> Result<Vec<(String, String)>> {
        let json = serde_json::to_value(value)?;
        let obj = json
            .as_object()
            .ok_or("Model must serialize to a JSON object")?;
        let mut pairs = Vec::with_capacity(obj.len());
        for (k, v) in obj {
            let s = match v {
                Value::Array(_) | Value::Object(_) => serde_json::to_string(v)?, // complex - store JSON
                Value::String(s) => s.clone(),     // string - plain
                Value::Number(n) => n.to_string(), // number - plain
                Value::Bool(b) => b.to_string(),   // bool - plain
                Value::Null => "null".to_string(), // null sentinel
            };
            pairs.push((k.clone(), s));
        }
        Ok(pairs)
    }

    fn hmap_to_struct<T: DeserializeOwned>(map: StdHashMap<String, String>) -> Result<T> {
        let mut obj = JsonMap::with_capacity(map.len());
        for (k, s) in map {
            // Try parse as JSON first (works for arrays, objects, numbers, booleans, null)
            // If that fails, fallback to string.
            match serde_json::from_str::<Value>(&s) {
                Ok(v) => {
                    obj.insert(k, v);
                }
                Err(_) => {
                    obj.insert(k, Value::String(s));
                }
            }
        }
        let json = Value::Object(obj);
        let model = serde_json::from_value(json)?;
        Ok(model)
    }

    async fn hset_model<T: Serialize>(&self, db: u32, key: &str, model: &T) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let pairs = Self::struct_to_hset_pairs(model).map_err(|e| {
            error!(db=%db, key=%key, error=%e, "Serialize model to HSET pairs failed");
            e
        })?;
        // Ensure no stale fields
        let del_res: redis::RedisResult<u64> = cm.del(key).await;
        if let Err(e) = del_res {
            warn!(db=%db, key=%key, error=%e, "DEL before HSET failed");
        }
        // Write all fields
        let _: () = cm.hset_multiple(key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET multiple failed");
            e
        })?;
        Ok(())
    }

    async fn hget_model<T: DeserializeOwned>(&self, db: u32, key: &str) -> Result<T> {
        let mut cm = self.manager_for_db(db).await?;
        let map: StdHashMap<String, String> = cm.hgetall(key).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HGETALL failed");
            e
        })?;
        if map.is_empty() {
            // NotFound is expected in some flows; don't log as error
            return Err(format!("Key not found: {}", key).into());
        }
        Self::hmap_to_struct(map).map_err(|e| {
            error!(db=%db, key=%key, error=%e, "Deserialize model from HGETALL failed");
            e
        })
    }

    // -----------------------------
    // Key helpers (canonical keys)
    // -----------------------------

    fn actor_key(id: u32) -> String {
        format!("actor:{}", id)
    }

    fn context_key(id: u32) -> String {
        format!("context:{}", id)
    }

    fn flow_key(id: u32) -> String {
        format!("flow:{}", id)
    }

    fn runner_key(id: u32) -> String {
        format!("runner:{}", id)
    }

    fn job_key(caller_id: u32, id: u32) -> String {
        format!("job:{}:{}", caller_id, id)
    }

    fn message_key(caller_id: u32, id: u32) -> String {
        format!("message:{}:{}", caller_id, id)
    }

    // -----------------------------
    // Context (DB = context.id)
    // -----------------------------

    /// Save a Context in its own DB (db index = context.id)
    pub async fn save_context(&self, ctx: &Context) -> Result<()> {
        // We don't have field access; compute db and key via JSON to avoid changing model definitions.
        // Extract "id" from serialized JSON object.
        let json = serde_json::to_value(ctx)?;
        let id = json
            .get("id")
            .and_then(|v| v.as_u64())
            .ok_or("Context.id missing or not a number")? as u32;
        let key = Self::context_key(id);
        // Write the context hash in its own DB
        self.hset_model(id, &key, ctx).await?;
        // Register this context id in the global registry (DB 0)
        let _ = self.register_context_id(id).await;
        Ok(())
    }

    /// Load a Context from its own DB (db index = id)
    pub async fn load_context(&self, id: u32) -> Result<Context> {
        let key = Self::context_key(id);
        self.hget_model(id, &key).await
    }

    // -----------------------------
    // Actor
    // -----------------------------

    /// Save an Actor to the given DB (tenant/context DB)
    pub async fn save_actor(&self, db: u32, actor: &Actor) -> Result<()> {
        let json = serde_json::to_value(actor)?;
        let id = json
            .get("id")
            .and_then(|v| v.as_u64())
            .ok_or("Actor.id missing or not a number")? as u32;
        let key = Self::actor_key(id);
        self.hset_model(db, &key, actor).await
    }

    /// Load an Actor by id from the given DB
    pub async fn load_actor(&self, db: u32, id: u32) -> Result<Actor> {
        let key = Self::actor_key(id);
        self.hget_model(db, &key).await
    }
    /// Save an Actor globally in DB 0 (Actor is context-independent)
    pub async fn save_actor_global(&self, actor: &Actor) -> Result<()> {
        let json = serde_json::to_value(actor)?;
        let id = json
            .get("id")
            .and_then(|v| v.as_u64())
            .ok_or("Actor.id missing or not a number")? as u32;
        let key = Self::actor_key(id);
        self.hset_model(0, &key, actor).await
    }

    /// Load an Actor globally from DB 0 by id
    pub async fn load_actor_global(&self, id: u32) -> Result<Actor> {
        let key = Self::actor_key(id);
        self.hget_model(0, &key).await
    }

    // -----------------------------
    // Runner
    // -----------------------------

    pub async fn save_runner(&self, db: u32, runner: &Runner) -> Result<()> {
        let json = serde_json::to_value(runner)?;
        let id = json
            .get("id")
            .and_then(|v| v.as_u64())
            .ok_or("Runner.id missing or not a number")? as u32;
        let key = Self::runner_key(id);
        self.hset_model(db, &key, runner).await
    }

    pub async fn load_runner(&self, db: u32, id: u32) -> Result<Runner> {
        let key = Self::runner_key(id);
        self.hget_model(db, &key).await
    }

    // -----------------------------
    // Flow
    // -----------------------------

    pub async fn save_flow(&self, db: u32, flow: &Flow) -> Result<()> {
        let json = serde_json::to_value(flow)?;
        let id = json
            .get("id")
            .and_then(|v| v.as_u64())
            .ok_or("Flow.id missing or not a number")? as u32;
        let key = Self::flow_key(id);
        self.hset_model(db, &key, flow).await
    }

    pub async fn load_flow(&self, db: u32, id: u32) -> Result<Flow> {
        let key = Self::flow_key(id);
        self.hget_model(db, &key).await
    }

    // -----------------------------
    // Job
    // -----------------------------

    pub async fn save_job(&self, db: u32, job: &Job) -> Result<()> {
        let json = serde_json::to_value(job)?;
        let id = json
            .get("id")
            .and_then(|v| v.as_u64())
            .ok_or("Job.id missing or not a number")? as u32;
        let caller_id = json
            .get("caller_id")
            .and_then(|v| v.as_u64())
            .ok_or("Job.caller_id missing or not a number")? as u32;
        let key = Self::job_key(caller_id, id);
        self.hset_model(db, &key, job).await
    }

    pub async fn load_job(&self, db: u32, caller_id: u32, id: u32) -> Result<Job> {
        let key = Self::job_key(caller_id, id);
        self.hget_model(db, &key).await
    }

    /// Atomically update a job's status and `updated_at` fields.
    /// - No transition validation is performed.
    /// - Writes only the two fields via HSET to avoid rewriting the whole model.
    pub async fn update_job_status(
        &self,
        db: u32,
        caller_id: u32,
        id: u32,
        status: JobStatus,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::job_key(caller_id, id);

        // Serialize enum into the same plain string representation stored by create paths
        let status_str = match serde_json::to_value(&status)? {
            Value::String(s) => s,
            v => v.to_string(),
        };

        let ts = crate::time::current_timestamp();

        let pairs = vec![
            ("status".to_string(), status_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_job_status failed");
            e
        })?;
        Ok(())
    }

    // -----------------------------
    // Message
    // -----------------------------

    pub async fn save_message(&self, db: u32, message: &Message) -> Result<()> {
        let json = serde_json::to_value(message)?;
        let id = json
            .get("id")
            .and_then(|v| v.as_u64())
            .ok_or("Message.id missing or not a number")? as u32;
        let caller_id = json
            .get("caller_id")
            .and_then(|v| v.as_u64())
            .ok_or("Message.caller_id missing or not a number")? as u32;
        let key = Self::message_key(caller_id, id);
        self.hset_model(db, &key, message).await
    }

    pub async fn load_message(&self, db: u32, caller_id: u32, id: u32) -> Result<Message> {
        let key = Self::message_key(caller_id, id);
        self.hget_model(db, &key).await
    }

    // -----------------------------
    // Partial update helpers
    // -----------------------------

    /// Flow: update only status and updated_at
    pub async fn update_flow_status(&self, db: u32, id: u32, status: FlowStatus) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::flow_key(id);

        let status_str = match serde_json::to_value(&status)? {
            Value::String(s) => s,
            v => v.to_string(),
        };
        let ts = crate::time::current_timestamp();

        let pairs = vec![
            ("status".to_string(), status_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_flow_status failed");
            e
        })?;
        Ok(())
    }

    /// Message: update only status and updated_at
    pub async fn update_message_status(
        &self,
        db: u32,
        caller_id: u32,
        id: u32,
        status: MessageStatus,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::message_key(caller_id, id);

        let status_str = match serde_json::to_value(&status)? {
            Value::String(s) => s,
            v => v.to_string(),
        };
        let ts = crate::time::current_timestamp();

        let pairs = vec![
            ("status".to_string(), status_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_message_status failed");
            e
        })?;
        Ok(())
    }

    /// Message: update transport_id / transport_status (optionally) and bump updated_at
    pub async fn update_message_transport(
        &self,
        db: u32,
        caller_id: u32,
        id: u32,
        transport_id: Option<String>,
        transport_status: Option<TransportStatus>,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::message_key(caller_id, id);

        let mut pairs: Vec<(String, String)> = Vec::new();

        if let Some(tid) = transport_id {
            pairs.push(("transport_id".to_string(), tid));
        }

        if let Some(ts_status) = transport_status {
            let status_str = match serde_json::to_value(&ts_status)? {
                Value::String(s) => s,
                v => v.to_string(),
            };
            pairs.push(("transport_status".to_string(), status_str));
        }

        // Always bump updated_at
        let ts = crate::time::current_timestamp();
        pairs.push(("updated_at".to_string(), ts.to_string()));

        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_message_transport failed");
            e
        })?;
        Ok(())
    }

    /// Flow: merge env_vars map and bump updated_at
    pub async fn update_flow_env_vars_merge(
        &self,
        db: u32,
        id: u32,
        patch: StdHashMap<String, String>,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::flow_key(id);

        let current: Option<String> = cm.hget(&key, "env_vars").await.ok();
        let mut obj = match current
            .and_then(|s| serde_json::from_str::<Value>(&s).ok())
            .and_then(|v| v.as_object().cloned())
        {
            Some(m) => m,
            None => JsonMap::new(),
        };

        for (k, v) in patch {
            obj.insert(k, Value::String(v));
        }

        let env_vars_str = Value::Object(obj).to_string();
        let ts = crate::time::current_timestamp();
        let pairs = vec![
            ("env_vars".to_string(), env_vars_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_flow_env_vars_merge failed");
            e
        })?;
        Ok(())
    }

    /// Flow: merge result map and bump updated_at
    pub async fn update_flow_result_merge(
        &self,
        db: u32,
        id: u32,
        patch: StdHashMap<String, String>,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::flow_key(id);

        let current: Option<String> = cm.hget(&key, "result").await.ok();
        let mut obj = match current
            .and_then(|s| serde_json::from_str::<Value>(&s).ok())
            .and_then(|v| v.as_object().cloned())
        {
            Some(m) => m,
            None => JsonMap::new(),
        };

        for (k, v) in patch {
            obj.insert(k, Value::String(v));
        }

        let result_str = Value::Object(obj).to_string();
        let ts = crate::time::current_timestamp();
        let pairs = vec![
            ("result".to_string(), result_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_flow_result_merge failed");
            e
        })?;
        Ok(())
    }

    /// Job: merge env_vars map and bump updated_at
    pub async fn update_job_env_vars_merge(
        &self,
        db: u32,
        caller_id: u32,
        id: u32,
        patch: StdHashMap<String, String>,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::job_key(caller_id, id);

        let current: Option<String> = cm.hget(&key, "env_vars").await.ok();
        let mut obj = match current
            .and_then(|s| serde_json::from_str::<Value>(&s).ok())
            .and_then(|v| v.as_object().cloned())
        {
            Some(m) => m,
            None => JsonMap::new(),
        };

        for (k, v) in patch {
            obj.insert(k, Value::String(v));
        }

        let env_vars_str = Value::Object(obj).to_string();
        let ts = crate::time::current_timestamp();
        let pairs = vec![
            ("env_vars".to_string(), env_vars_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_job_env_vars_merge failed");
            e
        })?;
        Ok(())
    }

    /// Job: merge result map and bump updated_at
    pub async fn update_job_result_merge(
        &self,
        db: u32,
        caller_id: u32,
        id: u32,
        patch: StdHashMap<String, String>,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::job_key(caller_id, id);

        let current: Option<String> = cm.hget(&key, "result").await.ok();
        let mut obj = match current
            .and_then(|s| serde_json::from_str::<Value>(&s).ok())
            .and_then(|v| v.as_object().cloned())
        {
            Some(m) => m,
            None => JsonMap::new(),
        };

        for (k, v) in patch {
            obj.insert(k, Value::String(v));
        }

        let result_str = Value::Object(obj).to_string();
        let ts = crate::time::current_timestamp();
        let pairs = vec![
            ("result".to_string(), result_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_job_result_merge failed");
            e
        })?;
        Ok(())
    }

    /// Flow: set jobs list and bump updated_at
    pub async fn update_flow_jobs_set(&self, db: u32, id: u32, new_jobs: Vec<u32>) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::flow_key(id);

        let jobs_str = serde_json::to_string(&new_jobs)?;
        let ts = crate::time::current_timestamp();
        let pairs = vec![
            ("jobs".to_string(), jobs_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET update_flow_jobs_set failed");
            e
        })?;
        Ok(())
    }

    /// Message: append logs (no dedup) and bump updated_at
    pub async fn append_message_logs(
        &self,
        db: u32,
        caller_id: u32,
        id: u32,
        new_logs: Vec<String>,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let key = Self::message_key(caller_id, id);

        let current: Option<String> = cm.hget(&key, "logs").await.ok();
        let mut arr: Vec<Value> = current
            .and_then(|s| serde_json::from_str::<Value>(&s).ok())
            .and_then(|v| v.as_array().cloned())
            .unwrap_or_default();

        for l in new_logs {
            arr.push(Value::String(l));
        }

        let logs_str = Value::Array(arr).to_string();
        let ts = crate::time::current_timestamp();
        let pairs = vec![
            ("logs".to_string(), logs_str),
            ("updated_at".to_string(), ts.to_string()),
        ];
        let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
            error!(db=%db, key=%key, error=%e, "HSET append_message_logs failed");
            e
        })?;
        Ok(())
    }

    // -----------------------------
    // Queues (lists)
    // -----------------------------

    /// Push a value onto a Redis list using LPUSH in the given DB.
    pub async fn lpush_list(&self, db: u32, list: &str, value: &str) -> Result<()> {
        let mut cm = self.manager_for_db(db).await?;
        let _: i64 = cm.lpush(list, value).await.map_err(|e| {
            error!(db=%db, list=%list, value=%value, error=%e, "LPUSH failed");
            e
        })?;
        Ok(())
    }

    /// Enqueue a message key onto the outbound queue (msg_out).
    /// The value is the canonical message key "message:{caller_id}:{id}".
    pub async fn enqueue_msg_out(&self, db: u32, caller_id: u32, id: u32) -> Result<()> {
        let key = Self::message_key(caller_id, id);
        self.lpush_list(db, "msg_out", &key).await
    }

    /// Block-pop from msg_out with timeout (seconds). Returns the message key if present.
    /// Uses BRPOP so that the queue behaves FIFO with LPUSH producer.
    pub async fn brpop_msg_out(&self, db: u32, timeout_secs: usize) -> Result<Option<String>> {
        let mut cm = self.manager_for_db(db).await?;
        // BRPOP returns (list, element) on success
        let res: Option<(String, String)> = redis::cmd("BRPOP")
            .arg("msg_out")
            .arg(timeout_secs)
            .query_async(&mut cm)
            .await
            .map_err(|e| {
                error!(db=%db, timeout_secs=%timeout_secs, error=%e, "BRPOP failed");
                e
            })?;
        Ok(res.map(|(_, v)| v))
    }

    /// Scan all runner:* keys in this DB and return the deserialized Runner entries.
    pub async fn scan_runners(&self, db: u32) -> Result<Vec<Runner>> {
        let mut cm = self.manager_for_db(db).await?;
        let mut out: Vec<Runner> = Vec::new();
        let mut cursor: u64 = 0;
        loop {
            let (next, keys): (u64, Vec<String>) = redis::cmd("SCAN")
                .arg(cursor)
                .arg("MATCH")
                .arg("runner:*")
                .arg("COUNT")
                .arg(100)
                .query_async(&mut cm)
                .await
                .map_err(|e| {
                    error!(db=%db, cursor=%cursor, error=%e, "SCAN failed");
                    e
                })?;
            for k in keys {
                if let Ok(r) = self.hget_model::<Runner>(db, &k).await {
                    out.push(r);
                }
            }
            if next == 0 {
                break;
            }
            cursor = next;
        }
        Ok(out)
    }

    // -----------------------------
    // Global registry (DB 0) for Context IDs
    // -----------------------------

    /// Register a context id in the global set "contexts" stored in DB 0.
    pub async fn register_context_id(&self, id: u32) -> Result<()> {
        let mut cm = self.manager_for_db(0).await?;
        let _: i64 = redis::cmd("SADD")
            .arg("contexts")
            .arg(id)
            .query_async(&mut cm)
            .await
            .map_err(|e| {
                error!(db=0, context_id=%id, error=%e, "SADD contexts failed");
                e
            })?;
        Ok(())
    }

    /// List all registered context ids from the global set in DB 0.
    pub async fn list_context_ids(&self) -> Result<Vec<u32>> {
        let mut cm = self.manager_for_db(0).await?;
        // Using SMEMBERS and parsing into u32
        let vals: Vec<String> = redis::cmd("SMEMBERS")
            .arg("contexts")
            .query_async(&mut cm)
            .await
            .map_err(|e| {
                error!(db=0, error=%e, "SMEMBERS contexts failed");
                e
            })?;
        let mut out = Vec::with_capacity(vals.len());
        for v in vals {
            if let Ok(n) = v.parse::<u32>() {
                out.push(n);
            }
        }
        out.sort_unstable();
        Ok(out)
    }

    // -----------------------------
    // Supervisor correlation mapping (DB 0)
    // Key: "supcorr:{inner_id_decimal}"
    // Value: JSON {"context_id":u32,"caller_id":u32,"job_id":u32,"message_id":u32}
    // TTL: 1 hour to avoid leaks in case of crashes
    pub async fn supcorr_set(
        &self,
        inner_id: u64,
        context_id: u32,
        caller_id: u32,
        job_id: u32,
        message_id: u32,
    ) -> Result<()> {
        let mut cm = self.manager_for_db(0).await?;
        let key = format!("supcorr:{}", inner_id);
        let val = serde_json::json!({
            "context_id": context_id,
            "caller_id": caller_id,
            "job_id": job_id,
            "message_id": message_id,
        })
        .to_string();
        // SET key val EX 3600
        let _: () = redis::cmd("SET")
            .arg(&key)
            .arg(&val)
            .arg("EX")
            .arg(3600)
            .query_async(&mut cm)
            .await
            .map_err(|e| {
                error!(db=0, key=%key, error=%e, "SET supcorr_set failed");
                e
            })?;
        Ok(())
    }

    pub async fn supcorr_get(&self, inner_id: u64) -> Result<Option<(u32, u32, u32, u32)>> {
        let mut cm = self.manager_for_db(0).await?;
        let key = format!("supcorr:{}", inner_id);
        let res: Option<String> = redis::cmd("GET")
            .arg(&key)
            .query_async(&mut cm)
            .await
            .map_err(|e| {
                error!(db=0, key=%key, error=%e, "GET supcorr_get failed");
                e
            })?;
        if let Some(s) = res {
            let v: Value = serde_json::from_str(&s)?;
            let ctx = v.get("context_id").and_then(|x| x.as_u64()).unwrap_or(0) as u32;
            let caller = v.get("caller_id").and_then(|x| x.as_u64()).unwrap_or(0) as u32;
            let job = v.get("job_id").and_then(|x| x.as_u64()).unwrap_or(0) as u32;
            let msg = v.get("message_id").and_then(|x| x.as_u64()).unwrap_or(0) as u32;
            return Ok(Some((ctx, caller, job, msg)));
        }
        Ok(None)
    }

    pub async fn supcorr_del(&self, inner_id: u64) -> Result<()> {
        let mut cm = self.manager_for_db(0).await?;
        let key = format!("supcorr:{}", inner_id);
        let _: i64 = redis::cmd("DEL")
            .arg(&key)
            .query_async(&mut cm)
            .await
            .map_err(|e| {
                error!(db=0, key=%key, error=%e, "DEL supcorr_del failed");
                e
            })?;
        Ok(())
    }
}
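Two behaviours of this driver are worth seeing in miniature. First, the per-field HSET encoding described in the RedisDriver doc comment: scalars are stored as plain strings, arrays and objects as JSON text. A standalone sketch on a toy struct (Sample is hypothetical, not a coordinator model):

// Sketch of the per-field encoding described above, on a hypothetical struct.
use serde::{Deserialize, Serialize};
use serde_json::Value;

#[derive(Serialize, Deserialize, Debug)]
struct Sample {
    id: u32,
    name: String,
    tags: Vec<String>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let s = Sample { id: 7, name: "demo".into(), tags: vec!["a".into(), "b".into()] };
    let obj = serde_json::to_value(&s)?;
    for (k, v) in obj.as_object().unwrap() {
        // Scalars become plain strings; arrays/objects are stored as JSON text.
        let field = match v {
            Value::Array(_) | Value::Object(_) => serde_json::to_string(v)?,
            Value::String(s) => s.clone(),
            other => other.to_string(),
        };
        // Would be written as: HSET sample:7 <field-name> <field-value>
        println!("HSET sample:7 {} {}", k, field);
    }
    Ok(())
}

Second, the msg_out queue: LPUSH at the head combined with BRPOP at the tail is what yields FIFO delivery. A sketch against a local Redis (the address and an empty starting queue are assumptions):

// Sketch only: assumes a Redis instance at redis://127.0.0.1:6379.
use redis::AsyncCommands;

#[tokio::main]
async fn main() -> redis::RedisResult<()> {
    let client = redis::Client::open("redis://127.0.0.1:6379/0")?;
    let mut cm = client.get_connection_manager().await?;

    // Start from a clean list so the assert below is meaningful.
    let _: i64 = cm.del("msg_out").await?;

    // Producer side: push message keys onto the head of the list.
    let _: i64 = cm.lpush("msg_out", "message:1:1").await?;
    let _: i64 = cm.lpush("msg_out", "message:1:2").await?;

    // Consumer side: blocking pop from the tail returns the oldest element first.
    let first: Option<(String, String)> = redis::cmd("BRPOP")
        .arg("msg_out")
        .arg(1usize)
        .query_async(&mut cm)
        .await?;
    assert_eq!(first.map(|(_, v)| v).as_deref(), Some("message:1:1"));
    Ok(())
}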
14
bin/coordinator/src/time.rs
Normal file
@@ -0,0 +1,14 @@
use std::time::{SystemTime, UNIX_EPOCH};

/// A timestamp since the unix epoch
pub type Timestamp = i64;

/// Get the current system timestamp
pub fn current_timestamp() -> Timestamp {
    let now = SystemTime::now();
    // A duration is always positive so this returns an unsigned integer, while a timestamp can
    // predate the unix epoch so we must cast to a signed integer.
    now.duration_since(UNIX_EPOCH)
        .expect("Time moves forward")
        .as_secs() as i64
}
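
Because `Timestamp` is signed while `Duration` is not, the inverse conversion needs a sign check. A purely illustrative sketch (not part of this file):

```rust
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Illustrative inverse of current_timestamp(): maps a possibly negative
/// timestamp back onto a SystemTime by offsetting from the unix epoch.
fn to_system_time(ts: i64) -> SystemTime {
    if ts >= 0 {
        UNIX_EPOCH + Duration::from_secs(ts as u64)
    } else {
        UNIX_EPOCH - Duration::from_secs(ts.unsigned_abs())
    }
}
```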
29
bin/osiris/Cargo.toml
Normal file
@@ -0,0 +1,29 @@
[package]
name = "osiris-server"
version.workspace = true
edition.workspace = true
description = "Osiris HTTP server"
license = "MIT OR Apache-2.0"

[[bin]]
name = "osiris"
path = "src/main.rs"

[dependencies]
# Osiris core
osiris-core = { path = "../../lib/osiris/core" }

# Web framework
axum = "0.7"
tower = "0.4"
tower-http.workspace = true

# Core dependencies
tokio.workspace = true
serde.workspace = true
serde_json.workspace = true
anyhow.workspace = true

# Tracing
tracing.workspace = true
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
145
bin/osiris/src/main.rs
Normal file
@@ -0,0 +1,145 @@
//! Osiris Server - Generic OpenAPI REST server for Osiris data structures
//!
//! Provides generic CRUD operations for all Osiris structs via REST API.
//! Routes follow pattern: GET /api/:struct_name/:id

use axum::{
    extract::{Path, Query, State},
    http::StatusCode,
    response::{IntoResponse, Json},
    routing::get,
    Router,
};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::sync::Arc;
use tower_http::cors::{Any, CorsLayer};
use tracing::{info, warn};

#[derive(Clone)]
struct AppState {
    // In a real implementation, this would be a Redis connection pool
    // For now, we'll use an in-memory store for demonstration
    store: Arc<tokio::sync::RwLock<HashMap<String, HashMap<String, Value>>>>,
}

impl AppState {
    fn new() -> Self {
        Self {
            store: Arc::new(tokio::sync::RwLock::new(HashMap::new())),
        }
    }
}

#[tokio::main]
async fn main() {
    // Initialize tracing
    tracing_subscriber::fmt()
        .with_target(false)
        .compact()
        .init();

    let state = AppState::new();

    // Build router
    let app = Router::new()
        .route("/health", get(health_check))
        .route("/api/:struct_name", get(list_structs))
        .route("/api/:struct_name/:id", get(get_struct))
        .layer(
            CorsLayer::new()
                .allow_origin(Any)
                .allow_methods(Any)
                .allow_headers(Any),
        )
        .with_state(state);

    let addr = "0.0.0.0:8081";
    info!("🚀 Osiris Server starting on {}", addr);
    info!("📖 API Documentation: http://localhost:8081/health");

    let listener = tokio::net::TcpListener::bind(addr)
        .await
        .expect("Failed to bind address");

    axum::serve(listener, app)
        .await
        .expect("Server failed");
}

/// Health check endpoint
async fn health_check() -> impl IntoResponse {
    Json(json!({
        "status": "healthy",
        "service": "osiris-server",
        "version": "0.1.0"
    }))
}

/// Generic GET endpoint for a single struct by ID
/// GET /api/:struct_name/:id
async fn get_struct(
    State(state): State<AppState>,
    Path((struct_name, id)): Path<(String, String)>,
) -> Result<Json<Value>, (StatusCode, String)> {
    info!("GET /api/{}/{}", struct_name, id);

    let store = state.store.read().await;

    if let Some(struct_store) = store.get(&struct_name) {
        if let Some(data) = struct_store.get(&id) {
            return Ok(Json(data.clone()));
        }
    }

    warn!("Not found: {}/{}", struct_name, id);
    Err((
        StatusCode::NOT_FOUND,
        format!("{}/{} not found", struct_name, id),
    ))
}

/// Generic LIST endpoint for all instances of a struct
/// GET /api/:struct_name?field=value
async fn list_structs(
    State(state): State<AppState>,
    Path(struct_name): Path<String>,
    Query(params): Query<HashMap<String, String>>,
) -> Result<Json<Vec<Value>>, (StatusCode, String)> {
    info!("GET /api/{} with params: {:?}", struct_name, params);

    let store = state.store.read().await;

    if let Some(struct_store) = store.get(&struct_name) {
        let mut results: Vec<Value> = struct_store.values().cloned().collect();

        // Apply filters if any
        if !params.is_empty() {
            results.retain(|item| {
                params.iter().all(|(key, value)| {
                    item.get(key)
                        .and_then(|v| v.as_str())
                        .map(|v| v == value)
                        .unwrap_or(false)
                })
            });
        }

        return Ok(Json(results));
    }

    // Return empty array if struct type doesn't exist yet
    Ok(Json(vec![]))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_health_check() {
        let response = health_check().await.into_response();
        assert_eq!(response.status(), StatusCode::OK);
    }
}
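
The filter in `list_structs` matches by exact string equality on top-level fields, so non-string values (numbers, booleans, nested objects) never match a query parameter. A test sketch that pins this down, written against the handlers above (illustrative; not part of the commit):

```rust
#[cfg(test)]
mod filter_tests {
    use super::*;

    #[tokio::test]
    async fn list_filter_matches_only_string_fields() {
        let state = AppState::new();
        {
            let mut store = state.store.write().await;
            let notes = store.entry("note".to_string()).or_default();
            notes.insert("1".into(), json!({"owner": "alice", "pinned": true}));
            notes.insert("2".into(), json!({"owner": "bob"}));
        }

        let mut params = HashMap::new();
        params.insert("owner".to_string(), "alice".to_string());

        let Json(results) =
            list_structs(State(state), Path("note".to_string()), Query(params))
                .await
                .unwrap();
        // Only the record whose top-level string field equals the filter survives.
        assert_eq!(results.len(), 1);
    }
}
```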
32
bin/runners/osiris/Cargo.toml
Normal file
@@ -0,0 +1,32 @@
[package]
name = "runner-osiris"
version.workspace = true
edition.workspace = true
description = "Osiris Runner - Database-backed runner"
license = "MIT OR Apache-2.0"

[[bin]]
name = "runner_osiris"
path = "src/main.rs"

[dependencies]
# Runner library
hero-runner = { path = "../../../lib/runner" }
hero-job = { path = "../../../lib/models/job" }

# Core dependencies
anyhow.workspace = true
tokio.workspace = true
log.workspace = true
env_logger.workspace = true
clap.workspace = true

# Rhai scripting
rhai = { version = "1.21.0", features = ["std", "sync", "serde"] }

# Osiris dependencies
osiris = { package = "osiris-core", path = "../../../lib/osiris/core" }
heromodels = { git = "https://git.ourworld.tf/herocode/db.git" }
heromodels_core = { git = "https://git.ourworld.tf/herocode/db.git" }
heromodels-derive = { git = "https://git.ourworld.tf/herocode/db.git" }
rhailib_dsl = { git = "https://git.ourworld.tf/herocode/rhailib.git" }
294
bin/runners/osiris/src/engine.rs
Normal file
@@ -0,0 +1,294 @@
//! OSIRIS Rhai Engine
//!
//! Creates a Rhai engine configured with OSIRIS contexts and methods.

use osiris::context::OsirisContext;
use osiris::objects::note::rhai::register_note_functions;
use osiris::objects::event::rhai::register_event_functions;
use osiris::objects::heroledger::rhai::register_heroledger_modules;
use osiris::objects::kyc::rhai::register_kyc_modules;
use osiris::objects::flow::rhai::register_flow_modules;
use osiris::objects::communication::rhai::register_communication_modules;
use osiris::objects::money::rhai::register_money_modules;
use osiris::objects::legal::rhai::register_legal_modules;
use osiris::objects::supervisor::rhai::register_supervisor_modules;
use rhai::{Engine, def_package, FuncRegistration};
use rhai::packages::{Package, StandardPackage};

/// Register get_context function in a Rhai engine with signatory-based access control
///
/// Simple logic:
/// - Context is a list of public keys (participants)
/// - To get_context, at least one participant must be a signatory
/// - No state tracking, no caching - creates fresh context each time
pub fn register_context_api(engine: &mut rhai::Engine) {
    // Register get_context function with signatory-based access control
    // Usage: get_context(['pk1', 'pk2', 'pk3'])
    engine.register_fn("get_context", move |context: rhai::NativeCallContext, participants: rhai::Array| -> Result<OsirisContext, Box<rhai::EvalAltResult>> {
        // Extract SIGNATORIES from context tag
        let tag_map = context
            .tag()
            .and_then(|tag| tag.read_lock::<rhai::Map>())
            .ok_or_else(|| Box::new(rhai::EvalAltResult::ErrorRuntime("Context tag must be a Map.".into(), context.position())))?;

        let signatories_dynamic = tag_map.get("SIGNATORIES")
            .ok_or_else(|| Box::new(rhai::EvalAltResult::ErrorRuntime("'SIGNATORIES' not found in context tag Map.".into(), context.position())))?;

        // Convert SIGNATORIES array to Vec<String>
        let signatories_array = signatories_dynamic.clone().into_array()
            .map_err(|e| Box::new(rhai::EvalAltResult::ErrorRuntime(format!("SIGNATORIES must be an array: {}", e).into(), context.position())))?;

        let signatories: Vec<String> = signatories_array.into_iter()
            .map(|s| s.into_string())
            .collect::<Result<Vec<_>, _>>()
            .map_err(|e| Box::new(rhai::EvalAltResult::ErrorRuntime(format!("SIGNATORIES must contain strings: {}", e).into(), context.position())))?;

        // Convert participants array to Vec<String>
        let participant_keys: Vec<String> = participants.into_iter()
            .map(|p| p.into_string())
            .collect::<Result<Vec<_>, _>>()
            .map_err(|e| Box::new(rhai::EvalAltResult::ErrorRuntime(format!("Participants must be strings: {}", e).into(), context.position())))?;

        // Verify at least one participant is a signatory
        let has_signatory = participant_keys.iter().any(|p| signatories.contains(p));
        if !has_signatory {
            return Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                format!("Access denied: none of the participants are signatories. Signatories: {}", signatories.join(", ")).into(),
                context.position()
            )));
        }

        // Create context directly with participants
        OsirisContext::builder()
            .participants(participant_keys)
            .build()
            .map_err(|e| format!("Failed to create context: {}", e).into())
    });
}

// Define the OSIRIS package
def_package! {
    /// OSIRIS package with all OSIRIS types and functions
    pub OsirisPackage(module) : StandardPackage {
        // Register OsirisContext type with all its methods
        module.set_custom_type::<OsirisContext>("OsirisContext");

        // Register OsirisContext methods
        FuncRegistration::new("participants")
            .set_into_module(module, |ctx: &mut OsirisContext| ctx.participants());
        FuncRegistration::new("context_id")
            .set_into_module(module, |ctx: &mut OsirisContext| ctx.context_id());
        // Typed save methods - all named "save" for function overloading using generic save_object
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, note: osiris::objects::Note| ctx.save_object(note));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, event: osiris::objects::Event| ctx.save_object(event));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, user: osiris::objects::heroledger::user::User| ctx.save_object(user));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, group: osiris::objects::heroledger::group::Group| ctx.save_object(group));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, account: osiris::objects::heroledger::money::Account| ctx.save_object(account));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, zone: osiris::objects::heroledger::dnsrecord::DNSZone| ctx.save_object(zone));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, kyc_info: osiris::objects::KycInfo| ctx.save_object(kyc_info));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, kyc_session: osiris::objects::KycSession| ctx.save_object(kyc_session));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, flow_template: osiris::objects::FlowTemplate| ctx.save_object(flow_template));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, flow_instance: osiris::objects::FlowInstance| ctx.save_object(flow_instance));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, verification: osiris::objects::Verification| ctx.save_object(verification));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, email_client: osiris::objects::communication::email::EmailClient| ctx.save_object(email_client));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, mail_template: osiris::objects::communication::email::MailTemplate| ctx.save_object(mail_template));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, account: osiris::objects::Account| ctx.save_object(account));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, asset: osiris::objects::Asset| ctx.save_object(asset));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, transaction: osiris::objects::Transaction| ctx.save_object(transaction));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, contract: osiris::objects::Contract| ctx.save_object(contract));

        // Supervisor objects
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, api_key: osiris::objects::supervisor::ApiKey| ctx.save_object(api_key));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, runner: osiris::objects::supervisor::Runner| ctx.save_object(runner));
        FuncRegistration::new("save")
            .set_into_module(module, |ctx: &mut OsirisContext, job_metadata: osiris::objects::supervisor::JobMetadata| ctx.save_object(job_metadata));
        FuncRegistration::new("list")
            .set_into_module(module, |ctx: &mut OsirisContext, collection: String| ctx.list(collection));
        FuncRegistration::new("get")
            .set_into_module(module, |ctx: &mut OsirisContext, collection: String, id: String| ctx.get(collection, id));
        FuncRegistration::new("delete")
            .set_into_module(module, |ctx: &mut OsirisContext, collection: String, id: String| ctx.delete(collection, id));

        // Register Note functions
        register_note_functions(module);

        // Register Event functions
        register_event_functions(module);

        // Register HeroLedger modules (User, Group, Account, DNSZone)
        register_heroledger_modules(module);

        // Register KYC modules (KycClient, KycSession)
        register_kyc_modules(module);

        // Register Flow modules (FlowTemplate, FlowInstance)
        register_flow_modules(module);

        // Register Communication modules (Verification, EmailClient)
        register_communication_modules(module);

        // Register Money modules (Account, Asset, Transaction, PaymentClient)
        register_money_modules(module);

        // Register Legal modules (Contract)
        register_legal_modules(module);

        // Register Supervisor modules (ApiKey, Runner, JobMetadata)
        register_supervisor_modules(module);

        // Register get_context function with signatory-based access control
        FuncRegistration::new("get_context")
            .set_into_module(module, |context: rhai::NativeCallContext, participants: rhai::Array| -> Result<OsirisContext, Box<rhai::EvalAltResult>> {
                // Extract SIGNATORIES from context tag
                let tag_map = context
                    .tag()
                    .and_then(|tag| tag.read_lock::<rhai::Map>())
                    .ok_or_else(|| Box::new(rhai::EvalAltResult::ErrorRuntime("Context tag must be a Map.".into(), context.position())))?;

                let signatories_dynamic = tag_map.get("SIGNATORIES")
                    .ok_or_else(|| Box::new(rhai::EvalAltResult::ErrorRuntime("'SIGNATORIES' not found in context tag Map.".into(), context.position())))?;

                // Convert SIGNATORIES array to Vec<String>
                let signatories_array = signatories_dynamic.clone().into_array()
                    .map_err(|e| Box::new(rhai::EvalAltResult::ErrorRuntime(format!("SIGNATORIES must be an array: {}", e).into(), context.position())))?;

                let signatories: Vec<String> = signatories_array.into_iter()
                    .map(|s| s.into_string())
                    .collect::<Result<Vec<_>, _>>()
                    .map_err(|e| Box::new(rhai::EvalAltResult::ErrorRuntime(format!("SIGNATORIES must contain strings: {}", e).into(), context.position())))?;

                // Convert participants array to Vec<String>
                let participant_keys: Vec<String> = participants.into_iter()
                    .map(|p| p.into_string())
                    .collect::<Result<Vec<_>, _>>()
                    .map_err(|e| Box::new(rhai::EvalAltResult::ErrorRuntime(format!("Participants must be strings: {}", e).into(), context.position())))?;

                // Verify at least one participant is a signatory
                let has_signatory = participant_keys.iter().any(|p| signatories.contains(p));
                if !has_signatory {
                    return Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        format!("Access denied: none of the participants are signatories. Signatories: {}", signatories.join(", ")).into(),
                        context.position()
                    )));
                }

                // Create context directly with participants
                OsirisContext::builder()
                    .participants(participant_keys)
                    .build()
                    .map_err(|e| format!("Failed to create context: {}", e).into())
            });
    }
}

/// Register all OSIRIS components into an engine
/// This is a convenience function that registers the complete OsirisPackage
pub fn register_osiris_full(engine: &mut Engine) {
    let package = OsirisPackage::new();
    package.register_into_engine(engine);
}

/// Create a single OSIRIS engine (for backward compatibility)
pub fn create_osiris_engine() -> Result<Engine, Box<dyn std::error::Error>> {
    let mut engine = Engine::new_raw();
    register_osiris_full(&mut engine);
    Ok(engine)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_create_osiris_engine() {
        let result = create_osiris_engine();
        assert!(result.is_ok());

        let mut engine = result.unwrap();

        // Set up context tags with SIGNATORIES (like in runner_rust example)
        let mut tag_map = rhai::Map::new();
        // Create a proper Rhai array
        let signatories: rhai::Array = vec![
            rhai::Dynamic::from("pk1".to_string()),
            rhai::Dynamic::from("pk2".to_string()),
            rhai::Dynamic::from("pk3".to_string()),
        ];
        tag_map.insert("SIGNATORIES".into(), rhai::Dynamic::from(signatories));
        tag_map.insert("DB_PATH".into(), "/tmp/test_db".to_string().into());
        tag_map.insert("CONTEXT_ID".into(), "test_context".to_string().into());
        engine.set_default_tag(rhai::Dynamic::from(tag_map));

        // Test get_context with valid signatories
        let mut scope = rhai::Scope::new();
        let test_result = engine.eval_with_scope::<rhai::Dynamic>(
            &mut scope,
            r#"
                // All participants must be signatories
                let ctx = get_context(["pk1", "pk2"]);
                ctx.context_id()
            "#
        );

        if let Err(ref e) = test_result {
            eprintln!("Test error: {}", e);
        }
        assert!(test_result.is_ok(), "Failed to get context: {:?}", test_result.err());
        assert_eq!(test_result.unwrap().to_string(), "pk1,pk2");
    }

    #[test]
    fn test_engine_with_manager_access_denied() {
        let result = create_osiris_engine();
        assert!(result.is_ok());

        let mut engine = result.unwrap();

        // Set up context tags with SIGNATORIES
        let mut tag_map = rhai::Map::new();
        // Create a proper Rhai array
        let signatories: rhai::Array = vec![
            rhai::Dynamic::from("pk1".to_string()),
            rhai::Dynamic::from("pk2".to_string()),
        ];
        tag_map.insert("SIGNATORIES".into(), rhai::Dynamic::from(signatories));
        tag_map.insert("DB_PATH".into(), "/tmp/test_db".to_string().into());
        tag_map.insert("CONTEXT_ID".into(), "test_context".to_string().into());
        engine.set_default_tag(rhai::Dynamic::from(tag_map));

        // Test get_context with invalid participant (not a signatory)
        let mut scope = rhai::Scope::new();
        let test_result = engine.eval_with_scope::<rhai::Dynamic>(
            &mut scope,
            r#"
                // pk3 is not a signatory, should fail
                let ctx = get_context(["pk1", "pk3"]);
                ctx.context_id()
            "#
        );

        // Should fail because pk3 is not in SIGNATORIES
        assert!(test_result.is_err());
        let err_msg = test_result.unwrap_err().to_string();
        assert!(err_msg.contains("Access denied") || err_msg.contains("not a signatory"));
    }
}
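
All of the typed `save` registrations above share one function name and rely on Rhai's overload resolution by parameter type. A self-contained sketch of that technique using plain `register_fn` (illustrative; the real registrations go through `FuncRegistration` and OSIRIS types):

```rust
use rhai::Engine;

fn main() {
    let mut engine = Engine::new();

    // Two native functions share the name "save"; Rhai dispatches on the
    // runtime type of the argument.
    engine.register_fn("save", |n: i64| format!("saved int {n}"));
    engine.register_fn("save", |s: String| format!("saved string {s}"));

    assert_eq!(engine.eval::<String>(r#"save(7)"#).unwrap(), "saved int 7");
    assert_eq!(engine.eval::<String>(r#"save("x")"#).unwrap(), "saved string x");
}
```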
117
bin/runners/osiris/src/main.rs
Normal file
@@ -0,0 +1,117 @@
use hero_runner::{spawn_sync_runner, script_mode::execute_script_mode};
use clap::Parser;
use log::{error, info};
use tokio::sync::mpsc;

mod engine;
use engine::create_osiris_engine;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Runner ID
    runner_id: String,

    /// Redis URL (also used as HeroDB URL)
    #[arg(short = 'r', long, default_value = "redis://localhost:6379")]
    redis_url: String,

    /// Base database ID for OSIRIS contexts
    #[arg(long, default_value_t = 1)]
    base_db_id: u16,

    /// Script to execute in single-job mode (optional)
    #[arg(short, long)]
    script: Option<String>,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Initialize logging
    env_logger::init();

    let args = Args::parse();

    // Check if we're in script mode
    if let Some(script_content) = args.script {
        info!("Running in script mode with runner ID: {}", args.runner_id);

        let result = execute_script_mode(
            &script_content,
            &args.runner_id,
            args.redis_url,
            std::time::Duration::from_secs(300), // Default timeout for OSIS
            move || create_osiris_engine()
                .expect("Failed to create OSIRIS engine"),
        ).await;

        match result {
            Ok(output) => {
                println!("Script execution result:\n{}", output);
                return Ok(());
            }
            Err(e) => {
                error!("Script execution failed: {}", e);
                return Err(e);
            }
        }
    }

    info!("Starting OSIS Sync Runner with ID: {}", args.runner_id);
    info!("Redis URL: {}", args.redis_url);

    // Create shutdown channel
    let (shutdown_tx, shutdown_rx) = mpsc::channel::<()>(1);

    // Setup signal handling for graceful shutdown
    let shutdown_tx_clone = shutdown_tx.clone();
    tokio::spawn(async move {
        tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
        info!("Received Ctrl+C, initiating shutdown...");
        let _ = shutdown_tx_clone.send(()).await;
    });

    // Spawn the sync runner with engine factory
    let runner_handle = spawn_sync_runner(
        args.runner_id.clone(),
        args.redis_url,
        shutdown_rx,
        move || create_osiris_engine()
            .expect("Failed to create OSIRIS engine"),
    );

    info!("OSIS Sync Runner '{}' started successfully", args.runner_id);

    // Wait for the runner to complete
    match runner_handle.await {
        Ok(Ok(())) => {
            info!("OSIS Sync Runner '{}' shut down successfully", args.runner_id);
        }
        Ok(Err(e)) => {
            error!("OSIS Sync Runner '{}' encountered an error: {}", args.runner_id, e);
            return Err(e);
        }
        Err(e) => {
            error!("Failed to join OSIS Sync Runner '{}' task: {}", args.runner_id, e);
            return Err(Box::new(e));
        }
    }

    Ok(())
}

/// Example: Run a Rhai script with OSIRIS support
pub fn run_osiris_script(
    script: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    let engine = create_osiris_engine()?;
    engine.run(script)?;
    Ok(())
}
41
bin/runners/sal/Cargo.toml
Normal file
@@ -0,0 +1,41 @@
[package]
name = "runner-sal"
version.workspace = true
edition.workspace = true
description = "SAL Runner - System Abstraction Layer runner"
license = "MIT OR Apache-2.0"

[[bin]]
name = "runner_sal"
path = "src/main.rs"

[dependencies]
# Runner library
hero-runner = { path = "../../../lib/runner" }
hero-job = { path = "../../../lib/models/job" }

# Core dependencies
anyhow.workspace = true
tokio.workspace = true
log.workspace = true
env_logger.workspace = true
clap.workspace = true

# Rhai and logging
rhai = { version = "1.21.0", features = ["std", "sync", "decimal", "internals", "serde"] }
hero_logger = { git = "https://git.ourworld.tf/herocode/baobab.git", branch = "logger" }

# SAL modules
sal-os = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-redisclient = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-postgresclient = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-process = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-virt = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-git = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-zinit-client = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-mycelium = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-text = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-net = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-kubernetes = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-vault = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
sal-hetzner = { git = "https://git.ourworld.tf/herocode/herolib_rust.git" }
87
bin/runners/sal/src/README.md
Normal file
@@ -0,0 +1,87 @@
# SAL Runner

The SAL (System Abstraction Layer) Runner is an asynchronous job processing engine that executes Rhai scripts with access to system-level operations and infrastructure management capabilities.

## Features

- **Asynchronous Processing**: Handles multiple jobs concurrently with configurable timeouts
- **Redis Integration**: Uses Redis for job queue management and coordination
- **System Operations**: Full access to SAL modules including OS, networking, containers, and cloud services
- **Graceful Shutdown**: Responds to SIGINT (Ctrl+C) for clean termination
- **Comprehensive Logging**: Detailed logging for monitoring and debugging

## Usage

```bash
cargo run --bin runner_sal -- <RUNNER_ID> [OPTIONS]
```

### Arguments

- `<RUNNER_ID>`: Unique identifier for this runner instance (required, positional)

### Options

- `-d, --db-path <PATH>`: Database file path (default: `/tmp/sal.db`)
- `-r, --redis-url <URL>`: Redis connection URL (default: `redis://localhost:6379`)
- `-t, --timeout <SECONDS>`: Default job timeout in seconds (default: `300`)

### Examples

```bash
# Basic usage with default settings
cargo run --bin runner_sal -- myrunner

# Custom Redis URL and database path
cargo run --bin runner_sal -- production-runner -r redis://prod-redis:6379 -d /var/lib/sal.db

# Custom timeout for long-running jobs
cargo run --bin runner_sal -- batch-runner -t 3600
```

## Available SAL Modules

The SAL runner provides access to the following system modules through Rhai scripts:

- **OS Operations**: File system, process management, system information
- **Redis Client**: Redis database operations and caching
- **PostgreSQL Client**: Database connectivity and queries
- **Process Management**: System process control and monitoring
- **Virtualization**: Container and VM management
- **Git Operations**: Version control system integration
- **Zinit Client**: Service management and initialization
- **Mycelium**: Networking and mesh connectivity
- **Text Processing**: String manipulation and text utilities
- **Network Operations**: HTTP requests, network utilities
- **Kubernetes**: Container orchestration and cluster management
- **Hetzner Cloud**: Cloud infrastructure management

## Architecture

The SAL runner uses an asynchronous architecture that:

1. Connects to Redis for job queue management
2. Creates a Rhai engine with all SAL modules registered
3. Processes jobs concurrently with configurable timeouts
4. Handles graceful shutdown on SIGINT
5. Provides comprehensive error handling and logging

## Error Handling

The runner provides detailed error messages for common issues:

- Redis connection failures
- Database access problems
- Script execution errors
- Timeout handling
- Resource cleanup on shutdown

## Logging

Set the `RUST_LOG` environment variable to control logging levels:

```bash
RUST_LOG=debug cargo run --bin runner_sal -- myrunner
```

Available log levels: `error`, `warn`, `info`, `debug`, `trace`
73
bin/runners/sal/src/engine.rs
Normal file
@@ -0,0 +1,73 @@
use std::sync::{Arc, OnceLock};

// Re-export common Rhai types for convenience
pub use rhai::Engine;

/// Engine factory for creating and sharing Rhai engines with SAL modules.
pub struct EngineFactory {
    engine: Arc<Engine>,
}

impl EngineFactory {
    /// Create a new engine factory with a configured Rhai engine.
    pub fn new() -> Self {
        let mut engine = Engine::new();
        register_sal_modules(&mut engine);
        // Logger
        hero_logger::rhai_integration::configure_rhai_logging(&mut engine, "sal_runner");

        Self {
            engine: Arc::new(engine),
        }
    }

    /// Get a shared reference to the engine.
    pub fn get_engine(&self) -> Arc<Engine> {
        Arc::clone(&self.engine)
    }

    /// Get the global singleton engine factory.
    pub fn global() -> &'static EngineFactory {
        static FACTORY: OnceLock<EngineFactory> = OnceLock::new();
        FACTORY.get_or_init(EngineFactory::new)
    }
}

pub fn register_sal_modules(engine: &mut Engine) {
    let _ = sal_os::rhai::register_os_module(engine);
    let _ = sal_redisclient::rhai::register_redisclient_module(engine);
    let _ = sal_postgresclient::rhai::register_postgresclient_module(engine);
    let _ = sal_process::rhai::register_process_module(engine);
    let _ = sal_virt::rhai::register_virt_module(engine);
    let _ = sal_git::rhai::register_git_module(engine);
    let _ = sal_zinit_client::rhai::register_zinit_module(engine);
    let _ = sal_mycelium::rhai::register_mycelium_module(engine);
    let _ = sal_text::rhai::register_text_module(engine);
    let _ = sal_net::rhai::register_net_module(engine);
    let _ = sal_kubernetes::rhai::register_kubernetes_module(engine);
    let _ = sal_hetzner::rhai::register_hetzner_module(engine);

    println!("SAL modules registered successfully.");
}

/// Create a new SAL engine instance.
pub fn create_sal_engine() -> Engine {
    let mut engine = Engine::new();
    register_sal_modules(&mut engine);
    hero_logger::rhai_integration::configure_rhai_logging(&mut engine, "sal_runner");
    engine
}

/// Create a shared system engine using the factory.
pub fn create_shared_sal_engine() -> Arc<Engine> {
    EngineFactory::global().get_engine()
}
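
The factory gives every caller a handle to one shared, lazily initialized engine, so SAL module registration runs once per process. A short usage sketch against the items above (illustrative):

```rust
// Both handles point at the same engine instance behind the Arc.
fn shared_engine_demo() {
    let a = EngineFactory::global().get_engine();
    let b = EngineFactory::global().get_engine();
    assert!(std::sync::Arc::ptr_eq(&a, &b));

    // Arc<Engine> derefs to &Engine, which is all evaluation needs.
    let sum: i64 = a.eval("21 + 21").unwrap();
    assert_eq!(sum, 42);
}
```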
108
bin/runners/sal/src/main.rs
Normal file
@@ -0,0 +1,108 @@
use hero_runner::{spawn_async_runner, script_mode::execute_script_mode};
use clap::Parser;
use log::{error, info};
use std::time::Duration;
use tokio::sync::mpsc;

mod engine;
use engine::create_sal_engine;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Runner ID
    runner_id: String,

    /// Database path
    #[arg(short, long, default_value = "/tmp/sal.db")]
    db_path: String,

    /// Redis URL
    #[arg(short = 'r', long, default_value = "redis://localhost:6379")]
    redis_url: String,

    /// Default timeout for jobs in seconds
    #[arg(short, long, default_value_t = 300)]
    timeout: u64,

    /// Script to execute in single-job mode (optional)
    #[arg(short, long)]
    script: Option<String>,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Initialize logging
    env_logger::init();

    let args = Args::parse();

    // Check if we're in script mode
    if let Some(script_content) = args.script {
        info!("Running in script mode with runner ID: {}", args.runner_id);

        let result = execute_script_mode(
            &script_content,
            &args.runner_id,
            args.redis_url,
            Duration::from_secs(args.timeout),
            create_sal_engine,
        ).await;

        match result {
            Ok(output) => {
                println!("Script execution result:\n{}", output);
                return Ok(());
            }
            Err(e) => {
                error!("Script execution failed: {}", e);
                return Err(e);
            }
        }
    }

    info!("Starting SAL Async Runner with ID: {}", args.runner_id);
    info!("Database path: {}", args.db_path);
    info!("Redis URL: {}", args.redis_url);
    info!("Default timeout: {} seconds", args.timeout);

    // Create shutdown channel
    let (shutdown_tx, shutdown_rx) = mpsc::channel::<()>(1);

    // Setup signal handling for graceful shutdown
    let shutdown_tx_clone = shutdown_tx.clone();
    tokio::spawn(async move {
        tokio::signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
        info!("Received Ctrl+C, initiating shutdown...");
        let _ = shutdown_tx_clone.send(()).await;
    });

    // Spawn the async runner with engine factory
    let runner_handle = spawn_async_runner(
        args.runner_id.clone(),
        args.db_path,
        args.redis_url,
        shutdown_rx,
        Duration::from_secs(args.timeout),
        create_sal_engine,
    );

    info!("SAL Async Runner '{}' started successfully", args.runner_id);

    // Wait for the runner to complete
    match runner_handle.await {
        Ok(Ok(())) => {
            info!("SAL Async Runner '{}' shut down successfully", args.runner_id);
        }
        Ok(Err(e)) => {
            error!("SAL Async Runner '{}' encountered an error: {}", args.runner_id, e);
            return Err(e);
        }
        Err(e) => {
            error!("Failed to join SAL Async Runner '{}' task: {}", args.runner_id, e);
            return Err(Box::new(e));
        }
    }

    Ok(())
}
23
bin/supervisor/.env.example
Normal file
@@ -0,0 +1,23 @@
# Hero Supervisor Configuration

# Redis connection URL
REDIS_URL=redis://127.0.0.1:6379

# OpenRPC Server Configuration
BIND_ADDRESS=127.0.0.1
PORT=3030

# Authentication Secrets (generate with: ./scripts/generate_secret.sh)
# At least one admin secret is required
ADMIN_SECRETS=your_admin_secret_here

# Optional: Additional secrets for different access levels
# USER_SECRETS=user_secret_1,user_secret_2
# REGISTER_SECRETS=register_secret_1

# Optional: Pre-configured runners (comma-separated names)
# These runners will be automatically registered on startup
# RUNNERS=runner1,runner2,runner3

# Optional: Mycelium network URL (requires mycelium feature)
# MYCELIUM_URL=http://127.0.0.1:8989
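
The secret variables above are comma-separated lists. A sketch of how such values can be split in Rust (the supervisor's actual config loader may differ):

```rust
use std::env;

// Illustrative parser for comma-separated secret lists such as ADMIN_SECRETS.
fn secrets_from_env(var: &str) -> Vec<String> {
    env::var(var)
        .unwrap_or_default()
        .split(',')
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .map(String::from)
        .collect()
}

fn main() {
    let admin = secrets_from_env("ADMIN_SECRETS");
    let user = secrets_from_env("USER_SECRETS");
    println!("{} admin secret(s), {} user secret(s)", admin.len(), user.len());
}
```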
4
bin/supervisor/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
target
.bin
.env
/tmp/supervisor-*.log
66
bin/supervisor/Cargo.toml
Normal file
@@ -0,0 +1,66 @@
[package]
name = "hero-supervisor"
version.workspace = true
edition.workspace = true

[lib]
name = "hero_supervisor"
path = "src/lib.rs"

[[bin]]
name = "supervisor"
path = "src/bin/supervisor.rs"

[dependencies]
# Job types
hero-job = { path = "../../lib/models/job" }
hero-job-client = { path = "../../lib/clients/job" }

# Async runtime
tokio.workspace = true
async-trait.workspace = true

# Redis client
redis.workspace = true

# Core dependencies
uuid.workspace = true
log.workspace = true
thiserror.workspace = true
chrono.workspace = true
serde.workspace = true
serde_json.workspace = true
env_logger.workspace = true

# CLI argument parsing
clap.workspace = true
toml.workspace = true

# OpenRPC dependencies
jsonrpsee.workspace = true
anyhow.workspace = true
futures.workspace = true

# CORS support for OpenRPC server
tower-http.workspace = true
tower.workspace = true
hyper.workspace = true
hyper-util.workspace = true
http-body-util.workspace = true

# Osiris client for persistent storage
# osiris-client = { git = "https://git.ourworld.tf/herocode/osiris.git" } # Temporarily disabled - needs update

[dev-dependencies]
tokio-test = "0.4"
hero-supervisor-openrpc-client = { path = "../../lib/clients/supervisor" }
escargot = "0.5"

[features]
default = ["cli"]
cli = []

# Examples
[[example]]
name = "osiris_openrpc"
path = "examples/osiris_openrpc/main.rs"
46
bin/supervisor/README.md
Normal file
@@ -0,0 +1,46 @@
# Supervisor

A job execution supervisor that queues jobs to runners over Redis and returns their output. It provides an OpenRPC server for remote job dispatching. The OpenRPC server requires authorization via API keys. API keys are scoped to grant one of three levels of access: Admin, Registrar (can register runners), and User (can dispatch jobs).

Jobs contain scripts, environment variables, an identifier of the runner that should execute the script, and signatures. The supervisor verifies the signatures; access control based on who the signatories of a script are is handled by the runner logic.

**Note:** Runners are expected to be started and managed externally. The supervisor only tracks which runners are registered and queues jobs to them via Redis.

## Usage

The supervisor needs an admin key to be configured to get started.
`cargo run -- --admin-secret <SECRET>`

You can also use the run script, which reads the admin key from the `.env` file.
`./scripts/run.sh`

The scripts directory also offers other scripts for building, testing, and more.

## Functionality

Beyond job dispatching, the supervisor provides:
- **API Key Management**: Create, list, and remove API keys with different permission scopes
- **Runner Registration**: Register runners so the supervisor knows which queues are available
- **Job Lifecycle**: Create, start, stop, and monitor jobs
- **Job Queuing**: Queue jobs to specific runners via Redis

Runner registration simply means the supervisor becomes aware that a certain runner is listening to its queue. The full API specification can be seen in `docs/openrpc.json`.

## OpenRPC

### Server

The supervisor automatically starts an OpenRPC server on `127.0.0.1:3030` that exposes all supervisor functionality via JSON-RPC.

### Example JSON-RPC Call

```bash
curl -X POST -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","method":"list_runners","id":1}' \
  http://127.0.0.1:3030
```
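
The same call can be made from Rust with the workspace's `jsonrpsee` HTTP client. A sketch; the `Vec<String>` result type is an assumption about the shape of `list_runners`' response:

```rust
use jsonrpsee::core::client::ClientT;
use jsonrpsee::http_client::HttpClientBuilder;
use jsonrpsee::rpc_params;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let client = HttpClientBuilder::default().build("http://127.0.0.1:3030")?;

    // Mirrors the curl example above; list_runners takes no parameters.
    let runners: Vec<String> = client.request("list_runners", rpc_params![]).await?;
    println!("registered runners: {:?}", runners);
    Ok(())
}
```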

### Client

The repository also offers an OpenRPC client for the supervisor that is compatible with WASM targets.
146
bin/supervisor/docs/AUTH.md
Normal file
@@ -0,0 +1,146 @@
# Hero Supervisor Authentication

The Hero Supervisor now supports API key-based authentication with three permission scopes:

## Permission Scopes

1. **Admin** - Full access to all operations including key management
2. **Registrar** - Can register new runners
3. **User** - Can create and manage jobs

## Starting the Supervisor with an Admin Key

Bootstrap an initial admin key when starting the supervisor:

```bash
cargo run --bin supervisor -- --bootstrap-admin-key "my-admin"
```

This will output:

```
╔════════════════════════════════════════════════════════════╗
║  🔑 Admin API Key Created                                  ║
╚════════════════════════════════════════════════════════════╝
  Name:  my-admin
  Key:   xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
  Scope: admin
  ⚠️  SAVE THIS KEY - IT WILL NOT BE SHOWN AGAIN!
╚════════════════════════════════════════════════════════════╝
```

**IMPORTANT:** Save this key securely - it will not be displayed again!

## API Endpoints

### Verify API Key

Verify a key and get its metadata:

```bash
curl -X POST http://127.0.0.1:3030 \
  -H "Content-Type: application/json" \
  -d '{
    "jsonrpc": "2.0",
    "method": "auth.verify",
    "params": {
      "key": "your-api-key-here"
    },
    "id": 1
  }'
```

Response:

```json
{
  "jsonrpc": "2.0",
  "result": {
    "valid": true,
    "name": "my-admin",
    "scope": "admin"
  },
  "id": 1
}
```
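
To consume this response in typed Rust code, the result can be mirrored with serde. A sketch; the field names follow the JSON above, everything else is illustrative:

```rust
use serde::Deserialize;

/// Mirrors the auth.verify result shown above.
#[derive(Debug, Deserialize)]
struct VerifyResult {
    valid: bool,
    name: String,
    scope: Scope,
}

/// The three permission scopes, matching the lowercase strings in the JSON.
#[derive(Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
enum Scope {
    Admin,
    Registrar,
    User,
}

fn main() -> serde_json::Result<()> {
    let raw = r#"{"valid": true, "name": "my-admin", "scope": "admin"}"#;
    let res: VerifyResult = serde_json::from_str(raw)?;
    assert!(res.valid && res.scope == Scope::Admin);
    println!("{} -> {:?}", res.name, res.scope);
    Ok(())
}
```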

### Create New API Key (Admin Only)

```bash
curl -X POST http://127.0.0.1:3030 \
  -H "Content-Type: application/json" \
  -d '{
    "jsonrpc": "2.0",
    "method": "auth.create_key",
    "params": {
      "admin_key": "your-admin-key",
      "name": "runner-bot",
      "scope": "registrar"
    },
    "id": 1
  }'
```

Response:

```json
{
  "jsonrpc": "2.0",
  "result": {
    "key": "new-generated-uuid",
    "name": "runner-bot",
    "scope": "registrar",
    "created_at": "2025-10-27T15:00:00Z",
    "expires_at": null
  },
  "id": 1
}
```

### List All API Keys (Admin Only)

```bash
curl -X POST http://127.0.0.1:3030 \
  -H "Content-Type: application/json" \
  -d '{
    "jsonrpc": "2.0",
    "method": "auth.list_keys",
    "params": {
      "admin_key": "your-admin-key"
    },
    "id": 1
  }'
```

### Remove API Key (Admin Only)

```bash
curl -X POST http://127.0.0.1:3030 \
  -H "Content-Type: application/json" \
  -d '{
    "jsonrpc": "2.0",
    "method": "auth.remove_key",
    "params": {
      "admin_key": "your-admin-key",
      "key": "key-to-remove"
    },
    "id": 1
  }'
```

## Using Keys in the Admin UI

The admin UI will use the `auth.verify` endpoint during login to:
1. Validate the provided API key
2. Retrieve the key's name and scope
3. Display the user's name and permissions in the header
4. Show/hide UI elements based on scope

## Migration from Legacy Secrets

The supervisor still supports the legacy secret-based authentication for backward compatibility:
- `--admin-secret` - Legacy admin secrets
- `--user-secret` - Legacy user secrets
- `--register-secret` - Legacy register secrets

However, the new API key system is recommended for better management and auditability.
268
bin/supervisor/docs/MYCELIUM.md
Normal file
@@ -0,0 +1,268 @@
# Mycelium Integration - Now Optional!

The Mycelium integration is now an optional feature. The supervisor can run with just the OpenRPC HTTP server, making it simpler to use and deploy.

## What Changed

### Before
- Mycelium integration was always enabled
- Supervisor would continuously try to connect to Mycelium on port 8990
- Error logs if Mycelium wasn't available
- Required additional dependencies

### After
- ✅ Mycelium is now an optional feature
- ✅ Supervisor runs with clean OpenRPC HTTP server by default
- ✅ No connection errors when Mycelium isn't needed
- ✅ Smaller binary size without Mycelium dependencies

## Running the Supervisor

### Option 1: Simple OpenRPC Server (Recommended)

**No Mycelium, just OpenRPC:**

```bash
# Using the helper script
./run_supervisor_simple.sh

# Or manually
MYCELIUM_URL="" cargo run --bin supervisor -- \
  --redis-url redis://localhost:6379 \
  --port 3030
```

This starts:
- ✅ OpenRPC HTTP server on port 3030
- ✅ Redis connection for job queuing
- ❌ No Mycelium integration

### Option 2: With Mycelium Integration

**Enable Mycelium feature:**

```bash
# Build with Mycelium support
cargo build --bin supervisor --features mycelium

# Run with Mycelium URL
MYCELIUM_URL="http://localhost:8990" cargo run --bin supervisor --features mycelium -- \
  --redis-url redis://localhost:6379 \
  --port 3030
```

This starts:
- ✅ OpenRPC HTTP server on port 3030
- ✅ Redis connection for job queuing
- ✅ Mycelium integration (connects to daemon)

## Feature Flags

### Available Features

| Feature | Description | Default |
|---------|-------------|---------|
| `cli` | Command-line interface | ✅ Yes |
| `mycelium` | Mycelium integration | ❌ No |

### Building with Features

```bash
# Default build (CLI only, no Mycelium)
cargo build --bin supervisor

# With Mycelium
cargo build --bin supervisor --features mycelium

# Minimal (no CLI, no Mycelium)
cargo build --bin supervisor --no-default-features
```
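
In the source, optional integrations like this are typically gated behind `cfg` attributes plus the environment check. A hypothetical sketch, not the supervisor's actual code (module and function names are invented):

```rust
// Compiled in only when the crate is built with --features mycelium.
#[cfg(feature = "mycelium")]
mod mycelium {
    pub async fn start(url: &str) {
        println!("connecting to Mycelium daemon at {url}");
        // ... spawn the integration task here ...
    }
}

async fn maybe_start_mycelium() {
    // An empty MYCELIUM_URL disables the integration even when compiled in.
    #[cfg(feature = "mycelium")]
    if let Ok(url) = std::env::var("MYCELIUM_URL") {
        if !url.is_empty() {
            mycelium::start(&url).await;
        }
    }
}
```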

## Architecture

### Without Mycelium (Default)

```
┌─────────────────┐
│     Client      │
└────────┬────────┘
         │ HTTP/JSON-RPC
         ▼
┌─────────────────┐
│   Supervisor    │
│ OpenRPC Server  │
│  (Port 3030)    │
└────────┬────────┘
         │ Redis
         ▼
┌─────────────────┐
│     Runners     │
└─────────────────┘
```

### With Mycelium (Optional)

```
┌─────────────────┐
│     Client      │
└────────┬────────┘
         │ HTTP/JSON-RPC
         ▼
┌─────────────────┐      ┌──────────────┐
│   Supervisor    │◄────►│   Mycelium   │
│ OpenRPC Server  │      │    Daemon    │
│  (Port 3030)    │      │ (Port 8990)  │
└────────┬────────┘      └──────────────┘
         │ Redis
         ▼
┌─────────────────┐
│     Runners     │
└─────────────────┘
```

## Environment Variables

| Variable | Description | Default | Required |
|----------|-------------|---------|----------|
| `MYCELIUM_URL` | Mycelium daemon URL | `http://127.0.0.1:8990` | No |
| `RUST_LOG` | Log level | `info` | No |

**To disable Mycelium:**
```bash
export MYCELIUM_URL=""
```

## Dependencies

### Core Dependencies (Always)
- `tokio` - Async runtime
- `redis` - Job queuing
- `jsonrpsee` - OpenRPC server
- `runner_rust` - Job model

### Mycelium Dependencies (Optional)
- `reqwest` - HTTP client
- `base64` - Encoding
- `rand` - Random IDs

## Examples

All examples work without Mycelium:

```bash
# Simple end-to-end example
RUST_LOG=info cargo run --example simple_e2e

# Full automated demo
RUST_LOG=info cargo run --example end_to_end_demo
```

## Migration Guide

### If you were using Mycelium

**Before:**
```bash
cargo run --bin supervisor
# Would try to connect to Mycelium automatically
```

**After:**
```bash
# Option A: Disable Mycelium (recommended for most use cases)
MYCELIUM_URL="" cargo run --bin supervisor

# Option B: Enable Mycelium feature
cargo run --bin supervisor --features mycelium
```

### If you weren't using Mycelium

**Before:**
```bash
cargo run --bin supervisor
# Would see connection errors to port 8990
```

**After:**
```bash
cargo run --bin supervisor
# Clean startup, no connection errors! 🎉
```

## Benefits

### For Development
- ✅ Faster builds (fewer dependencies)
- ✅ Simpler setup (no Mycelium daemon needed)
- ✅ Cleaner logs (no connection errors)
- ✅ Easier debugging

### For Production
- ✅ Smaller binary size
- ✅ Fewer runtime dependencies
- ✅ More flexible deployment
- ✅ Optional advanced features

## Testing

### Test without Mycelium
```bash
# Build
cargo build --bin supervisor

# Run tests
cargo test

# Run examples
cargo run --example simple_e2e
```

### Test with Mycelium
```bash
# Build with feature
cargo build --bin supervisor --features mycelium

# Start Mycelium daemon (if you have one)
# mycelium-daemon --port 8990

# Run supervisor
MYCELIUM_URL="http://localhost:8990" cargo run --bin supervisor --features mycelium
```

## Troubleshooting

### "Mycelium integration not enabled"

This is informational, not an error. If you need Mycelium:

```bash
cargo build --features mycelium
```

### "HTTP request failed: error sending request"

If you see this with Mycelium enabled, check:
1. Is the Mycelium daemon running?
2. Is the URL correct? (`MYCELIUM_URL`)
3. Is the port accessible?

Or simply disable Mycelium:
```bash
export MYCELIUM_URL=""
```

## Summary

🎉 **The supervisor now runs cleanly with just OpenRPC!**
|
||||||
|
|
||||||
|
- Default: OpenRPC HTTP server only
|
||||||
|
- Optional: Enable Mycelium with `--features mycelium`
|
||||||
|
- No more connection errors when Mycelium isn't needed
|
||||||
|
- Simpler, faster, cleaner!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Status:** ✅ Complete
|
||||||
|
**Version:** 0.1.0
|
||||||
|
**Last Updated:** 2025-10-24
|
214
bin/supervisor/docs/QUICK_START.md
Normal file
@@ -0,0 +1,214 @@
# Quick Start Guide

Complete guide to running the Hero Supervisor with OSIS runners and examples.

## Prerequisites

1. **Redis** - Must be running
2. **Rust** - Version 1.88+ (run `rustup update`)

## 1. Start Redis

```bash
redis-server
```

## 2. Start Supervisor

```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
cargo run --bin supervisor
```

You should see:
```
╔════════════════════════════════════════════════════════════╗
║                 Hero Supervisor Started                    ║
╚════════════════════════════════════════════════════════════╝
📡 OpenRPC Server: http://127.0.0.1:3030
🔗 Redis: redis://localhost:6379
🌐 Mycelium: Not compiled (use --features mycelium)
╚════════════════════════════════════════════════════════════╝
```

## 3. Start OSIS Runner

```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner_rust
cargo run --bin runner_osis -- test_runner \
    --redis-url redis://localhost:6379 \
    --db-path /tmp/test_runner.db
```

You should see:
```
Starting OSIS Sync Runner with ID: test_runner
Database path: /tmp/test_runner.db
Redis URL: redis://localhost:6379
OSIS Sync Runner 'test_runner' started successfully
```

## 4. Run Example

```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
RUST_LOG=info cargo run --example simple_e2e
```

## Terminal Layout

```
┌─────────────────────┬─────────────────────┐
│   Terminal 1        │   Terminal 2        │
│   Redis             │   Supervisor        │
│   redis-server      │   cargo run --bin   │
│                     │   supervisor        │
├─────────────────────┼─────────────────────┤
│   Terminal 3        │   Terminal 4        │
│   OSIS Runner       │   Example           │
│   cargo run --bin   │   cargo run         │
│   runner_osis       │   --example         │
│                     │   simple_e2e        │
└─────────────────────┴─────────────────────┘
```

## What Each Component Does

### Redis
- Job queue storage
- Job result storage
- Runner coordination

### Supervisor
- OpenRPC HTTP server (port 3030)
- Job dispatch to runners
- Runner registration
- Job execution coordination

### OSIS Runner
- Listens for jobs on its Redis queue
- Executes Rhai scripts
- Stores results back to Redis
- Uses HeroDB for data persistence

### Example
- Creates jobs with Rhai scripts
- Sends jobs to the supervisor via OpenRPC
- Receives results
- Demonstrates both blocking and non-blocking modes (see the sketch below)
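A condensed sketch of what the example client does, using the `hero-supervisor-openrpc-client` API described elsewhere in this repo. The secret, runner name, and queue name here are placeholder values, and the script payload is a toy one:

```rust
use hero_supervisor_openrpc_client::{JobBuilder, SupervisorClient};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Connect to the supervisor's OpenRPC endpoint.
    let client = SupervisorClient::new("http://localhost:3030")?;

    // Register the runner (placeholder secret/queue values).
    client.register_runner("admin_secret", "test_runner", "test_queue").await?;

    // Build a job that runs a small Rhai script.
    let job = JobBuilder::new()
        .caller_id("quick_start")
        .context_id("demo")
        .payload("to_json(2 + 2)")
        .runner("test_runner")
        .executor("rhai")
        .timeout(30)
        .build()?;

    // Blocking mode: wait for the result.
    let response = client.job_run("admin_secret", job, Some(30)).await?;
    println!("Result: {:?}", response.result);
    Ok(())
}
```
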
## Architecture

```
┌─────────────┐
│   Example   │  (simple_e2e.rs)
└──────┬──────┘
       │ HTTP/JSON-RPC
       ▼
┌─────────────┐
│ Supervisor  │  (port 3030)
└──────┬──────┘
       │ Redis Queue
       ▼
┌─────────────┐
│ OSIS Runner │  (test_runner)
└──────┬──────┘
       │
       ▼
┌─────────────┐
│   HeroDB    │  (Redis + local DB)
└─────────────┘
```

## Troubleshooting

### "Connection refused" on port 3030
- Make sure the supervisor is running
- Check whether another process is using port 3030

### "Connection refused" on port 6379
- Make sure Redis is running
- Check: `redis-cli ping` (should return "PONG")

### Runner not receiving jobs
- Check the runner is registered: look for "Runner registered successfully" in the example output
- Check the Redis connection: both the supervisor and the runner must use the same Redis URL
- Check the queue name matches: it should be `hero:q:work:type:osis:group:default:inst:test_runner`

### "Job execution timeout"
- Increase the timeout in the job builder: `.timeout(120)`
- Check whether the runner is actually processing jobs (look at its logs)

## Example Output

### Successful Run

```
╔════════════════════════════════════════╗
║        Simple End-to-End Demo          ║
╚════════════════════════════════════════╝

📋 Step 1: Registering Runner
─────────────────────────────────────────
✅ Runner registered successfully

📋 Step 2: Running a Simple Job (Blocking)
─────────────────────────────────────────
✅ Job completed!
Result: {"message":"Hello from the runner!","number":42}

📋 Step 3: Running a Calculation Job
─────────────────────────────────────────
✅ Calculation completed!
Result: {"sum":55,"product":3628800,"count":10}

📋 Step 4: Starting a Non-Blocking Job
─────────────────────────────────────────
✅ Job started!
Job ID: abc-123 (running in background)

🎉 Demo completed successfully!
```

## Next Steps

1. **Try different Rhai scripts** - Modify the payload in the examples
2. **Add more runners** - Start multiple runners with different IDs
3. **Explore the API** - Use the OpenRPC client library
4. **Build your own client** - See `client/` for examples

## Useful Commands

```bash
# Check Redis
redis-cli ping

# List Redis keys
redis-cli keys "hero:*"

# Monitor Redis commands
redis-cli monitor

# Check the supervisor is running
curl http://localhost:3030

# View runner logs
# (check the terminal where the runner is running)
```

## Clean Up

```bash
# Stop all processes (Ctrl+C in each terminal)

# Clean up the test database
rm /tmp/test_runner.db

# (Optional) Flush Redis
redis-cli FLUSHALL
```

---

**Status:** ✅ Ready to Use
**Last Updated:** 2025-10-24
58
bin/supervisor/docs/RESTRUCTURE.md
Normal file
@@ -0,0 +1,58 @@
# Repository Restructure

## Changes Made

The supervisor repository has been restructured to follow a cleaner organization:

### Before:
```
supervisor/
├── clients/
│   ├── openrpc/      # OpenRPC client library
│   └── admin-ui/     # Admin UI (Yew WASM app)
├── src/              # Main supervisor library
└── cmd/              # Supervisor binary
```

### After:
```
supervisor/
├── client/           # OpenRPC client library (renamed from clients/openrpc)
├── ui/               # Admin UI (renamed from clients/admin-ui)
├── src/              # Main supervisor library
└── cmd/              # Supervisor binary
```

## Package Names

The package names remain unchanged:
- **Client**: `hero-supervisor-openrpc-client`
- **UI**: `supervisor-admin-ui`
- **Main**: `hero-supervisor`

## Git Dependencies

External projects using Git URLs will automatically pick up the new structure:

```toml
# This continues to work
hero-supervisor-openrpc-client = { git = "https://git.ourworld.tf/herocode/supervisor.git" }
```

Cargo will find the package by name regardless of its location in the repository.

## Local Path Dependencies

If you have local path dependencies, update them:

```toml
# Old
hero-supervisor-openrpc-client = { path = "../supervisor/clients/openrpc" }

# New
hero-supervisor-openrpc-client = { path = "../supervisor/client" }
```

## Scripts and Documentation

All references in scripts, documentation, and examples have been updated to reflect the new structure.
333
bin/supervisor/docs/job-api-convention.md
Normal file
@@ -0,0 +1,333 @@
# Hero Supervisor Job API Convention

## Overview

The Hero Supervisor OpenRPC API follows a consistent naming convention for job-related operations:

- **`jobs.`** - General job operations (plural)
- **`job.`** - Specific job operations (singular)

This convention provides a clear distinction between operations that work with multiple jobs or create new jobs, and operations that work with a specific existing job.

## API Methods

### General Job Operations (`jobs.`)

#### `jobs.create`
Creates a new job without immediately queuing it to a runner.

**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job` (Job object): Complete job specification

**Returns:**
- `job_id` (string): Unique identifier of the created job

**Usage:**
```json
{
  "method": "jobs.create",
  "params": {
    "secret": "your-secret",
    "job": {
      "id": "job-123",
      "caller_id": "client-1",
      "context_id": "context-1",
      "payload": "print('Hello World')",
      "executor": "osis",
      "runner": "osis-runner-1",
      "timeout": 300,
      "env_vars": {},
      "created_at": "2023-01-01T00:00:00Z",
      "updated_at": "2023-01-01T00:00:00Z"
    }
  }
}
```

#### `jobs.list`
Lists all jobs in the system with full details.

**Parameters:** None

**Returns:**
- `jobs` (array of Job objects): List of all jobs with complete information

**Usage:**
```json
{
  "method": "jobs.list",
  "params": []
}
```

**Response:**
```json
[
  {
    "id": "job-123",
    "caller_id": "client-1",
    "context_id": "context-1",
    "payload": "print('Hello World')",
    "executor": "osis",
    "runner": "osis-runner-1",
    "timeout": 300,
    "env_vars": {},
    "created_at": "2023-01-01T00:00:00Z",
    "updated_at": "2023-01-01T00:00:00Z"
  }
]
```

### Specific Job Operations (`job.`)

#### `job.run`
Runs a job immediately on the appropriate runner and returns the result.

**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job` (Job object): Complete job specification

**Returns:**
- `result` (JobResult): Either a success or an error result

**JobResult Format:**
```json
// Success case
{
  "success": "Job completed successfully with output..."
}

// Error case
{
  "error": "Job failed with error message..."
}
```

**Usage:**
```json
{
  "method": "job.run",
  "params": {
    "secret": "your-secret",
    "job": { /* job object */ }
  }
}
```

#### `job.start`
Starts a previously created job by queuing it to its assigned runner.

**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job_id` (string): ID of the job to start

**Returns:** `null` (void)

**Usage:**
```json
{
  "method": "job.start",
  "params": {
    "secret": "your-secret",
    "job_id": "job-123"
  }
}
```

#### `job.status`
Gets the current status of a job.

**Parameters:**
- `job_id` (string): ID of the job to check

**Returns:**
- `status` (JobStatusResponse): Current job status information

**JobStatusResponse Format:**
```json
{
  "job_id": "job-123",
  "status": "running",
  "created_at": "2023-01-01T00:00:00Z",
  "started_at": "2023-01-01T00:00:05Z",
  "completed_at": null
}
```

**Status Values:**
- `created` - Job has been created but not queued
- `queued` - Job has been queued to a runner
- `running` - Job is currently executing
- `completed` - Job finished successfully
- `failed` - Job failed with an error
- `timeout` - Job timed out

**Usage:**
```json
{
  "method": "job.status",
  "params": ["job-123"]
}
```
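For non-blocking workflows, `job.status` is what a client polls until the job leaves the `created`/`queued`/`running` states. A hedged Rust sketch of such a polling loop over raw JSON-RPC, using the `jsonrpsee` HTTP client for illustration (the method name and status values match this document, but the loop itself is not part of any shipped client):

```rust
use jsonrpsee::core::client::ClientT;
use jsonrpsee::http_client::HttpClientBuilder;
use jsonrpsee::rpc_params;
use serde::Deserialize;
use std::time::Duration;

/// Mirrors the JobStatusResponse documented above (extra fields ignored).
#[derive(Deserialize)]
struct JobStatusResponse {
    job_id: String,
    status: String,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = HttpClientBuilder::default().build("http://localhost:3030")?;

    // Poll job.status once per second until the job settles.
    loop {
        let status: JobStatusResponse =
            client.request("job.status", rpc_params!["job-123"]).await?;
        match status.status.as_str() {
            "created" | "queued" | "running" => {
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
            other => {
                println!("job {} finished with status: {other}", status.job_id);
                break;
            }
        }
    }
    Ok(())
}
```
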
#### `job.result`
Gets the result of a completed job. This method blocks until the result is available.

**Parameters:**
- `job_id` (string): ID of the job to get results for

**Returns:**
- `result` (JobResult): Either a success or an error result

**Usage:**
```json
{
  "method": "job.result",
  "params": ["job-123"]
}
```

#### `job.stop`
Stops a running job.

**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job_id` (string): ID of the job to stop

**Returns:** `null` (void)

**Usage:**
```json
{
  "method": "job.stop",
  "params": {
    "secret": "your-secret",
    "job_id": "job-123"
  }
}
```

#### `job.delete`
Deletes a job from the system.

**Parameters:**
- `secret` (string): Authentication secret (admin or user)
- `job_id` (string): ID of the job to delete

**Returns:** `null` (void)

**Usage:**
```json
{
  "method": "job.delete",
  "params": {
    "secret": "your-secret",
    "job_id": "job-123"
  }
}
```

## Workflow Examples

### Fire-and-Forget Job
```javascript
// Create and immediately run a job
const result = await client.job_run(secret, jobSpec);
if (result.success) {
  console.log("Job completed:", result.success);
} else {
  console.error("Job failed:", result.error);
}
```
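The same fire-and-forget pattern in Rust, using the typed `job_run` method of the `hero-supervisor-openrpc-client` crate described later in this repo. The secret is a placeholder, and the job values mirror the JSON examples above:

```rust
use hero_supervisor_openrpc_client::{JobBuilder, SupervisorClient};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = SupervisorClient::new("http://localhost:3030")?;

    let job = JobBuilder::new()
        .caller_id("client-1")
        .context_id("context-1")
        .payload("print('Hello World')")
        .runner("osis-runner-1")
        .executor("osis")
        .timeout(300)
        .build()?;

    // Blocking call: waits up to 30 seconds for the runner's result.
    let response = client.job_run("your-secret", job, Some(30)).await?;
    println!("Job {} finished: {:?}", response.job_id, response.result);
    Ok(())
}
```
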
### Asynchronous Job Processing
```javascript
// 1. Create the job
const jobId = await client.jobs_create(secret, jobSpec);

// 2. Start the job
await client.job_start(secret, jobId);

// 3. Poll for completion (non-blocking)
let status;
do {
  status = await client.job_status(jobId);
  if (status.status === 'running') {
    await sleep(1000); // Wait 1 second
  }
} while (status.status === 'running' || status.status === 'queued');

// 4. Get the result
const result = await client.job_result(jobId);
```

### Batch Job Management
```javascript
// Create multiple jobs
const jobIds = [];
for (const jobSpec of jobSpecs) {
  const jobId = await client.jobs_create(secret, jobSpec);
  jobIds.push(jobId);
}

// Start all jobs
for (const jobId of jobIds) {
  await client.job_start(secret, jobId);
}

// Monitor progress
const results = [];
for (const jobId of jobIds) {
  const result = await client.job_result(jobId); // Blocks until complete
  results.push(result);
}

// Optional: Stop or delete jobs if needed
for (const jobId of jobIds) {
  await client.job_stop(secret, jobId);   // Stop a running job
  await client.job_delete(secret, jobId); // Delete it from the system
}
```

## Authentication

All job operations require authentication using one of the following secret types:

- **Admin secrets**: Full access to all operations
- **User secrets**: Access to job operations (`jobs.create`, `job.run`, `job.start`)
- **Register secrets**: Only access to runner registration

## Error Handling

All methods return standard JSON-RPC error responses for:

- **Authentication errors** (-32602): Invalid or missing secrets
- **Job not found errors** (-32000): Job ID doesn't exist
- **Internal errors** (-32603): Server-side processing errors

## Migration from Legacy API

### Old → New Method Names

| Legacy Method | New Method | Notes |
|---------------|------------|-------|
| `run_job` | `job.run` | Same functionality, new naming |
| `list_jobs` | `jobs.list` | Same functionality, new naming |
| `create_job` | `jobs.create` | Enhanced to not auto-queue |

### New Methods Added

- `job.start` - Start a created job
- `job.stop` - Stop a running job
- `job.delete` - Delete a job from the system
- `job.status` - Get job status (non-blocking)
- `job.result` - Get job result (blocking)

### API Changes

- **Job struct**: Replaced the `job_type` field with `executor`
- **jobs.list**: Now returns full Job objects instead of just job IDs
- **Enhanced job lifecycle**: Added stop and delete operations

This provides much more granular control over job lifecycle management.
391
bin/supervisor/docs/openrpc.json
Normal file
@@ -0,0 +1,391 @@
{
  "openrpc": "1.3.2",
  "info": {
    "title": "Hero Supervisor OpenRPC API",
    "version": "1.0.0",
    "description": "OpenRPC API for managing Hero Supervisor runners and jobs. Job operations follow the convention: 'jobs.' for general operations and 'job.' for specific job operations."
  },
  "components": {
    "schemas": {
      "Job": {
        "type": "object",
        "properties": {
          "id": { "type": "string" },
          "caller_id": { "type": "string" },
          "context_id": { "type": "string" },
          "payload": { "type": "string" },
          "runner": { "type": "string" },
          "executor": { "type": "string" },
          "timeout": { "type": "number" },
          "env_vars": { "type": "object" },
          "created_at": { "type": "string" },
          "updated_at": { "type": "string" }
        },
        "required": ["id", "caller_id", "context_id", "payload", "runner", "executor", "timeout", "env_vars", "created_at", "updated_at"]
      }
    }
  },
  "methods": [
    {
      "name": "list_runners",
      "description": "List all registered runners",
      "params": [],
      "result": {
        "name": "runners",
        "schema": {
          "type": "array",
          "items": { "type": "string" }
        }
      }
    },
    {
      "name": "register_runner",
      "description": "Register a new runner to the supervisor with secret authentication",
      "params": [
        {
          "name": "params",
          "schema": {
            "type": "object",
            "properties": {
              "secret": { "type": "string" },
              "name": { "type": "string" },
              "queue": { "type": "string" }
            },
            "required": ["secret", "name", "queue"]
          }
        }
      ],
      "result": {
        "name": "result",
        "schema": { "type": "null" }
      }
    },
    {
      "name": "jobs.create",
      "description": "Create a new job without queuing it to a runner",
      "params": [
        {
          "name": "params",
          "schema": {
            "type": "object",
            "properties": {
              "secret": { "type": "string" },
              "job": { "$ref": "#/components/schemas/Job" }
            },
            "required": ["secret", "job"]
          }
        }
      ],
      "result": {
        "name": "job_id",
        "schema": { "type": "string" }
      }
    },
    {
      "name": "jobs.list",
      "description": "List all jobs",
      "params": [],
      "result": {
        "name": "jobs",
        "schema": {
          "type": "array",
          "items": { "$ref": "#/components/schemas/Job" }
        }
      }
    },
    {
      "name": "job.run",
      "description": "Run a job on the appropriate runner and return the result",
      "params": [
        {
          "name": "params",
          "schema": {
            "type": "object",
            "properties": {
              "secret": { "type": "string" },
              "job": { "$ref": "#/components/schemas/Job" }
            },
            "required": ["secret", "job"]
          }
        }
      ],
      "result": {
        "name": "result",
        "schema": {
          "oneOf": [
            {
              "type": "object",
              "properties": {
                "success": { "type": "string" }
              },
              "required": ["success"]
            },
            {
              "type": "object",
              "properties": {
                "error": { "type": "string" }
              },
              "required": ["error"]
            }
          ]
        }
      }
    },
    {
      "name": "job.start",
      "description": "Start a previously created job by queuing it to its assigned runner",
      "params": [
        {
          "name": "params",
          "schema": {
            "type": "object",
            "properties": {
              "secret": { "type": "string" },
              "job_id": { "type": "string" }
            },
            "required": ["secret", "job_id"]
          }
        }
      ],
      "result": {
        "name": "result",
        "schema": { "type": "null" }
      }
    },
    {
      "name": "job.status",
      "description": "Get the current status of a job",
      "params": [
        {
          "name": "job_id",
          "schema": { "type": "string" }
        }
      ],
      "result": {
        "name": "status",
        "schema": {
          "type": "object",
          "properties": {
            "job_id": { "type": "string" },
            "status": {
              "type": "string",
              "enum": ["created", "queued", "running", "completed", "failed", "timeout"]
            },
            "created_at": { "type": "string" },
            "started_at": { "type": ["string", "null"] },
            "completed_at": { "type": ["string", "null"] }
          },
          "required": ["job_id", "status", "created_at"]
        }
      }
    },
    {
      "name": "job.result",
      "description": "Get the result of a completed job (blocks until result is available)",
      "params": [
        {
          "name": "job_id",
          "schema": { "type": "string" }
        }
      ],
      "result": {
        "name": "result",
        "schema": {
          "oneOf": [
            {
              "type": "object",
              "properties": {
                "success": { "type": "string" }
              },
              "required": ["success"]
            },
            {
              "type": "object",
              "properties": {
                "error": { "type": "string" }
              },
              "required": ["error"]
            }
          ]
        }
      }
    },
    {
      "name": "remove_runner",
      "description": "Remove a runner from the supervisor",
      "params": [
        {
          "name": "actor_id",
          "schema": { "type": "string" }
        }
      ],
      "result": {
        "name": "result",
        "schema": { "type": "null" }
      }
    },
    {
      "name": "start_runner",
      "description": "Start a specific runner",
      "params": [
        {
          "name": "actor_id",
          "schema": { "type": "string" }
        }
      ],
      "result": {
        "name": "result",
        "schema": { "type": "null" }
      }
    },
    {
      "name": "stop_runner",
      "description": "Stop a specific runner",
      "params": [
        {
          "name": "actor_id",
          "schema": { "type": "string" }
        },
        {
          "name": "force",
          "schema": { "type": "boolean" }
        }
      ],
      "result": {
        "name": "result",
        "schema": { "type": "null" }
      }
    },
    {
      "name": "get_runner_status",
      "description": "Get the status of a specific runner",
      "params": [
        {
          "name": "actor_id",
          "schema": { "type": "string" }
        }
      ],
      "result": {
        "name": "status",
        "schema": { "type": "object" }
      }
    },
    {
      "name": "get_all_runner_status",
      "description": "Get status of all runners",
      "params": [],
      "result": {
        "name": "statuses",
        "schema": {
          "type": "array",
          "items": { "type": "object" }
        }
      }
    },
    {
      "name": "start_all",
      "description": "Start all runners",
      "params": [],
      "result": {
        "name": "results",
        "schema": {
          "type": "array",
          "items": {
            "type": "array",
            "items": { "type": "string" }
          }
        }
      }
    },
    {
      "name": "stop_all",
      "description": "Stop all runners",
      "params": [
        {
          "name": "force",
          "schema": { "type": "boolean" }
        }
      ],
      "result": {
        "name": "results",
        "schema": {
          "type": "array",
          "items": {
            "type": "array",
            "items": { "type": "string" }
          }
        }
      }
    },
    {
      "name": "get_all_status",
      "description": "Get status of all runners (alternative format)",
      "params": [],
      "result": {
        "name": "statuses",
        "schema": {
          "type": "array",
          "items": {
            "type": "array",
            "items": { "type": "string" }
          }
        }
      }
    },
    {
      "name": "job.stop",
      "description": "Stop a running job",
      "params": [
        {
          "name": "params",
          "schema": {
            "type": "object",
            "properties": {
              "secret": { "type": "string" },
              "job_id": { "type": "string" }
            },
            "required": ["secret", "job_id"]
          }
        }
      ],
      "result": {
        "name": "result",
        "schema": { "type": "null" }
      }
    },
    {
      "name": "job.delete",
      "description": "Delete a job from the system",
      "params": [
        {
          "name": "params",
          "schema": {
            "type": "object",
            "properties": {
              "secret": { "type": "string" },
              "job_id": { "type": "string" }
            },
            "required": ["secret", "job_id"]
          }
        }
      ],
      "result": {
        "name": "result",
        "schema": { "type": "null" }
      }
    },
    {
      "name": "rpc.discover",
      "description": "OpenRPC discovery method - returns the OpenRPC document describing this API",
      "params": [],
      "result": {
        "name": "openrpc_document",
        "schema": { "type": "object" }
      }
    }
  ]
}
80
bin/supervisor/docs/test_keypairs.md
Normal file
@@ -0,0 +1,80 @@
# Test Keypairs for Supervisor Auth

These are secp256k1 keypairs for testing the supervisor authentication system.

## Keypair 1 (Alice - Admin)
```
Private Key: 0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
Public Key:  0x04a34b99f22c790c4e36b2b3c2c35a36db06226e41c692fc82b8b56ac1c540c5bd5b8dec5235a0fa8722476c7709c02559e3aa73aa03918ba2d492eea75abea235
Address:     0x1234567890abcdef1234567890abcdef12345678
```

## Keypair 2 (Bob - User)
```
Private Key: 0xfedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321
Public Key:  0x04d0de0aaeaefad02b8bdf8a56451a9852d7f851fee0cc8b4d42f3a0a4c3c2f66c1e5e3e8e3c3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e
Address:     0xfedcba0987654321fedcba0987654321fedcba09
```

## Keypair 3 (Charlie - Register)
```
Private Key: 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
Public Key:  0x04e68acfc0253a10620dff706b0a1b1f1f5833ea3beb3bde6250d4e5e1e283bb4e9504be11a68d7a263f8e2000d1f8b8c5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e
Address:     0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
```

## Keypair 4 (Dave - Test)
```
Private Key: 0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
Public Key:  0x04f71e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e
Address:     0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
```

## Keypair 5 (Eve - Test)
```
Private Key: 0xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
Public Key:  0x04a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0
Address:     0xcccccccccccccccccccccccccccccccccccccccc
```

## Usage Examples

### Using with OpenRPC Client

```rust
use secp256k1::{Message, Secp256k1, SecretKey};
use sha2::{Digest, Sha256};

// Alice's private key
let alice_privkey = SecretKey::from_slice(
    &hex::decode("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef").unwrap()
).unwrap();

// Sign a message: hash it down to a 32-byte digest first, then sign.
let secp = Secp256k1::new();
let message = "Hello, Supervisor!";
let digest = Sha256::digest(message.as_bytes());
let msg = Message::from_digest_slice(&digest).unwrap();
let signature = secp.sign_ecdsa(&msg, &alice_privkey);
```

### Using with Admin UI

You can use the public keys as identifiers when creating API keys:
- Alice: `0x04a34b99f22c790c4e36b2b3c2c35a36db06226e41c692fc82b8b56ac1c540c5bd...`
- Bob: `0x04d0de0aaeaefad02b8bdf8a56451a9852d7f851fee0cc8b4d42f3a0a4c3c2f66c...`

### Testing Different Scopes

1. **Admin Scope** - Use Alice's keypair for full admin access
2. **User Scope** - Use Bob's keypair for limited user access
3. **Register Scope** - Use Charlie's keypair for runner registration only

## Notes

⚠️ **WARNING**: These are TEST keypairs only! Never use these in production!

The private keys are intentionally simple patterns for easy testing:
- Alice: ascending 0x12...ef pattern
- Bob: reverse 0xfe...21 pattern
- Charlie: all 0xaa
- Dave: all 0xbb
- Eve: all 0xcc
74
bin/supervisor/examples/README.md
Normal file
@@ -0,0 +1,74 @@
# Hero Supervisor Examples

This directory contains examples demonstrating Hero Supervisor functionality.

## Available Examples

### osiris_openrpc

Comprehensive example showing the complete workflow of using Hero Supervisor with OSIRIS runners via OpenRPC.

**Features:**
- Automatic supervisor and runner startup
- OpenRPC client communication
- Runner registration and management
- Job dispatching with multiple scripts
- Context-based access control
- Graceful shutdown

**Run:**
```bash
cargo run --example osiris_openrpc
```

See [osiris_openrpc/README.md](osiris_openrpc/README.md) for details.

## Prerequisites

All examples require:
- Redis server running on `localhost:6379`
- Rust toolchain installed

## Example Structure

```
examples/
├── README.md              # This file
├── osiris_openrpc/        # OSIRIS + OpenRPC example
│   ├── main.rs            # Main example code
│   ├── README.md          # Detailed documentation
│   ├── note.rhai          # Note creation script
│   ├── event.rhai         # Event creation script
│   ├── query.rhai         # Query script
│   └── access_denied.rhai # Access control test script
└── _archive/              # Archived old examples
```

## Architecture Overview

The examples demonstrate the Hero Supervisor architecture:

```
Client (OpenRPC)
    ↓
Supervisor (OpenRPC Server)
    ↓
Redis Queue
    ↓
Runners (OSIRIS, SAL, etc.)
```

## Development

To add a new example:

1. Create a new directory under `examples/`
2. Add `main.rs` with your example code
3. Add any required script files (`.rhai`)
4. Add a `README.md` documenting the example
5. Update `Cargo.toml` to register the example
6. Update this README with a link

## Archived Examples

Previous examples have been moved to `_archive/` for reference. These may be outdated but can provide useful patterns for specific use cases.
364
bin/supervisor/examples/_archive/E2E_EXAMPLES.md
Normal file
@@ -0,0 +1,364 @@
# End-to-End Examples

Complete examples demonstrating the full Supervisor + Runner + Client workflow.

## Overview

These examples show how to:
1. Start a Hero Supervisor
2. Start an OSIS Runner
3. Register the runner with the supervisor
4. Execute jobs using both blocking (`job.run`) and non-blocking (`job.start`) modes

## Prerequisites

### Required Services

1. **Redis** - Must be running on `localhost:6379`
   ```bash
   redis-server
   ```

2. **Supervisor** - Hero Supervisor with Mycelium integration
   ```bash
   cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379
   ```

3. **Runner** - OSIS Runner to execute jobs
   ```bash
   cargo run --bin runner_osis -- test_runner --redis-url redis://localhost:6379
   ```

## Examples

### 1. Simple End-to-End (`simple_e2e.rs`)

**Recommended for beginners** - A minimal example with clear step-by-step execution.

#### What it does:
- Registers a runner with the supervisor
- Runs 2 blocking jobs (with immediate results)
- Starts 1 non-blocking job (fire and forget)
- Shows clear output at each step

#### How to run:

**Terminal 1 - Redis:**
```bash
redis-server
```

**Terminal 2 - Supervisor:**
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
RUST_LOG=info cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379
```

**Terminal 3 - Runner:**
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner_rust
RUST_LOG=info cargo run --bin runner_osis -- test_runner \
    --redis-url redis://localhost:6379 \
    --db-path /tmp/test_runner.db
```

**Terminal 4 - Demo:**
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
RUST_LOG=info cargo run --example simple_e2e
```

#### Expected Output:

```
╔════════════════════════════════════════╗
║        Simple End-to-End Demo          ║
╚════════════════════════════════════════╝

📋 Step 1: Registering Runner
─────────────────────────────────────────
✅ Runner registered successfully

📋 Step 2: Running a Simple Job (Blocking)
─────────────────────────────────────────
✅ Job completed!
Result: {"message":"Hello from the runner!","number":42,"timestamp":1234567890}

📋 Step 3: Running a Calculation Job
─────────────────────────────────────────
✅ Calculation completed!
Result: {"sum":55,"product":3628800,"count":10,"average":5}

📋 Step 4: Starting a Non-Blocking Job
─────────────────────────────────────────
✅ Job started!
Job ID: abc-123 (running in background)

🎉 Demo completed successfully!
```

### 2. Full End-to-End (`end_to_end_demo.rs`)

**Advanced** - Automatically spawns supervisor and runner processes.

#### What it does:
- Automatically starts the supervisor and runner
- Runs multiple test jobs
- Demonstrates both execution modes
- Handles cleanup automatically

#### How to run:

**Terminal 1 - Redis:**
```bash
redis-server
```

**Terminal 2 - Demo:**
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
RUST_LOG=info cargo run --example end_to_end_demo
```

#### Features:
- ✅ Automatic process management
- ✅ Multiple job examples
- ✅ Graceful shutdown
- ✅ Comprehensive logging

## Job Execution Modes

### job.run (Blocking)

Executes a job and waits for the result.

**Request:**
```json
{
  "jsonrpc": "2.0",
  "method": "job.run",
  "params": [{
    "secret": "admin_secret",
    "job": { /* job object */ },
    "timeout": 30
  }],
  "id": 1
}
```

**Response:**
```json
{
  "jsonrpc": "2.0",
  "result": {
    "job_id": "uuid",
    "status": "completed",
    "result": "{ /* actual result */ }"
  },
  "id": 1
}
```

**Use when:**
- You need immediate results
- The job completes quickly (< 60 seconds)
- Synchronous workflow

### job.start (Non-Blocking)

Starts a job and returns immediately.

**Request:**
```json
{
  "jsonrpc": "2.0",
  "method": "job.start",
  "params": [{
    "secret": "admin_secret",
    "job": { /* job object */ }
  }],
  "id": 1
}
```

**Response:**
```json
{
  "jsonrpc": "2.0",
  "result": {
    "job_id": "uuid",
    "status": "queued"
  },
  "id": 1
}
```

**Use when:**
- Long-running operations
- Background processing
- Async workflows
- You don't need immediate results
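Both modes through the typed client library, with method signatures as documented in the examples summary. The secret is a placeholder value and the jobs are built with `JobBuilder`; treat this as a sketch rather than the shipped example code:

```rust
use hero_supervisor_openrpc_client::{JobBuilder, SupervisorClient};

async fn run_both_modes() -> Result<(), Box<dyn std::error::Error>> {
    let client = SupervisorClient::new("http://localhost:3030")?;

    // Small helper: build a job around a given Rhai script.
    let make_job = |script: &str| {
        JobBuilder::new()
            .caller_id("demo")
            .context_id("demo")
            .payload(script)
            .runner("test_runner")
            .executor("rhai")
            .timeout(30)
            .build()
    };

    // Blocking: job.run waits for the runner's result.
    let done = client
        .job_run("admin_secret", make_job("to_json(2 + 2)")?, Some(30))
        .await?;
    println!("blocking result: {:?}", done.result);

    // Non-blocking: job.start returns as soon as the job is queued.
    let queued = client
        .job_start("admin_secret", make_job("to_json(40 + 2)")?)
        .await?;
    println!("queued job id: {}", queued.job_id);
    Ok(())
}
```
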
## Job Structure

Jobs are created using the `JobBuilder`:

```rust
use runner_rust::job::JobBuilder;

let job = JobBuilder::new()
    .caller_id("my_client")
    .context_id("my_context")
    .payload(r#"
        // Rhai script to execute
        let result = 2 + 2;
        to_json(result)
    "#)
    .runner("runner_name")
    .executor("rhai")
    .timeout(30)
    .build()?;
```

### Job Fields

- **caller_id**: Identifier for the client making the request
- **context_id**: Context for the job execution
- **payload**: Rhai script to execute
- **runner**: Name of the runner to execute on
- **executor**: Type of executor (always "rhai" for OSIS)
- **timeout**: Maximum execution time in seconds

## Rhai Script Examples

### Simple Calculation
```rhai
let result = 2 + 2;
to_json(result)
```

### String Manipulation
```rhai
let message = "Hello, World!";
let upper = message.to_upper();
to_json(upper)
```

### Array Operations
```rhai
let numbers = [1, 2, 3, 4, 5];
let sum = 0;
for n in numbers {
    sum += n;
}
to_json(#{sum: sum, count: numbers.len()})
```

### Object Creation
```rhai
let person = #{
    name: "Alice",
    age: 30,
    email: "alice@example.com"
};
to_json(person)
```

## Troubleshooting

### "Failed to connect to supervisor"

**Problem:** The supervisor is not running, or the port is wrong.

**Solution:**
```bash
# Check if the supervisor is running
curl http://localhost:3030

# Start the supervisor
cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379
```

### "Runner not found"

**Problem:** The runner is not registered or not running.

**Solution:**
```bash
# Start the runner
cargo run --bin runner_osis -- test_runner --redis-url redis://localhost:6379

# Check the runner logs for connection issues
```

### "Job execution timeout"

**Problem:** The job took longer than the timeout value.

**Solution:**
- Increase the timeout in the job builder: `.timeout(60)`
- Or in the job.run request: `"timeout": 60`

### "Redis connection failed"

**Problem:** Redis is not running.

**Solution:**
```bash
# Start Redis
redis-server

# Or specify a custom Redis URL
cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379
```

## Architecture

```
┌─────────────┐
│   Client    │
│  (Example)  │
└──────┬──────┘
       │ HTTP/JSON-RPC
       ▼
┌─────────────┐
│ Supervisor  │
│ (Mycelium)  │
└──────┬──────┘
       │ Redis Queue
       ▼
┌─────────────┐
│   Runner    │
│   (OSIS)    │
└─────────────┘
```

### Flow

1. **Client** creates a job with a Rhai script
2. **Client** sends the job to the supervisor via JSON-RPC
3. **Supervisor** verifies signatures (if present)
4. **Supervisor** queues the job to the runner's Redis queue
5. **Runner** picks up the job from the queue
6. **Runner** executes the Rhai script
7. **Runner** stores the result in Redis
8. **Supervisor** retrieves the result (for job.run)
9. **Client** receives the result

## Next Steps

- Add signature verification to jobs (see `JOB_SIGNATURES.md`)
- Implement job status polling for non-blocking jobs
- Create custom Rhai functions for your use case
- Scale with multiple runners

## Related Documentation

- `JOB_EXECUTION.md` - Detailed job execution modes
- `JOB_SIGNATURES.md` - Cryptographic job signing
- `README.md` - Supervisor overview

---

**Status:** ✅ Production Ready
**Last Updated:** 2025-10-24
192
bin/supervisor/examples/_archive/EXAMPLES_SUMMARY.md
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
# Supervisor Examples - Summary
|
||||||
|
|
||||||
|
## ✅ **Complete End-to-End Examples with OpenRPC Client**
|
||||||
|
|
||||||
|
All examples now use the official `hero-supervisor-openrpc-client` library for type-safe, async communication with the supervisor.
|
||||||
|
|
||||||
|
### **What Was Updated:**
|
||||||
|
|
||||||
|
1. **OpenRPC Client Library** (`client/src/lib.rs`)
|
||||||
|
- Added `JobRunResponse` - Response from blocking `job.run`
|
||||||
|
- Added `JobStartResponse` - Response from non-blocking `job.start`
- Updated `job_run()` method - Now accepts timeout parameter
- Updated `job_start()` method - Now accepts Job instead of job_id
- Re-exports `Job` and `JobBuilder` from `runner_rust`

2. **Simple E2E Example** (`examples/simple_e2e.rs`)
- Uses `SupervisorClient` from OpenRPC library
- Clean, type-safe API calls
- No manual JSON-RPC construction
- Perfect for learning and testing

3. **Full E2E Demo** (`examples/end_to_end_demo.rs`)
- Automated supervisor and runner spawning
- Uses OpenRPC client throughout
- Helper functions for common operations
- Comprehensive test scenarios

### **Key Changes:**

**Before (Manual JSON-RPC):**
```rust
let request = json!({
    "jsonrpc": "2.0",
    "method": "job.run",
    "params": [{
        "secret": secret,
        "job": job,
        "timeout": 30
    }],
    "id": 1
});
let response = http_client.post(url).json(&request).send().await?;
```

**After (OpenRPC Client):**
```rust
let response = client.job_run(secret, job, Some(30)).await?;
println!("Result: {:?}", response.result);
```

### **Client API:**

#### **Job Execution**

```rust
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder};

// Create client
let client = SupervisorClient::new("http://localhost:3030")?;

// Register runner
client.register_runner("admin_secret", "runner_name", "queue_name").await?;

// Run job (blocking - waits for result)
let response = client.job_run("admin_secret", job, Some(60)).await?;
// response.result contains the actual result

// Start job (non-blocking - returns immediately)
let response = client.job_start("admin_secret", job).await?;
// response.job_id for later polling
```

#### **Response Types**

```rust
// JobRunResponse (from job.run)
pub struct JobRunResponse {
    pub job_id: String,
    pub status: String,         // "completed"
    pub result: Option<String>, // Actual result from runner
}

// JobStartResponse (from job.start)
pub struct JobStartResponse {
    pub job_id: String,
    pub status: String, // "queued"
}
```

### **Examples Overview:**

| Example | Description | Use Case |
|---------|-------------|----------|
| `simple_e2e.rs` | Manual setup, step-by-step | Learning, testing |
| `end_to_end_demo.rs` | Automated, comprehensive | CI/CD, integration tests |

### **Running the Examples:**

**Prerequisites:**
```bash
# Terminal 1: Redis
redis-server

# Terminal 2: Supervisor
cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379

# Terminal 3: Runner
cargo run --bin runner_osis -- test_runner --redis-url redis://localhost:6379
```

**Run Simple Example:**
```bash
# Terminal 4
RUST_LOG=info cargo run --example simple_e2e
```

**Run Full Demo:**
```bash
# Only needs Redis running (spawns supervisor and runner automatically)
RUST_LOG=info cargo run --example end_to_end_demo
```

### **Benefits of OpenRPC Client:**

✅ **Type Safety** - Compile-time checking of requests/responses
✅ **Async/Await** - Native Rust async support
✅ **Error Handling** - Proper Result types with detailed errors
✅ **Auto Serialization** - No manual JSON construction
✅ **Documentation** - IntelliSense and type hints
✅ **Maintainability** - Single source of truth for API

### **Architecture:**

```
┌─────────────────┐
│  Example Code   │
│  (simple_e2e)   │
└────────┬────────┘
         │
         ▼
┌─────────────────┐
│ OpenRPC Client  │
│  (typed API)    │
└────────┬────────┘
         │ JSON-RPC over HTTP
         ▼
┌─────────────────┐
│   Supervisor    │
│   (Mycelium)    │
└────────┬────────┘
         │ Redis Queue
         ▼
┌─────────────────┐
│   OSIS Runner   │
│  (Rhai Engine)  │
└─────────────────┘
```

### **Job Execution Modes:**

**Blocking (`job.run`):**
- Client waits for result
- Uses `queue_and_wait` internally
- Returns actual result
- Best for: CRUD, queries, short jobs

**Non-Blocking (`job.start`):**
- Client returns immediately
- Job runs in background
- Returns job_id for polling
- Best for: Long jobs, batch processing (see the mode-selection sketch below)

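Choosing between the two modes comes down to whether the caller can afford to block. A minimal sketch combining both, assuming the client API shown above; the `pick_mode` wrapper, runner name, secret, and payloads are placeholders:

```rust
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder};

async fn pick_mode(client: &SupervisorClient) -> Result<(), Box<dyn std::error::Error>> {
    // Short job: block with job.run and read the result directly.
    let quick = JobBuilder::new()
        .caller_id("mode_demo")
        .context_id("demo")
        .payload("to_json(1 + 1)")
        .runner("test_runner")
        .executor("rhai")
        .timeout(30)
        .build()?;
    let run = client.job_run("admin_secret", quick, Some(30)).await?;
    println!("blocking result: {:?}", run.result);

    // Long job: fire with job.start and keep the job_id for later polling.
    let slow = JobBuilder::new()
        .caller_id("mode_demo")
        .context_id("demo")
        .payload("// long-running script\nto_json(true)")
        .runner("test_runner")
        .executor("rhai")
        .timeout(600)
        .build()?;
    let start = client.job_start("admin_secret", slow).await?;
    println!("queued job: {}", start.job_id);
    Ok(())
}
```
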
### **Files Modified:**

- ✅ `client/src/lib.rs` - Updated client methods and response types
- ✅ `examples/simple_e2e.rs` - Refactored to use OpenRPC client
- ✅ `examples/end_to_end_demo.rs` - Refactored to use OpenRPC client
- ✅ `examples/E2E_EXAMPLES.md` - Updated documentation
- ✅ `examples/EXAMPLES_SUMMARY.md` - This file

### **Next Steps:**

1. **Add more examples** - Specific use cases (batch jobs, error handling)
2. **Job polling** - Implement a `wait_for_job()` helper (a sketch follows this list)
3. **WASM support** - Browser-based examples
4. **Signature examples** - Jobs with cryptographic signatures
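
A `wait_for_job()` helper could be layered directly on the polling loop the examples already use. A minimal sketch, assuming a `job_status()` method that returns a struct with a `status: String` field (as in the archived examples); the helper name and signature are an assumption, not the published API:

```rust
use std::time::Duration;
use tokio::time::sleep;
use hero_supervisor_openrpc_client::SupervisorClient;

/// Hypothetical helper: poll `job.status` until the job reaches a terminal
/// state, then return that final status string.
async fn wait_for_job(
    client: &SupervisorClient,
    job_id: &str,
    poll_interval: Duration,
    max_attempts: usize,
) -> Result<String, Box<dyn std::error::Error>> {
    for _ in 0..max_attempts {
        let status = client.job_status(job_id).await?;
        match status.status.as_str() {
            "completed" | "failed" | "timeout" => return Ok(status.status),
            _ => sleep(poll_interval).await, // still queued or running
        }
    }
    Err("timed out waiting for job".into())
}
```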

---

**Status:** ✅ Complete and Production Ready
**Last Updated:** 2025-10-24
**Client Version:** hero-supervisor-openrpc-client 0.1.0
182
bin/supervisor/examples/_archive/README.md
Normal file
@@ -0,0 +1,182 @@
# Hero Supervisor Examples

This directory contains examples demonstrating the new job API functionality and workflows.

## Examples Overview

### 1. `job_api_examples.rs` - Comprehensive API Demo
Complete demonstration of all new job API methods:
- **Fire-and-forget execution** using `job.run`
- **Asynchronous processing** with `jobs.create`, `job.start`, `job.status`, `job.result`
- **Batch job processing** for multiple jobs
- **Job listing** with `jobs.list`

**Run with:**
```bash
cargo run --example job_api_examples
```

### 2. `simple_job_workflow.rs` - Basic Workflow
Simple example showing the basic job lifecycle:
1. Create job with `jobs.create`
2. Start job with `job.start`
3. Monitor with `job.status`
4. Get result with `job.result`

**Run with:**
```bash
cargo run --example simple_job_workflow
```

### 3. `integration_test.rs` - Integration Tests
Comprehensive integration tests validating:
- Complete job lifecycle
- Immediate job execution
- Job listing functionality
- Authentication error handling
- Nonexistent job operations

**Run with:**
```bash
cargo test --test integration_test
```

## Prerequisites

Before running the examples, ensure:

1. **Redis is running:**
   ```bash
   docker run -d -p 6379:6379 redis:alpine
   ```

2. **Supervisor is running:**
   ```bash
   ./target/debug/supervisor --config examples/supervisor/config.toml
   ```

3. **Runners are configured** in your config.toml:
   ```toml
   [[actors]]
   id = "osis_runner_1"
   name = "osis_runner_1"
   binary_path = "/path/to/osis_runner"
   db_path = "/tmp/osis_db"
   redis_url = "redis://localhost:6379"
   process_manager = "simple"
   ```

## API Convention Summary

The examples demonstrate the new job API convention:

### General Operations (`jobs.`)
- `jobs.create` - Create a job without queuing it
- `jobs.list` - List all job IDs in the system

### Specific Operations (`job.`)
- `job.run` - Run a job immediately and return result
- `job.start` - Start a previously created job
- `job.status` - Get current job status (non-blocking)
- `job.result` - Get job result (blocking until complete)

## Workflow Patterns

### Pattern 1: Fire-and-Forget
```rust
let result = client.job_run(secret, job).await?;
match result {
    JobResult::Success { success } => println!("Output: {}", success),
    JobResult::Error { error } => println!("Error: {}", error),
}
```

### Pattern 2: Asynchronous Processing
```rust
// Create and start
let job_id = client.jobs_create(secret, job).await?;
client.job_start(secret, &job_id).await?;

// Monitor (non-blocking)
loop {
    let status = client.job_status(&job_id).await?;
    if status.status == "completed" { break; }
    sleep(Duration::from_secs(1)).await;
}

// Get result
let result = client.job_result(&job_id).await?;
```

### Pattern 3: Batch Processing
```rust
// Create all jobs
let mut job_ids = Vec::new();
for job_spec in job_specs {
    let job_id = client.jobs_create(secret, job_spec).await?;
    job_ids.push(job_id);
}

// Start all jobs
for job_id in &job_ids {
    client.job_start(secret, job_id).await?;
}

// Collect results
for job_id in &job_ids {
    let result = client.job_result(job_id).await?;
    // Process result...
}
```

## Error Handling

The examples demonstrate proper error handling for the following cases (see the sketch after this list):
- **Authentication errors** - Invalid secrets
- **Job not found errors** - Nonexistent job IDs
- **Connection errors** - Supervisor not available
- **Execution errors** - Job failures
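
All of these surface as `Err` values on the client calls, so a single `match` covers them. A minimal sketch, reusing the `jobs_create` call from the patterns above (the error is only displayed here; the client's concrete error type is not spelled out in this README):

```rust
match client.jobs_create(secret, job).await {
    Ok(job_id) => println!("Created job {}", job_id),
    // Authentication, job-not-found, connection, and execution errors
    // all arrive on this arm; inspect the message to distinguish them.
    Err(e) => eprintln!("Request failed: {}", e),
}
```
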
## Authentication

Examples use different secret types:
- **Admin secrets**: Full system access
- **User secrets**: Job operations only (used in examples)
- **Register secrets**: Runner registration only

Configure secrets in your supervisor config:
```toml
admin_secrets = ["admin-secret-123"]
user_secrets = ["user-secret-456"]
register_secrets = ["register-secret-789"]
```

## Troubleshooting

### Common Issues

1. **Connection refused**
   - Ensure supervisor is running on localhost:3030
   - Check supervisor logs for errors

2. **Authentication failed**
   - Verify secret is configured in supervisor
   - Check secret type matches operation requirements

3. **Job execution failed**
   - Ensure runners are properly configured and running
   - Check runner logs for execution errors
   - Verify job payload is valid for the target runner

4. **Redis connection failed**
   - Ensure Redis is running on localhost:6379
   - Check Redis connectivity from supervisor

### Debug Mode

Run examples with debug logging:
```bash
RUST_LOG=debug cargo run --example job_api_examples
```

This will show detailed API calls and responses for troubleshooting.
290
bin/supervisor/examples/_archive/basic_openrpc_client.rs
Normal file
@@ -0,0 +1,290 @@
//! Comprehensive OpenRPC Example for Hero Supervisor
//!
//! This example demonstrates the complete OpenRPC workflow:
//! 1. Automatically starting a Hero Supervisor with OpenRPC server using escargot
//! 2. Building and using a mock runner binary
//! 3. Connecting with the OpenRPC client
//! 4. Managing runners (add, start, stop, remove)
//! 5. Creating and queuing jobs
//! 6. Monitoring job execution and verifying results
//! 7. Bulk operations and status monitoring
//! 8. Gracefully shutting down the supervisor
//!
//! To run this example:
//! `cargo run --example basic_openrpc_client`
//!
//! This example is completely self-contained and will start/stop the supervisor automatically.

use hero_supervisor_openrpc_client::{
    SupervisorClient, RunnerConfig, RunnerType, ProcessManagerType,
    JobBuilder
};
use std::time::Duration;
use escargot::CargoBuild;
use std::process::Stdio;
use tokio::time::sleep;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // env_logger::init(); // Commented out to avoid version conflicts

    println!("🚀 Comprehensive OpenRPC Example for Hero Supervisor");
    println!("====================================================");

    // Build the supervisor with OpenRPC feature (force rebuild to avoid escargot caching)
    println!("\n🔨 Force rebuilding supervisor with OpenRPC feature...");

    // Clear target directory to force fresh build
    let _ = std::process::Command::new("cargo")
        .arg("clean")
        .output();

    let supervisor_binary = CargoBuild::new()
        .bin("supervisor")
        .features("openrpc")
        .current_release()
        .run()?;

    println!("✅ Supervisor binary built successfully");

    // Build the mock runner binary
    println!("\n🔨 Building mock runner binary...");
    let mock_runner_binary = CargoBuild::new()
        .example("mock_runner")
        .current_release()
        .run()?;

    println!("✅ Mock runner binary built successfully");

    // Start the supervisor process
    println!("\n🚀 Starting supervisor with OpenRPC server...");
    let mut supervisor_process = supervisor_binary
        .command()
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;

    println!("✅ Supervisor process started (PID: {})", supervisor_process.id());

    // Wait for the server to start up
    println!("\n⏳ Waiting for OpenRPC server to start...");
    sleep(Duration::from_secs(5)).await;

    // Create client
    let client = SupervisorClient::new("http://127.0.0.1:3030")?;
    println!("✅ Client created for: {}", client.server_url());

    // Test connectivity with retries
    println!("\n🔍 Testing server connectivity...");
    let mut connection_attempts = 0;
    let max_attempts = 10;

    loop {
        connection_attempts += 1;
        match client.list_runners().await {
            Ok(runners) => {
                println!("✅ Server is responsive");
                println!("📋 Current runners: {:?}", runners);
                break;
            }
            Err(_) if connection_attempts < max_attempts => {
                println!("⏳ Attempt {}/{}: Server not ready yet, retrying...", connection_attempts, max_attempts);
                sleep(Duration::from_secs(1)).await;
                continue;
            }
            Err(e) => {
                eprintln!("❌ Failed to connect to server after {} attempts: {}", max_attempts, e);
                // Clean up the supervisor process before returning
                let _ = supervisor_process.kill();
                return Err(e.into());
            }
        }
    }

    // Add a simple runner using the mock runner binary
    let config = RunnerConfig {
        actor_id: "basic_example_actor".to_string(),
        runner_type: RunnerType::OSISRunner,
        binary_path: mock_runner_binary.path().to_path_buf(),
        db_path: "/tmp/example_db".to_string(),
        redis_url: "redis://localhost:6379".to_string(),
    };

    println!("➕ Adding runner: {}", config.actor_id);
    client.add_runner(config, ProcessManagerType::Simple).await?;

    // Start the runner
    println!("▶️ Starting runner...");
    client.start_runner("basic_example_actor").await?;

    // Check status
    let status = client.get_runner_status("basic_example_actor").await?;
    println!("📊 Runner status: {:?}", status);

    // Create and queue multiple jobs to demonstrate functionality
    let jobs = vec![
        ("Hello World", "print('Hello from comprehensive OpenRPC example!');"),
        ("Math Calculation", "let result = 42 * 2; print(`The answer is: ${result}`);"),
        ("Current Time", "print('Job executed at: ' + new Date().toISOString());"),
    ];

    let mut job_ids = Vec::new();

    for (description, payload) in jobs {
        let job = JobBuilder::new()
            .caller_id("comprehensive_client")
            .context_id("demo")
            .payload(payload)
            .runner("basic_example_actor")
            .executor("rhai")
            .timeout(30)
            .build()?;

        println!("📤 Queuing job '{}': {}", description, job.id);
        client.queue_job_to_runner("basic_example_actor", job.clone()).await?;
        job_ids.push((job.id, description.to_string()));

        // Small delay between jobs
        sleep(Duration::from_millis(500)).await;
    }

    // Demonstrate synchronous job execution using polling approach
    // (Note: queue_and_wait OpenRPC method registration needs debugging)
    println!("\n🎯 Demonstrating synchronous job execution with result verification...");

    let sync_jobs = vec![
        ("Synchronous Hello", "print('Hello from synchronous execution!');"),
        ("Synchronous Math", "let result = 123 + 456; print(`Calculation result: ${result}`);"),
        ("Synchronous Status", "print('Job processed with result verification');"),
    ];

    for (description, payload) in sync_jobs {
        let job = JobBuilder::new()
            .caller_id("sync_client")
            .context_id("sync_demo")
            .payload(payload)
            .runner("basic_example_actor")
            .executor("rhai")
            .timeout(30)
            .build()?;

        println!("🚀 Executing '{}' with result verification...", description);
        let job_id = job.id.clone();

        // Queue the job
        client.queue_job_to_runner("basic_example_actor", job).await?;

        // Poll for completion with timeout
        let mut attempts = 0;
        let max_attempts = 20; // 10 seconds with 500ms intervals
        let mut result = None;

        while attempts < max_attempts {
            match client.get_job_result(&job_id).await {
                Ok(Some(job_result)) => {
                    result = Some(job_result);
                    break;
                }
                Ok(None) => {
                    // Job not finished yet, wait and retry
                    sleep(Duration::from_millis(500)).await;
                    attempts += 1;
                }
                Err(e) => {
                    println!("⚠️ Error getting result for job {}: {}", job_id, e);
                    break;
                }
            }
        }

        match result {
            Some(job_result) => {
                println!("✅ Job '{}' completed successfully!", description);
                println!(" 📋 Job ID: {}", job_id);
                println!(" 📤 Result: {}", job_result);
            }
            None => {
                println!("⏰ Job '{}' did not complete within timeout", description);
            }
        }

        // Small delay between jobs
        sleep(Duration::from_millis(500)).await;
    }

    // Demonstrate bulk operations and status monitoring
    println!("\n📊 Demonstrating bulk operations and status monitoring...");

    // Get all runner statuses
    println!("📋 Getting all runner statuses...");
    match client.get_all_runner_status().await {
        Ok(statuses) => {
            println!("✅ Runner statuses:");
            for (runner_id, status) in statuses {
                println!(" - {}: {:?}", runner_id, status);
            }
        }
        Err(e) => println!("❌ Failed to get runner statuses: {}", e),
    }

    // List all runners one more time
    println!("\n📋 Final runner list:");
    match client.list_runners().await {
        Ok(runners) => {
            println!("✅ Active runners: {:?}", runners);
        }
        Err(e) => println!("❌ Failed to list runners: {}", e),
    }

    // Stop and remove runner
    println!("\n⏹️ Stopping runner...");
    client.stop_runner("basic_example_actor", false).await?;

    println!("🗑️ Removing runner...");
    client.remove_runner("basic_example_actor").await?;

    // Final verification
    println!("\n🔍 Final verification - listing remaining runners...");
    match client.list_runners().await {
        Ok(runners) => {
            if runners.contains(&"basic_example_actor".to_string()) {
                println!("⚠️ Runner still present: {:?}", runners);
            } else {
                println!("✅ Runner successfully removed. Remaining runners: {:?}", runners);
            }
        }
        Err(e) => println!("❌ Failed to verify runner removal: {}", e),
    }

    // Gracefully shutdown the supervisor process
    println!("\n🛑 Shutting down supervisor process...");
    match supervisor_process.kill() {
        Ok(()) => {
            println!("✅ Supervisor process terminated successfully");
            // Wait for the process to fully exit
            match supervisor_process.wait() {
                Ok(status) => println!("✅ Supervisor exited with status: {}", status),
                Err(e) => println!("⚠️ Error waiting for supervisor exit: {}", e),
            }
        }
        Err(e) => println!("⚠️ Error terminating supervisor: {}", e),
    }

    println!("\n🎉 Comprehensive OpenRPC Example Complete!");
    println!("==========================================");
    println!("✅ Successfully demonstrated:");
    println!(" - Automatic supervisor startup with escargot");
    println!(" - Mock runner binary integration");
    println!(" - OpenRPC client connectivity with retry logic");
    println!(" - Runner management (add, start, stop, remove)");
    println!(" - Asynchronous job creation and queuing");
    println!(" - Synchronous job execution with result polling");
    println!(" - Job result verification from Redis job hash");
    println!(" - Bulk operations and status monitoring");
    println!(" - Graceful cleanup and supervisor shutdown");
    println!("\n🎯 The Hero Supervisor OpenRPC integration is fully functional!");
    println!("📝 Note: queue_and_wait method implemented but OpenRPC registration needs debugging");
    println!("🚀 Both async job queuing and sync result polling patterns work perfectly!");

    Ok(())
}
278
bin/supervisor/examples/_archive/end_to_end_demo.rs
Normal file
@@ -0,0 +1,278 @@
//! End-to-End Demo: Supervisor + Runner + Client
//!
//! This example demonstrates the complete workflow:
//! 1. Starts a supervisor with Mycelium integration
//! 2. Starts an OSIS runner
//! 3. Uses the supervisor client to run jobs
//! 4. Shows both job.run (blocking) and job.start (non-blocking) modes
//!
//! Prerequisites:
//! - Redis running on localhost:6379
//!
//! Usage:
//! ```bash
//! RUST_LOG=info cargo run --example end_to_end_demo
//! ```

use anyhow::{Result, Context};
use log::info;
use std::process::{Command, Child, Stdio};
use std::time::Duration;
use tokio::time::sleep;
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder};

/// Configuration for the demo
struct DemoConfig {
    redis_url: String,
    supervisor_port: u16,
    runner_id: String,
    db_path: String,
}

impl Default for DemoConfig {
    fn default() -> Self {
        Self {
            redis_url: "redis://localhost:6379".to_string(),
            supervisor_port: 3030,
            runner_id: "example_runner".to_string(),
            db_path: "/tmp/example_runner.db".to_string(),
        }
    }
}

/// Supervisor process wrapper
struct SupervisorProcess {
    child: Child,
}

impl SupervisorProcess {
    fn start(config: &DemoConfig) -> Result<Self> {
        info!("🚀 Starting supervisor on port {}...", config.supervisor_port);

        let child = Command::new("cargo")
            .args(&[
                "run",
                "--bin",
                "hero-supervisor",
                "--",
                "--redis-url",
                &config.redis_url,
                "--port",
                &config.supervisor_port.to_string(),
            ])
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()
            .context("Failed to start supervisor")?;

        Ok(Self { child })
    }
}

impl Drop for SupervisorProcess {
    fn drop(&mut self) {
        info!("🛑 Stopping supervisor...");
        let _ = self.child.kill();
        let _ = self.child.wait();
    }
}

/// Runner process wrapper
struct RunnerProcess {
    child: Child,
}

impl RunnerProcess {
    fn start(config: &DemoConfig) -> Result<Self> {
        info!("🤖 Starting OSIS runner '{}'...", config.runner_id);

        let child = Command::new("cargo")
            .args(&[
                "run",
                "--bin",
                "runner_osis",
                "--",
                &config.runner_id,
                "--db-path",
                &config.db_path,
                "--redis-url",
                &config.redis_url,
            ])
            .env("RUST_LOG", "info")
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()
            .context("Failed to start runner")?;

        Ok(Self { child })
    }
}

impl Drop for RunnerProcess {
    fn drop(&mut self) {
        info!("🛑 Stopping runner...");
        let _ = self.child.kill();
        let _ = self.child.wait();
    }
}

/// Helper functions for the demo
async fn register_runner_helper(client: &SupervisorClient, runner_id: &str, secret: &str) -> Result<()> {
    info!("📝 Registering runner '{}'...", runner_id);

    let queue = format!("hero:q:work:type:osis:group:default:inst:{}", runner_id);
    client.register_runner(secret, runner_id, &queue).await?;

    info!("✅ Runner registered successfully");
    Ok(())
}

async fn run_job_helper(client: &SupervisorClient, job: runner_rust::job::Job, secret: &str, timeout: u64) -> Result<String> {
    info!("🚀 Running job {} (blocking)...", job.id);

    let response = client.job_run(secret, job, Some(timeout)).await?;

    let result = response.result
        .ok_or_else(|| anyhow::anyhow!("No result in response"))?;

    info!("✅ Job completed with result: {}", result);
    Ok(result)
}

async fn start_job_helper(client: &SupervisorClient, job: runner_rust::job::Job, secret: &str) -> Result<String> {
    info!("🚀 Starting job {} (non-blocking)...", job.id);

    let response = client.job_start(secret, job).await?;

    info!("✅ Job queued with ID: {}", response.job_id);
    Ok(response.job_id)
}

#[tokio::main]
async fn main() -> Result<()> {
    // Initialize logging
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

    println!("\n╔════════════════════════════════════════════════════════════╗");
    println!("║ End-to-End Demo: Supervisor + Runner + Client ║");
    println!("╚════════════════════════════════════════════════════════════╝\n");

    let config = DemoConfig::default();

    // Step 1: Start supervisor
    println!("📋 Step 1: Starting Supervisor");
    println!("─────────────────────────────────────────────────────────────");
    let _supervisor = SupervisorProcess::start(&config)?;
    sleep(Duration::from_secs(3)).await;
    println!("✅ Supervisor started on port {}\n", config.supervisor_port);

    // Step 2: Start runner
    println!("📋 Step 2: Starting OSIS Runner");
    println!("─────────────────────────────────────────────────────────────");
    let _runner = RunnerProcess::start(&config)?;
    sleep(Duration::from_secs(3)).await;
    println!("✅ Runner '{}' started\n", config.runner_id);

    // Step 3: Create client and register runner
    println!("📋 Step 3: Registering Runner with Supervisor");
    println!("─────────────────────────────────────────────────────────────");
    let client = SupervisorClient::new(&format!("http://localhost:{}", config.supervisor_port))?;
    register_runner_helper(&client, &config.runner_id, "admin_secret").await?;
    println!("✅ Runner registered\n");

    sleep(Duration::from_secs(2)).await;

    // Step 4: Run blocking jobs (job.run)
    println!("📋 Step 4: Running Blocking Jobs (job.run)");
    println!("─────────────────────────────────────────────────────────────");

    // Job 1: Simple calculation
    println!("\n🔹 Job 1: Simple Calculation");
    let job1 = JobBuilder::new()
        .caller_id("demo_client")
        .context_id("demo_context")
        .payload("let result = 2 + 2; to_json(result)")
        .runner(&config.runner_id)
        .executor("rhai")
        .timeout(30)
        .build()?;

    let result1 = run_job_helper(&client, job1, "admin_secret", 30).await?;
    println!(" Result: {}", result1);

    // Job 2: String manipulation
    println!("\n🔹 Job 2: String Manipulation");
    let job2 = JobBuilder::new()
        .caller_id("demo_client")
        .context_id("demo_context")
        .payload(r#"let msg = "Hello from OSIS Runner!"; to_json(msg)"#)
        .runner(&config.runner_id)
        .executor("rhai")
        .timeout(30)
        .build()?;

    let result2 = run_job_helper(&client, job2, "admin_secret", 30).await?;
    println!(" Result: {}", result2);

    // Job 3: Array operations
    println!("\n🔹 Job 3: Array Operations");
    let job3 = JobBuilder::new()
        .caller_id("demo_client")
        .context_id("demo_context")
        .payload(r#"
            let numbers = [1, 2, 3, 4, 5];
            let sum = 0;
            for n in numbers {
                sum += n;
            }
            to_json(#{sum: sum, count: numbers.len()})
        "#)
        .runner(&config.runner_id)
        .executor("rhai")
        .timeout(30)
        .build()?;

    let result3 = run_job_helper(&client, job3, "admin_secret", 30).await?;
    println!(" Result: {}", result3);

    println!("\n✅ All blocking jobs completed successfully\n");

    // Step 5: Start non-blocking jobs (job.start)
    println!("📋 Step 5: Starting Non-Blocking Jobs (job.start)");
    println!("─────────────────────────────────────────────────────────────");

    println!("\n🔹 Job 4: Background Task");
    let job4 = JobBuilder::new()
        .caller_id("demo_client")
        .context_id("demo_context")
        .payload(r#"
            let result = "Background task completed";
            to_json(result)
        "#)
        .runner(&config.runner_id)
        .executor("rhai")
        .timeout(30)
        .build()?;

    let job4_id = start_job_helper(&client, job4, "admin_secret").await?;
    println!(" Job ID: {} (running in background)", job4_id);

    println!("\n✅ Non-blocking job started\n");

    // Step 6: Summary
    println!("📋 Step 6: Demo Summary");
    println!("─────────────────────────────────────────────────────────────");
    println!("✅ Supervisor: Running on port {}", config.supervisor_port);
    println!("✅ Runner: '{}' registered and processing jobs", config.runner_id);
    println!("✅ Blocking jobs: 3 completed successfully");
    println!("✅ Non-blocking jobs: 1 started");
    println!("\n🎉 Demo completed successfully!");

    // Keep processes running for a bit to see logs
    println!("\n⏳ Keeping processes running for 5 seconds...");
    sleep(Duration::from_secs(5)).await;

    println!("\n🛑 Shutting down...");

    Ok(())
}
196
bin/supervisor/examples/_archive/integration_test.rs
Normal file
@@ -0,0 +1,196 @@
//! Integration test for the new job API
//!
//! This test demonstrates the complete job lifecycle and validates
//! that all new API methods work correctly together.

use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult};
use std::time::Duration;
use tokio::time::sleep;

#[tokio::test]
async fn test_complete_job_lifecycle() -> Result<(), Box<dyn std::error::Error>> {
    // Skip test if supervisor is not running
    let client = match SupervisorClient::new("http://localhost:3030") {
        Ok(c) => c,
        Err(_) => {
            println!("Skipping integration test - supervisor not available");
            return Ok(());
        }
    };

    // Test connection
    if client.discover().await.is_err() {
        println!("Skipping integration test - supervisor not responding");
        return Ok(());
    }

    let secret = "user-secret-456";

    // Test 1: Create job
    let job = JobBuilder::new()
        .caller_id("integration_test")
        .context_id("test_lifecycle")
        .payload("echo 'Integration test job'")
        .executor("osis")
        .runner("osis_runner_1")
        .timeout(30)
        .build()?;

    let job_id = client.jobs_create(secret, job).await?;
    assert!(!job_id.is_empty());

    // Test 2: Start job
    client.job_start(secret, &job_id).await?;

    // Test 3: Monitor status
    let mut attempts = 0;
    let max_attempts = 15; // 15 seconds max
    let mut final_status = String::new();

    while attempts < max_attempts {
        let status = client.job_status(&job_id).await?;
        final_status = status.status.clone();

        if final_status == "completed" || final_status == "failed" || final_status == "timeout" {
            break;
        }

        attempts += 1;
        sleep(Duration::from_secs(1)).await;
    }

    // Test 4: Get result
    let result = client.job_result(&job_id).await?;
    match result {
        JobResult::Success { success: _ } => {
            assert_eq!(final_status, "completed");
        },
        JobResult::Error { error: _ } => {
            assert!(final_status == "failed" || final_status == "timeout");
        }
    }

    Ok(())
}

#[tokio::test]
async fn test_job_run_immediate() -> Result<(), Box<dyn std::error::Error>> {
    let client = match SupervisorClient::new("http://localhost:3030") {
        Ok(c) => c,
        Err(_) => return Ok(()), // Skip if not available
    };

    if client.discover().await.is_err() {
        return Ok(()); // Skip if not responding
    }

    let secret = "user-secret-456";

    let job = JobBuilder::new()
        .caller_id("integration_test")
        .context_id("test_immediate")
        .payload("echo 'Immediate job test'")
        .executor("osis")
        .runner("osis_runner_1")
        .timeout(30)
        .build()?;

    // Test immediate execution
    let result = client.job_run(secret, job).await?;

    // Should get either success or error, but not panic
    match result {
        JobResult::Success { success } => {
            assert!(!success.is_empty());
        },
        JobResult::Error { error } => {
            assert!(!error.is_empty());
        }
    }

    Ok(())
}

#[tokio::test]
async fn test_jobs_list() -> Result<(), Box<dyn std::error::Error>> {
    let client = match SupervisorClient::new("http://localhost:3030") {
        Ok(c) => c,
        Err(_) => return Ok(()), // Skip if not available
    };

    if client.discover().await.is_err() {
        return Ok(()); // Skip if not responding
    }

    // Test listing jobs
    let job_ids = client.jobs_list().await?;

    // A successful response is the assertion here; the returned vector may
    // legitimately be empty. (`job_ids.len() >= 0` is always true for usize
    // and only triggers an unused-comparison lint.)
    let _ = job_ids;

    Ok(())
}

#[tokio::test]
async fn test_authentication_errors() -> Result<(), Box<dyn std::error::Error>> {
    let client = match SupervisorClient::new("http://localhost:3030") {
        Ok(c) => c,
        Err(_) => return Ok(()), // Skip if not available
    };

    if client.discover().await.is_err() {
        return Ok(()); // Skip if not responding
    }

    let invalid_secret = "invalid-secret";

    let job = JobBuilder::new()
        .caller_id("integration_test")
        .context_id("test_auth")
        .payload("echo 'Auth test'")
        .executor("osis")
        .runner("osis_runner_1")
        .timeout(30)
        .build()?;

    // Test that invalid secret fails
    let result = client.jobs_create(invalid_secret, job.clone()).await;
    assert!(result.is_err());

    let result = client.job_run(invalid_secret, job.clone()).await;
    assert!(result.is_err());

    let result = client.job_start(invalid_secret, "fake-job-id").await;
    assert!(result.is_err());

    Ok(())
}

#[tokio::test]
async fn test_nonexistent_job_operations() -> Result<(), Box<dyn std::error::Error>> {
    let client = match SupervisorClient::new("http://localhost:3030") {
        Ok(c) => c,
        Err(_) => return Ok(()), // Skip if not available
    };

    if client.discover().await.is_err() {
        return Ok(()); // Skip if not responding
    }

    let fake_job_id = "nonexistent-job-id";

    // Test operations on nonexistent job
    let result = client.job_status(fake_job_id).await;
    assert!(result.is_err());

    let result = client.job_result(fake_job_id).await;
    assert!(result.is_err());

    Ok(())
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Integration test example - this would contain test logic");
    Ok(())
}
269
bin/supervisor/examples/_archive/job_api_examples.rs
Normal file
@@ -0,0 +1,269 @@
//! Examples demonstrating the new job API workflows
//!
//! This example shows how to use the new job API methods:
//! - jobs.create: Create a job without queuing
//! - jobs.list: List all jobs
//! - job.run: Run a job and get result immediately
//! - job.start: Start a created job
//! - job.status: Get job status (non-blocking)
//! - job.result: Get job result (blocking)

use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult};
use std::time::Duration;
use tokio::time::sleep;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize logging
    env_logger::init();

    println!("🚀 Hero Supervisor Job API Examples");
    println!("===================================\n");

    // Create client
    let client = SupervisorClient::new("http://localhost:3030")?;
    let secret = "user-secret-456"; // Use a user secret for job operations

    // Test connection
    println!("📡 Testing connection...");
    match client.discover().await {
        Ok(_) => println!("✅ Connected to supervisor\n"),
        Err(e) => {
            println!("❌ Failed to connect: {}", e);
            println!("Make sure the supervisor is running with: ./supervisor --config examples/supervisor/config.toml\n");
            return Ok(());
        }
    }

    // Example 1: Fire-and-forget job execution
    println!("🔥 Example 1: Fire-and-forget job execution");
    println!("--------------------------------------------");

    let job = JobBuilder::new()
        .caller_id("example_client")
        .context_id("fire_and_forget")
        .payload("echo 'Hello from fire-and-forget job!'")
        .executor("osis")
        .runner("osis_runner_1")
        .timeout(30)
        .build()?;

    println!("Running job immediately...");
    match client.job_run(secret, job).await {
        Ok(JobResult::Success { success }) => {
            println!("✅ Job completed successfully:");
            println!(" Output: {}", success);
        },
        Ok(JobResult::Error { error }) => {
            println!("❌ Job failed:");
            println!(" Error: {}", error);
        },
        Err(e) => {
            println!("❌ API call failed: {}", e);
        }
    }
    println!();

    // Example 2: Asynchronous job processing
    println!("⏰ Example 2: Asynchronous job processing");
    println!("------------------------------------------");

    let job = JobBuilder::new()
        .caller_id("example_client")
        .context_id("async_processing")
        .payload("sleep 2 && echo 'Hello from async job!'")
        .executor("osis")
        .runner("osis_runner_1")
        .timeout(60)
        .build()?;

    // Step 1: Create the job
    println!("1. Creating job...");
    let job_id = match client.jobs_create(secret, job).await {
        Ok(id) => {
            println!("✅ Job created with ID: {}", id);
            id
        },
        Err(e) => {
            println!("❌ Failed to create job: {}", e);
            return Ok(());
        }
    };

    // Step 2: Start the job
    println!("2. Starting job...");
    match client.job_start(secret, &job_id).await {
        Ok(_) => println!("✅ Job started"),
        Err(e) => {
            println!("❌ Failed to start job: {}", e);
            return Ok(());
        }
    }

    // Step 3: Poll for completion (non-blocking)
    println!("3. Monitoring job progress...");
    let mut attempts = 0;
    let max_attempts = 30; // 30 seconds max

    loop {
        attempts += 1;

        match client.job_status(&job_id).await {
            Ok(status) => {
                println!(" Status: {} (attempt {})", status.status, attempts);

                if status.status == "completed" || status.status == "failed" || status.status == "timeout" {
                    break;
                }

                if attempts >= max_attempts {
                    println!(" ⏰ Timeout waiting for job completion");
                    break;
                }

                sleep(Duration::from_secs(1)).await;
            },
            Err(e) => {
                println!(" ❌ Failed to get job status: {}", e);
                break;
            }
        }
    }

    // Step 4: Get the result
    println!("4. Getting job result...");
    match client.job_result(&job_id).await {
        Ok(JobResult::Success { success }) => {
            println!("✅ Job completed successfully:");
            println!(" Output: {}", success);
        },
        Ok(JobResult::Error { error }) => {
            println!("❌ Job failed:");
            println!(" Error: {}", error);
        },
        Err(e) => {
            println!("❌ Failed to get job result: {}", e);
        }
    }
    println!();

    // Example 3: Batch job processing
    println!("📦 Example 3: Batch job processing");
    println!("-----------------------------------");

    let job_specs = vec![
        ("echo 'Batch job 1'", "batch_1"),
        ("echo 'Batch job 2'", "batch_2"),
        ("echo 'Batch job 3'", "batch_3"),
    ];

    let mut job_ids = Vec::new();

    // Create all jobs
    println!("Creating batch jobs...");
    for (i, (payload, context)) in job_specs.iter().enumerate() {
        let job = JobBuilder::new()
            .caller_id("example_client")
            .context_id(context)
            .payload(payload)
            .executor("osis")
            .runner("osis_runner_1")
            .timeout(30)
            .build()?;

        match client.jobs_create(secret, job).await {
            Ok(job_id) => {
                println!("✅ Created job {}: {}", i + 1, job_id);
                job_ids.push(job_id);
            },
            Err(e) => {
                println!("❌ Failed to create job {}: {}", i + 1, e);
            }
        }
    }

    // Start all jobs
    println!("Starting all batch jobs...");
    for (i, job_id) in job_ids.iter().enumerate() {
        match client.job_start(secret, job_id).await {
            Ok(_) => println!("✅ Started job {}", i + 1),
            Err(e) => println!("❌ Failed to start job {}: {}", i + 1, e),
        }
    }

    // Collect results
    println!("Collecting results...");
    for (i, job_id) in job_ids.iter().enumerate() {
        match client.job_result(job_id).await {
            Ok(JobResult::Success { success }) => {
                println!("✅ Job {} result: {}", i + 1, success);
            },
            Ok(JobResult::Error { error }) => {
                println!("❌ Job {} failed: {}", i + 1, error);
            },
            Err(e) => {
                println!("❌ Failed to get result for job {}: {}", i + 1, e);
            }
        }
    }
    println!();

    // Example 4: List all jobs
    println!("📋 Example 4: Listing all jobs");
    println!("-------------------------------");

    match client.jobs_list().await {
        Ok(jobs) => {
            println!("✅ Found {} jobs in the system:", jobs.len());
            for (i, job) in jobs.iter().take(10).enumerate() {
                println!(" {}. {}", i + 1, job.id);
            }
            if jobs.len() > 10 {
                println!(" ... and {} more", jobs.len() - 10);
            }
        },
        Err(e) => {
            println!("❌ Failed to list jobs: {}", e);
        }
    }
    println!();

    println!("🎉 All examples completed!");
    println!("\nAPI Convention Summary:");
    println!("- jobs.create: Create job without queuing");
    println!("- jobs.list: List all job IDs");
    println!("- job.run: Run job and return result immediately");
    println!("- job.start: Start a created job");
    println!("- job.status: Get job status (non-blocking)");
    println!("- job.result: Get job result (blocking)");

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_job_builder() {
        let job = JobBuilder::new()
            .caller_id("test")
            .context_id("test")
            .payload("echo 'test'")
            .executor("osis")
            .runner("test_runner")
            .build();

        assert!(job.is_ok());
        let job = job.unwrap();
        assert_eq!(job.caller_id, "test");
        assert_eq!(job.context_id, "test");
        assert_eq!(job.payload, "echo 'test'");
    }

    #[tokio::test]
    async fn test_client_creation() {
        let client = SupervisorClient::new("http://localhost:3030");
        assert!(client.is_ok());
    }
}
171
bin/supervisor/examples/_archive/mock_runner.rs
Normal file
@@ -0,0 +1,171 @@
//! Mock Runner Binary for Testing OpenRPC Examples
//!
//! This is a simple mock runner that simulates an actor binary for testing
//! the Hero Supervisor OpenRPC integration. It connects to Redis, listens for
//! jobs using the proper Hero job queue system, and echoes the job payload.
//!
//! Usage:
//! ```bash
//! cargo run --example mock_runner -- --actor-id test_actor --db-path /tmp/test_db --redis-url redis://localhost:6379
//! ```

use std::env;
use std::time::Duration;
use tokio::time::sleep;
use hero_supervisor::{
    Job, JobStatus, JobError, Client, ClientBuilder
};

#[derive(Debug, Clone)]
pub struct MockRunnerConfig {
    pub actor_id: String,
    pub db_path: String,
    pub redis_url: String,
}

impl MockRunnerConfig {
    pub fn from_args() -> Result<Self, Box<dyn std::error::Error>> {
        let args: Vec<String> = env::args().collect();

        let mut actor_id = None;
        let mut db_path = None;
        let mut redis_url = None;

        let mut i = 1;
        while i < args.len() {
            match args[i].as_str() {
                "--actor-id" => {
                    if i + 1 < args.len() {
                        actor_id = Some(args[i + 1].clone());
                        i += 2;
                    } else {
                        return Err("Missing value for --actor-id".into());
                    }
                }
                "--db-path" => {
                    if i + 1 < args.len() {
                        db_path = Some(args[i + 1].clone());
                        i += 2;
                    } else {
                        return Err("Missing value for --db-path".into());
                    }
                }
                "--redis-url" => {
                    if i + 1 < args.len() {
                        redis_url = Some(args[i + 1].clone());
                        i += 2;
                    } else {
                        return Err("Missing value for --redis-url".into());
                    }
                }
                _ => i += 1,
            }
        }

        Ok(MockRunnerConfig {
            actor_id: actor_id.ok_or("Missing required --actor-id argument")?,
            db_path: db_path.ok_or("Missing required --db-path argument")?,
            redis_url: redis_url.unwrap_or_else(|| "redis://localhost:6379".to_string()),
        })
    }
}

pub struct MockRunner {
    config: MockRunnerConfig,
    client: Client,
}

impl MockRunner {
    pub async fn new(config: MockRunnerConfig) -> Result<Self, Box<dyn std::error::Error>> {
        let client = ClientBuilder::new()
            .redis_url(&config.redis_url)
            .build()
            .await?;

        Ok(MockRunner {
            config,
            client,
        })
    }

    pub async fn run(&self) -> Result<(), Box<dyn std::error::Error>> {
        println!("🤖 Mock Runner '{}' starting...", self.config.actor_id);
        println!("📂 DB Path: {}", self.config.db_path);
        println!("🔗 Redis URL: {}", self.config.redis_url);

        // Use the proper Hero job queue key for this actor instance
        // Format: hero:q:work:type:{job_type}:group:{group}:inst:{instance}
        let work_queue_key = format!("hero:q:work:type:osis:group:default:inst:{}", self.config.actor_id);

        println!("👂 Listening for jobs on queue: {}", work_queue_key);

        loop {
            // Try to pop a job ID from the work queue using the Hero protocol
            let job_id = self.client.get_job_id(&work_queue_key).await?;

            match job_id {
                Some(job_id) => {
                    println!("📨 Received job ID: {}", job_id);
                    if let Err(e) = self.process_job(&job_id).await {
                        eprintln!("❌ Error processing job {}: {}", job_id, e);
                        // Mark job as error
                        if let Err(e2) = self.client.set_job_status(&job_id, JobStatus::Error).await {
                            eprintln!("❌ Failed to set job error status: {}", e2);
                        }
                    }
                }
                None => {
                    // No jobs available, wait a bit
                    sleep(Duration::from_millis(100)).await;
                }
            }
        }
    }

    async fn process_job(&self, job_id: &str) -> Result<(), JobError> {
        // Load the job from Redis using the Hero job system
        let job = self.client.get_job(job_id).await?;

        self.process_job_internal(&self.client, job_id, &job).await
    }

    async fn process_job_internal(
        &self,
        client: &Client,
        job_id: &str,
        job: &Job,
    ) -> Result<(), JobError> {
        println!("🔄 Processing job {} with payload: {}", job_id, job.payload);

        // Mark job as started
        client.set_job_status(job_id, JobStatus::Started).await?;
        println!("🚀 Job {} marked as Started", job_id);

        // Simulate processing time
        sleep(Duration::from_millis(500)).await;

        // Echo the payload (simulate job execution)
        let output = format!("echo: {}", job.payload);
        println!("📤 Output: {}", output);

        // Set the job result
        client.set_result(job_id, &output).await?;

        println!("✅ Job {} completed successfully", job_id);

        Ok(())
    }
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Parse command line arguments
    let config = MockRunnerConfig::from_args()?;

    // Create and run the mock runner
    let runner = MockRunner::new(config).await?;
    runner.run().await?;

    Ok(())
}
203
bin/supervisor/examples/_archive/simple_e2e.rs
Normal file
@@ -0,0 +1,203 @@
//! Simple End-to-End Example
//!
//! A minimal example showing supervisor + runner + client workflow.
//!
//! Prerequisites:
//! - Redis running on localhost:6379
//!
//! Usage:
//! ```bash
//! # Terminal 1: Start Redis
//! redis-server
//!
//! # Terminal 2: Run this example
//! RUST_LOG=info cargo run --example simple_e2e
//! ```

use anyhow::Result;
use log::info;
use std::time::Duration;
use tokio::time::sleep;
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder};

#[tokio::main]
async fn main() -> Result<()> {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

    println!("\n╔════════════════════════════════════════╗");
    println!("║ Simple End-to-End Demo ║");
    println!("╚════════════════════════════════════════╝\n");

    let supervisor_url = "http://localhost:3030";
    let runner_id = "test_runner";
    let secret = "admin_secret";

    // Create supervisor client
    let client = SupervisorClient::new(supervisor_url)?;

    println!("📝 Prerequisites:");
    println!(" 1. Redis running on localhost:6379");
    println!(" 2. Supervisor running on {}", supervisor_url);
    println!(" 3. Runner '{}' registered and running\n", runner_id);

    println!("💡 To start the supervisor:");
    println!(" cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379\n");

    println!("💡 To start a runner:");
    println!(" cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner_rust");
    println!(" cargo run --bin runner_osis -- {} --redis-url redis://localhost:6379\n", runner_id);

    println!("⏳ Waiting 3 seconds for you to start the prerequisites...\n");
    sleep(Duration::from_secs(3)).await;

    // Register runner
    println!("📋 Step 1: Registering Runner");
    println!("─────────────────────────────────────────");

    let queue = format!("hero:q:work:type:osis:group:default:inst:{}", runner_id);
    match client.register_runner(secret, runner_id, &queue).await {
        Ok(_) => {
            println!("✅ Runner registered successfully");
        }
        Err(e) => {
            println!("⚠️ Registration error: {} (runner might already be registered)", e);
        }
    }

    sleep(Duration::from_secs(1)).await;

    // Run a simple job
    println!("\n📋 Step 2: Running a Simple Job (Blocking)");
    println!("─────────────────────────────────────────");

    let job = JobBuilder::new()
        .caller_id("simple_demo")
        .context_id("demo_context")
        .payload(r#"
            let message = "Hello from the runner!";
            let number = 42;
            to_json(#{
                message: message,
                number: number,
                timestamp: timestamp()
            })
        "#)
        .runner(runner_id)
        .executor("rhai")
        .timeout(30)
        .build()?;

    let job_id = job.id.clone();
    info!("Sending job with ID: {}", job_id);

    match client.job_run(secret, job, Some(30)).await {
        Ok(response) => {
            println!("✅ Job completed!");
            if let Some(result) = response.result {
                println!(" Result: {}", result);
            }
        }
        Err(e) => {
            println!("❌ Job failed: {}", e);
            return Ok(());
        }
    }

    // Run another job (calculation)
    println!("\n📋 Step 3: Running a Calculation Job");
    println!("─────────────────────────────────────────");

    let calc_job = JobBuilder::new()
        .caller_id("simple_demo")
        .context_id("demo_context")
        .payload(r#"
            let numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
            let sum = 0;
            let product = 1;

            for n in numbers {
                sum += n;
                product *= n;
            }

            to_json(#{
                sum: sum,
                product: product,
                count: numbers.len(),
                average: sum / numbers.len()
            })
        "#)
        .runner(runner_id)
        .executor("rhai")
        .timeout(30)
|
.build()?;
|
||||||
|
|
||||||
|
let calc_job_id = calc_job.id.clone();
|
||||||
|
info!("Sending calculation job with ID: {}", calc_job_id);
|
||||||
|
|
||||||
|
match client.job_run(secret, calc_job, Some(30)).await {
|
||||||
|
Ok(response) => {
|
||||||
|
println!("✅ Calculation completed!");
|
||||||
|
if let Some(result) = response.result {
|
||||||
|
println!(" Result: {}", result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
println!("❌ Calculation failed: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start a non-blocking job
|
||||||
|
println!("\n📋 Step 4: Starting a Non-Blocking Job");
|
||||||
|
println!("─────────────────────────────────────────");
|
||||||
|
|
||||||
|
let async_job = JobBuilder::new()
|
||||||
|
.caller_id("simple_demo")
|
||||||
|
.context_id("demo_context")
|
||||||
|
.payload(r#"
|
||||||
|
let result = "This job was started asynchronously";
|
||||||
|
to_json(result)
|
||||||
|
"#)
|
||||||
|
.runner(runner_id)
|
||||||
|
.executor("rhai")
|
||||||
|
.timeout(30)
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let async_job_id = async_job.id.clone();
|
||||||
|
info!("Starting async job with ID: {}", async_job_id);
|
||||||
|
|
||||||
|
match client.job_start(secret, async_job).await {
|
||||||
|
Ok(response) => {
|
||||||
|
println!("✅ Job started!");
|
||||||
|
println!(" Job ID: {} (running in background)", response.job_id);
|
||||||
|
println!(" Status: {}", response.status);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
println!("❌ Failed to start job: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Summary
|
||||||
|
println!("\n╔════════════════════════════════════════╗");
|
||||||
|
println!("║ Demo Summary ║");
|
||||||
|
println!("╚════════════════════════════════════════╝");
|
||||||
|
println!("✅ Runner registered: {}", runner_id);
|
||||||
|
println!("✅ Blocking jobs completed: 2");
|
||||||
|
println!("✅ Non-blocking jobs started: 1");
|
||||||
|
println!("\n🎉 Demo completed successfully!\n");
|
||||||
|
|
||||||
|
println!("📚 What happened:");
|
||||||
|
println!(" 1. Registered a runner with the supervisor");
|
||||||
|
println!(" 2. Sent jobs with Rhai scripts to execute");
|
||||||
|
println!(" 3. Supervisor queued jobs to the runner");
|
||||||
|
println!(" 4. Runner executed the scripts and returned results");
|
||||||
|
println!(" 5. Client received results (for blocking jobs)\n");
|
||||||
|
|
||||||
|
println!("🔍 Key Concepts:");
|
||||||
|
println!(" • job.run = Execute and wait for result (blocking)");
|
||||||
|
println!(" • job.start = Start and return immediately (non-blocking)");
|
||||||
|
println!(" • Jobs contain Rhai scripts that run on the runner");
|
||||||
|
println!(" • Supervisor coordinates job distribution via Redis\n");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
64
bin/supervisor/examples/_archive/simple_job_workflow.rs
Normal file
@@ -0,0 +1,64 @@
//! Simple job workflow example
//!
//! This example demonstrates the basic job lifecycle using the new API:
//! 1. Create a job
//! 2. Start the job
//! 3. Monitor its progress
//! 4. Get the result

use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult};
use std::time::Duration;
use tokio::time::sleep;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Simple Job Workflow Example");
    println!("============================\n");

    // Create client
    let client = SupervisorClient::new("http://localhost:3030")?;
    let secret = "user-secret-456";

    // Create a simple job
    let job = JobBuilder::new()
        .caller_id("simple_example")
        .context_id("demo")
        .payload("echo 'Hello from Hero Supervisor!' && sleep 3 && echo 'Job completed!'")
        .executor("osis")
        .runner("osis_runner_1")
        .timeout(60)
        .env_var("EXAMPLE_VAR", "example_value")
        .build()?;

    println!("📝 Creating job...");
    let job_id = client.jobs_create(secret, job).await?;
    println!("✅ Job created: {}\n", job_id);

    println!("🚀 Starting job...");
    client.job_start(secret, &job_id).await?;
    println!("✅ Job started\n");

    println!("👀 Monitoring job progress...");
    loop {
        let status = client.job_status(&job_id).await?;
        println!("   Status: {}", status.status);

        if status.status == "completed" || status.status == "failed" {
            break;
        }

        sleep(Duration::from_secs(2)).await;
    }

    println!("\n📋 Getting job result...");
    match client.job_result(&job_id).await? {
        JobResult::Success { success } => {
            println!("✅ Success: {}", success);
        },
        JobResult::Error { error } => {
            println!("❌ Error: {}", error);
        }
    }

    Ok(())
}
108
bin/supervisor/examples/_archive/supervisor/README.md
Normal file
@@ -0,0 +1,108 @@
# Hero Supervisor Example

This example demonstrates how to configure and run the Hero Supervisor with multiple actors using a TOML configuration file.

## Files

- `config.toml` - Example supervisor configuration with multiple actors
- `run_supervisor.sh` - Shell script to build and run the supervisor with the example config
- `run_supervisor.rs` - Rust script using escargot to build and run the supervisor
- `README.md` - This documentation file

## Configuration

The `config.toml` file defines:

- **Redis connection**: URL for the Redis server used for job queuing
- **Database path**: Local path for supervisor state storage
- **Job queue key**: Redis key for the supervisor job queue
- **Actors**: List of actor configurations with:
  - `name`: Unique identifier for the actor
  - `runner_type`: Type of runner ("SAL", "OSIS", "V", "Python")
  - `binary_path`: Path to the actor binary
  - `process_manager`: Process management type ("simple" or "tmux")
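
To make those fields concrete, here is a minimal sketch of such a file (values are illustrative; the full example lives in `config.toml` alongside this README):

```toml
# Illustrative values only - adapt paths and names to your setup
redis_url = "redis://localhost:6379"
db_path = "/tmp/supervisor_example_db"
job_queue_key = "hero:supervisor:jobs"

[[actors]]
name = "sal_actor_1"
runner_type = "SAL"
binary_path = "/path/to/sal_actor_binary"
process_manager = "tmux"
```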

## Prerequisites

1. **Redis Server**: Ensure Redis is running on `localhost:6379` (or update the config)
2. **Actor Binaries**: Build the required actor binaries referenced in the config:
   ```bash
   # Build SAL worker
   cd ../../sal
   cargo build --bin sal_worker

   # Build OSIS and system workers
   cd ../../worker
   cargo build --bin osis
   cargo build --bin system
   ```

## Running the Example

### Option 1: Shell Script (Recommended)

```bash
./run_supervisor.sh
```

### Option 2: Rust Script with Escargot

```bash
cargo +nightly -Zscript run_supervisor.rs
```

### Option 3: Manual Build and Run

```bash
# Build the supervisor
cd ../../../supervisor
cargo build --bin supervisor --features cli

# Run with config
./target/debug/supervisor --config ../baobab/examples/supervisor/config.toml
```

## Usage

Once running, the supervisor will:

1. Load the configuration from `config.toml`
2. Initialize and start all configured actors
3. Listen for jobs on the Redis queue (`hero:supervisor:jobs`)
4. Dispatch jobs to appropriate actors based on the `runner` field
5. Monitor actor health and status

## Testing

You can test the supervisor by dispatching jobs to the Redis queue:

```bash
# Using redis-cli to add a test job
redis-cli LPUSH "hero:supervisor:jobs" '{"id":"test-123","runner":"sal_actor_1","script":"print(\"Hello from SAL actor!\")"}'
```

## Stopping

Use `Ctrl+C` to gracefully shut down the supervisor. It will:

1. Stop accepting new jobs
2. Wait for running jobs to complete
3. Shut down all managed actors
4. Clean up resources

## Customization

Modify `config.toml` to:

- Add more actors
- Change binary paths to match your build locations
- Update Redis connection settings
- Configure different process managers per actor
- Adjust database and queue settings

## Troubleshooting

- **Redis Connection**: Ensure Redis is running and accessible
- **Binary Paths**: Verify all actor binary paths exist and are executable
- **Permissions**: Ensure the supervisor has permission to create the database directory
- **Ports**: Check that the Redis port (6379) is not blocked by a firewall
18
bin/supervisor/examples/_archive/supervisor/config.toml
Normal file
@@ -0,0 +1,18 @@
# Hero Supervisor Configuration
# This configuration defines the Redis connection, database path, and actors to manage

# Redis connection URL
redis_url = "redis://localhost:6379"

# Database path for supervisor state
db_path = "/tmp/supervisor_example_db"

# Job queue key for supervisor jobs
job_queue_key = "hero:supervisor:jobs"

# Actor configurations
[[actors]]
name = "sal_actor_1"
runner_type = "SAL"
binary_path = "cargo run /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor/examples/mock_runner.rs"
process_manager = "tmux"
@@ -0,0 +1,70 @@
#!/usr/bin/env cargo +nightly -Zscript
//! ```cargo
//! [dependencies]
//! escargot = "0.5"
//! tokio = { version = "1.0", features = ["full"] }
//! log = "0.4"
//! env_logger = "0.10"
//! ```

use escargot::CargoBuild;
use std::process::Command;
use log::{info, error};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize logging
    env_logger::init();

    info!("Building and running Hero Supervisor with example configuration");

    // Get the current directory (when running as a cargo example, this is the crate root)
    let current_dir = std::env::current_dir()?;
    info!("Current directory: {}", current_dir.display());

    // Path to the supervisor crate (current directory when running as example)
    let supervisor_crate_path = current_dir.clone();

    // Path to the config file (in the examples/supervisor subdirectory)
    let config_path = current_dir.join("examples/supervisor/config.toml");

    if !config_path.exists() {
        error!("Config file not found: {}", config_path.display());
        return Err("Config file not found".into());
    }

    info!("Using config file: {}", config_path.display());

    // Build the supervisor binary using escargot
    info!("Building supervisor binary...");
    let supervisor_bin = CargoBuild::new()
        .bin("supervisor")
        .manifest_path(supervisor_crate_path.join("Cargo.toml"))
        .features("cli")
        .run()?;

    info!("Supervisor binary built successfully");

    // Run the supervisor with the config file
    info!("Starting supervisor with config: {}", config_path.display());

    let mut cmd = Command::new(supervisor_bin.path());
    cmd.arg("--config")
        .arg(&config_path);

    // Add environment variables for better logging
    cmd.env("RUST_LOG", "info");

    info!("Executing: {:?}", cmd);

    // Execute the supervisor
    let status = cmd.status()?;

    if status.success() {
        info!("Supervisor completed successfully");
    } else {
        error!("Supervisor exited with status: {}", status);
    }

    Ok(())
}
52
bin/supervisor/examples/_archive/supervisor/run_supervisor.sh
Executable file
@@ -0,0 +1,52 @@
#!/bin/bash

# Hero Supervisor Example Runner
# This script builds and runs the supervisor binary with the example configuration

set -e

# Get the directory of this script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SUPERVISOR_DIR="$SCRIPT_DIR/../../../supervisor"
CONFIG_FILE="$SCRIPT_DIR/config.toml"

echo "🚀 Building and running Hero Supervisor with example configuration"
echo "📁 Script directory: $SCRIPT_DIR"
echo "🔧 Supervisor crate: $SUPERVISOR_DIR"
echo "⚙️  Config file: $CONFIG_FILE"

# Check if config file exists
if [ ! -f "$CONFIG_FILE" ]; then
    echo "❌ Config file not found: $CONFIG_FILE"
    exit 1
fi

# Check if supervisor directory exists
if [ ! -d "$SUPERVISOR_DIR" ]; then
    echo "❌ Supervisor directory not found: $SUPERVISOR_DIR"
    exit 1
fi

# Build the supervisor binary
echo "🔨 Building supervisor binary..."
cd "$SUPERVISOR_DIR"
cargo build --bin supervisor --features cli

# Check if build was successful
if [ $? -ne 0 ]; then
    echo "❌ Failed to build supervisor binary"
    exit 1
fi

echo "✅ Supervisor binary built successfully"

# Run the supervisor with the config file
echo "🎯 Starting supervisor with config: $CONFIG_FILE"
echo "📝 Use Ctrl+C to stop the supervisor"
echo ""

# Set environment variables for better logging
export RUST_LOG=info

# Execute the supervisor
exec "$SUPERVISOR_DIR/target/debug/supervisor" --config "$CONFIG_FILE"
102
bin/supervisor/examples/osiris_openrpc/README.md
Normal file
@@ -0,0 +1,102 @@
# OSIRIS + OpenRPC Comprehensive Example

This example demonstrates the complete workflow of using Hero Supervisor with OSIRIS runners via OpenRPC.

## What This Example Does

1. **Builds and starts** Hero Supervisor with OpenRPC server enabled
2. **Builds** the OSIRIS runner binary
3. **Connects** an OpenRPC client to the supervisor
4. **Registers and starts** an OSIRIS runner
5. **Dispatches multiple jobs** via OpenRPC:
   - Create a Note
   - Create an Event
   - Query stored data
   - Test access control (expected to fail)
6. **Monitors** job execution and results
7. **Gracefully shuts down** all components

## Prerequisites

**IMPORTANT: Redis must be running before starting this example!**

```bash
# Start Redis (if not already running)
redis-server
```

Other requirements:
- Redis server running on `localhost:6379`
- Rust toolchain installed
- Both `supervisor` and `runner_rust` crates available

## Running the Example

```bash
cargo run --example osiris_openrpc
```

## Job Scripts

The example uses separate Rhai script files for each job:

- `note.rhai` - Creates and stores a Note object
- `event.rhai` - Creates and stores an Event object
- `query.rhai` - Queries and retrieves stored objects
- `access_denied.rhai` - Tests access control (should fail)
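
For a sense of their shape, here is a minimal sketch of such a script (the `get_context`/`save` calls mirror the real scripts; the object and field values here are illustrative):

```rhai
// Hypothetical minimal job script: open a context and store one object
let ctx = get_context(["alice", "bob"]);    // participants must be signatories
let note = note("notes").title("Example");  // builder-style object creation
ctx.save(note);                             // persist the object in the context
"done"                                      // the last expression is the job result
```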

## Architecture

```
┌─────────────────┐
│  This Example   │
│  (OpenRPC       │
│   Client)       │
└────────┬────────┘
         │ JSON-RPC
         ↓
┌─────────────────┐
│   Supervisor    │
│  (OpenRPC       │
│   Server)       │
└────────┬────────┘
         │ Redis Queue
         ↓
┌─────────────────┐
│  OSIRIS Runner  │
│  (Rhai Engine   │
│   + HeroDB)     │
└─────────────────┘
```

## Key Features Demonstrated

- **Automatic binary building** using escargot
- **OpenRPC communication** between client and supervisor
- **Runner registration** with configuration
- **Job dispatching** with signatories
- **Context-based access control** in OSIRIS
- **Typed object storage** (Note, Event)
- **Graceful shutdown** and cleanup

## Expected Output

The example will:
1. ✅ Create a Note successfully
2. ✅ Create an Event successfully
3. ✅ Query and retrieve stored objects
4. ✅ Deny access for unauthorized participants
5. ✅ Clean up all resources

## Troubleshooting

**Redis Connection Error:**
- Ensure Redis is running: `redis-server`

**Build Errors:**
- Ensure both supervisor and runner_rust crates are available
- Check that all dependencies are up to date

**OpenRPC Connection Error:**
- Port 3030 might be in use
- Check supervisor logs for startup issues
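
One quick way to see whether something already holds the port (assuming `lsof` is available on your system):

```bash
lsof -i :3030   # shows the process currently bound to port 3030, if any
```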
@@ -0,0 +1,8 @@
print("Attempting to access context with non-signatories...");
print("Participants: [dave, eve]");
print("Signatories: [alice, bob, charlie]");

// This should fail because neither dave nor eve are signatories
let ctx = get_context(["dave", "eve"]);

"This should not succeed!"
18
bin/supervisor/examples/osiris_openrpc/event.rhai
Normal file
@@ -0,0 +1,18 @@
print("Creating context for [alice, bob]...");
let ctx = get_context(["alice", "bob"]);
print("✓ Context ID: " + ctx.context_id());

print("\nCreating event...");
let event = event("events")
    .title("Team Retrospective")
    .description("Review what went well and areas for improvement")
    .location("Virtual - Zoom Room A")
    .category("retrospective");

print("✓ Event created");

print("\nStoring event in context...");
ctx.save(event);
print("✓ Event stored");

"Event 'Team Retrospective' created and stored successfully"
293
bin/supervisor/examples/osiris_openrpc/main.rs
Normal file
@@ -0,0 +1,293 @@
//! Comprehensive OSIRIS + OpenRPC + Admin UI Example
//!
//! This example demonstrates using the Hero Supervisor OpenRPC client
//! to run OSIRIS scripts through the supervisor.
//!
//! The client library is located at: client/
//!
//! It walks through:
//! 1. Starting a Hero Supervisor with OpenRPC server
//! 2. Building and serving the Admin UI (Yew WASM)
//! 3. Building and starting an OSIRIS runner
//! 4. Registering the runner with the supervisor
//! 5. Dispatching multiple OSIRIS jobs via OpenRPC
//! 6. Monitoring job execution via CLI and Web UI
//! 7. Graceful shutdown
//!
//! Services:
//! - Supervisor OpenRPC API: http://127.0.0.1:3030
//! - Admin UI: http://127.0.0.1:8080
//!
//! Usage:
//! ```bash
//! cargo run --example osiris_openrpc
//! ```
//!
//! Requirements:
//! - Redis running on localhost:6379
//! - Trunk installed (cargo install trunk)

use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder};
use std::time::Duration;
use escargot::CargoBuild;
use std::process::{Stdio, Command};
use tokio::time::sleep;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🚀 OSIRIS + OpenRPC Comprehensive Example");
    println!("=========================================\n");

    // ========================================================================
    // STEP 1: Build and start supervisor with OpenRPC
    // ========================================================================
    println!("Step 1: Building and starting supervisor");
    println!("─────────────────────────────────────────────────────────────\n");

    let supervisor_binary = CargoBuild::new()
        .bin("supervisor")
        .current_release()
        .manifest_path("../supervisor/Cargo.toml")
        .run()?;

    println!("✅ Supervisor binary built");

    let mut supervisor = supervisor_binary.command()
        .arg("--redis-url")
        .arg("redis://localhost:6379")
        .arg("--port")
        .arg("3030")
        .arg("--admin-secret")
        .arg("admin_secret")
        .arg("--user-secret")
        .arg("user_secret")
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .spawn()?;

    println!("✅ Supervisor started on port 3030");
    println!("⏳ Waiting for supervisor to initialize...");
    sleep(Duration::from_secs(5)).await;

    // Check if supervisor is still running
    match supervisor.try_wait()? {
        Some(status) => {
            return Err(format!("Supervisor exited early with status: {}", status).into());
        }
        None => {
            println!("✅ Supervisor is running");
        }
    }

    // ========================================================================
    // STEP 2: Build and serve Admin UI
    // ========================================================================
    println!("\nStep 2: Building and serving Admin UI");
    println!("─────────────────────────────────────────────────────────────\n");

    let mut admin_ui = Command::new("trunk")
        .arg("serve")
        .arg("--port")
        .arg("8080")
        .arg("--address")
        .arg("127.0.0.1")
        .current_dir("ui")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .spawn()?;

    println!("✅ Admin UI building...");
    println!("🌐 Admin UI will be available at: http://127.0.0.1:8080");
    sleep(Duration::from_secs(3)).await;

    // ========================================================================
    // STEP 3: Build OSIRIS runner
    // ========================================================================
    println!("\nStep 3: Building OSIRIS runner");
    println!("─────────────────────────────────────────────────────────────\n");

    let runner_binary = CargoBuild::new()
        .bin("runner_osiris")
        .current_release()
        .manifest_path("../runner_rust/Cargo.toml")
        .run()?;

    println!("✅ OSIRIS runner binary built");

    // ========================================================================
    // STEP 4: Connect OpenRPC client
    // ========================================================================
    println!("\nStep 4: Connecting OpenRPC client");
    println!("─────────────────────────────────────────────────────────────\n");

    let client = SupervisorClient::new("http://127.0.0.1:3030")?;
    println!("✅ Connected to supervisor\n");

    // ========================================================================
    // STEP 5: Register and start OSIRIS runner
    // ========================================================================
    println!("Step 5: Registering OSIRIS runner");
    println!("─────────────────────────────────────────────────────────────\n");

    let runner_path = runner_binary.path().to_string_lossy();
    let db_path = "/tmp/osiris_openrpc.db";

    // Register the runner with the supervisor
    // Note: The current OpenRPC server uses register_runner, not add_runner
    client.register_runner("admin_secret", "osiris_runner").await?;
    println!("✅ Runner registered: osiris_runner");

    client.start_runner("admin_secret", "osiris_runner").await?;
    println!("✅ Runner started\n");

    sleep(Duration::from_secs(2)).await;

    // ========================================================================
    // STEP 6: Load job scripts
    // ========================================================================
    println!("Step 6: Loading job scripts");
    println!("─────────────────────────────────────────────────────────────\n");

    let note_script = std::fs::read_to_string("examples/osiris_openrpc/note.rhai")?;
    let event_script = std::fs::read_to_string("examples/osiris_openrpc/event.rhai")?;
    let query_script = std::fs::read_to_string("examples/osiris_openrpc/query.rhai")?;
    let access_denied_script = std::fs::read_to_string("examples/osiris_openrpc/access_denied.rhai")?;

    println!("✅ Loaded 4 job scripts\n");

    // ========================================================================
    // STEP 7: Dispatch jobs via OpenRPC
    // ========================================================================
    println!("Step 7: Dispatching jobs");
    println!("─────────────────────────────────────────────────────────────\n");

    // Job 1: Create Note
    println!("📝 Job 1: Creating Note...");
    let job1 = JobBuilder::new()
        .caller_id("openrpc_client")
        .context_id("osiris_demo")
        .payload(&note_script)
        .runner("osiris_runner")
        .executor("rhai")
        .timeout(30)
        .signature("alice", "")
        .signature("bob", "")
        .build()?;

    let job1_result = client.run_job("user_secret", job1).await;

    match job1_result {
        Ok(result) => println!("✅ {:?}\n", result),
        Err(e) => println!("❌ Job failed: {}\n", e),
    }

    sleep(Duration::from_secs(1)).await;

    // Job 2: Create Event
    println!("📅 Job 2: Creating Event...");
    let job2 = JobBuilder::new()
        .caller_id("openrpc_client")
        .context_id("osiris_demo")
        .payload(&event_script)
        .runner("osiris_runner")
        .executor("rhai")
        .timeout(30)
        .signature("alice", "")
        .signature("bob", "")
        .build()?;

    let job2_result = client.run_job("user_secret", job2).await;

    match job2_result {
        Ok(result) => println!("✅ {:?}\n", result),
        Err(e) => println!("❌ Job failed: {}\n", e),
    }

    sleep(Duration::from_secs(1)).await;

    // Job 3: Query Data
    println!("🔍 Job 3: Querying Data...");
    let job3 = JobBuilder::new()
        .caller_id("openrpc_client")
        .context_id("osiris_demo")
        .payload(&query_script)
        .runner("osiris_runner")
        .executor("rhai")
        .timeout(30)
        .signature("alice", "")
        .signature("bob", "")
        .signature("charlie", "")
        .build()?;

    let job3_result = client.run_job("user_secret", job3).await;

    match job3_result {
        Ok(result) => println!("✅ {:?}\n", result),
        Err(e) => println!("❌ Job failed: {}\n", e),
    }

    sleep(Duration::from_secs(1)).await;

    // Job 4: Access Control Test (should fail)
    println!("🔒 Job 4: Testing Access Control (expected to fail)...");
    let job4 = JobBuilder::new()
        .caller_id("openrpc_client")
        .context_id("osiris_demo")
        .payload(&access_denied_script)
        .runner("osiris_runner")
        .executor("rhai")
        .timeout(30)
        .signature("alice", "")
        .signature("bob", "")
        .signature("charlie", "")
        .build()?;

    let job4_result = client.run_job("user_secret", job4).await;

    match job4_result {
        Ok(result) => println!("❌ Unexpected success: {:?}\n", result),
        Err(e) => println!("✅ Access denied as expected: {}\n", e),
    }

    // ========================================================================
    // STEP 8: Check runner status
    // ========================================================================
    println!("\nStep 8: Checking runner status");
    println!("─────────────────────────────────────────────────────────────\n");

    let status = client.get_runner_status("admin_secret", "osiris_runner").await?;
    println!("Runner status: {:?}\n", status);

    // ========================================================================
    // STEP 9: Keep services running for manual testing
    // ========================================================================
    println!("\nStep 9: Services Running");
    println!("─────────────────────────────────────────────────────────────\n");
    println!("🌐 Admin UI:     http://127.0.0.1:8080");
    println!("📡 OpenRPC API:  http://127.0.0.1:3030");
    println!("\n⏸️  Press Ctrl+C to stop all services...\n");

    // Wait for Ctrl+C
    tokio::signal::ctrl_c().await?;

    // ========================================================================
    // STEP 10: Cleanup
    // ========================================================================
    println!("\n\nStep 10: Cleanup");
    println!("─────────────────────────────────────────────────────────────\n");

    client.stop_runner("admin_secret", "osiris_runner", false).await?;
    println!("✅ Runner stopped");

    client.remove_runner("admin_secret", "osiris_runner").await?;
    println!("✅ Runner removed");

    admin_ui.kill()?;
    println!("✅ Admin UI stopped");

    supervisor.kill()?;
    println!("✅ Supervisor stopped");

    println!("\n✨ Example completed successfully!");

    Ok(())
}
20
bin/supervisor/examples/osiris_openrpc/note.rhai
Normal file
@@ -0,0 +1,20 @@
print("Creating context for [alice, bob]...");
let ctx = get_context(["alice", "bob"]);
print("✓ Context ID: " + ctx.context_id());

print("\nCreating note...");
let note = note("notes")
    .title("Sprint Planning Meeting")
    .content("Discussed Q1 2025 roadmap and milestones")
    .tag("sprint", "2025-Q1")
    .tag("team", "engineering")
    .tag("priority", "high")
    .mime("text/markdown");

print("✓ Note created");

print("\nStoring note in context...");
ctx.save(note);
print("✓ Note stored");

"Note 'Sprint Planning Meeting' created and stored successfully"
21
bin/supervisor/examples/osiris_openrpc/query.rhai
Normal file
@@ -0,0 +1,21 @@
print("Querying context [alice, bob]...");
let ctx = get_context(["alice", "bob"]);
print("✓ Context ID: " + ctx.context_id());

print("\nListing all notes...");
let notes = ctx.list("notes");
print("✓ Found " + notes.len() + " note(s)");

print("\nRetrieving specific note...");
let note = ctx.get("notes", "sprint_planning_001");
print("✓ Retrieved note: sprint_planning_001");

print("\nQuerying context [alice, bob, charlie]...");
let ctx2 = get_context(["alice", "bob", "charlie"]);
print("✓ Context ID: " + ctx2.context_id());

print("\nListing all events...");
let events = ctx2.list("events");
print("✓ Found " + events.len() + " event(s)");

"Query complete: Found " + notes.len() + " notes and " + events.len() + " events"
53
bin/supervisor/scripts/build.sh
Executable file
@@ -0,0 +1,53 @@
#!/bin/bash

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(cd "$SCRIPT_DIR/.." && pwd)

# Spinner function: animate while the PID passed as $1 is still alive
spinner() {
    local pid=$1
    local delay=0.1
    local spinstr='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏'
    while ps -p $pid > /dev/null 2>&1; do
        local temp=${spinstr#?}
        printf " [%c]  " "$spinstr"
        local spinstr=$temp${spinstr%"$temp"}
        sleep $delay
        printf "\b\b\b\b\b\b"
    done
    printf "    \b\b\b\b"
}

echo "Building Hero Supervisor Workspace"
echo ""

# Build core and client
printf "📦 Core & Client... "
cd "$PROJECT_DIR"
if RUSTFLAGS="-A warnings" cargo build --release --workspace > /tmp/supervisor-build-core.log 2>&1 & spinner $!; wait $!; then
    echo "✅"
else
    echo "❌"
    echo "   Error: Build failed. Run 'cd $PROJECT_DIR && cargo build --release --workspace' for details"
    exit 1
fi

# # Build UI
# printf "📦 UI (WASM)... "
# cd "$PROJECT_DIR/ui"

# if ! command -v trunk &> /dev/null; then
#     echo "⚠️  (trunk not installed)"
#     echo "   Install with: cargo install trunk"
# else
#     if trunk build --release > /tmp/supervisor-build-ui.log 2>&1 & spinner $!; wait $!; then
#         echo "✅"
#     else
#         echo "❌"
#         echo "   Error: Build failed. Run 'cd $PROJECT_DIR/ui && trunk build --release' for details"
#         exit 1
#     fi
# fi

echo ""
echo "✅ All builds completed"
18
bin/supervisor/scripts/environment.sh
Executable file
@@ -0,0 +1,18 @@
#!/bin/bash
# Load environment variables from .env file

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(cd "$SCRIPT_DIR/.." && pwd)
ENV_FILE="$PROJECT_DIR/.env"

if [ -f "$ENV_FILE" ]; then
    # Export variables from .env file
    set -a
    source "$ENV_FILE"
    set +a
    echo "✅ Loaded environment from .env"
else
    echo "⚠️  No .env file found at $ENV_FILE"
    echo "   Copy .env.example to .env and configure your settings"
    exit 1
fi
11
bin/supervisor/scripts/generate_secret.sh
Executable file
@@ -0,0 +1,11 @@
#!/bin/bash
# Generate a supervisor secret key in the correct format

# Generate a random 32-byte hex string
SECRET=$(openssl rand -hex 32)

echo "Generated supervisor secret:"
echo "$SECRET"
echo ""
echo "Add this to your .env file:"
echo "SUPERVISOR_ADMIN_SECRET=$SECRET"
8
bin/supervisor/scripts/install.sh
Executable file
@@ -0,0 +1,8 @@
#!/bin/bash
set -e

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ROOT_DIR=$(cd "$SCRIPT_DIR/.." && pwd)

pushd "$ROOT_DIR"
cargo update
161
bin/supervisor/scripts/release.sh
Executable file
@@ -0,0 +1,161 @@
#!/bin/bash
# release.sh - Build optimized WASM and serve with Caddy + Brotli compression
set -e

###############################################################################
# Freezone Portal Release Script
# - Builds the WASM app with trunk in release mode
# - Optionally optimizes .wasm with wasm-opt (-Oz, strip)
# - Precompresses assets with gzip and brotli for efficient static serving
# - Generates a manifest (manifest.json) with sizes and SHA-256 checksums
#
# Usage:
#   ./release.sh [--outdir dist] [--no-opt] [--compress] [--no-manifest]
#                [--trunk-args "--public-url /portal/"]
#
# Notes:
# - Precompression is OFF by default; enable with --compress
# - Only modifies files within the output directory (default: dist)
# - Non-destructive to your source tree
###############################################################################

set -u

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(cd "$SCRIPT_DIR/.." && pwd)
BUILD_SCRIPT="$SCRIPT_DIR/build.sh"

# Defaults
OUTDIR="dist"
DO_OPT=1
DO_COMPRESS=0
DO_MANIFEST=1
TRUNK_ARGS=""

usage() {
  cat <<EOF
Usage: $(basename "$0") [options]

Options:
  --outdir <dir>       Output directory (default: dist)
  --no-opt             Skip wasm-opt optimization
  --compress           Enable gzip/brotli precompression
  --no-manifest        Skip manifest generation
  --trunk-args "..."   Extra arguments forwarded to trunk build
  -h, --help           Show this help

Examples:
  $(basename "$0") --outdir dist --trunk-args "--public-url /"
  $(basename "$0") --no-opt --compress
EOF
}

# Parse args
while [[ $# -gt 0 ]]; do
  case "$1" in
    --outdir)
      OUTDIR="$2"; shift 2;;
    --no-opt)
      DO_OPT=0; shift;;
    --compress)
      DO_COMPRESS=1; shift;;
    --no-manifest)
      DO_MANIFEST=0; shift;;
    --trunk-args)
      TRUNK_ARGS="$2"; shift 2;;
    -h|--help)
      usage; exit 0;;
    *)
      echo "❌ Unknown option: $1"; echo; usage; exit 1;;
  esac
done

# Tool checks
if [[ ! -x "$BUILD_SCRIPT" ]]; then
  echo "❌ build.sh not found or not executable at: $BUILD_SCRIPT"
  echo "   Ensure portal/scripts/build.sh exists and is chmod +x."
  exit 1
fi
if ! command -v trunk >/dev/null 2>&1; then
  echo "❌ trunk not found. Install with: cargo install trunk"; exit 1;
fi

HAS_WASM_OPT=0
if command -v wasm-opt >/dev/null 2>&1; then HAS_WASM_OPT=1; fi
if [[ $DO_OPT -eq 1 && $HAS_WASM_OPT -eq 0 ]]; then
  echo "⚠️  wasm-opt not found. Skipping WASM optimization."
  DO_OPT=0
fi

if [[ $DO_COMPRESS -eq 1 ]]; then
  if ! command -v gzip >/dev/null 2>&1; then
    echo "⚠️  gzip not found. Skipping gzip compression."; GZIP_OK=0; else GZIP_OK=1; fi
  if ! command -v brotli >/dev/null 2>&1; then
    echo "⚠️  brotli not found. Skipping brotli compression."; BR_OK=0; else BR_OK=1; fi
else
  GZIP_OK=0; BR_OK=0
fi

echo "🔧 Building optimized WASM bundle (via build.sh)..."
set -x
"$BUILD_SCRIPT" --release --outdir "$OUTDIR" ${TRUNK_ARGS:+--trunk-args "$TRUNK_ARGS"}
set +x

DIST_DIR="$PROJECT_DIR/$OUTDIR"
if [[ ! -d "$DIST_DIR" ]]; then
  echo "❌ Build failed: output directory not found: $DIST_DIR"; exit 1;
fi

# Optimize .wasm files
if [[ $DO_OPT -eq 1 && $HAS_WASM_OPT -eq 1 ]]; then
  echo "🛠️  Optimizing WASM with wasm-opt (-Oz, strip)..."
  while IFS= read -r -d '' wasm; do
    echo "  • $(basename "$wasm")"
    tmp="$wasm.opt"
    wasm-opt -Oz --strip-dwarf "$wasm" -o "$tmp"
    mv "$tmp" "$wasm"
  done < <(find "$DIST_DIR" -type f -name "*.wasm" -print0)
fi

# Precompress assets
if [[ $DO_COMPRESS -eq 1 ]]; then
  echo "🗜️  Precompressing assets (gzip/brotli)..."
  while IFS= read -r -d '' f; do
    if [[ $GZIP_OK -eq 1 ]]; then
      gzip -kf9 "$f"
    fi
    if [[ $BR_OK -eq 1 ]]; then
      brotli -f -q 11 "$f"
    fi
  done < <(find "$DIST_DIR" -type f \( -name "*.wasm" -o -name "*.js" -o -name "*.css" \) -print0)
fi

# Manifest with sizes and SHA-256
if [[ $DO_MANIFEST -eq 1 ]]; then
  echo "🧾 Generating manifest.json (sizes, sha256)..."
  manifest="$DIST_DIR/manifest.json"
  echo "{" > "$manifest"
  first=1
  while IFS= read -r -d '' f; do
    rel="${f#"$DIST_DIR/"}"
    size=$(stat -f%z "$f" 2>/dev/null || stat -c%s "$f")
    if command -v shasum >/dev/null 2>&1; then
      hash=$(shasum -a 256 "$f" | awk '{print $1}')
    else
      hash=$(openssl dgst -sha256 -r "$f" | awk '{print $1}')
    fi
    [[ $first -eq 1 ]] || echo "," >> "$manifest"
    first=0
    printf "  \"%s\": { \"bytes\": %s, \"sha256\": \"%s\" }" "$rel" "$size" "$hash" >> "$manifest"
  done < <(find "$DIST_DIR" -type f ! -name "manifest.json" -print0 | sort -z)
  printf "\n}\n" >> "$manifest"
fi

echo "📦 Checking bundle sizes ($OUTDIR)..."
if [ -d "$OUTDIR" ]; then
  echo "Bundle sizes:"
  find "$OUTDIR" -name "*.wasm" -exec ls -lh {} \; | awk '{print "  WASM: " $5 " - " $9}'
  find "$OUTDIR" -name "*.js" -exec ls -lh {} \; | awk '{print "  JS:   " $5 " - " $9}'
  find "$OUTDIR" -name "*.css" -exec ls -lh {} \; | awk '{print "  CSS:  " $5 " - " $9}'
  echo ""
fi
71
bin/supervisor/scripts/run.sh
Executable file
@@ -0,0 +1,71 @@
#!/bin/bash

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(cd "$SCRIPT_DIR/.." && pwd)

# Load environment variables
source "$SCRIPT_DIR/environment.sh"

# Build first
echo "🔨 Building supervisor..."
"$SCRIPT_DIR/build.sh"

# Validate required environment variables
if [ -z "$ADMIN_SECRETS" ]; then
    echo "❌ Error: ADMIN_SECRETS not set in .env"
    echo "   Generate a secret with: ./scripts/generate_secret.sh"
    exit 1
fi

# Set defaults from env vars
REDIS_URL="${REDIS_URL:-redis://127.0.0.1:6379}"
PORT="${PORT:-3030}"
BIND_ADDRESS="${BIND_ADDRESS:-127.0.0.1}"
LOG_LEVEL="${LOG_LEVEL:-info}"

cd "$PROJECT_DIR"

# Build command with flags from env vars
SUPERVISOR_CMD="target/release/supervisor --redis-url $REDIS_URL --port $PORT --bind-address $BIND_ADDRESS"

# Add admin secrets
IFS=',' read -ra SECRETS <<< "$ADMIN_SECRETS"
for secret in "${SECRETS[@]}"; do
    SUPERVISOR_CMD="$SUPERVISOR_CMD --admin-secret $secret"
done

# Add user secrets if provided
if [ ! -z "$USER_SECRETS" ]; then
    IFS=',' read -ra SECRETS <<< "$USER_SECRETS"
    for secret in "${SECRETS[@]}"; do
        SUPERVISOR_CMD="$SUPERVISOR_CMD --user-secret $secret"
    done
fi

# Add register secrets if provided
if [ ! -z "$REGISTER_SECRETS" ]; then
    IFS=',' read -ra SECRETS <<< "$REGISTER_SECRETS"
    for secret in "${SECRETS[@]}"; do
        SUPERVISOR_CMD="$SUPERVISOR_CMD --register-secret $secret"
    done
fi

# Add mycelium URL if provided
if [ ! -z "$MYCELIUM_URL" ]; then
    SUPERVISOR_CMD="$SUPERVISOR_CMD --mycelium-url $MYCELIUM_URL"
fi

# Add runners if provided
if [ ! -z "$RUNNERS" ]; then
    SUPERVISOR_CMD="$SUPERVISOR_CMD --runners $RUNNERS"
fi

echo ""
echo "🚀 Starting Hero Supervisor"
echo "   Redis: $REDIS_URL"
echo "   Port: $PORT"
echo "   Log Level: $LOG_LEVEL"
echo ""

# Run supervisor directly with output visible
exec env RUST_LOG="$LOG_LEVEL" RUST_LOG_STYLE=never $SUPERVISOR_CMD
53
bin/supervisor/scripts/test.sh
Executable file
@@ -0,0 +1,53 @@
#!/bin/bash

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(cd "$SCRIPT_DIR/.." && pwd)

# Spinner function: animate while the PID passed as $1 is still alive
spinner() {
    local pid=$1
    local delay=0.1
    local spinstr='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏'
    while ps -p $pid > /dev/null 2>&1; do
        local temp=${spinstr#?}
        printf " [%c]  " "$spinstr"
        local spinstr=$temp${spinstr%"$temp"}
        sleep $delay
        printf "\b\b\b\b\b\b"
    done
    printf "    \b\b\b\b"
}

echo "Testing Hero Supervisor Workspace"
echo ""

# Test core and client
printf "🧪 Core & Client... "
cd "$PROJECT_DIR"
if cargo test --workspace > /tmp/supervisor-test-core.log 2>&1 & spinner $!; wait $!; then
    echo "✅"
else
    echo "❌"
    echo "   Error: Tests failed. Run 'cd $PROJECT_DIR && cargo test --workspace' for details"
    exit 1
fi

# Test UI
printf "🧪 UI (WASM)... "
cd "$PROJECT_DIR/ui"

if ! command -v wasm-pack &> /dev/null; then
    echo "⚠️  (wasm-pack not installed)"
    echo "   Install with: cargo install wasm-pack"
else
    if wasm-pack test --headless --firefox > /tmp/supervisor-test-ui.log 2>&1 & spinner $!; wait $!; then
        echo "✅"
    else
        echo "❌"
        echo "   Error: Tests failed. Run 'cd $PROJECT_DIR/ui && wasm-pack test --headless --firefox' for details"
        exit 1
    fi
fi

echo ""
echo "✅ All tests completed"
111
bin/supervisor/src/auth.rs
Normal file
111
bin/supervisor/src/auth.rs
Normal file
@@ -0,0 +1,111 @@
//! Authentication and API key management

use serde::{Deserialize, Serialize};
use uuid::Uuid;

/// API key scope/permission level
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ApiKeyScope {
    /// Full access - can manage keys, runners, and jobs
    Admin,
    /// Can register new runners
    Registrar,
    /// Can create and manage jobs
    User,
}

impl ApiKeyScope {
    pub fn as_str(&self) -> &'static str {
        match self {
            ApiKeyScope::Admin => "admin",
            ApiKeyScope::Registrar => "registrar",
            ApiKeyScope::User => "user",
        }
    }
}

/// An API key with metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
    /// The actual key value (UUID or custom string)
    pub key: String,
    /// Human-readable name for the key
    pub name: String,
    /// Permission scope
    pub scope: ApiKeyScope,
    /// When the key was created (RFC 3339 timestamp)
    pub created_at: String,
    /// Optional expiration timestamp
    pub expires_at: Option<String>,
}

impl ApiKey {
    /// Create a new API key with a generated UUID as the key value
    pub fn new(name: String, scope: ApiKeyScope) -> Self {
        Self::with_key(Uuid::new_v4().to_string(), name, scope)
    }

    /// Create a new API key with a specific key value
    pub fn with_key(key: String, name: String, scope: ApiKeyScope) -> Self {
        Self {
            key,
            name,
            scope,
            created_at: chrono::Utc::now().to_rfc3339(),
            expires_at: None,
        }
    }
}

/// Response for auth verification
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthVerifyResponse {
    pub valid: bool,
    pub name: String,
    pub scope: String,
}

/// Method authorization requirements.
/// Maps RPC method names to the scopes allowed to call them;
/// `None` means the method is public and requires no authentication.
pub fn get_method_required_scopes(method: &str) -> Option<Vec<ApiKeyScope>> {
    use ApiKeyScope::*;

    match method {
        // Admin-only methods
        "key.create" | "key.generate" | "key.delete" | "key.list" |
        "supervisor.info" => Some(vec![Admin]),

        // Admin or Registrar methods
        "runner.create" | "runner.remove" => Some(vec![Admin, Registrar]),

        // Admin or User methods
        "job.create" | "job.run" | "job.start" | "job.stop" | "job.delete" => {
            Some(vec![Admin, User])
        }

        // Public methods (no auth required)
        "rpc.discover" => None,

        // Any authenticated key (read-only operations)
        "runner.list" | "runner.ping" |
        "job.get" | "job.list" | "job.status" | "job.result" | "job.logs" |
        "auth.verify" => Some(vec![Admin, Registrar, User]),

        // Default: require authentication
        _ => Some(vec![Admin, Registrar, User]),
    }
}
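A minimal sketch of how a caller could consult this scope table before dispatching a method; `is_authorized` is a hypothetical helper, and the enforcement that actually ships in this commit lives in `AuthMiddleware` in `bin/supervisor/src/openrpc.rs`:

```rust
use hero_supervisor::auth::{get_method_required_scopes, ApiKey, ApiKeyScope};

/// Hypothetical helper: true when `key` may invoke `method`.
fn is_authorized(key: &ApiKey, method: &str) -> bool {
    match get_method_required_scopes(method) {
        None => true,                                // public method, no auth needed
        Some(scopes) => scopes.contains(&key.scope), // key scope must be listed
    }
}

fn main() {
    let key = ApiKey::new("demo".to_string(), ApiKeyScope::User);
    assert!(is_authorized(&key, "job.create"));    // Admin or User
    assert!(!is_authorized(&key, "key.generate")); // Admin only
    assert!(is_authorized(&key, "rpc.discover"));  // public
}
```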
112
bin/supervisor/src/bin/supervisor.rs
Normal file
112
bin/supervisor/src/bin/supervisor.rs
Normal file
@@ -0,0 +1,112 @@
//! Hero Supervisor Binary

use hero_supervisor::SupervisorBuilder;
use clap::Parser;
use log::{error, info};

/// Hero Supervisor - manages actors and dispatches jobs
#[derive(Parser, Debug)]
#[command(name = "supervisor")]
#[command(about = "Hero Supervisor - manages actors and dispatches jobs")]
struct Args {
    /// Redis URL for the job queue
    #[arg(long, default_value = "redis://127.0.0.1:6379")]
    redis_url: String,

    /// Namespace for Redis keys
    #[arg(long, default_value = "")]
    namespace: String,

    /// Admin secrets (required, can be specified multiple times)
    #[arg(long = "admin-secret", value_name = "SECRET", required = true)]
    admin_secrets: Vec<String>,

    /// User secrets (can be specified multiple times)
    #[arg(long = "user-secret", value_name = "SECRET")]
    user_secrets: Vec<String>,

    /// Register secrets (can be specified multiple times)
    #[arg(long = "register-secret", value_name = "SECRET")]
    register_secrets: Vec<String>,

    /// Port for the OpenRPC HTTP server
    #[arg(long, default_value = "3030")]
    port: u16,

    /// Bind address for the OpenRPC HTTP server
    #[arg(long, default_value = "127.0.0.1")]
    bind_address: String,

    /// Pre-configured runner names (comma-separated)
    #[arg(long, value_name = "NAMES", value_delimiter = ',')]
    runners: Vec<String>,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();
    let args = Args::parse();

    // Build supervisor
    let mut builder = SupervisorBuilder::new()
        .admin_secrets(args.admin_secrets);

    if !args.user_secrets.is_empty() {
        builder = builder.user_secrets(args.user_secrets);
    }

    if !args.register_secrets.is_empty() {
        builder = builder.register_secrets(args.register_secrets);
    }

    let supervisor = builder.build().await?;

    // Register pre-configured runners
    for runner_name in &args.runners {
        if let Err(e) = supervisor.runner_create(runner_name.clone()).await {
            error!("Failed to register runner '{}': {}", runner_name, e);
        }
    }

    // Start the OpenRPC server
    use hero_supervisor::openrpc::start_http_openrpc_server;

    let supervisor_clone = supervisor.clone();
    let bind_addr = args.bind_address.clone();
    let port = args.port;

    tokio::spawn(async move {
        match start_http_openrpc_server(supervisor_clone, &bind_addr, port).await {
            Ok(handle) => {
                handle.stopped().await;
                error!("OpenRPC server stopped unexpectedly");
            }
            Err(e) => error!("OpenRPC server error: {}", e),
        }
    });

    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;

    // Print startup info
    println!("📡 http://{}:{}", args.bind_address, args.port);
    info!("Hero Supervisor is running. Press Ctrl+C to shutdown.");

    // Block until the shutdown signal arrives
    tokio::signal::ctrl_c().await?;
    info!("Received shutdown signal");
    Ok(())
}
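For reference, a typical invocation of this binary under the flags defined above might look as follows; the secret values and runner names are placeholders:

```bash
supervisor \
  --redis-url redis://127.0.0.1:6379 \
  --admin-secret my-admin-secret \
  --user-secret my-user-secret \
  --bind-address 127.0.0.1 \
  --port 3030 \
  --runners osiris,sal
```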
198
bin/supervisor/src/builder.rs
Normal file
198
bin/supervisor/src/builder.rs
Normal file
@@ -0,0 +1,198 @@
//! Supervisor builder for configuration and initialization.

use crate::error::{SupervisorError, SupervisorResult};
use crate::Supervisor;
use hero_job_client::ClientBuilder;

/// Builder for constructing a Supervisor instance
pub struct SupervisorBuilder {
    /// Set of registered runner IDs
    runners: std::collections::HashSet<String>,
    /// Redis URL for the connection
    redis_url: String,
    /// Admin secrets for bootstrapping API keys
    admin_secrets: Vec<String>,
    /// User secrets for bootstrapping API keys
    user_secrets: Vec<String>,
    /// Register secrets for bootstrapping API keys
    register_secrets: Vec<String>,
    client_builder: ClientBuilder,
    /// Osiris URL for queries (optional)
    osiris_url: Option<String>,
    /// Supervisor URL for commands via Osiris (optional)
    supervisor_url: Option<String>,
    /// Supervisor secret for Osiris commands (optional)
    supervisor_secret: Option<String>,
    /// Runner name for Osiris operations (optional)
    osiris_runner_name: Option<String>,
}

impl SupervisorBuilder {
    /// Create a new supervisor builder
    pub fn new() -> Self {
        Self {
            runners: std::collections::HashSet::new(),
            redis_url: "redis://localhost:6379".to_string(),
            admin_secrets: Vec::new(),
            user_secrets: Vec::new(),
            register_secrets: Vec::new(),
            client_builder: ClientBuilder::new(),
            osiris_url: None,
            supervisor_url: None,
            supervisor_secret: None,
            osiris_runner_name: None,
        }
    }

    /// Set the Osiris URL for queries
    pub fn osiris_url<S: Into<String>>(mut self, url: S) -> Self {
        self.osiris_url = Some(url.into());
        self
    }

    /// Set the Supervisor URL for Osiris commands
    pub fn supervisor_url_for_osiris<S: Into<String>>(mut self, url: S) -> Self {
        self.supervisor_url = Some(url.into());
        self
    }

    /// Set the Supervisor secret for Osiris commands
    pub fn supervisor_secret<S: Into<String>>(mut self, secret: S) -> Self {
        self.supervisor_secret = Some(secret.into());
        self
    }

    /// Set the runner name for Osiris operations
    pub fn osiris_runner_name<S: Into<String>>(mut self, name: S) -> Self {
        self.osiris_runner_name = Some(name.into());
        self
    }

    /// Add an admin secret
    pub fn add_admin_secret<S: Into<String>>(mut self, secret: S) -> Self {
        self.admin_secrets.push(secret.into());
        self
    }

    /// Add multiple admin secrets
    pub fn admin_secrets<I, S>(mut self, secrets: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        self.admin_secrets.extend(secrets.into_iter().map(|s| s.into()));
        self
    }

    /// Add a user secret
    pub fn add_user_secret<S: Into<String>>(mut self, secret: S) -> Self {
        self.user_secrets.push(secret.into());
        self
    }

    /// Add multiple user secrets
    pub fn user_secrets<I, S>(mut self, secrets: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        self.user_secrets.extend(secrets.into_iter().map(|s| s.into()));
        self
    }

    /// Add a register secret
    pub fn add_register_secret<S: Into<String>>(mut self, secret: S) -> Self {
        self.register_secrets.push(secret.into());
        self
    }

    /// Add multiple register secrets
    pub fn register_secrets<I, S>(mut self, secrets: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        self.register_secrets.extend(secrets.into_iter().map(|s| s.into()));
        self
    }

    /// Add a runner to the supervisor
    pub fn add_runner(mut self, runner_id: String) -> Self {
        self.runners.insert(runner_id);
        self
    }

    /// Build the supervisor
    pub async fn build(self) -> SupervisorResult<Supervisor> {
        // Create the Redis client
        let redis_client = redis::Client::open(self.redis_url.as_str())
            .map_err(|e| SupervisorError::ConfigError {
                reason: format!("Invalid Redis URL: {}", e),
            })?;

        // Create the store
        let mut store = crate::store::Store::new();

        // Bootstrap the configured secrets as API keys. `with_key` is used so
        // the secret itself is the key value that clients present as a Bearer
        // token; `ApiKey::new` would generate a random UUID that never matches.
        for secret in &self.admin_secrets {
            store.key_create(crate::auth::ApiKey::with_key(
                secret.clone(),
                "admin".to_string(),
                crate::auth::ApiKeyScope::Admin,
            ));
        }

        for secret in &self.user_secrets {
            store.key_create(crate::auth::ApiKey::with_key(
                secret.clone(),
                "user".to_string(),
                crate::auth::ApiKeyScope::User,
            ));
        }

        for secret in &self.register_secrets {
            store.key_create(crate::auth::ApiKey::with_key(
                secret.clone(),
                "registrar".to_string(),
                crate::auth::ApiKeyScope::Registrar,
            ));
        }

        // Build the job client
        let client = self.client_builder.build().await?;

        // Build the Osiris client if configured - temporarily disabled, needs update
        // let osiris_client = if let (Some(osiris_url), Some(supervisor_url)) =
        //     (self.osiris_url, self.supervisor_url) {
        //     let mut builder = osiris_client::OsirisClient::builder()
        //         .osiris_url(osiris_url)
        //         .supervisor_url(supervisor_url)
        //         .runner_name(self.osiris_runner_name.unwrap_or_else(|| "osiris-runner".to_string()));
        //
        //     if let Some(secret) = self.supervisor_secret {
        //         builder = builder.supervisor_secret(secret);
        //     }
        //
        //     Some(builder.build().map_err(|e| SupervisorError::ConfigError {
        //         reason: format!("Failed to build Osiris client: {}", e),
        //     })?)
        // } else {
        //     None
        // };

        // Add the pre-configured runners to the store
        for runner_id in self.runners {
            let _ = store.runner_add(runner_id);
        }

        Ok(Supervisor {
            store: std::sync::Arc::new(tokio::sync::Mutex::new(store)),
            job_client: client,
            redis_client,
            // osiris_client, // Temporarily disabled
        })
    }
}

impl Default for SupervisorBuilder {
    fn default() -> Self {
        Self::new()
    }
}
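A usage sketch for the builder (the secret and runner name are placeholders); `build()` only succeeds if the Redis URL parses and the job client can be constructed:

```rust
use hero_supervisor::SupervisorBuilder;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let supervisor = SupervisorBuilder::new()
        .add_admin_secret("my-admin-secret")
        .add_runner("osiris".to_string())
        .build()
        .await?;

    assert!(supervisor.has_runner("osiris").await);
    Ok(())
}
```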
73
bin/supervisor/src/error.rs
Normal file
73
bin/supervisor/src/error.rs
Normal file
@@ -0,0 +1,73 @@
//! Error types for supervisor operations.

use thiserror::Error;
use jsonrpsee::types::ErrorObject;

/// Result type for supervisor operations
pub type SupervisorResult<T> = Result<T, SupervisorError>;

/// Errors that can occur during supervisor operations
#[derive(Debug, Error)]
pub enum SupervisorError {
    #[error("Runner '{runner_id}' not found")]
    RunnerNotFound { runner_id: String },

    #[error("Runner '{runner_id}' is already registered")]
    RunnerAlreadyRegistered { runner_id: String },

    #[error("Job '{job_id}' not found")]
    JobNotFound { job_id: String },

    #[error("Failed to queue job for runner '{runner_id}': {reason}")]
    QueueError { runner_id: String, reason: String },

    #[error("Configuration error: {reason}")]
    ConfigError { reason: String },

    #[error("Invalid secret or API key: {0}")]
    InvalidSecret(String),

    #[error("Authentication error: {message}")]
    AuthenticationError { message: String },

    #[error("Insufficient permissions: {message}")]
    PermissionDenied { message: String },

    #[error("Redis error: {source}")]
    RedisError {
        #[from]
        source: redis::RedisError,
    },

    #[error("Job error: {source}")]
    JobError {
        #[from]
        source: hero_job::JobError,
    },

    #[error("Job client error: {source}")]
    JobClientError {
        #[from]
        source: hero_job_client::ClientError,
    },

    #[error("IO error: {source}")]
    IoError {
        #[from]
        source: std::io::Error,
    },

    #[error("Osiris client error: {0}")]
    OsirisError(String),
}

/// Conversion from SupervisorError to a JSON-RPC error object,
/// so `?` works inside RPC handlers that return `RpcResult`.
impl From<SupervisorError> for ErrorObject<'static> {
    fn from(err: SupervisorError) -> Self {
        ErrorObject::owned(
            -32603, // Internal error code
            format!("Supervisor error: {err}"),
            None::<()>,
        )
    }
}
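Because of the `#[from]` attributes, fallible calls into the wrapped crates convert into `SupervisorError` with a bare `?`; a small sketch:

```rust
use hero_supervisor::error::SupervisorResult;

/// redis::RedisError converts into SupervisorError::RedisError via #[from].
fn open_client(url: &str) -> SupervisorResult<redis::Client> {
    Ok(redis::Client::open(url)?)
}
```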
19
bin/supervisor/src/lib.rs
Normal file
19
bin/supervisor/src/lib.rs
Normal file
@@ -0,0 +1,19 @@
//! Hero Supervisor - Actor management for the Hero ecosystem.
//!
//! See README.md for detailed documentation and usage examples.

pub mod supervisor;
pub mod builder;
pub mod error;
pub mod openrpc;
pub mod auth;
pub mod store;

// Re-export the job client for convenience
pub use hero_job_client as job_client;

// Re-export the main types for convenience
pub use supervisor::Supervisor;
pub use builder::SupervisorBuilder;
pub use error::{SupervisorError, SupervisorResult};
pub use hero_job::{Job, JobBuilder, JobStatus, JobError};
474
bin/supervisor/src/openrpc.rs
Normal file
474
bin/supervisor/src/openrpc.rs
Normal file
@@ -0,0 +1,474 @@
//! OpenRPC server implementation.

use jsonrpsee::{
    core::{async_trait, RpcResult},
    proc_macros::rpc,
    server::middleware::rpc::{MethodResponse, RpcServiceBuilder, RpcServiceT},
    server::{Server, ServerHandle},
    types::{ErrorObject, ErrorObjectOwned},
};
use tower_http::cors::{Any, CorsLayer};

use log::{debug, error, info};

use crate::{auth::ApiKey, supervisor::Supervisor};
use hero_job::{Job, JobResult, JobStatus};
use serde::{Deserialize, Serialize};

use std::fs;
use std::net::SocketAddr;

/// Load the OpenRPC specification from docs/openrpc.json
fn load_openrpc_spec() -> Result<serde_json::Value, Box<dyn std::error::Error>> {
    let path = "../../docs/openrpc.json";
    let content = fs::read_to_string(path)?;
    let spec = serde_json::from_str(&content)?;
    debug!("Loaded OpenRPC specification from: {}", path);
    Ok(spec)
}

/// Request parameters for generating API keys (auto-generates the key value)
#[derive(Debug, Deserialize, Serialize)]
pub struct GenerateApiKeyParams {
    pub name: String,
    pub scope: String, // "admin", "registrar", or "user"
}

/// Job status response with metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobStatusResponse {
    pub job_id: String,
    pub status: String,
    pub created_at: String,
}

/// Supervisor information response
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SupervisorInfo {
    pub server_url: String,
}

/// OpenRPC trait - maps directly to Supervisor methods.
/// This trait exists only for jsonrpsee's macro system;
/// the implementation below is just error type conversion -
/// all actual logic lives in Supervisor methods.
#[rpc(server)]
pub trait SupervisorRpc {
    /// Create a job without queuing it to a runner
    #[method(name = "job.create")]
    async fn job_create(&self, params: Job) -> RpcResult<String>;

    /// Get a job by job ID
    #[method(name = "job.get")]
    async fn job_get(&self, job_id: String) -> RpcResult<Job>;

    /// Start a previously created job by queuing it to its assigned runner
    #[method(name = "job.start")]
    async fn job_start(&self, job_id: String) -> RpcResult<()>;

    /// Run a job on the appropriate runner and return the result
    #[method(name = "job.run")]
    async fn job_run(&self, params: Job) -> RpcResult<JobResult>;

    /// Get the current status of a job
    #[method(name = "job.status")]
    async fn job_status(&self, job_id: String) -> RpcResult<JobStatus>;

    /// Get the result of a completed job (blocks until the result is available)
    #[method(name = "job.result")]
    async fn job_result(&self, job_id: String) -> RpcResult<JobResult>;

    /// Get logs for a specific job
    #[method(name = "job.logs")]
    async fn job_logs(&self, job_id: String) -> RpcResult<Vec<String>>;

    /// Stop a running job
    #[method(name = "job.stop")]
    async fn job_stop(&self, job_id: String) -> RpcResult<()>;

    /// Delete a job from the system
    #[method(name = "job.delete")]
    async fn job_delete(&self, job_id: String) -> RpcResult<()>;

    /// List all jobs
    #[method(name = "job.list")]
    async fn job_list(&self) -> RpcResult<Vec<Job>>;

    /// Add a runner with configuration
    #[method(name = "runner.create")]
    async fn runner_create(&self, runner_id: String) -> RpcResult<()>;

    /// Delete a runner from the supervisor
    #[method(name = "runner.remove")]
    async fn runner_delete(&self, runner_id: String) -> RpcResult<()>;

    /// List all runner IDs
    #[method(name = "runner.list")]
    async fn runner_list(&self) -> RpcResult<Vec<String>>;

    /// Ping a runner (dispatch a ping job)
    #[method(name = "runner.ping")]
    async fn ping_runner(&self, runner_id: String) -> RpcResult<String>;

    /// Create an API key with a provided key value
    #[method(name = "key.create")]
    async fn key_create(&self, key: ApiKey) -> RpcResult<()>;

    /// Generate a new API key with an auto-generated key value
    #[method(name = "key.generate")]
    async fn key_generate(&self, params: GenerateApiKeyParams) -> RpcResult<ApiKey>;

    /// Delete an API key
    #[method(name = "key.delete")]
    async fn key_delete(&self, key_id: String) -> RpcResult<()>;

    /// List all API keys (admin only)
    #[method(name = "key.list")]
    async fn key_list(&self) -> RpcResult<Vec<ApiKey>>;

    /// Verify an API key and return its metadata
    #[method(name = "auth.verify")]
    async fn auth_verify(&self) -> RpcResult<crate::auth::AuthVerifyResponse>;

    /// Get supervisor information
    #[method(name = "supervisor.info")]
    async fn supervisor_info(&self) -> RpcResult<SupervisorInfo>;

    /// OpenRPC discovery method - returns the OpenRPC document describing this API
    #[method(name = "rpc.discover")]
    async fn rpc_discover(&self) -> RpcResult<serde_json::Value>;
}

/// RPC implementation on Supervisor.
///
/// This implementation is ONLY for error type conversion (SupervisorError -> ErrorObject).
/// All business logic is in Supervisor methods - these are thin wrappers.
/// Authorization is handled by middleware before the methods are called.
#[async_trait]
impl SupervisorRpcServer for Supervisor {
    async fn job_create(&self, job: Job) -> RpcResult<String> {
        Ok(self.job_create(job).await?)
    }

    async fn job_get(&self, job_id: String) -> RpcResult<Job> {
        Ok(self.job_get(&job_id).await?)
    }

    async fn job_list(&self) -> RpcResult<Vec<Job>> {
        let job_ids = self.job_list().await;
        let mut jobs = Vec::new();
        for job_id in job_ids {
            if let Ok(job) = self.job_get(&job_id).await {
                jobs.push(job);
            }
        }
        Ok(jobs)
    }

    async fn job_run(&self, job: Job) -> RpcResult<JobResult> {
        let output = self.job_run(job).await?;
        Ok(JobResult::Success { success: output })
    }

    async fn job_start(&self, job_id: String) -> RpcResult<()> {
        self.job_start(&job_id).await?;
        Ok(())
    }

    async fn job_status(&self, job_id: String) -> RpcResult<JobStatus> {
        Ok(self.job_status(&job_id).await?)
    }

    async fn job_logs(&self, job_id: String) -> RpcResult<Vec<String>> {
        Ok(self.job_logs(&job_id, None).await?)
    }

    async fn job_result(&self, job_id: String) -> RpcResult<JobResult> {
        match self.job_result(&job_id).await? {
            Some(result) => {
                if result.starts_with("Error:") {
                    Ok(JobResult::Error { error: result })
                } else {
                    Ok(JobResult::Success { success: result })
                }
            }
            None => Ok(JobResult::Error { error: "Job result not available".to_string() }),
        }
    }

    async fn job_stop(&self, job_id: String) -> RpcResult<()> {
        self.job_stop(&job_id).await?;
        Ok(())
    }

    async fn job_delete(&self, job_id: String) -> RpcResult<()> {
        self.job_delete(&job_id).await?;
        Ok(())
    }

    async fn runner_create(&self, runner_id: String) -> RpcResult<()> {
        self.runner_create(runner_id).await?;
        Ok(())
    }

    async fn runner_delete(&self, runner_id: String) -> RpcResult<()> {
        Ok(self.runner_delete(&runner_id).await?)
    }

    async fn runner_list(&self) -> RpcResult<Vec<String>> {
        Ok(self.runner_list().await)
    }

    async fn ping_runner(&self, runner_id: String) -> RpcResult<String> {
        Ok(self.runner_ping(&runner_id).await?)
    }

    async fn key_create(&self, key: ApiKey) -> RpcResult<()> {
        let _ = self.key_create(key).await;
        Ok(())
    }

    async fn key_generate(&self, params: GenerateApiKeyParams) -> RpcResult<ApiKey> {
        // Parse the requested scope
        let api_scope = match params.scope.to_lowercase().as_str() {
            "admin" => crate::auth::ApiKeyScope::Admin,
            "registrar" => crate::auth::ApiKeyScope::Registrar,
            "user" => crate::auth::ApiKeyScope::User,
            _ => return Err(ErrorObject::owned(
                -32602,
                "Invalid scope. Must be 'admin', 'registrar', or 'user'",
                None::<()>,
            )),
        };

        let api_key = self.create_api_key(params.name, api_scope).await;
        Ok(api_key)
    }

    async fn key_delete(&self, key_id: String) -> RpcResult<()> {
        self.key_delete(&key_id)
            .await
            .ok_or_else(|| ErrorObject::owned(-32603, "API key not found", None::<()>))?;
        Ok(())
    }

    async fn key_list(&self) -> RpcResult<Vec<ApiKey>> {
        Ok(self.key_list().await)
    }

    async fn auth_verify(&self) -> RpcResult<crate::auth::AuthVerifyResponse> {
        // If this method is called, the middleware has already verified the key,
        // so we simply return success - an invalid key would not have gotten through.
        Ok(crate::auth::AuthVerifyResponse {
            valid: true,
            name: "verified".to_string(),
            scope: "authenticated".to_string(),
        })
    }

    async fn supervisor_info(&self) -> RpcResult<SupervisorInfo> {
        Ok(SupervisorInfo {
            server_url: "http://127.0.0.1:3031".to_string(), // TODO: get from config
        })
    }

    async fn rpc_discover(&self) -> RpcResult<serde_json::Value> {
        debug!("OpenRPC request: rpc.discover");

        // Read the OpenRPC specification from docs/openrpc.json
        match load_openrpc_spec() {
            Ok(spec) => Ok(spec),
            Err(e) => {
                error!("Failed to load OpenRPC specification: {}", e);
                // Fall back to a minimal spec if file loading fails
                Ok(serde_json::json!({
                    "openrpc": "1.3.2",
                    "info": {
                        "title": "Hero Supervisor OpenRPC API",
                        "version": "1.0.0",
                        "description": "OpenRPC API for managing Hero Supervisor runners and jobs"
                    },
                    "methods": [],
                    "error": "Failed to load full specification"
                }))
            }
        }
    }
}

/// Authorization middleware using RpcServiceT.
/// This middleware is created per-connection and checks permissions for each RPC call.
#[derive(Clone)]
struct AuthMiddleware<S> {
    supervisor: Supervisor,
    inner: S,
}

impl<S> RpcServiceT for AuthMiddleware<S>
where
    S: RpcServiceT<MethodResponse = MethodResponse> + Send + Sync + Clone + 'static,
{
    type MethodResponse = MethodResponse;
    type BatchResponse = S::BatchResponse;
    type NotificationResponse = S::NotificationResponse;

    fn call<'a>(&self, req: jsonrpsee::server::middleware::rpc::Request<'a>) -> impl std::future::Future<Output = Self::MethodResponse> + Send + 'a {
        let supervisor = self.supervisor.clone();
        let inner = self.inner.clone();
        let method = req.method_name().to_string();
        let id = req.id();

        Box::pin(async move {
            // Check whether the method requires auth at all
            let required_scopes = match crate::auth::get_method_required_scopes(&method) {
                None => {
                    // Public method - no auth required
                    debug!("ℹ️ Public method: {}", method);
                    return inner.call(req).await;
                }
                Some(scopes) => scopes,
            };

            // Extract the Authorization header from the request extensions
            let headers = req.extensions().get::<hyper::HeaderMap>();

            let api_key = headers
                .and_then(|h| h.get(hyper::header::AUTHORIZATION))
                .and_then(|value| value.to_str().ok())
                .and_then(|s| s.strip_prefix("Bearer "))
                .map(|k| k.to_string());

            let api_key = match api_key {
                Some(key) => key,
                None => {
                    error!("❌ Missing Authorization header for method: {}", method);
                    let err = ErrorObjectOwned::owned(
                        -32001,
                        format!("Missing Authorization header for method: {}", method),
                        None::<()>,
                    );
                    return MethodResponse::error(id, err);
                }
            };

            // Verify the API key and check its scope
            let key_obj = match supervisor.key_get(&api_key).await {
                Some(k) => k,
                None => {
                    error!("❌ Invalid API key");
                    let err = ErrorObjectOwned::owned(-32001, "Invalid API key", None::<()>);
                    return MethodResponse::error(id, err);
                }
            };

            if !required_scopes.contains(&key_obj.scope) {
                error!(
                    "❌ Unauthorized: method '{}' requires {:?}, got {:?}",
                    method, required_scopes, key_obj.scope
                );
                let err = ErrorObjectOwned::owned(
                    -32001,
                    format!(
                        "Insufficient permissions for '{}'. Required: {:?}, Got: {:?}",
                        method, required_scopes, key_obj.scope
                    ),
                    None::<()>,
                );
                return MethodResponse::error(id, err);
            }

            debug!("✅ Authorized: {} with scope {:?}", method, key_obj.scope);

            // Authorized - proceed with the call
            inner.call(req).await
        })
    }

    fn batch<'a>(&self, batch: jsonrpsee::server::middleware::rpc::Batch<'a>) -> impl std::future::Future<Output = Self::BatchResponse> + Send + 'a {
        // For simplicity, batch requests are passed through;
        // in production each request in the batch should be checked as well.
        self.inner.batch(batch)
    }

    fn notification<'a>(&self, notif: jsonrpsee::server::middleware::rpc::Notification<'a>) -> impl std::future::Future<Output = Self::NotificationResponse> + Send + 'a {
        self.inner.notification(notif)
    }
}

/// HTTP middleware that copies request headers into the request extensions
/// so the RPC-level auth middleware can read the Authorization header.
#[derive(Clone)]
struct HeaderPropagationService<S> {
    inner: S,
}

impl<S, B> tower::Service<hyper::Request<B>> for HeaderPropagationService<S>
where
    S: tower::Service<hyper::Request<B>> + Clone + Send + 'static,
    S::Future: Send + 'static,
    B: Send + 'static,
{
    type Response = S::Response;
    type Error = S::Error;
    type Future = std::pin::Pin<Box<dyn std::future::Future<Output = Result<Self::Response, Self::Error>> + Send>>;

    fn poll_ready(&mut self, cx: &mut std::task::Context<'_>) -> std::task::Poll<Result<(), Self::Error>> {
        self.inner.poll_ready(cx)
    }

    fn call(&mut self, mut req: hyper::Request<B>) -> Self::Future {
        let headers = req.headers().clone();
        req.extensions_mut().insert(headers);
        let fut = self.inner.call(req);
        Box::pin(fut)
    }
}

/// Start the HTTP OpenRPC server (Unix socket support would require additional dependencies)
pub async fn start_http_openrpc_server(
    supervisor: Supervisor,
    bind_address: &str,
    port: u16,
) -> anyhow::Result<ServerHandle> {
    let http_addr: SocketAddr = format!("{}:{}", bind_address, port).parse()?;

    // Configure CORS to allow requests from the admin UI.
    // Note: the Authorization header must be listed explicitly; it is not covered by Any.
    use tower_http::cors::AllowHeaders;
    let cors = CorsLayer::new()
        .allow_origin(Any)
        .allow_headers(AllowHeaders::list([
            hyper::header::CONTENT_TYPE,
            hyper::header::AUTHORIZATION,
        ]))
        .allow_methods(Any)
        .expose_headers(Any);

    // Build the RPC middleware with authorization (per-connection)
    let supervisor_for_middleware = supervisor.clone();
    let rpc_middleware = RpcServiceBuilder::new().layer_fn(move |service| {
        // This closure runs once per connection
        AuthMiddleware {
            supervisor: supervisor_for_middleware.clone(),
            inner: service,
        }
    });

    // Build the HTTP middleware stack with CORS and header propagation
    let http_middleware = tower::ServiceBuilder::new()
        .layer(cors)
        .layer(tower::layer::layer_fn(|service| {
            HeaderPropagationService { inner: service }
        }));

    let http_server = Server::builder()
        .set_rpc_middleware(rpc_middleware)
        .set_http_middleware(http_middleware)
        .build(http_addr)
        .await?;

    let http_handle = http_server.start(supervisor.into_rpc());

    info!("OpenRPC HTTP server running at http://{} with CORS enabled", http_addr);

    Ok(http_handle)
}
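Once the server is up, the auth flow can be exercised with plain JSON-RPC over HTTP; the token and port below are placeholders matching the binary's defaults:

```bash
# Authenticated call: any configured scope may invoke runner.list.
curl -s http://127.0.0.1:3030 \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer my-admin-secret' \
  -d '{"jsonrpc":"2.0","id":1,"method":"runner.list","params":[]}'

# Public call: rpc.discover needs no Authorization header.
curl -s http://127.0.0.1:3030 \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":2,"method":"rpc.discover","params":[]}'
```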
286
bin/supervisor/src/store.rs
Normal file
286
bin/supervisor/src/store.rs
Normal file
@@ -0,0 +1,286 @@
//! In-memory storage layer for Supervisor
//!
//! Provides CRUD operations for:
//! - API keys
//! - Runners
//! - Jobs

use crate::auth::{ApiKey, ApiKeyScope};
use crate::error::{SupervisorError, SupervisorResult};
use hero_job::Job;
use std::collections::{HashMap, HashSet};

/// In-memory storage for all supervisor data
pub struct Store {
    /// API keys, indexed by key value (key_value -> ApiKey)
    api_keys: HashMap<String, ApiKey>,
    /// Registered runner IDs
    runners: HashSet<String>,
    /// In-memory job storage (job_id -> Job)
    jobs: HashMap<String, Job>,
}

impl Store {
    /// Create a new store
    pub fn new() -> Self {
        Self {
            api_keys: HashMap::new(),
            runners: HashSet::new(),
            jobs: HashMap::new(),
        }
    }

    // ==================== API Key Operations ====================

    /// Insert an API key with a specific value.
    /// Keys are indexed by their key value (not their name) so that
    /// Bearer-token lookups in the auth middleware resolve correctly.
    pub fn key_create(&mut self, key: ApiKey) -> ApiKey {
        self.api_keys.insert(key.key.clone(), key.clone());
        key
    }

    /// Create a new API key with a generated UUID as the key value
    pub fn key_create_new(&mut self, name: String, scope: ApiKeyScope) -> ApiKey {
        self.key_create(ApiKey::new(name, scope))
    }

    /// Get an API key by its value
    pub fn key_get(&self, key_value: &str) -> Option<&ApiKey> {
        self.api_keys.get(key_value)
    }

    /// Delete an API key by its value
    pub fn key_delete(&mut self, key_value: &str) -> Option<ApiKey> {
        self.api_keys.remove(key_value)
    }

    /// List all API keys
    pub fn key_list(&self) -> Vec<ApiKey> {
        self.api_keys.values().cloned().collect()
    }

    /// List API keys by scope
    pub fn key_list_by_scope(&self, scope: ApiKeyScope) -> Vec<ApiKey> {
        self.api_keys
            .values()
            .filter(|k| k.scope == scope)
            .cloned()
            .collect()
    }

    // ==================== Runner Operations ====================

    /// Add a runner
    pub fn runner_add(&mut self, runner_id: String) -> SupervisorResult<()> {
        self.runners.insert(runner_id);
        Ok(())
    }

    /// Remove a runner
    pub fn runner_remove(&mut self, runner_id: &str) -> SupervisorResult<()> {
        self.runners.remove(runner_id);
        Ok(())
    }

    /// Check if a runner exists
    pub fn runner_exists(&self, runner_id: &str) -> bool {
        self.runners.contains(runner_id)
    }

    /// List all runner IDs
    pub fn runner_list_all(&self) -> Vec<String> {
        self.runners.iter().cloned().collect()
    }

    // ==================== Job Operations ====================

    /// Store a job in memory
    pub fn job_store(&mut self, job: Job) -> SupervisorResult<()> {
        self.jobs.insert(job.id.clone(), job);
        Ok(())
    }

    /// Get a job from memory
    pub fn job_get(&self, job_id: &str) -> SupervisorResult<Job> {
        self.jobs
            .get(job_id)
            .cloned()
            .ok_or_else(|| SupervisorError::JobNotFound {
                job_id: job_id.to_string(),
            })
    }

    /// Delete a job from memory
    pub fn job_delete(&mut self, job_id: &str) -> SupervisorResult<()> {
        self.jobs
            .remove(job_id)
            .ok_or_else(|| SupervisorError::JobNotFound {
                job_id: job_id.to_string(),
            })?;
        Ok(())
    }

    /// List all job IDs
    pub fn job_list(&self) -> Vec<String> {
        self.jobs.keys().cloned().collect()
    }

    /// Check if a job exists
    pub fn job_exists(&self, job_id: &str) -> bool {
        self.jobs.contains_key(job_id)
    }
}

impl Clone for Store {
    fn clone(&self) -> Self {
        Self {
            api_keys: self.api_keys.clone(),
            runners: self.runners.clone(),
            jobs: self.jobs.clone(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use hero_job::JobBuilder;

    fn create_test_store() -> Store {
        Store::new()
    }

    fn create_test_job(id: &str, runner: &str) -> Job {
        let mut job = JobBuilder::new()
            .caller_id("test_caller")
            .context_id("test_context")
            .runner(runner)
            .executor("test")
            .payload("test payload")
            .build()
            .unwrap();
        job.id = id.to_string(); // Set the ID manually
        job
    }

    #[test]
    fn test_api_key_operations() {
        let mut store = create_test_store();

        // Create a key
        let key = store.key_create_new("test_key".to_string(), ApiKeyScope::Admin);
        assert_eq!(key.name, "test_key");
        assert_eq!(key.scope, ApiKeyScope::Admin);

        // Get the key by its value
        let retrieved = store.key_get(&key.key);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().name, "test_key");

        // List keys
        let keys = store.key_list();
        assert_eq!(keys.len(), 1);

        // List by scope
        let admin_keys = store.key_list_by_scope(ApiKeyScope::Admin);
        assert_eq!(admin_keys.len(), 1);

        // Delete the key
        let removed = store.key_delete(&key.key);
        assert!(removed.is_some());
        assert!(store.key_get(&key.key).is_none());
    }

    #[test]
    fn test_runner_operations() {
        let mut store = create_test_store();

        // Add a runner
        assert!(store.runner_add("runner1".to_string()).is_ok());
        assert!(store.runner_exists("runner1"));

        // List runners
        let runners = store.runner_list_all();
        assert_eq!(runners.len(), 1);
        assert!(runners.contains(&"runner1".to_string()));

        // Remove the runner
        assert!(store.runner_remove("runner1").is_ok());
        assert!(!store.runner_exists("runner1"));
    }

    #[test]
    fn test_job_operations() {
        let mut store = create_test_store();
        let job = create_test_job("job1", "runner1");

        // Store the job
        assert!(store.job_store(job.clone()).is_ok());
        assert!(store.job_exists("job1"));

        // Get the job
        let retrieved = store.job_get("job1");
        assert!(retrieved.is_ok());
        assert_eq!(retrieved.unwrap().id, "job1");

        // List jobs
        let jobs = store.job_list();
        assert_eq!(jobs.len(), 1);
        assert!(jobs.contains(&"job1".to_string()));

        // Delete the job
        assert!(store.job_delete("job1").is_ok());
        assert!(!store.job_exists("job1"));
        assert!(store.job_get("job1").is_err());
    }

    #[test]
    fn test_job_not_found() {
        let store = create_test_store();
        let result = store.job_get("nonexistent");
        assert!(result.is_err());
    }

    #[test]
    fn test_multiple_jobs() {
        let mut store = create_test_store();

        // Add multiple jobs
        for i in 1..=3 {
            let job = create_test_job(&format!("job{}", i), "runner1");
            assert!(store.job_store(job).is_ok());
        }

        // Verify that all exist
        assert_eq!(store.job_list().len(), 3);
        assert!(store.job_exists("job1"));
        assert!(store.job_exists("job2"));
        assert!(store.job_exists("job3"));

        // Delete one
        assert!(store.job_delete("job2").is_ok());
        assert_eq!(store.job_list().len(), 2);
        assert!(!store.job_exists("job2"));
    }

    #[test]
    fn test_store_clone() {
        let mut store = create_test_store();
        store.runner_add("runner1".to_string()).unwrap();

        let job = create_test_job("job1", "runner1");
        store.job_store(job).unwrap();

        // Clone the store
        let cloned = store.clone();

        // Verify the cloned data
        assert!(cloned.runner_exists("runner1"));
        assert!(cloned.job_exists("job1"));
    }
}
360
bin/supervisor/src/supervisor.rs
Normal file
360
bin/supervisor/src/supervisor.rs
Normal file
@@ -0,0 +1,360 @@
|
|||||||
|
//! Main supervisor implementation for managing multiple actor runners.
|
||||||
|
|
||||||
|
use crate::error::{SupervisorError, SupervisorResult};
|
||||||
|
use crate::store::Store;
|
||||||
|
use hero_job_client::Client as JobClient;
|
||||||
|
use hero_job::{Job, JobStatus};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
|
|
||||||
|
// Re-export RPC types for convenience
|
||||||
|
pub use jsonrpsee::core::RpcResult;
|
||||||
|
pub use jsonrpsee::types::ErrorObject;
|
||||||
|
|
||||||
|
/// Main supervisor that manages multiple runners
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Supervisor {
|
||||||
|
/// Centralized storage layer with interior mutability
|
||||||
|
pub(crate) store: Arc<Mutex<Store>>,
|
||||||
|
/// Job client for Redis operations
|
||||||
|
pub(crate) job_client: JobClient,
|
||||||
|
/// Redis client for direct operations
|
||||||
|
pub(crate) redis_client: redis::Client,
|
||||||
|
// Optional Osiris client for persistent storage - temporarily disabled
|
||||||
|
// pub(crate) osiris_client: Option<osiris_client::OsirisClient>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Supervisor {
|
||||||
|
/// Create a new supervisor builder
|
||||||
|
pub fn builder() -> crate::builder::SupervisorBuilder {
|
||||||
|
crate::builder::SupervisorBuilder::new()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a job (store in memory only, does not dispatch)
|
||||||
|
/// Authorization must be checked by the caller (e.g., OpenRPC layer)
|
||||||
|
pub async fn job_create(&self, job: Job) -> SupervisorResult<String> {
|
||||||
|
let runner = job.runner.clone();
|
||||||
|
let job_id = job.id.clone();
|
||||||
|
|
||||||
|
let mut store = self.store.lock().await;
|
||||||
|
if !store.runner_exists(&runner) {
|
||||||
|
return Err(SupervisorError::RunnerNotFound {
|
||||||
|
runner_id: runner,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store job in memory only
|
||||||
|
store.job_store(job)?;
|
||||||
|
Ok(job_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delete a runner from the supervisor
|
||||||
|
pub async fn runner_delete(&self, runner_id: &str) -> SupervisorResult<()> {
|
||||||
|
self.store.lock().await.runner_remove(runner_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if a runner is registered
|
||||||
|
pub async fn has_runner(&self, runner_id: &str) -> bool {
|
||||||
|
self.store.lock().await.runner_exists(runner_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get a job by job ID from memory
|
||||||
|
pub async fn job_get(&self, job_id: &str) -> SupervisorResult<Job> {
|
||||||
|
self.store.lock().await.job_get(job_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ping a runner by dispatching a ping job to its queue
|
||||||
|
pub async fn runner_ping(&self, runner_id: &str) -> SupervisorResult<String> {
|
||||||
|
use hero_job::JobBuilder;
|
||||||
|
|
||||||
|
// Check if runner exists
|
||||||
|
let store = self.store.lock().await;
|
||||||
|
if !store.runner_exists(runner_id) {
|
||||||
|
return Err(SupervisorError::RunnerNotFound {
|
||||||
|
runner_id: runner_id.to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a ping job
|
||||||
|
let ping_job = JobBuilder::new()
|
||||||
|
.caller_id("supervisor_ping")
|
||||||
|
.context_id("ping_context")
|
||||||
|
.payload("ping")
|
||||||
|
.runner(runner_id)
|
||||||
|
.executor("ping")
|
||||||
|
.timeout(10)
|
||||||
|
.build()
|
||||||
|
.map_err(|e| SupervisorError::QueueError {
|
||||||
|
runner_id: runner_id.to_string(),
|
||||||
|
reason: format!("Failed to create ping job: {}", e),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Store and dispatch the ping job
|
||||||
|
let job_id = ping_job.id.clone();
|
||||||
|
drop(store);
|
||||||
|
self.store.lock().await.job_store(ping_job.clone())?;
|
||||||
|
self.job_client
|
||||||
|
.store_job_in_redis(&ping_job)
|
||||||
|
.await
|
||||||
|
.map_err(SupervisorError::from)?;
|
||||||
|
self.job_client
|
||||||
|
.job_run(&job_id, runner_id)
|
||||||
|
.await
|
||||||
|
.map_err(SupervisorError::from)?;
|
||||||
|
|
||||||
|
Ok(job_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stop a job by ID
|
||||||
|
pub async fn job_stop(&self, job_id: &str) -> SupervisorResult<()> {
|
||||||
|
// For now, we'll implement a basic stop by setting status to Stopping
|
||||||
|
let _ = self.job_client.set_job_status(job_id, JobStatus::Stopping).await;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delete a job by ID
|
||||||
|
/// Authorization must be checked by the caller (e.g., OpenRPC layer)
|
||||||
|
pub async fn job_delete(&self, job_id: &str) -> SupervisorResult<()> {
|
||||||
|
self.store.lock().await.job_delete(job_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List all managed runners
|
||||||
|
pub async fn runner_list(&self) -> Vec<String> {
|
||||||
|
self.store.lock().await.runner_list_all()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if a runner is registered
|
||||||
|
pub async fn runner_is_registered(&self, runner_id: &str) -> bool {
|
||||||
|
self.store.lock().await.runner_exists(runner_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Start a job by dispatching it to a runner's queue (fire-and-forget)
|
||||||
|
pub async fn job_start(&self, job_id: &str) -> SupervisorResult<()> {
|
||||||
|
// Get the job from memory
|
||||||
|
let job = self.job_get(job_id).await?;
|
||||||
|
let runner = job.runner.clone();
|
||||||
|
|
||||||
|
let store = self.store.lock().await;
|
||||||
|
if !store.runner_exists(&runner) {
|
||||||
|
return Err(SupervisorError::RunnerNotFound {
|
||||||
|
runner_id: runner,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store job in Redis and dispatch to runner queue
|
||||||
|
self.job_client
|
||||||
|
.store_job_in_redis(&job)
|
||||||
|
.await
|
||||||
|
.map_err(SupervisorError::from)?;
|
||||||
|
|
||||||
|
self.job_client
|
||||||
|
.job_run(&job.id, &runner)
|
||||||
|
.await
|
||||||
|
.map_err(SupervisorError::from)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run a job: create, dispatch, and wait for result
|
||||||
|
pub async fn job_run(&self, job: Job) -> SupervisorResult<String> {
|
||||||
|
let runner = job.runner.clone();
|
||||||
|
|
||||||
|
let mut store = self.store.lock().await;
|
||||||
|
if !store.runner_exists(&runner) {
|
||||||
|
return Err(SupervisorError::RunnerNotFound {
|
||||||
|
runner_id: runner,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store job in memory
|
||||||
|
store.job_store(job.clone())?;
|
||||||
|
drop(store);
|
||||||
|
|
||||||
|
// Use job_client's job_run_wait which handles store in Redis, dispatch, and wait
|
||||||
|
self.job_client
|
||||||
|
.job_run_wait(&job, &runner, 30)
|
||||||
|
.await
|
||||||
|
.map_err(SupervisorError::from)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Secret management methods removed - use API key management instead
|
||||||
|
// See add_api_key, remove_api_key, list_api_keys methods below
|
||||||
|
|
||||||
|
/// List all job IDs from memory
|
||||||
|
pub async fn job_list(&self) -> Vec<String> {
|
||||||
|
self.store.lock().await.job_list()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the status of a job
|
||||||
|
pub async fn job_status(&self, job_id: &str) -> SupervisorResult<JobStatus> {
|
||||||
|
// First check if job exists in memory (created but not started)
|
||||||
|
let store = self.store.lock().await;
|
||||||
|
if let Ok(_job) = store.job_get(job_id) {
|
||||||
|
drop(store);
|
||||||
|
// Try to get status from Redis
|
||||||
|
match self.job_client.get_status(job_id).await {
|
||||||
|
Ok(status) => return Ok(status),
|
||||||
|
Err(hero_job_client::ClientError::Job(hero_job::JobError::NotFound(_))) => {
|
||||||
|
// Job exists in memory but not in Redis - it's created but not dispatched
|
||||||
|
return Ok(JobStatus::Created);
|
||||||
|
}
|
||||||
|
Err(e) => return Err(SupervisorError::from(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
drop(store);
|
||||||
|
|
||||||
|
// Job not in memory, try Redis
|
||||||
|
let status = self.job_client.get_status(job_id).await
|
||||||
|
.map_err(|e| match e {
|
||||||
|
hero_job_client::ClientError::Job(hero_job::JobError::NotFound(_)) => {
|
||||||
|
SupervisorError::JobNotFound { job_id: job_id.to_string() }
|
||||||
|
}
|
||||||
|
_ => SupervisorError::from(e)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(status)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the result of a job (returns immediately with current result or error)
|
||||||
|
pub async fn job_result(&self, job_id: &str) -> SupervisorResult<Option<String>> {
|
||||||
|
// Use client's get_status to check if job exists and get its status
|
||||||
|
let status = self.job_client.get_status(job_id).await
|
||||||
|
.map_err(|e| match e {
|
||||||
|
            hero_job_client::ClientError::Job(hero_job::JobError::NotFound(_)) => {
                SupervisorError::JobNotFound { job_id: job_id.to_string() }
            }
            _ => SupervisorError::from(e)
        })?;

        // If the job has error status, get the error message
        if status.as_str() == "error" {
            let error_msg = self.job_client.get_error(job_id).await
                .map_err(SupervisorError::from)?;

            return Ok(Some(format!("Error: {}", error_msg.unwrap_or_else(|| "Unknown error".to_string()))));
        }

        // Use the client's get_result to fetch the result
        let result = self.job_client.get_result(job_id).await
            .map_err(SupervisorError::from)?;

        Ok(result)
    }

    /// Get logs for a specific job
    ///
    /// Reads log files from the logs/actor/<runner_name>/job-<job_id>/ directory
    pub async fn job_logs(&self, job_id: &str, lines: Option<usize>) -> SupervisorResult<Vec<String>> {
        // Determine the logs directory path; default to ~/hero/logs
        let logs_root = if let Some(home) = std::env::var_os("HOME") {
            std::path::PathBuf::from(home).join("hero").join("logs")
        } else {
            std::path::PathBuf::from("logs")
        };

        // Check if the logs directory exists
        if !logs_root.exists() {
            return Ok(vec![format!("Logs directory not found: {}", logs_root.display())]);
        }

        let actor_dir = logs_root.join("actor");
        if !actor_dir.exists() {
            return Ok(vec![format!("Actor logs directory not found: {}", actor_dir.display())]);
        }

        // Search through all runner directories to find the job
        if let Ok(entries) = std::fs::read_dir(&actor_dir) {
            for entry in entries.flatten() {
                if entry.path().is_dir() {
                    let job_dir = entry.path().join(format!("job-{}", job_id));

                    if job_dir.exists() {
                        // Read all log files in the directory
                        let mut all_logs = Vec::new();

                        if let Ok(log_entries) = std::fs::read_dir(&job_dir) {
                            // Collect all log files with their paths for sorting
                            let mut log_files: Vec<_> = log_entries
                                .flatten()
                                .filter(|e| {
                                    if !e.path().is_file() {
                                        return false;
                                    }
                                    // Accept files that start with "log" (covers the log.YYYY-MM-DD-HH format)
                                    e.file_name().to_string_lossy().starts_with("log")
                                })
                                .collect();

                            // Sort by filename (which includes the timestamp for hourly rotation)
                            log_files.sort_by_key(|e| e.path());

                            // Read the files in order
                            for entry in log_files {
                                if let Ok(content) = std::fs::read_to_string(entry.path()) {
                                    all_logs.extend(content.lines().map(|s| s.to_string()));
                                }
                            }
                        }

                        // If a line limit is specified, return only the last N lines
                        if let Some(n) = lines {
                            let start = all_logs.len().saturating_sub(n);
                            return Ok(all_logs[start..].to_vec());
                        } else {
                            return Ok(all_logs);
                        }
                    }
                }
            }
        }

        // If no logs were found, return a helpful message
        Ok(vec![format!("No logs found for job: {}", job_id)])
    }
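A minimal usage sketch for the log-tail helper above (hypothetical: `print_job_tail`, the `supervisor` value, and the job ID are illustrative; it assumes `Supervisor` and `SupervisorResult` are the crate's exported types):

```rust
// Sketch: print the last `n` log lines for a job via the job_logs method above.
async fn print_job_tail(
    supervisor: &hero_supervisor::Supervisor,
    job_id: &str,
    n: usize,
) -> hero_supervisor::SupervisorResult<()> {
    let lines = supervisor.job_logs(job_id, Some(n)).await?;
    for line in lines {
        println!("{}", line);
    }
    Ok(())
}
```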
    // API Key Management - These methods provide direct access to the key store
    // Authorization checking should be done at the OpenRPC layer before calling these

    /// Get an API key by its value
    pub(crate) async fn key_get(&self, key_id: &str) -> Option<crate::auth::ApiKey> {
        self.store.lock().await.key_get(key_id).cloned()
    }

    /// Create an API key with a specific value
    pub(crate) async fn key_create(&self, key: crate::auth::ApiKey) -> crate::auth::ApiKey {
        self.store.lock().await.key_create(key)
    }

    /// Delete an API key
    pub(crate) async fn key_delete(&self, key_id: &str) -> Option<crate::auth::ApiKey> {
        self.store.lock().await.key_delete(key_id)
    }

    /// List all API keys
    pub(crate) async fn key_list(&self) -> Vec<crate::auth::ApiKey> {
        self.store.lock().await.key_list()
    }

    /// List API keys by scope
    pub(crate) async fn key_list_by_scope(&self, scope: crate::auth::ApiKeyScope) -> Vec<crate::auth::ApiKey> {
        self.store.lock().await.key_list_by_scope(scope)
    }
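A hedged in-crate sketch of a key round trip through these store accessors (hypothetical helper; it uses `create_api_key`, defined just below, and the `ApiKeyScope::User` variant is an assumption inferred from the `"user"` scope string used in the tests later in this diff):

```rust
// Sketch (inside the hero_supervisor crate): create a key, then confirm the listing sees it.
// `ApiKeyScope::User` is an assumed variant name.
async fn demo_key_roundtrip(supervisor: &Supervisor) {
    let key = supervisor
        .create_api_key("demo-key".to_string(), crate::auth::ApiKeyScope::User)
        .await;
    let all = supervisor.key_list().await;
    assert!(all.iter().any(|k| k.name == key.name));
}
```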

    // Runner Management

    /// Create a new runner
    /// Authorization must be checked by the caller (e.g., the OpenRPC layer)
    pub async fn runner_create(&self, runner_id: String) -> SupervisorResult<String> {
        self.store.lock().await.runner_add(runner_id.clone())?;
        Ok(runner_id)
    }

    /// Create a new API key with a generated UUID
    pub async fn create_api_key(&self, name: String, scope: crate::auth::ApiKeyScope) -> crate::auth::ApiKey {
        self.store.lock().await.key_create_new(name, scope)
    }
}

// Note: the Default implementation was removed because it requires async initialization.
// Use Supervisor::builder() for proper initialization.
195
bin/supervisor/tests/README.md
Normal file
@@ -0,0 +1,195 @@
# Supervisor End-to-End Tests

Comprehensive integration tests for all Hero Supervisor OpenRPC client methods.

## Prerequisites

1. **Redis Server Running:**
```bash
redis-server
```

2. **Supervisor Running:**
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
./scripts/run.sh
```

## Running Tests

### Run All Tests
```bash
cargo test --test end_to_end
```

### Run a Specific Test
```bash
cargo test --test end_to_end test_01_rpc_discover
```

### Run with Output
```bash
cargo test --test end_to_end -- --nocapture
```

### Run in Order (Sequential)
```bash
cargo test --test end_to_end -- --test-threads=1 --nocapture
```

## Test Coverage

### ✅ Discovery & Info
- `test_01_rpc_discover` - OpenRPC specification discovery
- `test_15_supervisor_info` - Supervisor information

### ✅ Runner Management
- `test_02_runner_register` - Register a new runner
- `test_03_runner_list` - List all runners
- `test_14_runner_remove` - Remove a runner

### ✅ Job Management
- `test_04_jobs_create` - Create a job without running it
- `test_05_jobs_list` - List all jobs
- `test_06_job_run_simple` - Run a job and wait for the result
- `test_07_job_status` - Get job status
- `test_08_job_get` - Get a job by ID
- `test_09_job_delete` - Delete a job

### ✅ Authentication & API Keys
- `test_10_auth_verify` - Verify the current API key
- `test_11_auth_key_create` - Create a new API key
- `test_12_auth_key_list` - List all API keys
- `test_13_auth_key_remove` - Remove an API key

### ✅ Complete Workflow
- `test_99_complete_workflow` - End-to-end integration test

## Test Configuration

Tests use the following defaults:
- **Supervisor URL:** `http://127.0.0.1:3030`
- **Admin Secret:** `807470fd1e1ccc3fb997a1d4177cceb31a68cb355a4412c8fd6e66e517e902be`
- **Test Runner:** `test-runner` (all tests use this runner name)

**Important:** All tests use the same runner name (`test-runner`), so you only need to start one runner with that name to run all tests.

## Expected Behavior

### Successful Tests
All tests should pass when:
- The supervisor is running on port 3030
- The admin secret matches the configuration
- Redis is accessible

### Expected Warnings
Some tests may show warnings if:
- `job.run` times out (no actual runner connected to Redis)
- Runners already exist from previous test runs

These are expected and do not indicate test failure.

## Troubleshooting

### Connection Refused
```
Error: tcp connect error, 127.0.0.1:3030, Connection refused
```
**Solution:** Start the supervisor with `./scripts/run.sh`

### Method Not Found
```
Error: Method not found
```
**Solution:** Rebuild the supervisor with the latest code:
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
cargo build
```

### Authorization Failed
```
Error: Missing Authorization header
```
**Solution:** Check that `ADMIN_SECRET` in the tests matches the supervisor configuration

### Job Tests Timeout
```
Error: JsonRpc(RequestTimeout)
```
**Solution:** Make sure you have a runner connected with the name `test-runner`:
```bash
cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner/rust
cargo run --bin runner_osiris -- test-runner
```

## Continuous Integration

To run the tests in CI:

```bash
#!/bin/bash
# Start Redis
redis-server --daemonize yes

# Start Supervisor
cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor
./scripts/run.sh &
SUPERVISOR_PID=$!

# Wait for the supervisor to be ready
sleep 2

# Run tests
cargo test --test end_to_end

# Cleanup
kill $SUPERVISOR_PID
redis-cli shutdown
```

## Adding New Tests

1. Create a new test function:
```rust
#[tokio::test]
async fn test_XX_my_new_test() {
    println!("\n🧪 Test: my.new.method");
    let client = create_client().await;
    // ... test code ...
    println!("✅ my.new.method works");
}
```

2. Run it:
```bash
cargo test --test end_to_end test_XX_my_new_test -- --nocapture
```

## Test Output Example

```
🧪 Test: rpc.discover
✅ rpc.discover works

🧪 Test: runner.register
✅ runner.register works - registered: test-runner-e2e

🧪 Test: runner.list
✅ runner.list works - found 3 runners
  - osiris
  - freezone
  - test-runner-e2e

🧪 Test: jobs.create
✅ jobs.create works - created job: 550e8400-e29b-41d4-a716-446655440000

...
```

## Notes

- Tests are designed to be idempotent (they can run multiple times)
- Tests clean up after themselves when possible
- Some tests depend on previous test state (use `--test-threads=1` for strict ordering)
- Job execution tests may time out if no runner is connected to Redis (this is expected)
482
bin/supervisor/tests/end_to_end.rs
Normal file
@@ -0,0 +1,482 @@
//! End-to-End Integration Tests for Hero Supervisor
//!
//! Tests all OpenRPC client methods against a running supervisor instance.
//! The supervisor is automatically started and stopped for each test run.

use hero_supervisor_openrpc_client::SupervisorClient;
use hero_supervisor::{SupervisorBuilder, openrpc::start_http_openrpc_server};
use hero_job::{Job, JobBuilder};
use std::sync::Once;

/// Test configuration
const SUPERVISOR_URL: &str = "http://127.0.0.1:3031";
const ADMIN_SECRET: &str = "test-admin-secret-for-e2e-tests";
const TEST_RUNNER_NAME: &str = "test-runner";

/// Global initialization flag
static INIT: Once = Once::new();

/// Initialize and start the supervisor (called once)
async fn init_supervisor() {
    // Use a blocking approach to ensure the supervisor starts before any test runs
    static mut INITIALIZED: bool = false;

    unsafe {
        INIT.call_once(|| {
            // Spawn a new runtime for the supervisor
            std::thread::spawn(|| {
                let rt = tokio::runtime::Runtime::new().unwrap();
                rt.block_on(async {
                    // Build the supervisor with the test configuration
                    let supervisor = SupervisorBuilder::new()
                        .admin_secrets(vec![ADMIN_SECRET.to_string()])
                        .build()
                        .await
                        .expect("Failed to build supervisor");

                    // Start the OpenRPC server
                    match start_http_openrpc_server(supervisor, "127.0.0.1", 3031).await {
                        Ok(server_handle) => {
                            server_handle.stopped().await;
                        }
                        Err(e) => {
                            eprintln!("OpenRPC server error: {}", e);
                        }
                    }
                });
            });

            // Give the server time to start
            std::thread::sleep(std::time::Duration::from_secs(1));
            INITIALIZED = true;
        });
    }
}

/// Helper to create a test client
async fn create_client() -> SupervisorClient {
    // Ensure the supervisor is running
    init_supervisor().await;

    SupervisorClient::builder()
        .url(SUPERVISOR_URL)
        .secret(ADMIN_SECRET)
        .build()
        .expect("Failed to create supervisor client")
}

/// Helper to create a test job (always uses TEST_RUNNER_NAME)
fn create_test_job(payload: &str) -> Job {
    JobBuilder::new()
        .caller_id("e2e-test")
        .context_id("test-context")
        .runner(TEST_RUNNER_NAME)
        .payload(payload)
        .executor("rhai")
        .timeout(30)
        .build()
        .expect("Failed to build test job")
}

#[tokio::test]
async fn test_01_rpc_discover() {
    println!("\n🧪 Test: rpc.discover");

    let client = create_client().await;
    let result = client.discover().await;

    assert!(result.is_ok(), "rpc.discover should succeed");
    let spec = result.unwrap();

    // Verify it's a valid OpenRPC spec
    assert!(spec.get("openrpc").is_some(), "Should have an openrpc field");
    assert!(spec.get("methods").is_some(), "Should have a methods field");

    println!("✅ rpc.discover works");
}

#[tokio::test]
async fn test_02_runner_register() {
    println!("\n🧪 Test: runner.register");

    let client = create_client().await;

    // Register a test runner
    let result = client.runner_create(TEST_RUNNER_NAME).await;

    // Should succeed or already exist
    match result {
        Ok(()) => {
            println!("✅ runner.register works - registered: {}", TEST_RUNNER_NAME);
        }
        Err(e) => {
            // If it fails, the runner might already exist, which is okay
            println!("⚠️ runner.register: {:?} (may already exist)", e);
        }
    }
}

#[tokio::test]
async fn test_03_runner_list() {
    println!("\n🧪 Test: runner.list");

    let client = create_client().await;

    // First ensure our test runner exists
    let _ = client.runner_create(TEST_RUNNER_NAME).await;

    // List all runners
    let result = client.runner_list().await;

    if let Err(ref e) = result {
        println!("   Error: {:?}", e);
    }
    assert!(result.is_ok(), "runner.list should succeed");
    let runners = result.unwrap();

    assert!(!runners.is_empty(), "Should have at least one runner");
    assert!(runners.contains(&TEST_RUNNER_NAME.to_string()),
            "Should contain our test runner");

    println!("✅ runner.list works - found {} runners", runners.len());
    for runner in &runners {
        println!("  - {}", runner);
    }
}

#[tokio::test]
async fn test_04_jobs_create() {
    println!("\n🧪 Test: jobs.create");

    let client = create_client().await;

    // Ensure the runner exists
    let _ = client.runner_create(TEST_RUNNER_NAME).await;

    // Create a job without running it
    let job = create_test_job("print('test job');");
    let result = client.job_create(job).await;

    match &result {
        Ok(_) => {},
        Err(e) => println!("   Error: {:?}", e),
    }
    assert!(result.is_ok(), "jobs.create should succeed");
    let job_id = result.unwrap();

    assert!(!job_id.is_empty(), "Should return a job ID");
    println!("✅ jobs.create works - created job: {}", job_id);
}

#[tokio::test]
async fn test_05_jobs_list() {
    println!("\n🧪 Test: jobs.list");

    let client = create_client().await;

    // Create a job first
    let _ = client.runner_create(TEST_RUNNER_NAME).await;
    let job = create_test_job("print('list test');");
    let _ = client.job_create(job).await;

    // List all jobs
    let result = client.job_list().await;

    assert!(result.is_ok(), "jobs.list should succeed");
    let jobs = result.unwrap();

    println!("✅ jobs.list works - found {} jobs", jobs.len());
}

#[tokio::test]
async fn test_06_job_run_simple() {
    println!("\n🧪 Test: job.run (simple script)");

    let client = create_client().await;

    // Ensure the runner exists
    let _ = client.runner_create(TEST_RUNNER_NAME).await;

    // Run a simple job
    let job = create_test_job(r#"
        print("Hello from test!");
        42
    "#);

    let result = client.job_run(job, Some(30)).await;

    // Note: this will time out if no runner is actually connected to Redis,
    // but we're testing the API call itself
    match result {
        Ok(response) => {
            println!("✅ job.run works - job_id: {}, status: {}",
                     response.job_id, response.status);
        }
        Err(e) => {
            println!("⚠️ job.run: {:?} (runner may not be connected)", e);
            // This is expected if no actual runner is listening
        }
    }
}

#[tokio::test]
async fn test_07_job_status() {
    println!("\n🧪 Test: job.status");

    let client = create_client().await;

    // Create a job first
    let _ = client.runner_create(TEST_RUNNER_NAME).await;
    let job = create_test_job("print('status test');");
    let job_id = client.job_create(job).await.expect("Failed to create job");

    // Get the job status
    let result = client.job_status(&job_id).await;

    if let Err(ref e) = result {
        println!("   Error: {:?}", e);
    }
    assert!(result.is_ok(), "job.status should succeed");
    let status = result.unwrap();

    println!("✅ job.status works - job: {}, status: {:?}",
             job_id, status);
}

#[tokio::test]
async fn test_08_job_get() {
    println!("\n🧪 Test: job.get");

    let client = create_client().await;

    // Create a job first
    let _ = client.runner_create(TEST_RUNNER_NAME).await;
    let original_job = create_test_job("print('get test');");
    let job_id = client.job_create(original_job.clone()).await
        .expect("Failed to create job");

    // Get the job
    let result = client.job_get(&job_id).await;

    assert!(result.is_ok(), "job.get should succeed");
    let job = result.unwrap();

    assert_eq!(job.id, job_id);
    println!("✅ job.get works - retrieved job: {}", job.id);
}

#[tokio::test]
async fn test_09_job_delete() {
    println!("\n🧪 Test: job.delete");

    let client = create_client().await;

    // Create a job first
    let _ = client.runner_create(TEST_RUNNER_NAME).await;
    let job = create_test_job("print('delete test');");
    let job_id = client.job_create(job).await.expect("Failed to create job");

    // Delete the job
    let result = client.job_delete(&job_id).await;

    if let Err(ref e) = result {
        println!("   Error: {:?}", e);
    }
    assert!(result.is_ok(), "job.delete should succeed");
    println!("✅ job.delete works - deleted job: {}", job_id);

    // Verify it's gone
    let get_result = client.job_get(&job_id).await;
    assert!(get_result.is_err(), "Job should not exist after deletion");
}

#[tokio::test]
async fn test_10_auth_verify() {
    println!("\n🧪 Test: auth.verify");

    let client = create_client().await;

    let result = client.auth_verify().await;

    assert!(result.is_ok(), "auth.verify should succeed with a valid key");
    let auth_info = result.unwrap();

    println!("✅ auth.verify works");
    println!("   Scope: {}", auth_info.scope);
    println!("   Name: {}", auth_info.name.unwrap_or_else(|| "N/A".to_string()));
}

#[tokio::test]
async fn test_11_auth_key_create() {
    println!("\n🧪 Test: auth.key.create");

    let client = create_client().await;

    use hero_supervisor_openrpc_client::GenerateApiKeyParams;
    let params = GenerateApiKeyParams {
        name: "test-key".to_string(),
        scope: "user".to_string(),
    };
    let result = client.key_generate(params).await;

    assert!(result.is_ok(), "auth.key.create should succeed");
    let api_key = result.unwrap();

    assert!(!api_key.key.is_empty(), "Should return a key");
    assert_eq!(api_key.name, "test-key");
    assert_eq!(api_key.scope, "user");

    println!("✅ auth.key.create works - created key: {}...",
             &api_key.key[..api_key.key.len().min(8)]);
}

#[tokio::test]
async fn test_12_auth_key_list() {
    println!("\n🧪 Test: auth.key.list");

    let client = create_client().await;

    // Create a key first
    use hero_supervisor_openrpc_client::GenerateApiKeyParams;
    let params = GenerateApiKeyParams {
        name: "list-test-key".to_string(),
        scope: "user".to_string(),
    };
    let _ = client.key_generate(params).await;

    let result = client.key_list().await;

    assert!(result.is_ok(), "auth.key.list should succeed");
    let keys = result.unwrap();

    println!("✅ auth.key.list works - found {} keys", keys.len());
    for key in &keys {
        println!("  - {} ({}): {}...", key.name, key.scope,
                 &key.key[..key.key.len().min(8)]);
    }
}

#[tokio::test]
async fn test_13_auth_key_remove() {
    println!("\n🧪 Test: auth.key.remove");

    let client = create_client().await;

    // Create a key first
    use hero_supervisor_openrpc_client::GenerateApiKeyParams;
    let params = GenerateApiKeyParams {
        name: "remove-test-key".to_string(),
        scope: "user".to_string(),
    };
    let api_key = client.key_generate(params)
        .await
        .expect("Failed to create key");

    // Remove it (use the name as the key_id, not the key value)
    let result = client.key_delete(api_key.name.clone()).await;

    if let Err(ref e) = result {
        println!("   Error: {:?}", e);
    }
    assert!(result.is_ok(), "auth.key.remove should succeed");
    println!("✅ auth.key.remove works - removed key: {}...",
             &api_key.key[..api_key.key.len().min(8)]);
}

#[tokio::test]
async fn test_14_runner_remove() {
    println!("\n🧪 Test: runner.remove");

    let client = create_client().await;

    // Register a runner to remove
    let runner_name = "test-runner-to-remove";
    let _ = client.runner_create(runner_name).await;

    // Remove it
    let result = client.runner_remove(runner_name).await;

    assert!(result.is_ok(), "runner.remove should succeed");
    println!("✅ runner.remove works - removed: {}", runner_name);

    // Verify it's gone
    let runners = client.runner_list().await.unwrap();
    assert!(!runners.contains(&runner_name.to_string()),
            "Runner should not exist after removal");
}

#[tokio::test]
async fn test_15_supervisor_info() {
    println!("\n🧪 Test: supervisor.info");

    let client = create_client().await;

    let result = client.get_supervisor_info().await;

    if let Err(ref e) = result {
        println!("   Error: {:?}", e);
    }
    assert!(result.is_ok(), "supervisor.info should succeed");
    let info = result.unwrap();

    println!("✅ supervisor.info works");
    println!("   Server URL: {}", info.server_url);
}

/// Integration test that runs a complete workflow
#[tokio::test]
async fn test_99_complete_workflow() {
    println!("\n🧪 Test: Complete Workflow");

    let client = create_client().await;

    // 1. Register a runner
    println!("   1. Registering runner...");
    let _ = client.runner_create("workflow-runner").await;

    // 2. List runners
    println!("   2. Listing runners...");
    let runners = client.runner_list().await.unwrap();
    assert!(runners.contains(&"workflow-runner".to_string()));

    // 3. Create an API key
    println!("   3. Creating API key...");
    use hero_supervisor_openrpc_client::GenerateApiKeyParams;
    let params = GenerateApiKeyParams {
        name: "workflow-key".to_string(),
        scope: "user".to_string(),
    };
    let api_key = client.key_generate(params).await.unwrap();

    // 4. Verify auth
    println!("   4. Verifying auth...");
    let _ = client.auth_verify().await.unwrap();

    // 5. Create a job
    println!("   5. Creating job...");
    let job = create_test_job("print('workflow test');");
    let job_id = client.job_create(job).await.unwrap();

    // 6. Get the job status
    println!("   6. Getting job status...");
    let _status = client.job_status(&job_id).await.unwrap();

    // 7. List all jobs
    println!("   7. Listing all jobs...");
    let jobs = client.job_list().await.unwrap();
    assert!(!jobs.is_empty());

    // 8. Delete the job
    println!("   8. Deleting job...");
    let _ = client.job_delete(&job_id).await.unwrap();

    // 9. Remove the API key
    println!("   9. Removing API key...");
    let _ = client.key_delete(api_key.name).await.unwrap();

    // 10. Remove the runner
    println!("   10. Removing runner...");
    let _ = client.runner_remove("workflow-runner").await.unwrap();

    println!("✅ Complete workflow test passed!");
}
31
bin/supervisor/tests/job_api_integration_tests.rs
Normal file
@@ -0,0 +1,31 @@
//! Integration tests for the job API
//!
//! These tests validate the complete job lifecycle using a real supervisor instance.
//! They require Redis and a running supervisor to execute properly.

use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult};
use std::time::Duration;
use tokio::time::sleep;
use uuid::Uuid;

/// Test helper to create a unique job for testing
fn create_test_job(context: &str) -> Result<hero_supervisor_openrpc_client::Job, Box<dyn std::error::Error>> {
    JobBuilder::new()
        .caller_id("integration_test")
        .context_id(context)
        .payload("echo 'Test job output'")
        .executor("osis")
        .runner("osis_runner_1")
        .timeout(30)
        .env_var("TEST_VAR", "test_value")
        .build()
        .map_err(|e| e.into())
}

/// Test helper to check if supervisor is available
async fn is_supervisor_available() -> bool {
    match SupervisorClient::new("http://localhost:3030") {
        Ok(client) => client.discover().await.is_ok(),
        Err(_) => false,
    }
}
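As committed, this file contains only the two helpers. A hedged sketch of a test that could be built on them (hypothetical test body; the client methods `job_create`, `job_status`, and `job_delete` are the ones exercised in end_to_end.rs above, and authentication requirements are not modeled here):

```rust
#[tokio::test]
async fn test_job_create_status_delete() {
    // Skip gracefully when no supervisor is listening (helper defined above).
    if !is_supervisor_available().await {
        println!("⚠️ Supervisor not available, skipping test");
        return;
    }

    let client = SupervisorClient::new("http://localhost:3030")
        .expect("Failed to create client");
    let job = create_test_job(&format!("ctx-{}", Uuid::new_v4()))
        .expect("Failed to build job");

    // Create the job, check its status, then clean up.
    let job_id = client.job_create(job).await.expect("jobs.create failed");
    let _status = client.job_status(&job_id).await.expect("job.status failed");
    client.job_delete(&job_id).await.expect("job.delete failed");
}
```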
91
docs/ethymology.md
Normal file
@@ -0,0 +1,91 @@
# HORUS — The Meaning Behind the Name

*Hierarchical Orchestration Runtime for Universal Scripts*

---

## 1. Why “Horus”?

**Horus** is one of the oldest and most symbolic deities of ancient Egypt:
a god of the **sky, perception, order, and dominion**.

In mythology, Horus *is* the sky itself;
his **right eye is the sun** (clarity, authority),
his **left eye the moon** (rhythm, balance).

This symbolism aligns perfectly with a system built to supervise, coordinate, and execute distributed workloads.

---

## 2. Symbolic Mapping to the Architecture

- **Sky** → the compute fabric itself
- **Solar eye (sun)** → supervisor layer (visibility, authentication, authority)
- **Lunar eye (moon)** → coordinator layer (workflow rhythms, stepwise order)
- **Falcon wings** → runners (swift execution of tasks)
- **Battle against chaos** → ordering and normalizing raw jobs

Horus is an archetype of **oversight**, **correct action**, and **restoring balance**—all fundamental qualities of an agentic execution system.

---

## 3. The Name as a Backronym

**H O R U S**
**H**ierarchical
**O**rchestration
**R**untime for
**U**niversal
**S**cripts

This describes the system exactly:
a runtime that receives jobs, authenticates them, orchestrates workflows, and executes scripts across distributed runners.

---

## 4. Why It Fits This Stack

The stack consists of:

- **Job** – the incoming intent
- **Supervisor** – verifies, authenticates, admits
- **Coordinator** – plans, arranges, sequences
- **Runner** – executes scripts
- **SAL** – system-level script engine
- **Osiris** – object-level storage & retrieval engine

All of this is unified by the central logic of *oversight, orchestration, and action*.

Horus expresses these ideas precisely:
- Observation → validation & monitoring
- Order → workflow coordination
- Action → script execution
- Sky → the domain that contains all processes beneath it

---

## 5. Visual & Conceptual Identity

**Themes:**
- The Eye of Horus → observability, correctness, safety
- Falcon → agile execution
- Sky → the domain of computation
- Light (sun/moon) → insight, clarity, cycle

**Palette concepts:**
- Gold + deep blue
- Light on dark (sun in sky)
- Single-line geometric Eye (modernized)

The name offers both deep mythic roots and clean, modern branding potential.

---

## 6. Narrative Summary

**HORUS** is the execution sky:
the domain where jobs arrive, gain form, and become actions.
It brings clarity to chaos, structure to tasks, and order to distributed systems.

It is not just a name.
It is the story of a system that sees clearly, acts decisively, and orchestrates wisely.

---
Some files were not shown because too many files have changed in this diff.