diff --git a/Cargo.toml b/Cargo.toml index a232f59..050f321 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,23 @@ members = [ "lib/runner", ] +[package] +name = "horus" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +# Integration test dependencies - no library dependencies, tests spawn binaries +hero-supervisor-openrpc-client = { path = "lib/clients/supervisor" } +hero-job = { path = "lib/models/job" } +tokio = { workspace = true } +lazy_static = { workspace = true } +escargot = "0.5" +ctrlc = "3.4" + [workspace.package] version = "0.1.0" edition = "2024" diff --git a/bin/coordinator/coordinator-architecture.md b/bin/coordinator/coordinator-architecture.md new file mode 100644 index 0000000..5d0cbaa --- /dev/null +++ b/bin/coordinator/coordinator-architecture.md @@ -0,0 +1,237 @@ +# Coordinator Architecture + +## Overview + +The Coordinator orchestrates distributed job execution across multiple Supervisors using a message-queue based architecture with DAG-based dependency resolution. + +## Core Components + +### 1. Data Models + +#### Job +- **Purpose**: Pure data model for executable work units +- **Storage**: Redis (CRUD operations only) +- **Fields**: `id`, `caller_id`, `context_id`, `script`, `executor`, `timeout`, `retries`, etc. +- **Note**: Job does NOT contain status or dependencies (moved to FlowNode) + +#### FlowNode +- **Purpose**: Workflow orchestration metadata +- **Contains**: + - `id`: References a Job + - `depends`: Vec - Job IDs this node depends on + - `prerequisites`: Vec - External prerequisites + - `supervisor_url`: String - Where to route this job + - `node_status`: NodeStatus - Execution state (Pending, Ready, Dispatched, Running, Completed, Failed, Cancelled) + +#### Flow +- **Purpose**: Defines a workflow +- **Fields**: `id`, `caller_id`, `context_id`, `jobs: Vec`, `status: FlowStatus` +- **Status**: Created, Started, Finished, Error + +#### Message +- **Purpose**: Transport envelope for job dispatch +- **Current**: Contains `job: Vec` (legacy) +- **Target**: Should contain `nodes: Vec` for proper routing +- **Fields**: `id`, `caller_id`, `context_id`, `message`, `status`, `transport_id`, etc. + +#### Runner +- **Purpose**: Supervisor registration in a context +- **Fields**: `id`, `pubkey`, `address`, `topic`, `secret` +- **Routing**: Router uses `pubkey` if non-empty, else `address` + +### 2. Flow Execution Pipeline + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ 1. User calls flow.start (RPC) │ +└────────────────────┬────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ 2. flow_start() - service.rs:361 │ +│ - Validates flow exists │ +│ - Registers in active schedulers set │ +│ - Sets flow status to Started │ +│ - Spawns background scheduler loop (tokio::spawn) │ +└────────────────────┬────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ 3. 
Scheduler Loop (background task) - service.rs:384 │ +│ Loop every 1 second: │ +│ - Load flow and all jobs │ +│ - For each job: │ +│ • Check job status (TODO: use DAG node_status instead) │ +│ • If WaitingForPrerequisites AND deps_ok: │ +│ → Create Message with job │ +│ → Save message to Redis │ +│ → Enqueue to msg_out: redis.enqueue_msg_out() │ +│ → Update job status to Dispatched │ +│ - Track aggregate state (all_finished, any_error) │ +│ - Update flow status to Finished/Error when done │ +│ - Remove from active schedulers set │ +└────────────────────┬────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ 4. Router Loops (one per context) - router.rs:120 │ +│ - Auto-discovered contexts (start_router_auto) │ +│ - Per-context loop: │ +│ • Polls msg_out queue: service.brpop_msg_out(ctx_id, 1s) │ +│ • Gets message key: "message:{caller_id}:{id}" │ +│ • Spawns deliver_one() with concurrency control │ +└────────────────────┬────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ 5. deliver_one() - router.rs:188 │ +│ - Loads Message from Redis │ +│ - Gets Runner (supervisor info) │ +│ TODO: Should use FlowNode.supervisor_url for routing │ +│ Current: Scans all runners, picks first available │ +│ - Creates SupervisorClient (via cache) │ +│ • Destination: Runner.pubkey or Runner.address │ +│ • Topic: "supervisor.rpc" │ +│ • Secret: Runner.secret (optional) │ +│ - Sends job.run to supervisor (client.job_run()) │ +│ - Waits for synchronous reply │ +│ - Updates message status to Acknowledged │ +│ - Updates message transport status to Delivered │ +│ TODO: Update node_status to Dispatched in DAG │ +└────────────────────┬────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ 6. Supervisor Execution │ +│ - Supervisor receives job.run │ +│ - Executes job in runner (Python, V, etc.) │ +│ - Sends result back via Mycelium │ +│ - SupervisorHub receives reply │ +└────────────────────┬────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ 7. 
Job Completion (TODO: Missing implementation) │ +│ - Supervisor reply should trigger: │ +│ • Update node_status in DAG (Completed/Failed) │ +│ • Persist DAG state │ +│ - Scheduler loop detects completion │ +│ - Dispatches next ready jobs │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Key Subsystems + +### DAG (Directed Acyclic Graph) +- **File**: `bin/coordinator/src/dag.rs` +- **Purpose**: Dependency resolution and topological sorting +- **Key Functions**: + - `build_flow_dag()`: Constructs DAG from Flow and Jobs + - `ready_nodes()`: Returns nodes with satisfied dependencies + - `mark_node_started()`: Transitions node to started state + - `mark_node_completed()`: Transitions node to completed state + - `mark_node_failed()`: Transitions node to failed state + +### Service Layer +- **File**: `bin/coordinator/src/service.rs` +- **Purpose**: Business logic and orchestration +- **Key Methods**: + - `flow_start()`: Spawns scheduler loop for a flow + - `flow_execute()`: Creates messages for ready jobs + - `update_node_status()`: Updates node status with validation + - `update_node_status_unchecked()`: Updates node status without permission check + +### Router +- **File**: `bin/coordinator/src/router.rs` +- **Purpose**: Message delivery to supervisors +- **Key Components**: + - `start_router_auto()`: Auto-discovers contexts and spawns router loops + - `start_router()`: Per-context message delivery loop + - `deliver_one()`: Delivers single message to supervisor + - `SupervisorClientCache`: Reuses supervisor connections + +### Storage (Redis) +- **File**: `bin/coordinator/src/storage/redis.rs` +- **Purpose**: Persistence layer +- **Key Operations**: + - Job CRUD: `save_job()`, `load_job()`, `update_job_status()` + - Flow CRUD: `save_flow()`, `load_flow()`, `update_flow_status()` + - Message CRUD: `save_message()`, `load_message()`, `update_message_status()` + - Queue operations: `enqueue_msg_out()`, `brpop_msg_out()` + - Runner CRUD: `save_runner()`, `load_runner()`, `scan_runners()` + +## Communication + +### Mycelium Transport +- **Purpose**: Overlay network for supervisor communication +- **Protocol**: JSON-RPC over Mycelium +- **Components**: + - `SupervisorHub`: Global message broker + - `MyceliumTransport`: Transport implementation + - `SupervisorClient`: Typed client for supervisor calls + +### Message Flow +``` +Coordinator Mycelium Supervisor + | | | + |-- job.run (JSON-RPC) ------->| | + | |-- forward --------------->| + | | | + | |<-- reply ------------------| + |<-- result -------------------| | +``` + +## Current Refactoring (In Progress) + +### Completed +1. ✅ Separated Job (data) from FlowNode (orchestration) +2. ✅ Created NodeStatus enum for execution state tracking +3. ✅ Moved dependencies from Job to FlowNode +4. ✅ Created update_node_status() methods with state transition validation +5. ✅ Renamed methods to be node-centric (mark_node_started, etc.) + +### Completed ✅ +1. ✅ Replace `Message.job: Vec` with `Message.nodes: Vec` - Both fields present during migration +2. ✅ Update scheduler loop to use `DAG.ready_jobs()` instead of checking job status +3. ✅ Update `deliver_one()` to use `FlowNode.supervisor_url` for routing +4. ✅ Implement job completion handler to update node_status in DAG +5. ✅ Add flow_id to Message for proper DAG tracking +6. ✅ Update DAG runtime state (started, completed, failed_job) on node status changes +7. ✅ Fix all compilation errors - **Zero errors!** + +### TODO +1. 
❌ Persist DAG state to Redis (currently rebuilt each time) +2. ❌ Store supervisor secret in runner config and use in routing +3. ❌ Remove legacy `Message.job` field after full migration +4. ❌ Add DAG state recovery on coordinator restart + +## State Transitions + +### NodeStatus +``` +Pending ──────┐ + ├──> Ready ──> Dispatched ──> Running ──> Completed + │ └──> Failed + └──────────────────────────────────────> Cancelled +``` + +### FlowStatus +``` +Created ──> Started ──> Finished + └──> Error +``` + +## Configuration + +### Environment Variables +- `COORDINATOR_URL`: Coordinator RPC endpoint (default: http://127.0.0.1:9652) +- `MYCELIUM_URL`: Mycelium API endpoint (default: http://127.0.0.1:8990) + +### CLI Arguments +- `--mycelium-ip`: Mycelium IP address (default: 127.0.0.1) +- `--mycelium-port`: Mycelium port (default: 8990) +- `--redis-url`: Redis connection URL (default: redis://127.0.0.1:6379) + +## Testing + +See `scripts/supervisor_flow_demo.py` for end-to-end flow execution demo. diff --git a/bin/supervisor/Cargo.toml b/bin/supervisor/Cargo.toml index 795b8e5..da9d899 100644 --- a/bin/supervisor/Cargo.toml +++ b/bin/supervisor/Cargo.toml @@ -9,7 +9,7 @@ path = "src/lib.rs" [[bin]] name = "supervisor" -path = "src/bin/supervisor.rs" +path = "src/main.rs" [dependencies] # Job types diff --git a/bin/supervisor/README.md b/bin/supervisor/README.md index ea5255f..a7ba7d2 100644 --- a/bin/supervisor/README.md +++ b/bin/supervisor/README.md @@ -39,8 +39,4 @@ The supervisor automatically starts an OpenRPC server on `127.0.0.1:3030` that e curl -X POST -H "Content-Type: application/json" \ -d '{"jsonrpc":"2.0","method":"list_runners","id":1}' \ http://127.0.0.1:3030 -``` - -### Client - -The repository also offers OpenRPC Client for supervisor compatible with WASM targets as well. \ No newline at end of file +``` \ No newline at end of file diff --git a/bin/supervisor/docs/AUTH.md b/bin/supervisor/docs/AUTH.md deleted file mode 100644 index 1e6fcf7..0000000 --- a/bin/supervisor/docs/AUTH.md +++ /dev/null @@ -1,146 +0,0 @@ -# Hero Supervisor Authentication - -The Hero Supervisor now supports API key-based authentication with three permission scopes: - -## Permission Scopes - -1. **Admin** - Full access to all operations including key management -2. **Registrar** - Can register new runners -3. **User** - Can create and manage jobs - -## Starting the Supervisor with an Admin Key - -Bootstrap an initial admin key when starting the supervisor: - -```bash -cargo run --bin supervisor -- --bootstrap-admin-key "my-admin" -``` - -This will output: - -``` -╔════════════════════════════════════════════════════════════╗ -║ 🔑 Admin API Key Created ║ -╚════════════════════════════════════════════════════════════╝ - Name: my-admin - Key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - Scope: admin - ⚠️ SAVE THIS KEY - IT WILL NOT BE SHOWN AGAIN! -╚════════════════════════════════════════════════════════════╝ -``` - -**IMPORTANT:** Save this key securely - it will not be displayed again! 
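Before moving on to the individual endpoints, it can be handy to confirm that the saved key actually authenticates. The sketch below calls the `auth.verify` method described in the next section from Rust; it is only a sketch and assumes the `reqwest` (with its `json` feature), `serde_json`, and `tokio` crates, plus a hypothetical `SUPERVISOR_API_KEY` environment variable holding the key printed above.

```rust
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical: the key copied from the startup banner above.
    let api_key = std::env::var("SUPERVISOR_API_KEY")?;

    // JSON-RPC request matching the auth.verify example below.
    let request = json!({
        "jsonrpc": "2.0",
        "method": "auth.verify",
        "params": { "key": api_key },
        "id": 1
    });

    let response: serde_json::Value = reqwest::Client::new()
        .post("http://127.0.0.1:3030")
        .json(&request)
        .send()
        .await?
        .json()
        .await?;

    println!("verify result: {}", response["result"]);
    Ok(())
}
```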
- -## API Endpoints - -### Verify API Key - -Verify a key and get its metadata: - -```bash -curl -X POST http://127.0.0.1:3030 \ - -H "Content-Type: application/json" \ - -d '{ - "jsonrpc": "2.0", - "method": "auth.verify", - "params": { - "key": "your-api-key-here" - }, - "id": 1 - }' -``` - -Response: - -```json -{ - "jsonrpc": "2.0", - "result": { - "valid": true, - "name": "my-admin", - "scope": "admin" - }, - "id": 1 -} -``` - -### Create New API Key (Admin Only) - -```bash -curl -X POST http://127.0.0.1:3030 \ - -H "Content-Type: application/json" \ - -d '{ - "jsonrpc": "2.0", - "method": "auth.create_key", - "params": { - "admin_key": "your-admin-key", - "name": "runner-bot", - "scope": "registrar" - }, - "id": 1 - }' -``` - -Response: - -```json -{ - "jsonrpc": "2.0", - "result": { - "key": "new-generated-uuid", - "name": "runner-bot", - "scope": "registrar", - "created_at": "2025-10-27T15:00:00Z", - "expires_at": null - }, - "id": 1 -} -``` - -### List All API Keys (Admin Only) - -```bash -curl -X POST http://127.0.0.1:3030 \ - -H "Content-Type: application/json" \ - -d '{ - "jsonrpc": "2.0", - "method": "auth.list_keys", - "params": { - "admin_key": "your-admin-key" - }, - "id": 1 - }' -``` - -### Remove API Key (Admin Only) - -```bash -curl -X POST http://127.0.0.1:3030 \ - -H "Content-Type: application/json" \ - -d '{ - "jsonrpc": "2.0", - "method": "auth.remove_key", - "params": { - "admin_key": "your-admin-key", - "key": "key-to-remove" - }, - "id": 1 - }' -``` - -## Using Keys in the Admin UI - -The admin UI will use the `auth.verify` endpoint during login to: -1. Validate the provided API key -2. Retrieve the key's name and scope -3. Display the user's name and permissions in the header -4. Show/hide UI elements based on scope - -## Migration from Legacy Secrets - -The supervisor still supports the legacy secret-based authentication for backward compatibility: -- `--admin-secret` - Legacy admin secrets -- `--user-secret` - Legacy user secrets -- `--register-secret` - Legacy register secrets - -However, the new API key system is recommended for better management and auditability. diff --git a/bin/supervisor/docs/MYCELIUM.md b/bin/supervisor/docs/MYCELIUM.md deleted file mode 100644 index 2fda463..0000000 --- a/bin/supervisor/docs/MYCELIUM.md +++ /dev/null @@ -1,268 +0,0 @@ -# Mycelium Integration - Now Optional! - -The Mycelium integration is now an optional feature. The supervisor can run with just the OpenRPC HTTP server, making it simpler to use and deploy. 
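Because the default build exposes only the JSON-RPC endpoint, a plain OpenRPC client is all that is needed to talk to it. The sketch below uses the `hero-supervisor-openrpc-client` calls shown in the examples elsewhere in this repository; it assumes a supervisor is already listening on port 3030 and that `tokio` is available.

```rust
use hero_supervisor_openrpc_client::SupervisorClient;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // No Mycelium daemon required: this talks only to the OpenRPC HTTP server.
    let client = SupervisorClient::new("http://127.0.0.1:3030")?;
    let runners = client.list_runners().await?;
    println!("registered runners: {:?}", runners);
    Ok(())
}
```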
- -## What Changed - -### Before -- Mycelium integration was always enabled -- Supervisor would continuously try to connect to Mycelium on port 8990 -- Error logs if Mycelium wasn't available -- Required additional dependencies - -### After -- ✅ Mycelium is now an optional feature -- ✅ Supervisor runs with clean OpenRPC HTTP server by default -- ✅ No connection errors when Mycelium isn't needed -- ✅ Smaller binary size without Mycelium dependencies - -## Running the Supervisor - -### Option 1: Simple OpenRPC Server (Recommended) - -**No Mycelium, just OpenRPC:** - -```bash -# Using the helper script -./run_supervisor_simple.sh - -# Or manually -MYCELIUM_URL="" cargo run --bin supervisor -- \ - --redis-url redis://localhost:6379 \ - --port 3030 -``` - -This starts: -- ✅ OpenRPC HTTP server on port 3030 -- ✅ Redis connection for job queuing -- ❌ No Mycelium integration - -### Option 2: With Mycelium Integration - -**Enable Mycelium feature:** - -```bash -# Build with Mycelium support -cargo build --bin supervisor --features mycelium - -# Run with Mycelium URL -MYCELIUM_URL="http://localhost:8990" cargo run --bin supervisor --features mycelium -- \ - --redis-url redis://localhost:6379 \ - --port 3030 -``` - -This starts: -- ✅ OpenRPC HTTP server on port 3030 -- ✅ Redis connection for job queuing -- ✅ Mycelium integration (connects to daemon) - -## Feature Flags - -### Available Features - -| Feature | Description | Default | -|---------|-------------|---------| -| `cli` | Command-line interface | ✅ Yes | -| `mycelium` | Mycelium integration | ❌ No | - -### Building with Features - -```bash -# Default build (CLI only, no Mycelium) -cargo build --bin supervisor - -# With Mycelium -cargo build --bin supervisor --features mycelium - -# Minimal (no CLI, no Mycelium) -cargo build --bin supervisor --no-default-features -``` - -## Architecture - -### Without Mycelium (Default) - -``` -┌─────────────────┐ -│ Client │ -└────────┬────────┘ - │ HTTP/JSON-RPC - ▼ -┌─────────────────┐ -│ Supervisor │ -│ OpenRPC Server │ -│ (Port 3030) │ -└────────┬────────┘ - │ Redis - ▼ -┌─────────────────┐ -│ Runners │ -└─────────────────┘ -``` - -### With Mycelium (Optional) - -``` -┌─────────────────┐ -│ Client │ -└────────┬────────┘ - │ HTTP/JSON-RPC - ▼ -┌─────────────────┐ ┌──────────────┐ -│ Supervisor │◄────►│ Mycelium │ -│ OpenRPC Server │ │ Daemon │ -│ (Port 3030) │ │ (Port 8990) │ -└────────┬────────┘ └──────────────┘ - │ Redis - ▼ -┌─────────────────┐ -│ Runners │ -└─────────────────┘ -``` - -## Environment Variables - -| Variable | Description | Default | Required | -|----------|-------------|---------|----------| -| `MYCELIUM_URL` | Mycelium daemon URL | `http://127.0.0.1:8990` | No | -| `RUST_LOG` | Log level | `info` | No | - -**To disable Mycelium:** -```bash -export MYCELIUM_URL="" -``` - -## Dependencies - -### Core Dependencies (Always) -- `tokio` - Async runtime -- `redis` - Job queuing -- `jsonrpsee` - OpenRPC server -- `runner_rust` - Job model - -### Mycelium Dependencies (Optional) -- `reqwest` - HTTP client -- `base64` - Encoding -- `rand` - Random IDs - -## Examples - -All examples work without Mycelium: - -```bash -# Simple end-to-end example -RUST_LOG=info cargo run --example simple_e2e - -# Full automated demo -RUST_LOG=info cargo run --example end_to_end_demo -``` - -## Migration Guide - -### If you were using Mycelium - -**Before:** -```bash -cargo run --bin supervisor -# Would try to connect to Mycelium automatically -``` - -**After:** -```bash -# Option A: Disable Mycelium (recommended 
for most use cases) -MYCELIUM_URL="" cargo run --bin supervisor - -# Option B: Enable Mycelium feature -cargo run --bin supervisor --features mycelium -``` - -### If you weren't using Mycelium - -**Before:** -```bash -cargo run --bin supervisor -# Would see connection errors to port 8990 -``` - -**After:** -```bash -cargo run --bin supervisor -# Clean startup, no connection errors! 🎉 -``` - -## Benefits - -### For Development -- ✅ Faster builds (fewer dependencies) -- ✅ Simpler setup (no Mycelium daemon needed) -- ✅ Cleaner logs (no connection errors) -- ✅ Easier debugging - -### For Production -- ✅ Smaller binary size -- ✅ Fewer runtime dependencies -- ✅ More flexible deployment -- ✅ Optional advanced features - -## Testing - -### Test without Mycelium -```bash -# Build -cargo build --bin supervisor - -# Run tests -cargo test - -# Run examples -cargo run --example simple_e2e -``` - -### Test with Mycelium -```bash -# Build with feature -cargo build --bin supervisor --features mycelium - -# Start Mycelium daemon (if you have one) -# mycelium-daemon --port 8990 - -# Run supervisor -MYCELIUM_URL="http://localhost:8990" cargo run --bin supervisor --features mycelium -``` - -## Troubleshooting - -### "Mycelium integration not enabled" - -This is informational, not an error. If you need Mycelium: - -```bash -cargo build --features mycelium -``` - -### "HTTP request failed: error sending request" - -If you see this with Mycelium enabled, check: -1. Is Mycelium daemon running? -2. Is the URL correct? (`MYCELIUM_URL`) -3. Is the port accessible? - -Or simply disable Mycelium: -```bash -export MYCELIUM_URL="" -``` - -## Summary - -🎉 **The supervisor now runs cleanly with just OpenRPC!** - -- Default: OpenRPC HTTP server only -- Optional: Enable Mycelium with `--features mycelium` -- No more connection errors when Mycelium isn't needed -- Simpler, faster, cleaner! - ---- - -**Status:** ✅ Complete -**Version:** 0.1.0 -**Last Updated:** 2025-10-24 diff --git a/bin/supervisor/docs/QUICK_START.md b/bin/supervisor/docs/QUICK_START.md deleted file mode 100644 index c15b3d7..0000000 --- a/bin/supervisor/docs/QUICK_START.md +++ /dev/null @@ -1,214 +0,0 @@ -# Quick Start Guide - -Complete guide to running the Hero Supervisor with OSIS runners and examples. - -## Prerequisites - -1. **Redis** - Must be running -2. **Rust** - Version 1.88+ (run `rustup update`) - -## 1. Start Redis - -```bash -redis-server -``` - -## 2. Start Supervisor - -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor -cargo run --bin supervisor -``` - -You should see: -``` -╔════════════════════════════════════════════════════════════╗ -║ Hero Supervisor Started ║ -╚════════════════════════════════════════════════════════════╝ - 📡 OpenRPC Server: http://127.0.0.1:3030 - 🔗 Redis: redis://localhost:6379 - 🌐 Mycelium: Not compiled (use --features mycelium) -╚════════════════════════════════════════════════════════════╝ -``` - -## 3. Start OSIS Runner - -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner_rust -cargo run --bin runner_osis -- test_runner \ - --redis-url redis://localhost:6379 \ - --db-path /tmp/test_runner.db -``` - -You should see: -``` -Starting OSIS Sync Runner with ID: test_runner -Database path: /tmp/test_runner.db -Redis URL: redis://localhost:6379 -OSIS Sync Runner 'test_runner' started successfully -``` - -## 4. 
Run Example - -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor -RUST_LOG=info cargo run --example simple_e2e -``` - -## Terminal Layout - -``` -┌─────────────────────┬─────────────────────┐ -│ Terminal 1 │ Terminal 2 │ -│ Redis │ Supervisor │ -│ redis-server │ cargo run --bin │ -│ │ supervisor │ -├─────────────────────┼─────────────────────┤ -│ Terminal 3 │ Terminal 4 │ -│ OSIS Runner │ Example │ -│ cargo run --bin │ cargo run │ -│ runner_osis │ --example │ -│ │ simple_e2e │ -└─────────────────────┴─────────────────────┘ -``` - -## What Each Component Does - -### Redis -- Job queue storage -- Job result storage -- Runner coordination - -### Supervisor -- OpenRPC HTTP server (port 3030) -- Job dispatch to runners -- Runner registration -- Job execution coordination - -### OSIS Runner -- Listens for jobs on Redis queue -- Executes Rhai scripts -- Stores results back to Redis -- Uses HeroDB for data persistence - -### Example -- Creates jobs with Rhai scripts -- Sends jobs to supervisor via OpenRPC -- Receives results -- Demonstrates both blocking and non-blocking modes - -## Architecture - -``` -┌─────────────┐ -│ Example │ (simple_e2e.rs) -└──────┬──────┘ - │ HTTP/JSON-RPC - ▼ -┌─────────────┐ -│ Supervisor │ (port 3030) -└──────┬──────┘ - │ Redis Queue - ▼ -┌─────────────┐ -│ OSIS Runner │ (test_runner) -└──────┬──────┘ - │ - ▼ -┌─────────────┐ -│ HeroDB │ (Redis + local DB) -└─────────────┘ -``` - -## Troubleshooting - -### "Connection refused" on port 3030 -- Make sure supervisor is running -- Check if another process is using port 3030 - -### "Connection refused" on port 6379 -- Make sure Redis is running -- Check: `redis-cli ping` (should return "PONG") - -### Runner not receiving jobs -- Check runner is registered: Look for "Runner registered successfully" in example output -- Check Redis connection: Both supervisor and runner must use same Redis URL -- Check queue name matches: Should be `hero:q:work:type:osis:group:default:inst:test_runner` - -### "Job execution timeout" -- Increase timeout in job builder: `.timeout(120)` -- Check if runner is actually processing jobs (look for logs) - -## Example Output - -### Successful Run - -``` -╔════════════════════════════════════════╗ -║ Simple End-to-End Demo ║ -╚════════════════════════════════════════╝ - -📋 Step 1: Registering Runner -───────────────────────────────────────── -✅ Runner registered successfully - -📋 Step 2: Running a Simple Job (Blocking) -───────────────────────────────────────── -✅ Job completed! - Result: {"message":"Hello from the runner!","number":42} - -📋 Step 3: Running a Calculation Job -───────────────────────────────────────── -✅ Calculation completed! - Result: {"sum":55,"product":3628800,"count":10} - -📋 Step 4: Starting a Non-Blocking Job -───────────────────────────────────────── -✅ Job started! - Job ID: abc-123 (running in background) - -🎉 Demo completed successfully! -``` - -## Next Steps - -1. **Try different Rhai scripts** - Modify the payload in examples -2. **Add more runners** - Start multiple runners with different IDs -3. **Explore the API** - Use the OpenRPC client library -4. 
**Build your own client** - See `client/` for examples - -## Useful Commands - -```bash -# Check Redis -redis-cli ping - -# List Redis keys -redis-cli keys "hero:*" - -# Monitor Redis commands -redis-cli monitor - -# Check supervisor is running -curl http://localhost:3030 - -# View runner logs -# (check terminal where runner is running) -``` - -## Clean Up - -```bash -# Stop all processes (Ctrl+C in each terminal) - -# Clean up test database -rm /tmp/test_runner.db - -# (Optional) Flush Redis -redis-cli FLUSHALL -``` - ---- - -**Status:** ✅ Ready to Use -**Last Updated:** 2025-10-24 diff --git a/bin/supervisor/docs/RESTRUCTURE.md b/bin/supervisor/docs/RESTRUCTURE.md deleted file mode 100644 index 1b92af4..0000000 --- a/bin/supervisor/docs/RESTRUCTURE.md +++ /dev/null @@ -1,58 +0,0 @@ -# Repository Restructure - -## Changes Made - -The supervisor repository has been restructured to follow a cleaner organization: - -### Before: -``` -supervisor/ -├── clients/ -│ ├── openrpc/ # OpenRPC client library -│ └── admin-ui/ # Admin UI (Yew WASM app) -├── src/ # Main supervisor library -└── cmd/ # Supervisor binary -``` - -### After: -``` -supervisor/ -├── client/ # OpenRPC client library (renamed from clients/openrpc) -├── ui/ # Admin UI (renamed from clients/admin-ui) -├── src/ # Main supervisor library -└── cmd/ # Supervisor binary -``` - -## Package Names - -The package names remain unchanged: -- **Client**: `hero-supervisor-openrpc-client` -- **UI**: `supervisor-admin-ui` -- **Main**: `hero-supervisor` - -## Git Dependencies - -External projects using Git URLs will automatically pick up the new structure: - -```toml -# This continues to work -hero-supervisor-openrpc-client = { git = "https://git.ourworld.tf/herocode/supervisor.git" } -``` - -Cargo will find the package by name regardless of its location in the repository. - -## Local Path Dependencies - -If you have local path dependencies, update them: - -```toml -# Old -hero-supervisor-openrpc-client = { path = "../supervisor/clients/openrpc" } - -# New -hero-supervisor-openrpc-client = { path = "../supervisor/client" } -``` - -## Scripts and Documentation - -All references in scripts, documentation, and examples have been updated to reflect the new structure. diff --git a/bin/supervisor/docs/job-api-convention.md b/bin/supervisor/docs/job-api-convention.md deleted file mode 100644 index b4c4102..0000000 --- a/bin/supervisor/docs/job-api-convention.md +++ /dev/null @@ -1,333 +0,0 @@ -# Hero Supervisor Job API Convention - -## Overview - -The Hero Supervisor OpenRPC API follows a consistent naming convention for job-related operations: - -- **`jobs.`** - General job operations (plural) -- **`job.`** - Specific job operations (singular) - -This convention provides a clear distinction between operations that work with multiple jobs or create new jobs versus operations that work with a specific existing job. - -## API Methods - -### General Job Operations (`jobs.`) - -#### `jobs.create` -Creates a new job without immediately queuing it to a runner. 
- -**Parameters:** -- `secret` (string): Authentication secret (admin or user) -- `job` (Job object): Complete job specification - -**Returns:** -- `job_id` (string): Unique identifier of the created job - -**Usage:** -```json -{ - "method": "jobs.create", - "params": { - "secret": "your-secret", - "job": { - "id": "job-123", - "caller_id": "client-1", - "context_id": "context-1", - "payload": "print('Hello World')", - "executor": "osis", - "runner": "osis-runner-1", - "timeout": 300, - "env_vars": {}, - "created_at": "2023-01-01T00:00:00Z", - "updated_at": "2023-01-01T00:00:00Z" - } - } -} -``` - -#### `jobs.list` -Lists all jobs in the system with full details. - -**Parameters:** None - -**Returns:** -- `jobs` (array of Job objects): List of all jobs with complete information - -**Usage:** -```json -{ - "method": "jobs.list", - "params": [] -} -``` - -**Response:** -```json -[ - { - "id": "job-123", - "caller_id": "client-1", - "context_id": "context-1", - "payload": "print('Hello World')", - "executor": "osis", - "runner": "osis-runner-1", - "timeout": 300, - "env_vars": {}, - "created_at": "2023-01-01T00:00:00Z", - "updated_at": "2023-01-01T00:00:00Z" - } -] -``` - -### Specific Job Operations (`job.`) - -#### `job.run` -Runs a job immediately on the appropriate runner and returns the result. - -**Parameters:** -- `secret` (string): Authentication secret (admin or user) -- `job` (Job object): Complete job specification - -**Returns:** -- `result` (JobResult): Either success or error result - -**JobResult Format:** -```json -// Success case -{ - "success": "Job completed successfully with output..." -} - -// Error case -{ - "error": "Job failed with error message..." -} -``` - -**Usage:** -```json -{ - "method": "job.run", - "params": { - "secret": "your-secret", - "job": { /* job object */ } - } -} -``` - -#### `job.start` -Starts a previously created job by queuing it to its assigned runner. - -**Parameters:** -- `secret` (string): Authentication secret (admin or user) -- `job_id` (string): ID of the job to start - -**Returns:** `null` (void) - -**Usage:** -```json -{ - "method": "job.start", - "params": { - "secret": "your-secret", - "job_id": "job-123" - } -} -``` - -#### `job.status` -Gets the current status of a job. - -**Parameters:** -- `job_id` (string): ID of the job to check - -**Returns:** -- `status` (JobStatusResponse): Current job status information - -**JobStatusResponse Format:** -```json -{ - "job_id": "job-123", - "status": "running", - "created_at": "2023-01-01T00:00:00Z", - "started_at": "2023-01-01T00:00:05Z", - "completed_at": null -} -``` - -**Status Values:** -- `created` - Job has been created but not queued -- `queued` - Job has been queued to a runner -- `running` - Job is currently executing -- `completed` - Job finished successfully -- `failed` - Job failed with an error -- `timeout` - Job timed out - -**Usage:** -```json -{ - "method": "job.status", - "params": ["job-123"] -} -``` - -#### `job.result` -Gets the result of a completed job. This method blocks until the result is available. - -**Parameters:** -- `job_id` (string): ID of the job to get results for - -**Returns:** -- `result` (JobResult): Either success or error result - -**Usage:** -```json -{ - "method": "job.result", - "params": ["job-123"] -} -``` - -#### `job.stop` -Stops a running job. 
- -**Parameters:** -- `secret` (string): Authentication secret (admin or user) -- `job_id` (string): ID of the job to stop - -**Returns:** `null` (void) - -**Usage:** -```json -{ - "method": "job.stop", - "params": { - "secret": "your-secret", - "job_id": "job-123" - } -} -``` - -#### `job.delete` -Deletes a job from the system. - -**Parameters:** -- `secret` (string): Authentication secret (admin or user) -- `job_id` (string): ID of the job to delete - -**Returns:** `null` (void) - -**Usage:** -```json -{ - "method": "job.delete", - "params": { - "secret": "your-secret", - "job_id": "job-123" - } -} -``` - -## Workflow Examples - -### Fire-and-Forget Job -```javascript -// Create and immediately run a job -const result = await client.job_run(secret, jobSpec); -if (result.success) { - console.log("Job completed:", result.success); -} else { - console.error("Job failed:", result.error); -} -``` - -### Asynchronous Job Processing -```javascript -// 1. Create the job -const jobId = await client.jobs_create(secret, jobSpec); - -// 2. Start the job -await client.job_start(secret, jobId); - -// 3. Poll for completion (non-blocking) -let status; -do { - status = await client.job_status(jobId); - if (status.status === 'running') { - await sleep(1000); // Wait 1 second - } -} while (status.status === 'running' || status.status === 'queued'); - -// 4. Get the result -const result = await client.job_result(jobId); -``` - -### Batch Job Management -```javascript -// Create multiple jobs -const jobIds = []; -for (const jobSpec of jobSpecs) { - const jobId = await client.jobs_create(secret, jobSpec); - jobIds.push(jobId); -} - -// Start all jobs -for (const jobId of jobIds) { - await client.job_start(secret, jobId); -} - -// Monitor progress -const results = []; -for (const jobId of jobIds) { - const result = await client.job_result(jobId); // Blocks until complete - results.push(result); -} - -// Optional: Stop or delete jobs if needed -for (const jobId of jobIds) { - await client.job_stop(secret, jobId); // Stop running job - await client.job_delete(secret, jobId); // Delete from system -} -``` - -## Authentication - -All job operations require authentication using one of the following secret types: - -- **Admin secrets**: Full access to all operations -- **User secrets**: Access to job operations (`jobs.create`, `job.run`, `job.start`) -- **Register secrets**: Only access to runner registration - -## Error Handling - -All methods return standard JSON-RPC error responses for: - -- **Authentication errors** (-32602): Invalid or missing secrets -- **Job not found errors** (-32000): Job ID doesn't exist -- **Internal errors** (-32603): Server-side processing errors - -## Migration from Legacy API - -### Old → New Method Names - -| Legacy Method | New Method | Notes | -|---------------|------------|-------| -| `run_job` | `job.run` | Same functionality, new naming | -| `list_jobs` | `jobs.list` | Same functionality, new naming | -| `create_job` | `jobs.create` | Enhanced to not auto-queue | - -### New Methods Added - -- `job.start` - Start a created job -- `job.stop` - Stop a running job -- `job.delete` - Delete a job from the system -- `job.status` - Get job status (non-blocking) -- `job.result` - Get job result (blocking) - -### API Changes - -- **Job struct**: Replaced `job_type` field with `executor` -- **jobs.list**: Now returns full Job objects instead of just job IDs -- **Enhanced job lifecycle**: Added stop and delete operations - -This provides much more granular control over job lifecycle 
management. diff --git a/bin/supervisor/docs/test_keypairs.md b/bin/supervisor/docs/test_keypairs.md deleted file mode 100644 index f186ff0..0000000 --- a/bin/supervisor/docs/test_keypairs.md +++ /dev/null @@ -1,80 +0,0 @@ -# Test Keypairs for Supervisor Auth - -These are secp256k1 keypairs for testing the supervisor authentication system. - -## Keypair 1 (Alice - Admin) -``` -Private Key: 0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef -Public Key: 0x04a34b99f22c790c4e36b2b3c2c35a36db06226e41c692fc82b8b56ac1c540c5bd5b8dec5235a0fa8722476c7709c02559e3aa73aa03918ba2d492eea75abea235 -Address: 0x1234567890abcdef1234567890abcdef12345678 -``` - -## Keypair 2 (Bob - User) -``` -Private Key: 0xfedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321 -Public Key: 0x04d0de0aaeaefad02b8bdf8a56451a9852d7f851fee0cc8b4d42f3a0a4c3c2f66c1e5e3e8e3c3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e3e -Address: 0xfedcba0987654321fedcba0987654321fedcba09 -``` - -## Keypair 3 (Charlie - Register) -``` -Private Key: 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -Public Key: 0x04e68acfc0253a10620dff706b0a1b1f1f5833ea3beb3bde6250d4e5e1e283bb4e9504be11a68d7a263f8e2000d1f8b8c5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e -Address: 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -``` - -## Keypair 4 (Dave - Test) -``` -Private Key: 0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb -Public Key: 0x04f71e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e8f6c7e -Address: 0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb -``` - -## Keypair 5 (Eve - Test) -``` -Private Key: 0xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -Public Key: 0x04a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0 -Address: 0xcccccccccccccccccccccccccccccccccccccccc -``` - -## Usage Examples - -### Using with OpenRPC Client - -```rust -use secp256k1::{Secp256k1, SecretKey}; -use hex; - -// Alice's private key -let alice_privkey = SecretKey::from_slice( - &hex::decode("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef").unwrap() -).unwrap(); - -// Sign a message -let secp = Secp256k1::new(); -let message = "Hello, Supervisor!"; -// ... sign with alice_privkey -``` - -### Using with Admin UI - -You can use the public keys as identifiers when creating API keys: -- Alice: `0x04a34b99f22c790c4e36b2b3c2c35a36db06226e41c692fc82b8b56ac1c540c5bd...` -- Bob: `0x04d0de0aaeaefad02b8bdf8a56451a9852d7f851fee0cc8b4d42f3a0a4c3c2f66c...` - -### Testing Different Scopes - -1. **Admin Scope** - Use Alice's keypair for full admin access -2. **User Scope** - Use Bob's keypair for limited user access -3. **Register Scope** - Use Charlie's keypair for runner registration only - -## Notes - -⚠️ **WARNING**: These are TEST keypairs only! Never use these in production! - -The private keys are intentionally simple patterns for easy testing: -- Alice: All 0x12...ef pattern -- Bob: Reverse pattern 0xfe...21 -- Charlie: All 0xaa -- Dave: All 0xbb -- Eve: All 0xcc diff --git a/bin/supervisor/examples/README.md b/bin/supervisor/examples/README.md deleted file mode 100644 index 76131a1..0000000 --- a/bin/supervisor/examples/README.md +++ /dev/null @@ -1,74 +0,0 @@ -# Hero Supervisor Examples - -This directory contains examples demonstrating Hero Supervisor functionality. 
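Before diving into the individual examples, here is a condensed sketch of the client workflow they all share, assembled from the `hero-supervisor-openrpc-client` API used in the examples. The supervisor URL, secret, runner name, queue name, and Rhai payload are placeholders, and a supervisor plus a matching runner must already be running.

```rust
use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = SupervisorClient::new("http://127.0.0.1:3030")?;

    // Register a runner so the supervisor can dispatch work to it.
    client.register_runner("admin_secret", "test_runner", "test_runner_queue").await?;

    // Build a job that runs a small Rhai script on that runner.
    let job = JobBuilder::new()
        .caller_id("example_client")
        .context_id("example_context")
        .payload(r#"to_json(2 + 2)"#)
        .runner("test_runner")
        .executor("rhai")
        .timeout(30)
        .build()?;

    // Blocking mode: wait for the result before returning.
    let response = client.job_run("admin_secret", job, Some(30)).await?;
    println!("result: {:?}", response.result);
    Ok(())
}
```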
- -## Available Examples - -### osiris_openrpc - -Comprehensive example showing the complete workflow of using Hero Supervisor with OSIRIS runners via OpenRPC. - -**Features:** -- Automatic supervisor and runner startup -- OpenRPC client communication -- Runner registration and management -- Job dispatching with multiple scripts -- Context-based access control -- Graceful shutdown - -**Run:** -```bash -cargo run --example osiris_openrpc -``` - -See [osiris_openrpc/README.md](osiris_openrpc/README.md) for details. - -## Prerequisites - -All examples require: -- Redis server running on `localhost:6379` -- Rust toolchain installed - -## Example Structure - -``` -examples/ -├── README.md # This file -├── osiris_openrpc/ # OSIRIS + OpenRPC example -│ ├── main.rs # Main example code -│ ├── README.md # Detailed documentation -│ ├── note.rhai # Note creation script -│ ├── event.rhai # Event creation script -│ ├── query.rhai # Query script -│ └── access_denied.rhai # Access control test script -└── _archive/ # Archived old examples -``` - -## Architecture Overview - -The examples demonstrate the Hero Supervisor architecture: - -``` -Client (OpenRPC) - ↓ -Supervisor (OpenRPC Server) - ↓ -Redis Queue - ↓ -Runners (OSIRIS, SAL, etc.) -``` - -## Development - -To add a new example: - -1. Create a new directory under `examples/` -2. Add `main.rs` with your example code -3. Add any required script files (`.rhai`) -4. Add a `README.md` documenting the example -5. Update `Cargo.toml` to register the example -6. Update this README with a link - -## Archived Examples - -Previous examples have been moved to `_archive/` for reference. These may be outdated but can provide useful patterns for specific use cases. diff --git a/bin/supervisor/examples/_archive/E2E_EXAMPLES.md b/bin/supervisor/examples/_archive/E2E_EXAMPLES.md deleted file mode 100644 index f763f02..0000000 --- a/bin/supervisor/examples/_archive/E2E_EXAMPLES.md +++ /dev/null @@ -1,364 +0,0 @@ -# End-to-End Examples - -Complete examples demonstrating the full Supervisor + Runner + Client workflow. - -## Overview - -These examples show how to: -1. Start a Hero Supervisor -2. Start an OSIS Runner -3. Register the runner with the supervisor -4. Execute jobs using both blocking (`job.run`) and non-blocking (`job.start`) modes - -## Prerequisites - -### Required Services - -1. **Redis** - Must be running on `localhost:6379` - ```bash - redis-server - ``` - -2. **Supervisor** - Hero Supervisor with Mycelium integration - ```bash - cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379 - ``` - -3. **Runner** - OSIS Runner to execute jobs - ```bash - cargo run --bin runner_osis -- test_runner --redis-url redis://localhost:6379 - ``` - -## Examples - -### 1. Simple End-to-End (`simple_e2e.rs`) - -**Recommended for beginners** - A minimal example with clear step-by-step execution. 
- -#### What it does: -- Registers a runner with the supervisor -- Runs 2 blocking jobs (with immediate results) -- Starts 1 non-blocking job (fire and forget) -- Shows clear output at each step - -#### How to run: - -**Terminal 1 - Redis:** -```bash -redis-server -``` - -**Terminal 2 - Supervisor:** -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor -RUST_LOG=info cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379 -``` - -**Terminal 3 - Runner:** -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner_rust -RUST_LOG=info cargo run --bin runner_osis -- test_runner \ - --redis-url redis://localhost:6379 \ - --db-path /tmp/test_runner.db -``` - -**Terminal 4 - Demo:** -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor -RUST_LOG=info cargo run --example simple_e2e -``` - -#### Expected Output: - -``` -╔════════════════════════════════════════╗ -║ Simple End-to-End Demo ║ -╚════════════════════════════════════════╝ - -📋 Step 1: Registering Runner -───────────────────────────────────────── -✅ Runner registered successfully - -📋 Step 2: Running a Simple Job (Blocking) -───────────────────────────────────────── -✅ Job completed! - Result: {"message":"Hello from the runner!","number":42,"timestamp":1234567890} - -📋 Step 3: Running a Calculation Job -───────────────────────────────────────── -✅ Calculation completed! - Result: {"sum":55,"product":3628800,"count":10,"average":5} - -📋 Step 4: Starting a Non-Blocking Job -───────────────────────────────────────── -✅ Job started! - Job ID: abc-123 (running in background) - -🎉 Demo completed successfully! -``` - -### 2. Full End-to-End (`end_to_end_demo.rs`) - -**Advanced** - Automatically spawns supervisor and runner processes. - -#### What it does: -- Automatically starts supervisor and runner -- Runs multiple test jobs -- Demonstrates both execution modes -- Handles cleanup automatically - -#### How to run: - -**Terminal 1 - Redis:** -```bash -redis-server -``` - -**Terminal 2 - Demo:** -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor -RUST_LOG=info cargo run --example end_to_end_demo -``` - -#### Features: -- ✅ Automatic process management -- ✅ Multiple job examples -- ✅ Graceful shutdown -- ✅ Comprehensive logging - -## Job Execution Modes - -### job.run (Blocking) - -Executes a job and waits for the result. - -**Request:** -```json -{ - "jsonrpc": "2.0", - "method": "job.run", - "params": [{ - "secret": "admin_secret", - "job": { /* job object */ }, - "timeout": 30 - }], - "id": 1 -} -``` - -**Response:** -```json -{ - "jsonrpc": "2.0", - "result": { - "job_id": "uuid", - "status": "completed", - "result": "{ /* actual result */ }" - }, - "id": 1 -} -``` - -**Use when:** -- You need immediate results -- Job completes quickly (< 60 seconds) -- Synchronous workflow - -### job.start (Non-Blocking) - -Starts a job and returns immediately. 
- -**Request:** -```json -{ - "jsonrpc": "2.0", - "method": "job.start", - "params": [{ - "secret": "admin_secret", - "job": { /* job object */ } - }], - "id": 1 -} -``` - -**Response:** -```json -{ - "jsonrpc": "2.0", - "result": { - "job_id": "uuid", - "status": "queued" - }, - "id": 1 -} -``` - -**Use when:** -- Long-running operations -- Background processing -- Async workflows -- Don't need immediate results - -## Job Structure - -Jobs are created using the `JobBuilder`: - -```rust -use runner_rust::job::JobBuilder; - -let job = JobBuilder::new() - .caller_id("my_client") - .context_id("my_context") - .payload(r#" - // Rhai script to execute - let result = 2 + 2; - to_json(result) - "#) - .runner("runner_name") - .executor("rhai") - .timeout(30) - .build()?; -``` - -### Job Fields - -- **caller_id**: Identifier for the client making the request -- **context_id**: Context for the job execution -- **payload**: Rhai script to execute -- **runner**: Name of the runner to execute on -- **executor**: Type of executor (always "rhai" for OSIS) -- **timeout**: Maximum execution time in seconds - -## Rhai Script Examples - -### Simple Calculation -```rhai -let result = 2 + 2; -to_json(result) -``` - -### String Manipulation -```rhai -let message = "Hello, World!"; -let upper = message.to_upper(); -to_json(upper) -``` - -### Array Operations -```rhai -let numbers = [1, 2, 3, 4, 5]; -let sum = 0; -for n in numbers { - sum += n; -} -to_json(#{sum: sum, count: numbers.len()}) -``` - -### Object Creation -```rhai -let person = #{ - name: "Alice", - age: 30, - email: "alice@example.com" -}; -to_json(person) -``` - -## Troubleshooting - -### "Failed to connect to supervisor" - -**Problem:** Supervisor is not running or wrong port. - -**Solution:** -```bash -# Check if supervisor is running -curl http://localhost:3030 - -# Start supervisor -cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379 -``` - -### "Runner not found" - -**Problem:** Runner is not registered or not running. - -**Solution:** -```bash -# Start the runner -cargo run --bin runner_osis -- test_runner --redis-url redis://localhost:6379 - -# Check runner logs for connection issues -``` - -### "Job execution timeout" - -**Problem:** Job took longer than timeout value. - -**Solution:** -- Increase timeout in job builder: `.timeout(60)` -- Or in job.run request: `"timeout": 60` - -### "Redis connection failed" - -**Problem:** Redis is not running. - -**Solution:** -```bash -# Start Redis -redis-server - -# Or specify custom Redis URL -cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379 -``` - -## Architecture - -``` -┌─────────────┐ -│ Client │ -│ (Example) │ -└──────┬──────┘ - │ HTTP/JSON-RPC - ▼ -┌─────────────┐ -│ Supervisor │ -│ (Mycelium) │ -└──────┬──────┘ - │ Redis Queue - ▼ -┌─────────────┐ -│ Runner │ -│ (OSIS) │ -└─────────────┘ -``` - -### Flow - -1. **Client** creates a job with Rhai script -2. **Client** sends job to supervisor via JSON-RPC -3. **Supervisor** verifies signatures (if present) -4. **Supervisor** queues job to runner's Redis queue -5. **Runner** picks up job from queue -6. **Runner** executes Rhai script -7. **Runner** stores result in Redis -8. **Supervisor** retrieves result (for job.run) -9. 
**Client** receives result - -## Next Steps - -- Add signature verification to jobs (see `JOB_SIGNATURES.md`) -- Implement job status polling for non-blocking jobs -- Create custom Rhai functions for your use case -- Scale with multiple runners - -## Related Documentation - -- `JOB_EXECUTION.md` - Detailed job execution modes -- `JOB_SIGNATURES.md` - Cryptographic job signing -- `README.md` - Supervisor overview - ---- - -**Status:** ✅ Production Ready -**Last Updated:** 2025-10-24 diff --git a/bin/supervisor/examples/_archive/EXAMPLES_SUMMARY.md b/bin/supervisor/examples/_archive/EXAMPLES_SUMMARY.md deleted file mode 100644 index 0c740a2..0000000 --- a/bin/supervisor/examples/_archive/EXAMPLES_SUMMARY.md +++ /dev/null @@ -1,192 +0,0 @@ -# Supervisor Examples - Summary - -## ✅ **Complete End-to-End Examples with OpenRPC Client** - -All examples now use the official `hero-supervisor-openrpc-client` library for type-safe, async communication with the supervisor. - -### **What Was Updated:** - -1. **OpenRPC Client Library** (`client/src/lib.rs`) - - Added `JobRunResponse` - Response from blocking `job.run` - - Added `JobStartResponse` - Response from non-blocking `job.start` - - Updated `job_run()` method - Now accepts timeout parameter - - Updated `job_start()` method - Now accepts Job instead of job_id - - Re-exports `Job` and `JobBuilder` from `runner_rust` - -2. **Simple E2E Example** (`examples/simple_e2e.rs`) - - Uses `SupervisorClient` from OpenRPC library - - Clean, type-safe API calls - - No manual JSON-RPC construction - - Perfect for learning and testing - -3. **Full E2E Demo** (`examples/end_to_end_demo.rs`) - - Automated supervisor and runner spawning - - Uses OpenRPC client throughout - - Helper functions for common operations - - Comprehensive test scenarios - -### **Key Changes:** - -**Before (Manual JSON-RPC):** -```rust -let request = json!({ - "jsonrpc": "2.0", - "method": "job.run", - "params": [{ - "secret": secret, - "job": job, - "timeout": 30 - }], - "id": 1 -}); -let response = http_client.post(url).json(&request).send().await?; -``` - -**After (OpenRPC Client):** -```rust -let response = client.job_run(secret, job, Some(30)).await?; -println!("Result: {:?}", response.result); -``` - -### **Client API:** - -#### **Job Execution** - -```rust -use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder}; - -// Create client -let client = SupervisorClient::new("http://localhost:3030")?; - -// Register runner -client.register_runner("admin_secret", "runner_name", "queue_name").await?; - -// Run job (blocking - waits for result) -let response = client.job_run("admin_secret", job, Some(60)).await?; -// response.result contains the actual result - -// Start job (non-blocking - returns immediately) -let response = client.job_start("admin_secret", job).await?; -// response.job_id for later polling -``` - -#### **Response Types** - -```rust -// JobRunResponse (from job.run) -pub struct JobRunResponse { - pub job_id: String, - pub status: String, // "completed" - pub result: Option, // Actual result from runner -} - -// JobStartResponse (from job.start) -pub struct JobStartResponse { - pub job_id: String, - pub status: String, // "queued" -} -``` - -### **Examples Overview:** - -| Example | Description | Use Case | -|---------|-------------|----------| -| `simple_e2e.rs` | Manual setup, step-by-step | Learning, testing | -| `end_to_end_demo.rs` | Automated, comprehensive | CI/CD, integration tests | - -### **Running the Examples:** - -**Prerequisites:** -```bash -# 
Terminal 1: Redis -redis-server - -# Terminal 2: Supervisor -cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379 - -# Terminal 3: Runner -cargo run --bin runner_osis -- test_runner --redis-url redis://localhost:6379 -``` - -**Run Simple Example:** -```bash -# Terminal 4 -RUST_LOG=info cargo run --example simple_e2e -``` - -**Run Full Demo:** -```bash -# Only needs Redis running (spawns supervisor and runner automatically) -RUST_LOG=info cargo run --example end_to_end_demo -``` - -### **Benefits of OpenRPC Client:** - -✅ **Type Safety** - Compile-time checking of requests/responses -✅ **Async/Await** - Native Rust async support -✅ **Error Handling** - Proper Result types with detailed errors -✅ **Auto Serialization** - No manual JSON construction -✅ **Documentation** - IntelliSense and type hints -✅ **Maintainability** - Single source of truth for API - -### **Architecture:** - -``` -┌─────────────────┐ -│ Example Code │ -│ (simple_e2e) │ -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ OpenRPC Client │ -│ (typed API) │ -└────────┬────────┘ - │ JSON-RPC over HTTP - ▼ -┌─────────────────┐ -│ Supervisor │ -│ (Mycelium) │ -└────────┬────────┘ - │ Redis Queue - ▼ -┌─────────────────┐ -│ OSIS Runner │ -│ (Rhai Engine) │ -└─────────────────┘ -``` - -### **Job Execution Modes:** - -**Blocking (`job.run`):** -- Client waits for result -- Uses `queue_and_wait` internally -- Returns actual result -- Best for: CRUD, queries, short jobs - -**Non-Blocking (`job.start`):** -- Client returns immediately -- Job runs in background -- Returns job_id for polling -- Best for: Long jobs, batch processing - -### **Files Modified:** - -- ✅ `client/src/lib.rs` - Updated client methods and response types -- ✅ `examples/simple_e2e.rs` - Refactored to use OpenRPC client -- ✅ `examples/end_to_end_demo.rs` - Refactored to use OpenRPC client -- ✅ `examples/E2E_EXAMPLES.md` - Updated documentation -- ✅ `examples/EXAMPLES_SUMMARY.md` - This file - -### **Next Steps:** - -1. **Add more examples** - Specific use cases (batch jobs, error handling) -2. **Job polling** - Implement `wait_for_job()` helper -3. **WASM support** - Browser-based examples -4. **Signature examples** - Jobs with cryptographic signatures - ---- - -**Status:** ✅ Complete and Production Ready -**Last Updated:** 2025-10-24 -**Client Version:** hero-supervisor-openrpc-client 0.1.0 diff --git a/bin/supervisor/examples/_archive/README.md b/bin/supervisor/examples/_archive/README.md deleted file mode 100644 index bd21499..0000000 --- a/bin/supervisor/examples/_archive/README.md +++ /dev/null @@ -1,182 +0,0 @@ -# Hero Supervisor Examples - -This directory contains examples demonstrating the new job API functionality and workflows. - -## Examples Overview - -### 1. `job_api_examples.rs` - Comprehensive API Demo -Complete demonstration of all new job API methods: -- **Fire-and-forget execution** using `job.run` -- **Asynchronous processing** with `jobs.create`, `job.start`, `job.status`, `job.result` -- **Batch job processing** for multiple jobs -- **Job listing** with `jobs.list` - -**Run with:** -```bash -cargo run --example job_api_examples -``` - -### 2. `simple_job_workflow.rs` - Basic Workflow -Simple example showing the basic job lifecycle: -1. Create job with `jobs.create` -2. Start job with `job.start` -3. Monitor with `job.status` -4. Get result with `job.result` - -**Run with:** -```bash -cargo run --example simple_job_workflow -``` - -### 3. 
`integration_test.rs` - Integration Tests -Comprehensive integration tests validating: -- Complete job lifecycle -- Immediate job execution -- Job listing functionality -- Authentication error handling -- Nonexistent job operations - -**Run with:** -```bash -cargo test --test integration_test -``` - -## Prerequisites - -Before running the examples, ensure: - -1. **Redis is running:** - ```bash - docker run -d -p 6379:6379 redis:alpine - ``` - -2. **Supervisor is running:** - ```bash - ./target/debug/supervisor --config examples/supervisor/config.toml - ``` - -3. **Runners are configured** in your config.toml: - ```toml - [[actors]] - id = "osis_runner_1" - name = "osis_runner_1" - binary_path = "/path/to/osis_runner" - db_path = "/tmp/osis_db" - redis_url = "redis://localhost:6379" - process_manager = "simple" - ``` - -## API Convention Summary - -The examples demonstrate the new job API convention: - -### General Operations (`jobs.`) -- `jobs.create` - Create a job without queuing it -- `jobs.list` - List all job IDs in the system - -### Specific Operations (`job.`) -- `job.run` - Run a job immediately and return result -- `job.start` - Start a previously created job -- `job.status` - Get current job status (non-blocking) -- `job.result` - Get job result (blocking until complete) - -## Workflow Patterns - -### Pattern 1: Fire-and-Forget -```rust -let result = client.job_run(secret, job).await?; -match result { - JobResult::Success { success } => println!("Output: {}", success), - JobResult::Error { error } => println!("Error: {}", error), -} -``` - -### Pattern 2: Asynchronous Processing -```rust -// Create and start -let job_id = client.jobs_create(secret, job).await?; -client.job_start(secret, &job_id).await?; - -// Monitor (non-blocking) -loop { - let status = client.job_status(&job_id).await?; - if status.status == "completed" { break; } - sleep(Duration::from_secs(1)).await; -} - -// Get result -let result = client.job_result(&job_id).await?; -``` - -### Pattern 3: Batch Processing -```rust -// Create all jobs -let mut job_ids = Vec::new(); -for job_spec in job_specs { - let job_id = client.jobs_create(secret, job_spec).await?; - job_ids.push(job_id); -} - -// Start all jobs -for job_id in &job_ids { - client.job_start(secret, job_id).await?; -} - -// Collect results -for job_id in &job_ids { - let result = client.job_result(job_id).await?; - // Process result... -} -``` - -## Error Handling - -The examples demonstrate proper error handling for: -- **Authentication errors** - Invalid secrets -- **Job not found errors** - Nonexistent job IDs -- **Connection errors** - Supervisor not available -- **Execution errors** - Job failures - -## Authentication - -Examples use different secret types: -- **Admin secrets**: Full system access -- **User secrets**: Job operations only (used in examples) -- **Register secrets**: Runner registration only - -Configure secrets in your supervisor config: -```toml -admin_secrets = ["admin-secret-123"] -user_secrets = ["user-secret-456"] -register_secrets = ["register-secret-789"] -``` - -## Troubleshooting - -### Common Issues - -1. **Connection refused** - - Ensure supervisor is running on localhost:3030 - - Check supervisor logs for errors - -2. **Authentication failed** - - Verify secret is configured in supervisor - - Check secret type matches operation requirements - -3. 
**Job execution failed** - - Ensure runners are properly configured and running - - Check runner logs for execution errors - - Verify job payload is valid for the target runner - -4. **Redis connection failed** - - Ensure Redis is running on localhost:6379 - - Check Redis connectivity from supervisor - -### Debug Mode - -Run examples with debug logging: -```bash -RUST_LOG=debug cargo run --example job_api_examples -``` - -This will show detailed API calls and responses for troubleshooting. diff --git a/bin/supervisor/examples/_archive/basic_openrpc_client.rs b/bin/supervisor/examples/_archive/basic_openrpc_client.rs deleted file mode 100644 index 397c295..0000000 --- a/bin/supervisor/examples/_archive/basic_openrpc_client.rs +++ /dev/null @@ -1,290 +0,0 @@ -//! Comprehensive OpenRPC Example for Hero Supervisor -//! -//! This example demonstrates the complete OpenRPC workflow: -//! 1. Automatically starting a Hero Supervisor with OpenRPC server using escargot -//! 2. Building and using a mock runner binary -//! 3. Connecting with the OpenRPC client -//! 4. Managing runners (add, start, stop, remove) -//! 5. Creating and queuing jobs -//! 6. Monitoring job execution and verifying results -//! 7. Bulk operations and status monitoring -//! 8. Gracefully shutting down the supervisor -//! -//! To run this example: -//! `cargo run --example basic_openrpc_client` -//! -//! This example is completely self-contained and will start/stop the supervisor automatically. - -use hero_supervisor_openrpc_client::{ - SupervisorClient, RunnerConfig, RunnerType, ProcessManagerType, - JobBuilder -}; -use std::time::Duration; -use escargot::CargoBuild; -use std::process::Stdio; -use tokio::time::sleep; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // env_logger::init(); // Commented out to avoid version conflicts - - println!("🚀 Comprehensive OpenRPC Example for Hero Supervisor"); - println!("===================================================="); - - // Build the supervisor with OpenRPC feature (force rebuild to avoid escargot caching) - println!("\n🔨 Force rebuilding supervisor with OpenRPC feature..."); - - // Clear target directory to force fresh build - let _ = std::process::Command::new("cargo") - .arg("clean") - .output(); - - let supervisor_binary = CargoBuild::new() - .bin("supervisor") - .features("openrpc") - .current_release() - .run()?; - - println!("✅ Supervisor binary built successfully"); - - // Build the mock runner binary - println!("\n🔨 Building mock runner binary..."); - let mock_runner_binary = CargoBuild::new() - .example("mock_runner") - .current_release() - .run()?; - - println!("✅ Mock runner binary built successfully"); - - // Start the supervisor process - println!("\n🚀 Starting supervisor with OpenRPC server..."); - let mut supervisor_process = supervisor_binary - .command() - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn()?; - - println!("✅ Supervisor process started (PID: {})", supervisor_process.id()); - - // Wait for the server to start up - println!("\n⏳ Waiting for OpenRPC server to start..."); - sleep(Duration::from_secs(5)).await; - - // Create client - let client = SupervisorClient::new("http://127.0.0.1:3030")?; - println!("✅ Client created for: {}", client.server_url()); - - // Test connectivity with retries - println!("\n🔍 Testing server connectivity..."); - let mut connection_attempts = 0; - let max_attempts = 10; - - loop { - connection_attempts += 1; - match client.list_runners().await { - Ok(runners) => { - println!("✅ Server is responsive"); - 
println!("📋 Current runners: {:?}", runners); - break; - } - Err(e) if connection_attempts < max_attempts => { - println!("⏳ Attempt {}/{}: Server not ready yet, retrying...", connection_attempts, max_attempts); - sleep(Duration::from_secs(1)).await; - continue; - } - Err(e) => { - eprintln!("❌ Failed to connect to server after {} attempts: {}", max_attempts, e); - // Clean up the supervisor process before returning - let _ = supervisor_process.kill(); - return Err(e.into()); - } - } - } - - // Add a simple runner using the mock runner binary - let config = RunnerConfig { - actor_id: "basic_example_actor".to_string(), - runner_type: RunnerType::OSISRunner, - binary_path: mock_runner_binary.path().to_path_buf(), - db_path: "/tmp/example_db".to_string(), - redis_url: "redis://localhost:6379".to_string(), - }; - - println!("➕ Adding runner: {}", config.actor_id); - client.add_runner(config, ProcessManagerType::Simple).await?; - - // Start the runner - println!("▶️ Starting runner..."); - client.start_runner("basic_example_actor").await?; - - // Check status - let status = client.get_runner_status("basic_example_actor").await?; - println!("📊 Runner status: {:?}", status); - - // Create and queue multiple jobs to demonstrate functionality - let jobs = vec![ - ("Hello World", "print('Hello from comprehensive OpenRPC example!');"), - ("Math Calculation", "let result = 42 * 2; print(`The answer is: ${result}`);"), - ("Current Time", "print('Job executed at: ' + new Date().toISOString());"), - ]; - - let mut job_ids = Vec::new(); - - for (description, payload) in jobs { - let job = JobBuilder::new() - .caller_id("comprehensive_client") - .context_id("demo") - .payload(payload) - .runner("basic_example_actor") - .executor("rhai") - .timeout(30) - .build()?; - - println!("📤 Queuing job '{}': {}", description, job.id); - client.queue_job_to_runner("basic_example_actor", job.clone()).await?; - job_ids.push((job.id, description.to_string())); - - // Small delay between jobs - sleep(Duration::from_millis(500)).await; - } - - // Demonstrate synchronous job execution using polling approach - // (Note: queue_and_wait OpenRPC method registration needs debugging) - println!("\n🎯 Demonstrating synchronous job execution with result verification..."); - - let sync_jobs = vec![ - ("Synchronous Hello", "print('Hello from synchronous execution!');"), - ("Synchronous Math", "let result = 123 + 456; print(`Calculation result: ${result}`);"), - ("Synchronous Status", "print('Job processed with result verification');"), - ]; - - for (description, payload) in sync_jobs { - let job = JobBuilder::new() - .caller_id("sync_client") - .context_id("sync_demo") - .payload(payload) - .runner("basic_example_actor") - .executor("rhai") - .timeout(30) - .build()?; - - println!("🚀 Executing '{}' with result verification...", description); - let job_id = job.id.clone(); - - // Queue the job - client.queue_job_to_runner("basic_example_actor", job).await?; - - // Poll for completion with timeout - let mut attempts = 0; - let max_attempts = 20; // 10 seconds with 500ms intervals - let mut result = None; - - while attempts < max_attempts { - match client.get_job_result(&job_id).await { - Ok(Some(job_result)) => { - result = Some(job_result); - break; - } - Ok(None) => { - // Job not finished yet, wait and retry - sleep(Duration::from_millis(500)).await; - attempts += 1; - } - Err(e) => { - println!("⚠️ Error getting result for job {}: {}", job_id, e); - break; - } - } - } - - match result { - Some(job_result) => { - println!("✅ Job '{}' 
completed successfully!", description); - println!(" 📋 Job ID: {}", job_id); - println!(" 📤 Result: {}", job_result); - } - None => { - println!("⏰ Job '{}' did not complete within timeout", description); - } - } - - // Small delay between jobs - sleep(Duration::from_millis(500)).await; - } - - // Demonstrate bulk operations and status monitoring - println!("\n📊 Demonstrating bulk operations and status monitoring..."); - - // Get all runner statuses - println!("📋 Getting all runner statuses..."); - match client.get_all_runner_status().await { - Ok(statuses) => { - println!("✅ Runner statuses:"); - for (runner_id, status) in statuses { - println!(" - {}: {:?}", runner_id, status); - } - } - Err(e) => println!("❌ Failed to get runner statuses: {}", e), - } - - // List all runners one more time - println!("\n📋 Final runner list:"); - match client.list_runners().await { - Ok(runners) => { - println!("✅ Active runners: {:?}", runners); - } - Err(e) => println!("❌ Failed to list runners: {}", e), - } - - // Stop and remove runner - println!("\n⏹️ Stopping runner..."); - client.stop_runner("basic_example_actor", false).await?; - - println!("🗑️ Removing runner..."); - client.remove_runner("basic_example_actor").await?; - - // Final verification - println!("\n🔍 Final verification - listing remaining runners..."); - match client.list_runners().await { - Ok(runners) => { - if runners.contains(&"basic_example_actor".to_string()) { - println!("⚠️ Runner still present: {:?}", runners); - } else { - println!("✅ Runner successfully removed. Remaining runners: {:?}", runners); - } - } - Err(e) => println!("❌ Failed to verify runner removal: {}", e), - } - - // Gracefully shutdown the supervisor process - println!("\n🛑 Shutting down supervisor process..."); - match supervisor_process.kill() { - Ok(()) => { - println!("✅ Supervisor process terminated successfully"); - // Wait for the process to fully exit - match supervisor_process.wait() { - Ok(status) => println!("✅ Supervisor exited with status: {}", status), - Err(e) => println!("⚠️ Error waiting for supervisor exit: {}", e), - } - } - Err(e) => println!("⚠️ Error terminating supervisor: {}", e), - } - - println!("\n🎉 Comprehensive OpenRPC Example Complete!"); - println!("=========================================="); - println!("✅ Successfully demonstrated:"); - println!(" - Automatic supervisor startup with escargot"); - println!(" - Mock runner binary integration"); - println!(" - OpenRPC client connectivity with retry logic"); - println!(" - Runner management (add, start, stop, remove)"); - println!(" - Asynchronous job creation and queuing"); - println!(" - Synchronous job execution with result polling"); - println!(" - Job result verification from Redis job hash"); - println!(" - Bulk operations and status monitoring"); - println!(" - Graceful cleanup and supervisor shutdown"); - println!("\n🎯 The Hero Supervisor OpenRPC integration is fully functional!"); - println!("📝 Note: queue_and_wait method implemented but OpenRPC registration needs debugging"); - println!("🚀 Both async job queuing and sync result polling patterns work perfectly!"); - - Ok(()) -} diff --git a/bin/supervisor/examples/_archive/end_to_end_demo.rs b/bin/supervisor/examples/_archive/end_to_end_demo.rs deleted file mode 100644 index 11c3f19..0000000 --- a/bin/supervisor/examples/_archive/end_to_end_demo.rs +++ /dev/null @@ -1,278 +0,0 @@ -//! End-to-End Demo: Supervisor + Runner + Client -//! -//! This example demonstrates the complete workflow: -//! 1. 
Starts a supervisor with Mycelium integration -//! 2. Starts an OSIS runner -//! 3. Uses the supervisor client to run jobs -//! 4. Shows both job.run (blocking) and job.start (non-blocking) modes -//! -//! Prerequisites: -//! - Redis running on localhost:6379 -//! -//! Usage: -//! ```bash -//! RUST_LOG=info cargo run --example end_to_end_demo -//! ``` - -use anyhow::{Result, Context}; -use log::{info, error}; -use std::process::{Command, Child, Stdio}; -use std::time::Duration; -use tokio::time::sleep; -use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder}; - -/// Configuration for the demo -struct DemoConfig { - redis_url: String, - supervisor_port: u16, - runner_id: String, - db_path: String, -} - -impl Default for DemoConfig { - fn default() -> Self { - Self { - redis_url: "redis://localhost:6379".to_string(), - supervisor_port: 3030, - runner_id: "example_runner".to_string(), - db_path: "/tmp/example_runner.db".to_string(), - } - } -} - -/// Supervisor process wrapper -struct SupervisorProcess { - child: Child, -} - -impl SupervisorProcess { - fn start(config: &DemoConfig) -> Result { - info!("🚀 Starting supervisor on port {}...", config.supervisor_port); - - let child = Command::new("cargo") - .args(&[ - "run", - "--bin", - "hero-supervisor", - "--", - "--redis-url", - &config.redis_url, - "--port", - &config.supervisor_port.to_string(), - ]) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .context("Failed to start supervisor")?; - - Ok(Self { child }) - } -} - -impl Drop for SupervisorProcess { - fn drop(&mut self) { - info!("🛑 Stopping supervisor..."); - let _ = self.child.kill(); - let _ = self.child.wait(); - } -} - -/// Runner process wrapper -struct RunnerProcess { - child: Child, -} - -impl RunnerProcess { - fn start(config: &DemoConfig) -> Result { - info!("🤖 Starting OSIS runner '{}'...", config.runner_id); - - let child = Command::new("cargo") - .args(&[ - "run", - "--bin", - "runner_osis", - "--", - &config.runner_id, - "--db-path", - &config.db_path, - "--redis-url", - &config.redis_url, - ]) - .env("RUST_LOG", "info") - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .context("Failed to start runner")?; - - Ok(Self { child }) - } -} - -impl Drop for RunnerProcess { - fn drop(&mut self) { - info!("🛑 Stopping runner..."); - let _ = self.child.kill(); - let _ = self.child.wait(); - } -} - -/// Helper functions for the demo -async fn register_runner_helper(client: &SupervisorClient, runner_id: &str, secret: &str) -> Result<()> { - info!("📝 Registering runner '{}'...", runner_id); - - let queue = format!("hero:q:work:type:osis:group:default:inst:{}", runner_id); - client.register_runner(secret, runner_id, &queue).await?; - - info!("✅ Runner registered successfully"); - Ok(()) -} - -async fn run_job_helper(client: &SupervisorClient, job: runner_rust::job::Job, secret: &str, timeout: u64) -> Result { - info!("🚀 Running job {} (blocking)...", job.id); - - let response = client.job_run(secret, job, Some(timeout)).await?; - - let result = response.result - .ok_or_else(|| anyhow::anyhow!("No result in response"))?; - - info!("✅ Job completed with result: {}", result); - Ok(result) -} - -async fn start_job_helper(client: &SupervisorClient, job: runner_rust::job::Job, secret: &str) -> Result { - info!("🚀 Starting job {} (non-blocking)...", job.id); - - let response = client.job_start(secret, job).await?; - - info!("✅ Job queued with ID: {}", response.job_id); - Ok(response.job_id) -} - -#[tokio::main] -async fn main() -> Result<()> { - // 
Initialize logging - env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); - - println!("\n╔════════════════════════════════════════════════════════════╗"); - println!("║ End-to-End Demo: Supervisor + Runner + Client ║"); - println!("╚════════════════════════════════════════════════════════════╝\n"); - - let config = DemoConfig::default(); - - // Step 1: Start supervisor - println!("📋 Step 1: Starting Supervisor"); - println!("─────────────────────────────────────────────────────────────"); - let _supervisor = SupervisorProcess::start(&config)?; - sleep(Duration::from_secs(3)).await; - println!("✅ Supervisor started on port {}\n", config.supervisor_port); - - // Step 2: Start runner - println!("📋 Step 2: Starting OSIS Runner"); - println!("─────────────────────────────────────────────────────────────"); - let _runner = RunnerProcess::start(&config)?; - sleep(Duration::from_secs(3)).await; - println!("✅ Runner '{}' started\n", config.runner_id); - - // Step 3: Create client and register runner - println!("📋 Step 3: Registering Runner with Supervisor"); - println!("─────────────────────────────────────────────────────────────"); - let client = SupervisorClient::new(&format!("http://localhost:{}", config.supervisor_port))?; - register_runner_helper(&client, &config.runner_id, "admin_secret").await?; - println!("✅ Runner registered\n"); - - sleep(Duration::from_secs(2)).await; - - // Step 4: Run blocking jobs (job.run) - println!("📋 Step 4: Running Blocking Jobs (job.run)"); - println!("─────────────────────────────────────────────────────────────"); - - // Job 1: Simple calculation - println!("\n🔹 Job 1: Simple Calculation"); - let job1 = JobBuilder::new() - .caller_id("demo_client") - .context_id("demo_context") - .payload("let result = 2 + 2; to_json(result)") - .runner(&config.runner_id) - .executor("rhai") - .timeout(30) - .build()?; - - let result1 = run_job_helper(&client, job1, "admin_secret", 30).await?; - println!(" Result: {}", result1); - - // Job 2: String manipulation - println!("\n🔹 Job 2: String Manipulation"); - let job2 = JobBuilder::new() - .caller_id("demo_client") - .context_id("demo_context") - .payload(r#"let msg = "Hello from OSIS Runner!"; to_json(msg)"#) - .runner(&config.runner_id) - .executor("rhai") - .timeout(30) - .build()?; - - let result2 = run_job_helper(&client, job2, "admin_secret", 30).await?; - println!(" Result: {}", result2); - - // Job 3: Array operations - println!("\n🔹 Job 3: Array Operations"); - let job3 = JobBuilder::new() - .caller_id("demo_client") - .context_id("demo_context") - .payload(r#" - let numbers = [1, 2, 3, 4, 5]; - let sum = 0; - for n in numbers { - sum += n; - } - to_json(#{sum: sum, count: numbers.len()}) - "#) - .runner(&config.runner_id) - .executor("rhai") - .timeout(30) - .build()?; - - let result3 = run_job_helper(&client, job3, "admin_secret", 30).await?; - println!(" Result: {}", result3); - - println!("\n✅ All blocking jobs completed successfully\n"); - - // Step 5: Start non-blocking jobs (job.start) - println!("📋 Step 5: Starting Non-Blocking Jobs (job.start)"); - println!("─────────────────────────────────────────────────────────────"); - - println!("\n🔹 Job 4: Background Task"); - let job4 = JobBuilder::new() - .caller_id("demo_client") - .context_id("demo_context") - .payload(r#" - let result = "Background task completed"; - to_json(result) - "#) - .runner(&config.runner_id) - .executor("rhai") - .timeout(30) - .build()?; - - let job4_id = start_job_helper(&client, job4, 
"admin_secret").await?; - println!(" Job ID: {} (running in background)", job4_id); - - println!("\n✅ Non-blocking job started\n"); - - // Step 6: Summary - println!("📋 Step 6: Demo Summary"); - println!("─────────────────────────────────────────────────────────────"); - println!("✅ Supervisor: Running on port {}", config.supervisor_port); - println!("✅ Runner: '{}' registered and processing jobs", config.runner_id); - println!("✅ Blocking jobs: 3 completed successfully"); - println!("✅ Non-blocking jobs: 1 started"); - println!("\n🎉 Demo completed successfully!"); - - // Keep processes running for a bit to see logs - println!("\n⏳ Keeping processes running for 5 seconds..."); - sleep(Duration::from_secs(5)).await; - - println!("\n🛑 Shutting down..."); - - Ok(()) -} diff --git a/bin/supervisor/examples/_archive/integration_test.rs b/bin/supervisor/examples/_archive/integration_test.rs deleted file mode 100644 index a540187..0000000 --- a/bin/supervisor/examples/_archive/integration_test.rs +++ /dev/null @@ -1,196 +0,0 @@ -//! Integration test for the new job API -//! -//! This test demonstrates the complete job lifecycle and validates -//! that all new API methods work correctly together. - -use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult}; -use std::time::Duration; -use tokio::time::sleep; - -#[tokio::test] -async fn test_complete_job_lifecycle() -> Result<(), Box> { - // Skip test if supervisor is not running - let client = match SupervisorClient::new("http://localhost:3030") { - Ok(c) => c, - Err(_) => { - println!("Skipping integration test - supervisor not available"); - return Ok(()); - } - }; - - // Test connection - if client.discover().await.is_err() { - println!("Skipping integration test - supervisor not responding"); - return Ok(()); - } - - let secret = "user-secret-456"; - - // Test 1: Create job - let job = JobBuilder::new() - .caller_id("integration_test") - .context_id("test_lifecycle") - .payload("echo 'Integration test job'") - .executor("osis") - .runner("osis_runner_1") - .timeout(30) - .build()?; - - let job_id = client.jobs_create(secret, job).await?; - assert!(!job_id.is_empty()); - - // Test 2: Start job - client.job_start(secret, &job_id).await?; - - // Test 3: Monitor status - let mut attempts = 0; - let max_attempts = 15; // 15 seconds max - let mut final_status = String::new(); - - while attempts < max_attempts { - let status = client.job_status(&job_id).await?; - final_status = status.status.clone(); - - if final_status == "completed" || final_status == "failed" || final_status == "timeout" { - break; - } - - attempts += 1; - sleep(Duration::from_secs(1)).await; - } - - // Test 4: Get result - let result = client.job_result(&job_id).await?; - match result { - JobResult::Success { success: _ } => { - assert_eq!(final_status, "completed"); - }, - JobResult::Error { error: _ } => { - assert!(final_status == "failed" || final_status == "timeout"); - } - } - - Ok(()) -} - -#[tokio::test] -async fn test_job_run_immediate() -> Result<(), Box> { - let client = match SupervisorClient::new("http://localhost:3030") { - Ok(c) => c, - Err(_) => return Ok(()), // Skip if not available - }; - - if client.discover().await.is_err() { - return Ok(()); // Skip if not responding - } - - let secret = "user-secret-456"; - - let job = JobBuilder::new() - .caller_id("integration_test") - .context_id("test_immediate") - .payload("echo 'Immediate job test'") - .executor("osis") - .runner("osis_runner_1") - .timeout(30) - .build()?; - - // Test immediate 
execution - let result = client.job_run(secret, job).await?; - - // Should get either success or error, but not panic - match result { - JobResult::Success { success } => { - assert!(!success.is_empty()); - }, - JobResult::Error { error } => { - assert!(!error.is_empty()); - } - } - - Ok(()) -} - -#[tokio::test] -async fn test_jobs_list() -> Result<(), Box> { - let client = match SupervisorClient::new("http://localhost:3030") { - Ok(c) => c, - Err(_) => return Ok(()), // Skip if not available - }; - - if client.discover().await.is_err() { - return Ok(()); // Skip if not responding - } - - // Test listing jobs - let job_ids = client.jobs_list().await?; - - // Should return a vector (might be empty) - assert!(job_ids.len() >= 0); - - Ok(()) -} - -#[tokio::test] -async fn test_authentication_errors() -> Result<(), Box> { - let client = match SupervisorClient::new("http://localhost:3030") { - Ok(c) => c, - Err(_) => return Ok(()), // Skip if not available - }; - - if client.discover().await.is_err() { - return Ok(()); // Skip if not responding - } - - let invalid_secret = "invalid-secret"; - - let job = JobBuilder::new() - .caller_id("integration_test") - .context_id("test_auth") - .payload("echo 'Auth test'") - .executor("osis") - .runner("osis_runner_1") - .timeout(30) - .build()?; - - // Test that invalid secret fails - let result = client.jobs_create(invalid_secret, job.clone()).await; - assert!(result.is_err()); - - let result = client.job_run(invalid_secret, job.clone()).await; - assert!(result.is_err()); - - let result = client.job_start(invalid_secret, "fake-job-id").await; - assert!(result.is_err()); - - Ok(()) -} - -#[tokio::test] -async fn test_nonexistent_job_operations() -> Result<(), Box> { - let client = match SupervisorClient::new("http://localhost:3030") { - Ok(c) => c, - Err(_) => return Ok(()), // Skip if not available - }; - - if client.discover().await.is_err() { - return Ok(()); // Skip if not responding - } - - let fake_job_id = "nonexistent-job-id"; - - // Test operations on nonexistent job - let result = client.job_status(fake_job_id).await; - assert!(result.is_err()); - - let result = client.job_result(fake_job_id).await; - assert!(result.is_err()); - - Ok(()) -} - -#[tokio::main] -async fn main() -> Result<(), Box> { - println!("Integration test example - this would contain test logic"); - Ok(()) -} diff --git a/bin/supervisor/examples/_archive/job_api_examples.rs b/bin/supervisor/examples/_archive/job_api_examples.rs deleted file mode 100644 index 10f6ad7..0000000 --- a/bin/supervisor/examples/_archive/job_api_examples.rs +++ /dev/null @@ -1,269 +0,0 @@ -//! Examples demonstrating the new job API workflows -//! -//! This example shows how to use the new job API methods: -//! - jobs.create: Create a job without queuing -//! - jobs.list: List all jobs -//! - job.run: Run a job and get result immediately -//! - job.start: Start a created job -//! - job.status: Get job status (non-blocking) -//! 
- job.result: Get job result (blocking) - -use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult}; -use std::time::Duration; -use tokio::time::sleep; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Initialize logging - env_logger::init(); - - println!("🚀 Hero Supervisor Job API Examples"); - println!("===================================\n"); - - // Create client - let client = SupervisorClient::new("http://localhost:3030")?; - let secret = "user-secret-456"; // Use a user secret for job operations - - // Test connection - println!("📡 Testing connection..."); - match client.discover().await { - Ok(_) => println!("✅ Connected to supervisor\n"), - Err(e) => { - println!("❌ Failed to connect: {}", e); - println!("Make sure the supervisor is running with: ./supervisor --config examples/supervisor/config.toml\n"); - return Ok(()); - } - } - - // Example 1: Fire-and-forget job execution - println!("🔥 Example 1: Fire-and-forget job execution"); - println!("--------------------------------------------"); - - let job = JobBuilder::new() - .caller_id("example_client") - .context_id("fire_and_forget") - .payload("echo 'Hello from fire-and-forget job!'") - .executor("osis") - .runner("osis_runner_1") - .timeout(30) - .build()?; - - println!("Running job immediately..."); - match client.job_run(secret, job).await { - Ok(JobResult::Success { success }) => { - println!("✅ Job completed successfully:"); - println!(" Output: {}", success); - }, - Ok(JobResult::Error { error }) => { - println!("❌ Job failed:"); - println!(" Error: {}", error); - }, - Err(e) => { - println!("❌ API call failed: {}", e); - } - } - println!(); - - // Example 2: Asynchronous job processing - println!("⏰ Example 2: Asynchronous job processing"); - println!("------------------------------------------"); - - let job = JobBuilder::new() - .caller_id("example_client") - .context_id("async_processing") - .payload("sleep 2 && echo 'Hello from async job!'") - .executor("osis") - .runner("osis_runner_1") - .timeout(60) - .build()?; - - // Step 1: Create the job - println!("1. Creating job..."); - let job_id = match client.jobs_create(secret, job).await { - Ok(id) => { - println!("✅ Job created with ID: {}", id); - id - }, - Err(e) => { - println!("❌ Failed to create job: {}", e); - return Ok(()); - } - }; - - // Step 2: Start the job - println!("2. Starting job..."); - match client.job_start(secret, &job_id).await { - Ok(_) => println!("✅ Job started"), - Err(e) => { - println!("❌ Failed to start job: {}", e); - return Ok(()); - } - } - - // Step 3: Poll for completion (non-blocking) - println!("3. Monitoring job progress..."); - let mut attempts = 0; - let max_attempts = 30; // 30 seconds max - - loop { - attempts += 1; - - match client.job_status(&job_id).await { - Ok(status) => { - println!(" Status: {} (attempt {})", status.status, attempts); - - if status.status == "completed" || status.status == "failed" || status.status == "timeout" { - break; - } - - if attempts >= max_attempts { - println!(" ⏰ Timeout waiting for job completion"); - break; - } - - sleep(Duration::from_secs(1)).await; - }, - Err(e) => { - println!(" ❌ Failed to get job status: {}", e); - break; - } - } - } - - // Step 4: Get the result - println!("4. 
Getting job result..."); - match client.job_result(&job_id).await { - Ok(JobResult::Success { success }) => { - println!("✅ Job completed successfully:"); - println!(" Output: {}", success); - }, - Ok(JobResult::Error { error }) => { - println!("❌ Job failed:"); - println!(" Error: {}", error); - }, - Err(e) => { - println!("❌ Failed to get job result: {}", e); - } - } - println!(); - - // Example 3: Batch job processing - println!("📦 Example 3: Batch job processing"); - println!("-----------------------------------"); - - let job_specs = vec![ - ("echo 'Batch job 1'", "batch_1"), - ("echo 'Batch job 2'", "batch_2"), - ("echo 'Batch job 3'", "batch_3"), - ]; - - let mut job_ids = Vec::new(); - - // Create all jobs - println!("Creating batch jobs..."); - for (i, (payload, context)) in job_specs.iter().enumerate() { - let job = JobBuilder::new() - .caller_id("example_client") - .context_id(context) - .payload(payload) - .executor("osis") - .runner("osis_runner_1") - .timeout(30) - .build()?; - - match client.jobs_create(secret, job).await { - Ok(job_id) => { - println!("✅ Created job {}: {}", i + 1, job_id); - job_ids.push(job_id); - }, - Err(e) => { - println!("❌ Failed to create job {}: {}", i + 1, e); - } - } - } - - // Start all jobs - println!("Starting all batch jobs..."); - for (i, job_id) in job_ids.iter().enumerate() { - match client.job_start(secret, job_id).await { - Ok(_) => println!("✅ Started job {}", i + 1), - Err(e) => println!("❌ Failed to start job {}: {}", i + 1, e), - } - } - - // Collect results - println!("Collecting results..."); - for (i, job_id) in job_ids.iter().enumerate() { - match client.job_result(job_id).await { - Ok(JobResult::Success { success }) => { - println!("✅ Job {} result: {}", i + 1, success); - }, - Ok(JobResult::Error { error }) => { - println!("❌ Job {} failed: {}", i + 1, error); - }, - Err(e) => { - println!("❌ Failed to get result for job {}: {}", i + 1, e); - } - } - } - println!(); - - // Example 4: List all jobs - println!("📋 Example 4: Listing all jobs"); - println!("-------------------------------"); - - match client.jobs_list().await { - Ok(jobs) => { - println!("✅ Found {} jobs in the system:", jobs.len()); - for (i, job) in jobs.iter().take(10).enumerate() { - println!(" {}. {}", i + 1, job.id); - } - if jobs.len() > 10 { - println!(" ... 
and {} more", jobs.len() - 10); - } - }, - Err(e) => { - println!("❌ Failed to list jobs: {}", e); - } - } - println!(); - - println!("🎉 All examples completed!"); - println!("\nAPI Convention Summary:"); - println!("- jobs.create: Create job without queuing"); - println!("- jobs.list: List all job IDs"); - println!("- job.run: Run job and return result immediately"); - println!("- job.start: Start a created job"); - println!("- job.status: Get job status (non-blocking)"); - println!("- job.result: Get job result (blocking)"); - - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_job_builder() { - let job = JobBuilder::new() - .caller_id("test") - .context_id("test") - .payload("echo 'test'") - .executor("osis") - .runner("test_runner") - .build(); - - assert!(job.is_ok()); - let job = job.unwrap(); - assert_eq!(job.caller_id, "test"); - assert_eq!(job.context_id, "test"); - assert_eq!(job.payload, "echo 'test'"); - } - - #[tokio::test] - async fn test_client_creation() { - let client = SupervisorClient::new("http://localhost:3030"); - assert!(client.is_ok()); - } -} diff --git a/bin/supervisor/examples/_archive/mock_runner.rs b/bin/supervisor/examples/_archive/mock_runner.rs deleted file mode 100644 index 26b54e7..0000000 --- a/bin/supervisor/examples/_archive/mock_runner.rs +++ /dev/null @@ -1,171 +0,0 @@ -//! Mock Runner Binary for Testing OpenRPC Examples -//! -//! This is a simple mock runner that simulates an actor binary for testing -//! the Hero Supervisor OpenRPC integration. It connects to Redis, listens for -//! jobs using the proper Hero job queue system, and echoes the job payload. -//! -//! Usage: -//! ```bash -//! cargo run --example mock_runner -- --actor-id test_actor --db-path /tmp/test_db --redis-url redis://localhost:6379 -//! 
``` - -use std::env; -use std::time::Duration; -use tokio::time::sleep; -use redis::AsyncCommands; -use hero_supervisor::{ - Job, JobStatus, JobError, Client, ClientBuilder -}; - -#[derive(Debug, Clone)] -pub struct MockRunnerConfig { - pub actor_id: String, - pub db_path: String, - pub redis_url: String, -} - -impl MockRunnerConfig { - pub fn from_args() -> Result> { - let args: Vec = env::args().collect(); - - let mut actor_id = None; - let mut db_path = None; - let mut redis_url = None; - - let mut i = 1; - while i < args.len() { - match args[i].as_str() { - "--actor-id" => { - if i + 1 < args.len() { - actor_id = Some(args[i + 1].clone()); - i += 2; - } else { - return Err("Missing value for --actor-id".into()); - } - } - "--db-path" => { - if i + 1 < args.len() { - db_path = Some(args[i + 1].clone()); - i += 2; - } else { - return Err("Missing value for --db-path".into()); - } - } - "--redis-url" => { - if i + 1 < args.len() { - redis_url = Some(args[i + 1].clone()); - i += 2; - } else { - return Err("Missing value for --redis-url".into()); - } - } - _ => i += 1, - } - } - - Ok(MockRunnerConfig { - actor_id: actor_id.ok_or("Missing required --actor-id argument")?, - db_path: db_path.ok_or("Missing required --db-path argument")?, - redis_url: redis_url.unwrap_or_else(|| "redis://localhost:6379".to_string()), - }) - } -} - -pub struct MockRunner { - config: MockRunnerConfig, - client: Client, -} - -impl MockRunner { - pub async fn new(config: MockRunnerConfig) -> Result> { - let client = ClientBuilder::new() - .redis_url(&config.redis_url) - .build() - .await?; - - Ok(MockRunner { - config, - client, - }) - } - - pub async fn run(&self) -> Result<(), Box> { - println!("🤖 Mock Runner '{}' starting...", self.config.actor_id); - println!("📂 DB Path: {}", self.config.db_path); - println!("🔗 Redis URL: {}", self.config.redis_url); - - // Use the proper Hero job queue key for this actor instance - // Format: hero:q:work:type:{job_type}:group:{group}:inst:{instance} - let work_queue_key = format!("hero:q:work:type:osis:group:default:inst:{}", self.config.actor_id); - - println!("👂 Listening for jobs on queue: {}", work_queue_key); - - loop { - // Try to pop a job ID from the work queue using the Hero protocol - let job_id = self.client.get_job_id(&work_queue_key).await?; - - match job_id { - Some(job_id) => { - println!("📨 Received job ID: {}", job_id); - if let Err(e) = self.process_job(&job_id).await { - eprintln!("❌ Error processing job {}: {}", job_id, e); - // Mark job as error - if let Err(e2) = self.client.set_job_status(&job_id, JobStatus::Error).await { - eprintln!("❌ Failed to set job error status: {}", e2); - } - } - } - None => { - // No jobs available, wait a bit - sleep(Duration::from_millis(100)).await; - } - } - } - } - - async fn process_job(&self, job_id: &str) -> Result<(), JobError> { - // Load the job from Redis using the Hero job system - let job = self.client.get_job(job_id).await?; - - self.process_job_internal(&self.client, job_id, &job).await - } - - async fn process_job_internal( - &self, - client: &Client, - job_id: &str, - job: &Job, - ) -> Result<(), JobError> { - println!("🔄 Processing job {} with payload: {}", job_id, job.payload); - - // Mark job as started - client.set_job_status(job_id, JobStatus::Started).await?; - println!("🚀 Job {} marked as Started", job_id); - - // Simulate processing time - sleep(Duration::from_millis(500)).await; - - // Echo the payload (simulate job execution) - let output = format!("echo: {}", job.payload); - println!("📤 Output: {}", 
output); - - // Set the job result - client.set_result(job_id, &output).await?; - - println!("✅ Job {} completed successfully", job_id); - - Ok(()) - } -} - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Parse command line arguments - let config = MockRunnerConfig::from_args()?; - - // Create and run the mock runner - let runner = MockRunner::new(config).await?; - runner.run().await?; - - Ok(()) -} diff --git a/bin/supervisor/examples/_archive/simple_e2e.rs b/bin/supervisor/examples/_archive/simple_e2e.rs deleted file mode 100644 index 5776b9f..0000000 --- a/bin/supervisor/examples/_archive/simple_e2e.rs +++ /dev/null @@ -1,203 +0,0 @@ -//! Simple End-to-End Example -//! -//! A minimal example showing supervisor + runner + client workflow. -//! -//! Prerequisites: -//! - Redis running on localhost:6379 -//! -//! Usage: -//! ```bash -//! # Terminal 1: Start Redis -//! redis-server -//! -//! # Terminal 2: Run this example -//! RUST_LOG=info cargo run --example simple_e2e -//! ``` - -use anyhow::Result; -use log::info; -use std::time::Duration; -use tokio::time::sleep; -use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); - - println!("\n╔════════════════════════════════════════╗"); - println!("║ Simple End-to-End Demo ║"); - println!("╚════════════════════════════════════════╝\n"); - - let supervisor_url = "http://localhost:3030"; - let runner_id = "test_runner"; - let secret = "admin_secret"; - - // Create supervisor client - let client = SupervisorClient::new(supervisor_url)?; - - println!("📝 Prerequisites:"); - println!(" 1. Redis running on localhost:6379"); - println!(" 2. Supervisor running on {}", supervisor_url); - println!(" 3. 
Runner '{}' registered and running\n", runner_id); - - println!("💡 To start the supervisor:"); - println!(" cargo run --bin hero-supervisor -- --redis-url redis://localhost:6379\n"); - - println!("💡 To start a runner:"); - println!(" cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner_rust"); - println!(" cargo run --bin runner_osis -- {} --redis-url redis://localhost:6379\n", runner_id); - - println!("⏳ Waiting 3 seconds for you to start the prerequisites...\n"); - sleep(Duration::from_secs(3)).await; - - // Register runner - println!("📋 Step 1: Registering Runner"); - println!("─────────────────────────────────────────"); - - let queue = format!("hero:q:work:type:osis:group:default:inst:{}", runner_id); - match client.register_runner(secret, runner_id, &queue).await { - Ok(_) => { - println!("✅ Runner registered successfully"); - } - Err(e) => { - println!("⚠️ Registration error: {} (runner might already be registered)", e); - } - } - - sleep(Duration::from_secs(1)).await; - - // Run a simple job - println!("\n📋 Step 2: Running a Simple Job (Blocking)"); - println!("─────────────────────────────────────────"); - - let job = JobBuilder::new() - .caller_id("simple_demo") - .context_id("demo_context") - .payload(r#" - let message = "Hello from the runner!"; - let number = 42; - to_json(#{ - message: message, - number: number, - timestamp: timestamp() - }) - "#) - .runner(runner_id) - .executor("rhai") - .timeout(30) - .build()?; - - let job_id = job.id.clone(); - info!("Sending job with ID: {}", job_id); - - match client.job_run(secret, job, Some(30)).await { - Ok(response) => { - println!("✅ Job completed!"); - if let Some(result) = response.result { - println!(" Result: {}", result); - } - } - Err(e) => { - println!("❌ Job failed: {}", e); - return Ok(()); - } - } - - // Run another job (calculation) - println!("\n📋 Step 3: Running a Calculation Job"); - println!("─────────────────────────────────────────"); - - let calc_job = JobBuilder::new() - .caller_id("simple_demo") - .context_id("demo_context") - .payload(r#" - let numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; - let sum = 0; - let product = 1; - - for n in numbers { - sum += n; - product *= n; - } - - to_json(#{ - sum: sum, - product: product, - count: numbers.len(), - average: sum / numbers.len() - }) - "#) - .runner(runner_id) - .executor("rhai") - .timeout(30) - .build()?; - - let calc_job_id = calc_job.id.clone(); - info!("Sending calculation job with ID: {}", calc_job_id); - - match client.job_run(secret, calc_job, Some(30)).await { - Ok(response) => { - println!("✅ Calculation completed!"); - if let Some(result) = response.result { - println!(" Result: {}", result); - } - } - Err(e) => { - println!("❌ Calculation failed: {}", e); - } - } - - // Start a non-blocking job - println!("\n📋 Step 4: Starting a Non-Blocking Job"); - println!("─────────────────────────────────────────"); - - let async_job = JobBuilder::new() - .caller_id("simple_demo") - .context_id("demo_context") - .payload(r#" - let result = "This job was started asynchronously"; - to_json(result) - "#) - .runner(runner_id) - .executor("rhai") - .timeout(30) - .build()?; - - let async_job_id = async_job.id.clone(); - info!("Starting async job with ID: {}", async_job_id); - - match client.job_start(secret, async_job).await { - Ok(response) => { - println!("✅ Job started!"); - println!(" Job ID: {} (running in background)", response.job_id); - println!(" Status: {}", response.status); - } - Err(e) => { - println!("❌ Failed to start job: {}", e); - } - } - - // 
Summary - println!("\n╔════════════════════════════════════════╗"); - println!("║ Demo Summary ║"); - println!("╚════════════════════════════════════════╝"); - println!("✅ Runner registered: {}", runner_id); - println!("✅ Blocking jobs completed: 2"); - println!("✅ Non-blocking jobs started: 1"); - println!("\n🎉 Demo completed successfully!\n"); - - println!("📚 What happened:"); - println!(" 1. Registered a runner with the supervisor"); - println!(" 2. Sent jobs with Rhai scripts to execute"); - println!(" 3. Supervisor queued jobs to the runner"); - println!(" 4. Runner executed the scripts and returned results"); - println!(" 5. Client received results (for blocking jobs)\n"); - - println!("🔍 Key Concepts:"); - println!(" • job.run = Execute and wait for result (blocking)"); - println!(" • job.start = Start and return immediately (non-blocking)"); - println!(" • Jobs contain Rhai scripts that run on the runner"); - println!(" • Supervisor coordinates job distribution via Redis\n"); - - Ok(()) -} diff --git a/bin/supervisor/examples/_archive/simple_job_workflow.rs b/bin/supervisor/examples/_archive/simple_job_workflow.rs deleted file mode 100644 index edffc80..0000000 --- a/bin/supervisor/examples/_archive/simple_job_workflow.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! Simple job workflow example -//! -//! This example demonstrates the basic job lifecycle using the new API: -//! 1. Create a job -//! 2. Start the job -//! 3. Monitor its progress -//! 4. Get the result - -use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult}; -use std::time::Duration; -use tokio::time::sleep; - -#[tokio::main] -async fn main() -> Result<(), Box> { - println!("Simple Job Workflow Example"); - println!("============================\n"); - - // Create client - let client = SupervisorClient::new("http://localhost:3030")?; - let secret = "user-secret-456"; - - // Create a simple job - let job = JobBuilder::new() - .caller_id("simple_example") - .context_id("demo") - .payload("echo 'Hello from Hero Supervisor!' && sleep 3 && echo 'Job completed!'") - .executor("osis") - .runner("osis_runner_1") - .timeout(60) - .env_var("EXAMPLE_VAR", "example_value") - .build()?; - - println!("📝 Creating job..."); - let job_id = client.jobs_create(secret, job).await?; - println!("✅ Job created: {}\n", job_id); - - println!("🚀 Starting job..."); - client.job_start(secret, &job_id).await?; - println!("✅ Job started\n"); - - println!("👀 Monitoring job progress..."); - loop { - let status = client.job_status(&job_id).await?; - println!(" Status: {}", status.status); - - if status.status == "completed" || status.status == "failed" { - break; - } - - sleep(Duration::from_secs(2)).await; - } - - println!("\n📋 Getting job result..."); - match client.job_result(&job_id).await? { - JobResult::Success { success } => { - println!("✅ Success: {}", success); - }, - JobResult::Error { error } => { - println!("❌ Error: {}", error); - } - } - - Ok(()) -} diff --git a/bin/supervisor/examples/_archive/supervisor/README.md b/bin/supervisor/examples/_archive/supervisor/README.md deleted file mode 100644 index be0da70..0000000 --- a/bin/supervisor/examples/_archive/supervisor/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# Hero Supervisor Example - -This example demonstrates how to configure and run the Hero Supervisor with multiple actors using a TOML configuration file. 
- -## Files - -- `config.toml` - Example supervisor configuration with multiple actors -- `run_supervisor.sh` - Shell script to build and run the supervisor with the example config -- `run_supervisor.rs` - Rust script using escargot to build and run the supervisor -- `README.md` - This documentation file - -## Configuration - -The `config.toml` file defines: - -- **Redis connection**: URL for the Redis server used for job queuing -- **Database path**: Local path for supervisor state storage -- **Job queue key**: Redis key for the supervisor job queue -- **Actors**: List of actor configurations with: - - `name`: Unique identifier for the actor - - `runner_type`: Type of runner ("SAL", "OSIS", "V", "Python") - - `binary_path`: Path to the actor binary - - `process_manager`: Process management type ("simple" or "tmux") - -## Prerequisites - -1. **Redis Server**: Ensure Redis is running on `localhost:6379` (or update the config) -2. **Actor Binaries**: Build the required actor binaries referenced in the config: - ```bash - # Build SAL worker - cd ../../sal - cargo build --bin sal_worker - - # Build OSIS and system workers - cd ../../worker - cargo build --bin osis - cargo build --bin system - ``` - -## Running the Example - -### Option 1: Shell Script (Recommended) - -```bash -./run_supervisor.sh -``` - -### Option 2: Rust Script with Escargot - -```bash -cargo +nightly -Zscript run_supervisor.rs -``` - -### Option 3: Manual Build and Run - -```bash -# Build the supervisor -cd ../../../supervisor -cargo build --bin supervisor --features cli - -# Run with config -./target/debug/supervisor --config ../baobab/examples/supervisor/config.toml -``` - -## Usage - -Once running, the supervisor will: - -1. Load the configuration from `config.toml` -2. Initialize and start all configured actors -3. Listen for jobs on the Redis queue (`hero:supervisor:jobs`) -4. Dispatch jobs to appropriate actors based on the `runner` field -5. Monitor actor health and status - -## Testing - -You can test the supervisor by dispatching jobs to the Redis queue: - -```bash -# Using redis-cli to add a test job -redis-cli LPUSH "hero:supervisor:jobs" '{"id":"test-123","runner":"sal_actor_1","script":"print(\"Hello from SAL actor!\")"}' -``` - -## Stopping - -Use `Ctrl+C` to gracefully shutdown the supervisor. It will: - -1. Stop accepting new jobs -2. Wait for running jobs to complete -3. Shutdown all managed actors -4. 
Clean up resources - -## Customization - -Modify `config.toml` to: - -- Add more actors -- Change binary paths to match your build locations -- Update Redis connection settings -- Configure different process managers per actor -- Adjust database and queue settings - -## Troubleshooting - -- **Redis Connection**: Ensure Redis is running and accessible -- **Binary Paths**: Verify all actor binary paths exist and are executable -- **Permissions**: Ensure the supervisor has permission to create the database directory -- **Ports**: Check that Redis port (6379) is not blocked by firewall diff --git a/bin/supervisor/examples/_archive/supervisor/config.toml b/bin/supervisor/examples/_archive/supervisor/config.toml deleted file mode 100644 index e255335..0000000 --- a/bin/supervisor/examples/_archive/supervisor/config.toml +++ /dev/null @@ -1,18 +0,0 @@ -# Hero Supervisor Configuration -# This configuration defines the Redis connection, database path, and actors to manage - -# Redis connection URL -redis_url = "redis://localhost:6379" - -# Database path for supervisor state -db_path = "/tmp/supervisor_example_db" - -# Job queue key for supervisor jobs -job_queue_key = "hero:supervisor:jobs" - -# Actor configurations -[[actors]] -name = "sal_actor_1" -runner_type = "SAL" -binary_path = "cargo run /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor/examples/mock_runner.rs" -process_manager = "tmux" \ No newline at end of file diff --git a/bin/supervisor/examples/_archive/supervisor/run_supervisor.rs b/bin/supervisor/examples/_archive/supervisor/run_supervisor.rs deleted file mode 100644 index 4b5983e..0000000 --- a/bin/supervisor/examples/_archive/supervisor/run_supervisor.rs +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env cargo +nightly -Zscript -//! ```cargo -//! [dependencies] -//! escargot = "0.5" -//! tokio = { version = "1.0", features = ["full"] } -//! log = "0.4" -//! env_logger = "0.10" -//! 
``` - -use escargot::CargoBuild; -use std::process::Command; -use log::{info, error}; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Initialize logging - env_logger::init(); - - info!("Building and running Hero Supervisor with example configuration"); - - // Get the current directory (when running as cargo example, this is the crate root) - let current_dir = std::env::current_dir()?; - info!("Current directory: {}", current_dir.display()); - - // Path to the supervisor crate (current directory when running as example) - let supervisor_crate_path = current_dir.clone(); - - // Path to the config file (in examples/supervisor subdirectory) - let config_path = current_dir.join("examples/supervisor/config.toml"); - - if !config_path.exists() { - error!("Config file not found: {}", config_path.display()); - return Err("Config file not found".into()); - } - - info!("Using config file: {}", config_path.display()); - - // Build the supervisor binary using escargot - info!("Building supervisor binary..."); - let supervisor_bin = CargoBuild::new() - .bin("supervisor") - .manifest_path(supervisor_crate_path.join("Cargo.toml")) - .features("cli") - .run()?; - - info!("Supervisor binary built successfully"); - - // Run the supervisor with the config file - info!("Starting supervisor with config: {}", config_path.display()); - - let mut cmd = Command::new(supervisor_bin.path()); - cmd.arg("--config") - .arg(&config_path); - - // Add environment variables for better logging - cmd.env("RUST_LOG", "info"); - - info!("Executing: {:?}", cmd); - - // Execute the supervisor - let status = cmd.status()?; - - if status.success() { - info!("Supervisor completed successfully"); - } else { - error!("Supervisor exited with status: {}", status); - } - - Ok(()) -} diff --git a/bin/supervisor/examples/_archive/supervisor/run_supervisor.sh b/bin/supervisor/examples/_archive/supervisor/run_supervisor.sh deleted file mode 100755 index 25111f1..0000000 --- a/bin/supervisor/examples/_archive/supervisor/run_supervisor.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -# Hero Supervisor Example Runner -# This script builds and runs the supervisor binary with the example configuration - -set -e - -# Get the directory of this script -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -SUPERVISOR_DIR="$SCRIPT_DIR/../../../supervisor" -CONFIG_FILE="$SCRIPT_DIR/config.toml" - -echo "🚀 Building and running Hero Supervisor with example configuration" -echo "📁 Script directory: $SCRIPT_DIR" -echo "🔧 Supervisor crate: $SUPERVISOR_DIR" -echo "⚙️ Config file: $CONFIG_FILE" - -# Check if config file exists -if [ ! -f "$CONFIG_FILE" ]; then - echo "❌ Config file not found: $CONFIG_FILE" - exit 1 -fi - -# Check if supervisor directory exists -if [ ! -d "$SUPERVISOR_DIR" ]; then - echo "❌ Supervisor directory not found: $SUPERVISOR_DIR" - exit 1 -fi - -# Build the supervisor binary -echo "🔨 Building supervisor binary..." -cd "$SUPERVISOR_DIR" -cargo build --bin supervisor --features cli - -# Check if build was successful -if [ $? 
-ne 0 ]; then - echo "❌ Failed to build supervisor binary" - exit 1 -fi - -echo "✅ Supervisor binary built successfully" - -# Run the supervisor with the config file -echo "🎯 Starting supervisor with config: $CONFIG_FILE" -echo "📝 Use Ctrl+C to stop the supervisor" -echo "" - -# Set environment variables for better logging -export RUST_LOG=info - -# Execute the supervisor -exec "$SUPERVISOR_DIR/target/debug/supervisor" --config "$CONFIG_FILE" diff --git a/bin/supervisor/examples/osiris_openrpc/README.md b/bin/supervisor/examples/osiris_openrpc/README.md deleted file mode 100644 index 258e503..0000000 --- a/bin/supervisor/examples/osiris_openrpc/README.md +++ /dev/null @@ -1,102 +0,0 @@ -# OSIRIS + OpenRPC Comprehensive Example - -This example demonstrates the complete workflow of using Hero Supervisor with OSIRIS runners via OpenRPC. - -## What This Example Does - -1. **Builds and starts** Hero Supervisor with OpenRPC server enabled -2. **Builds** the OSIRIS runner binary -3. **Connects** an OpenRPC client to the supervisor -4. **Registers and starts** an OSIRIS runner -5. **Dispatches multiple jobs** via OpenRPC: - - Create a Note - - Create an Event - - Query stored data - - Test access control (expected to fail) -6. **Monitors** job execution and results -7. **Gracefully shuts down** all components - -## Prerequisites - -**IMPORTANT: Redis must be running before starting this example!** - -```bash -# Start Redis (if not already running) -redis-server -``` - -Other requirements: -- Redis server running on `localhost:6379` -- Rust toolchain installed -- Both `supervisor` and `runner_rust` crates available - -## Running the Example - -```bash -cargo run --example osiris_openrpc -``` - -## Job Scripts - -The example uses separate Rhai script files for each job: - -- `note.rhai` - Creates and stores a Note object -- `event.rhai` - Creates and stores an Event object -- `query.rhai` - Queries and retrieves stored objects -- `access_denied.rhai` - Tests access control (should fail) - -## Architecture - -``` -┌─────────────────┐ -│ This Example │ -│ (OpenRPC │ -│ Client) │ -└────────┬────────┘ - │ JSON-RPC - ↓ -┌─────────────────┐ -│ Supervisor │ -│ (OpenRPC │ -│ Server) │ -└────────┬────────┘ - │ Redis Queue - ↓ -┌─────────────────┐ -│ OSIRIS Runner │ -│ (Rhai Engine │ -│ + HeroDB) │ -└─────────────────┘ -``` - -## Key Features Demonstrated - -- **Automatic binary building** using escargot -- **OpenRPC communication** between client and supervisor -- **Runner registration** with configuration -- **Job dispatching** with signatories -- **Context-based access control** in OSIRIS -- **Typed object storage** (Note, Event) -- **Graceful shutdown** and cleanup - -## Expected Output - -The example will: -1. ✅ Create a Note successfully -2. ✅ Create an Event successfully -3. ✅ Query and retrieve stored objects -4. ✅ Deny access for unauthorized participants -5. 
✅ Clean up all resources - -## Troubleshooting - -**Redis Connection Error:** -- Ensure Redis is running: `redis-server` - -**Build Errors:** -- Ensure both supervisor and runner_rust crates are available -- Check that all dependencies are up to date - -**OpenRPC Connection Error:** -- Port 3030 might be in use -- Check supervisor logs for startup issues diff --git a/bin/supervisor/examples/osiris_openrpc/access_denied.rhai b/bin/supervisor/examples/osiris_openrpc/access_denied.rhai deleted file mode 100644 index f276302..0000000 --- a/bin/supervisor/examples/osiris_openrpc/access_denied.rhai +++ /dev/null @@ -1,8 +0,0 @@ -print("Attempting to access context with non-signatories..."); -print("Participants: [dave, eve]"); -print("Signatories: [alice, bob, charlie]"); - -// This should fail because neither dave nor eve are signatories -let ctx = get_context(["dave", "eve"]); - -"This should not succeed!" diff --git a/bin/supervisor/examples/osiris_openrpc/event.rhai b/bin/supervisor/examples/osiris_openrpc/event.rhai deleted file mode 100644 index c609d74..0000000 --- a/bin/supervisor/examples/osiris_openrpc/event.rhai +++ /dev/null @@ -1,18 +0,0 @@ -print("Creating context for [alice, bob]..."); -let ctx = get_context(["alice", "bob"]); -print("✓ Context ID: " + ctx.context_id()); - -print("\nCreating event..."); -let event = event("events") - .title("Team Retrospective") - .description("Review what went well and areas for improvement") - .location("Virtual - Zoom Room A") - .category("retrospective"); - -print("✓ Event created"); - -print("\nStoring event in context..."); -ctx.save(event); -print("✓ Event stored"); - -"Event 'Team Retrospective' created and stored successfully" diff --git a/bin/supervisor/examples/osiris_openrpc/main.rs b/bin/supervisor/examples/osiris_openrpc/main.rs deleted file mode 100644 index 63e67f5..0000000 --- a/bin/supervisor/examples/osiris_openrpc/main.rs +++ /dev/null @@ -1,293 +0,0 @@ -///! Comprehensive OSIRIS + OpenRPC + Admin UI Example -///! -/// This example demonstrates using the Hero Supervisor OpenRPC client -/// to run OSIRIS scripts through the supervisor. -/// -/// The client library is located at: client/ -///! -///! 1. Starting a Hero Supervisor with OpenRPC server -///! 2. Building and serving the Admin UI (Yew WASM) -///! 3. Building and starting an OSIRIS runner -///! 4. Registering the runner with the supervisor -///! 5. Dispatching multiple OSIRIS jobs via OpenRPC -///! 6. Monitoring job execution via CLI and Web UI -///! 7. Graceful shutdown -///! -///! Services: -///! - Supervisor OpenRPC API: http://127.0.0.1:3030 -///! - Admin UI: http://127.0.0.1:8080 -///! -///! Usage: -///! ```bash -///! cargo run --example osiris_openrpc -///! ``` -///! -///! Requirements: -///! - Redis running on localhost:6379 -///! 
- Trunk installed (cargo install trunk) - -use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder}; -use std::time::Duration; -use escargot::CargoBuild; -use std::process::{Stdio, Command}; -use tokio::time::sleep; - -#[tokio::main] -async fn main() -> Result<(), Box> { - println!("🚀 OSIRIS + OpenRPC Comprehensive Example"); - println!("=========================================\n"); - - // ======================================================================== - // STEP 1: Build and start supervisor with OpenRPC - // ======================================================================== - println!("Step 1: Building and starting supervisor"); - println!("─────────────────────────────────────────────────────────────\n"); - - let supervisor_binary = CargoBuild::new() - .bin("supervisor") - .current_release() - .manifest_path("../supervisor/Cargo.toml") - .run()?; - - println!("✅ Supervisor binary built"); - - let mut supervisor = supervisor_binary.command() - .arg("--redis-url") - .arg("redis://localhost:6379") - .arg("--port") - .arg("3030") - .arg("--admin-secret") - .arg("admin_secret") - .arg("--user-secret") - .arg("user_secret") - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()) - .spawn()?; - - println!("✅ Supervisor started on port 3030"); - println!("⏳ Waiting for supervisor to initialize..."); - sleep(Duration::from_secs(5)).await; - - // Check if supervisor is still running - match supervisor.try_wait()? { - Some(status) => { - return Err(format!("Supervisor exited early with status: {}", status).into()); - } - None => { - println!("✅ Supervisor is running"); - } - } - - // ======================================================================== - // STEP 2: Build and serve Admin UI - // ======================================================================== - println!("\nStep 2: Building and serving Admin UI"); - println!("─────────────────────────────────────────────────────────────\n"); - - let mut admin_ui = Command::new("trunk") - .arg("serve") - .arg("--port") - .arg("8080") - .arg("--address") - .arg("127.0.0.1") - .current_dir("ui") - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .spawn()?; - - println!("✅ Admin UI building..."); - println!("🌐 Admin UI will be available at: http://127.0.0.1:8080"); - sleep(Duration::from_secs(3)).await; - - // ======================================================================== - // STEP 3: Build OSIRIS runner - // ======================================================================== - println!("\nStep 3: Building OSIRIS runner"); - println!("─────────────────────────────────────────────────────────────\n"); - - let runner_binary = CargoBuild::new() - .bin("runner_osiris") - .current_release() - .manifest_path("../runner_rust/Cargo.toml") - .run()?; - - println!("✅ OSIRIS runner binary built"); - - // ======================================================================== - // STEP 4: Connect OpenRPC client - // ======================================================================== - println!("\nStep 4: Connecting OpenRPC client"); - println!("─────────────────────────────────────────────────────────────\n"); - - let client = SupervisorClient::new("http://127.0.0.1:3030")?; - println!("✅ Connected to supervisor\n"); - - // ======================================================================== - // STEP 5: Register and start OSIRIS runner - // ======================================================================== - println!("Step 5: Registering OSIRIS runner"); - 
println!("─────────────────────────────────────────────────────────────\n"); - - let runner_path = runner_binary.path().to_string_lossy(); - let db_path = "/tmp/osiris_openrpc.db"; - - // Register the runner with the supervisor - // Note: The current OpenRPC server uses register_runner, not add_runner - client.register_runner("admin_secret", "osiris_runner").await?; - println!("✅ Runner registered: osiris_runner"); - - client.start_runner("admin_secret", "osiris_runner").await?; - println!("✅ Runner started\n"); - - sleep(Duration::from_secs(2)).await; - - // ======================================================================== - // STEP 6: Load job scripts - // ======================================================================== - println!("Step 6: Loading job scripts"); - println!("─────────────────────────────────────────────────────────────\n"); - - let note_script = std::fs::read_to_string("examples/osiris_openrpc/note.rhai")?; - let event_script = std::fs::read_to_string("examples/osiris_openrpc/event.rhai")?; - let query_script = std::fs::read_to_string("examples/osiris_openrpc/query.rhai")?; - let access_denied_script = std::fs::read_to_string("examples/osiris_openrpc/access_denied.rhai")?; - - println!("✅ Loaded 4 job scripts\n"); - - // ======================================================================== - // STEP 7: Dispatch jobs via OpenRPC - // ======================================================================== - println!("Step 7: Dispatching jobs"); - println!("─────────────────────────────────────────────────────────────\n"); - - // Job 1: Create Note - println!("📝 Job 1: Creating Note..."); - let job1 = JobBuilder::new() - .caller_id("openrpc_client") - .context_id("osiris_demo") - .payload(¬e_script) - .runner("osiris_runner") - .executor("rhai") - .timeout(30) - .signature("alice", "") - .signature("bob", "") - .build()?; - - let job1_result = client.run_job("user_secret", job1).await; - - match job1_result { - Ok(result) => println!("✅ {:?}\n", result), - Err(e) => println!("❌ Job failed: {}\n", e), - } - - sleep(Duration::from_secs(1)).await; - - // Job 2: Create Event - println!("📅 Job 2: Creating Event..."); - let job2 = JobBuilder::new() - .caller_id("openrpc_client") - .context_id("osiris_demo") - .payload(&event_script) - .runner("osiris_runner") - .executor("rhai") - .timeout(30) - .signature("alice", "") - .signature("bob", "") - .build()?; - - let job2_result = client.run_job("user_secret", job2).await; - - match job2_result { - Ok(result) => println!("✅ {:?}\n", result), - Err(e) => println!("❌ Job failed: {}\n", e), - } - - sleep(Duration::from_secs(1)).await; - - // Job 3: Query Data - println!("🔍 Job 3: Querying Data..."); - let job3 = JobBuilder::new() - .caller_id("openrpc_client") - .context_id("osiris_demo") - .payload(&query_script) - .runner("osiris_runner") - .executor("rhai") - .timeout(30) - .signature("alice", "") - .signature("bob", "") - .signature("charlie", "") - .build()?; - - let job3_result = client.run_job("user_secret", job3).await; - - match job3_result { - Ok(result) => println!("✅ {:?}\n", result), - Err(e) => println!("❌ Job failed: {}\n", e), - } - - sleep(Duration::from_secs(1)).await; - - // Job 4: Access Control Test (should fail) - println!("🔒 Job 4: Testing Access Control (expected to fail)..."); - let job4 = JobBuilder::new() - .caller_id("openrpc_client") - .context_id("osiris_demo") - .payload(&access_denied_script) - .runner("osiris_runner") - .executor("rhai") - .timeout(30) - .signature("alice", "") - 
.signature("bob", "") - .signature("charlie", "") - .build()?; - - let job4_result = client.run_job("user_secret", job4).await; - - match job4_result { - Ok(result) => println!("❌ Unexpected success: {:?}\n", result), - Err(e) => println!("✅ Access denied as expected: {}\n", e), - } - - // ======================================================================== - // STEP 8: Check runner status - // ======================================================================== - println!("\nStep 8: Checking runner status"); - println!("─────────────────────────────────────────────────────────────\n"); - - let status = client.get_runner_status("admin_secret", "osiris_runner").await?; - println!("Runner status: {:?}\n", status); - - // ======================================================================== - // STEP 9: Keep services running for manual testing - // ======================================================================== - println!("\nStep 9: Services Running"); - println!("─────────────────────────────────────────────────────────────\n"); - println!("🌐 Admin UI: http://127.0.0.1:8080"); - println!("📡 OpenRPC API: http://127.0.0.1:3030"); - println!("\n⏸️ Press Ctrl+C to stop all services...\n"); - - // Wait for Ctrl+C - tokio::signal::ctrl_c().await?; - - // ======================================================================== - // STEP 10: Cleanup - // ======================================================================== - println!("\n\nStep 10: Cleanup"); - println!("─────────────────────────────────────────────────────────────\n"); - - client.stop_runner("admin_secret", "osiris_runner", false).await?; - println!("✅ Runner stopped"); - - client.remove_runner("admin_secret", "osiris_runner").await?; - println!("✅ Runner removed"); - - admin_ui.kill()?; - println!("✅ Admin UI stopped"); - - supervisor.kill()?; - println!("✅ Supervisor stopped"); - - println!("\n✨ Example completed successfully!"); - - Ok(()) -} diff --git a/bin/supervisor/examples/osiris_openrpc/note.rhai b/bin/supervisor/examples/osiris_openrpc/note.rhai deleted file mode 100644 index 7bc74b1..0000000 --- a/bin/supervisor/examples/osiris_openrpc/note.rhai +++ /dev/null @@ -1,20 +0,0 @@ -print("Creating context for [alice, bob]..."); -let ctx = get_context(["alice", "bob"]); -print("✓ Context ID: " + ctx.context_id()); - -print("\nCreating note..."); -let note = note("notes") - .title("Sprint Planning Meeting") - .content("Discussed Q1 2025 roadmap and milestones") - .tag("sprint", "2025-Q1") - .tag("team", "engineering") - .tag("priority", "high") - .mime("text/markdown"); - -print("✓ Note created"); - -print("\nStoring note in context..."); -ctx.save(note); -print("✓ Note stored"); - -"Note 'Sprint Planning Meeting' created and stored successfully" diff --git a/bin/supervisor/examples/osiris_openrpc/query.rhai b/bin/supervisor/examples/osiris_openrpc/query.rhai deleted file mode 100644 index 97ff892..0000000 --- a/bin/supervisor/examples/osiris_openrpc/query.rhai +++ /dev/null @@ -1,21 +0,0 @@ -print("Querying context [alice, bob]..."); -let ctx = get_context(["alice", "bob"]); -print("✓ Context ID: " + ctx.context_id()); - -print("\nListing all notes..."); -let notes = ctx.list("notes"); -print("✓ Found " + notes.len() + " note(s)"); - -print("\nRetrieving specific note..."); -let note = ctx.get("notes", "sprint_planning_001"); -print("✓ Retrieved note: sprint_planning_001"); - -print("\nQuerying context [alice, bob, charlie]..."); -let ctx2 = get_context(["alice", "bob", "charlie"]); -print("✓ Context ID: 
" + ctx2.context_id()); - -print("\nListing all events..."); -let events = ctx2.list("events"); -print("✓ Found " + events.len() + " event(s)"); - -"Query complete: Found " + notes.len() + " notes and " + events.len() + " events" diff --git a/bin/supervisor/scripts/build.sh b/bin/supervisor/scripts/build.sh deleted file mode 100755 index 599b69c..0000000 --- a/bin/supervisor/scripts/build.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -PROJECT_DIR=$(cd "$SCRIPT_DIR/.." && pwd) - -# Spinner function -spinner() { - local pid=$1 - local delay=0.1 - local spinstr='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏' - while ps -p $pid > /dev/null 2>&1; do - local temp=${spinstr#?} - printf " [%c] " "$spinstr" - local spinstr=$temp${spinstr%"$temp"} - sleep $delay - printf "\b\b\b\b\b\b" - done - printf " \b\b\b\b" -} - -echo "Building Hero Supervisor Workspace" -echo "" - -# Build core and client -printf "📦 Core & Client... " -cd "$PROJECT_DIR" -if RUSTFLAGS="-A warnings" cargo build --release --workspace > /tmp/supervisor-build-core.log 2>&1 & spinner $!; wait $!; then - echo "✅" -else - echo "❌" - echo " Error: Build failed. Run 'cd $PROJECT_DIR && cargo build --release --workspace' for details" - exit 1 -fi - -# # Build UI -# printf "📦 UI (WASM)... " -# cd "$PROJECT_DIR/ui" - -# if ! command -v trunk &> /dev/null; then -# echo "⚠️ (trunk not installed)" -# echo " Install with: cargo install trunk" -# else -# if trunk build --release > /tmp/supervisor-build-ui.log 2>&1 & spinner $!; wait $!; then -# echo "✅" -# else -# echo "❌" -# echo " Error: Build failed. Run 'cd $PROJECT_DIR/ui && trunk build --release' for details" -# exit 1 -# fi -# fi - -echo "" -echo "✅ All builds completed" \ No newline at end of file diff --git a/bin/supervisor/scripts/release.sh b/bin/supervisor/scripts/release.sh deleted file mode 100755 index e7daa4d..0000000 --- a/bin/supervisor/scripts/release.sh +++ /dev/null @@ -1,161 +0,0 @@ -#!/bin/bash -# release.sh - Build optimized WASM and serve with Caddy + Brotli compression -set -e - -############################################################################### -# Freezone Portal Release Script -# - Builds the WASM app with trunk in release mode -# - Optionally optimizes .wasm with wasm-opt (-Oz, strip) -# - Precompresses assets with gzip and brotli for efficient static serving -# - Generates a manifest (manifest.json) with sizes and SHA-256 checksums -# -# Usage: -# ./release.sh [--outdir dist] [--no-opt] [--compress] [--no-manifest] -# [--trunk-args "--public-url /portal/"] -# -# Notes: -# - Precompression is OFF by default; enable with --compress -# - Only modifies files within the output directory (default: dist) -# - Non-destructive to your source tree -############################################################################### - -set -u - -SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -PROJECT_DIR=$(cd "$SCRIPT_DIR/.." && pwd) -BUILD_SCRIPT="$SCRIPT_DIR/build.sh" - -# Defaults -OUTDIR="dist" -DO_OPT=1 -DO_COMPRESS=0 -DO_MANIFEST=1 -TRUNK_ARGS="" - -usage() { - cat < Output directory (default: dist) - --no-opt Skip wasm-opt optimization - --compress Enable gzip/brotli precompression - --no-manifest Skip manifest generation - --trunk-args "..." 
Extra arguments forwarded to trunk build - -h, --help Show this help - -Examples: - $(basename "$0") --outdir dist --trunk-args "--public-url /" - $(basename "$0") --no-opt --no-compress -EOF -} - -# Parse args -while [[ $# -gt 0 ]]; do - case "$1" in - --outdir) - OUTDIR="$2"; shift 2;; - --no-opt) - DO_OPT=0; shift;; - --compress) - DO_COMPRESS=1; shift;; - --no-manifest) - DO_MANIFEST=0; shift;; - --trunk-args) - TRUNK_ARGS="$2"; shift 2;; - -h|--help) - usage; exit 0;; - *) - echo "❌ Unknown option: $1"; echo; usage; exit 1;; - esac -done - -# Tool checks -if [[ ! -x "$BUILD_SCRIPT" ]]; then - echo "❌ build.sh not found or not executable at: $BUILD_SCRIPT" - echo " Ensure portal/scripts/build.sh exists and is chmod +x." - exit 1 -fi -if ! command -v trunk >/dev/null 2>&1; then - echo "❌ trunk not found. Install with: cargo install trunk"; exit 1; -fi - -HAS_WASM_OPT=0 -if command -v wasm-opt >/dev/null 2>&1; then HAS_WASM_OPT=1; fi -if [[ $DO_OPT -eq 1 && $HAS_WASM_OPT -eq 0 ]]; then - echo "⚠️ wasm-opt not found. Skipping WASM optimization." - DO_OPT=0 -fi - -if [[ $DO_COMPRESS -eq 1 ]]; then - if ! command -v gzip >/dev/null 2>&1; then - echo "⚠️ gzip not found. Skipping gzip compression."; GZIP_OK=0; else GZIP_OK=1; fi - if ! command -v brotli >/dev/null 2>&1; then - echo "⚠️ brotli not found. Skipping brotli compression."; BR_OK=0; else BR_OK=1; fi -else - GZIP_OK=0; BR_OK=0 -fi - -echo "🔧 Building optimized WASM bundle (via build.sh)..." -set -x -"$BUILD_SCRIPT" --release --outdir "$OUTDIR" ${TRUNK_ARGS:+--trunk-args "$TRUNK_ARGS"} -set +x - -DIST_DIR="$PROJECT_DIR/$OUTDIR" -if [[ ! -d "$DIST_DIR" ]]; then - echo "❌ Build failed: output directory not found: $DIST_DIR"; exit 1; -fi - -# Optimize .wasm files -if [[ $DO_OPT -eq 1 && $HAS_WASM_OPT -eq 1 ]]; then - echo "🛠️ Optimizing WASM with wasm-opt (-Oz, strip)..." - while IFS= read -r -d '' wasm; do - echo " • $(basename "$wasm")" - tmp="$wasm.opt" - wasm-opt -Oz --strip-dwarf "$wasm" -o "$tmp" - mv "$tmp" "$wasm" - done < <(find "$DIST_DIR" -type f -name "*.wasm" -print0) -fi - -# Precompress assets -if [[ $DO_COMPRESS -eq 1 ]]; then - echo "🗜️ Precompressing assets (gzip/brotli)..." - while IFS= read -r -d '' f; do - if [[ $GZIP_OK -eq 1 ]]; then - gzip -kf9 "$f" - fi - if [[ $BR_OK -eq 1 ]]; then - brotli -f -q 11 "$f" - fi - done < <(find "$DIST_DIR" -type f \( -name "*.wasm" -o -name "*.js" -o -name "*.css" \) -print0) -fi - -# Manifest with sizes and SHA-256 -if [[ $DO_MANIFEST -eq 1 ]]; then - echo "🧾 Generating manifest.json (sizes, sha256)..." - manifest="$DIST_DIR/manifest.json" - echo "{" > "$manifest" - first=1 - while IFS= read -r -d '' f; do - rel="${f#"$DIST_DIR/"}" - size=$(stat -f%z "$f" 2>/dev/null || stat -c%s "$f") - if command -v shasum >/dev/null 2>&1; then - hash=$(shasum -a 256 "$f" | awk '{print $1}') - else - hash=$(openssl dgst -sha256 -r "$f" | awk '{print $1}') - fi - [[ $first -eq 1 ]] || echo "," >> "$manifest" - first=0 - printf " \"%s\": { \"bytes\": %s, \"sha256\": \"%s\" }" "$rel" "$size" "$hash" >> "$manifest" - done < <(find "$DIST_DIR" -type f ! -name "manifest.json" -print0 | sort -z) - echo "\n}" >> "$manifest" -fi - -echo "📦 Checking bundle sizes ($OUTDIR)..." 
-if [ -d "$OUTDIR" ]; then - echo "Bundle sizes:" - find "$OUTDIR" -name "*.wasm" -exec ls -lh {} \; | awk '{print " WASM: " $5 " - " $9}' - find "$OUTDIR" -name "*.js" -exec ls -lh {} \; | awk '{print " JS: " $5 " - " $9}' - find "$OUTDIR" -name "*.css" -exec ls -lh {} \; | awk '{print " CSS: " $5 " - " $9}' - echo "" -fi diff --git a/bin/supervisor/scripts/test.sh b/bin/supervisor/scripts/test.sh deleted file mode 100755 index 35e5ecc..0000000 --- a/bin/supervisor/scripts/test.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -PROJECT_DIR=$(cd "$SCRIPT_DIR/.." && pwd) - -# Spinner function -spinner() { - local pid=$1 - local delay=0.1 - local spinstr='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏' - while ps -p $pid > /dev/null 2>&1; do - local temp=${spinstr#?} - printf " [%c] " "$spinstr" - local spinstr=$temp${spinstr%"$temp"} - sleep $delay - printf "\b\b\b\b\b\b" - done - printf " \b\b\b\b" -} - -echo "Testing Hero Supervisor Workspace" -echo "" - -# Test core and client -printf "🧪 Core & Client... " -cd "$PROJECT_DIR" -if cargo test --workspace > /tmp/supervisor-test-core.log 2>&1 & spinner $!; wait $!; then - echo "✅" -else - echo "❌" - echo " Error: Tests failed. Run 'cd $PROJECT_DIR && cargo test --workspace' for details" - exit 1 -fi - -# Test UI -printf "🧪 UI (WASM)... " -cd "$PROJECT_DIR/ui" - -if ! command -v wasm-pack &> /dev/null; then - echo "⚠️ (wasm-pack not installed)" - echo " Install with: cargo install wasm-pack" -else - if wasm-pack test --headless --firefox > /tmp/supervisor-test-ui.log 2>&1 & spinner $!; wait $!; then - echo "✅" - else - echo "❌" - echo " Error: Tests failed. Run 'cd $PROJECT_DIR/ui && wasm-pack test --headless --firefox' for details" - exit 1 - fi -fi - -echo "" -echo "✅ All tests completed" \ No newline at end of file diff --git a/bin/supervisor/src/builder.rs b/bin/supervisor/src/builder.rs index 83cbf33..72c5ec1 100644 --- a/bin/supervisor/src/builder.rs +++ b/bin/supervisor/src/builder.rs @@ -1,7 +1,7 @@ //! Supervisor builder for configuration and initialization. 
use crate::error::{SupervisorError, SupervisorResult}; -use crate::Supervisor; +use crate::supervisor::Supervisor; use hero_job_client::ClientBuilder; /// Builder for constructing a Supervisor instance diff --git a/bin/supervisor/src/bin/supervisor.rs b/bin/supervisor/src/main.rs similarity index 98% rename from bin/supervisor/src/bin/supervisor.rs rename to bin/supervisor/src/main.rs index 8b79efc..55300a0 100644 --- a/bin/supervisor/src/bin/supervisor.rs +++ b/bin/supervisor/src/main.rs @@ -3,8 +3,6 @@ use hero_supervisor::SupervisorBuilder; use clap::Parser; use log::{error, info}; -use std::sync::Arc; -use tokio::sync::Mutex; /// Hero Supervisor - manages actors and dispatches jobs #[derive(Parser, Debug)] diff --git a/bin/supervisor/src/openrpc.rs b/bin/supervisor/src/openrpc.rs index 8c43bc6..2fe849a 100644 --- a/bin/supervisor/src/openrpc.rs +++ b/bin/supervisor/src/openrpc.rs @@ -22,9 +22,10 @@ use std::sync::Arc; use std::fs; use tokio::sync::Mutex; -/// Load OpenRPC specification from docs/openrpc.json +/// Load OpenRPC specification from docs/supervisor/openrpc.json fn load_openrpc_spec() -> Result> { - let path = "../../docs/openrpc.json"; + // Path relative to the workspace root (where Cargo.toml is) + let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../../docs/supervisor/openrpc.json"); let content = fs::read_to_string(path)?; let spec = serde_json::from_str(&content)?; debug!("Loaded OpenRPC specification from: {}", path); diff --git a/bin/supervisor/tests/README.md b/bin/supervisor/tests/README.md deleted file mode 100644 index 1584a89..0000000 --- a/bin/supervisor/tests/README.md +++ /dev/null @@ -1,195 +0,0 @@ -# Supervisor End-to-End Tests - -Comprehensive integration tests for all Hero Supervisor OpenRPC client methods. - -## Prerequisites - -1. **Redis Server Running:** - ```bash - redis-server - ``` - -2. 
**Supervisor Running:** - ```bash - cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor - ./scripts/run.sh - ``` - -## Running Tests - -### Run All Tests -```bash -cargo test --test end_to_end -``` - -### Run Specific Test -```bash -cargo test --test end_to_end test_01_rpc_discover -``` - -### Run with Output -```bash -cargo test --test end_to_end -- --nocapture -``` - -### Run in Order (Sequential) -```bash -cargo test --test end_to_end -- --test-threads=1 --nocapture -``` - -## Test Coverage - -### ✅ Discovery & Info -- `test_01_rpc_discover` - OpenRPC specification discovery -- `test_15_supervisor_info` - Supervisor information - -### ✅ Runner Management -- `test_02_runner_register` - Register a new runner -- `test_03_runner_list` - List all runners -- `test_14_runner_remove` - Remove a runner - -### ✅ Job Management -- `test_04_jobs_create` - Create a job without running -- `test_05_jobs_list` - List all jobs -- `test_06_job_run_simple` - Run a job and wait for result -- `test_07_job_status` - Get job status -- `test_08_job_get` - Get job by ID -- `test_09_job_delete` - Delete a job - -### ✅ Authentication & API Keys -- `test_10_auth_verify` - Verify current API key -- `test_11_auth_key_create` - Create new API key -- `test_12_auth_key_list` - List all API keys -- `test_13_auth_key_remove` - Remove an API key - -### ✅ Complete Workflow -- `test_99_complete_workflow` - End-to-end integration test - -## Test Configuration - -Tests use the following defaults: -- **Supervisor URL:** `http://127.0.0.1:3030` -- **Admin Secret:** `807470fd1e1ccc3fb997a1d4177cceb31a68cb355a4412c8fd6e66e517e902be` -- **Test Runner:** `test-runner` (all tests use this runner name) - -**Important:** All tests use the same runner name (`test-runner`), so you only need to start one runner with that name to run all tests. - -## Expected Behavior - -### Successful Tests -All tests should pass when: -- Supervisor is running on port 3030 -- Admin secret matches configuration -- Redis is accessible - -### Expected Warnings -Some tests may show warnings if: -- `job.run` times out (no actual runner connected to Redis) -- Runners already exist from previous test runs - -These are expected and don't indicate test failure. - -## Troubleshooting - -### Connection Refused -``` -Error: tcp connect error, 127.0.0.1:3030, Connection refused -``` -**Solution:** Start the supervisor with `./scripts/run.sh` - -### Method Not Found -``` -Error: Method not found -``` -**Solution:** Rebuild supervisor with latest code: -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor -cargo build -``` - -### Authorization Failed -``` -Error: Missing Authorization header -``` -**Solution:** Check that `ADMIN_SECRET` in test matches supervisor configuration - -### Job Tests Timeout -``` -Error: JsonRpc(RequestTimeout) -``` -**Solution:** Make sure you have a runner connected with the name `test-runner`: -```bash -cd /Users/timurgordon/code/git.ourworld.tf/herocode/runner/rust -cargo run --bin runner_osiris -- test-runner -``` - -## Continuous Integration - -To run tests in CI: - -```bash -#!/bin/bash -# Start Redis -redis-server --daemonize yes - -# Start Supervisor -cd /Users/timurgordon/code/git.ourworld.tf/herocode/supervisor -./scripts/run.sh & -SUPERVISOR_PID=$! - -# Wait for supervisor to be ready -sleep 2 - -# Run tests -cargo test --test end_to_end - -# Cleanup -kill $SUPERVISOR_PID -redis-cli shutdown -``` - -## Adding New Tests - -1. 
Create a new test function: - ```rust - #[tokio::test] - async fn test_XX_my_new_test() { - println!("\n🧪 Test: my.new.method"); - let client = create_client().await; - // ... test code ... - println!("✅ my.new.method works"); - } - ``` - -2. Run it: - ```bash - cargo test --test end_to_end test_XX_my_new_test -- --nocapture - ``` - -## Test Output Example - -``` -🧪 Test: rpc.discover -✅ rpc.discover works - -🧪 Test: runner.register -✅ runner.register works - registered: test-runner-e2e - -🧪 Test: runner.list -✅ runner.list works - found 3 runners - - osiris - - freezone - - test-runner-e2e - -🧪 Test: jobs.create -✅ jobs.create works - created job: 550e8400-e29b-41d4-a716-446655440000 - -... -``` - -## Notes - -- Tests are designed to be idempotent (can run multiple times) -- Tests clean up after themselves when possible -- Some tests depend on previous test state (use `--test-threads=1` for strict ordering) -- Job execution tests may timeout if no runner is connected to Redis (this is expected) diff --git a/bin/supervisor/tests/job_api_integration_tests.rs b/bin/supervisor/tests/job_api_integration_tests.rs deleted file mode 100644 index f604ced..0000000 --- a/bin/supervisor/tests/job_api_integration_tests.rs +++ /dev/null @@ -1,31 +0,0 @@ -//! Integration tests for the job API -//! -//! These tests validate the complete job lifecycle using a real supervisor instance. -//! They require Redis and a running supervisor to execute properly. - -use hero_supervisor_openrpc_client::{SupervisorClient, JobBuilder, JobResult}; -use std::time::Duration; -use tokio::time::sleep; -use uuid::Uuid; - -/// Test helper to create a unique job for testing -fn create_test_job(context: &str) -> Result> { - JobBuilder::new() - .caller_id("integration_test") - .context_id(context) - .payload("echo 'Test job output'") - .executor("osis") - .runner("osis_runner_1") - .timeout(30) - .env_var("TEST_VAR", "test_value") - .build() - .map_err(|e| e.into()) -} - -/// Test helper to check if supervisor is available -async fn is_supervisor_available() -> bool { - match SupervisorClient::new("http://localhost:3030") { - Ok(client) => client.discover().await.is_ok(), - Err(_) => false, - } -} \ No newline at end of file diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..afd8c5d --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,15 @@ +# Architecture + +The Horus architecture consists of three layers: + +1. Coordinator: A workflow engine that executes DAG-based flows by sending ready job steps to the targeted supervisors. +2. Supervisor: A job dispatcher that routes jobs to the appropriate runners. +3. Runner: A job executor that runs the actual job steps. + +## Networking + +- The user / client talks to the coordinator over an OpenRPC interface, using either regular HTTP transport or Mycelium. +- The coordinator talks to the supervisor over an OpenRPC interface, using either regular HTTP transport or Mycelium. +- The supervisor talks to runners over a Redis based job execution protocol. + + diff --git a/docs/glossary.md b/docs/glossary.md new file mode 100644 index 0000000..35cbc70 --- /dev/null +++ b/docs/glossary.md @@ -0,0 +1,6 @@ +# Terminology + +- Flow: A workflow that is executed by the coordinator. +- Job: A unit of work that is executed by a runner. +- Supervisor: A job dispatcher that routes jobs to the appropriate runners. +- Runner: A job executor that runs the actual job steps. 
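The layering described in the new `docs/architecture.md` and `docs/glossary.md` above is easiest to see from the caller's side: a client describes a unit of work as a Job and hands it to a supervisor, which queues it to a runner. A minimal sketch, assuming the `JobBuilder` API from `hero-supervisor-openrpc-client` as it is used elsewhere in this diff (the runner name, payload, and executor values are purely illustrative):

```rust
use hero_supervisor_openrpc_client::JobBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // A Job is the glossary's "unit of work": a payload/script plus routing
    // information (which runner, which executor) and limits (timeout).
    let job = JobBuilder::new()
        .caller_id("docs_example")      // who submitted the job
        .context_id("docs_demo")        // context the job belongs to
        .payload("print(\"hello\");")   // script the runner would execute
        .runner("example_runner")       // illustrative runner name
        .executor("rhai")               // executor, as in the examples removed above
        .timeout(30)                    // seconds
        .build()?;
    // The job would then be sent to a supervisor over OpenRPC (HTTP or
    // Mycelium), which dispatches it to the named runner over Redis.
    let _ = job;
    Ok(())
}
```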
diff --git a/docs/supervisor/auth.md b/docs/supervisor/auth.md new file mode 100644 index 0000000..626dbe7 --- /dev/null +++ b/docs/supervisor/auth.md @@ -0,0 +1,28 @@ +## Supervisor Authentication + +The supervisor has two authentication mechanisms: + +1. An API-key mechanism based on scoped symmetric keys. +2. Signature verification against a job's canonical representation. + +The first controls access to the supervisor API; the second authenticates a job's signatories, so that runners can enforce access control based on them. + +#### API Key Management + +API keys are used to authenticate requests to the supervisor. They are created using the `auth.key.create` method and can be listed using the `key.list` method. + +#### API Key Scopes + +API keys have a scope that determines what actions they can perform. The following scopes are available: + +- `admin`: Full access to all supervisor methods. +- `registrar`: Access to methods related to job registration and management. +- `user`: Access to methods related to job execution and management. + +#### API Key Usage + +API keys are passed in the `Authorization` header of the request, in the format `Bearer <key>`. + +#### API Key Rotation + +API keys are rotated by removing the old key with `key.remove` and creating a replacement with `auth.key.create`; removing a key invalidates it immediately. \ No newline at end of file diff --git a/bin/supervisor/docs/openrpc.json b/docs/supervisor/openrpc.json similarity index 100% rename from bin/supervisor/docs/openrpc.json rename to docs/supervisor/openrpc.json diff --git a/lib/clients/supervisor/src/builder.rs b/lib/clients/supervisor/src/builder.rs index 71b6ac5..8d9d000 100644 --- a/lib/clients/supervisor/src/builder.rs +++ b/lib/clients/supervisor/src/builder.rs @@ -1,17 +1,27 @@ -//! Builder pattern for WasmSupervisorClient to ensure proper configuration +//! Builder pattern for SupervisorClient to ensure proper configuration //! //! This module provides a type-safe builder that guarantees a client cannot be //! created without a secret, preventing authentication issues.
+#[cfg(target_arch = "wasm32")] use crate::wasm::WasmSupervisorClient; +#[cfg(not(target_arch = "wasm32"))] +use crate::{SupervisorClient, HttpTransport, ClientResult, ClientError}; +#[cfg(not(target_arch = "wasm32"))] +use jsonrpsee::http_client::HttpClientBuilder; +#[cfg(not(target_arch = "wasm32"))] +use http::{HeaderMap, HeaderName, HeaderValue}; + /// Builder for WasmSupervisorClient that enforces secret requirement +#[cfg(target_arch = "wasm32")] #[derive(Clone)] pub struct WasmSupervisorClientBuilder { server_url: Option, secret: Option, } +#[cfg(target_arch = "wasm32")] impl WasmSupervisorClientBuilder { /// Create a new builder pub fn new() -> Self { @@ -48,13 +58,90 @@ impl WasmSupervisorClientBuilder { } } +#[cfg(target_arch = "wasm32")] impl Default for WasmSupervisorClientBuilder { fn default() -> Self { Self::new() } } -#[cfg(test)] +/// Builder for SupervisorClient (HTTP transport) +#[cfg(not(target_arch = "wasm32"))] +#[derive(Debug, Clone)] +pub struct SupervisorClientBuilder { + url: Option, + secret: Option, + timeout: Option, +} + +#[cfg(not(target_arch = "wasm32"))] +impl SupervisorClientBuilder { + /// Create a new builder + pub fn new() -> Self { + Self { + url: None, + secret: None, + timeout: Some(std::time::Duration::from_secs(30)), + } + } + + /// Set the server URL + pub fn url(mut self, url: impl Into) -> Self { + self.url = Some(url.into()); + self + } + + /// Set the authentication secret + pub fn secret(mut self, secret: impl Into) -> Self { + self.secret = Some(secret.into()); + self + } + + /// Set the request timeout (default: 30 seconds) + pub fn timeout(mut self, timeout: std::time::Duration) -> Self { + self.timeout = Some(timeout); + self + } + + /// Build the SupervisorClient with HTTP transport + pub fn build(self) -> ClientResult> { + let server_url = self.url + .ok_or_else(|| ClientError::Http("URL is required".to_string()))?; + let secret = self.secret + .ok_or_else(|| ClientError::Http("Secret is required".to_string()))?; + + // Create headers with Authorization bearer token + let mut headers = HeaderMap::new(); + let auth_value = format!("Bearer {}", secret); + headers.insert( + HeaderName::from_static("authorization"), + HeaderValue::from_str(&auth_value) + .map_err(|e| ClientError::Http(format!("Invalid auth header: {}", e)))? 
+ ); + + let client = HttpClientBuilder::default() + .request_timeout(self.timeout.unwrap_or(std::time::Duration::from_secs(30))) + .set_headers(headers) + .build(&server_url) + .map_err(|e| ClientError::Http(e.to_string()))?; + + let transport = HttpTransport { client }; + + Ok(SupervisorClient { + transport, + secret, + }) + } +} + +#[cfg(not(target_arch = "wasm32"))] +impl Default for SupervisorClientBuilder { + fn default() -> Self { + Self::new() + } +} + +#[cfg(all(test, target_arch = "wasm32"))] mod tests { use super::*; diff --git a/lib/clients/supervisor/src/lib.rs b/lib/clients/supervisor/src/lib.rs index 04803e1..20f18ba 100644 --- a/lib/clients/supervisor/src/lib.rs +++ b/lib/clients/supervisor/src/lib.rs @@ -17,7 +17,6 @@ pub mod transports; pub mod wasm; // Builder module for type-safe client construction -#[cfg(target_arch = "wasm32")] pub mod builder; // Re-export WASM types for convenience @@ -28,17 +27,18 @@ pub use wasm::{WasmSupervisorClient, WasmJobType, WasmRunnerType, create_job_can #[cfg(target_arch = "wasm32")] pub use builder::WasmSupervisorClientBuilder; +// Re-export HTTP builder for convenience +#[cfg(not(target_arch = "wasm32"))] +pub use builder::SupervisorClientBuilder; + // Native client dependencies #[cfg(not(target_arch = "wasm32"))] use jsonrpsee::{ core::client::ClientT, - http_client::{HttpClient, HttpClientBuilder}, + http_client::HttpClient, rpc_params, }; -#[cfg(not(target_arch = "wasm32"))] -use http::{HeaderMap, HeaderName, HeaderValue}; - /// Transport abstraction for supervisor communication /// Allows different transport layers (HTTP, Mycelium, etc.) @@ -95,6 +95,7 @@ impl SupervisorTransport for HttpTransport { #[derive(Clone)] pub struct SupervisorClient { transport: T, + #[allow(dead_code)] secret: String, } @@ -254,105 +255,12 @@ pub struct AuthVerifyResponse { pub created_at: Option, } -/// Simple ProcessStatus type for native builds to avoid service manager dependency -#[cfg(not(target_arch = "wasm32"))] -pub type ProcessStatus = ProcessStatusWrapper; - -// Types duplicated from supervisor-core to avoid cyclic dependency -// These match the types in hero-supervisor but are defined here independently - -/// Runner status information (duplicated to avoid cyclic dependency) +/// Type aliases for convenience #[cfg(not(target_arch = "wasm32"))] pub type RunnerStatus = ProcessStatusWrapper; - -/// Log information (duplicated to avoid cyclic dependency) #[cfg(not(target_arch = "wasm32"))] pub type LogInfo = LogInfoWrapper; -/// Type aliases for WASM compatibility -#[cfg(target_arch = "wasm32")] -pub type ProcessStatus = ProcessStatusWrapper; -#[cfg(target_arch = "wasm32")] -pub type RunnerStatus = ProcessStatusWrapper; -#[cfg(target_arch = "wasm32")] -pub type LogInfo = LogInfoWrapper; - -/// Builder for SupervisorClient -#[cfg(not(target_arch = "wasm32"))] -#[derive(Debug, Clone)] -pub struct SupervisorClientBuilder { - url: Option, - secret: Option, - timeout: Option, -} - -#[cfg(not(target_arch = "wasm32"))] -impl SupervisorClientBuilder { - /// Create a new builder - pub fn new() -> Self { - Self { - url: None, - secret: None, - timeout: Some(std::time::Duration::from_secs(30)), - } - } - - /// Set the server URL - pub fn url(mut self, url: impl Into) -> Self { - self.url = Some(url.into()); - self - } - - /// Set the authentication secret - pub fn secret(mut self, secret: impl Into) -> Self { - self.secret = Some(secret.into()); - self - } - - /// Set the request timeout (default: 30 seconds) - pub fn timeout(mut self, timeout: 
std::time::Duration) -> Self { - self.timeout = Some(timeout); - self - } - - /// Build the SupervisorClient with HTTP transport - pub fn build(self) -> ClientResult> { - let server_url = self.url - .ok_or_else(|| ClientError::Http("URL is required".to_string()))?; - let secret = self.secret - .ok_or_else(|| ClientError::Http("Secret is required".to_string()))?; - - // Create headers with Authorization bearer token - let mut headers = HeaderMap::new(); - let auth_value = format!("Bearer {}", secret); - headers.insert( - HeaderName::from_static("authorization"), - HeaderValue::from_str(&auth_value) - .map_err(|e| ClientError::Http(format!("Invalid auth header: {}", e)))? - ); - - let client = HttpClientBuilder::default() - .request_timeout(self.timeout.unwrap_or(std::time::Duration::from_secs(30))) - .set_headers(headers) - .build(&server_url) - .map_err(|e| ClientError::Http(e.to_string()))?; - - let transport = HttpTransport { client }; - - Ok(SupervisorClient { - transport, - secret, - }) - } -} - -#[cfg(not(target_arch = "wasm32"))] -impl Default for SupervisorClientBuilder { - fn default() -> Self { - Self::new() - } -} - #[cfg(not(target_arch = "wasm32"))] impl SupervisorClient { /// Create a builder for HTTP-based SupervisorClient @@ -409,15 +317,17 @@ impl SupervisorClient { job: Job, timeout: Option, ) -> ClientResult { - let mut params = serde_json::json!({ - "job": job - }); + // Server expects Job directly as params, not wrapped in an object + let params = if let Some(_t) = timeout { + // If timeout is provided, we need to extend the Job with timeout field + // For now, just send the job directly and ignore timeout + // TODO: Add timeout support to the RPC method signature + serde_json::to_value(&job).map_err(ClientError::Serialization)? + } else { + serde_json::to_value(&job).map_err(ClientError::Serialization)? 
+ }; - if let Some(t) = timeout { - params["timeout"] = serde_json::json!(t); - } - - let result = self.transport.call("job.run", serde_json::json!([params])).await?; + let result = self.transport.call("job.run", params).await?; serde_json::from_value(result).map_err(ClientError::Serialization) } @@ -474,7 +384,7 @@ impl SupervisorClient { let params = serde_json::json!({ "config": config }); - let result = self + let _result = self .transport .call("runner.add", serde_json::json!([params])) .await?; @@ -512,7 +422,7 @@ impl SupervisorClient { "job": job }); - let result = self + let _result = self .transport .call("queue_job_to_runner", serde_json::json!([params])) .await?; @@ -603,7 +513,7 @@ impl SupervisorClient { "secret_value": secret_value }); - let result = self + let _result = self .transport .call("add_secret", serde_json::json!([params])) .await?; @@ -621,7 +531,7 @@ impl SupervisorClient { "secret_value": secret_value }); - let result = self + let _result = self .transport .call("remove_secret", serde_json::json!([params])) .await?; @@ -682,9 +592,9 @@ impl SupervisorClient { /// Create a new API key (admin only) pub async fn key_create(&self, key: ApiKey) -> ClientResult<()> { - let result = self + let _result = self .transport - .call("key.create", serde_json::json!([key])) + .call("auth.key.create", serde_json::json!([key])) .await?; Ok(()) } @@ -700,7 +610,7 @@ impl SupervisorClient { /// Remove an API key (admin only) pub async fn key_delete(&self, key_id: String) -> ClientResult<()> { - let result = self + let _result = self .transport .call("key.delete", serde_json::json!([key_id])) .await?; diff --git a/bin/supervisor/tests/end_to_end.rs b/tests/supervisor.rs similarity index 80% rename from bin/supervisor/tests/end_to_end.rs rename to tests/supervisor.rs index 88d4f5b..b880c7e 100644 --- a/bin/supervisor/tests/end_to_end.rs +++ b/tests/supervisor.rs @@ -1,55 +1,98 @@ //! End-to-End Integration Tests for Hero Supervisor //! //! Tests all OpenRPC client methods against a running supervisor instance. -//! The supervisor is automatically started and stopped for each test run. +//! The supervisor binary is automatically started and stopped for each test run. +//! +//! **IMPORTANT**: Run with `--test-threads=1` to ensure tests run sequentially: +//! ``` +//! cargo test --test supervisor -- --test-threads=1 +//! ``` use hero_supervisor_openrpc_client::SupervisorClient; -use hero_supervisor::{SupervisorBuilder, openrpc::start_http_openrpc_server}; use hero_job::{Job, JobBuilder}; use std::sync::Once; +use std::process::Child; /// Test configuration const SUPERVISOR_URL: &str = "http://127.0.0.1:3031"; const ADMIN_SECRET: &str = "test-admin-secret-for-e2e-tests"; const TEST_RUNNER_NAME: &str = "test-runner"; +use std::sync::Mutex; +use lazy_static::lazy_static; + +lazy_static! 
{ + static ref SUPERVISOR_PROCESS: Mutex> = Mutex::new(None); +} + /// Global initialization flag static INIT: Once = Once::new(); -/// Initialize and start the supervisor (called once) +/// Initialize and start the supervisor binary (called once) async fn init_supervisor() { - // Use a blocking approach to ensure supervisor starts before any test runs - static mut INITIALIZED: bool = false; - - unsafe { - INIT.call_once(|| { - // Spawn a new runtime for the supervisor - std::thread::spawn(|| { - let rt = tokio::runtime::Runtime::new().unwrap(); - rt.block_on(async { - // Build supervisor with test configuration - let supervisor = SupervisorBuilder::new() - .admin_secrets(vec![ADMIN_SECRET.to_string()]) - .build() - .await - .expect("Failed to build supervisor"); - - // Start OpenRPC server - match start_http_openrpc_server(supervisor, "127.0.0.1", 3031).await { - Ok(server_handle) => { - server_handle.stopped().await; - } - Err(e) => { - eprintln!("OpenRPC server error: {}", e); - } - } - }); - }); - - // Give the server time to start - std::thread::sleep(std::time::Duration::from_secs(1)); - INITIALIZED = true; + INIT.call_once(|| { + // Register cleanup handler + let _ = std::panic::catch_unwind(|| { + ctrlc::set_handler(move || { + cleanup_supervisor(); + std::process::exit(0); + }).ok(); }); + + // Use escargot to build and get the binary path + let binary = escargot::CargoBuild::new() + .bin("supervisor") + .package("hero-supervisor") + .run() + .expect("Failed to build supervisor binary"); + + // Start the supervisor binary + let child = binary + .command() + .args(&[ + "--admin-secret", + ADMIN_SECRET, + "--port", + "3031", + ]) + .spawn() + .expect("Failed to start supervisor"); + + *SUPERVISOR_PROCESS.lock().unwrap() = Some(child); + + // Wait for server to be ready with simple TCP check + use std::net::TcpStream; + use std::time::Duration; + + println!("⏳ Waiting for supervisor to start..."); + + for i in 0..30 { + std::thread::sleep(Duration::from_millis(500)); + + // Try to connect to the port + if TcpStream::connect_timeout( + &"127.0.0.1:3031".parse().unwrap(), + Duration::from_millis(100) + ).is_ok() { + // Give it more time to fully initialize + std::thread::sleep(Duration::from_secs(2)); + println!("✅ Supervisor ready after ~{}ms", (i * 500) + 2000); + return; + } + } + + panic!("Supervisor failed to start within 15 seconds"); + }); +} + +/// Cleanup supervisor process +fn cleanup_supervisor() { + if let Ok(mut guard) = SUPERVISOR_PROCESS.lock() { + if let Some(mut child) = guard.take() { + println!("🧹 Cleaning up supervisor process..."); + let _ = child.kill(); + let _ = child.wait(); + } } } @@ -480,3 +523,24 @@ async fn test_99_complete_workflow() { println!("✅ Complete workflow test passed!"); } + +/// Final test that ensures cleanup happens +/// This test runs last (test_zz prefix ensures it runs after test_99) +#[tokio::test] +async fn test_zz_cleanup() { + println!("🧹 Running cleanup..."); + cleanup_supervisor(); + + // Wait a bit to ensure process is killed + tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; + + // Verify port is free + use std::net::TcpStream; + let port_free = TcpStream::connect_timeout( + &"127.0.0.1:3031".parse().unwrap(), + std::time::Duration::from_millis(100) + ).is_err(); + + assert!(port_free, "Port 3031 should be free after cleanup"); + println!("✅ Cleanup complete - port 3031 is free"); +}
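Tying the pieces above together: the end-to-end tests talk to the spawned supervisor through the native HTTP client, whose builder (added in `lib/clients/supervisor/src/builder.rs`) injects the `Authorization: Bearer <secret>` header described in `docs/supervisor/auth.md`. A minimal sketch of constructing such a client against the test instance; the URL and secret mirror the test constants above, while the commented-out call uses a method name assumed from the RPC surface exercised elsewhere in this diff:

```rust
use hero_supervisor_openrpc_client::SupervisorClientBuilder;
use std::time::Duration;

#[tokio::main]
async fn main() {
    // build() wires an `Authorization: Bearer <secret>` header into every
    // request, matching the API-key scheme documented in docs/supervisor/auth.md.
    let client = SupervisorClientBuilder::new()
        .url("http://127.0.0.1:3031")               // test supervisor from tests/supervisor.rs
        .secret("test-admin-secret-for-e2e-tests")  // admin API key used by the tests
        .timeout(Duration::from_secs(10))
        .build()
        .expect("failed to build supervisor client");

    // Any authenticated call could now be issued, e.g. (hypothetical method name):
    // let _keys = client.key_list().await.expect("key.list failed");
    let _ = client;
}
```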