Add full flow script example

Signed-off-by: Lee Smet <lee.smet@hotmail.com>
This commit is contained in:
Lee Smet
2025-09-03 20:17:12 +02:00
parent 7aa35b6d06
commit c4971aa794

View File

@@ -0,0 +1,349 @@
#!/usr/bin/env python3
"""
Supervisor flow demo for HeroCoordinator.
This script:
- Creates an actor
- Creates a context granting the actor admin/reader/executor privileges
- Registers a Runner in the context targeting a Supervisor reachable via Mycelium (by public key or IP)
- Creates simple Python jobs (text jobs) with a small dependency chain
- Creates a flow referencing those jobs
- Starts the flow and polls until it finishes (or errors)
Transport: JSON-RPC over HTTP to the Coordinator (default COORDINATOR_URL=http://127.0.0.1:9652).
Example usage:
COORDINATOR_URL=http://127.0.0.1:9652 python3 scripts/supervisor_flow_demo.py --dst-ip 2001:db8::1
COORDINATOR_URL=http://127.0.0.1:9652 python3 scripts/supervisor_flow_demo.py --dst-pk bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32
Notes:
- Exactly one of --dst-ip or --dst-pk must be provided.
- Runner.topic defaults to "supervisor.rpc" (see main.rs).
- The router auto-discovers contexts and will deliver job.run messages to the supervisor.
"""
import argparse
import json
import os
import sys
import time
from typing import Any, Dict, List, Optional, Tuple
from urllib import request, error
JSONRPC_VERSION = "2.0"
def env_url() -> str:
return os.getenv("COORDINATOR_URL", "http://127.0.0.1:9652").rstrip("/")
class JsonRpcClient:
def __init__(self, url: str):
self.url = url
self._id = 0
def call(self, method: str, params: Dict[str, Any]) -> Any:
self._id += 1
payload = {
"jsonrpc": JSONRPC_VERSION,
"id": self._id,
"method": method,
"params": params,
}
data = json.dumps(payload).encode("utf-8")
req = request.Request(self.url, data=data, headers={"Content-Type": "application/json"})
try:
with request.urlopen(req) as resp:
body = resp.read()
except error.HTTPError as e:
try:
details = e.read().decode("utf-8", "ignore")
except Exception:
details = ""
raise RuntimeError(f"HTTP error {e.code}: {details}") from e
except error.URLError as e:
raise RuntimeError(f"URL error: {e.reason}") from e
try:
obj = json.loads(body.decode("utf-8"))
except Exception as e:
raise RuntimeError(f"Invalid JSON response: {body!r}") from e
if isinstance(obj, list):
raise RuntimeError("Batch responses are not supported")
if obj.get("error"):
raise RuntimeError(f"RPC error: {json.dumps(obj['error'])}")
return obj.get("result")
def print_header(title: str):
print("\n" + "=" * 80)
print(title)
print("=" * 80)
def pretty(obj: Any):
print(json.dumps(obj, indent=2, sort_keys=True))
def try_create_or_load(client: JsonRpcClient, create_method: str, create_params: Dict[str, Any],
load_method: str, load_params: Dict[str, Any]) -> Any:
"""Attempt a create; if it fails due to existence, try load."""
try:
return client.call(create_method, create_params)
except RuntimeError as e:
msg = str(e)
# Server maps AlreadyExists to StorageError, we don't have a structured error code here.
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
# Fall back to load
return client.call(load_method, load_params)
raise
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Create actor/context/runner/jobs/flow; start and wait until completion.")
group = p.add_mutually_exclusive_group(required=True)
group.add_argument("--dst-ip", help="Supervisor Mycelium IP address (IPv4 or IPv6)")
group.add_argument("--dst-pk", help="Supervisor public key (64-hex)")
p.add_argument("--context-id", type=int, default=2, help="Context id (Redis DB index; 0-15). Default: 2")
p.add_argument("--actor-id", type=int, default=11001, help="Actor id. Default: 11001")
p.add_argument("--runner-id", type=int, default=12001, help="Runner id. Default: 12001")
p.add_argument("--flow-id", type=int, default=13001, help="Flow id. Default: 13001")
p.add_argument("--base-job-id", type=int, default=20000, help="Base job id for first job; subsequent jobs increment. Default: 20000")
p.add_argument("--jobs", type=int, default=3, help="Number of jobs to create (>=1). Forms a simple chain. Default: 3")
p.add_argument("--timeout-secs", type=int, default=60, help="Per-job timeout seconds. Default: 60")
p.add_argument("--retries", type=int, default=0, help="Per-job retries (0-255). Default: 0")
p.add_argument(
"--script-type",
choices=["Python", "V", "Osis", "Sal"],
default="Python",
help="ScriptType for jobs/runner. Default: Python"
)
p.add_argument("--topic", default="supervisor.rpc", help="Supervisor topic. Default: supervisor.rpc")
p.add_argument("--poll-interval", type=float, default=2.0, help="Flow poll interval seconds. Default: 2.0")
p.add_argument("--poll-timeout", type=int, default=600, help="Max seconds to wait for flow completion. Default: 600")
return p.parse_args()
def main():
args = parse_args()
if args.jobs < 1:
print("ERROR: --jobs must be >= 1", file=sys.stderr)
sys.exit(2)
url = env_url()
client = JsonRpcClient(url)
actor_id = int(args.actor_id)
context_id = int(args.context_id)
runner_id = int(args.runner_id)
flow_id = int(args.flow_id)
base_job_id = int(args.base_job_id)
script_type = args.script_type
timeout = int(args.timeout_secs)
retries = int(args.retries)
topic = args.topic
# 1) Actor
print_header("actor.create (or load)")
actor = try_create_or_load(
client,
"actor.create",
{
"actor": {
"id": actor_id,
"pubkey": "demo-pubkey",
"address": ["127.0.0.1"],
}
},
"actor.load",
{"id": actor_id},
)
pretty(actor)
# 2) Context
print_header("context.create (or load)")
context = try_create_or_load(
client,
"context.create",
{
"context": {
"id": context_id,
"admins": [actor_id],
"readers": [actor_id],
"executors": [actor_id],
}
},
"context.load",
{"id": context_id},
)
pretty(context)
# 3) Runner in this context
# Router picks pubkey if non-empty, else IP address.
# However, RunnerCreate requires both fields; we fill both and control routing via pubkey empty/non-empty.
runner_pubkey = args.dst_pk if args.dst_pk else ""
runner_address = args.dst_ip if args.dst_ip else "127.0.0.1"
print_header("runner.create (or load)")
# runner.load requires both context_id and id
try:
runner = client.call("runner.create", {
"context_id": context_id,
"runner": {
"id": runner_id,
"pubkey": runner_pubkey,
"address": runner_address,
"topic": topic,
"script_type": script_type,
"local": False
}
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
runner = client.call("runner.load", {"context_id": context_id, "id": runner_id})
else:
raise
pretty(runner)
# 4) Jobs
# Build a simple chain: J0 (root), J1 depends on J0, J2 depends on J1, ... up to N-1
job_ids: List[int] = []
for i in range(args.jobs):
jid = base_job_id + i
depends = [] if i == 0 else [base_job_id + (i - 1)]
job_payload = {
"id": jid,
"caller_id": actor_id,
"context_id": context_id,
"script": f"print('Job {i} running')",
"script_type": script_type,
"timeout": timeout,
"retries": retries,
"env_vars": {},
"prerequisites": [],
"depends": depends,
}
print_header(f"job.create - {jid} {'(root)' if not depends else f'(depends on {depends})'}")
try:
job = client.call("job.create", {
"context_id": context_id,
"job": job_payload
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
job = client.call("job.load", {
"context_id": context_id,
"caller_id": actor_id,
"id": jid
})
else:
raise
pretty(job)
job_ids.append(jid)
# 5) Flow
print_header("flow.create (or load)")
try:
flow = client.call("flow.create", {
"context_id": context_id,
"flow": {
"id": flow_id,
"caller_id": actor_id,
"context_id": context_id,
"jobs": job_ids,
"env_vars": {}
}
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
flow = client.call("flow.load", {"context_id": context_id, "id": flow_id})
else:
raise
pretty(flow)
# Optional: show DAG
try:
print_header("flow.dag")
dag = client.call("flow.dag", {"context_id": context_id, "id": flow_id})
pretty(dag)
except Exception as e:
print(f"WARN: flow.dag failed: {e}", file=sys.stderr)
# 6) Start flow (idempotent; returns bool whether scheduler started)
print_header("flow.start")
started = client.call("flow.start", {"context_id": context_id, "id": flow_id})
print(f"flow.start -> {started}")
# 7) Poll until Finished or Error (or timeout)
print_header("Polling flow.load until completion")
t0 = time.time()
status = None
last_status_print = 0.0
poll_count = 0
while True:
poll_count += 1
flow = client.call("flow.load", {"context_id": context_id, "id": flow_id})
status = flow.get("status")
now = time.time()
if now - last_status_print >= max(1.0, float(args.poll_interval)):
print(f"[{int(now - t0)}s] flow.status = {status}")
last_status_print = now
# Every 5th poll, print the current flow DAG
if (poll_count % 5) == 0:
try:
print_header("flow.dag (periodic)")
dag = client.call("flow.dag", {"context_id": context_id, "id": flow_id})
pretty(dag)
except Exception as e:
print(f"WARN: periodic flow.dag failed: {e}", file=sys.stderr)
if status in ("Finished", "Error"):
break
if (now - t0) > args.poll_timeout:
print(f"ERROR: Flow did not complete within {args.poll_timeout}s (status={status})", file=sys.stderr)
break
time.sleep(float(args.poll_interval))
# 8) Final summary: job statuses
print_header("Final job statuses")
for jid in job_ids:
try:
j = client.call("job.load", {
"context_id": context_id,
"caller_id": actor_id,
"id": jid
})
print(f"Job {jid}: status={j.get('status')} result={j.get('result')}")
except Exception as e:
print(f"Job {jid}: load failed: {e}", file=sys.stderr)
# Exit code
if status == "Finished":
print_header("Result")
print("Flow finished successfully.")
sys.exit(0)
else:
print_header("Result")
print(f"Flow ended with status={status}")
sys.exit(1)
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
print("\nInterrupted.")
sys.exit(130)
except Exception as e:
print_header("Error")
print(str(e))
sys.exit(1)