Compare commits


40 Commits

Author SHA1 Message Date
Lee Smet
c4971aa794 Add full flow script example
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 20:17:12 +02:00
Lee Smet
7aa35b6d06 Fix remainder of HSET return value decoding
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 20:16:53 +02:00
Lee Smet
60946af1df Fix pushMessage parameter encoding
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 20:11:10 +02:00
Lee Smet
83990cf16a Properly encode topic in mycelium rpc
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 20:09:47 +02:00
Lee Smet
dbb9493bcb Improve code format in router
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 14:54:11 +02:00
Lee Smet
d921dca75c Fix default mycelium jsonrpc api port
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 14:50:45 +02:00
Lee Smet
4a15269442 Fix more HSET types in redis driver
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 14:46:55 +02:00
Lee Smet
43fd61d662 Remove unused imports
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 11:29:26 +02:00
Lee Smet
38709e06f3 Add script to test actor/context/job/flow create and flow dag
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 15:43:32 +02:00
Lee Smet
08de312cd9 Fix HSET response decoding
The command internally uses (the deprecated) HMSET which just returns OK
on success instead of the amount of fields written

Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 11:30:41 +02:00
Lee Smet
4d1cd3d910 Format codebase
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 11:22:42 +02:00
Lee Smet
c1c1ae3bd1 Bump thiserror to latest version
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 11:22:12 +02:00
Lee Smet
ec339c5cbe Add some internal logging
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 11:10:04 +02:00
Lee Smet
2aa6277385 Actors are global
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 10:29:32 +02:00
Lee Smet
9c47eaaf93 Embed rpc spec in rpc api
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 09:59:42 +02:00
Lee Smet
fce0ccb2d8 Fetch job results if a job is finished
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-28 17:09:04 +02:00
Lee Smet
e5a6228448 Periodically check the job status on the supervisor
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-28 16:42:48 +02:00
Lee Smet
052539409b Separate mycelium client more from supervisor client
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-28 15:24:03 +02:00
Lee Smet
1551b4707b Periodically verify the status of messages sent over mycelium
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-28 14:53:08 +02:00
Lee Smet
4b597cc445 Add calling of supervisor over mycelium
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-28 13:50:59 +02:00
Lee Smet
cf06c7fa36 Remove leftover logic
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-27 16:01:45 +02:00
Lee Smet
6f7fded175 Convert jobs to messages
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-27 15:33:43 +02:00
Lee Smet
fde456fd5e Add new supervisor client over mycelium
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-27 14:17:58 +02:00
Lee Smet
a8227eb808 Revert "Add supervisor API"
This reverts commit bc30c9cc89.
2025-08-27 12:52:54 +02:00
Lee Smet
bc30c9cc89 Add supervisor API
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-22 23:19:05 +02:00
Lee Smet
7ce19f8b6d Add validation for service methods
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-22 14:08:41 +02:00
Lee Smet
bc6cb16732 General code improvements
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-22 12:48:36 +02:00
Lee Smet
74995fa6fe Add job status update in service layer
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-22 12:45:50 +02:00
Lee Smet
f30706a25a Add service layer to abstract business logic
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-22 12:28:36 +02:00
Lee Smet
208d18c280 Allow updating job status in DB
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-22 11:34:29 +02:00
Lee Smet
e45e5939e9 Improve doc comment
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-22 11:02:45 +02:00
Lee Smet
e57c76fb36 Allow updating DAG
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-22 10:59:46 +02:00
Lee Smet
1939a3d09d Add DAG loading to spec
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-21 17:03:21 +02:00
Lee Smet
ec91a15131 Implement DAG for flow
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-21 16:45:10 +02:00
Lee Smet
eb69a44039 Improve rpc api return objects encoding
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-21 15:38:04 +02:00
Lee Smet
30b7acdaba Fix some quirks
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-21 14:07:48 +02:00
Lee Smet
9482a4e3ee Bump jsonrpsee to latest version
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-21 13:56:32 +02:00
Lee Smet
04669eb638 Bump redis to latest version
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-21 13:37:25 +02:00
Lee Smet
62c200b5bd Add openrpc API
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-21 13:29:39 +02:00
Lee Smet
c5f0f80492 Add redis storage driver
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-21 11:23:42 +02:00
27 changed files with 7949 additions and 283 deletions

Cargo.lock (generated, 1475 lines): diff suppressed because it is too large.

Cargo.toml

@@ -9,4 +9,17 @@ serde = { version = "1.0.219", features = ["derive"] }
 serde_json = "1.0.143"
 tokio = { version = "1.47.1", features = ["full"] }
 tracing = "0.1.41"
-redis = { version = "0.25.4", features = ["tokio-comp", "connection-manager"] }
+redis = { version = "0.32.5", features = [
+    "tokio-comp",
+    "connection-manager",
+    "aio",
+] }
+jsonrpsee = { version = "0.26.0", features = ["server", "macros"] }
+async-trait = "0.1.83"
+# HTTP client to call Mycelium JSON-RPC
+reqwest = { version = "0.12.7", features = ["json", "rustls-tls"] }
+# Base64 encoding for message payloads
+base64 = "0.22.1"
+# Error derive for clean error types
+thiserror = "2.0.16"
+tracing-subscriber = { version = "0.3.18", features = ["env-filter", "fmt"] }
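The redis bump above relates to the HSET fixes in this commit range: as the message of commit 08de312cd9 notes, a multi-field HSET is executed internally via the deprecated HMSET, which replies with a simple-string OK instead of the integer count of fields written. Below is a minimal, hedged sketch of decoding that reply defensively, assuming the `tokio-comp`/`connection-manager` features from this manifest; key and field names are illustrative, and the exact `query_async` turbofish has shifted between redis-rs releases:

```rust
use redis::Value;

// Accept both reply shapes for HSET: Int (count of newly created fields)
// or Okay (the HMSET fallback path). Anything else is a type error.
async fn hset_fields(
    cm: &mut redis::aio::ConnectionManager,
    key: &str,
    fields: &[(&str, &str)],
) -> redis::RedisResult<()> {
    let mut cmd = redis::cmd("HSET");
    cmd.arg(key);
    for (f, v) in fields {
        cmd.arg(*f).arg(*v);
    }
    match cmd.query_async::<Value>(cm).await? {
        Value::Okay | Value::Int(_) => Ok(()),
        other => Err(redis::RedisError::from((
            redis::ErrorKind::TypeError,
            "unexpected HSET reply",
            format!("{other:?}"),
        ))),
    }
}
```

In recent redis-rs versions the manager itself comes from `client.get_connection_manager().await`, which is what the `connection-manager` feature enables.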

scripts/jsonrpc_demo.py (new file, 361 lines)

@@ -0,0 +1,361 @@
#!/usr/bin/env python3
"""
Demo script for HeroCoordinator JSON-RPC API.
- Creates an actor
- Verifies by loading the actor
- Creates a context with the actor as admin/reader/executor
- Creates three jobs with dependencies
- Creates a flow referencing those jobs
- Fetches and prints the flow DAG
Usage:
COORDINATOR_URL=http://127.0.0.1:9652 python3 scripts/jsonrpc_demo.py
Defaults to http://127.0.0.1:9652 if COORDINATOR_URL is not set.
"""
import os
import json
import sys
from urllib import request, error
from typing import Any, Dict, List, Tuple
JSONRPC_VERSION = "2.0"
class JsonRpcClient:
def __init__(self, url: str):
self.url = url.rstrip("/")
self._id = 0
def call(self, method: str, params: Dict[str, Any]) -> Any:
self._id += 1
payload = {
"jsonrpc": JSONRPC_VERSION,
"id": self._id,
"method": method,
"params": params,
}
data = json.dumps(payload).encode("utf-8")
req = request.Request(self.url, data=data, headers={"Content-Type": "application/json"})
try:
with request.urlopen(req) as resp:
body = resp.read()
except error.HTTPError as e:
try:
details = e.read().decode("utf-8", "ignore")
except Exception:
details = ""
raise RuntimeError(f"HTTP error {e.code}: {details}") from e
except error.URLError as e:
raise RuntimeError(f"URL error: {e.reason}") from e
try:
obj = json.loads(body.decode("utf-8"))
except Exception as e:
raise RuntimeError(f"Invalid JSON response: {body!r}") from e
# JSON-RPC single response expected
if isinstance(obj, list):
raise RuntimeError("Batch responses are not supported in this demo")
if obj.get("error"):
raise RuntimeError(f"RPC error: {json.dumps(obj['error'])}")
return obj.get("result")
def print_header(title: str):
print("\n" + "=" * 80)
print(title)
print("=" * 80)
def pretty_print(obj: Any):
print(json.dumps(obj, indent=2, sort_keys=True))
def summarize_dag(dag: Dict[str, Any]):
print_header("Flow DAG Summary")
flow_id = dag.get("flow_id")
caller_id = dag.get("caller_id")
context_id = dag.get("context_id")
print(f"flow_id={flow_id} caller_id={caller_id} context_id={context_id}")
edges: List[Tuple[int, int]] = dag.get("edges", [])
roots: List[int] = dag.get("roots", [])
leaves: List[int] = dag.get("leaves", [])
levels: List[List[int]] = dag.get("levels", [])
nodes: Dict[str, Any] = dag.get("nodes", {})
print("Edges:")
for a, b in edges:
print(f" {a} -> {b}")
print(f"Roots: {roots}")
print(f"Leaves: {leaves}")
print("Levels:")
for i, lvl in enumerate(levels):
print(f" L{i}: {lvl}")
# Show nodes and their dependencies (from JobSummary)
print("Nodes:")
for k, v in nodes.items():
depends = v.get("depends", [])
prerequisites = v.get("prerequisites", [])
stype = v.get("script_type")
print(f" Job {k}: depends={depends} prerequisites={prerequisites} script_type={stype}")
def assert_edges(edges: List[Tuple[int, int]], required: List[Tuple[int, int]]):
edge_set = {(int(a), int(b)) for a, b in edges}
missing = [e for e in required if e not in edge_set]
if missing:
raise AssertionError(f"Missing expected edges in DAG: {missing}; got={sorted(edge_set)}")
def main():
url = os.getenv("COORDINATOR_URL", "http://127.0.0.1:9652")
client = JsonRpcClient(url)
# Deterministic demo IDs; change if collisions happen
actor_id = 1001
context_id = 1 # Redis DB indices are 0-15; keep <= 15
job_a = 3001
job_b = 3002
job_c = 3003
job_d = 3004
job_e = 3005
job_f = 3006
job_g = 3007
job_h = 3008
job_i = 3009
flow_id = 4001
print_header("actor.create")
actor = client.call("actor.create", {
"actor": {
"id": actor_id,
"pubkey": "demo-pubkey",
"address": ["127.0.0.1"]
}
})
pretty_print(actor)
print_header("actor.load")
actor_loaded = client.call("actor.load", {"id": actor_id})
pretty_print(actor_loaded)
print_header("context.create")
context = client.call("context.create", {
"context": {
"id": context_id,
"admins": [actor_id],
"readers": [actor_id],
"executors": [actor_id]
}
})
pretty_print(context)
print_header("job.create - A (root)")
jobA = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_a,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('A')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": []
}
})
pretty_print(jobA)
print_header("job.create - B (root)")
jobB = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_b,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('B')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": []
}
})
pretty_print(jobB)
print_header("job.create - C (depends on A and B)")
jobC = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_c,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('C')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_a, job_b]
}
})
pretty_print(jobC)
print_header("job.create - D (depends on A)")
jobD = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_d,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('D')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_a]
}
})
pretty_print(jobD)
print_header("job.create - E (depends on B)")
jobE = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_e,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('E')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_b]
}
})
pretty_print(jobE)
print_header("job.create - F (depends on C and D)")
jobF = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_f,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('F')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_c, job_d]
}
})
pretty_print(jobF)
print_header("job.create - G (depends on C and E)")
jobG = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_g,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('G')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_c, job_e]
}
})
pretty_print(jobG)
print_header("job.create - H (leaf; depends on F and G)")
jobH = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_h,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('H')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_f, job_g]
}
})
pretty_print(jobH)
print_header("job.create - I (leaf; depends on F and G)")
jobI = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_i,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('I')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_f, job_g]
}
})
pretty_print(jobI)
print_header("flow.create")
flow = client.call("flow.create", {
"context_id": context_id,
"flow": {
"id": flow_id,
"caller_id": actor_id,
"context_id": context_id,
"jobs": [job_a, job_b, job_c, job_d, job_e, job_f, job_g, job_h, job_i],
"env_vars": {}
}
})
pretty_print(flow)
print_header("flow.dag")
dag = client.call("flow.dag", {"context_id": context_id, "id": flow_id})
summarize_dag(dag)
# Validate roots and leaves
got_roots = list(map(int, dag.get("roots", [])))
if got_roots != sorted([job_a, job_b]):
print("WARNING: Unexpected roots:", got_roots, file=sys.stderr)
got_leaves = {int(x) for x in dag.get("leaves", [])}
expected_leaves = {job_h, job_i}
if got_leaves != expected_leaves:
print("WARNING: Unexpected leaves:", got_leaves, "expected:", expected_leaves, file=sys.stderr)
# Check edges reflect the expanded DAG
expected_edges = [
(job_a, job_c), (job_b, job_c),
(job_a, job_d), (job_b, job_e),
(job_c, job_f), (job_d, job_f),
(job_c, job_g), (job_e, job_g),
(job_f, job_h), (job_g, job_h),
(job_f, job_i), (job_g, job_i),
]
try:
assert_edges(dag.get("edges", []), expected_edges)
print("DAG edges contain expected dependencies:", expected_edges)
except AssertionError as e:
print("WARNING:", e, file=sys.stderr)
if __name__ == "__main__":
try:
main()
except Exception as e:
print_header("Error")
print(str(e))
sys.exit(1)

scripts/supervisor_flow_demo.py (new file, 349 lines)

@@ -0,0 +1,349 @@
#!/usr/bin/env python3
"""
Supervisor flow demo for HeroCoordinator.
This script:
- Creates an actor
- Creates a context granting the actor admin/reader/executor privileges
- Registers a Runner in the context targeting a Supervisor reachable via Mycelium (by public key or IP)
- Creates simple Python jobs (text jobs) with a small dependency chain
- Creates a flow referencing those jobs
- Starts the flow and polls until it finishes (or errors)
Transport: JSON-RPC over HTTP to the Coordinator (default COORDINATOR_URL=http://127.0.0.1:9652).
Example usage:
COORDINATOR_URL=http://127.0.0.1:9652 python3 scripts/supervisor_flow_demo.py --dst-ip 2001:db8::1
COORDINATOR_URL=http://127.0.0.1:9652 python3 scripts/supervisor_flow_demo.py --dst-pk bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32
Notes:
- Exactly one of --dst-ip or --dst-pk must be provided.
- Runner.topic defaults to "supervisor.rpc" (see main.rs).
- The router auto-discovers contexts and will deliver job.run messages to the supervisor.
"""
import argparse
import json
import os
import sys
import time
from typing import Any, Dict, List, Optional, Tuple
from urllib import request, error
JSONRPC_VERSION = "2.0"
def env_url() -> str:
return os.getenv("COORDINATOR_URL", "http://127.0.0.1:9652").rstrip("/")
class JsonRpcClient:
def __init__(self, url: str):
self.url = url
self._id = 0
def call(self, method: str, params: Dict[str, Any]) -> Any:
self._id += 1
payload = {
"jsonrpc": JSONRPC_VERSION,
"id": self._id,
"method": method,
"params": params,
}
data = json.dumps(payload).encode("utf-8")
req = request.Request(self.url, data=data, headers={"Content-Type": "application/json"})
try:
with request.urlopen(req) as resp:
body = resp.read()
except error.HTTPError as e:
try:
details = e.read().decode("utf-8", "ignore")
except Exception:
details = ""
raise RuntimeError(f"HTTP error {e.code}: {details}") from e
except error.URLError as e:
raise RuntimeError(f"URL error: {e.reason}") from e
try:
obj = json.loads(body.decode("utf-8"))
except Exception as e:
raise RuntimeError(f"Invalid JSON response: {body!r}") from e
if isinstance(obj, list):
raise RuntimeError("Batch responses are not supported")
if obj.get("error"):
raise RuntimeError(f"RPC error: {json.dumps(obj['error'])}")
return obj.get("result")
def print_header(title: str):
print("\n" + "=" * 80)
print(title)
print("=" * 80)
def pretty(obj: Any):
print(json.dumps(obj, indent=2, sort_keys=True))
def try_create_or_load(client: JsonRpcClient, create_method: str, create_params: Dict[str, Any],
load_method: str, load_params: Dict[str, Any]) -> Any:
"""Attempt a create; if it fails due to existence, try load."""
try:
return client.call(create_method, create_params)
except RuntimeError as e:
msg = str(e)
# Server maps AlreadyExists to StorageError; we don't have a structured error code here.
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
# Fall back to load
return client.call(load_method, load_params)
raise
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Create actor/context/runner/jobs/flow; start and wait until completion.")
group = p.add_mutually_exclusive_group(required=True)
group.add_argument("--dst-ip", help="Supervisor Mycelium IP address (IPv4 or IPv6)")
group.add_argument("--dst-pk", help="Supervisor public key (64-hex)")
p.add_argument("--context-id", type=int, default=2, help="Context id (Redis DB index; 0-15). Default: 2")
p.add_argument("--actor-id", type=int, default=11001, help="Actor id. Default: 11001")
p.add_argument("--runner-id", type=int, default=12001, help="Runner id. Default: 12001")
p.add_argument("--flow-id", type=int, default=13001, help="Flow id. Default: 13001")
p.add_argument("--base-job-id", type=int, default=20000, help="Base job id for first job; subsequent jobs increment. Default: 20000")
p.add_argument("--jobs", type=int, default=3, help="Number of jobs to create (>=1). Forms a simple chain. Default: 3")
p.add_argument("--timeout-secs", type=int, default=60, help="Per-job timeout seconds. Default: 60")
p.add_argument("--retries", type=int, default=0, help="Per-job retries (0-255). Default: 0")
p.add_argument(
"--script-type",
choices=["Python", "V", "Osis", "Sal"],
default="Python",
help="ScriptType for jobs/runner. Default: Python"
)
p.add_argument("--topic", default="supervisor.rpc", help="Supervisor topic. Default: supervisor.rpc")
p.add_argument("--poll-interval", type=float, default=2.0, help="Flow poll interval seconds. Default: 2.0")
p.add_argument("--poll-timeout", type=int, default=600, help="Max seconds to wait for flow completion. Default: 600")
return p.parse_args()
def main():
args = parse_args()
if args.jobs < 1:
print("ERROR: --jobs must be >= 1", file=sys.stderr)
sys.exit(2)
url = env_url()
client = JsonRpcClient(url)
actor_id = int(args.actor_id)
context_id = int(args.context_id)
runner_id = int(args.runner_id)
flow_id = int(args.flow_id)
base_job_id = int(args.base_job_id)
script_type = args.script_type
timeout = int(args.timeout_secs)
retries = int(args.retries)
topic = args.topic
# 1) Actor
print_header("actor.create (or load)")
actor = try_create_or_load(
client,
"actor.create",
{
"actor": {
"id": actor_id,
"pubkey": "demo-pubkey",
"address": ["127.0.0.1"],
}
},
"actor.load",
{"id": actor_id},
)
pretty(actor)
# 2) Context
print_header("context.create (or load)")
context = try_create_or_load(
client,
"context.create",
{
"context": {
"id": context_id,
"admins": [actor_id],
"readers": [actor_id],
"executors": [actor_id],
}
},
"context.load",
{"id": context_id},
)
pretty(context)
# 3) Runner in this context
# Router picks pubkey if non-empty, else IP address.
# However, RunnerCreate requires both fields; we fill both and control routing via pubkey empty/non-empty.
runner_pubkey = args.dst_pk if args.dst_pk else ""
runner_address = args.dst_ip if args.dst_ip else "127.0.0.1"
print_header("runner.create (or load)")
# runner.load requires both context_id and id
try:
runner = client.call("runner.create", {
"context_id": context_id,
"runner": {
"id": runner_id,
"pubkey": runner_pubkey,
"address": runner_address,
"topic": topic,
"script_type": script_type,
"local": False
}
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
runner = client.call("runner.load", {"context_id": context_id, "id": runner_id})
else:
raise
pretty(runner)
# 4) Jobs
# Build a simple chain: J0 (root), J1 depends on J0, J2 depends on J1, ... up to N-1
job_ids: List[int] = []
for i in range(args.jobs):
jid = base_job_id + i
depends = [] if i == 0 else [base_job_id + (i - 1)]
job_payload = {
"id": jid,
"caller_id": actor_id,
"context_id": context_id,
"script": f"print('Job {i} running')",
"script_type": script_type,
"timeout": timeout,
"retries": retries,
"env_vars": {},
"prerequisites": [],
"depends": depends,
}
print_header(f"job.create - {jid} {'(root)' if not depends else f'(depends on {depends})'}")
try:
job = client.call("job.create", {
"context_id": context_id,
"job": job_payload
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
job = client.call("job.load", {
"context_id": context_id,
"caller_id": actor_id,
"id": jid
})
else:
raise
pretty(job)
job_ids.append(jid)
# 5) Flow
print_header("flow.create (or load)")
try:
flow = client.call("flow.create", {
"context_id": context_id,
"flow": {
"id": flow_id,
"caller_id": actor_id,
"context_id": context_id,
"jobs": job_ids,
"env_vars": {}
}
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
flow = client.call("flow.load", {"context_id": context_id, "id": flow_id})
else:
raise
pretty(flow)
# Optional: show DAG
try:
print_header("flow.dag")
dag = client.call("flow.dag", {"context_id": context_id, "id": flow_id})
pretty(dag)
except Exception as e:
print(f"WARN: flow.dag failed: {e}", file=sys.stderr)
# 6) Start flow (idempotent; returns bool whether scheduler started)
print_header("flow.start")
started = client.call("flow.start", {"context_id": context_id, "id": flow_id})
print(f"flow.start -> {started}")
# 7) Poll until Finished or Error (or timeout)
print_header("Polling flow.load until completion")
t0 = time.time()
status = None
last_status_print = 0.0
poll_count = 0
while True:
poll_count += 1
flow = client.call("flow.load", {"context_id": context_id, "id": flow_id})
status = flow.get("status")
now = time.time()
if now - last_status_print >= max(1.0, float(args.poll_interval)):
print(f"[{int(now - t0)}s] flow.status = {status}")
last_status_print = now
# Every 5th poll, print the current flow DAG
if (poll_count % 5) == 0:
try:
print_header("flow.dag (periodic)")
dag = client.call("flow.dag", {"context_id": context_id, "id": flow_id})
pretty(dag)
except Exception as e:
print(f"WARN: periodic flow.dag failed: {e}", file=sys.stderr)
if status in ("Finished", "Error"):
break
if (now - t0) > args.poll_timeout:
print(f"ERROR: Flow did not complete within {args.poll_timeout}s (status={status})", file=sys.stderr)
break
time.sleep(float(args.poll_interval))
# 8) Final summary: job statuses
print_header("Final job statuses")
for jid in job_ids:
try:
j = client.call("job.load", {
"context_id": context_id,
"caller_id": actor_id,
"id": jid
})
print(f"Job {jid}: status={j.get('status')} result={j.get('result')}")
except Exception as e:
print(f"Job {jid}: load failed: {e}", file=sys.stderr)
# Exit code
if status == "Finished":
print_header("Result")
print("Flow finished successfully.")
sys.exit(0)
else:
print_header("Result")
print(f"Flow ended with status={status}")
sys.exit(1)
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
print("\nInterrupted.")
sys.exit(130)
except Exception as e:
print_header("Error")
print(str(e))
sys.exit(1)

specs/openrpc.json (new file, 1393 lines): diff suppressed because it is too large.

(documentation file)

@@ -10,7 +10,7 @@
 | **Runner** | Public key, Mycelium address, topic name, type (`v\|python\|osis\|rust`), local flag, timestamps | `runner:<id>` (hash) | The *worker* that actually executes **RunnerJob** scripts. It subscribes to a Mycelium topic (normally `runner<id>`). If `local == true` the runner also consumes jobs directly from a Redis queue that is named after the script-type suffix (`v`, `python`, ...). |
 | **RunnerJob**| Script source, type (`osis\|sal\|v\|python`), env vars, prerequisites, dependencies, status, timestamps, result map | `job:<caller_id>:<id>` (hash) | A single executable unit. It lives inside a **Context**, belongs to a **Runner**, and is queued according to its `script_type` (e.g. `queue:python`). Its status moves through the lifecycle `dispatched → waiting_for_prerequisites → started → finished|error`. |
-> **Key idea:** All objects are persisted as *hashes* in a **Redis** database that is dedicated to a *Context*. The system is completely **decentralised**: each actor owns its own context and can spin up as many runners as needed. Communication between actors, runners and the rest of the system happens over **Mycelium**, a message bus that uses Redis lists as queues.
+> **Key idea:** All objects are persisted as *hashes*. Context-scoped objects (**Context**, **Flow**, **Message**, **Runner**, **RunnerJob**) live in a **Redis** database dedicated to that context. **Actors are global** and are stored in Redis DB 0 under `actor:<id>`. The system is completely **decentralised**: each actor owns its own context and can spin up as many runners as needed. Communication between actors, runners and the rest of the system happens over **Mycelium**, a message bus that uses Redis lists as queues.
---
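For orientation, the key scheme described above reduces to a handful of naming helpers. This is a hedged sketch of the convention only; the repo keeps such logic inside its storage layer, and these free functions are illustrative:

```rust
/// Actors are global: stored in Redis DB 0.
fn actor_key(id: u32) -> String {
    format!("actor:{id}")
}

/// Context-scoped objects live in the Redis DB whose index is the context id.
fn runner_key(id: u32) -> String {
    format!("runner:{id}")
}

/// Jobs are keyed by caller and id within the context's DB.
fn job_key(caller_id: u32, id: u32) -> String {
    format!("job:{caller_id}:{id}")
}

/// Jobs are queued per script type, e.g. "queue:python".
fn queue_key(script_type_suffix: &str) -> String {
    format!("queue:{script_type_suffix}")
}
```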

src/clients/mod.rs (new file, 7 lines)

@@ -0,0 +1,7 @@
pub mod mycelium_client;
pub mod supervisor_client;
pub mod types;
pub use mycelium_client::{MyceliumClient, MyceliumClientError};
pub use supervisor_client::{SupervisorClient, SupervisorClientError};
pub use types::Destination;

src/clients/mycelium_client.rs (new file, 237 lines)

@@ -0,0 +1,237 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use reqwest::Client as HttpClient;
use serde_json::{Value, json};
use thiserror::Error;
use crate::clients::Destination;
use crate::models::TransportStatus;
/// Lightweight client for Mycelium JSON-RPC (send + query status)
#[derive(Clone)]
pub struct MyceliumClient {
base_url: String, // e.g. http://127.0.0.1:8990
http: HttpClient,
id_counter: Arc<AtomicU64>,
}
#[derive(Debug, Error)]
pub enum MyceliumClientError {
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
#[error("Transport timed out waiting for a reply (408)")]
TransportTimeout,
#[error("JSON-RPC error: {0}")]
RpcError(String),
#[error("Invalid response: {0}")]
InvalidResponse(String),
}
impl MyceliumClient {
pub fn new(base_url: impl Into<String>) -> Result<Self, MyceliumClientError> {
let url = base_url.into();
let http = HttpClient::builder().build()?;
Ok(Self {
base_url: url,
http,
id_counter: Arc::new(AtomicU64::new(1)),
})
}
fn next_id(&self) -> u64 {
self.id_counter.fetch_add(1, Ordering::Relaxed)
}
async fn jsonrpc(&self, method: &str, params: Value) -> Result<Value, MyceliumClientError> {
let req = json!({
"jsonrpc": "2.0",
"id": self.next_id(),
"method": method,
"params": [ params ]
});
let resp = self.http.post(&self.base_url).json(&req).send().await?;
let status = resp.status();
let body: Value = resp.json().await?;
if let Some(err) = body.get("error") {
let code = err.get("code").and_then(|v| v.as_i64()).unwrap_or(0);
let msg = err
.get("message")
.and_then(|v| v.as_str())
.unwrap_or("unknown error");
if code == 408 {
return Err(MyceliumClientError::TransportTimeout);
}
return Err(MyceliumClientError::RpcError(format!(
"code={code} msg={msg}"
)));
}
if !status.is_success() {
return Err(MyceliumClientError::RpcError(format!(
"HTTP {status}, body {body}"
)));
}
Ok(body)
}
/// Call messageStatus with an outbound message id (hex string)
pub async fn message_status(
&self,
id_hex: &str,
) -> Result<TransportStatus, MyceliumClientError> {
let params = json!({ "id": id_hex });
let body = self.jsonrpc("messageStatus", params).await?;
let result = body.get("result").ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
})?;
// Accept both { status: "..."} and bare "..."
let status_str = if let Some(s) = result.get("status").and_then(|v| v.as_str()) {
s.to_string()
} else if let Some(s) = result.as_str() {
s.to_string()
} else {
return Err(MyceliumClientError::InvalidResponse(format!(
"unexpected result shape: {result}"
)));
};
Self::map_status(&status_str).ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("unknown status: {status_str}"))
})
}
fn map_status(s: &str) -> Option<TransportStatus> {
match s {
"queued" => Some(TransportStatus::Queued),
"sent" => Some(TransportStatus::Sent),
"delivered" => Some(TransportStatus::Delivered),
"read" => Some(TransportStatus::Read),
"failed" => Some(TransportStatus::Failed),
_ => None,
}
}
/// Build params object for pushMessage without performing any network call.
/// Exposed for serializer-only tests and reuse.
pub(crate) fn build_push_params(
dst: &Destination,
topic: &str,
payload_b64: &str,
reply_timeout: Option<u64>,
) -> Value {
let dst_v = match dst {
Destination::Ip(ip) => json!({ "ip": ip.to_string() }),
Destination::Pk(pk) => json!({ "pk": pk }),
};
let mut message = json!({
"dst": dst_v,
"topic": topic,
"payload": payload_b64,
});
if let Some(rt) = reply_timeout {
message["reply_timeout"] = json!(rt);
}
message
}
/// pushMessage: send a message with dst/topic/payload. Optional reply_timeout for sync replies.
pub async fn push_message(
&self,
dst: &Destination,
topic: &str,
payload_b64: &str,
reply_timeout: Option<u64>,
) -> Result<Value, MyceliumClientError> {
let params = Self::build_push_params(dst, topic, payload_b64, reply_timeout);
let body = self.jsonrpc("pushMessage", params).await?;
let result = body.get("result").ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
})?;
Ok(result.clone())
}
/// Helper to extract outbound message id from pushMessage result (InboundMessage or PushMessageResponseId)
pub fn extract_message_id_from_result(result: &Value) -> Option<String> {
result
.get("id")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::clients::Destination;
#[test]
fn build_push_params_shapes_ip_pk_and_timeout() {
// IP destination
let p1 = MyceliumClient::build_push_params(
&Destination::Ip("2001:db8::1".parse().unwrap()),
"supervisor.rpc",
"Zm9vYmFy", // "foobar"
Some(10),
);
let msg1 = p1.get("message").unwrap();
assert_eq!(
msg1.get("topic").unwrap().as_str().unwrap(),
"supervisor.rpc"
);
assert_eq!(msg1.get("payload").unwrap().as_str().unwrap(), "Zm9vYmFy");
assert_eq!(
msg1.get("dst")
.unwrap()
.get("ip")
.unwrap()
.as_str()
.unwrap(),
"2001:db8::1"
);
assert_eq!(p1.get("reply_timeout").unwrap().as_u64().unwrap(), 10);
// PK destination without timeout
let p2 = MyceliumClient::build_push_params(
&Destination::Pk(
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".into(),
),
"supervisor.rpc",
"YmF6", // "baz"
None,
);
let msg2 = p2.get("message").unwrap();
assert_eq!(
msg2.get("dst")
.unwrap()
.get("pk")
.unwrap()
.as_str()
.unwrap(),
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32"
);
assert!(p2.get("reply_timeout").is_none());
}
#[test]
fn extract_message_id_variants() {
// PushMessageResponseId
let r1 = json!({"id":"0123456789abcdef"});
assert_eq!(
MyceliumClient::extract_message_id_from_result(&r1).unwrap(),
"0123456789abcdef"
);
// InboundMessage-like
let r2 = json!({
"id":"fedcba9876543210",
"srcIp":"449:abcd:0123:defa::1",
"payload":"hpV+"
});
assert_eq!(
MyceliumClient::extract_message_id_from_result(&r2).unwrap(),
"fedcba9876543210"
);
}
}
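A hedged usage sketch for this client, wiring `push_message` to `message_status`; the endpoint, destination, and payloads are placeholders, and note that (per `SupervisorClient::call` further down) the topic passed to Mycelium is base64-encoded:

```rust
use std::sync::Arc;
use herocoordinator::clients::{Destination, MyceliumClient};
use herocoordinator::models::TransportStatus;

async fn demo() -> Result<(), Box<dyn std::error::Error>> {
    let mycelium = Arc::new(MyceliumClient::new("http://127.0.0.1:8990")?);
    let dst = Destination::Ip("2001:db8::1".parse()?);
    // Topic and payload are base64: "c3VwZXJ2aXNvci5ycGM=" is "supervisor.rpc".
    let result = mycelium
        .push_message(&dst, "c3VwZXJ2aXNvci5ycGM=", "Zm9vYmFy", None)
        .await?;
    if let Some(id) = MyceliumClient::extract_message_id_from_result(&result) {
        // Poll the transport status with the outbound message id.
        match mycelium.message_status(&id).await? {
            TransportStatus::Delivered | TransportStatus::Read => {
                println!("message {id} arrived")
            }
            _ => println!("message {id} still queued, sent, or failed"),
        }
    }
    Ok(())
}
```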

src/clients/supervisor_client.rs (new file, 506 lines)

@@ -0,0 +1,506 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde_json::{Value, json};
use thiserror::Error;
use crate::clients::{Destination, MyceliumClient, MyceliumClientError};
#[derive(Clone)]
pub struct SupervisorClient {
mycelium: Arc<MyceliumClient>, // Delegated Mycelium transport
destination: Destination, // ip or pk
topic: String, // e.g. "supervisor.rpc"
secret: Option<String>, // optional, required by several supervisor methods
id_counter: Arc<AtomicU64>, // JSON-RPC id generator (for inner supervisor requests)
}
#[derive(Debug, Error)]
pub enum SupervisorClientError {
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
#[error("Transport timed out waiting for a reply (408)")]
TransportTimeout,
#[error("JSON-RPC error: {0}")]
RpcError(String),
#[error("Invalid response: {0}")]
InvalidResponse(String),
#[error("Missing secret for method requiring authentication")]
MissingSecret,
}
impl From<MyceliumClientError> for SupervisorClientError {
fn from(e: MyceliumClientError) -> Self {
match e {
MyceliumClientError::TransportTimeout => SupervisorClientError::TransportTimeout,
MyceliumClientError::RpcError(m) => SupervisorClientError::RpcError(m),
MyceliumClientError::InvalidResponse(m) => SupervisorClientError::InvalidResponse(m),
MyceliumClientError::Http(err) => SupervisorClientError::Http(err),
MyceliumClientError::Json(err) => SupervisorClientError::Json(err),
}
}
}
impl SupervisorClient {
/// Preferred constructor: provide a shared Mycelium client.
pub fn new_with_client(
mycelium: Arc<MyceliumClient>,
destination: Destination,
topic: impl Into<String>,
secret: Option<String>,
) -> Self {
Self {
mycelium,
destination,
topic: topic.into(),
secret,
id_counter: Arc::new(AtomicU64::new(1)),
}
}
/// Backward-compatible constructor that builds a Mycelium client from base_url.
/// base_url defaults to Mycelium spec "http://127.0.0.1:8990" if empty.
pub fn new(
base_url: impl Into<String>,
destination: Destination,
topic: impl Into<String>,
secret: Option<String>,
) -> Result<Self, SupervisorClientError> {
let mut url = base_url.into();
if url.is_empty() {
url = "http://127.0.0.1:8990".to_string();
}
let mycelium = Arc::new(MyceliumClient::new(url)?);
Ok(Self::new_with_client(mycelium, destination, topic, secret))
}
fn next_id(&self) -> u64 {
self.id_counter.fetch_add(1, Ordering::Relaxed)
}
/// Internal helper used by tests to inspect dst JSON shape.
fn build_dst(&self) -> Value {
match &self.destination {
Destination::Ip(ip) => json!({ "ip": ip.to_string() }),
Destination::Pk(pk) => json!({ "pk": pk }),
}
}
fn build_supervisor_payload(&self, method: &str, params: Value) -> Value {
json!({
"jsonrpc": "2.0",
"id": self.next_id(),
"method": method,
"params": params,
})
}
fn encode_payload(payload: &Value) -> Result<String, SupervisorClientError> {
let s = serde_json::to_string(payload)?;
Ok(BASE64_STANDARD.encode(s.as_bytes()))
}
fn encode_topic(topic: &[u8]) -> String {
BASE64_STANDARD.encode(topic)
}
fn extract_message_id_from_result(result: &Value) -> Option<String> {
// Two possibilities per Mycelium spec oneOf:
// - PushMessageResponseId: { "id": "0123456789abcdef" }
// - InboundMessage: object containing "id" plus srcIp, ...; we still return id.
result
.get("id")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
}
/// Generic call: build supervisor JSON-RPC message, send via Mycelium pushMessage, return outbound message id (hex).
pub async fn call(&self, method: &str, params: Value) -> Result<String, SupervisorClientError> {
let inner = self.build_supervisor_payload(method, params);
let payload_b64 = Self::encode_payload(&inner)?;
let result = self
.mycelium
.push_message(
&self.destination,
&Self::encode_topic(self.topic.as_bytes()),
&payload_b64,
None,
)
.await?;
if let Some(id) = MyceliumClient::extract_message_id_from_result(&result) {
return Ok(id);
}
// Some servers might return the oneOf wrapped; handle a len==1 array defensively (not in spec, but resilient)
if let Some(arr) = result.as_array()
&& arr.len() == 1
&& let Some(id) = MyceliumClient::extract_message_id_from_result(&arr[0])
{
return Ok(id);
}
Err(SupervisorClientError::InvalidResponse(format!(
"result did not contain message id: {result}"
)))
}
/// Synchronous variant: wait for a JSON-RPC reply via Mycelium reply_timeout, and return the inner JSON-RPC "result".
/// If the supervisor returns an error object, map to RpcError.
pub async fn call_sync(
&self,
method: &str,
params: Value,
reply_timeout_secs: u64,
) -> Result<Value, SupervisorClientError> {
let inner = self.build_supervisor_payload(method, params);
let payload_b64 = Self::encode_payload(&inner)?;
let result = self
.mycelium
.push_message(
&self.destination,
&self.topic,
&payload_b64,
Some(reply_timeout_secs),
)
.await?;
// Expect an InboundMessage-like with a base64 payload containing the supervisor JSON-RPC response
let payload_field = if let Some(p) = result.get("payload").and_then(|v| v.as_str()) {
p.to_string()
} else if let Some(arr) = result.as_array() {
// Defensive: handle single-element array shape
if let Some(one) = arr.get(0) {
one.get("payload")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
.ok_or_else(|| {
SupervisorClientError::InvalidResponse(format!(
"missing payload in result: {result}"
))
})?
} else {
return Err(SupervisorClientError::TransportTimeout);
}
} else {
// No payload => no reply received within timeout (Mycelium would have returned just an id)
return Err(SupervisorClientError::TransportTimeout);
};
let raw = BASE64_STANDARD
.decode(payload_field.as_bytes())
.map_err(|e| {
SupervisorClientError::InvalidResponse(format!("invalid base64 payload: {e}"))
})?;
let rpc_resp: Value = serde_json::from_slice(&raw)?;
if let Some(err) = rpc_resp.get("error") {
return Err(SupervisorClientError::RpcError(err.to_string()));
}
let res = rpc_resp.get("result").ok_or_else(|| {
SupervisorClientError::InvalidResponse(format!(
"missing result in supervisor reply: {rpc_resp}"
))
})?;
Ok(res.clone())
}
fn need_secret(&self) -> Result<&str, SupervisorClientError> {
self.secret
.as_deref()
.ok_or(SupervisorClientError::MissingSecret)
}
// -----------------------------
// Typed wrappers for Supervisor API
// Asynchronous-only: returns outbound message id
// -----------------------------
// Runners
pub async fn list_runners(&self) -> Result<String, SupervisorClientError> {
self.call("list_runners", json!([])).await
}
pub async fn register_runner(
&self,
name: impl Into<String>,
queue: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"name": name.into(),
"queue": queue.into()
}]);
self.call("register_runner", params).await
}
pub async fn remove_runner(
&self,
actor_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
self.call("remove_runner", json!([actor_id.into()])).await
}
pub async fn start_runner(
&self,
actor_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
self.call("start_runner", json!([actor_id.into()])).await
}
pub async fn stop_runner(
&self,
actor_id: impl Into<String>,
force: bool,
) -> Result<String, SupervisorClientError> {
self.call("stop_runner", json!([actor_id.into(), force]))
.await
}
pub async fn get_runner_status(
&self,
actor_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
self.call("get_runner_status", json!([actor_id.into()]))
.await
}
pub async fn get_all_runner_status(&self) -> Result<String, SupervisorClientError> {
self.call("get_all_runner_status", json!([])).await
}
pub async fn start_all(&self) -> Result<String, SupervisorClientError> {
self.call("start_all", json!([])).await
}
pub async fn stop_all(&self, force: bool) -> Result<String, SupervisorClientError> {
self.call("stop_all", json!([force])).await
}
pub async fn get_all_status(&self) -> Result<String, SupervisorClientError> {
self.call("get_all_status", json!([])).await
}
// Jobs
pub async fn jobs_create(&self, job: Value) -> Result<String, SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job": job
}]);
self.call("jobs.create", params).await
}
pub async fn jobs_list(&self) -> Result<String, SupervisorClientError> {
self.call("jobs.list", json!([])).await
}
pub async fn job_run(&self, job: Value) -> Result<String, SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job": job
}]);
self.call("job.run", params).await
}
pub async fn job_start(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job_id": job_id.into()
}]);
self.call("job.start", params).await
}
pub async fn job_status(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
self.call("job.status", json!([job_id.into()])).await
}
/// Synchronous job.status: waits for the supervisor to reply and returns the status string.
/// The supervisor result may be an object with { status: "..." } or a bare string.
pub async fn job_status_sync(
&self,
job_id: impl Into<String>,
reply_timeout_secs: u64,
) -> Result<String, SupervisorClientError> {
let res = self
.call_sync("job.status", json!([job_id.into()]), reply_timeout_secs)
.await?;
let status = if let Some(s) = res.get("status").and_then(|v| v.as_str()) {
s.to_string()
} else if let Some(s) = res.as_str() {
s.to_string()
} else {
return Err(SupervisorClientError::InvalidResponse(format!(
"unexpected job.status result shape: {res}"
)));
};
Ok(status)
}
pub async fn job_result(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
self.call("job.result", json!([job_id.into()])).await
}
/// Synchronous job.result: waits for the supervisor to reply and returns a map
/// containing exactly one of:
/// - {"success": "..."} on success
/// - {"error": "..."} on error reported by the runner
/// Some servers may return a bare string; we treat that as {"success": "<string>"}.
pub async fn job_result_sync(
&self,
job_id: impl Into<String>,
reply_timeout_secs: u64,
) -> Result<std::collections::HashMap<String, String>, SupervisorClientError> {
let res = self
.call_sync("job.result", json!([job_id.into()]), reply_timeout_secs)
.await?;
use std::collections::HashMap;
let mut out: HashMap<String, String> = HashMap::new();
if let Some(obj) = res.as_object() {
if let Some(s) = obj.get("success").and_then(|v| v.as_str()) {
out.insert("success".to_string(), s.to_string());
return Ok(out);
}
if let Some(s) = obj.get("error").and_then(|v| v.as_str()) {
out.insert("error".to_string(), s.to_string());
return Ok(out);
}
return Err(SupervisorClientError::InvalidResponse(format!(
"unexpected job.result result shape: {res}"
)));
} else if let Some(s) = res.as_str() {
out.insert("success".to_string(), s.to_string());
return Ok(out);
}
Err(SupervisorClientError::InvalidResponse(format!(
"unexpected job.result result shape: {res}"
)))
}
pub async fn job_stop(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job_id": job_id.into()
}]);
self.call("job.stop", params).await
}
pub async fn job_delete(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job_id": job_id.into()
}]);
self.call("job.delete", params).await
}
// Discovery
pub async fn rpc_discover(&self) -> Result<String, SupervisorClientError> {
self.call("rpc.discover", json!([])).await
}
}
// -----------------------------
// Tests (serialization-only)
// -----------------------------
#[cfg(test)]
mod tests {
use super::*;
use std::net::IpAddr;
fn mk_client() -> SupervisorClient {
// Uses the legacy constructor but will not issue real network calls in these tests.
SupervisorClient::new(
"http://127.0.0.1:8990",
Destination::Pk(
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".to_string(),
),
"supervisor.rpc",
Some("secret".to_string()),
)
.unwrap()
}
#[test]
fn builds_dst_ip_and_pk() {
let c_ip = SupervisorClient::new(
"http://127.0.0.1:8990",
Destination::Ip("2001:db8::1".parse().unwrap()),
"supervisor.rpc",
None,
)
.unwrap();
let v_ip = c_ip.build_dst();
assert_eq!(v_ip.get("ip").unwrap().as_str().unwrap(), "2001:db8::1");
let c_pk = mk_client();
let v_pk = c_pk.build_dst();
assert_eq!(
v_pk.get("pk").unwrap().as_str().unwrap(),
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32"
);
}
#[test]
fn encodes_supervisor_payload_b64() {
let c = mk_client();
let payload = c.build_supervisor_payload("list_runners", json!([]));
let b64 = SupervisorClient::encode_payload(&payload).unwrap();
// decode and compare round-trip JSON
let raw = base64::engine::general_purpose::STANDARD
.decode(b64.as_bytes())
.unwrap();
let decoded: Value = serde_json::from_slice(&raw).unwrap();
assert_eq!(
decoded.get("method").unwrap().as_str().unwrap(),
"list_runners"
);
assert_eq!(decoded.get("jsonrpc").unwrap().as_str().unwrap(), "2.0");
}
#[test]
fn extract_message_id_works_for_both_variants() {
// PushMessageResponseId
let r1 = json!({"id":"0123456789abcdef"});
assert_eq!(
SupervisorClient::extract_message_id_from_result(&r1).unwrap(),
"0123456789abcdef"
);
// InboundMessage-like
let r2 = json!({
"id":"fedcba9876543210",
"srcIp":"449:abcd:0123:defa::1",
"payload":"hpV+"
});
assert_eq!(
SupervisorClient::extract_message_id_from_result(&r2).unwrap(),
"fedcba9876543210"
);
}
}
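A hedged sketch of driving these typed wrappers end to end; the public key is the test vector from the tests above, while the job id and secret are placeholders:

```rust
use std::sync::Arc;
use herocoordinator::clients::{Destination, MyceliumClient, SupervisorClient};

async fn demo() -> Result<(), Box<dyn std::error::Error>> {
    let mycelium = Arc::new(MyceliumClient::new("http://127.0.0.1:8990")?);
    let sup = SupervisorClient::new_with_client(
        mycelium,
        Destination::Pk(
            "bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".to_string(),
        ),
        "supervisor.rpc",
        Some("secret".to_string()),
    );

    // Asynchronous path: returns the outbound Mycelium message id immediately.
    let msg_id = sup.job_status("20000").await?;
    println!("job.status request sent, outbound message id = {msg_id}");

    // Synchronous path: wait up to 10s for the supervisor's JSON-RPC reply.
    let status = sup.job_status_sync("20000", 10).await?;
    println!("job 20000 status = {status}");
    Ok(())
}
```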

src/clients/types.rs (new file, 9 lines)

@@ -0,0 +1,9 @@
use std::net::IpAddr;
/// Destination for Mycelium messages (shared by clients)
#[derive(Clone, Debug)]
pub enum Destination {
Ip(IpAddr),
/// 64-hex public key of the receiver node
Pk(String),
}

src/dag.rs (new file, 346 lines)

@@ -0,0 +1,346 @@
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
use std::fmt;
use crate::{
models::{Flow, Job, ScriptType},
storage::RedisDriver,
};
pub type DagResult<T> = Result<T, DagError>;
#[derive(Debug)]
pub enum DagError {
Storage(Box<dyn std::error::Error + Send + Sync>),
MissingDependency { job: u32, depends_on: u32 },
CycleDetected { remaining: Vec<u32> },
UnknownJob { job: u32 },
DependenciesIncomplete { job: u32, missing: Vec<u32> },
FlowFailed { failed_job: u32 },
JobNotStarted { job: u32 },
}
impl fmt::Display for DagError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
DagError::Storage(e) => write!(f, "Storage error: {}", e),
DagError::MissingDependency { job, depends_on } => write!(
f,
"Job {} depends on {}, which is not part of the flow.jobs list",
job, depends_on
),
DagError::CycleDetected { remaining } => {
write!(f, "Cycle detected; unresolved nodes: {:?}", remaining)
}
DagError::UnknownJob { job } => write!(f, "Unknown job id: {}", job),
DagError::DependenciesIncomplete { job, missing } => write!(
f,
"Job {} cannot start; missing completed deps: {:?}",
job, missing
),
DagError::FlowFailed { failed_job } => {
write!(f, "Flow failed due to job {}", failed_job)
}
DagError::JobNotStarted { job } => write!(
f,
"Job {} cannot be completed because it is not marked as started",
job
),
}
}
}
impl std::error::Error for DagError {}
impl From<Box<dyn std::error::Error + Send + Sync>> for DagError {
fn from(e: Box<dyn std::error::Error + Send + Sync>) -> Self {
DagError::Storage(e)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobSummary {
pub id: u32,
pub depends: Vec<u32>,
pub prerequisites: Vec<String>,
pub script_type: ScriptType,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlowDag {
pub flow_id: u32,
pub caller_id: u32,
pub context_id: u32,
pub nodes: HashMap<u32, JobSummary>,
pub edges: Vec<(u32, u32)>, // (from prerequisite, to job)
pub reverse_edges: Vec<(u32, u32)>, // (from job, to prerequisite)
pub roots: Vec<u32>, // in_degree == 0
pub leaves: Vec<u32>, // out_degree == 0
pub levels: Vec<Vec<u32>>, // topological layers for parallel execution
// Runtime execution state
pub started: HashSet<u32>,
pub completed: HashSet<u32>,
pub failed_job: Option<u32>,
}
pub async fn build_flow_dag(
redis: &RedisDriver,
context_id: u32,
flow_id: u32,
) -> DagResult<FlowDag> {
// Load flow
let flow: Flow = redis
.load_flow(context_id, flow_id)
.await
.map_err(DagError::from)?;
let caller_id = flow.caller_id();
let flow_job_ids = flow.jobs();
// Build a set for faster membership tests
let job_id_set: HashSet<u32> = flow_job_ids.iter().copied().collect();
// Load all jobs
let mut jobs: HashMap<u32, Job> = HashMap::with_capacity(flow_job_ids.len());
for jid in flow_job_ids {
let job = redis
.load_job(context_id, caller_id, *jid)
.await
.map_err(DagError::from)?;
jobs.insert(*jid, job);
}
// Validate dependencies and construct adjacency
let mut edges: Vec<(u32, u32)> = Vec::new();
let mut reverse_edges: Vec<(u32, u32)> = Vec::new();
let mut adj: HashMap<u32, Vec<u32>> = HashMap::with_capacity(jobs.len());
let mut rev_adj: HashMap<u32, Vec<u32>> = HashMap::with_capacity(jobs.len());
let mut in_degree: HashMap<u32, usize> = HashMap::with_capacity(jobs.len());
for &jid in flow_job_ids {
adj.entry(jid).or_default();
rev_adj.entry(jid).or_default();
in_degree.entry(jid).or_insert(0);
}
for (&jid, job) in &jobs {
for &dep in job.depends() {
if !job_id_set.contains(&dep) {
return Err(DagError::MissingDependency {
job: jid,
depends_on: dep,
});
}
// edge: dep -> jid
edges.push((dep, jid));
reverse_edges.push((jid, dep));
adj.get_mut(&dep).unwrap().push(jid);
rev_adj.get_mut(&jid).unwrap().push(dep);
*in_degree.get_mut(&jid).unwrap() += 1;
}
}
// Kahn's algorithm for topological sorting, with level construction
let mut zero_in: VecDeque<u32> = in_degree
.iter()
.filter_map(|(k, v)| if *v == 0 { Some(*k) } else { None })
.collect();
let mut processed_count = 0usize;
let mut levels: Vec<Vec<u32>> = Vec::new();
// To make deterministic, sort initial zero_in
{
let mut tmp: Vec<u32> = zero_in.iter().copied().collect();
tmp.sort_unstable();
zero_in = tmp.into_iter().collect();
}
while !zero_in.is_empty() {
let mut level: Vec<u32> = Vec::new();
// drain current frontier
let mut next_zero: Vec<u32> = Vec::new();
let mut current_frontier: Vec<u32> = zero_in.drain(..).collect();
current_frontier.sort_unstable();
for u in current_frontier {
level.push(u);
processed_count += 1;
if let Some(children) = adj.get(&u) {
let mut sorted_children = children.clone();
sorted_children.sort_unstable();
for &v in &sorted_children {
let d = in_degree.get_mut(&v).unwrap();
*d -= 1;
if *d == 0 {
next_zero.push(v);
}
}
}
}
next_zero.sort_unstable();
zero_in = next_zero.into_iter().collect();
levels.push(level);
}
if processed_count != jobs.len() {
let remaining: Vec<u32> = in_degree
.into_iter()
.filter_map(|(k, v)| if v > 0 { Some(k) } else { None })
.collect();
return Err(DagError::CycleDetected { remaining });
}
// Roots and leaves
let roots: Vec<u32> = levels.first().cloned().unwrap_or_default();
let leaves: Vec<u32> = adj
.iter()
.filter_map(|(k, v)| if v.is_empty() { Some(*k) } else { None })
.collect();
// Nodes map (JobSummary)
let mut nodes: HashMap<u32, JobSummary> = HashMap::with_capacity(jobs.len());
for (&jid, job) in &jobs {
let summary = JobSummary {
id: jid,
depends: job.depends().to_vec(),
prerequisites: job.prerequisites().to_vec(),
script_type: job.script_type(),
};
nodes.insert(jid, summary);
}
// Sort edges deterministically
edges.sort_unstable();
reverse_edges.sort_unstable();
let dag = FlowDag {
flow_id,
caller_id,
context_id,
nodes,
edges,
reverse_edges,
roots,
leaves,
levels,
started: HashSet::new(),
completed: HashSet::new(),
failed_job: None,
};
Ok(dag)
}
impl FlowDag {
/// Return all jobs that are ready to be processed.
/// A job is ready if:
/// - it exists in the DAG
/// - it is not already started or completed
/// - it has no dependencies, or all dependencies are completed
///
/// If any job has failed, the entire flow is considered failed and an error is returned.
pub fn ready_jobs(&self) -> DagResult<Vec<u32>> {
if let Some(failed_job) = self.failed_job {
return Err(DagError::FlowFailed { failed_job });
}
let mut ready: Vec<u32> = Vec::new();
for (&jid, summary) in &self.nodes {
if self.completed.contains(&jid) || self.started.contains(&jid) {
continue;
}
let mut deps_ok = true;
for dep in &summary.depends {
if !self.completed.contains(dep) {
deps_ok = false;
break;
}
}
if deps_ok {
ready.push(jid);
}
}
ready.sort_unstable();
Ok(ready)
}
/// Mark a job as started.
/// Strict validation rules:
/// - Unknown jobs are rejected with UnknownJob
/// - If the flow has already failed, return FlowFailed
/// - If the job is already started or completed, this is a no-op (idempotent)
/// - If any dependency is not completed, return DependenciesIncomplete with the missing deps
pub fn mark_job_started(&mut self, job: u32) -> DagResult<()> {
if !self.nodes.contains_key(&job) {
return Err(DagError::UnknownJob { job });
}
if self.completed.contains(&job) || self.started.contains(&job) {
return Ok(());
}
if let Some(failed_job) = self.failed_job {
return Err(DagError::FlowFailed { failed_job });
}
let summary = self.nodes.get(&job).expect("checked contains_key");
let missing: Vec<u32> = summary
.depends
.iter()
.copied()
.filter(|d| !self.completed.contains(d))
.collect();
if !missing.is_empty() {
return Err(DagError::DependenciesIncomplete { job, missing });
}
self.started.insert(job);
Ok(())
}
/// Mark a job as completed.
/// Strict validation rules:
/// - Unknown jobs are rejected with UnknownJob
/// - If the job is already completed, this is a no-op (idempotent)
/// - If the flow has already failed, return FlowFailed
/// - If the job was not previously started, return JobNotStarted
pub fn mark_job_completed(&mut self, job: u32) -> DagResult<()> {
if !self.nodes.contains_key(&job) {
return Err(DagError::UnknownJob { job });
}
if self.completed.contains(&job) {
return Ok(());
}
if let Some(failed_job) = self.failed_job {
return Err(DagError::FlowFailed { failed_job });
}
if !self.started.contains(&job) {
return Err(DagError::JobNotStarted { job });
}
self.started.remove(&job);
self.completed.insert(job);
Ok(())
}
/// Mark a job as failed.
/// Behavior:
/// - Unknown jobs are rejected with UnknownJob
/// - If a failure is already recorded:
/// - If it is the same job, no-op (idempotent)
/// - If it is a different job, return FlowFailed with the already-failed job
/// - Otherwise record this job as the failed job
pub fn mark_job_failed(&mut self, job: u32) -> DagResult<()> {
if !self.nodes.contains_key(&job) {
return Err(DagError::UnknownJob { job });
}
match self.failed_job {
Some(existing) if existing == job => Ok(()),
Some(existing) => Err(DagError::FlowFailed {
failed_job: existing,
}),
None => {
self.failed_job = Some(job);
Ok(())
}
}
}
}
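The runtime contract of `FlowDag` (build once, then loop on `ready_jobs` and mark transitions) can be sketched as below. Dispatching to a runner is elided, `drive_flow` is an illustrative name, and a real scheduler would call `mark_job_failed` on error rather than completing unconditionally:

```rust
use herocoordinator::dag::{DagResult, build_flow_dag};
use herocoordinator::storage::RedisDriver;

async fn drive_flow(redis: &RedisDriver, context_id: u32, flow_id: u32) -> DagResult<()> {
    let mut dag = build_flow_dag(redis, context_id, flow_id).await?;
    loop {
        // Every job whose dependencies are all completed, in deterministic order.
        // Propagates FlowFailed if any job was marked failed.
        let ready = dag.ready_jobs()?;
        if ready.is_empty() {
            break; // nothing left that can start; all schedulable jobs are done
        }
        for job in ready {
            dag.mark_job_started(job)?;
            // ... hand the job to a runner here; on success:
            dag.mark_job_completed(job)?;
        }
    }
    Ok(())
}
```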

src/lib.rs

@@ -1,6 +1,8 @@
+pub mod clients;
+pub mod dag;
 pub mod models;
-pub mod redis_driver;
+pub mod router;
+pub mod rpc;
+pub mod service;
 pub mod storage;
 mod time;
-pub use redis_driver::RedisDriver;
-pub use redis_driver::Result as RedisResult;

src/main.rs

@@ -1,6 +1,9 @@
 use clap::Parser;
 use std::net::{IpAddr, SocketAddr};
+use std::sync::Arc;
+use tracing::{error, info};
+use tracing_subscriber::EnvFilter;
 
 #[derive(Debug, Clone, Parser)]
 #[command(
     name = "herocoordinator",
@@ -22,8 +25,8 @@ struct Cli {
         long = "mycelium-port",
         short = 'p',
         env = "MYCELIUM_PORT",
-        default_value_t = 9651u16,
-        help = "Port for Mycelium JSON-RPC (default: 9651)"
+        default_value_t = 8990u16,
+        help = "Port for Mycelium JSON-RPC (default: 8990)"
     )]
     mycelium_port: u16,
 
@@ -35,13 +38,98 @@ struct Cli {
         help = "Socket address of Redis instance (default: 127.0.0.1:6379)"
     )]
     redis_addr: SocketAddr,
+
+    #[arg(
+        long = "api-http-ip",
+        env = "API_HTTP_IP",
+        default_value = "127.0.0.1",
+        help = "Bind IP for HTTP JSON-RPC server (default: 127.0.0.1)"
+    )]
+    api_http_ip: IpAddr,
+
+    #[arg(
+        long = "api-http-port",
+        env = "API_HTTP_PORT",
+        default_value_t = 9652u16,
+        help = "Bind port for HTTP JSON-RPC server (default: 9652)"
+    )]
+    api_http_port: u16,
+
+    #[arg(
+        long = "api-ws-ip",
+        env = "API_WS_IP",
+        default_value = "127.0.0.1",
+        help = "Bind IP for WebSocket JSON-RPC server (default: 127.0.0.1)"
+    )]
+    api_ws_ip: IpAddr,
+
+    #[arg(
+        long = "api-ws-port",
+        env = "API_WS_PORT",
+        default_value_t = 9653u16,
+        help = "Bind port for WebSocket JSON-RPC server (default: 9653)"
+    )]
+    api_ws_port: u16,
 }
 
-fn main() {
+#[tokio::main]
+async fn main() {
     let cli = Cli::parse();
 
+    // Initialize tracing subscriber (pretty formatter; controlled by RUST_LOG)
+    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
+    tracing_subscriber::fmt()
+        .with_env_filter(filter)
+        .pretty()
+        .with_target(true)
+        .with_level(true)
+        .init();
+
     println!(
         "mycelium_ip={}, mycelium_port={}, redis_addr={}",
         cli.mycelium_ip, cli.mycelium_port, cli.redis_addr
     );
+
+    let http_addr = SocketAddr::new(cli.api_http_ip, cli.api_http_port);
+    let ws_addr = SocketAddr::new(cli.api_ws_ip, cli.api_ws_port);
+
+    // Initialize Redis driver
+    let redis = herocoordinator::storage::RedisDriver::new(cli.redis_addr.to_string())
+        .await
+        .expect("Failed to connect to Redis");
+
+    // Initialize Service
+    let service = herocoordinator::service::AppService::new(redis);
+    let service_for_router = service.clone();
+
+    // Shared application state
+    let state = Arc::new(herocoordinator::rpc::AppState::new(service));
+
+    // Start router workers (auto-discovered contexts)
+    {
+        let base_url = format!("http://{}:{}", cli.mycelium_ip, cli.mycelium_port);
+        let cfg = herocoordinator::router::RouterConfig {
+            context_ids: Vec::new(), // ignored by start_router_auto
+            concurrency: 32,
+            base_url,
+            topic: "supervisor.rpc".to_string(),
+            transport_poll_interval_secs: 2,
+            transport_poll_timeout_secs: 300,
+        };
+        let _auto_handle = herocoordinator::router::start_router_auto(service_for_router, cfg);
+    }
+
+    // Build RPC modules for both servers
+    let http_module = herocoordinator::rpc::build_module(state.clone());
+    let ws_module = herocoordinator::rpc::build_module(state.clone());
+
+    info!(%http_addr, %ws_addr, redis_addr=%cli.redis_addr, "Starting JSON-RPC servers");
+
+    // Start servers
+    let _http_handle = herocoordinator::rpc::start_http(http_addr, http_module)
+        .await
+        .expect("Failed to start HTTP server");
+    let _ws_handle = herocoordinator::rpc::start_ws(ws_addr, ws_module)
+        .await
+        .expect("Failed to start WS server");
+
+    // Wait for Ctrl+C to terminate
+    if let Err(e) = tokio::signal::ctrl_c().await {
+        error!(error=%e, "Failed to listen for shutdown signal");
+    }
+    info!("Shutdown signal received, exiting.");
 }

View File

@@ -8,8 +8,8 @@ mod script_type;
pub use actor::Actor;
pub use context::Context;
pub use flow::Flow;
pub use job::Job;
pub use message::Message;
pub use flow::{Flow, FlowStatus};
pub use job::{Job, JobStatus};
pub use message::{Message, MessageFormatType, MessageStatus, MessageType, TransportStatus};
pub use runner::Runner;
pub use script_type::ScriptType;

View File

@@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
use crate::time::Timestamp;
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone)]
pub struct Actor {
id: u32,
pubkey: String,
@@ -13,13 +13,3 @@ pub struct Actor {
created_at: Timestamp,
updated_at: Timestamp,
}
impl Actor {
pub fn redis_key(&self) -> String {
format!("actor:{}", self.id)
}
pub fn key(id: u32) -> String {
format!("actor:{}", id)
}
}

View File

@@ -2,26 +2,16 @@ use serde::{Deserialize, Serialize};
use crate::time::Timestamp;
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone)]
pub struct Context {
/// Redis DB to use
id: u32,
pub id: u32,
/// Actor ids which have admin rights on this context
admins: Vec<u32>,
pub admins: Vec<u32>,
/// Actor ids which can read the context info
readers: Vec<u32>,
pub readers: Vec<u32>,
/// Actor ids which can execute jobs in this context
executors: Vec<u32>,
created_at: Timestamp,
upddated_at: Timestamp,
}
impl Context {
pub fn redis_key(&self) -> String {
format!("context:{}", self.id)
}
pub fn key(id: u32) -> String {
format!("context:{}", id)
}
pub executors: Vec<u32>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
}

View File

@@ -4,28 +4,29 @@ use serde::{Deserialize, Serialize};
use crate::time::Timestamp;
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone)]
pub struct Flow {
/// Flow Id, set by the actor which created it
id: u32,
pub id: u32,
/// Actor Id who created this flow
caller_id: u32,
pub caller_id: u32,
/// The context in which this flow is executed
context_id: u32,
pub context_id: u32,
/// List of jobs which make up the flow
jobs: Vec<u32>,
pub jobs: Vec<u32>,
/// Environment variables, passed to every job when executed
env_vars: HashMap<String, String>,
pub env_vars: HashMap<String, String>,
/// The result of the flow
result: HashMap<String, String>,
created_at: Timestamp,
updated_at: Timestamp,
status: FlowStatus,
pub result: HashMap<String, String>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
pub status: FlowStatus,
}
/// The status of a flow
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Debug)]
pub enum FlowStatus {
Created,
Dispatched,
Started,
Error,
@@ -33,11 +34,16 @@ pub enum FlowStatus {
}
impl Flow {
pub fn redis_key(&self) -> String {
format!("flow:{}", self.id)
pub fn id(&self) -> u32 {
self.id
}
pub fn key(id: u32) -> String {
format!("flow:{}", id)
pub fn caller_id(&self) -> u32 {
self.caller_id
}
pub fn context_id(&self) -> u32 {
self.context_id
}
pub fn jobs(&self) -> &[u32] {
&self.jobs
}
}

View File

@@ -4,31 +4,31 @@ use serde::{Deserialize, Serialize};
use crate::{models::ScriptType, time::Timestamp};
#[derive(Serialize, Deserialize)]
#[derive(Clone, Serialize, Deserialize)]
pub struct Job {
/// Job Id, this is given by the actor who created the job
id: u32,
pub id: u32,
/// Actor ID which created this job
caller_id: u32,
pub caller_id: u32,
/// Context in which the job is executed
context_id: u32,
script: String,
script_type: ScriptType,
pub context_id: u32,
pub script: String,
pub script_type: ScriptType,
/// Timeout in seconds for this job
timeout: u32,
pub timeout: u32,
/// Max amount of times to retry this job
retries: u8,
env_vars: HashMap<String, String>,
result: HashMap<String, String>,
prerequisites: Vec<String>,
pub retries: u8,
pub env_vars: HashMap<String, String>,
pub result: HashMap<String, String>,
pub prerequisites: Vec<String>,
/// Ids of jobs this job depends on, i.e. this job can't start until those have finished
depends: Vec<u32>,
created_at: Timestamp,
updated_at: Timestamp,
status: JobStatus,
pub depends: Vec<u32>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
pub status: JobStatus,
}
#[derive(Serialize, Deserialize)]
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
pub enum JobStatus {
Dispatched,
WaitingForPrerequisites,
@@ -38,11 +38,25 @@ pub enum JobStatus {
}
impl Job {
pub fn redis_key(&self) -> String {
format!("job:{}:{}", self.caller_id, self.id)
pub fn id(&self) -> u32 {
self.id
}
pub fn key(caller_id: u32, id: u32) -> String {
format!("job:{}:{}", caller_id, id)
pub fn caller_id(&self) -> u32 {
self.caller_id
}
pub fn context_id(&self) -> u32 {
self.context_id
}
pub fn depends(&self) -> &[u32] {
&self.depends
}
pub fn prerequisites(&self) -> &[String] {
&self.prerequisites
}
pub fn script_type(&self) -> ScriptType {
self.script_type.clone()
}
pub fn status(&self) -> JobStatus {
self.status.clone()
}
}

View File

@@ -5,38 +5,44 @@ use crate::{
time::Timestamp,
};
#[derive(Serialize, Deserialize)]
#[derive(Clone, Serialize, Deserialize)]
pub struct Message {
/// Unique ID for the message, set by the caller
id: u32,
pub id: u32,
/// Id of the actor who sent this message
caller_id: u32,
pub caller_id: u32,
/// Id of the context in which this message was sent
context_id: u32,
message: String,
message_type: ScriptType,
message_format_type: MessageFormatType,
pub context_id: u32,
pub message: String,
pub message_type: ScriptType,
pub message_format_type: MessageFormatType,
/// Seconds for the message to arrive at the destination
timeout: u32,
pub timeout: u32,
/// Seconds for the receiver to acknowledge receipt of the message
timeout_ack: u32,
pub timeout_ack: u32,
/// Seconds for the receiver to send us a reply
timeout_result: u32,
job: Vec<Job>,
logs: Vec<Log>,
created_at: Timestamp,
updated_at: Timestamp,
status: MessageStatus,
pub timeout_result: u32,
/// Outbound transport id returned by Mycelium on push
pub transport_id: Option<String>,
/// Latest transport status as reported by Mycelium
pub transport_status: Option<TransportStatus>,
pub job: Vec<Job>,
pub logs: Vec<Log>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
pub status: MessageStatus,
}
#[derive(Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MessageType {
Job,
Chat,
Mail,
}
#[derive(Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum MessageStatus {
Dispatched,
Acknowledged,
@@ -44,7 +50,16 @@ pub enum MessageStatus {
Processed,
}
#[derive(Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum TransportStatus {
Queued,
Sent,
Delivered,
Read,
Failed,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MessageFormatType {
Html,
Text,
@@ -52,13 +67,3 @@ pub enum MessageFormatType {
}
type Log = String;
impl Message {
pub fn redis_key(&self) -> String {
format!("message:{}:{}", self.caller_id, self.id)
}
pub fn key(caller_id: u32, id: u32) -> String {
format!("message:{}:{}", caller_id, id)
}
}

View File

@@ -2,37 +2,30 @@ use std::net::IpAddr;
use serde::{Deserialize, Serialize};
use crate::models::ScriptType;
use crate::time::Timestamp;
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone)]
pub struct Runner {
id: u32,
pub id: u32,
/// Mycelium public key
pubkey: String,
pub pubkey: String,
/// Mycelium address
address: IpAddr,
pub address: IpAddr,
/// Needs to be set by the runner, usually `runner<runnerid>`
topic: String,
pub topic: String,
/// The script type this runner can execute; used for routing
pub script_type: ScriptType,
/// If this is true, the runner also listens on a local redis queue
local: bool,
crated_at: Timestamp,
updated_at: Timestamp,
pub local: bool,
pub created_at: Timestamp,
pub updated_at: Timestamp,
}
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone)]
pub enum RunnerType {
V,
Python,
Osis,
Rust,
}
impl Runner {
pub fn redis_key(&self) -> String {
format!("runner:{}", self.id)
}
pub fn key(id: u32) -> String {
format!("runner:{}", id)
}
}

View File

@@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
#[derive(Deserialize, Serialize)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub enum ScriptType {
Osis,
Sal,

View File

@@ -1,115 +0,0 @@
use std::net::SocketAddr;
use redis::{AsyncCommands, Client, RedisError, aio::ConnectionManager};
use serde::{Serialize, de::DeserializeOwned};
use crate::models::{Actor, Context, Flow, Job, Message, Runner};
pub type Result<T> = std::result::Result<T, RedisError>;
/// Async Redis driver that stores models as a single JSON blob under HSET field "data".
/// Keys follow the canonical patterns defined in specs/models.md.
pub struct RedisDriver {
manager: ConnectionManager,
}
impl RedisDriver {
/// Connect using a full Redis URL, e.g. "redis://127.0.0.1:6379".
pub async fn connect_url(url: impl AsRef<str>) -> Result<Self> {
let client = Client::open(url.as_ref())?;
let conn = client.get_connection_manager().await?;
Ok(Self { manager: conn })
}
/// Connect using a SocketAddr, e.g. 127.0.0.1:6379.
pub async fn connect_addr(addr: SocketAddr) -> Result<Self> {
Self::connect_url(Self::url_from_addr(addr)).await
}
fn url_from_addr(addr: SocketAddr) -> String {
format!("redis://{}", addr)
}
/// Save any Serialize value as JSON into HSET key field "data".
pub async fn save_json<T: Serialize>(&self, key: &str, value: &T) -> Result<()> {
let json = serde_json::to_string(value).map_err(|e| {
RedisError::from((
redis::ErrorKind::TypeError,
"serde_json::to_string failed",
format!("{}", e),
))
})?;
let mut conn = self.manager.clone();
let _: () = conn.hset(key, "data", json).await?;
Ok(())
}
/// Load any Deserialize value from HGET key field "data".
pub async fn load_json<T: DeserializeOwned>(&self, key: &str) -> Result<T> {
let mut conn = self.manager.clone();
let json: String = conn.hget(key, "data").await?;
let value = serde_json::from_str::<T>(&json).map_err(|e| {
RedisError::from((
redis::ErrorKind::TypeError,
"serde_json::from_str failed",
format!("{}", e),
))
})?;
Ok(value)
}
// Actor
pub async fn save_actor(&self, actor: &Actor) -> Result<()> {
self.save_json(&actor.redis_key(), actor).await
}
pub async fn load_actor(&self, id: u32) -> Result<Actor> {
self.load_json(&Actor::key(id)).await
}
// Context
pub async fn save_context(&self, ctx: &Context) -> Result<()> {
self.save_json(&ctx.redis_key(), ctx).await
}
pub async fn load_context(&self, id: u32) -> Result<Context> {
self.load_json(&Context::key(id)).await
}
// Flow
pub async fn save_flow(&self, flow: &Flow) -> Result<()> {
self.save_json(&flow.redis_key(), flow).await
}
pub async fn load_flow(&self, id: u32) -> Result<Flow> {
self.load_json(&Flow::key(id)).await
}
// Runner
pub async fn save_runner(&self, runner: &Runner) -> Result<()> {
self.save_json(&runner.redis_key(), runner).await
}
pub async fn load_runner(&self, id: u32) -> Result<Runner> {
self.load_json(&Runner::key(id)).await
}
// Job
pub async fn save_job(&self, job: &Job) -> Result<()> {
self.save_json(&job.redis_key(), job).await
}
pub async fn load_job(&self, caller_id: u32, id: u32) -> Result<Job> {
self.load_json(&Job::key(caller_id, id)).await
}
// Message
pub async fn save_message(&self, msg: &Message) -> Result<()> {
self.save_json(&msg.redis_key(), msg).await
}
pub async fn load_message(&self, caller_id: u32, id: u32) -> Result<Message> {
self.load_json(&Message::key(caller_id, id)).await
}
}

src/router.rs (new file, 475 lines)
View File

@@ -0,0 +1,475 @@
use std::{collections::HashSet, sync::Arc};
use serde_json::{Value, json};
use tokio::sync::Semaphore;
use crate::{
clients::{Destination, MyceliumClient, SupervisorClient},
models::{Job, JobStatus, Message, MessageStatus, ScriptType, TransportStatus},
service::AppService,
};
use tracing::{error, info};
#[derive(Clone, Debug)]
pub struct RouterConfig {
pub context_ids: Vec<u32>,
pub concurrency: usize,
pub base_url: String, // e.g. http://127.0.0.1:8990
pub topic: String, // e.g. "supervisor.rpc"
// Transport status polling configuration
pub transport_poll_interval_secs: u64, // e.g. 2
pub transport_poll_timeout_secs: u64, // e.g. 300 (5 minutes)
}
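// Example (sketch): the configuration main.rs builds; only base_url varies with
// the CLI flags, the other values are this crate's defaults.
//
//     let cfg = RouterConfig {
//         context_ids: Vec::new(),            // ignored by start_router_auto
//         concurrency: 32,
//         base_url: "http://127.0.0.1:8990".to_string(),
//         topic: "supervisor.rpc".to_string(),
//         transport_poll_interval_secs: 2,    // poll messageStatus every 2s
//         transport_poll_timeout_secs: 300,   // give up after 5 minutes
//     };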
/// Start background router loops, one per context.
/// Each loop:
/// - BRPOP msg_out with 1s timeout
/// - Loads the Message by key, selects a Runner by script_type
/// - Sends supervisor JSON-RPC via Mycelium
/// - On success: Message.status = Acknowledged
/// - On error: Message.status = Error and append a log
pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::JoinHandle<()>> {
let mut handles = Vec::new();
for ctx_id in cfg.context_ids.clone() {
let service_cloned = service.clone();
let cfg_cloned = cfg.clone();
let handle = tokio::spawn(async move {
let sem = Arc::new(Semaphore::new(cfg_cloned.concurrency));
// Create a shared Mycelium client for this context loop (retry until available)
let mycelium = loop {
match MyceliumClient::new(cfg_cloned.base_url.clone()) {
Ok(c) => break Arc::new(c),
Err(e) => {
error!(context_id=ctx_id, error=%e, "MyceliumClient init error");
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
}
};
loop {
// Pop next message key (blocking with timeout)
match service_cloned.brpop_msg_out(ctx_id, 1).await {
Ok(Some(key)) => {
let permit = {
// acquire a concurrency permit (non-fair is fine)
let sem = sem.clone();
// if semaphore is exhausted, await until a slot becomes available
match sem.acquire_owned().await {
Ok(p) => p,
Err(_) => {
// Semaphore closed; exit loop
break;
}
}
};
let service_task = service_cloned.clone();
let cfg_task = cfg_cloned.clone();
tokio::spawn({
let mycelium = mycelium.clone();
async move {
// Ensure permit is dropped at end of task
let _permit = permit;
if let Err(e) =
deliver_one(&service_task, &cfg_task, ctx_id, &key, mycelium)
.await
{
error!(context_id=ctx_id, key=%key, error=%e, "Delivery error");
}
}
});
}
Ok(None) => {
// timeout: just tick
continue;
}
Err(e) => {
error!(context_id=ctx_id, error=%e, "BRPOP error");
// small backoff to avoid busy-loop on persistent errors
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
}
}
}
});
handles.push(handle);
}
handles
}
async fn deliver_one(
service: &AppService,
cfg: &RouterConfig,
context_id: u32,
msg_key: &str,
mycelium: Arc<MyceliumClient>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Parse "message:{caller_id}:{id}"
let (caller_id, id) = parse_message_key(msg_key)
.ok_or_else(|| format!("invalid message key format: {}", msg_key))?;
// Load message
let msg: Message = service.load_message(context_id, caller_id, id).await?;
// Embedded job id (if any)
let job_id_opt: Option<u32> = msg.job.first().map(|j| j.id);
// Determine routing script_type
let desired: ScriptType = determine_script_type(&msg);
// Discover runners and select a matching one
let runners = service.scan_runners(context_id).await?;
let Some(runner) = runners.into_iter().find(|r| r.script_type == desired) else {
let log = format!(
"No runner with script_type {:?} available in context {} for message {}",
desired, context_id, msg_key
);
let _ = service
.append_message_logs(context_id, caller_id, id, vec![log.clone()])
.await;
let _ = service
.update_message_status(context_id, caller_id, id, MessageStatus::Error)
.await;
return Err(log.into());
};
// Build SupervisorClient
let dest = if !runner.pubkey.trim().is_empty() {
Destination::Pk(runner.pubkey.clone())
} else {
Destination::Ip(runner.address)
};
// Keep clones for poller usage
let dest_for_poller = dest.clone();
let topic_for_poller = cfg.topic.clone();
let client = SupervisorClient::new_with_client(
mycelium.clone(),
dest.clone(),
cfg.topic.clone(),
None, // secret
);
// Build supervisor method and params from Message
let method = msg.message.clone();
let params = build_params(&msg)?;
// Send
let out_id = client.call(&method, params).await?;
// Store transport id and initial Sent status
let _ = service
.update_message_transport(
context_id,
caller_id,
id,
Some(out_id.clone()),
Some(TransportStatus::Sent),
)
.await;
// Mark as acknowledged on success
service
.update_message_status(context_id, caller_id, id, MessageStatus::Acknowledged)
.await?;
// Spawn transport-status poller
{
let service_poll = service.clone();
let poll_interval = std::time::Duration::from_secs(cfg.transport_poll_interval_secs);
let poll_timeout = std::time::Duration::from_secs(cfg.transport_poll_timeout_secs);
let out_id_cloned = out_id.clone();
let mycelium = mycelium.clone();
// Determine reply timeout for supervisor job.result: prefer message.timeout_result, fallback to router config timeout
let job_result_reply_timeout: u64 = if msg.timeout_result > 0 {
msg.timeout_result as u64
} else {
cfg.transport_poll_timeout_secs
};
tokio::spawn(async move {
let start = std::time::Instant::now();
let client = mycelium;
// Supervisor call context captured for sync status checks
let sup_dest = dest_for_poller;
let sup_topic = topic_for_poller;
let job_id_opt = job_id_opt;
let mut last_status: Option<TransportStatus> = Some(TransportStatus::Sent);
loop {
if start.elapsed() >= poll_timeout {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec!["Transport-status polling timed out".to_string()],
)
.await;
// leave last known status; do not override
break;
}
match client.message_status(&out_id_cloned).await {
Ok(s) => {
if last_status.as_ref() != Some(&s) {
let _ = service_poll
.update_message_transport(
context_id,
caller_id,
id,
None,
Some(s.clone()),
)
.await;
last_status = Some(s.clone());
}
// Stop on terminal states
if matches!(s, TransportStatus::Delivered | TransportStatus::Read) {
// On Read, fetch supervisor job.status and update local job/message if terminal
if matches!(s, TransportStatus::Read)
&& let Some(job_id) = job_id_opt
{
let sup = SupervisorClient::new_with_client(
client.clone(),
sup_dest.clone(),
sup_topic.clone(),
None,
);
match sup.job_status_sync(job_id.to_string(), 10).await {
Ok(remote_status) => {
if let Some((mapped, terminal)) =
map_supervisor_job_status(&remote_status)
{
if terminal {
let _ = service_poll
.update_job_status_unchecked(
context_id,
caller_id,
job_id,
mapped.clone(),
)
.await;
// After terminal status, fetch supervisor job.result and store into Job.result
let sup = SupervisorClient::new_with_client(
client.clone(),
sup_dest.clone(),
sup_topic.clone(),
None,
);
match sup
.job_result_sync(
job_id.to_string(),
job_result_reply_timeout,
)
.await
{
Ok(result_map) => {
// Persist the result into the Job.result map (merge)
let _ = service_poll
.update_job_result_merge_unchecked(
context_id,
caller_id,
job_id,
result_map.clone(),
)
.await;
// Log which key was stored (success or error)
let key = result_map
.keys()
.next()
.cloned()
.unwrap_or_else(|| {
"unknown".to_string()
});
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Stored supervisor job.result for job {} ({})",
job_id, key
)],
)
.await;
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"job.result fetch error for job {}: {}",
job_id, e
)],
)
.await;
}
}
// Mark message as processed
let _ = service_poll
.update_message_status(
context_id,
caller_id,
id,
MessageStatus::Processed,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Supervisor job.status for job {} -> {} (mapped to {:?})",
job_id, remote_status, mapped
)],
)
.await;
}
} else {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Unknown supervisor status '{}' for job {}",
remote_status, job_id
)],
)
.await;
}
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!("job.status sync error: {}", e)],
)
.await;
}
}
}
break;
}
if matches!(s, TransportStatus::Failed) {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Transport failed for outbound id {out_id_cloned}"
)],
)
.await;
break;
}
}
Err(e) => {
// Log and continue polling
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!("messageStatus query error: {e}")],
)
.await;
}
}
tokio::time::sleep(poll_interval).await;
}
});
}
Ok(())
}
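// Recap of the lifecycle driven by deliver_one and its poller (sketch):
//   Message.status:           Dispatched -> Acknowledged -> Processed (or Error
//                             when no matching runner is found)
//   Message.transport_status: Sent initially, then whatever messageStatus
//                             reports; Delivered, Read and Failed end the poll
//   Job.status:               on TransportStatus::Read, the supervisor's
//                             job.status is fetched, mapped via
//                             map_supervisor_job_status below, and on a
//                             terminal status job.result is merged in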
fn determine_script_type(msg: &Message) -> ScriptType {
// Prefer embedded job's script_type if available, else fallback to message.message_type
match msg.job.first() {
Some(j) => j.script_type.clone(),
None => msg.message_type.clone(),
}
}
fn build_params(msg: &Message) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
// Minimal mapping:
// - "job.run" with exactly one embedded job: [{ "job": <job> }]
// - otherwise: []
if msg.message == "job.run"
&& let Some(j) = msg.job.first()
{
let jv = job_to_json(j)?;
return Ok(json!([ { "job": jv } ]));
}
Ok(json!([]))
}
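// Example (sketch, field values illustrative): a "job.run" Message with one
// embedded job yields params of the form
//     [ { "job": { "id": 1, "caller_id": 1, "context_id": 1,
//                  "script": "...", "script_type": "Osis", ... } } ]
// while any other method, or "job.run" without an embedded job, yields [].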
fn job_to_json(job: &Job) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
Ok(serde_json::to_value(job)?)
}
fn parse_message_key(s: &str) -> Option<(u32, u32)> {
// Expect "message:{caller_id}:{id}"
let mut it = s.split(':');
match (it.next(), it.next(), it.next(), it.next()) {
(Some("message"), Some(caller), Some(id), None) => {
let caller_id = caller.parse::<u32>().ok()?;
let msg_id = id.parse::<u32>().ok()?;
Some((caller_id, msg_id))
}
_ => None,
}
}
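// Example: parse_message_key("message:3:7") == Some((3, 7));
// "message:3", "message:3:7:extra" and non-numeric ids all yield None.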
/// Map supervisor job.status -> (local JobStatus, terminal)
fn map_supervisor_job_status(s: &str) -> Option<(JobStatus, bool)> {
match s {
"created" | "queued" => Some((JobStatus::Dispatched, false)),
"running" => Some((JobStatus::Started, false)),
"completed" => Some((JobStatus::Finished, true)),
"failed" | "timeout" => Some((JobStatus::Error, true)),
_ => None,
}
}
/// Auto-discover contexts periodically and ensure a router loop exists for each.
/// Returns a JoinHandle of the discovery task (router loops are detached).
pub fn start_router_auto(service: AppService, cfg: RouterConfig) -> tokio::task::JoinHandle<()> {
tokio::spawn(async move {
let mut active: HashSet<u32> = HashSet::new();
loop {
match service.list_context_ids().await {
Ok(ids) => {
for ctx_id in ids {
if !active.contains(&ctx_id) {
// Spawn a loop for this new context
let cfg_ctx = RouterConfig {
context_ids: vec![ctx_id],
..cfg.clone()
};
let _ = start_router(service.clone(), cfg_ctx);
active.insert(ctx_id);
info!(context_id = ctx_id, "Started loop for context");
}
}
}
Err(e) => {
error!(error=%e, "list_context_ids error");
}
}
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
}
})
}

src/rpc.rs (new file, 672 lines)
View File

@@ -0,0 +1,672 @@
use std::{
collections::HashMap,
net::{IpAddr, SocketAddr},
sync::Arc,
};
use jsonrpsee::{
RpcModule,
server::{ServerBuilder, ServerHandle},
types::error::ErrorObjectOwned,
};
use serde::Deserialize;
use serde_json::{Value, json};
use crate::{
dag::{DagError, FlowDag},
models::{
Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageFormatType,
MessageStatus, Runner, ScriptType,
},
service::AppService,
time::current_timestamp,
};
/// The OpenRPC specification for the HeroCoordinator JSON-RPC API
const OPENRPC_SPEC: &str = include_str!("../specs/openrpc.json");
pub struct AppState {
pub service: AppService,
}
impl AppState {
pub fn new(service: AppService) -> Self {
Self { service }
}
}
// -----------------------------
// Error helpers
// -----------------------------
fn invalid_params_err<E: std::fmt::Display>(e: E) -> ErrorObjectOwned {
ErrorObjectOwned::owned(-32602, "Invalid params", Some(Value::String(e.to_string())))
}
fn storage_err(e: Box<dyn std::error::Error + Send + Sync>) -> ErrorObjectOwned {
let msg = e.to_string();
if msg.contains("Key not found") {
ErrorObjectOwned::owned(-32001, "Not Found", Some(Value::String(msg)))
} else {
ErrorObjectOwned::owned(-32010, "Storage Error", Some(Value::String(msg)))
}
}
fn dag_err(e: DagError) -> ErrorObjectOwned {
match e {
DagError::Storage(inner) => storage_err(inner),
DagError::MissingDependency { .. } => ErrorObjectOwned::owned(
-32020,
"DAG Missing Dependency",
Some(Value::String(e.to_string())),
),
DagError::CycleDetected { .. } => ErrorObjectOwned::owned(
-32021,
"DAG Cycle Detected",
Some(Value::String(e.to_string())),
),
DagError::UnknownJob { .. } => ErrorObjectOwned::owned(
-32022,
"DAG Unknown Job",
Some(Value::String(e.to_string())),
),
DagError::DependenciesIncomplete { .. } => ErrorObjectOwned::owned(
-32023,
"DAG Dependencies Incomplete",
Some(Value::String(e.to_string())),
),
DagError::FlowFailed { .. } => ErrorObjectOwned::owned(
-32024,
"DAG Flow Failed",
Some(Value::String(e.to_string())),
),
DagError::JobNotStarted { .. } => ErrorObjectOwned::owned(
-32025,
"DAG Job Not Started",
Some(Value::String(e.to_string())),
),
}
}
// -----------------------------
// Create DTOs and Param wrappers
// -----------------------------
#[derive(Debug, Deserialize)]
pub struct ActorCreate {
pub id: u32,
pub pubkey: String,
pub address: Vec<IpAddr>,
}
impl ActorCreate {
pub fn into_domain(self) -> Result<Actor, String> {
let ts = current_timestamp();
let v = json!({
"id": self.id,
"pubkey": self.pubkey,
"address": self.address,
"created_at": ts,
"updated_at": ts,
});
serde_json::from_value(v).map_err(|e| e.to_string())
}
}
#[derive(Debug, Deserialize)]
pub struct ContextCreate {
pub id: u32,
pub admins: Vec<u32>,
pub readers: Vec<u32>,
pub executors: Vec<u32>,
}
impl ContextCreate {
pub fn into_domain(self) -> Context {
let ts = current_timestamp();
let ContextCreate {
id,
admins,
readers,
executors,
} = self;
Context {
id,
admins,
readers,
executors,
created_at: ts,
updated_at: ts,
}
}
}
#[derive(Debug, Deserialize)]
pub struct RunnerCreate {
pub id: u32,
pub pubkey: String,
pub address: IpAddr,
pub topic: String,
/// The script type this runner executes (used for routing)
pub script_type: ScriptType,
pub local: bool,
}
impl RunnerCreate {
pub fn into_domain(self) -> Runner {
let ts = current_timestamp();
let RunnerCreate {
id,
pubkey,
address,
topic,
script_type,
local,
} = self;
Runner {
id,
pubkey,
address,
topic,
script_type,
local,
created_at: ts,
updated_at: ts,
}
}
}
#[derive(Debug, Deserialize)]
pub struct FlowCreate {
pub id: u32,
pub caller_id: u32,
pub context_id: u32,
pub jobs: Vec<u32>,
pub env_vars: HashMap<String, String>,
}
impl FlowCreate {
pub fn into_domain(self) -> Flow {
let ts = current_timestamp();
let FlowCreate {
id,
caller_id,
context_id,
jobs,
env_vars,
} = self;
Flow {
id,
caller_id,
context_id,
jobs,
env_vars,
result: HashMap::new(),
created_at: ts,
updated_at: ts,
status: FlowStatus::Created,
}
}
}
#[derive(Debug, Deserialize)]
pub struct JobCreate {
pub id: u32,
pub caller_id: u32,
pub context_id: u32,
pub script: String,
pub script_type: ScriptType,
pub timeout: u32,
pub retries: u8,
pub env_vars: HashMap<String, String>,
pub prerequisites: Vec<String>,
pub depends: Vec<u32>,
}
impl JobCreate {
pub fn into_domain(self) -> Job {
let ts = current_timestamp();
let JobCreate {
id,
caller_id,
context_id,
script,
script_type,
timeout,
retries,
env_vars,
prerequisites,
depends,
} = self;
Job {
id,
caller_id,
context_id,
script,
script_type,
timeout,
retries,
env_vars,
result: HashMap::new(),
prerequisites,
depends,
created_at: ts,
updated_at: ts,
status: JobStatus::WaitingForPrerequisites,
}
}
}
#[derive(Debug, Deserialize)]
pub struct MessageCreate {
pub id: u32,
pub caller_id: u32,
pub context_id: u32,
pub message: String,
pub message_type: ScriptType,
pub message_format_type: MessageFormatType,
pub timeout: u32,
pub timeout_ack: u32,
pub timeout_result: u32,
pub job: Vec<JobCreate>,
}
impl MessageCreate {
pub fn into_domain(self) -> Message {
let ts = current_timestamp();
let MessageCreate {
id,
caller_id,
context_id,
message,
message_type,
message_format_type,
timeout,
timeout_ack,
timeout_result,
job,
} = self;
Message {
id,
caller_id,
context_id,
message,
message_type,
message_format_type,
timeout,
timeout_ack,
timeout_result,
transport_id: None,
transport_status: None,
job: job.into_iter().map(JobCreate::into_domain).collect(),
logs: Vec::new(),
created_at: ts,
updated_at: ts,
status: MessageStatus::Dispatched,
}
}
}
#[derive(Debug, Deserialize)]
pub struct ActorCreateParams {
pub actor: ActorCreate,
}
#[derive(Debug, Deserialize)]
pub struct ActorLoadParams {
pub id: u32,
}
#[derive(Debug, Deserialize)]
pub struct ContextCreateParams {
pub context: ContextCreate,
}
#[derive(Debug, Deserialize)]
pub struct ContextLoadParams {
pub id: u32,
}
#[derive(Debug, Deserialize)]
pub struct RunnerCreateParams {
pub context_id: u32,
pub runner: RunnerCreate,
}
#[derive(Debug, Deserialize)]
pub struct RunnerLoadParams {
pub context_id: u32,
pub id: u32,
}
#[derive(Debug, Deserialize)]
pub struct FlowCreateParams {
pub context_id: u32,
pub flow: FlowCreate,
}
#[derive(Debug, Deserialize)]
pub struct FlowLoadParams {
pub context_id: u32,
pub id: u32,
}
#[derive(Debug, Deserialize)]
pub struct JobCreateParams {
pub context_id: u32,
pub job: JobCreate,
}
#[derive(Debug, Deserialize)]
pub struct JobLoadParams {
pub context_id: u32,
pub caller_id: u32,
pub id: u32,
}
#[derive(Debug, Deserialize)]
pub struct MessageCreateParams {
pub context_id: u32,
pub message: MessageCreate,
}
#[derive(Debug, Deserialize)]
pub struct MessageLoadParams {
pub context_id: u32,
pub caller_id: u32,
pub id: u32,
}
// -----------------------------
// Rpc module builder (manual registration)
// -----------------------------
pub fn build_module(state: Arc<AppState>) -> RpcModule<()> {
let mut module: RpcModule<()> = RpcModule::new(());
// Actor
{
let state = state.clone();
module
.register_async_method("actor.create", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: ActorCreateParams = params.parse().map_err(invalid_params_err)?;
let actor = p.actor.into_domain().map_err(invalid_params_err)?;
let actor = state
.service
.create_actor(actor)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(actor)
}
})
.expect("register actor.create");
}
{
let state = state.clone();
module
.register_async_method("actor.load", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: ActorLoadParams = params.parse().map_err(invalid_params_err)?;
let actor = state.service.load_actor(p.id).await.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(actor)
}
})
.expect("register actor.load");
}
// Context
{
let state = state.clone();
module
.register_async_method("context.create", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: ContextCreateParams = params.parse().map_err(invalid_params_err)?;
let ctx = p.context.into_domain();
let ctx = state
.service
.create_context(ctx)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(ctx)
}
})
.expect("register context.create");
}
{
let state = state.clone();
module
.register_async_method("context.load", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: ContextLoadParams = params.parse().map_err(invalid_params_err)?;
let ctx = state
.service
.load_context(p.id)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(ctx)
}
})
.expect("register context.load");
}
// Runner
{
let state = state.clone();
module
.register_async_method("runner.create", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: RunnerCreateParams = params.parse().map_err(invalid_params_err)?;
let runner = p.runner.into_domain();
let runner = state
.service
.create_runner(p.context_id, runner)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(runner)
}
})
.expect("register runner.create");
}
{
let state = state.clone();
module
.register_async_method("runner.load", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: RunnerLoadParams = params.parse().map_err(invalid_params_err)?;
let runner = state
.service
.load_runner(p.context_id, p.id)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(runner)
}
})
.expect("register runner.load");
}
// Flow
{
let state = state.clone();
module
.register_async_method("flow.create", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: FlowCreateParams = params.parse().map_err(invalid_params_err)?;
let flow = p.flow.into_domain();
let flow = state
.service
.create_flow(p.context_id, flow)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(flow)
}
})
.expect("register flow.create");
}
{
let state = state.clone();
module
.register_async_method("flow.load", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: FlowLoadParams = params.parse().map_err(invalid_params_err)?;
let flow = state
.service
.load_flow(p.context_id, p.id)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(flow)
}
})
.expect("register flow.load");
}
{
let state = state.clone();
module
.register_async_method("flow.dag", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: FlowLoadParams = params.parse().map_err(invalid_params_err)?;
let dag: FlowDag = state
.service
.flow_dag(p.context_id, p.id)
.await
.map_err(dag_err)?;
Ok::<_, ErrorObjectOwned>(dag)
}
})
.expect("register flow.dag");
}
{
let state = state.clone();
module
.register_async_method("flow.start", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: FlowLoadParams = params.parse().map_err(invalid_params_err)?;
let started: bool = state
.service
.flow_start(p.context_id, p.id)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(started)
}
})
.expect("register flow.start");
}
// Job
{
let state = state.clone();
module
.register_async_method("job.create", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: JobCreateParams = params.parse().map_err(invalid_params_err)?;
let job = p.job.into_domain();
let job = state
.service
.create_job(p.context_id, job)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(job)
}
})
.expect("register job.create");
}
{
let state = state.clone();
module
.register_async_method("job.load", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: JobLoadParams = params.parse().map_err(invalid_params_err)?;
let job = state
.service
.load_job(p.context_id, p.caller_id, p.id)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(job)
}
})
.expect("register job.load");
}
// Message
{
let state = state.clone();
module
.register_async_method("message.create", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: MessageCreateParams = params.parse().map_err(invalid_params_err)?;
let message = p.message.into_domain();
let message = state
.service
.create_message(p.context_id, message)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(message)
}
})
.expect("register message.create");
}
{
let state = state;
module
.register_async_method("message.load", move |params, _caller, _ctx| {
let state = state.clone();
async move {
let p: MessageLoadParams = params.parse().map_err(invalid_params_err)?;
let msg = state
.service
.load_message(p.context_id, p.caller_id, p.id)
.await
.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(msg)
}
})
.expect("register message.load");
}
{
module
.register_async_method("rpc.discover", move |_params, _caller, _ctx| async move {
let spec = serde_json::from_str::<serde_json::Value>(OPENRPC_SPEC)
.expect("Failed to parse OpenRPC spec");
Ok::<_, ErrorObjectOwned>(spec)
})
.expect("register rpc.discover");
}
module
}
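// Example (sketch): every method takes named params matching the *Params
// structs above, so a flow.dag call carries a JSON-RPC body like
//     {"jsonrpc":"2.0","id":1,"method":"flow.dag","params":{"context_id":1,"id":10}}
// and rpc.discover, which ignores its params, returns the embedded OpenRPC spec.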
// -----------------------------
// Server runners (HTTP/WS on separate listeners)
// -----------------------------
pub async fn start_http<C>(
addr: SocketAddr,
module: RpcModule<C>,
) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> {
let server = ServerBuilder::default().build(addr).await?;
let handle = server.start(module);
Ok(handle)
}
pub async fn start_ws<C>(
addr: SocketAddr,
module: RpcModule<C>,
) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> {
// jsonrpsee server supports both HTTP and WS; using a second listener gives us a dedicated WS port.
let server = ServerBuilder::default().build(addr).await?;
let handle = server.start(module);
Ok(handle)
}

src/service.rs (new file, 1171 lines)

File diff suppressed because it is too large

src/storage.rs (new file, 3 lines)
View File

@@ -0,0 +1,3 @@
pub mod redis;
pub use redis::RedisDriver;

src/storage/redis.rs (new file, 754 lines)
View File

@@ -0,0 +1,754 @@
use std::collections::HashMap as StdHashMap;
use redis::{AsyncCommands, aio::ConnectionManager};
use serde::Serialize;
use serde::de::DeserializeOwned;
use serde_json::{Map as JsonMap, Value};
use tokio::sync::Mutex;
use crate::models::{
Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageStatus, Runner,
TransportStatus,
};
use tracing::{error, warn};
type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
/// Async Redis driver that saves/loads every model as a Redis hash (HSET),
/// using canonical keys as specified in the specs.
/// - Complex fields (arrays, maps, nested structs) are JSON-encoded per field
/// - Scalars are written as plain strings (numbers/bools as their string representation)
/// - On load, each field value is first attempted to parse as JSON; if that fails it is treated as a plain string
pub struct RedisDriver {
/// Base address, e.g. "127.0.0.1:6379" or "redis://127.0.0.1:6379"
base_addr: String,
/// Cache of connection managers per DB index
managers: Mutex<StdHashMap<u32, ConnectionManager>>,
}
impl RedisDriver {
/// Create a new driver for the given Redis address.
/// Accepts either "host:port" or "redis://host:port"
pub async fn new(addr: impl Into<String>) -> Result<Self> {
let raw = addr.into();
let base_addr = if raw.starts_with("redis://") {
raw
} else {
format!("redis://{}", raw)
};
Ok(Self {
base_addr,
managers: Mutex::new(StdHashMap::new()),
})
}
/// Get or create a ConnectionManager for the given DB index.
async fn manager_for_db(&self, db: u32) -> Result<ConnectionManager> {
{
// Fast path: check existing
let guard = self.managers.lock().await;
if let Some(cm) = guard.get(&db) {
return Ok(cm.clone());
}
}
// Slow path: create a new manager and cache it
let url = format!("{}/{}", self.base_addr.trim_end_matches('/'), db);
let client = redis::Client::open(url.as_str()).map_err(|e| {
error!(%url, db=%db, error=%e, "Redis client open failed");
e
})?;
let cm = client.get_connection_manager().await.map_err(|e| {
error!(%url, db=%db, error=%e, "Redis connection manager init failed");
e
})?;
let mut guard = self.managers.lock().await;
let entry = guard.entry(db).or_insert(cm);
Ok(entry.clone())
}
// -----------------------------
// Generic helpers (serde <-> HSET)
// -----------------------------
fn struct_to_hset_pairs<T: Serialize>(value: &T) -> Result<Vec<(String, String)>> {
let json = serde_json::to_value(value)?;
let obj = json
.as_object()
.ok_or("Model must serialize to a JSON object")?;
let mut pairs = Vec::with_capacity(obj.len());
for (k, v) in obj {
let s = match v {
Value::Array(_) | Value::Object(_) => serde_json::to_string(v)?, // complex - store JSON
Value::String(s) => s.clone(), // string - plain
Value::Number(n) => n.to_string(), // number - plain
Value::Bool(b) => b.to_string(), // bool - plain
Value::Null => "null".to_string(), // null sentinel
};
pairs.push((k.clone(), s));
}
Ok(pairs)
}
fn hmap_to_struct<T: DeserializeOwned>(map: StdHashMap<String, String>) -> Result<T> {
let mut obj = JsonMap::with_capacity(map.len());
for (k, s) in map {
// Try parse as JSON first (works for arrays, objects, numbers, booleans, null)
// If that fails, fallback to string.
match serde_json::from_str::<Value>(&s) {
Ok(v) => {
obj.insert(k, v);
}
Err(_) => {
obj.insert(k, Value::String(s));
}
}
}
let json = Value::Object(obj);
let model = serde_json::from_value(json)?;
Ok(model)
}
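// Example (sketch, values illustrative): for a Job, struct_to_hset_pairs stores
// scalars as plain strings and complex fields as JSON:
//     "id"       -> "7"               // number, plain
//     "script"   -> "print(1)"        // string, plain
//     "depends"  -> "[1,2]"           // array, JSON-encoded
//     "env_vars" -> "{\"K\":\"V\"}"   // map, JSON-encoded
// hmap_to_struct reverses this by trying serde_json::from_str per field and
// falling back to a plain string when the value is not valid JSON.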
async fn hset_model<T: Serialize>(&self, db: u32, key: &str, model: &T) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let pairs = Self::struct_to_hset_pairs(model).map_err(|e| {
error!(db=%db, key=%key, error=%e, "Serialize model to HSET pairs failed");
e
})?;
// Ensure no stale fields
let del_res: redis::RedisResult<u64> = cm.del(key).await;
if let Err(e) = del_res {
warn!(db=%db, key=%key, error=%e, "DEL before HSET failed");
}
// Write all fields
let _: () = cm.hset_multiple(key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET multiple failed");
e
})?;
Ok(())
}
async fn hget_model<T: DeserializeOwned>(&self, db: u32, key: &str) -> Result<T> {
let mut cm = self.manager_for_db(db).await?;
let map: StdHashMap<String, String> = cm.hgetall(key).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HGETALL failed");
e
})?;
if map.is_empty() {
// NotFound is expected in some flows; don't log as error
return Err(format!("Key not found: {}", key).into());
}
Self::hmap_to_struct(map).map_err(|e| {
error!(db=%db, key=%key, error=%e, "Deserialize model from HGETALL failed");
e
})
}
// -----------------------------
// Key helpers (canonical keys)
// -----------------------------
fn actor_key(id: u32) -> String {
format!("actor:{}", id)
}
fn context_key(id: u32) -> String {
format!("context:{}", id)
}
fn flow_key(id: u32) -> String {
format!("flow:{}", id)
}
fn runner_key(id: u32) -> String {
format!("runner:{}", id)
}
fn job_key(caller_id: u32, id: u32) -> String {
format!("job:{}:{}", caller_id, id)
}
fn message_key(caller_id: u32, id: u32) -> String {
format!("message:{}:{}", caller_id, id)
}
// -----------------------------
// Context (DB = context.id)
// -----------------------------
/// Save a Context in its own DB (db index = context.id)
pub async fn save_context(&self, ctx: &Context) -> Result<()> {
// We don't have field access; compute db and key via JSON to avoid changing model definitions.
// Extract "id" from serialized JSON object.
let json = serde_json::to_value(ctx)?;
let id = json
.get("id")
.and_then(|v| v.as_u64())
.ok_or("Context.id missing or not a number")? as u32;
let key = Self::context_key(id);
// Write the context hash in its own DB
self.hset_model(id, &key, ctx).await?;
// Register this context id in the global registry (DB 0)
let _ = self.register_context_id(id).await;
Ok(())
}
/// Load a Context from its own DB (db index = id)
pub async fn load_context(&self, id: u32) -> Result<Context> {
let key = Self::context_key(id);
self.hget_model(id, &key).await
}
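// Example: with base_addr "redis://127.0.0.1:6379", a Context with id 7 is
// written as hash "context:7" in DB 7 (redis://127.0.0.1:6379/7) and "7" is
// SADD-ed into the "contexts" set in DB 0, where list_context_ids (and thus
// start_router_auto) will discover it.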
// -----------------------------
// Actor
// -----------------------------
/// Save an Actor to the given DB (tenant/context DB)
pub async fn save_actor(&self, db: u32, actor: &Actor) -> Result<()> {
let json = serde_json::to_value(actor)?;
let id = json
.get("id")
.and_then(|v| v.as_u64())
.ok_or("Actor.id missing or not a number")? as u32;
let key = Self::actor_key(id);
self.hset_model(db, &key, actor).await
}
/// Load an Actor by id from the given DB
pub async fn load_actor(&self, db: u32, id: u32) -> Result<Actor> {
let key = Self::actor_key(id);
self.hget_model(db, &key).await
}
/// Save an Actor globally in DB 0 (Actor is context-independent)
pub async fn save_actor_global(&self, actor: &Actor) -> Result<()> {
let json = serde_json::to_value(actor)?;
let id = json
.get("id")
.and_then(|v| v.as_u64())
.ok_or("Actor.id missing or not a number")? as u32;
let key = Self::actor_key(id);
self.hset_model(0, &key, actor).await
}
/// Load an Actor globally from DB 0 by id
pub async fn load_actor_global(&self, id: u32) -> Result<Actor> {
let key = Self::actor_key(id);
self.hget_model(0, &key).await
}
// -----------------------------
// Runner
// -----------------------------
pub async fn save_runner(&self, db: u32, runner: &Runner) -> Result<()> {
let json = serde_json::to_value(runner)?;
let id = json
.get("id")
.and_then(|v| v.as_u64())
.ok_or("Runner.id missing or not a number")? as u32;
let key = Self::runner_key(id);
self.hset_model(db, &key, runner).await
}
pub async fn load_runner(&self, db: u32, id: u32) -> Result<Runner> {
let key = Self::runner_key(id);
self.hget_model(db, &key).await
}
// -----------------------------
// Flow
// -----------------------------
pub async fn save_flow(&self, db: u32, flow: &Flow) -> Result<()> {
let json = serde_json::to_value(flow)?;
let id = json
.get("id")
.and_then(|v| v.as_u64())
.ok_or("Flow.id missing or not a number")? as u32;
let key = Self::flow_key(id);
self.hset_model(db, &key, flow).await
}
pub async fn load_flow(&self, db: u32, id: u32) -> Result<Flow> {
let key = Self::flow_key(id);
self.hget_model(db, &key).await
}
// -----------------------------
// Job
// -----------------------------
pub async fn save_job(&self, db: u32, job: &Job) -> Result<()> {
let json = serde_json::to_value(job)?;
let id = json
.get("id")
.and_then(|v| v.as_u64())
.ok_or("Job.id missing or not a number")? as u32;
let caller_id = json
.get("caller_id")
.and_then(|v| v.as_u64())
.ok_or("Job.caller_id missing or not a number")? as u32;
let key = Self::job_key(caller_id, id);
self.hset_model(db, &key, job).await
}
pub async fn load_job(&self, db: u32, caller_id: u32, id: u32) -> Result<Job> {
let key = Self::job_key(caller_id, id);
self.hget_model(db, &key).await
}
/// Atomically update a job's status and `updated_at` fields.
/// - No transition validation is performed.
/// - Writes only the two fields via HSET to avoid rewriting the whole model.
pub async fn update_job_status(
&self,
db: u32,
caller_id: u32,
id: u32,
status: JobStatus,
) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::job_key(caller_id, id);
// Serialize enum into the same plain string representation stored by create paths
let status_str = match serde_json::to_value(&status)? {
Value::String(s) => s,
v => v.to_string(),
};
let ts = crate::time::current_timestamp();
let pairs = vec![
("status".to_string(), status_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_job_status failed");
e
})?;
Ok(())
}
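// Example (timestamp illustrative): JobStatus::Finished serializes to the plain
// string "Finished", so this writes exactly two hash fields,
//     status = "Finished", updated_at = "1756480000"
// (the redis crate's hset_multiple issues HMSET under the hood) and leaves the
// rest of the job hash untouched.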
// -----------------------------
// Message
// -----------------------------
pub async fn save_message(&self, db: u32, message: &Message) -> Result<()> {
let json = serde_json::to_value(message)?;
let id = json
.get("id")
.and_then(|v| v.as_u64())
.ok_or("Message.id missing or not a number")? as u32;
let caller_id = json
.get("caller_id")
.and_then(|v| v.as_u64())
.ok_or("Message.caller_id missing or not a number")? as u32;
let key = Self::message_key(caller_id, id);
self.hset_model(db, &key, message).await
}
pub async fn load_message(&self, db: u32, caller_id: u32, id: u32) -> Result<Message> {
let key = Self::message_key(caller_id, id);
self.hget_model(db, &key).await
}
// -----------------------------
// Partial update helpers
// -----------------------------
/// Flow: update only status and updated_at
pub async fn update_flow_status(&self, db: u32, id: u32, status: FlowStatus) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::flow_key(id);
let status_str = match serde_json::to_value(&status)? {
Value::String(s) => s,
v => v.to_string(),
};
let ts = crate::time::current_timestamp();
let pairs = vec![
("status".to_string(), status_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_flow_status failed");
e
})?;
Ok(())
}
/// Message: update only status and updated_at
pub async fn update_message_status(
&self,
db: u32,
caller_id: u32,
id: u32,
status: MessageStatus,
) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::message_key(caller_id, id);
let status_str = match serde_json::to_value(&status)? {
Value::String(s) => s,
v => v.to_string(),
};
let ts = crate::time::current_timestamp();
let pairs = vec![
("status".to_string(), status_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_message_status failed");
e
})?;
Ok(())
}
/// Message: update transport_id / transport_status (optionally) and bump updated_at
pub async fn update_message_transport(
&self,
db: u32,
caller_id: u32,
id: u32,
transport_id: Option<String>,
transport_status: Option<TransportStatus>,
) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::message_key(caller_id, id);
let mut pairs: Vec<(String, String)> = Vec::new();
if let Some(tid) = transport_id {
pairs.push(("transport_id".to_string(), tid));
}
if let Some(ts_status) = transport_status {
let status_str = match serde_json::to_value(&ts_status)? {
Value::String(s) => s,
v => v.to_string(),
};
pairs.push(("transport_status".to_string(), status_str));
}
// Always bump updated_at
let ts = crate::time::current_timestamp();
pairs.push(("updated_at".to_string(), ts.to_string()));
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_message_transport failed");
e
})?;
Ok(())
}
/// Flow: merge env_vars map and bump updated_at
pub async fn update_flow_env_vars_merge(
&self,
db: u32,
id: u32,
patch: StdHashMap<String, String>,
) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::flow_key(id);
let current: Option<String> = cm.hget(&key, "env_vars").await.ok();
let mut obj = match current
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
.and_then(|v| v.as_object().cloned())
{
Some(m) => m,
None => JsonMap::new(),
};
for (k, v) in patch {
obj.insert(k, Value::String(v));
}
let env_vars_str = Value::Object(obj).to_string();
let ts = crate::time::current_timestamp();
let pairs = vec![
("env_vars".to_string(), env_vars_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_flow_env_vars_merge failed");
e
})?;
Ok(())
}
/// Flow: merge result map and bump updated_at
pub async fn update_flow_result_merge(
&self,
db: u32,
id: u32,
patch: StdHashMap<String, String>,
) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::flow_key(id);
let current: Option<String> = cm.hget(&key, "result").await.ok();
let mut obj = match current
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
.and_then(|v| v.as_object().cloned())
{
Some(m) => m,
None => JsonMap::new(),
};
for (k, v) in patch {
obj.insert(k, Value::String(v));
}
let result_str = Value::Object(obj).to_string();
let ts = crate::time::current_timestamp();
let pairs = vec![
("result".to_string(), result_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_flow_result_merge failed");
e
})?;
Ok(())
}
/// Job: merge env_vars map and bump updated_at
pub async fn update_job_env_vars_merge(
&self,
db: u32,
caller_id: u32,
id: u32,
patch: StdHashMap<String, String>,
) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::job_key(caller_id, id);
let current: Option<String> = cm.hget(&key, "env_vars").await.ok();
let mut obj = match current
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
.and_then(|v| v.as_object().cloned())
{
Some(m) => m,
None => JsonMap::new(),
};
for (k, v) in patch {
obj.insert(k, Value::String(v));
}
let env_vars_str = Value::Object(obj).to_string();
let ts = crate::time::current_timestamp();
let pairs = vec![
("env_vars".to_string(), env_vars_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_job_env_vars_merge failed");
e
})?;
Ok(())
}
/// Job: merge result map and bump updated_at
pub async fn update_job_result_merge(
&self,
db: u32,
caller_id: u32,
id: u32,
patch: StdHashMap<String, String>,
) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::job_key(caller_id, id);
let current: Option<String> = cm.hget(&key, "result").await.ok();
let mut obj = match current
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
.and_then(|v| v.as_object().cloned())
{
Some(m) => m,
None => JsonMap::new(),
};
for (k, v) in patch {
obj.insert(k, Value::String(v));
}
let result_str = Value::Object(obj).to_string();
let ts = crate::time::current_timestamp();
let pairs = vec![
("result".to_string(), result_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_job_result_merge failed");
e
})?;
Ok(())
}
/// Flow: set jobs list and bump updated_at
pub async fn update_flow_jobs_set(&self, db: u32, id: u32, new_jobs: Vec<u32>) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::flow_key(id);
let jobs_str = serde_json::to_string(&new_jobs)?;
let ts = crate::time::current_timestamp();
let pairs = vec![
("jobs".to_string(), jobs_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_flow_jobs_set failed");
e
})?;
Ok(())
}
/// Message: append logs (no dedup) and bump updated_at
pub async fn append_message_logs(
&self,
db: u32,
caller_id: u32,
id: u32,
new_logs: Vec<String>,
) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let key = Self::message_key(caller_id, id);
let current: Option<String> = cm.hget(&key, "logs").await.ok();
let mut arr: Vec<Value> = current
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
.and_then(|v| v.as_array().cloned())
.unwrap_or_default();
for l in new_logs {
arr.push(Value::String(l));
}
let logs_str = Value::Array(arr).to_string();
let ts = crate::time::current_timestamp();
let pairs = vec![
("logs".to_string(), logs_str),
("updated_at".to_string(), ts.to_string()),
];
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET append_message_logs failed");
e
})?;
Ok(())
}
// -----------------------------
// Queues (lists)
// -----------------------------
/// Push a value onto a Redis list using LPUSH in the given DB.
pub async fn lpush_list(&self, db: u32, list: &str, value: &str) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let _: i64 = cm.lpush(list, value).await.map_err(|e| {
error!(db=%db, list=%list, value=%value, error=%e, "LPUSH failed");
e
})?;
Ok(())
}
/// Enqueue a message key onto the outbound queue (msg_out).
/// The value is the canonical message key "message:{caller_id}:{id}".
pub async fn enqueue_msg_out(&self, db: u32, caller_id: u32, id: u32) -> Result<()> {
let key = Self::message_key(caller_id, id);
self.lpush_list(db, "msg_out", &key).await
}
/// Block-pop from msg_out with timeout (seconds). Returns the message key if present.
/// Uses BRPOP so that the queue behaves FIFO with LPUSH producer.
pub async fn brpop_msg_out(&self, db: u32, timeout_secs: usize) -> Result<Option<String>> {
let mut cm = self.manager_for_db(db).await?;
// BRPOP returns (list, element) on success
let res: Option<(String, String)> = redis::cmd("BRPOP")
.arg("msg_out")
.arg(timeout_secs)
.query_async(&mut cm)
.await
.map_err(|e| {
error!(db=%db, timeout_secs=%timeout_secs, error=%e, "BRPOP failed");
e
})?;
Ok(res.map(|(_, v)| v))
}
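// Example: LPUSH + BRPOP makes msg_out a FIFO queue per context DB:
//     enqueue_msg_out(db, 1, 1).await?; // LPUSH msg_out "message:1:1"
//     enqueue_msg_out(db, 1, 2).await?; // LPUSH msg_out "message:1:2"
//     brpop_msg_out(db, 1).await?;      // -> Some("message:1:1") first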
/// Scan all runner:* keys in this DB and return the deserialized Runner entries.
pub async fn scan_runners(&self, db: u32) -> Result<Vec<Runner>> {
let mut cm = self.manager_for_db(db).await?;
let mut out: Vec<Runner> = Vec::new();
let mut cursor: u64 = 0;
loop {
let (next, keys): (u64, Vec<String>) = redis::cmd("SCAN")
.arg(cursor)
.arg("MATCH")
.arg("runner:*")
.arg("COUNT")
.arg(100)
.query_async(&mut cm)
.await
.map_err(|e| {
error!(db=%db, cursor=%cursor, error=%e, "SCAN failed");
e
})?;
for k in keys {
if let Ok(r) = self.hget_model::<Runner>(db, &k).await {
out.push(r);
}
}
if next == 0 {
break;
}
cursor = next;
}
Ok(out)
}
// -----------------------------
// Global registry (DB 0) for Context IDs
// -----------------------------
/// Register a context id in the global set "contexts" stored in DB 0.
pub async fn register_context_id(&self, id: u32) -> Result<()> {
let mut cm = self.manager_for_db(0).await?;
let _: i64 = redis::cmd("SADD")
.arg("contexts")
.arg(id)
.query_async(&mut cm)
.await
.map_err(|e| {
error!(db=0, context_id=%id, error=%e, "SADD contexts failed");
e
})?;
Ok(())
}
/// List all registered context ids from the global set in DB 0.
pub async fn list_context_ids(&self) -> Result<Vec<u32>> {
let mut cm = self.manager_for_db(0).await?;
// Using SMEMBERS and parsing into u32
let vals: Vec<String> = redis::cmd("SMEMBERS")
.arg("contexts")
.query_async(&mut cm)
.await
.map_err(|e| {
error!(db=0, error=%e, "SMEMBERS contexts failed");
e
})?;
let mut out = Vec::with_capacity(vals.len());
for v in vals {
if let Ok(n) = v.parse::<u32>() {
out.push(n);
}
}
out.sort_unstable();
Ok(out)
}
}