rename worker to actor
This commit is contained in:
@@ -1,20 +1,20 @@
|
||||
# Worker Lifecycle Management
|
||||
# Actor Lifecycle Management
|
||||
|
||||
The Hero Supervisor includes comprehensive worker lifecycle management functionality using [Zinit](https://github.com/threefoldtech/zinit) as the process manager. This enables the supervisor to manage worker processes, perform health monitoring, and implement load balancing.
|
||||
The Hero Supervisor includes comprehensive actor lifecycle management functionality using [Zinit](https://github.com/threefoldtech/zinit) as the process manager. This enables the supervisor to manage actor processes, perform health monitoring, and implement load balancing.
|
||||
|
||||
## Overview
|
||||
|
||||
The lifecycle management system provides:
|
||||
|
||||
- **Worker Process Management**: Start, stop, restart, and monitor worker binaries
|
||||
- **Health Monitoring**: Automatic ping jobs every 10 minutes for idle workers
|
||||
- **Graceful Shutdown**: Clean termination of worker processes
|
||||
- **Actor Process Management**: Start, stop, restart, and monitor actor binaries
|
||||
- **Health Monitoring**: Automatic ping jobs every 10 minutes for idle actors
|
||||
- **Graceful Shutdown**: Clean termination of actor processes
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
|
||||
│ Supervisor │ │ WorkerLifecycle │ │ Zinit │
|
||||
│ Supervisor │ │ ActorLifecycle │ │ Zinit │
|
||||
│ │◄──►│ Manager │◄──►│ (Process │
|
||||
│ (Job Dispatch) │ │ │ │ Manager) │
|
||||
└─────────────────┘ └──────────────────┘ └─────────────────┘
|
||||
@@ -22,49 +22,49 @@ The lifecycle management system provides:
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
|
||||
│ Redis │ │ Health Monitor │ │ Worker Binaries │
|
||||
│ Redis │ │ Health Monitor │ │ Actor Binaries │
|
||||
│ (Job Queue) │ │ (Ping Jobs) │ │ (OSIS/SAL/V) │
|
||||
└─────────────────┘ └──────────────────┘ └─────────────────┘
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### WorkerConfig
|
||||
### ActorConfig
|
||||
|
||||
Defines configuration for a worker binary:
|
||||
Defines configuration for an actor binary:
|
||||
|
||||
```rust
|
||||
use hero_supervisor::{WorkerConfig, ScriptType};
|
||||
use hero_supervisor::{ActorConfig, ScriptType};
|
||||
use std::path::PathBuf;
|
||||
use std::collections::HashMap;
|
||||
|
||||
let config = WorkerConfig::new(
|
||||
"osis_worker_0".to_string(),
|
||||
PathBuf::from("/usr/local/bin/osis_worker"),
|
||||
let config = ActorConfig::new(
|
||||
"osis_actor_0".to_string(),
|
||||
PathBuf::from("/usr/local/bin/osis_actor"),
|
||||
ScriptType::OSIS,
|
||||
)
|
||||
.with_args(vec![
|
||||
"--redis-url".to_string(),
|
||||
"redis://localhost:6379".to_string(),
|
||||
"--worker-id".to_string(),
|
||||
"osis_worker_0".to_string(),
|
||||
"--actor-id".to_string(),
|
||||
"osis_actor_0".to_string(),
|
||||
])
|
||||
.with_env({
|
||||
let mut env = HashMap::new();
|
||||
env.insert("RUST_LOG".to_string(), "info".to_string());
|
||||
env.insert("WORKER_TYPE".to_string(), "osis".to_string());
|
||||
env.insert("ACTOR_TYPE".to_string(), "osis".to_string());
|
||||
env
|
||||
})
|
||||
.with_health_check("/usr/local/bin/osis_worker --health-check".to_string())
|
||||
.with_health_check("/usr/local/bin/osis_actor --health-check".to_string())
|
||||
.with_dependencies(vec!["redis".to_string()]);
|
||||
```
|
||||
|
||||
### WorkerLifecycleManager
|
||||
### ActorLifecycleManager
|
||||
|
||||
Main component for managing worker lifecycles:
|
||||
Main component for managing actor lifecycles:
|
||||
|
||||
```rust
|
||||
use hero_supervisor::{WorkerLifecycleManagerBuilder, Supervisor};
|
||||
use hero_supervisor::{ActorLifecycleManagerBuilder, Supervisor};
|
||||
|
||||
let supervisor = SupervisorBuilder::new()
|
||||
.redis_url("redis://localhost:6379")
|
||||
@@ -72,11 +72,11 @@ let supervisor = SupervisorBuilder::new()
|
||||
.context_id("production")
|
||||
.build()?;
|
||||
|
||||
let mut lifecycle_manager = WorkerLifecycleManagerBuilder::new("/var/run/zinit.sock".to_string())
|
||||
let mut lifecycle_manager = ActorLifecycleManagerBuilder::new("/var/run/zinit.sock".to_string())
|
||||
.with_supervisor(supervisor.clone())
|
||||
.add_worker(osis_worker_config)
|
||||
.add_worker(sal_worker_config)
|
||||
.add_worker(v_worker_config)
|
||||
.add_actor(osis_actor_config)
|
||||
.add_actor(sal_actor_config)
|
||||
.add_actor(v_actor_config)
|
||||
.build();
|
||||
```
|
||||
|
||||
@@ -84,45 +84,45 @@ let mut lifecycle_manager = WorkerLifecycleManagerBuilder::new("/var/run/zinit.s
|
||||
|
||||
The lifecycle manager supports all Hero script types:
|
||||
|
||||
- **OSIS**: Rhai/HeroScript execution workers
|
||||
- **SAL**: System Abstraction Layer workers
|
||||
- **OSIS**: Rhai/HeroScript execution actors
|
||||
- **SAL**: System Abstraction Layer actors
|
||||
- **V**: HeroScript execution in V language
|
||||
- **Python**: HeroScript execution in Python
|
||||
|
||||
## Key Features
|
||||
|
||||
### 1. Worker Management
|
||||
### 1. Actor Management
|
||||
|
||||
```rust
|
||||
// Start all configured workers
|
||||
lifecycle_manager.start_all_workers().await?;
|
||||
// Start all configured actors
|
||||
lifecycle_manager.start_all_actors().await?;
|
||||
|
||||
// Stop all workers
|
||||
lifecycle_manager.stop_all_workers().await?;
|
||||
// Stop all actors
|
||||
lifecycle_manager.stop_all_actors().await?;
|
||||
|
||||
// Restart specific worker
|
||||
lifecycle_manager.restart_worker("osis_worker_0").await?;
|
||||
// Restart specific actor
|
||||
lifecycle_manager.restart_actor("osis_actor_0").await?;
|
||||
|
||||
// Get worker status
|
||||
let status = lifecycle_manager.get_worker_status("osis_worker_0").await?;
|
||||
println!("Worker state: {:?}, PID: {}", status.state, status.pid);
|
||||
// Get actor status
|
||||
let status = lifecycle_manager.get_actor_status("osis_actor_0").await?;
|
||||
println!("Actor state: {:?}, PID: {}", status.state, status.pid);
|
||||
```
|
||||
|
||||
### 2. Health Monitoring
|
||||
|
||||
The system automatically monitors worker health:
|
||||
The system automatically monitors actor health:
|
||||
|
||||
- Tracks last job execution time for each worker
|
||||
- Sends ping jobs to workers idle for 10+ minutes
|
||||
- Restarts workers that fail ping checks 3 times
|
||||
- Updates job times when workers receive tasks
|
||||
- Tracks last job execution time for each actor
|
||||
- Sends ping jobs to actors idle for 10+ minutes
|
||||
- Restarts actors that fail ping checks 3 times
|
||||
- Updates job times when actors receive tasks
|
||||
|
||||
```rust
|
||||
// Manual health check
|
||||
lifecycle_manager.monitor_worker_health().await?;
|
||||
lifecycle_manager.monitor_actor_health().await?;
|
||||
|
||||
// Update job time (called automatically by supervisor)
|
||||
lifecycle_manager.update_worker_job_time("osis_worker_0");
|
||||
lifecycle_manager.update_actor_job_time("osis_actor_0");
|
||||
|
||||
// Start continuous health monitoring
|
||||
lifecycle_manager.start_health_monitoring().await; // Runs forever
|
||||
@@ -130,26 +130,26 @@ lifecycle_manager.start_health_monitoring().await; // Runs forever
|
||||
|
||||
### 3. Dynamic Scaling
|
||||
|
||||
Scale workers up or down based on demand:
|
||||
Scale actors up or down based on demand:
|
||||
|
||||
```rust
|
||||
// Scale OSIS workers to 5 instances
|
||||
lifecycle_manager.scale_workers(&ScriptType::OSIS, 5).await?;
|
||||
// Scale OSIS actors to 5 instances
|
||||
lifecycle_manager.scale_actors(&ScriptType::OSIS, 5).await?;
|
||||
|
||||
// Scale down SAL workers to 1 instance
|
||||
lifecycle_manager.scale_workers(&ScriptType::SAL, 1).await?;
|
||||
// Scale down SAL actors to 1 instance
|
||||
lifecycle_manager.scale_actors(&ScriptType::SAL, 1).await?;
|
||||
|
||||
// Check current running count
|
||||
let count = lifecycle_manager.get_running_worker_count(&ScriptType::V).await;
|
||||
println!("Running V workers: {}", count);
|
||||
let count = lifecycle_manager.get_running_actor_count(&ScriptType::V).await;
|
||||
println!("Running V actors: {}", count);
|
||||
```
|
||||
|
||||
### 4. Service Dependencies
|
||||
|
||||
Workers can depend on other services:
|
||||
Actors can depend on other services:
|
||||
|
||||
```rust
|
||||
let config = WorkerConfig::new(name, binary, script_type)
|
||||
let config = ActorConfig::new(name, binary, script_type)
|
||||
.with_dependencies(vec![
|
||||
"redis".to_string(),
|
||||
"database".to_string(),
|
||||
@@ -157,25 +157,25 @@ let config = WorkerConfig::new(name, binary, script_type)
|
||||
]);
|
||||
```
|
||||
|
||||
Zinit ensures dependencies start before the worker.
|
||||
Zinit ensures dependencies start before the actor.
|
||||
|
||||
## Integration with Supervisor
|
||||
|
||||
The lifecycle manager integrates seamlessly with the supervisor:
|
||||
|
||||
```rust
|
||||
use hero_supervisor::{Supervisor, WorkerLifecycleManager};
|
||||
use hero_supervisor::{Supervisor, ActorLifecycleManager};
|
||||
|
||||
// Create supervisor and lifecycle manager
|
||||
let supervisor = SupervisorBuilder::new().build()?;
|
||||
let mut lifecycle_manager = WorkerLifecycleManagerBuilder::new(zinit_socket)
|
||||
let mut lifecycle_manager = ActorLifecycleManagerBuilder::new(zinit_socket)
|
||||
.with_supervisor(supervisor.clone())
|
||||
.build();
|
||||
|
||||
// Start workers
|
||||
lifecycle_manager.start_all_workers().await?;
|
||||
// Start actors
|
||||
lifecycle_manager.start_all_actors().await?;
|
||||
|
||||
// Create and execute jobs (supervisor automatically routes to workers)
|
||||
// Create and execute jobs (supervisor automatically routes to actors)
|
||||
let job = supervisor
|
||||
.new_job()
|
||||
.script_type(ScriptType::OSIS)
|
||||
@@ -191,15 +191,15 @@ println!("Job result: {}", result);
|
||||
The lifecycle manager automatically creates Zinit service configurations:
|
||||
|
||||
```yaml
|
||||
# Generated service config for osis_worker_0
|
||||
exec: "/usr/local/bin/osis_worker --redis-url redis://localhost:6379 --worker-id osis_worker_0"
|
||||
test: "/usr/local/bin/osis_worker --health-check"
|
||||
# Generated service config for osis_actor_0
|
||||
exec: "/usr/local/bin/osis_actor --redis-url redis://localhost:6379 --actor-id osis_actor_0"
|
||||
test: "/usr/local/bin/osis_actor --health-check"
|
||||
oneshot: false # Restart on exit
|
||||
after:
|
||||
- redis
|
||||
env:
|
||||
RUST_LOG: "info"
|
||||
WORKER_TYPE: "osis"
|
||||
ACTOR_TYPE: "osis"
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
@@ -209,10 +209,10 @@ The system provides comprehensive error handling:
|
||||
```rust
|
||||
use hero_supervisor::SupervisorError;
|
||||
|
||||
match lifecycle_manager.start_worker(&config).await {
|
||||
Ok(_) => println!("Worker started successfully"),
|
||||
Err(SupervisorError::WorkerStartFailed(worker, reason)) => {
|
||||
eprintln!("Failed to start {}: {}", worker, reason);
|
||||
match lifecycle_manager.start_actor(&config).await {
|
||||
Ok(_) => println!("Actor started successfully"),
|
||||
Err(SupervisorError::ActorStartFailed(actor, reason)) => {
|
||||
eprintln!("Failed to start {}: {}", actor, reason);
|
||||
}
|
||||
Err(e) => eprintln!("Other error: {}", e),
|
||||
}
|
||||
@@ -243,11 +243,11 @@ REDIS_URL=redis://localhost:6379 cargo run --example lifecycle_demo
|
||||
redis-server
|
||||
```
|
||||
|
||||
3. **Worker Binaries**: Compiled worker binaries for each script type
|
||||
- `/usr/local/bin/osis_worker`
|
||||
- `/usr/local/bin/sal_worker`
|
||||
- `/usr/local/bin/v_worker`
|
||||
- `/usr/local/bin/python_worker`
|
||||
3. **Actor Binaries**: Compiled actor binaries for each script type
|
||||
- `/usr/local/bin/osis_actor`
|
||||
- `/usr/local/bin/sal_actor`
|
||||
- `/usr/local/bin/v_actor`
|
||||
- `/usr/local/bin/python_actor`
|
||||
|
||||
## Configuration Best Practices
|
||||
|
||||
@@ -267,15 +267,15 @@ REDIS_URL=redis://localhost:6379 cargo run --example lifecycle_demo
|
||||
- Check socket permissions: `ls -la /var/run/zinit.sock`
|
||||
- Verify socket path in configuration
|
||||
|
||||
2. **Worker Start Failed**
|
||||
2. **Actor Start Failed**
|
||||
- Check binary exists and is executable
|
||||
- Verify dependencies are running
|
||||
- Review Zinit logs: `zinit logs <service-name>`
|
||||
|
||||
3. **Health Check Failures**
|
||||
- Implement proper health check endpoint in workers
|
||||
- Implement proper health check endpoint in actors
|
||||
- Verify health check command syntax
|
||||
- Check worker responsiveness
|
||||
- Check actor responsiveness
|
||||
|
||||
4. **Redis Connection Issues**
|
||||
- Ensure Redis is running and accessible
|
||||
@@ -289,10 +289,10 @@ REDIS_URL=redis://localhost:6379 cargo run --example lifecycle_demo
|
||||
zinit list
|
||||
|
||||
# View service logs
|
||||
zinit logs osis_worker_0
|
||||
zinit logs osis_actor_0
|
||||
|
||||
# Check service status
|
||||
zinit status osis_worker_0
|
||||
zinit status osis_actor_0
|
||||
|
||||
# Monitor Redis queues
|
||||
redis-cli keys "hero:job:*"
|
||||
@@ -300,20 +300,20 @@ redis-cli keys "hero:job:*"
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
- **Scaling**: Start with minimal workers and scale based on queue depth
|
||||
- **Scaling**: Start with minimal actors and scale based on queue depth
|
||||
- **Health Monitoring**: Adjust ping intervals based on workload patterns
|
||||
- **Resource Usage**: Monitor CPU/memory usage of worker processes
|
||||
- **Resource Usage**: Monitor CPU/memory usage of actor processes
|
||||
- **Queue Depth**: Monitor Redis queue lengths for scaling decisions
|
||||
|
||||
## Security
|
||||
|
||||
- **Process Isolation**: Zinit provides process isolation
|
||||
- **User Permissions**: Run workers with appropriate user permissions
|
||||
- **User Permissions**: Run actors with appropriate user permissions
|
||||
- **Network Security**: Secure Redis and Zinit socket access
|
||||
- **Binary Validation**: Verify worker binary integrity before deployment
|
||||
- **Binary Validation**: Verify actor binary integrity before deployment
|
||||
|
||||
|
||||
## Future
|
||||
|
||||
- **Load Balancing**: Dynamic scaling of workers based on demand
|
||||
- **Load Balancing**: Dynamic scaling of actors based on demand
|
||||
- **Service Dependencies**: Proper startup ordering with dependency management
|
Reference in New Issue
Block a user