merge branches and cleanup db
This commit is contained in:
451
heromodels/docs/tst_integration_plan.md
Normal file
451
heromodels/docs/tst_integration_plan.md
Normal file
@@ -0,0 +1,451 @@
|
||||
# TST Integration Plan for HeroDB
|
||||
|
||||
## Overview
|
||||
|
||||
This document outlines the plan for adding generic functionality to the `herodb/src/db` module: storing objects in a Ternary Search Tree (TST) under prefixed IDs, and implementing a generic list function that retrieves all objects with a specific prefix.
|
||||
|
||||
## Current Architecture
|
||||
|
||||
Currently:
|
||||
- Each model has a `db_prefix()` method that returns a string prefix (e.g., "vote" for Vote objects)
|
||||
- Objects are stored in OurDB with numeric IDs
|
||||
- The `list()` method in `OurDbStore` is not implemented
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### 1. Create a TST-based Index Manager (herodb/src/db/tst_index.rs)
|
||||
|
||||
Create a new module that manages TST instances for different model prefixes:
|
||||
|
||||
```rust
|
||||
use crate::db::error::{DbError, DbResult};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tst::TST;
|
||||
|
||||
/// Manages TST-based indexes for model objects
|
||||
pub struct TSTIndexManager {
|
||||
/// Base path for TST databases
|
||||
base_path: PathBuf,
|
||||
|
||||
/// Map of model prefixes to their TST instances
|
||||
tst_instances: std::collections::HashMap<String, TST>,
|
||||
}
|
||||
|
||||
impl TSTIndexManager {
|
||||
/// Creates a new TST index manager
|
||||
pub fn new<P: AsRef<Path>>(base_path: P) -> DbResult<Self> {
|
||||
let base_path = base_path.as_ref().to_path_buf();
|
||||
|
||||
// Create directory if it doesn't exist
|
||||
std::fs::create_dir_all(&base_path).map_err(DbError::IoError)?;
|
||||
|
||||
Ok(Self {
|
||||
base_path,
|
||||
tst_instances: std::collections::HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Gets or creates a TST instance for a model prefix
|
||||
pub fn get_tst(&mut self, prefix: &str) -> DbResult<&mut TST> {
|
||||
if !self.tst_instances.contains_key(prefix) {
|
||||
// Create a new TST instance for this prefix
|
||||
let tst_path = self.base_path.join(format!("{}_tst", prefix));
|
||||
let tst_path_str = tst_path.to_string_lossy().to_string();
|
||||
|
||||
// Create the TST
|
||||
let tst = TST::new(&tst_path_str, false)
|
||||
.map_err(|e| DbError::GeneralError(format!("TST error: {:?}", e)))?;
|
||||
|
||||
// Insert it into the map
|
||||
self.tst_instances.insert(prefix.to_string(), tst);
|
||||
}
|
||||
|
||||
// Return a mutable reference to the TST
|
||||
Ok(self.tst_instances.get_mut(prefix).unwrap())
|
||||
}
|
||||
|
||||
/// Adds or updates an object in the TST index
|
||||
pub fn set(&mut self, prefix: &str, id: u32, data: Vec<u8>) -> DbResult<()> {
|
||||
// Get the TST for this prefix
|
||||
let tst = self.get_tst(prefix)?;
|
||||
|
||||
// Create the key in the format prefix_id
|
||||
let key = format!("{}_{}", prefix, id);
|
||||
|
||||
// Set the key-value pair in the TST
|
||||
tst.set(&key, data)
|
||||
.map_err(|e| DbError::GeneralError(format!("TST error: {:?}", e)))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Removes an object from the TST index
|
||||
pub fn delete(&mut self, prefix: &str, id: u32) -> DbResult<()> {
|
||||
// Get the TST for this prefix
|
||||
let tst = self.get_tst(prefix)?;
|
||||
|
||||
// Create the key in the format prefix_id
|
||||
let key = format!("{}_{}", prefix, id);
|
||||
|
||||
// Delete the key from the TST
|
||||
tst.delete(&key)
|
||||
.map_err(|e| DbError::GeneralError(format!("TST error: {:?}", e)))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Lists all objects with a given prefix
|
||||
pub fn list(&mut self, prefix: &str) -> DbResult<Vec<(u32, Vec<u8>)>> {
|
||||
// Get the TST for this prefix
|
||||
let tst = self.get_tst(prefix)?;
|
||||
|
||||
// Get all keys with this prefix
|
||||
let keys = tst.list(prefix)
|
||||
.map_err(|e| DbError::GeneralError(format!("TST error: {:?}", e)))?;
|
||||
|
||||
// Get all values for these keys
|
||||
let mut result = Vec::with_capacity(keys.len());
|
||||
for key in keys {
|
||||
// Extract the ID from the key (format: prefix_id)
|
||||
let id_str = key.split('_').nth(1).ok_or_else(|| {
|
||||
DbError::GeneralError(format!("Invalid key format: {}", key))
|
||||
})?;
|
||||
|
||||
let id = id_str.parse::<u32>().map_err(|_| {
|
||||
DbError::GeneralError(format!("Invalid ID in key: {}", key))
|
||||
})?;
|
||||
|
||||
// Get the value from the TST
|
||||
let data = tst.get(&key)
|
||||
.map_err(|e| DbError::GeneralError(format!("TST error: {:?}", e)))?;
|
||||
|
||||
result.push((id, data));
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Update DB Module (herodb/src/db/mod.rs)
|
||||
|
||||
Add the new module to the db module:
|
||||
|
||||
```rust
|
||||
pub mod db;
|
||||
pub mod error;
|
||||
pub mod macros;
|
||||
pub mod model;
|
||||
pub mod model_methods;
|
||||
pub mod store;
|
||||
pub mod tst_index; // Add the new module
|
||||
|
||||
pub use db::DB;
|
||||
pub use db::DBBuilder;
|
||||
pub use error::{DbError, DbResult};
|
||||
pub use model::Model;
|
||||
pub use model::Storable;
|
||||
```
|
||||
|
||||
### 3. Modify DB Struct (herodb/src/db/db.rs)
|
||||
|
||||
Update the DB struct to include the TST index manager:
|
||||
|
||||
```rust
|
||||
/// Main DB manager that automatically handles all models
|
||||
#[derive(Clone, CustomType)]
|
||||
pub struct DB {
|
||||
db_path: PathBuf,
|
||||
|
||||
// Type map for generic operations
|
||||
type_map: HashMap<TypeId, Arc<RwLock<dyn DbOperations>>>,
|
||||
|
||||
// TST index manager
|
||||
tst_index: Arc<RwLock<TSTIndexManager>>,
|
||||
|
||||
// Transaction state
|
||||
transaction: Arc<RwLock<Option<TransactionState>>>,
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Extend Transaction Handling
|
||||
|
||||
Extend the `DbOperation` enum to include model prefix and ID information:
|
||||
|
||||
```rust
|
||||
/// A write recorded inside a transaction and applied at commit time
#[derive(Debug, Clone)]
enum DbOperation {
    /// Store (insert or update) a serialized model
    Set {
        model_type: TypeId,
        serialized: Vec<u8>,
        /// New: prefix of the model, used to address the TST index
        model_prefix: String,
        /// New: ID of the model, used to build the TST key
        model_id: u32,
    },
    /// Remove a model by ID
    Delete {
        model_type: TypeId,
        id: u32,
        /// New: prefix of the model, used to address the TST index
        model_prefix: String,
    },
}
|
||||
```
|
||||
|
||||
### 5. Update Transaction Recording
|
||||
|
||||
Modify the `set` and `delete` methods to record model prefix and ID in the transaction:
|
||||
|
||||
```rust
|
||||
pub fn set<T: Model>(&self, model: &T) -> DbResult<()> {
|
||||
// Try to acquire a write lock on the transaction
|
||||
let mut tx_guard = self.transaction.write().unwrap();
|
||||
|
||||
// Check if there's an active transaction
|
||||
if let Some(tx_state) = tx_guard.as_mut() {
|
||||
if tx_state.active {
|
||||
// Serialize the model for later use
|
||||
let serialized = model.to_bytes()?;
|
||||
|
||||
// Record a Set operation in the transaction with prefix and ID
|
||||
tx_state.operations.push(DbOperation::Set {
|
||||
model_type: TypeId::of::<T>(),
|
||||
serialized: serialized.clone(),
|
||||
model_prefix: T::db_prefix().to_string(),
|
||||
model_id: model.get_id(),
|
||||
});
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// ... rest of the method ...
|
||||
}
|
||||
|
||||
pub fn delete<T: Model>(&self, id: u32) -> DbResult<()> {
|
||||
// Try to acquire a write lock on the transaction
|
||||
let mut tx_guard = self.transaction.write().unwrap();
|
||||
|
||||
// Check if there's an active transaction
|
||||
if let Some(tx_state) = tx_guard.as_mut() {
|
||||
if tx_state.active {
|
||||
// Record a Delete operation in the transaction with prefix
|
||||
tx_state.operations.push(DbOperation::Delete {
|
||||
model_type: TypeId::of::<T>(),
|
||||
id,
|
||||
model_prefix: T::db_prefix().to_string(),
|
||||
});
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// ... rest of the method ...
|
||||
}
|
||||
```
|
||||
|
||||
### 6. Update Transaction Commit
|
||||
|
||||
Modify the `commit_transaction` method to update both OurDB and the TST index:
|
||||
|
||||
```rust
|
||||
pub fn commit_transaction(&self) -> DbResult<()> {
|
||||
let mut tx_guard = self.transaction.write().unwrap();
|
||||
|
||||
if let Some(tx_state) = tx_guard.take() {
|
||||
if !tx_state.active {
|
||||
return Err(DbError::TransactionError("Transaction not active".into()));
|
||||
}
|
||||
|
||||
// Create a backup of the transaction state in case we need to rollback
|
||||
let backup = tx_state.clone();
|
||||
|
||||
// Try to execute all operations
|
||||
let result = (|| {
|
||||
for op in &tx_state.operations {
|
||||
match op {
|
||||
DbOperation::Set {
|
||||
model_type,
|
||||
serialized,
|
||||
model_prefix,
|
||||
model_id,
|
||||
} => {
|
||||
// Apply to OurDB
|
||||
self.apply_set_operation(*model_type, serialized)?;
|
||||
|
||||
// Apply to TST index
|
||||
let mut tst_index = self.tst_index.write().unwrap();
|
||||
tst_index.set(model_prefix, *model_id, serialized.clone())?;
|
||||
}
|
||||
DbOperation::Delete {
|
||||
model_type,
|
||||
id,
|
||||
model_prefix,
|
||||
} => {
|
||||
// Apply to OurDB
|
||||
let db_ops = self
|
||||
.type_map
|
||||
.get(model_type)
|
||||
.ok_or_else(|| DbError::TypeError)?;
|
||||
let mut db_ops_guard = db_ops.write().unwrap();
|
||||
db_ops_guard.delete(*id)?;
|
||||
|
||||
// Apply to TST index
|
||||
let mut tst_index = self.tst_index.write().unwrap();
|
||||
tst_index.delete(model_prefix, *id)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})();
|
||||
|
||||
// If any operation failed, restore the transaction state
|
||||
if result.is_err() {
|
||||
*tx_guard = Some(backup);
|
||||
return result;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
Err(DbError::TransactionError("No active transaction".into()))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 7. Implement List Method
|
||||
|
||||
Implement the `list` method to use the TST's prefix search:
|
||||
|
||||
```rust
|
||||
/// Returns every stored `T`, found through the TST prefix search.
///
/// Fails if the index lookup fails or any entry cannot be deserialized.
pub fn list<T: Model>(&self) -> DbResult<Vec<T>> {
    // Fetch the raw (id, bytes) pairs indexed under this model's prefix
    let entries = self
        .tst_index
        .write()
        .unwrap()
        .list(T::db_prefix())?;

    // Decode each payload; the first failure aborts the whole listing
    entries
        .into_iter()
        .map(|(_, bytes)| -> DbResult<T> { Ok(T::from_bytes(&bytes)?) })
        .collect()
}
|
||||
```
|
||||
|
||||
### 8. Add Recovery Mechanism
|
||||
|
||||
Add a method to synchronize the TST index with OurDB in case they get out of sync:
|
||||
|
||||
```rust
|
||||
/// Rebuilds the TST index for model type `T` from the contents of OurDB.
///
/// Index entries whose ID no longer exists in OurDB are removed first, then
/// every model found in OurDB is written to the index again. Without the
/// removal step, objects deleted from OurDB would keep showing up in
/// index-backed listings after a "recovery".
///
/// # Errors
///
/// Propagates errors from listing OurDB, from reading or updating the TST
/// index, and from serializing a model.
pub fn synchronize_tst_index<T: Model>(&self) -> DbResult<()> {
    // Get all models from OurDB
    let models = self.list_from_ourdb::<T>()?;

    let mut tst_index = self.tst_index.write().unwrap();
    let prefix = T::db_prefix();

    // IDs that exist in OurDB; any other ID found in the index is stale
    let live_ids: std::collections::HashSet<u32> =
        models.iter().map(|model| model.get_id()).collect();

    // Clear stale entries from the TST index for this model type
    for (id, _) in tst_index.list(prefix)? {
        if !live_ids.contains(&id) {
            tst_index.delete(prefix, id)?;
        }
    }

    // Rebuild the TST index
    for model in models {
        let id = model.get_id();
        let data = model.to_bytes()?;
        tst_index.set(prefix, id, data)?;
    }

    Ok(())
}
|
||||
|
||||
// Helper method to list models directly from OurDB (not using TST)
|
||||
fn list_from_ourdb<T: Model>(&self) -> DbResult<Vec<T>> {
|
||||
match self.type_map.get(&TypeId::of::<T>()) {
|
||||
Some(db_ops) => {
|
||||
let db_ops_guard = db_ops.read().unwrap();
|
||||
let result_any = db_ops_guard.list()?;
|
||||
match result_any.downcast::<Vec<T>>() {
|
||||
Ok(vec_t) => Ok(*vec_t),
|
||||
Err(_) => Err(DbError::TypeError),
|
||||
}
|
||||
}
|
||||
None => Err(DbError::TypeError),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Implementation Flow
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant DB
|
||||
participant TransactionState
|
||||
participant OurDbStore
|
||||
participant TSTIndexManager
|
||||
participant TST
|
||||
|
||||
Client->>DB: begin_transaction()
|
||||
DB->>TransactionState: create new transaction
|
||||
|
||||
Client->>DB: set(model)
|
||||
DB->>TransactionState: record Set operation with prefix and ID
|
||||
|
||||
Client->>DB: delete<T>(id)
|
||||
DB->>TransactionState: record Delete operation with prefix and ID
|
||||
|
||||
Client->>DB: commit_transaction()
|
||||
DB->>TransactionState: get all operations
|
||||
|
||||
loop For each operation
|
||||
alt Set operation
|
||||
DB->>OurDbStore: apply_set_operation()
|
||||
DB->>TSTIndexManager: set(prefix, id, data)
|
||||
TSTIndexManager->>TST: set(key, data)
|
||||
else Delete operation
|
||||
DB->>OurDbStore: delete(id)
|
||||
DB->>TSTIndexManager: delete(prefix, id)
|
||||
TSTIndexManager->>TST: delete(key)
|
||||
end
|
||||
end
|
||||
|
||||
alt Success
|
||||
DB-->>Client: Ok(())
|
||||
else Error
|
||||
DB->>TransactionState: restore transaction state
|
||||
DB-->>Client: Err(error)
|
||||
end
|
||||
|
||||
Client->>DB: list<T>()
|
||||
DB->>TSTIndexManager: list(prefix)
|
||||
TSTIndexManager->>TST: list(prefix)
|
||||
TST-->>TSTIndexManager: keys
|
||||
TSTIndexManager->>TST: get(key) for each key
|
||||
TST-->>TSTIndexManager: data
|
||||
TSTIndexManager-->>DB: (id, data) pairs
|
||||
DB->>DB: deserialize data to models
|
||||
DB-->>Client: Vec<T>
|
||||
```
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
1. Create unit tests for the TST index manager
|
||||
2. Test the list functionality with different model types
|
||||
3. Test transaction handling (commit and rollback)
|
||||
4. Test error recovery mechanisms
|
||||
5. Test edge cases (empty database, large datasets)
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
1. Add TST dependency to herodb/Cargo.toml
|
||||
2. Create the tst_index.rs module
|
||||
3. Update the DB module to include the TST index manager
|
||||
4. Extend the transaction handling
|
||||
5. Implement the list method
|
||||
6. Add tests for the new functionality
|
||||
7. Update documentation
|
||||
|
||||
## Considerations
|
||||
|
||||
1. **Performance**: The TST operations add overhead to insert/delete operations, but provide efficient list functionality.
|
||||
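2. **Consistency**: The enhanced transaction handling applies every committed operation to both OurDB and the TST index. A commit that fails partway leaves its earlier operations applied, so `synchronize_tst_index` can be used to rebuild the index if the two stores diverge.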
|
||||
3. **Error Handling**: Proper error handling and recovery mechanisms are essential for maintaining data integrity.
|
||||
4. **Backward Compatibility**: The implementation should maintain backward compatibility with existing code.
|
Reference in New Issue
Block a user