From 5c5225c8f77d5c435be7fa3463d2115be544255c Mon Sep 17 00:00:00 2001 From: Timur Gordon <31495328+timurgordon@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:38:22 +0200 Subject: [PATCH] added architecture specs --- ourdb/architecture.md | 439 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 439 insertions(+) create mode 100644 ourdb/architecture.md diff --git a/ourdb/architecture.md b/ourdb/architecture.md new file mode 100644 index 0000000..d6072f7 --- /dev/null +++ b/ourdb/architecture.md @@ -0,0 +1,439 @@ +# OurDB: Architecture for V to Rust Port + +## 1. Overview + +OurDB is a lightweight, efficient key-value database implementation that provides data persistence with history tracking capabilities. This document outlines the architecture for porting OurDB from its original V implementation to Rust, maintaining all existing functionality while leveraging Rust's memory safety, performance, and ecosystem. + +## 2. Current Architecture (V Implementation) + +The current V implementation of OurDB consists of three main components in a layered architecture: + +```mermaid +graph TD + A[Client Code] --> B[Frontend API] + B --> C[Lookup Table] + B --> D[Backend Storage] + C --> D +``` + +### 2.1 Frontend (db.v) + +The frontend provides the public API for database operations and coordinates between the lookup table and backend storage components. + +Key responsibilities: +- Exposing high-level operations (set, get, delete, history) +- Managing incremental ID generation in auto-increment mode +- Coordinating data flow between lookup and backend components +- Handling database lifecycle (open, close, destroy) + +### 2.2 Lookup Table (lookup.v) + +The lookup table maps keys to physical locations in the backend storage. + +Key responsibilities: +- Maintaining key-to-location mapping +- Optimizing key sizes based on database configuration +- Supporting both memory and disk-based lookup tables +- Handling sparse data efficiently +- Providing next ID generation for incremental mode + +### 2.3 Backend Storage (backend.v) + +The backend storage manages the actual data persistence in files. + +Key responsibilities: +- Managing physical data storage in files +- Ensuring data integrity with CRC32 checksums +- Supporting multiple file backends for large datasets +- Implementing low-level read/write operations +- Tracking record history through linked locations + +### 2.4 Core Data Structures + +#### OurDB +```v +@[heap] +pub struct OurDB { +mut: + lookup &LookupTable +pub: + path string // directory for storage + incremental_mode bool + file_size u32 = 500 * (1 << 20) // 500MB +pub mut: + file os.File + file_nr u16 // the file which is open + last_used_file_nr u16 +} +``` + +#### LookupTable +```v +pub struct LookupTable { + keysize u8 + lookuppath string +mut: + data []u8 + incremental ?u32 // points to next empty slot if incremental mode is enabled +} +``` + +#### Location +```v +pub struct Location { +pub mut: + file_nr u16 + position u32 +} +``` + +### 2.5 Storage Format + +#### Record Format +Each record in the backend storage includes: +- 2 bytes: Data size +- 4 bytes: CRC32 checksum +- 6 bytes: Previous record location (for history) +- N bytes: Actual data + +#### Lookup Table Optimization +The lookup table automatically optimizes its key size based on the database configuration: +- 2 bytes: For databases with < 65,536 records +- 3 bytes: For databases with < 16,777,216 records +- 4 bytes: For databases with < 4,294,967,296 records +- 6 bytes: For large databases requiring multiple files + +## 3. Proposed Rust Architecture + +The Rust implementation will maintain the same layered architecture while leveraging Rust's type system, ownership model, and error handling. + +```mermaid +graph TD + A[Client Code] --> B[OurDB API] + B --> C[LookupTable] + B --> D[Backend] + C --> D + E[Error Handling] --> B + E --> C + E --> D + F[Configuration] --> B +``` + +### 3.1 Core Components + +#### 3.1.1 OurDB (API Layer) + +```rust +pub struct OurDB { + path: String, + incremental_mode: bool, + file_size: u32, + lookup: LookupTable, + file: Option, + file_nr: u16, + last_used_file_nr: u16, +} + +impl OurDB { + pub fn new(config: OurDBConfig) -> Result; + pub fn set(&mut self, id: Option, data: &[u8]) -> Result; + pub fn get(&mut self, id: u32) -> Result, Error>; + pub fn get_history(&mut self, id: u32, depth: u8) -> Result>, Error>; + pub fn delete(&mut self, id: u32) -> Result<(), Error>; + pub fn get_next_id(&mut self) -> Result; + pub fn close(&mut self) -> Result<(), Error>; + pub fn destroy(&mut self) -> Result<(), Error>; +} +``` + +#### 3.1.2 LookupTable + +```rust +pub struct LookupTable { + keysize: u8, + lookuppath: String, + data: Vec, + incremental: Option, +} + +impl LookupTable { + fn new(config: LookupConfig) -> Result; + fn get(&self, id: u32) -> Result; + fn set(&mut self, id: u32, location: Location) -> Result<(), Error>; + fn delete(&mut self, id: u32) -> Result<(), Error>; + fn get_next_id(&self) -> Result; + fn increment_index(&mut self) -> Result<(), Error>; + fn export_data(&self, path: &str) -> Result<(), Error>; + fn import_data(&mut self, path: &str) -> Result<(), Error>; + fn export_sparse(&self, path: &str) -> Result<(), Error>; + fn import_sparse(&mut self, path: &str) -> Result<(), Error>; +} +``` + +#### 3.1.3 Location + +```rust +pub struct Location { + file_nr: u16, + position: u32, +} + +impl Location { + fn new(bytes: &[u8], keysize: u8) -> Result; + fn to_bytes(&self) -> Result, Error>; + fn to_u64(&self) -> u64; +} +``` + +#### 3.1.4 Backend + +The backend functionality will be implemented as methods on the OurDB struct: + +```rust +impl OurDB { + fn db_file_select(&mut self, file_nr: u16) -> Result<(), Error>; + fn create_new_db_file(&mut self, file_nr: u16) -> Result<(), Error>; + fn get_file_nr(&mut self) -> Result; + fn set_(&mut self, id: u32, old_location: Location, data: &[u8]) -> Result<(), Error>; + fn get_(&mut self, location: Location) -> Result, Error>; + fn get_prev_pos_(&mut self, location: Location) -> Result; + fn delete_(&mut self, id: u32, location: Location) -> Result<(), Error>; + fn close_(&mut self); +} +``` + +#### 3.1.5 Configuration + +```rust +pub struct OurDBConfig { + pub record_nr_max: u32, + pub record_size_max: u32, + pub file_size: u32, + pub path: String, + pub incremental_mode: bool, + pub reset: bool, +} + +struct LookupConfig { + size: u32, + keysize: u8, + lookuppath: String, + incremental_mode: bool, +} +``` + +#### 3.1.6 Error Handling + +```rust +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + #[error("Invalid key size: {0}")] + InvalidKeySize(u8), + + #[error("Record not found: {0}")] + RecordNotFound(u32), + + #[error("Data corruption: CRC mismatch")] + DataCorruption, + + #[error("Index out of bounds: {0}")] + IndexOutOfBounds(u32), + + #[error("Incremental mode not enabled")] + IncrementalNotEnabled, + + #[error("Lookup table is full")] + LookupTableFull, + + #[error("Invalid file number: {0}")] + InvalidFileNumber(u16), + + #[error("Invalid operation: {0}")] + InvalidOperation(String), +} +``` + +## 4. Implementation Strategy + +### 4.1 Phase 1: Core Data Structures + +1. Implement the `Location` struct with serialization/deserialization +2. Implement the `Error` enum for error handling +3. Implement the configuration structures + +### 4.2 Phase 2: Lookup Table + +1. Implement the `LookupTable` struct with memory-based storage +2. Add disk-based storage support +3. Implement key size optimization +4. Add incremental ID support +5. Implement import/export functionality + +### 4.3 Phase 3: Backend Storage + +1. Implement file management functions +2. Implement record serialization/deserialization with CRC32 +3. Implement history tracking through linked locations +4. Add support for multiple backend files + +### 4.4 Phase 4: Frontend API + +1. Implement the `OurDB` struct with core operations +2. Add high-level API methods (set, get, delete, history) +3. Implement database lifecycle management + +### 4.5 Phase 5: Testing and Optimization + +1. Port existing tests from V to Rust +2. Add new tests for Rust-specific functionality +3. Benchmark and optimize performance +4. Ensure compatibility with existing OurDB files + +## 5. Implementation Considerations + +### 5.1 Memory Management + +Leverage Rust's ownership model for safe and efficient memory management: +- Use `Vec` for data buffers instead of raw pointers +- Implement proper RAII for file handles +- Use references and borrows to avoid unnecessary copying +- Consider using `Bytes` from the `bytes` crate for zero-copy operations + +### 5.2 Error Handling + +Use Rust's `Result` type for comprehensive error handling: +- Define custom error types for OurDB-specific errors +- Propagate errors using the `?` operator +- Provide detailed error messages +- Implement proper error conversion using the `From` trait + +### 5.3 File I/O + +Optimize file operations for performance: +- Use `BufReader` and `BufWriter` for buffered I/O +- Implement proper file locking for concurrent access +- Consider memory-mapped files for lookup tables +- Use `seek` and `read_exact` for precise positioning + +### 5.4 Concurrency + +Consider thread safety for concurrent database access: +- Use interior mutability patterns where appropriate +- Implement `Send` and `Sync` traits for thread safety +- Consider using `RwLock` for shared read access +- Provide clear documentation on thread safety guarantees + +### 5.5 Performance Optimizations + +Identify opportunities for performance improvements: +- Use memory-mapped files for lookup tables +- Implement caching for frequently accessed records +- Use zero-copy operations where possible +- Consider async I/O for non-blocking operations + +## 6. Testing Strategy + +### 6.1 Unit Tests + +Write comprehensive unit tests for each component: +- Test `Location` serialization/deserialization +- Test `LookupTable` operations +- Test backend storage functions +- Test error handling + +### 6.2 Integration Tests + +Write integration tests for the complete system: +- Test database creation and configuration +- Test basic CRUD operations +- Test history tracking +- Test incremental ID generation +- Test file management + +### 6.3 Compatibility Tests + +Ensure compatibility with existing OurDB files: +- Test reading existing V-created OurDB files +- Test writing files that can be read by the V implementation +- Test migration scenarios + +### 6.4 Performance Tests + +Benchmark performance against the V implementation: +- Measure throughput for set/get operations +- Measure latency for different operations +- Test with different database sizes +- Test with different record sizes + +## 7. Project Structure + +``` +ourdb/ +├── Cargo.toml +├── src/ +│ ├── lib.rs # Public API and re-exports +│ ├── ourdb.rs # OurDB implementation (frontend) +│ ├── lookup.rs # Lookup table implementation +│ ├── location.rs # Location struct implementation +│ ├── backend.rs # Backend storage implementation +│ ├── error.rs # Error types +│ ├── config.rs # Configuration structures +│ └── utils.rs # Utility functions +├── tests/ +│ ├── unit/ # Unit tests +│ ├── integration/ # Integration tests +│ └── compatibility/ # Compatibility tests +└── examples/ + ├── basic.rs # Basic usage example + ├── history.rs # History tracking example + └── client_server.rs # Client-server example +``` + +## 8. Dependencies + +The Rust implementation will use the following dependencies: + +- `thiserror` for error handling +- `crc32fast` for CRC32 calculation +- `bytes` for efficient byte manipulation +- `memmap2` for memory-mapped files (optional) +- `serde` for serialization (optional, for future extensions) +- `log` for logging +- `criterion` for benchmarking + +## 9. Compatibility Considerations + +To ensure compatibility with the V implementation: + +1. Maintain the same file format for data storage +2. Preserve the lookup table format +3. Keep the same CRC32 calculation method +4. Ensure identical behavior for incremental ID generation +5. Maintain the same history tracking mechanism + +## 10. Future Extensions + +Potential future extensions to consider: + +1. Async API for non-blocking operations +2. Transactions support +3. Better concurrency control +4. Compression support +5. Encryption support +6. Streaming API for large values +7. Iterators for scanning records +8. Secondary indexes + +## 11. Conclusion + +This architecture provides a roadmap for porting OurDB from V to Rust while maintaining compatibility and leveraging Rust's strengths. The implementation will follow a phased approach, starting with core data structures and gradually building up to the complete system. + +The Rust implementation aims to be: +- **Safe**: Leveraging Rust's ownership model for memory safety +- **Fast**: Maintaining or improving performance compared to V +- **Compatible**: Working with existing OurDB files +- **Extensible**: Providing a foundation for future enhancements +- **Well-tested**: Including comprehensive test coverage \ No newline at end of file