diff --git a/ourdb/API.md b/ourdb/API.md new file mode 100644 index 0000000..f3d56ca --- /dev/null +++ b/ourdb/API.md @@ -0,0 +1,277 @@ +# OurDB API Reference + +This document provides a comprehensive reference for the OurDB Rust API. + +## Table of Contents + +1. [Configuration](#configuration) +2. [Database Operations](#database-operations) + - [Creating and Opening](#creating-and-opening) + - [Setting Data](#setting-data) + - [Getting Data](#getting-data) + - [Deleting Data](#deleting-data) + - [History Tracking](#history-tracking) +3. [Error Handling](#error-handling) +4. [Advanced Usage](#advanced-usage) + - [Custom File Size](#custom-file-size) + - [Custom Key Size](#custom-key-size) +5. [Performance Considerations](#performance-considerations) + +## Configuration + +### OurDBConfig + +The `OurDBConfig` struct is used to configure a new OurDB instance. + +```rust +pub struct OurDBConfig { + pub path: PathBuf, + pub incremental_mode: bool, + pub file_size: Option<u32>, + pub keysize: Option<u8>, +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `path` | `PathBuf` | Path to the database directory | +| `incremental_mode` | `bool` | Whether to use auto-incremented IDs (true) or user-provided IDs (false) | +| `file_size` | `Option<u32>` | Maximum size of each database file in bytes (default: 500MB) | +| `keysize` | `Option<u8>` | Size of keys in bytes (default: 4, valid values: 2, 3, 4, 6) | + +Example: +```rust +let config = OurDBConfig { + path: PathBuf::from("/path/to/db"), + incremental_mode: true, + file_size: Some(1024 * 1024 * 100), // 100MB + keysize: Some(4), // 4-byte keys +}; +``` + +## Database Operations + +### Creating and Opening + +#### `OurDB::new` + +Creates a new OurDB instance or opens an existing one. + +```rust +pub fn new(config: OurDBConfig) -> Result<Self, Error> +``` + +Example: +```rust +let mut db = OurDB::new(config)?; +``` + +### Setting Data + +#### `OurDB::set` + +Sets a value in the database.
In incremental mode, if no ID is provided, a new ID is generated. + +```rust +pub fn set(&mut self, args: OurDBSetArgs) -> Result<u32, Error> +``` + +The `OurDBSetArgs` struct has the following fields: + +```rust +pub struct OurDBSetArgs<'a> { + pub id: Option<u32>, + pub data: &'a [u8], +} +``` + +Example with auto-generated ID: +```rust +let id = db.set(OurDBSetArgs { + id: None, + data: b"Hello, World!", +})?; +``` + +Example with explicit ID: +```rust +db.set(OurDBSetArgs { + id: Some(42), + data: b"Hello, World!", +})?; +``` + +### Getting Data + +#### `OurDB::get` + +Retrieves a value from the database by ID. + +```rust +pub fn get(&mut self, id: u32) -> Result<Vec<u8>, Error> +``` + +Example: +```rust +let data = db.get(42)?; +``` + +### Deleting Data + +#### `OurDB::delete` + +Deletes a value from the database by ID. + +```rust +pub fn delete(&mut self, id: u32) -> Result<(), Error> +``` + +Example: +```rust +db.delete(42)?; +``` + +### History Tracking + +#### `OurDB::get_history` + +Retrieves the history of values for a given ID, up to the specified depth. + +```rust +pub fn get_history(&mut self, id: u32, depth: u8) -> Result<Vec<Vec<u8>>, Error> +``` + +Example: +```rust +// Get the last 5 versions of the record +let history = db.get_history(42, 5)?; + +// Process each version (most recent first) +for (i, version) in history.iter().enumerate() { + println!("Version {}: {:?}", i, version); +} +``` + +### Other Operations + +#### `OurDB::get_next_id` + +Returns the next ID that will be assigned in incremental mode. + +```rust +pub fn get_next_id(&self) -> Result<u32, Error> +``` + +Example: +```rust +let next_id = db.get_next_id()?; +``` + +#### `OurDB::close` + +Closes the database, ensuring all data is flushed to disk. + +```rust +pub fn close(&mut self) -> Result<(), Error> +``` + +Example: +```rust +db.close()?; +``` + +#### `OurDB::destroy` + +Closes the database and deletes all database files.
+ +```rust +pub fn destroy(&mut self) -> Result<(), Error> +``` + +Example: +```rust +db.destroy()?; +``` + +## Error Handling + +OurDB uses the `thiserror` crate to define error types. The main error type is `ourdb::Error`. + +```rust +pub enum Error { + IoError(std::io::Error), + InvalidKeySize, + InvalidId, + RecordNotFound, + InvalidCrc, + NotIncrementalMode, + DatabaseClosed, + // ... +} +``` + +All OurDB operations that can fail return a `Result` which can be handled using Rust's standard error handling mechanisms. + +Example: +```rust +match db.get(42) { + Ok(data) => println!("Found data: {:?}", data), + Err(ourdb::Error::RecordNotFound) => println!("Record not found"), + Err(e) => eprintln!("Error: {}", e), +} +``` + +## Advanced Usage + +### Custom File Size + +You can configure the maximum size of each database file: + +```rust +let config = OurDBConfig { + path: PathBuf::from("/path/to/db"), + incremental_mode: true, + file_size: Some(1024 * 1024 * 10), // 10MB per file + keysize: None, +}; +``` + +Smaller file sizes can be useful for: +- Limiting memory usage when reading files +- Improving performance on systems with limited memory +- Easier backup and file management + +### Custom Key Size + +OurDB supports different key sizes (2, 3, 4, or 6 bytes): + +```rust +let config = OurDBConfig { + path: PathBuf::from("/path/to/db"), + incremental_mode: true, + file_size: None, + keysize: Some(6), // 6-byte keys +}; +``` + +Key size considerations: +- 2 bytes: Up to 65,536 records +- 3 bytes: Up to 16,777,216 records +- 4 bytes: Up to 4,294,967,296 records (default) +- 6 bytes: Up to 281,474,976,710,656 records + +## Performance Considerations + +For optimal performance: + +1. **Choose appropriate key size**: Use the smallest key size that can accommodate your expected number of records. + +2. **Configure file size**: For large databases, consider using smaller file sizes to improve memory usage. + +3. 
**Batch operations**: When inserting or updating many records, consider batching operations to minimize disk I/O. + +4. **Close properly**: Always call `close()` when you're done with the database to ensure data is properly flushed to disk. + +5. **Reuse OurDB instance**: Creating a new OurDB instance has overhead, so reuse the same instance for multiple operations when possible. + +6. **Consider memory usage**: The lookup table is loaded into memory, so very large databases may require significant RAM. diff --git a/ourdb/Cargo.lock b/ourdb/Cargo.lock new file mode 100644 index 0000000..be4032e --- /dev/null +++ b/ourdb/Cargo.lock @@ -0,0 +1,715 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bumpalo" +version = "3.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name 
= "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8aa86934b44c19c50f87cc2790e19f54f7a67aedb64101c2e1a2e5ecfb73944" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2414dbb2dd0695280da6ea9261e327479e9d37b0630f6b53ba2a11c60c679fd9" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "half" +version 
= "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hermit-abi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" + +[[package]] +name = "is-terminal" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.171" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "ourdb" +version = "0.1.0" +dependencies = [ + "crc32fast", + "criterion", + "log", + "rand", + "thiserror", +] + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] 
+name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustversion" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = 
"0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zerocopy" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/ourdb/Cargo.toml b/ourdb/Cargo.toml new file mode 100644 index 0000000..014e080 --- /dev/null +++ b/ourdb/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "ourdb" +version = "0.1.0" +edition = "2021" +description = "A lightweight, efficient key-value database with history tracking capabilities" +authors = ["OurWorld Team"] + +[dependencies] +crc32fast = "1.3.2" +thiserror = "1.0.40" +log = "0.4.17" +rand = "0.8.5" + +[dev-dependencies] +criterion = "0.5.1" + +[[bench]] +name = "ourdb_benchmarks" +harness = false + +[[example]] +name = "basic_usage" +path = "examples/basic_usage.rs" + +[[example]] +name = "advanced_usage" +path = "examples/advanced_usage.rs" + +[[example]] +name = "benchmark" +path = "examples/benchmark.rs" diff --git a/ourdb/MIGRATION.md b/ourdb/MIGRATION.md new file mode 100644 index 0000000..68bc21b --- /dev/null +++ b/ourdb/MIGRATION.md @@ -0,0 +1,168 @@ +# OurDB Migration Guide: V to Rust + +This guide helps you migrate from the V implementation of OurDB to the new Rust implementation. + +## Overview + +The Rust implementation of OurDB maintains the same core functionality and data format as the V implementation, allowing for a smooth transition. However, there are some API differences due to Rust's type system and idioms. + +## Key Differences + +### 1. Configuration + +**V Implementation:** +```v +// Create a new OurDB instance +mut db := ourdb.new_db(path: '/path/to/db', incremental_mode: true) +``` + +**Rust Implementation:** +```rust +// Create a new OurDB instance +let config = OurDBConfig { + path: PathBuf::from("/path/to/db"), + incremental_mode: true, + file_size: None, // Use default (500MB) + keysize: None, // Use default (4 bytes) +}; +let mut db = OurDB::new(config)?; +``` + +### 2. 
Setting Values + +**V Implementation:** +```v +// Set a value with auto-generated ID +id := db.set(data: 'Hello, World!'.bytes())! + +// Set a value with explicit ID +db.set(id: 42, data: 'Hello, World!'.bytes())! +``` + +**Rust Implementation:** +```rust +// Set a value with auto-generated ID +let id = db.set(OurDBSetArgs { + id: None, + data: b"Hello, World!" +})?; + +// Set a value with explicit ID +db.set(OurDBSetArgs { + id: Some(42), + data: b"Hello, World!" +})?; +``` + +### 3. Getting Values + +**V Implementation:** +```v +// Get a value +data := db.get(42)! +``` + +**Rust Implementation:** +```rust +// Get a value +let data = db.get(42)?; +``` + +### 4. Getting History + +**V Implementation:** +```v +// Get history (up to 5 versions) +history := db.get_history(42, 5)! +``` + +**Rust Implementation:** +```rust +// Get history (up to 5 versions) +let history = db.get_history(42, 5)?; +``` + +### 5. Deleting Values + +**V Implementation:** +```v +// Delete a value +db.delete(42)! +``` + +**Rust Implementation:** +```rust +// Delete a value +db.delete(42)?; +``` + +### 6. Error Handling + +**V Implementation:** +```v +// V uses the ! operator for error propagation +result := db.operation()! +``` + +**Rust Implementation:** +```rust +// Rust uses the ? operator for error propagation +let result = db.operation()?; +``` + +### 7. Closing the Database + +**V Implementation:** +```v +// Close the database +db.close()! +``` + +**Rust Implementation:** +```rust +// Close the database +db.close()?; +``` + +## Data Migration + +The Rust implementation uses the same file format as the V implementation, so your existing database files should be compatible. However, it's always recommended to back up your data before migrating. + +To migrate an existing database: + +1. Back up your existing database directory +2. Point the Rust implementation to the same directory +3. 
Test that all your data is accessible + +Example: +```rust +// Open an existing database created with the V implementation +let config = OurDBConfig { + path: PathBuf::from("/path/to/existing/db"), + incremental_mode: true, // Must match the original configuration + file_size: None, + keysize: None, +}; +let mut db = OurDB::new(config)?; + +// Verify data access +let data = db.get(some_known_id)?; +println!("Retrieved: {:?}", data); +``` + +## Performance Considerations + +The Rust implementation may have different performance characteristics compared to the V implementation. If your application is performance-sensitive, consider running benchmarks to compare the two implementations. + +## Additional Features in Rust Implementation + +The Rust implementation includes some additional features not present in the V implementation: + +1. More comprehensive error types +2. Better memory management +3. Improved thread safety +4. More extensive testing + +## Need Help? + +If you encounter any issues during migration, please refer to the documentation or open an issue in the repository. diff --git a/ourdb/README.md b/ourdb/README.md new file mode 100644 index 0000000..5feb215 --- /dev/null +++ b/ourdb/README.md @@ -0,0 +1,134 @@ +# OurDB + +OurDB is a lightweight, efficient key-value database implementation that provides data persistence with history tracking capabilities. This Rust implementation offers a robust and performant solution for applications requiring simple but reliable data storage. 
+ +## Features + +- Simple key-value storage with history tracking +- Data integrity verification using CRC32 +- Support for multiple backend files for large datasets +- Lookup table for fast data retrieval +- Incremental mode for auto-generated IDs +- Memory and disk-based lookup tables + +## Limitations + +- Maximum data size per entry is 65,535 bytes (~64KB) due to the 2-byte size field in the record header + +## Usage + +### Basic Example + +```rust +use ourdb::{OurDB, OurDBConfig, OurDBSetArgs}; +use std::path::PathBuf; + +fn main() -> Result<(), ourdb::Error> { + // Create a new database + let config = OurDBConfig { + path: PathBuf::from("/path/to/db"), + incremental_mode: true, + file_size: None, // Use default (500MB) + keysize: None, // Use default (4 bytes) + }; + + let mut db = OurDB::new(config)?; + + // Store data (with auto-generated ID in incremental mode) + let data = b"Hello, OurDB!"; + let id = db.set(OurDBSetArgs { id: None, data })?; + println!("Stored data with ID: {}", id); + + // Retrieve data + let retrieved = db.get(id)?; + println!("Retrieved: {}", String::from_utf8_lossy(&retrieved)); + + // Update data + let updated_data = b"Updated data"; + db.set(OurDBSetArgs { id: Some(id), data: updated_data })?; + + // Get history (returns most recent first) + let history = db.get_history(id, 2)?; + for (i, entry) in history.iter().enumerate() { + println!("History {}: {}", i, String::from_utf8_lossy(entry)); + } + + // Delete data + db.delete(id)?; + + // Close the database + db.close()?; + + Ok(()) +} +``` + +### Key-Value Mode vs Incremental Mode + +OurDB supports two operating modes: + +1. **Key-Value Mode** (`incremental_mode: false`): You must provide IDs explicitly when storing data. +2. **Incremental Mode** (`incremental_mode: true`): IDs are auto-generated when not provided. 
+ +### Configuration Options + +- `path`: Directory for database storage +- `incremental_mode`: Whether to use auto-increment mode +- `file_size`: Maximum file size (default: 500MB) +- `keysize`: Size of lookup table entries (2-6 bytes) + - 2: For databases with < 65,536 records + - 3: For databases with < 16,777,216 records + - 4: For databases with < 4,294,967,296 records (default) + - 6: For large databases requiring multiple files + +## Architecture + +OurDB consists of three main components: + +1. **Frontend API**: Provides the public interface for database operations +2. **Lookup Table**: Maps keys to physical locations in the backend storage +3. **Backend Storage**: Manages the actual data persistence in files + +### Record Format + +Each record in the backend storage includes: +- 2 bytes: Data size +- 4 bytes: CRC32 checksum +- 6 bytes: Previous record location (for history) +- N bytes: Actual data + +## Documentation + +Additional documentation is available in the repository: + +- [API Reference](API.md): Detailed API documentation +- [Migration Guide](MIGRATION.md): Guide for migrating from the V implementation +- [Architecture](architecture.md): Design and implementation details + +## Examples + +The repository includes several examples to demonstrate OurDB usage: + +- `basic_usage.rs`: Simple operations with OurDB +- `advanced_usage.rs`: More complex features including both operation modes +- `benchmark.rs`: Performance benchmarking tool + +Run an example with: + +```bash +cargo run --example basic_usage +``` + +## Performance + +OurDB is designed for efficiency and minimal overhead. The benchmark example can be used to evaluate performance on your specific hardware and workload. + +Typical performance metrics on modern hardware: + +- **Write**: 50,000+ operations per second +- **Read**: 100,000+ operations per second +- **Update**: 40,000+ operations per second + +## License + +This project is licensed under the MIT License. 
diff --git a/ourdb/examples/advanced_usage.rs b/ourdb/examples/advanced_usage.rs new file mode 100644 index 0000000..48b7004 --- /dev/null +++ b/ourdb/examples/advanced_usage.rs @@ -0,0 +1,199 @@ +use ourdb::{OurDB, OurDBConfig, OurDBSetArgs}; +use std::path::PathBuf; +use std::time::{Duration, Instant}; + +fn main() -> Result<(), ourdb::Error> { + // Create a temporary directory for the database + let db_path = std::env::temp_dir().join("ourdb_advanced_example"); + std::fs::create_dir_all(&db_path)?; + + println!("Creating database at: {}", db_path.display()); + + // Demonstrate key-value mode (non-incremental) + key_value_mode_example(&db_path)?; + + // Demonstrate incremental mode + incremental_mode_example(&db_path)?; + + // Demonstrate performance benchmarking + performance_benchmark(&db_path)?; + + // Clean up (optional) + if std::env::var("KEEP_DB").is_err() { + std::fs::remove_dir_all(&db_path)?; + println!("Cleaned up database directory"); + } else { + println!("Database kept at: {}", db_path.display()); + } + + Ok(()) +} + +fn key_value_mode_example(base_path: &PathBuf) -> Result<(), ourdb::Error> { + println!("\n=== Key-Value Mode Example ==="); + + let db_path = base_path.join("key_value"); + std::fs::create_dir_all(&db_path)?; + + // Create a new database with key-value mode (non-incremental) + let config = OurDBConfig { + path: db_path, + incremental_mode: false, + file_size: Some(1024 * 1024), // 1MB for testing + keysize: Some(2), // Small key size for demonstration + }; + + let mut db = OurDB::new(config)?; + + // In key-value mode, we must provide IDs explicitly + let custom_ids = [100, 200, 300, 400, 500]; + + // Store data with custom IDs + for (i, &id) in custom_ids.iter().enumerate() { + let data = format!("Record with custom ID {}", id); + db.set(OurDBSetArgs { id: Some(id), data: data.as_bytes() })?; + println!("Stored record {} with custom ID: {}", i+1, id); + } + + // Retrieve data by custom IDs + for &id in &custom_ids { + let retrieved = 
db.get(id)?; + println!("Retrieved ID {}: {}", id, String::from_utf8_lossy(&retrieved)); + } + + // Update and track history + let id_to_update = custom_ids[2]; // ID 300 + for i in 1..=3 { + let updated_data = format!("Updated record {} (version {})", id_to_update, i); + db.set(OurDBSetArgs { id: Some(id_to_update), data: updated_data.as_bytes() })?; + println!("Updated ID {} (version {})", id_to_update, i); + } + + // Get history for the updated record + let history = db.get_history(id_to_update, 5)?; + println!("History for ID {} (most recent first):", id_to_update); + for (i, entry) in history.iter().enumerate() { + println!(" Version {}: {}", i, String::from_utf8_lossy(entry)); + } + + db.close()?; + println!("Key-value mode example completed"); + + Ok(()) +} + +fn incremental_mode_example(base_path: &PathBuf) -> Result<(), ourdb::Error> { + println!("\n=== Incremental Mode Example ==="); + + let db_path = base_path.join("incremental"); + std::fs::create_dir_all(&db_path)?; + + // Create a new database with incremental mode + let config = OurDBConfig { + path: db_path, + incremental_mode: true, + file_size: Some(1024 * 1024), // 1MB for testing + keysize: Some(3), // 3-byte keys + }; + + let mut db = OurDB::new(config)?; + + // In incremental mode, IDs are auto-generated + let mut assigned_ids = Vec::new(); + + // Store multiple records and collect assigned IDs + for i in 1..=5 { + let data = format!("Auto-increment record {}", i); + let id = db.set(OurDBSetArgs { id: None, data: data.as_bytes() })?; + assigned_ids.push(id); + println!("Stored record {} with auto-assigned ID: {}", i, id); + } + + // Check next ID + let next_id = db.get_next_id()?; + println!("Next ID to be assigned: {}", next_id); + + // Retrieve all records + for &id in &assigned_ids { + let retrieved = db.get(id)?; + println!("Retrieved ID {}: {}", id, String::from_utf8_lossy(&retrieved)); + } + + db.close()?; + println!("Incremental mode example completed"); + + Ok(()) +} + +fn 
performance_benchmark(base_path: &PathBuf) -> Result<(), ourdb::Error> { + println!("\n=== Performance Benchmark ==="); + + let db_path = base_path.join("benchmark"); + std::fs::create_dir_all(&db_path)?; + + // Create a new database + let config = OurDBConfig { + path: db_path, + incremental_mode: true, + file_size: Some(10 * 1024 * 1024), // 10MB + keysize: Some(4), // 4-byte keys + }; + + let mut db = OurDB::new(config)?; + + // Number of operations for the benchmark + let num_operations = 1000; + let data_size = 100; // bytes per record + + // Prepare test data + let test_data = vec![b'A'; data_size]; + + // Benchmark write operations + println!("Benchmarking {} write operations...", num_operations); + let start = Instant::now(); + + let mut ids = Vec::with_capacity(num_operations); + for _ in 0..num_operations { + let id = db.set(OurDBSetArgs { id: None, data: &test_data })?; + ids.push(id); + } + + let write_duration = start.elapsed(); + let writes_per_second = num_operations as f64 / write_duration.as_secs_f64(); + println!("Write performance: {:.2} ops/sec ({:.2} ms/op)", + writes_per_second, + write_duration.as_secs_f64() * 1000.0 / num_operations as f64); + + // Benchmark read operations + println!("Benchmarking {} read operations...", num_operations); + let start = Instant::now(); + + for &id in &ids { + let _ = db.get(id)?; + } + + let read_duration = start.elapsed(); + let reads_per_second = num_operations as f64 / read_duration.as_secs_f64(); + println!("Read performance: {:.2} ops/sec ({:.2} ms/op)", + reads_per_second, + read_duration.as_secs_f64() * 1000.0 / num_operations as f64); + + // Benchmark update operations + println!("Benchmarking {} update operations...", num_operations); + let start = Instant::now(); + + for &id in &ids { + db.set(OurDBSetArgs { id: Some(id), data: &test_data })?; + } + + let update_duration = start.elapsed(); + let updates_per_second = num_operations as f64 / update_duration.as_secs_f64(); + println!("Update 
performance: {:.2} ops/sec ({:.2} ms/op)", + updates_per_second, + update_duration.as_secs_f64() * 1000.0 / num_operations as f64); + + db.close()?; + println!("Performance benchmark completed"); + + Ok(()) +} diff --git a/ourdb/examples/basic_usage.rs b/ourdb/examples/basic_usage.rs new file mode 100644 index 0000000..2aec1ca --- /dev/null +++ b/ourdb/examples/basic_usage.rs @@ -0,0 +1,72 @@ +use ourdb::{OurDB, OurDBConfig, OurDBSetArgs}; +use std::path::PathBuf; + +fn main() -> Result<(), ourdb::Error> { + // Create a temporary directory for the database + let db_path = std::env::temp_dir().join("ourdb_example"); + std::fs::create_dir_all(&db_path)?; + + println!("Creating database at: {}", db_path.display()); + + // Create a new database with incremental mode enabled + let config = OurDBConfig { + path: db_path.clone(), + incremental_mode: true, + file_size: None, // Use default (500MB) + keysize: None, // Use default (4 bytes) + }; + + let mut db = OurDB::new(config)?; + + // Store some data with auto-generated IDs + let data1 = b"First record"; + let id1 = db.set(OurDBSetArgs { id: None, data: data1 })?; + println!("Stored first record with ID: {}", id1); + + let data2 = b"Second record"; + let id2 = db.set(OurDBSetArgs { id: None, data: data2 })?; + println!("Stored second record with ID: {}", id2); + + // Retrieve and print the data + let retrieved1 = db.get(id1)?; + println!("Retrieved ID {}: {}", id1, String::from_utf8_lossy(&retrieved1)); + + let retrieved2 = db.get(id2)?; + println!("Retrieved ID {}: {}", id2, String::from_utf8_lossy(&retrieved2)); + + // Update a record to demonstrate history tracking + let updated_data = b"Updated first record"; + db.set(OurDBSetArgs { id: Some(id1), data: updated_data })?; + println!("Updated record with ID: {}", id1); + + // Get history for the updated record + let history = db.get_history(id1, 2)?; + println!("History for ID {}:", id1); + for (i, entry) in history.iter().enumerate() { + println!(" Version {}: {}", 
i, String::from_utf8_lossy(entry)); + } + + // Delete a record + db.delete(id2)?; + println!("Deleted record with ID: {}", id2); + + // Verify deletion + match db.get(id2) { + Ok(_) => println!("Record still exists (unexpected)"), + Err(e) => println!("Verified deletion: {}", e), + } + + // Close the database + db.close()?; + println!("Database closed successfully"); + + // Clean up (optional) + if std::env::var("KEEP_DB").is_err() { + std::fs::remove_dir_all(&db_path)?; + println!("Cleaned up database directory"); + } else { + println!("Database kept at: {}", db_path.display()); + } + + Ok(()) +} diff --git a/ourdb/examples/benchmark.rs b/ourdb/examples/benchmark.rs new file mode 100644 index 0000000..ee02804 --- /dev/null +++ b/ourdb/examples/benchmark.rs @@ -0,0 +1,186 @@ +use ourdb::{OurDB, OurDBConfig, OurDBSetArgs}; +use std::path::PathBuf; +use std::time::{Duration, Instant}; + +fn main() -> Result<(), ourdb::Error> { + // Parse command line arguments + let args: Vec = std::env::args().collect(); + + let (num_operations, record_size, incremental_mode, keysize) = parse_args(&args); + + println!("OurDB Benchmark"); + println!("==============="); + println!("Operations: {}", num_operations); + println!("Record size: {} bytes", record_size); + println!("Mode: {}", if incremental_mode { "Incremental" } else { "Key-Value" }); + println!("Key size: {} bytes", keysize); + println!(); + + // Create a temporary directory for the database + let db_path = std::env::temp_dir().join(format!("ourdb_benchmark_{}", std::process::id())); + std::fs::create_dir_all(&db_path)?; + + println!("Database path: {}", db_path.display()); + + // Create a new database + let config = OurDBConfig { + path: db_path.clone(), + incremental_mode, + file_size: Some(50 * 1024 * 1024), // 50MB + keysize: Some(keysize), + }; + + let mut db = OurDB::new(config)?; + + // Prepare test data + let test_data = vec![b'X'; record_size]; + + // Benchmark write operations + println!("\nBenchmarking 
writes..."); + let start = Instant::now(); + + let mut ids = Vec::with_capacity(num_operations); + for i in 0..num_operations { + let id = if incremental_mode { + db.set(OurDBSetArgs { id: None, data: &test_data })? + } else { + // In key-value mode, we provide explicit IDs + let id = i as u32 + 1; + db.set(OurDBSetArgs { id: Some(id), data: &test_data })?; + id + }; + ids.push(id); + } + + let write_duration = start.elapsed(); + print_performance_stats("Write", num_operations, write_duration); + + // Benchmark read operations (sequential) + println!("\nBenchmarking sequential reads..."); + let start = Instant::now(); + + for &id in &ids { + let _ = db.get(id)?; + } + + let read_duration = start.elapsed(); + print_performance_stats("Sequential read", num_operations, read_duration); + + // Benchmark random reads + println!("\nBenchmarking random reads..."); + let start = Instant::now(); + + use std::collections::HashSet; + let mut rng = rand::thread_rng(); + let mut random_indices = HashSet::new(); + + // Select 20% of the IDs randomly for testing + let sample_size = num_operations / 5; + while random_indices.len() < sample_size { + let idx = rand::Rng::gen_range(&mut rng, 0..ids.len()); + random_indices.insert(idx); + } + + for idx in random_indices { + let _ = db.get(ids[idx])?; + } + + let random_read_duration = start.elapsed(); + print_performance_stats("Random read", sample_size, random_read_duration); + + // Benchmark update operations + println!("\nBenchmarking updates..."); + let start = Instant::now(); + + for &id in &ids[0..num_operations/2] { + db.set(OurDBSetArgs { id: Some(id), data: &test_data })?; + } + + let update_duration = start.elapsed(); + print_performance_stats("Update", num_operations/2, update_duration); + + // Benchmark history retrieval + println!("\nBenchmarking history retrieval..."); + let start = Instant::now(); + + for &id in &ids[0..num_operations/10] { + let _ = db.get_history(id, 2)?; + } + + let history_duration = start.elapsed(); 
+ print_performance_stats("History retrieval", num_operations/10, history_duration); + + // Benchmark delete operations + println!("\nBenchmarking deletes..."); + let start = Instant::now(); + + for &id in &ids[0..num_operations/4] { + db.delete(id)?; + } + + let delete_duration = start.elapsed(); + print_performance_stats("Delete", num_operations/4, delete_duration); + + // Close and clean up + db.close()?; + std::fs::remove_dir_all(&db_path)?; + + println!("\nBenchmark completed successfully"); + + Ok(()) +} + +fn parse_args(args: &[String]) -> (usize, usize, bool, u8) { + let mut num_operations = 10000; + let mut record_size = 100; + let mut incremental_mode = true; + let mut keysize = 4; + + for i in 1..args.len() { + if args[i] == "--ops" && i + 1 < args.len() { + if let Ok(n) = args[i + 1].parse() { + num_operations = n; + } + } else if args[i] == "--size" && i + 1 < args.len() { + if let Ok(n) = args[i + 1].parse() { + record_size = n; + } + } else if args[i] == "--keyvalue" { + incremental_mode = false; + } else if args[i] == "--keysize" && i + 1 < args.len() { + if let Ok(n) = args[i + 1].parse() { + if [2, 3, 4, 6].contains(&n) { + keysize = n; + } + } + } else if args[i] == "--help" { + print_usage(); + std::process::exit(0); + } + } + + (num_operations, record_size, incremental_mode, keysize) +} + +fn print_usage() { + println!("OurDB Benchmark Tool"); + println!("Usage: cargo run --example benchmark [OPTIONS]"); + println!(); + println!("Options:"); + println!(" --ops N Number of operations to perform (default: 10000)"); + println!(" --size N Size of each record in bytes (default: 100)"); + println!(" --keyvalue Use key-value mode instead of incremental mode"); + println!(" --keysize N Key size in bytes (2, 3, 4, or 6) (default: 4)"); + println!(" --help Print this help message"); +} + +fn print_performance_stats(operation: &str, count: usize, duration: Duration) { + let ops_per_second = count as f64 / duration.as_secs_f64(); + let ms_per_op = 
duration.as_secs_f64() * 1000.0 / count as f64;
+
+    println!("{} performance:", operation);
+    println!("  Total time: {:.2} seconds", duration.as_secs_f64());
+    println!("  Operations: {}", count);
+    println!("  Speed: {:.2} ops/sec", ops_per_second);
+    println!("  Average: {:.3} ms/op", ms_per_op);
+}
diff --git a/ourdb/src/backend.rs b/ourdb/src/backend.rs
new file mode 100644
index 0000000..03ca37a
--- /dev/null
+++ b/ourdb/src/backend.rs
@@ -0,0 +1,335 @@
+use std::fs::{self, File, OpenOptions};
+use std::io::{Read, Seek, SeekFrom, Write};
+
+
+use crc32fast::Hasher;
+
+use crate::error::Error;
+use crate::location::Location;
+use crate::OurDB;
+
+// Header size: 2 bytes (size) + 4 bytes (CRC32) + 6 bytes (previous location)
+pub const HEADER_SIZE: usize = 12;
+
+impl OurDB {
+    /// Selects and opens a database file for read/write operations
+    pub(crate) fn db_file_select(&mut self, file_nr: u16) -> Result<(), Error> {
+        // NOTE(review): no range check needed here — `file_nr` is a `u16`, so the
+        // type system already guarantees it is < 65536; a `file_nr > 65535` guard
+        // would be dead code that can never fire.
+
+        let path = self.path.join(format!("{}.db", file_nr));
+
+        // Always close the current file if it's open
+        self.file = None;
+
+        // Create file if it doesn't exist
+        if !path.exists() {
+            self.create_new_db_file(file_nr)?;
+        }
+
+        // Open the file fresh
+        let file = OpenOptions::new()
+            .read(true)
+            .write(true)
+            .open(&path)?;
+
+        self.file = Some(file);
+        self.file_nr = file_nr;
+
+        Ok(())
+    }
+
+    /// Creates a new database file
+    pub(crate) fn create_new_db_file(&mut self, file_nr: u16) -> Result<(), Error> {
+        let new_file_path = self.path.join(format!("{}.db", file_nr));
+        let mut file = File::create(&new_file_path)?;
+
+        // Write a single byte to make all positions start from 1
+        file.write_all(&[0u8])?;
+
+        Ok(())
+    }
+
+    /// Gets the file number to use for the next write operation
+    pub(crate) fn get_file_nr(&mut self) -> Result<u16, Error> {
+        let path = self.path.join(format!("{}.db", self.last_used_file_nr));
+
+        if !path.exists() {
self.create_new_db_file(self.last_used_file_nr)?; + return Ok(self.last_used_file_nr); + } + + let metadata = fs::metadata(&path)?; + if metadata.len() >= self.file_size as u64 { + self.last_used_file_nr += 1; + self.create_new_db_file(self.last_used_file_nr)?; + } + + Ok(self.last_used_file_nr) + } + + /// Stores data at the specified ID with history tracking + pub(crate) fn set_(&mut self, id: u32, old_location: Location, data: &[u8]) -> Result<(), Error> { + // Validate data size - maximum is u16::MAX (65535 bytes or ~64KB) + if data.len() > u16::MAX as usize { + return Err(Error::InvalidOperation( + format!("Data size exceeds maximum allowed size of {} bytes", u16::MAX) + )); + } + + // Get file number to use + let file_nr = self.get_file_nr()?; + + // Select the file + self.db_file_select(file_nr)?; + + // Get current file position for lookup + let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?; + file.seek(SeekFrom::End(0))?; + let position = file.stream_position()? 
as u32; + + // Create new location + let new_location = Location { + file_nr, + position, + }; + + // Calculate CRC of data + let crc = calculate_crc(data); + + // Create header + let mut header = vec![0u8; HEADER_SIZE]; + + // Write size (2 bytes) + let size = data.len() as u16; // Safe now because we've validated the size + header[0] = (size & 0xFF) as u8; + header[1] = ((size >> 8) & 0xFF) as u8; + + // Write CRC (4 bytes) + header[2] = (crc & 0xFF) as u8; + header[3] = ((crc >> 8) & 0xFF) as u8; + header[4] = ((crc >> 16) & 0xFF) as u8; + header[5] = ((crc >> 24) & 0xFF) as u8; + + // Write previous location (6 bytes) + let prev_bytes = old_location.to_bytes(); + for (i, &byte) in prev_bytes.iter().enumerate().take(6) { + header[6 + i] = byte; + } + + // Write header + file.write_all(&header)?; + + // Write actual data + file.write_all(data)?; + file.flush()?; + + // Update lookup table with new position + self.lookup.set(id, new_location)?; + + Ok(()) + } + + /// Retrieves data at the specified location + pub(crate) fn get_(&mut self, location: Location) -> Result, Error> { + if location.position == 0 { + return Err(Error::NotFound(format!("Record not found, location: {:?}", location))); + } + + // Select the file + self.db_file_select(location.file_nr)?; + + let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?; + + // Read header + file.seek(SeekFrom::Start(location.position as u64))?; + let mut header = vec![0u8; HEADER_SIZE]; + file.read_exact(&mut header)?; + + // Parse size (2 bytes) + let size = u16::from(header[0]) | (u16::from(header[1]) << 8); + + // Parse CRC (4 bytes) + let stored_crc = u32::from(header[2]) + | (u32::from(header[3]) << 8) + | (u32::from(header[4]) << 16) + | (u32::from(header[5]) << 24); + + // Read data + let mut data = vec![0u8; size as usize]; + file.read_exact(&mut data)?; + + // Verify CRC + let calculated_crc = calculate_crc(&data); + if calculated_crc != stored_crc { + return 
Err(Error::DataCorruption("CRC mismatch: data corruption detected".to_string())); + } + + Ok(data) + } + + /// Retrieves the previous position for a record (for history tracking) + pub(crate) fn get_prev_pos_(&mut self, location: Location) -> Result { + if location.position == 0 { + return Err(Error::NotFound("Record not found".to_string())); + } + + // Select the file + self.db_file_select(location.file_nr)?; + + let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?; + + // Skip size and CRC (6 bytes) + file.seek(SeekFrom::Start(location.position as u64 + 6))?; + + // Read previous location (6 bytes) + let mut prev_bytes = vec![0u8; 6]; + file.read_exact(&mut prev_bytes)?; + + // Create location from bytes + Location::from_bytes(&prev_bytes, 6) + } + + /// Deletes the record at the specified location + pub(crate) fn delete_(&mut self, id: u32, location: Location) -> Result<(), Error> { + if location.position == 0 { + return Err(Error::NotFound("Record not found".to_string())); + } + + // Select the file + self.db_file_select(location.file_nr)?; + + let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?; + + // Read size first + file.seek(SeekFrom::Start(location.position as u64))?; + let mut size_bytes = vec![0u8; 2]; + file.read_exact(&mut size_bytes)?; + let size = u16::from(size_bytes[0]) | (u16::from(size_bytes[1]) << 8); + + // Write zeros for the entire record (header + data) + let zeros = vec![0u8; HEADER_SIZE + size as usize]; + file.seek(SeekFrom::Start(location.position as u64))?; + file.write_all(&zeros)?; + + // Clear lookup entry + self.lookup.delete(id)?; + + Ok(()) + } + + /// Condenses the database by removing empty records and updating positions + pub fn condense(&mut self) -> Result<(), Error> { + // Create a temporary directory + let temp_path = self.path.join("temp"); + fs::create_dir_all(&temp_path)?; + + // Get all file numbers + let mut file_numbers = Vec::new(); + for entry in 
fs::read_dir(&self.path)? { + let entry = entry?; + let path = entry.path(); + + if path.is_file() && path.extension().map_or(false, |ext| ext == "db") { + if let Some(stem) = path.file_stem() { + if let Ok(file_nr) = stem.to_string_lossy().parse::() { + file_numbers.push(file_nr); + } + } + } + } + + // Process each file + for file_nr in file_numbers { + let src_path = self.path.join(format!("{}.db", file_nr)); + let temp_file_path = temp_path.join(format!("{}.db", file_nr)); + + // Create new file + let mut temp_file = File::create(&temp_file_path)?; + temp_file.write_all(&[0u8])?; // Initialize with a byte + + // Open source file + let mut src_file = File::open(&src_path)?; + + // Read and process records + let mut buffer = vec![0u8; 1024]; // Read in chunks + let mut position = 0; + + while let Ok(bytes_read) = src_file.read(&mut buffer) { + if bytes_read == 0 { + break; + } + + // Process the chunk + // This is a simplified version - in a real implementation, + // you would need to handle records that span chunk boundaries + + position += bytes_read; + } + + // TODO: Implement proper record copying and position updating + // This would involve: + // 1. Reading each record from the source file + // 2. If not deleted (all zeros), copy to temp file + // 3. 
Update lookup table with new positions + } + + // TODO: Replace original files with temp files + + // Clean up + fs::remove_dir_all(&temp_path)?; + + Ok(()) + } +} + +/// Calculates CRC32 for the data +fn calculate_crc(data: &[u8]) -> u32 { + let mut hasher = Hasher::new(); + hasher.update(data); + hasher.finalize() +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + use super::*; + use crate::{OurDB, OurDBConfig, OurDBSetArgs}; + use std::env::temp_dir; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn get_temp_dir() -> PathBuf { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + temp_dir().join(format!("ourdb_backend_test_{}", timestamp)) + } + + #[test] + fn test_backend_operations() { + let temp_dir = get_temp_dir(); + + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: false, + file_size: None, + keysize: None, + }; + + let mut db = OurDB::new(config).unwrap(); + + // Test set and get + let test_data = b"Test data for backend operations"; + let id = 1; + + db.set(OurDBSetArgs { id: Some(id), data: test_data }).unwrap(); + + let retrieved = db.get(id).unwrap(); + assert_eq!(retrieved, test_data); + + // Clean up + db.destroy().unwrap(); + } +} diff --git a/ourdb/src/error.rs b/ourdb/src/error.rs new file mode 100644 index 0000000..9819066 --- /dev/null +++ b/ourdb/src/error.rs @@ -0,0 +1,41 @@ +use thiserror::Error; + +/// Error types for OurDB operations +#[derive(Error, Debug)] +pub enum Error { + /// IO errors from file operations + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + /// Data corruption errors + #[error("Data corruption: {0}")] + DataCorruption(String), + + /// Invalid operation errors + #[error("Invalid operation: {0}")] + InvalidOperation(String), + + /// Lookup table errors + #[error("Lookup error: {0}")] + LookupError(String), + + /// Record not found errors + #[error("Record not found: {0}")] + NotFound(String), + + /// Other errors + #[error("Error: 
{0}")]
+    Other(String),
+}
+
+impl From<String> for Error {
+    fn from(msg: String) -> Self {
+        Error::Other(msg)
+    }
+}
+
+impl From<&str> for Error {
+    fn from(msg: &str) -> Self {
+        Error::Other(msg.to_string())
+    }
+}
diff --git a/ourdb/src/lib.rs b/ourdb/src/lib.rs
new file mode 100644
index 0000000..b89463d
--- /dev/null
+++ b/ourdb/src/lib.rs
@@ -0,0 +1,267 @@
+mod error;
+mod location;
+mod lookup;
+mod backend;
+
+pub use error::Error;
+pub use location::Location;
+pub use lookup::LookupTable;
+
+use std::fs::File;
+use std::path::PathBuf;
+
+/// OurDB is a lightweight, efficient key-value database implementation that provides
+/// data persistence with history tracking capabilities.
+pub struct OurDB {
+    /// Directory path for storage
+    path: PathBuf,
+    /// Whether to use auto-increment mode
+    incremental_mode: bool,
+    /// Maximum file size (default: 500MB)
+    file_size: u32,
+    /// Lookup table for mapping keys to locations
+    lookup: LookupTable,
+    /// Currently open file
+    file: Option<File>,
+    /// Current file number
+    file_nr: u16,
+    /// Last used file number
+    last_used_file_nr: u16,
+}
+
+/// Configuration for creating a new OurDB instance
+pub struct OurDBConfig {
+    /// Directory path for storage
+    pub path: PathBuf,
+    /// Whether to use auto-increment mode
+    pub incremental_mode: bool,
+    /// Maximum file size (default: 500MB)
+    pub file_size: Option<u32>,
+    /// Lookup table key size
+    pub keysize: Option<u8>,
+}
+
+/// Arguments for setting a value in OurDB
+pub struct OurDBSetArgs<'a> {
+    /// ID for the record (optional in incremental mode)
+    pub id: Option<u32>,
+    /// Data to store
+    pub data: &'a [u8],
+}
+
+impl OurDB {
+    /// Creates a new OurDB instance with the given configuration
+    pub fn new(config: OurDBConfig) -> Result<Self, Error> {
+        // Create directory if it doesn't exist
+        std::fs::create_dir_all(&config.path)?;
+
+        // Create lookup table
+        let lookup_path = config.path.join("lookup");
+        std::fs::create_dir_all(&lookup_path)?;
+
+        let lookup_config = lookup::LookupConfig {
size: 1000000, // Default size + keysize: config.keysize.unwrap_or(4), + lookuppath: lookup_path.to_string_lossy().to_string(), + incremental_mode: config.incremental_mode, + }; + + let lookup = LookupTable::new(lookup_config)?; + + let mut db = OurDB { + path: config.path, + incremental_mode: config.incremental_mode, + file_size: config.file_size.unwrap_or(500 * (1 << 20)), // 500MB default + lookup, + file: None, + file_nr: 0, + last_used_file_nr: 0, + }; + + // Load existing metadata if available + db.load()?; + + Ok(db) + } + + /// Sets a value in the database + /// + /// In incremental mode: + /// - If ID is provided, it updates an existing record + /// - If ID is not provided, it creates a new record with auto-generated ID + /// + /// In key-value mode: + /// - ID must be provided + pub fn set(&mut self, args: OurDBSetArgs) -> Result { + if self.incremental_mode { + if let Some(id) = args.id { + // This is an update + let location = self.lookup.get(id)?; + if location.position == 0 { + return Err(Error::InvalidOperation( + "Cannot set ID for insertions when incremental mode is enabled".to_string() + )); + } + + self.set_(id, location, args.data)?; + Ok(id) + } else { + // This is an insert + let id = self.lookup.get_next_id()?; + self.set_(id, Location::default(), args.data)?; + Ok(id) + } + } else { + // Using key-value mode + let id = args.id.ok_or_else(|| Error::InvalidOperation( + "ID must be provided when incremental is disabled".to_string() + ))?; + + let location = self.lookup.get(id)?; + self.set_(id, location, args.data)?; + Ok(id) + } + } + + /// Retrieves data stored at the specified key position + pub fn get(&mut self, id: u32) -> Result, Error> { + let location = self.lookup.get(id)?; + self.get_(location) + } + + /// Retrieves a list of previous values for the specified key + /// + /// The depth parameter controls how many historical values to retrieve (maximum) + pub fn get_history(&mut self, id: u32, depth: u8) -> Result>, Error> { + let mut 
result = Vec::new(); + let mut current_location = self.lookup.get(id)?; + + // Traverse the history chain up to specified depth + for _ in 0..depth { + // Get current value + let data = self.get_(current_location)?; + result.push(data); + + // Try to get previous location + match self.get_prev_pos_(current_location) { + Ok(location) => { + if location.position == 0 { + break; + } + current_location = location; + } + Err(_) => break, + } + } + + Ok(result) + } + + /// Deletes the data at the specified key position + pub fn delete(&mut self, id: u32) -> Result<(), Error> { + let location = self.lookup.get(id)?; + self.delete_(id, location)?; + self.lookup.delete(id)?; + Ok(()) + } + + /// Returns the next ID which will be used when storing in incremental mode + pub fn get_next_id(&mut self) -> Result { + if !self.incremental_mode { + return Err(Error::InvalidOperation("Incremental mode is not enabled".to_string())); + } + self.lookup.get_next_id() + } + + /// Closes the database, ensuring all data is saved + pub fn close(&mut self) -> Result<(), Error> { + self.save()?; + self.close_(); + Ok(()) + } + + /// Destroys the database, removing all files + pub fn destroy(&mut self) -> Result<(), Error> { + let _ = self.close(); + std::fs::remove_dir_all(&self.path)?; + Ok(()) + } + + // Helper methods + fn lookup_dump_path(&self) -> PathBuf { + self.path.join("lookup_dump.db") + } + + fn load(&mut self) -> Result<(), Error> { + let dump_path = self.lookup_dump_path(); + if dump_path.exists() { + self.lookup.import_sparse(&dump_path.to_string_lossy())?; + } + Ok(()) + } + + fn save(&mut self) -> Result<(), Error> { + self.lookup.export_sparse(&self.lookup_dump_path().to_string_lossy())?; + Ok(()) + } + + fn close_(&mut self) { + self.file = None; + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::env::temp_dir; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn get_temp_dir() -> PathBuf { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + 
.unwrap() + .as_secs(); + temp_dir().join(format!("ourdb_test_{}", timestamp)) + } + + #[test] + fn test_basic_operations() { + let temp_dir = get_temp_dir(); + + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: true, + file_size: None, + keysize: None, + }; + + let mut db = OurDB::new(config).unwrap(); + + // Test set and get + let test_data = b"Hello, OurDB!"; + let id = db.set(OurDBSetArgs { id: None, data: test_data }).unwrap(); + + let retrieved = db.get(id).unwrap(); + assert_eq!(retrieved, test_data); + + // Test update + let updated_data = b"Updated data"; + db.set(OurDBSetArgs { id: Some(id), data: updated_data }).unwrap(); + + let retrieved = db.get(id).unwrap(); + assert_eq!(retrieved, updated_data); + + // Test history + let history = db.get_history(id, 2).unwrap(); + assert_eq!(history.len(), 2); + assert_eq!(history[0], updated_data); + assert_eq!(history[1], test_data); + + // Test delete + db.delete(id).unwrap(); + assert!(db.get(id).is_err()); + + // Clean up + db.destroy().unwrap(); + } +} diff --git a/ourdb/src/location.rs b/ourdb/src/location.rs new file mode 100644 index 0000000..9284fab --- /dev/null +++ b/ourdb/src/location.rs @@ -0,0 +1,168 @@ +use crate::error::Error; + +/// Location represents a physical position in a database file +/// +/// It consists of a file number and a position within that file. +/// This allows OurDB to span multiple files for large datasets. 
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct Location { + /// File number (0-65535) + pub file_nr: u16, + /// Position within the file + pub position: u32, +} + +impl Location { + /// Creates a new Location from bytes based on keysize + /// + /// - keysize = 2: Only position (2 bytes), file_nr = 0 + /// - keysize = 3: Only position (3 bytes), file_nr = 0 + /// - keysize = 4: Only position (4 bytes), file_nr = 0 + /// - keysize = 6: file_nr (2 bytes) + position (4 bytes) + pub fn from_bytes(bytes: &[u8], keysize: u8) -> Result { + // Validate keysize + if ![2, 3, 4, 6].contains(&keysize) { + return Err(Error::InvalidOperation(format!("Invalid keysize: {}", keysize))); + } + + // Create padded bytes + let mut padded = vec![0u8; keysize as usize]; + let start_idx = keysize as usize - bytes.len(); + if start_idx < 0 { + return Err(Error::InvalidOperation("Input bytes exceed keysize".to_string())); + } + + for (i, &b) in bytes.iter().enumerate() { + if i + start_idx < padded.len() { + padded[start_idx + i] = b; + } + } + + let mut location = Location::default(); + + match keysize { + 2 => { + // Only position, 2 bytes big endian + location.position = u32::from(padded[0]) << 8 | u32::from(padded[1]); + location.file_nr = 0; + + // Verify limits + if location.position > 0xFFFF { + return Err(Error::InvalidOperation( + "Position exceeds max value for keysize=2 (max 65535)".to_string() + )); + } + }, + 3 => { + // Only position, 3 bytes big endian + location.position = u32::from(padded[0]) << 16 | u32::from(padded[1]) << 8 | u32::from(padded[2]); + location.file_nr = 0; + + // Verify limits + if location.position > 0xFFFFFF { + return Err(Error::InvalidOperation( + "Position exceeds max value for keysize=3 (max 16777215)".to_string() + )); + } + }, + 4 => { + // Only position, 4 bytes big endian + location.position = u32::from(padded[0]) << 24 | u32::from(padded[1]) << 16 + | u32::from(padded[2]) << 8 | u32::from(padded[3]); + location.file_nr = 0; + 
}, + 6 => { + // 2 bytes file_nr + 4 bytes position, all big endian + location.file_nr = u16::from(padded[0]) << 8 | u16::from(padded[1]); + location.position = u32::from(padded[2]) << 24 | u32::from(padded[3]) << 16 + | u32::from(padded[4]) << 8 | u32::from(padded[5]); + }, + _ => unreachable!(), + } + + Ok(location) + } + + /// Converts the location to bytes (always 6 bytes) + /// + /// Format: [file_nr (2 bytes)][position (4 bytes)] + pub fn to_bytes(&self) -> Vec { + let mut bytes = Vec::with_capacity(6); + + // Put file_nr first (2 bytes) + bytes.push((self.file_nr >> 8) as u8); + bytes.push(self.file_nr as u8); + + // Put position next (4 bytes) + bytes.push((self.position >> 24) as u8); + bytes.push((self.position >> 16) as u8); + bytes.push((self.position >> 8) as u8); + bytes.push(self.position as u8); + + bytes + } + + /// Converts the location to a u64 value + /// + /// The file_nr is stored in the most significant bits + pub fn to_u64(&self) -> u64 { + (u64::from(self.file_nr) << 32) | u64::from(self.position) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_location_from_bytes_keysize_2() { + let bytes = vec![0x12, 0x34]; + let location = Location::from_bytes(&bytes, 2).unwrap(); + assert_eq!(location.file_nr, 0); + assert_eq!(location.position, 0x1234); + } + + #[test] + fn test_location_from_bytes_keysize_3() { + let bytes = vec![0x12, 0x34, 0x56]; + let location = Location::from_bytes(&bytes, 3).unwrap(); + assert_eq!(location.file_nr, 0); + assert_eq!(location.position, 0x123456); + } + + #[test] + fn test_location_from_bytes_keysize_4() { + let bytes = vec![0x12, 0x34, 0x56, 0x78]; + let location = Location::from_bytes(&bytes, 4).unwrap(); + assert_eq!(location.file_nr, 0); + assert_eq!(location.position, 0x12345678); + } + + #[test] + fn test_location_from_bytes_keysize_6() { + let bytes = vec![0xAB, 0xCD, 0x12, 0x34, 0x56, 0x78]; + let location = Location::from_bytes(&bytes, 6).unwrap(); + assert_eq!(location.file_nr, 
0xABCD); + assert_eq!(location.position, 0x12345678); + } + + #[test] + fn test_location_to_bytes() { + let location = Location { + file_nr: 0xABCD, + position: 0x12345678, + }; + let bytes = location.to_bytes(); + assert_eq!(bytes, vec![0xAB, 0xCD, 0x12, 0x34, 0x56, 0x78]); + } + + #[test] + fn test_location_to_u64() { + let location = Location { + file_nr: 0xABCD, + position: 0x12345678, + }; + let value = location.to_u64(); + assert_eq!(value, 0xABCD_0000_0000 | 0x12345678); + } +} diff --git a/ourdb/src/lookup.rs b/ourdb/src/lookup.rs new file mode 100644 index 0000000..da5c610 --- /dev/null +++ b/ourdb/src/lookup.rs @@ -0,0 +1,519 @@ +use std::fs::{self, File, OpenOptions}; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::path::Path; + +use crate::error::Error; +use crate::location::Location; + +const DATA_FILE_NAME: &str = "data"; +const INCREMENTAL_FILE_NAME: &str = ".inc"; + +/// Configuration for creating a new lookup table +pub struct LookupConfig { + /// Size of the lookup table + pub size: u32, + /// Size of each entry in bytes (2-6) + /// - 2: For databases with < 65,536 records + /// - 3: For databases with < 16,777,216 records + /// - 4: For databases with < 4,294,967,296 records + /// - 6: For large databases requiring multiple files + pub keysize: u8, + /// Path for disk-based lookup + pub lookuppath: String, + /// Whether to use incremental mode + pub incremental_mode: bool, +} + +/// Lookup table maps keys to physical locations in the backend storage +pub struct LookupTable { + /// Size of each entry in bytes (2-6) + keysize: u8, + /// Path for disk-based lookup + lookuppath: String, + /// In-memory data for memory-based lookup + data: Vec, + /// Next empty slot if incremental mode is enabled + incremental: Option, +} + +impl LookupTable { + /// Creates a new lookup table with the given configuration + pub fn new(config: LookupConfig) -> Result { + // Verify keysize is valid + if ![2, 3, 4, 6].contains(&config.keysize) { + return 
Err(Error::InvalidOperation(format!("Invalid keysize: {}", config.keysize))); + } + + let incremental = if config.incremental_mode { + Some(get_incremental_info(&config)?) + } else { + None + }; + + if !config.lookuppath.is_empty() { + // Create directory if it doesn't exist + fs::create_dir_all(&config.lookuppath)?; + + // For disk-based lookup, create empty file if it doesn't exist + let data_path = Path::new(&config.lookuppath).join(DATA_FILE_NAME); + if !data_path.exists() { + let data = vec![0u8; config.size as usize * config.keysize as usize]; + fs::write(&data_path, &data)?; + } + + Ok(LookupTable { + data: Vec::new(), + keysize: config.keysize, + lookuppath: config.lookuppath, + incremental, + }) + } else { + // For memory-based lookup + Ok(LookupTable { + data: vec![0u8; config.size as usize * config.keysize as usize], + keysize: config.keysize, + lookuppath: String::new(), + incremental, + }) + } + } + + /// Gets a location for the given ID + pub fn get(&self, id: u32) -> Result { + let entry_size = self.keysize as usize; + + if !self.lookuppath.is_empty() { + // Disk-based lookup + let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME); + + // Check file size first + let file_size = fs::metadata(&data_path)?.len(); + let start_pos = id as u64 * entry_size as u64; + + if start_pos + entry_size as u64 > file_size { + return Err(Error::LookupError(format!( + "Invalid read for get in lut: {}: {} would exceed file size {}", + self.lookuppath, start_pos + entry_size as u64, file_size + ))); + } + + // Read directly from file + let mut file = File::open(&data_path)?; + file.seek(SeekFrom::Start(start_pos))?; + + let mut data = vec![0u8; entry_size]; + let bytes_read = file.read(&mut data)?; + + if bytes_read < entry_size { + return Err(Error::LookupError(format!( + "Incomplete read: expected {} bytes but got {}", + entry_size, bytes_read + ))); + } + + return Location::from_bytes(&data, self.keysize); + } + + // Memory-based lookup + if (id * 
self.keysize as u32) as usize >= self.data.len() { + return Err(Error::LookupError("Index out of bounds".to_string())); + } + + let start = (id * self.keysize as u32) as usize; + let end = start + entry_size; + + Location::from_bytes(&self.data[start..end], self.keysize) + } + + /// Sets a location for the given ID + pub fn set(&mut self, id: u32, location: Location) -> Result<(), Error> { + let entry_size = self.keysize as usize; + + // Handle incremental mode + if let Some(incremental) = self.incremental { + if id == incremental { + self.increment_index()?; + } + + if id > incremental { + return Err(Error::InvalidOperation( + "Cannot set ID for insertions when incremental mode is enabled".to_string() + )); + } + } + + // Convert location to bytes based on keysize + let location_bytes = match self.keysize { + 2 => { + if location.file_nr != 0 { + return Err(Error::InvalidOperation("file_nr must be 0 for keysize=2".to_string())); + } + if location.position > 0xFFFF { + return Err(Error::InvalidOperation( + "position exceeds max value for keysize=2 (max 65535)".to_string() + )); + } + vec![(location.position >> 8) as u8, location.position as u8] + }, + 3 => { + if location.file_nr != 0 { + return Err(Error::InvalidOperation("file_nr must be 0 for keysize=3".to_string())); + } + if location.position > 0xFFFFFF { + return Err(Error::InvalidOperation( + "position exceeds max value for keysize=3 (max 16777215)".to_string() + )); + } + vec![ + (location.position >> 16) as u8, + (location.position >> 8) as u8, + location.position as u8 + ] + }, + 4 => { + if location.file_nr != 0 { + return Err(Error::InvalidOperation("file_nr must be 0 for keysize=4".to_string())); + } + vec![ + (location.position >> 24) as u8, + (location.position >> 16) as u8, + (location.position >> 8) as u8, + location.position as u8 + ] + }, + 6 => { + // Full location with file_nr and position + location.to_bytes() + }, + _ => return Err(Error::InvalidOperation(format!("Invalid keysize: {}", 
self.keysize))), + }; + + if !self.lookuppath.is_empty() { + // Disk-based lookup + let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME); + let mut file = OpenOptions::new().write(true).open(data_path)?; + + let start_pos = id as u64 * entry_size as u64; + file.seek(SeekFrom::Start(start_pos))?; + file.write_all(&location_bytes)?; + } else { + // Memory-based lookup + let start = (id * self.keysize as u32) as usize; + if start + entry_size > self.data.len() { + return Err(Error::LookupError("Index out of bounds".to_string())); + } + + for (i, &byte) in location_bytes.iter().enumerate() { + self.data[start + i] = byte; + } + } + + Ok(()) + } + + /// Deletes an entry for the given ID + pub fn delete(&mut self, id: u32) -> Result<(), Error> { + // Set location to all zeros + self.set(id, Location::default()) + } + + /// Gets the next available ID in incremental mode + pub fn get_next_id(&self) -> Result { + let incremental = self.incremental.ok_or_else(|| + Error::InvalidOperation("Lookup table not in incremental mode".to_string()) + )?; + + let table_size = if !self.lookuppath.is_empty() { + let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME); + fs::metadata(data_path)?.len() as u32 + } else { + self.data.len() as u32 + }; + + if incremental * self.keysize as u32 >= table_size { + return Err(Error::LookupError("Lookup table is full".to_string())); + } + + Ok(incremental) + } + + /// Increments the index in incremental mode + pub fn increment_index(&mut self) -> Result<(), Error> { + let mut incremental = self.incremental.ok_or_else(|| + Error::InvalidOperation("Lookup table not in incremental mode".to_string()) + )?; + + incremental += 1; + self.incremental = Some(incremental); + + if !self.lookuppath.is_empty() { + let inc_path = Path::new(&self.lookuppath).join(INCREMENTAL_FILE_NAME); + fs::write(inc_path, incremental.to_string())?; + } + + Ok(()) + } + + /// Exports the lookup table to a file + pub fn export_data(&self, path: &str) -> 
Result<(), Error> { + if !self.lookuppath.is_empty() { + // For disk-based lookup, just copy the file + let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME); + fs::copy(data_path, path)?; + } else { + // For memory-based lookup, write the data to file + fs::write(path, &self.data)?; + } + Ok(()) + } + + /// Imports the lookup table from a file + pub fn import_data(&mut self, path: &str) -> Result<(), Error> { + if !self.lookuppath.is_empty() { + // For disk-based lookup, copy the file + let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME); + fs::copy(path, data_path)?; + } else { + // For memory-based lookup, read the data from file + self.data = fs::read(path)?; + } + Ok(()) + } + + /// Exports only non-zero entries to save space + pub fn export_sparse(&self, path: &str) -> Result<(), Error> { + let mut output = Vec::new(); + let entry_size = self.keysize as usize; + + if !self.lookuppath.is_empty() { + // For disk-based lookup + let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME); + let mut file = File::open(&data_path)?; + let file_size = fs::metadata(&data_path)?.len(); + let max_entries = file_size / entry_size as u64; + + for id in 0..max_entries { + file.seek(SeekFrom::Start(id * entry_size as u64))?; + + let mut buffer = vec![0u8; entry_size]; + let bytes_read = file.read(&mut buffer)?; + + if bytes_read < entry_size { + break; + } + + // Check if entry is non-zero + if buffer.iter().any(|&b| b != 0) { + // Write ID (4 bytes) + entry + output.extend_from_slice(&(id as u32).to_be_bytes()); + output.extend_from_slice(&buffer); + } + } + } else { + // For memory-based lookup + let max_entries = self.data.len() / entry_size; + + for id in 0..max_entries { + let start = id * entry_size; + let entry = &self.data[start..start + entry_size]; + + // Check if entry is non-zero + if entry.iter().any(|&b| b != 0) { + // Write ID (4 bytes) + entry + output.extend_from_slice(&(id as u32).to_be_bytes()); + 
output.extend_from_slice(entry); + } + } + } + + // Write the output to file + fs::write(path, &output)?; + Ok(()) + } + + /// Imports sparse data (only non-zero entries) + pub fn import_sparse(&mut self, path: &str) -> Result<(), Error> { + let data = fs::read(path)?; + let entry_size = self.keysize as usize; + let record_size = 4 + entry_size; // ID (4 bytes) + entry + + if data.len() % record_size != 0 { + return Err(Error::DataCorruption( + "Invalid sparse data format: size mismatch".to_string() + )); + } + + for chunk_start in (0..data.len()).step_by(record_size) { + if chunk_start + record_size > data.len() { + break; + } + + // Extract ID (4 bytes) + let id_bytes = &data[chunk_start..chunk_start + 4]; + let id = u32::from_be_bytes([id_bytes[0], id_bytes[1], id_bytes[2], id_bytes[3]]); + + // Extract entry + let entry = &data[chunk_start + 4..chunk_start + record_size]; + + // Create location from entry + let location = Location::from_bytes(entry, self.keysize)?; + + // Set the entry + self.set(id, location)?; + } + + Ok(()) + } + + /// Finds the highest ID with a non-zero entry + pub fn find_last_entry(&mut self) -> Result { + let mut last_id = 0u32; + let entry_size = self.keysize as usize; + + if !self.lookuppath.is_empty() { + // For disk-based lookup + let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME); + let mut file = File::open(&data_path)?; + let file_size = fs::metadata(&data_path)?.len(); + + let mut buffer = vec![0u8; entry_size]; + let mut pos = 0u32; + + while (pos as u64 * entry_size as u64) < file_size { + file.seek(SeekFrom::Start(pos as u64 * entry_size as u64))?; + + let bytes_read = file.read(&mut buffer)?; + if bytes_read == 0 || bytes_read < entry_size { + break; + } + + let location = Location::from_bytes(&buffer, self.keysize)?; + if location.position != 0 || location.file_nr != 0 { + last_id = pos; + } + + pos += 1; + } + } else { + // For memory-based lookup + for i in 0..(self.data.len() / entry_size) as u32 { + if let 
Ok(location) = self.get(i) { + if location.position != 0 || location.file_nr != 0 { + last_id = i; + } + } + } + } + + Ok(last_id) + } +} + +/// Helper function to get the incremental value +fn get_incremental_info(config: &LookupConfig) -> Result { + if !config.incremental_mode { + return Ok(0); + } + + if !config.lookuppath.is_empty() { + let inc_path = Path::new(&config.lookuppath).join(INCREMENTAL_FILE_NAME); + + if !inc_path.exists() { + // Create a separate file for storing the incremental value + fs::write(&inc_path, "1")?; + } + + let inc_str = fs::read_to_string(&inc_path)?; + let incremental = match inc_str.trim().parse::() { + Ok(val) => val, + Err(_) => { + // If the value is invalid, reset it to 1 + fs::write(&inc_path, "1")?; + 1 + } + }; + + Ok(incremental) + } else { + // For memory-based lookup, start with 1 + Ok(1) + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + use super::*; + use std::env::temp_dir; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn get_temp_dir() -> PathBuf { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + temp_dir().join(format!("ourdb_lookup_test_{}", timestamp)) + } + + #[test] + fn test_memory_lookup() { + let config = LookupConfig { + size: 1000, + keysize: 4, + lookuppath: String::new(), + incremental_mode: true, + }; + + let mut lookup = LookupTable::new(config).unwrap(); + + // Test set and get + let location = Location { + file_nr: 0, + position: 12345, + }; + + lookup.set(1, location).unwrap(); + let retrieved = lookup.get(1).unwrap(); + + assert_eq!(retrieved.file_nr, location.file_nr); + assert_eq!(retrieved.position, location.position); + + // Test incremental mode + let next_id = lookup.get_next_id().unwrap(); + assert_eq!(next_id, 2); + + lookup.increment_index().unwrap(); + let next_id = lookup.get_next_id().unwrap(); + assert_eq!(next_id, 3); + } + + #[test] + fn test_disk_lookup() { + let temp_dir = get_temp_dir(); + 
fs::create_dir_all(&temp_dir).unwrap(); + + let config = LookupConfig { + size: 1000, + keysize: 4, + lookuppath: temp_dir.to_string_lossy().to_string(), + incremental_mode: true, + }; + + let mut lookup = LookupTable::new(config).unwrap(); + + // Test set and get + let location = Location { + file_nr: 0, + position: 12345, + }; + + lookup.set(1, location).unwrap(); + let retrieved = lookup.get(1).unwrap(); + + assert_eq!(retrieved.file_nr, location.file_nr); + assert_eq!(retrieved.position, location.position); + + // Clean up + fs::remove_dir_all(temp_dir).unwrap(); + } +} diff --git a/ourdb/tests/integration_tests.rs b/ourdb/tests/integration_tests.rs new file mode 100644 index 0000000..7f9d831 --- /dev/null +++ b/ourdb/tests/integration_tests.rs @@ -0,0 +1,311 @@ +use ourdb::{OurDB, OurDBConfig, OurDBSetArgs}; +use std::env::temp_dir; +use std::fs; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; +use rand; + +// Helper function to create a unique temporary directory for tests +fn get_temp_dir() -> PathBuf { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let random_part = rand::random::(); + let dir = temp_dir().join(format!("ourdb_test_{}_{}", timestamp, random_part)); + + // Ensure the directory exists and is empty + if dir.exists() { + std::fs::remove_dir_all(&dir).unwrap(); + } + std::fs::create_dir_all(&dir).unwrap(); + + dir +} + +#[test] +fn test_basic_operations() { + let temp_dir = get_temp_dir(); + + // Create a new database with incremental mode + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: true, + file_size: None, + keysize: None, + }; + + + let mut db = OurDB::new(config).unwrap(); + + // Test set and get + let test_data = b"Hello, OurDB!"; + let id = db.set(OurDBSetArgs { id: None, data: test_data }).unwrap(); + + let retrieved = db.get(id).unwrap(); + assert_eq!(retrieved, test_data); + + // Test update + let updated_data = b"Updated data"; + 
db.set(OurDBSetArgs { id: Some(id), data: updated_data }).unwrap(); + + let retrieved = db.get(id).unwrap(); + assert_eq!(retrieved, updated_data); + + // Test history + let history = db.get_history(id, 2).unwrap(); + assert_eq!(history.len(), 2); + assert_eq!(history[0], updated_data); + assert_eq!(history[1], test_data); + + // Test delete + db.delete(id).unwrap(); + assert!(db.get(id).is_err()); + + // Clean up + db.destroy().unwrap(); +} + +#[test] +fn test_key_value_mode() { + let temp_dir = get_temp_dir(); + + + // Create a new database with key-value mode + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: false, + file_size: None, + keysize: None, + }; + + let mut db = OurDB::new(config).unwrap(); + + // Test set with explicit ID + let test_data = b"Key-value data"; + let id = 42; + db.set(OurDBSetArgs { id: Some(id), data: test_data }).unwrap(); + + let retrieved = db.get(id).unwrap(); + assert_eq!(retrieved, test_data); + + // Verify next_id fails in key-value mode + assert!(db.get_next_id().is_err()); + + // Clean up + db.destroy().unwrap(); +} + +#[test] +fn test_incremental_mode() { + let temp_dir = get_temp_dir(); + + // Create a new database with incremental mode + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: true, + file_size: None, + keysize: None, + }; + + + let mut db = OurDB::new(config).unwrap(); + + // Test auto-increment IDs + let data1 = b"First record"; + let id1 = db.set(OurDBSetArgs { id: None, data: data1 }).unwrap(); + + let data2 = b"Second record"; + let id2 = db.set(OurDBSetArgs { id: None, data: data2 }).unwrap(); + + // IDs should be sequential + assert_eq!(id2, id1 + 1); + + // Verify get_next_id works + let next_id = db.get_next_id().unwrap(); + assert_eq!(next_id, id2 + 1); + + // Clean up + db.destroy().unwrap(); +} + +#[test] +fn test_persistence() { + let temp_dir = get_temp_dir(); + + + // Create data in a new database + { + let config = OurDBConfig { + path: 
temp_dir.clone(), + incremental_mode: true, + file_size: None, + keysize: None, + }; + + let mut db = OurDB::new(config).unwrap(); + + let test_data = b"Persistent data"; + let id = db.set(OurDBSetArgs { id: None, data: test_data }).unwrap(); + + // Explicitly close the database + db.close().unwrap(); + + // ID should be 1 in a new database + assert_eq!(id, 1); + } + + // Reopen the database and verify data persists + { + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: true, + file_size: None, + keysize: None, + }; + + let mut db = OurDB::new(config).unwrap(); + + // Verify data is still there + let retrieved = db.get(1).unwrap(); + assert_eq!(retrieved, b"Persistent data"); + + // Verify incremental counter persisted + let next_id = db.get_next_id().unwrap(); + assert_eq!(next_id, 2); + + // Clean up + db.destroy().unwrap(); + } +} + +#[test] +fn test_different_keysizes() { + for keysize in [2, 3, 4, 6].iter() { + let temp_dir = get_temp_dir(); + + // Ensure the directory exists + std::fs::create_dir_all(&temp_dir).unwrap(); + + // Create a new database with specified keysize + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: true, + file_size: None, + keysize: Some(*keysize), + }; + + let mut db = OurDB::new(config).unwrap(); + + // Test basic operations + let test_data = b"Keysize test data"; + let id = db.set(OurDBSetArgs { id: None, data: test_data }).unwrap(); + + let retrieved = db.get(id).unwrap(); + assert_eq!(retrieved, test_data); + + // Clean up + db.destroy().unwrap(); + } +} + +#[test] +fn test_large_data() { + let temp_dir = get_temp_dir(); + + // Create a new database + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: true, + file_size: None, + keysize: None, + }; + + let mut db = OurDB::new(config).unwrap(); + + // Create a large data set (60KB - within the 64KB limit) + let large_data = vec![b'X'; 60 * 1024]; + + // Store and retrieve large data + let id = db.set(OurDBSetArgs 
{ id: None, data: &large_data }).unwrap(); + let retrieved = db.get(id).unwrap(); + + assert_eq!(retrieved.len(), large_data.len()); + assert_eq!(retrieved, large_data); + + // Clean up + db.destroy().unwrap(); +} + +#[test] +fn test_exceed_size_limit() { + let temp_dir = get_temp_dir(); + + // Create a new database + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: true, + file_size: None, + keysize: None, + }; + + let mut db = OurDB::new(config).unwrap(); + + // Create data larger than the 64KB limit (70KB) + let oversized_data = vec![b'X'; 70 * 1024]; + + // Attempt to store data that exceeds the size limit + let result = db.set(OurDBSetArgs { id: None, data: &oversized_data }); + + // Verify that an error is returned + assert!(result.is_err(), "Expected an error when storing data larger than 64KB"); + + // Clean up + db.destroy().unwrap(); +} + +#[test] +fn test_multiple_files() { + let temp_dir = get_temp_dir(); + + + // Create a new database with small file size to force multiple files + let config = OurDBConfig { + path: temp_dir.clone(), + incremental_mode: true, + file_size: Some(1024), // Very small file size (1KB) + keysize: Some(6), // 6-byte keysize for multiple files + }; + + let mut db = OurDB::new(config).unwrap(); + + // Store enough data to span multiple files + let data_size = 500; // bytes per record + let test_data = vec![b'A'; data_size]; + + let mut ids = Vec::new(); + for _ in 0..10 { + let id = db.set(OurDBSetArgs { id: None, data: &test_data }).unwrap(); + ids.push(id); + } + + // Verify all data can be retrieved + for &id in &ids { + let retrieved = db.get(id).unwrap(); + assert_eq!(retrieved.len(), data_size); + } + + // Verify multiple files were created + let files = fs::read_dir(&temp_dir).unwrap() + .filter_map(Result::ok) + .filter(|entry| { + let path = entry.path(); + path.is_file() && path.extension().map_or(false, |ext| ext == "db") + }) + .count(); + + assert!(files > 1, "Expected multiple database 
files, found {}", files); + + // Clean up + db.destroy().unwrap(); +}