This commit is contained in:
2025-04-20 06:34:31 +02:00
parent 838e966dc9
commit 189971509a
23 changed files with 2913 additions and 13 deletions

180
tst/Cargo.lock generated Normal file
View File

@@ -0,0 +1,180 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.172"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]]
name = "log"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "ourdb"
version = "0.1.0"
dependencies = [
"crc32fast",
"log",
"rand",
"thiserror",
]
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "syn"
version = "2.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tst"
version = "0.1.0"
dependencies = [
"log",
"ourdb",
"thiserror",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zerocopy"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

31
tst/Cargo.toml Normal file
View File

@@ -0,0 +1,31 @@
[package]
name = "tst"
version = "0.1.0"
edition = "2021"
description = "A persistent ternary search tree implementation using OurDB for storage"
authors = ["OurWorld Team"]
[dependencies]
ourdb = { path = "../ourdb" }
thiserror = "1.0.40"
log = "0.4.17"
[dev-dependencies]
# criterion = "0.5.1"
# Uncomment when benchmarks are implemented
# [[bench]]
# name = "tst_benchmarks"
# harness = false
[[example]]
name = "basic_usage"
path = "examples/basic_usage.rs"
[[example]]
name = "prefix_ops"
path = "examples/prefix_ops.rs"
[[example]]
name = "performance"
path = "examples/performance.rs"

183
tst/README.md Normal file
View File

@@ -0,0 +1,183 @@
# Ternary Search Tree (TST)
A persistent ternary search tree implementation in Rust using OurDB for storage.
## Overview
TST is a space-optimized tree data structure that enables efficient string key operations with persistent storage. This implementation provides a persistent ternary search tree that can be used for efficient string key operations, such as auto-complete, routing tables, and more.
A ternary search tree is a type of trie where each node has three children: left, middle, and right. Unlike a radix tree which compresses common prefixes, a TST stores one character per node and uses a binary search tree-like structure for efficient traversal.
Key characteristics:
- Each node stores a single character
- Nodes have three children: left (for characters < current), middle (for next character in key), and right (for characters > current)
- Leaf nodes contain the actual values
- Balanced structure for consistent performance across operations
## Features
- Efficient string key operations
- Persistent storage using OurDB backend
- Balanced tree structure for consistent performance
- Support for binary values
- Thread-safe operations through OurDB
## Usage
Add the dependency to your `Cargo.toml`:
```toml
[dependencies]
tst = { path = "../tst" }
```
### Basic Example
```rust
use tst::TST;
fn main() -> Result<(), tst::Error> {
// Create a new ternary search tree
let mut tree = TST::new("/tmp/tst", false)?;
// Set key-value pairs
tree.set("hello", b"world".to_vec())?;
tree.set("help", b"me".to_vec())?;
// Get values by key
let value = tree.get("hello")?;
println!("hello: {}", String::from_utf8_lossy(&value)); // Prints: world
// List keys by prefix
let keys = tree.list("hel")?; // Returns ["hello", "help"]
println!("Keys with prefix 'hel': {:?}", keys);
// Get all values by prefix
let values = tree.getall("hel")?; // Returns [b"world", b"me"]
// Delete keys
tree.delete("help")?;
Ok(())
}
```
## API
### Creating a TST
```rust
// Create a new ternary search tree
let mut tree = TST::new("/tmp/tst", false)?;
// Create a new ternary search tree and reset if it exists
let mut tree = TST::new("/tmp/tst", true)?;
```
### Setting Values
```rust
// Set a key-value pair
tree.set("key", b"value".to_vec())?;
```
### Getting Values
```rust
// Get a value by key
let value = tree.get("key")?;
```
### Deleting Keys
```rust
// Delete a key
tree.delete("key")?;
```
### Listing Keys by Prefix
```rust
// List all keys with a given prefix
let keys = tree.list("prefix")?;
```
### Getting All Values by Prefix
```rust
// Get all values for keys with a given prefix
let values = tree.getall("prefix")?;
```
## Performance Characteristics
- Search: O(k) where k is the key length
- Insert: O(k) for new keys
- Delete: O(k) plus potential node cleanup
- Space: O(n) where n is the total number of nodes
## Use Cases
TST is particularly useful for:
- Prefix-based searching
- Auto-complete systems
- Dictionary implementations
- Spell checking
- Any application requiring efficient string key operations with persistence
## Implementation Details
The TST implementation uses OurDB for persistent storage:
- Each node is serialized and stored as a record in OurDB
- Node references use OurDB record IDs
- The tree maintains a root node ID for traversal
- Node serialization includes version tracking for format evolution
## Running Tests
The project includes a comprehensive test suite that verifies all functionality:
```bash
# Run all tests
cargo test
# Run specific test file
cargo test --test basic_test
cargo test --test prefix_test
```
## Running Examples
The project includes example applications that demonstrate how to use the TST:
```bash
# Run the basic usage example
cargo run --example basic_usage
# Run the prefix operations example
cargo run --example prefix_ops
# Run the performance test
cargo run --example performance
```
## Comparison with RadixTree
While both TST and RadixTree provide efficient string key operations, they have different characteristics:
- **TST**: Stores one character per node, with a balanced structure for consistent performance across operations.
- **RadixTree**: Compresses common prefixes, which can be more space-efficient for keys with long common prefixes.
Choose TST when:
- You need balanced performance across all operations
- Your keys don't share long common prefixes
- You want a simpler implementation with predictable performance
Choose RadixTree when:
- Space efficiency is a priority
- Your keys share long common prefixes
- You prioritize lookup performance over balanced performance
## License
This project is licensed under the same license as the HeroCode project.

View File

@@ -0,0 +1,76 @@
use tst::TST;
use std::time::Instant;
use std::io::{self, Write};
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_example");
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
// Create a new TST
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Store some data
println!("Inserting data...");
tree.set("hello", b"world".to_vec())?;
tree.set("help", b"me".to_vec())?;
tree.set("helicopter", b"flying".to_vec())?;
tree.set("apple", b"fruit".to_vec())?;
tree.set("application", b"software".to_vec())?;
tree.set("banana", b"yellow".to_vec())?;
// Retrieve and print the data
let value = tree.get("hello")?;
println!("hello: {}", String::from_utf8_lossy(&value));
// List keys with prefix
println!("\nListing keys with prefix 'hel':");
let start = Instant::now();
let keys = tree.list("hel")?;
let duration = start.elapsed();
for key in &keys {
println!(" {}", key);
}
println!("Found {} keys in {:?}", keys.len(), duration);
// Get all values with prefix
println!("\nGetting all values with prefix 'app':");
let start = Instant::now();
let values = tree.getall("app")?;
let duration = start.elapsed();
for (i, value) in values.iter().enumerate() {
println!(" Value {}: {}", i + 1, String::from_utf8_lossy(value));
}
println!("Found {} values in {:?}", values.len(), duration);
// Delete a key
println!("\nDeleting 'help'...");
tree.delete("help")?;
// Verify deletion
println!("Listing keys with prefix 'hel' after deletion:");
let keys_after = tree.list("hel")?;
for key in &keys_after {
println!(" {}", key);
}
// Try to get a deleted key
match tree.get("help") {
Ok(_) => println!("Unexpectedly found 'help' after deletion!"),
Err(e) => println!("As expected, 'help' was not found: {}", e),
}
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("\nCleaned up database directory");
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}

134
tst/examples/performance.rs Normal file
View File

@@ -0,0 +1,134 @@
use tst::TST;
use std::time::{Duration, Instant};
use std::io::{self, Write};
// Number of records to insert
const TOTAL_RECORDS: usize = 100_000;
// How often to report progress (every X records)
const PROGRESS_INTERVAL: usize = 1_000;
// How many records to use for performance sampling
const PERFORMANCE_SAMPLE_SIZE: usize = 100;
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_performance_test");
// Completely remove and recreate the directory to ensure a clean start
if db_path.exists() {
std::fs::remove_dir_all(&db_path)?;
}
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
println!("Will insert {} records and show progress...", TOTAL_RECORDS);
// Create a new TST
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Track overall time
let start_time = Instant::now();
// Track performance metrics
let mut insertion_times = Vec::with_capacity(TOTAL_RECORDS / PROGRESS_INTERVAL);
let mut last_batch_time = Instant::now();
let mut last_batch_records = 0;
// Insert records and track progress
for i in 0..TOTAL_RECORDS {
let key = format!("key:{:08}", i);
// Use smaller values to avoid exceeding OurDB's size limit
let value = format!("val{}", i).into_bytes();
// Time the insertion of every Nth record for performance sampling
if i % PERFORMANCE_SAMPLE_SIZE == 0 {
let insert_start = Instant::now();
tree.set(&key, value)?;
let insert_duration = insert_start.elapsed();
// Only print detailed timing for specific samples to avoid flooding output
if i % (PERFORMANCE_SAMPLE_SIZE * 10) == 0 {
println!("Record {}: Insertion took {:?}", i, insert_duration);
}
} else {
tree.set(&key, value)?;
}
// Show progress at intervals
if (i + 1) % PROGRESS_INTERVAL == 0 || i == TOTAL_RECORDS - 1 {
let records_in_batch = i + 1 - last_batch_records;
let batch_duration = last_batch_time.elapsed();
let records_per_second = records_in_batch as f64 / batch_duration.as_secs_f64();
insertion_times.push((i + 1, batch_duration));
print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
i + 1, TOTAL_RECORDS,
(i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
records_per_second);
io::stdout().flush().unwrap();
last_batch_time = Instant::now();
last_batch_records = i + 1;
}
}
let total_duration = start_time.elapsed();
println!("\n\nPerformance Summary:");
println!("Total time to insert {} records: {:?}", TOTAL_RECORDS, total_duration);
println!("Average insertion rate: {:.2} records/second",
TOTAL_RECORDS as f64 / total_duration.as_secs_f64());
// Show performance trend
println!("\nPerformance Trend (records inserted vs. time per batch):");
for (i, (record_count, duration)) in insertion_times.iter().enumerate() {
if i % 10 == 0 || i == insertion_times.len() - 1 { // Only show every 10th point to avoid too much output
println!(" After {} records: {:?} for {} records ({:.2} records/sec)",
record_count,
duration,
PROGRESS_INTERVAL,
PROGRESS_INTERVAL as f64 / duration.as_secs_f64());
}
}
// Test access performance with distributed samples
println!("\nTesting access performance with distributed samples...");
let mut total_get_time = Duration::new(0, 0);
let num_samples = 1000;
// Use a simple distribution pattern instead of random
for i in 0..num_samples {
// Distribute samples across the entire range
let sample_id = (i * (TOTAL_RECORDS / num_samples)) % TOTAL_RECORDS;
let key = format!("key:{:08}", sample_id);
let get_start = Instant::now();
let _ = tree.get(&key)?;
total_get_time += get_start.elapsed();
}
println!("Average time to retrieve a record: {:?}",
total_get_time / num_samples as u32);
// Test prefix search performance
println!("\nTesting prefix search performance...");
let prefixes = ["key:0", "key:1", "key:5", "key:9"];
for prefix in &prefixes {
let list_start = Instant::now();
let keys = tree.list(prefix)?;
let list_duration = list_start.elapsed();
println!("Found {} keys with prefix '{}' in {:?}",
keys.len(), prefix, list_duration);
}
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("\nCleaned up database directory");
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}

123
tst/examples/prefix_ops.rs Normal file
View File

@@ -0,0 +1,123 @@
use tst::TST;
use std::time::Instant;
use std::io::{self, Write};
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_prefix_example");
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
// Create a new TST
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Insert a variety of keys with different prefixes
println!("Inserting data with various prefixes...");
// Names
let names = [
"Alice", "Alexander", "Amanda", "Andrew", "Amy",
"Bob", "Barbara", "Benjamin", "Brenda", "Brian",
"Charlie", "Catherine", "Christopher", "Cynthia", "Carl",
"David", "Diana", "Daniel", "Deborah", "Donald",
"Edward", "Elizabeth", "Eric", "Emily", "Ethan"
];
for (i, name) in names.iter().enumerate() {
let value = format!("person-{}", i).into_bytes();
tree.set(name, value)?;
}
// Cities
let cities = [
"New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
"Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose",
"Austin", "Jacksonville", "Fort Worth", "Columbus", "San Francisco",
"Charlotte", "Indianapolis", "Seattle", "Denver", "Washington"
];
for (i, city) in cities.iter().enumerate() {
let value = format!("city-{}", i).into_bytes();
tree.set(city, value)?;
}
// Countries
let countries = [
"United States", "Canada", "Mexico", "Brazil", "Argentina",
"United Kingdom", "France", "Germany", "Italy", "Spain",
"China", "Japan", "India", "Australia", "Russia"
];
for (i, country) in countries.iter().enumerate() {
let value = format!("country-{}", i).into_bytes();
tree.set(country, value)?;
}
println!("Total items inserted: {}", names.len() + cities.len() + countries.len());
// Test prefix operations
test_prefix(&mut tree, "A")?;
test_prefix(&mut tree, "B")?;
test_prefix(&mut tree, "C")?;
test_prefix(&mut tree, "San")?;
test_prefix(&mut tree, "United")?;
// Test non-existent prefix
test_prefix(&mut tree, "Z")?;
// Test empty prefix (should return all keys)
println!("\nTesting empty prefix (should return all keys):");
let start = Instant::now();
let all_keys = tree.list("")?;
let duration = start.elapsed();
println!("Found {} keys with empty prefix in {:?}", all_keys.len(), duration);
println!("First 5 keys (alphabetically):");
for key in all_keys.iter().take(5) {
println!(" {}", key);
}
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("\nCleaned up database directory");
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}
fn test_prefix(tree: &mut TST, prefix: &str) -> Result<(), tst::Error> {
println!("\nTesting prefix '{}':", prefix);
// Test list operation
let start = Instant::now();
let keys = tree.list(prefix)?;
let list_duration = start.elapsed();
println!("Found {} keys with prefix '{}' in {:?}", keys.len(), prefix, list_duration);
if !keys.is_empty() {
println!("Keys:");
for key in &keys {
println!(" {}", key);
}
// Test getall operation
let start = Instant::now();
let values = tree.getall(prefix)?;
let getall_duration = start.elapsed();
println!("Retrieved {} values in {:?}", values.len(), getall_duration);
println!("First value: {}",
if !values.is_empty() {
String::from_utf8_lossy(&values[0])
} else {
"None".into()
});
}
Ok(())
}

36
tst/src/error.rs Normal file
View File

@@ -0,0 +1,36 @@
//! Error types for the TST module.
use thiserror::Error;
use std::io;
/// Error type for TST operations.
#[derive(Debug, Error)]
pub enum Error {
/// Error from OurDB operations.
#[error("OurDB error: {0}")]
OurDB(#[from] ourdb::Error),
/// Error when a key is not found.
#[error("Key not found: {0}")]
KeyNotFound(String),
/// Error when a prefix is not found.
#[error("Prefix not found: {0}")]
PrefixNotFound(String),
/// Error during serialization.
#[error("Serialization error: {0}")]
Serialization(String),
/// Error during deserialization.
#[error("Deserialization error: {0}")]
Deserialization(String),
/// Error for invalid operations.
#[error("Invalid operation: {0}")]
InvalidOperation(String),
/// IO error.
#[error("IO error: {0}")]
IO(#[from] io::Error),
}

122
tst/src/lib.rs Normal file
View File

@@ -0,0 +1,122 @@
//! TST is a space-optimized tree data structure that enables efficient string key operations
//! with persistent storage using OurDB as a backend.
//!
//! This implementation provides a persistent ternary search tree that can be used for efficient
//! string key operations, such as auto-complete, routing tables, and more.
mod error;
mod node;
mod operations;
mod serialize;
pub use error::Error;
pub use node::TSTNode;
use ourdb::OurDB;
/// TST represents a ternary search tree data structure with persistent storage.
pub struct TST {
/// Database for persistent storage
db: OurDB,
/// Database ID of the root node
root_id: Option<u32>,
}
impl TST {
/// Creates a new TST with the specified database path.
///
/// # Arguments
///
/// * `path` - The path to the database directory
/// * `reset` - Whether to reset the database if it exists
///
/// # Returns
///
/// A new `TST` instance
///
/// # Errors
///
/// Returns an error if the database cannot be created or opened
pub fn new(path: &str, reset: bool) -> Result<Self, Error> {
operations::new_tst(path, reset)
}
/// Sets a key-value pair in the tree.
///
/// # Arguments
///
/// * `key` - The key to set
/// * `value` - The value to set
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error> {
operations::set(self, key, value)
}
/// Gets a value by key from the tree.
///
/// # Arguments
///
/// * `key` - The key to get
///
/// # Returns
///
/// The value associated with the key
///
/// # Errors
///
/// Returns an error if the key is not found or the operation fails
pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error> {
operations::get(self, key)
}
/// Deletes a key from the tree.
///
/// # Arguments
///
/// * `key` - The key to delete
///
/// # Errors
///
/// Returns an error if the key is not found or the operation fails
pub fn delete(&mut self, key: &str) -> Result<(), Error> {
operations::delete(self, key)
}
/// Lists all keys with a given prefix.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for
///
/// # Returns
///
/// A list of keys that start with the given prefix
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error> {
operations::list(self, prefix)
}
/// Gets all values for keys with a given prefix.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for
///
/// # Returns
///
/// A list of values for keys that start with the given prefix
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
operations::getall(self, prefix)
}
}

49
tst/src/node.rs Normal file
View File

@@ -0,0 +1,49 @@
//! Node types for the TST module.
/// Represents a node in the ternary search tree.
#[derive(Debug, Clone, PartialEq)]
pub struct TSTNode {
/// The character stored at this node.
pub character: char,
/// Value stored at this node (empty if not end of key).
pub value: Vec<u8>,
/// Whether this node represents the end of a key.
pub is_end_of_key: bool,
/// Reference to the left child node (for characters < current character).
pub left_id: Option<u32>,
/// Reference to the middle child node (for next character in key).
pub middle_id: Option<u32>,
/// Reference to the right child node (for characters > current character).
pub right_id: Option<u32>,
}
impl TSTNode {
/// Creates a new node.
pub fn new(character: char, value: Vec<u8>, is_end_of_key: bool) -> Self {
Self {
character,
value,
is_end_of_key,
left_id: None,
middle_id: None,
right_id: None,
}
}
/// Creates a new root node.
pub fn new_root() -> Self {
Self {
character: '\0', // Use null character for root
value: Vec::new(),
is_end_of_key: false,
left_id: None,
middle_id: None,
right_id: None,
}
}
}

418
tst/src/operations.rs Normal file
View File

@@ -0,0 +1,418 @@
//! Implementation of TST operations.
use crate::error::Error;
use crate::node::TSTNode;
use crate::TST;
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
/// Creates a new TST with the specified database path.
pub fn new_tst(path: &str, reset: bool) -> Result<TST, Error> {
// If the path exists and reset is true, remove it first
let path_buf = PathBuf::from(path);
if path_buf.exists() && reset {
std::fs::remove_dir_all(&path_buf)?;
}
// Create the directory if it doesn't exist
std::fs::create_dir_all(&path_buf)?;
let config = OurDBConfig {
path: path_buf,
incremental_mode: true,
file_size: Some(1024 * 1024), // 10MB file size for better performance with large datasets
keysize: Some(4), // Use keysize=4 (default)
};
let mut db = OurDB::new(config)?;
let root_id = if db.get_next_id()? == 1 || reset {
// Create a new root node
let root = TSTNode::new_root();
let root_id = db.set(OurDBSetArgs {
id: None,
data: &root.serialize(),
})?;
Some(root_id)
} else {
// Use existing root node
Some(1) // Root node always has ID 1
};
Ok(TST {
db,
root_id,
})
}
/// Sets a key-value pair in the tree.
pub fn set(tree: &mut TST, key: &str, value: Vec<u8>) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
set_recursive(tree, root_id, &chars, 0, value)?;
Ok(())
}
/// Recursive helper function for setting a key-value pair.
fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value: Vec<u8>) -> Result<u32, Error> {
let mut node = tree.get_node(node_id)?;
if pos >= chars.len() {
// We've reached the end of the key
node.is_end_of_key = true;
node.value = value;
return tree.save_node(Some(node_id), &node);
}
let current_char = chars[pos];
if node.character == '\0' {
// Root node or empty node, set the character
node.character = current_char;
let node_id = tree.save_node(Some(node_id), &node)?;
// Continue with the next character
if pos + 1 < chars.len() {
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
let mut updated_node = tree.get_node(node_id)?;
updated_node.middle_id = Some(new_id);
tree.save_node(Some(node_id), &updated_node)?;
return set_recursive(tree, new_id, chars, pos + 1, value);
} else {
// This is the last character
let mut updated_node = tree.get_node(node_id)?;
updated_node.is_end_of_key = true;
updated_node.value = value;
return tree.save_node(Some(node_id), &updated_node);
}
}
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
return set_recursive(tree, left_id, chars, pos, value);
} else {
// Create new left node
let new_node = TSTNode::new(current_char, Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.left_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos, value);
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
return set_recursive(tree, right_id, chars, pos, value);
} else {
// Create new right node
let new_node = TSTNode::new(current_char, Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.right_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos, value);
}
} else {
// Character matches, go middle (next character)
if pos + 1 >= chars.len() {
// This is the last character
node.is_end_of_key = true;
node.value = value;
return tree.save_node(Some(node_id), &node);
}
if let Some(middle_id) = node.middle_id {
return set_recursive(tree, middle_id, chars, pos + 1, value);
} else {
// Create new middle node
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.middle_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos + 1, value);
}
}
}
/// Gets a value by key from the tree.
pub fn get(tree: &mut TST, key: &str) -> Result<Vec<u8>, Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let node = tree.get_node(node_id)?;
if node.is_end_of_key {
Ok(node.value.clone())
} else {
Err(Error::KeyNotFound(key.to_string()))
}
}
/// Finds a node by key.
fn find_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
let node = tree.get_node(node_id)?;
if pos >= chars.len() {
return Ok(node_id);
}
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
find_node(tree, left_id, chars, pos)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
find_node(tree, right_id, chars, pos)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
} else {
// Character matches
if pos + 1 >= chars.len() {
// This is the last character
Ok(node_id)
} else if let Some(middle_id) = node.middle_id {
// Go to next character
find_node(tree, middle_id, chars, pos + 1)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
}
}
/// Deletes a key from the tree.
pub fn delete(tree: &mut TST, key: &str) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let mut node = tree.get_node(node_id)?;
if !node.is_end_of_key {
return Err(Error::KeyNotFound(key.to_string()));
}
// If the node has a middle child, just mark it as not end of key
if node.middle_id.is_some() || node.left_id.is_some() || node.right_id.is_some() {
node.is_end_of_key = false;
node.value = Vec::new();
tree.save_node(Some(node_id), &node)?;
return Ok(());
}
// Otherwise, we need to remove the node and update its parent
// This is more complex and would require tracking the path to the node
// For simplicity, we'll just mark it as not end of key for now
node.is_end_of_key = false;
node.value = Vec::new();
tree.save_node(Some(node_id), &node)?;
Ok(())
}
/// Lists all keys with a given prefix.
pub fn list(tree: &mut TST, prefix: &str) -> Result<Vec<String>, Error> {
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let mut result = Vec::new();
// Handle empty prefix case - will return all keys
if prefix.is_empty() {
collect_all_keys(tree, root_id, String::new(), &mut result)?;
return Ok(result);
}
// Find the node corresponding to the prefix
let chars: Vec<char> = prefix.chars().collect();
let node_id = match find_prefix_node(tree, root_id, &chars, 0) {
Ok(id) => id,
Err(_) => return Ok(Vec::new()), // Prefix not found, return empty list
};
// Collect all keys from the subtree
collect_keys_with_prefix(tree, node_id, prefix.to_string(), &mut result)?;
Ok(result)
}
/// Finds the node corresponding to a prefix.
fn find_prefix_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
if pos >= chars.len() {
return Ok(node_id);
}
let node = tree.get_node(node_id)?;
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
find_prefix_node(tree, left_id, chars, pos)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
find_prefix_node(tree, right_id, chars, pos)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
} else {
// Character matches
if pos + 1 >= chars.len() {
// This is the last character of the prefix
Ok(node_id)
} else if let Some(middle_id) = node.middle_id {
// Go to next character
find_prefix_node(tree, middle_id, chars, pos + 1)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
}
}
/// Collects all keys with a given prefix.
fn collect_keys_with_prefix(
tree: &mut TST,
node_id: u32,
current_path: String,
result: &mut Vec<String>,
) -> Result<(), Error> {
let node = tree.get_node(node_id)?;
// If this node is an end of key, add it to the result
if node.is_end_of_key {
result.push(current_path.clone());
}
// Recursively collect keys from all children
if let Some(left_id) = node.left_id {
collect_all_keys(tree, left_id, current_path.clone(), result)?;
}
if let Some(middle_id) = node.middle_id {
let mut new_path = current_path.clone();
new_path.push(node.character);
collect_all_keys(tree, middle_id, new_path, result)?;
}
if let Some(right_id) = node.right_id {
collect_all_keys(tree, right_id, current_path.clone(), result)?;
}
Ok(())
}
/// Recursively collects all keys under a node.
fn collect_all_keys(
tree: &mut TST,
node_id: u32,
current_path: String,
result: &mut Vec<String>,
) -> Result<(), Error> {
let node = tree.get_node(node_id)?;
let mut new_path = current_path.clone();
new_path.push(node.character);
// If this node is an end of key, add it to the result
if node.is_end_of_key {
result.push(new_path.clone());
}
// Recursively collect keys from all children
if let Some(left_id) = node.left_id {
collect_all_keys(tree, left_id, current_path.clone(), result)?;
}
if let Some(middle_id) = node.middle_id {
collect_all_keys(tree, middle_id, new_path.clone(), result)?;
}
if let Some(right_id) = node.right_id {
collect_all_keys(tree, right_id, current_path.clone(), result)?;
}
Ok(())
}
/// Gets all values for keys with a given prefix.
pub fn getall(tree: &mut TST, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
// Get all matching keys
let keys = list(tree, prefix)?;
// Get values for each key
let mut values = Vec::new();
for key in keys {
if let Ok(value) = get(tree, &key) {
values.push(value);
}
}
Ok(values)
}
impl TST {
/// Helper function to get a node from the database.
pub(crate) fn get_node(&mut self, node_id: u32) -> Result<TSTNode, Error> {
let data = self.db.get(node_id)?;
TSTNode::deserialize(&data)
}
/// Helper function to save a node to the database.
pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &TSTNode) -> Result<u32, Error> {
let data = node.serialize();
let args = OurDBSetArgs {
id: node_id,
data: &data,
};
Ok(self.db.set(args)?)
}
}

134
tst/src/serialize.rs Normal file
View File

@@ -0,0 +1,134 @@
//! Serialization and deserialization for TST nodes.
use crate::error::Error;
use crate::node::TSTNode;
/// Current binary format version.
const VERSION: u8 = 1;
impl TSTNode {
/// Serializes a node to bytes for storage.
pub fn serialize(&self) -> Vec<u8> {
let mut buffer = Vec::new();
// Version
buffer.push(VERSION);
// Character (as UTF-32)
let char_bytes = (self.character as u32).to_le_bytes();
buffer.extend_from_slice(&char_bytes);
// Is end of key
buffer.push(if self.is_end_of_key { 1 } else { 0 });
// Value (only if is_end_of_key)
if self.is_end_of_key {
let value_len = (self.value.len() as u32).to_le_bytes();
buffer.extend_from_slice(&value_len);
buffer.extend_from_slice(&self.value);
} else {
// Zero length
buffer.extend_from_slice(&[0, 0, 0, 0]);
}
// Child pointers
let left_id = self.left_id.unwrap_or(0).to_le_bytes();
buffer.extend_from_slice(&left_id);
let middle_id = self.middle_id.unwrap_or(0).to_le_bytes();
buffer.extend_from_slice(&middle_id);
let right_id = self.right_id.unwrap_or(0).to_le_bytes();
buffer.extend_from_slice(&right_id);
buffer
}
/// Deserializes bytes to a node.
pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
if data.len() < 14 { // Minimum size: version + char + is_end + value_len + 3 child IDs
return Err(Error::Deserialization("Data too short".to_string()));
}
let mut pos = 0;
// Version
let version = data[pos];
pos += 1;
if version != VERSION {
return Err(Error::Deserialization(format!("Unsupported version: {}", version)));
}
// Character
let char_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let char_code = u32::from_le_bytes(char_bytes);
let character = char::from_u32(char_code)
.ok_or_else(|| Error::Deserialization("Invalid character".to_string()))?;
pos += 4;
// Is end of key
let is_end_of_key = data[pos] != 0;
pos += 1;
// Value length
let value_len_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let value_len = u32::from_le_bytes(value_len_bytes) as usize;
pos += 4;
// Value
let value = if value_len > 0 {
if pos + value_len > data.len() {
return Err(Error::Deserialization("Value length exceeds data".to_string()));
}
data[pos..pos+value_len].to_vec()
} else {
Vec::new()
};
pos += value_len;
// Child pointers
if pos + 12 > data.len() {
return Err(Error::Deserialization("Data too short for child pointers".to_string()));
}
let left_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let left_id = u32::from_le_bytes(left_id_bytes);
pos += 4;
let middle_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let middle_id = u32::from_le_bytes(middle_id_bytes);
pos += 4;
let right_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let right_id = u32::from_le_bytes(right_id_bytes);
Ok(TSTNode {
character,
value,
is_end_of_key,
left_id: if left_id == 0 { None } else { Some(left_id) },
middle_id: if middle_id == 0 { None } else { Some(middle_id) },
right_id: if right_id == 0 { None } else { Some(right_id) },
})
}
}
/// Gets the common prefix of two strings.
pub fn get_common_prefix(a: &str, b: &str) -> String {
let mut result = String::new();
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let min_len = a_chars.len().min(b_chars.len());
for i in 0..min_len {
if a_chars[i] == b_chars[i] {
result.push(a_chars[i]);
} else {
break;
}
}
result
}

215
tst/tests/basic_test.rs Normal file
View File

@@ -0,0 +1,215 @@
use tst::TST;
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;
fn get_test_db_path() -> String {
let timestamp = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs();
let path = temp_dir().join(format!("tst_test_{}", timestamp));
fs::create_dir_all(&path).unwrap();
path.to_string_lossy().to_string()
}
fn cleanup_test_db(path: &str) {
let _ = fs::remove_dir_all(path);
}
#[test]
fn test_create_tst() {
let path = get_test_db_path();
let result = TST::new(&path, true);
assert!(result.is_ok());
cleanup_test_db(&path);
}
#[test]
fn test_set_and_get() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Test setting and getting a key
let key = "test_key";
let value = b"test_value".to_vec();
let set_result = tree.set(key, value.clone());
assert!(set_result.is_ok());
let get_result = tree.get(key);
assert!(get_result.is_ok());
assert_eq!(get_result.unwrap(), value);
cleanup_test_db(&path);
}
#[test]
fn test_get_nonexistent_key() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Test getting a key that doesn't exist
let get_result = tree.get("nonexistent_key");
assert!(get_result.is_err());
cleanup_test_db(&path);
}
#[test]
fn test_delete() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Set a key
let key = "delete_test";
let value = b"to_be_deleted".to_vec();
tree.set(key, value).unwrap();
// Verify it exists
let get_result = tree.get(key);
assert!(get_result.is_ok());
// Delete it
let delete_result = tree.delete(key);
assert!(delete_result.is_ok());
// Verify it's gone
let get_after_delete = tree.get(key);
assert!(get_after_delete.is_err());
cleanup_test_db(&path);
}
#[test]
fn test_multiple_keys() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert multiple keys
let keys = ["apple", "banana", "cherry", "date", "elderberry"];
for (i, key) in keys.iter().enumerate() {
let value = format!("value_{}", i).into_bytes();
tree.set(key, value).unwrap();
}
// Verify all keys exist
for (i, key) in keys.iter().enumerate() {
let expected_value = format!("value_{}", i).into_bytes();
let get_result = tree.get(key).unwrap();
assert_eq!(get_result, expected_value);
}
cleanup_test_db(&path);
}
#[test]
fn test_list_prefix() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with common prefixes
let keys = [
"apple", "application", "append",
"banana", "bandana",
"cherry", "chocolate"
];
for key in &keys {
tree.set(key, key.as_bytes().to_vec()).unwrap();
}
// Test prefix "app"
let app_keys = tree.list("app").unwrap();
assert_eq!(app_keys.len(), 3);
assert!(app_keys.contains(&"apple".to_string()));
assert!(app_keys.contains(&"application".to_string()));
assert!(app_keys.contains(&"append".to_string()));
// Test prefix "ban"
let ban_keys = tree.list("ban").unwrap();
assert_eq!(ban_keys.len(), 2);
assert!(ban_keys.contains(&"banana".to_string()));
assert!(ban_keys.contains(&"bandana".to_string()));
// Test prefix "c"
let c_keys = tree.list("c").unwrap();
assert_eq!(c_keys.len(), 2);
assert!(c_keys.contains(&"cherry".to_string()));
assert!(c_keys.contains(&"chocolate".to_string()));
// Test non-existent prefix
let z_keys = tree.list("z").unwrap();
assert_eq!(z_keys.len(), 0);
cleanup_test_db(&path);
}
#[test]
fn test_getall_prefix() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with common prefixes
let keys = [
"apple", "application", "append",
"banana", "bandana",
"cherry", "chocolate"
];
for key in &keys {
tree.set(key, key.as_bytes().to_vec()).unwrap();
}
// Test getall with prefix "app"
let app_values = tree.getall("app").unwrap();
assert_eq!(app_values.len(), 3);
// Convert values to strings for easier comparison
let app_value_strings: Vec<String> = app_values
.iter()
.map(|v| String::from_utf8_lossy(v).to_string())
.collect();
assert!(app_value_strings.contains(&"apple".to_string()));
assert!(app_value_strings.contains(&"application".to_string()));
assert!(app_value_strings.contains(&"append".to_string()));
cleanup_test_db(&path);
}
#[test]
fn test_empty_prefix() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert some keys
let keys = ["apple", "banana", "cherry"];
for key in &keys {
tree.set(key, key.as_bytes().to_vec()).unwrap();
}
// Test list with empty prefix (should return all keys)
let all_keys = tree.list("").unwrap();
assert_eq!(all_keys.len(), keys.len());
for key in &keys {
assert!(all_keys.contains(&key.to_string()));
}
cleanup_test_db(&path);
}

215
tst/tests/prefix_test.rs Normal file
View File

@@ -0,0 +1,215 @@
use tst::TST;
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;
fn get_test_db_path() -> String {
let timestamp = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs();
let path = temp_dir().join(format!("tst_prefix_test_{}", timestamp));
fs::create_dir_all(&path).unwrap();
path.to_string_lossy().to_string()
}
fn cleanup_test_db(path: &str) {
let _ = fs::remove_dir_all(path);
}
#[test]
fn test_prefix_with_common_prefixes() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with common prefixes
let test_data = [
("test", b"value1".to_vec()),
("testing", b"value2".to_vec()),
("tested", b"value3".to_vec()),
("tests", b"value4".to_vec()),
("tester", b"value5".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test prefix "test"
let keys = tree.list("test").unwrap();
assert_eq!(keys.len(), 5);
for (key, _) in &test_data {
assert!(keys.contains(&key.to_string()));
}
// Test prefix "teste"
let keys = tree.list("teste").unwrap();
assert_eq!(keys.len(), 2);
assert!(keys.contains(&"tested".to_string()));
assert!(keys.contains(&"tester".to_string()));
cleanup_test_db(&path);
}
#[test]
fn test_prefix_with_different_prefixes() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with different prefixes
let test_data = [
("apple", b"fruit1".to_vec()),
("banana", b"fruit2".to_vec()),
("cherry", b"fruit3".to_vec()),
("date", b"fruit4".to_vec()),
("elderberry", b"fruit5".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test each prefix
for (key, _) in &test_data {
let prefix = &key[0..1]; // First character
let keys = tree.list(prefix).unwrap();
assert!(keys.contains(&key.to_string()));
}
// Test non-existent prefix
let keys = tree.list("z").unwrap();
assert_eq!(keys.len(), 0);
cleanup_test_db(&path);
}
#[test]
fn test_prefix_with_empty_string() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert some keys
let test_data = [
("apple", b"fruit1".to_vec()),
("banana", b"fruit2".to_vec()),
("cherry", b"fruit3".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test empty prefix (should return all keys)
let keys = tree.list("").unwrap();
assert_eq!(keys.len(), test_data.len());
for (key, _) in &test_data {
assert!(keys.contains(&key.to_string()));
}
cleanup_test_db(&path);
}
#[test]
fn test_getall_with_prefix() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with common prefixes
let test_data = [
("test", b"value1".to_vec()),
("testing", b"value2".to_vec()),
("tested", b"value3".to_vec()),
("tests", b"value4".to_vec()),
("tester", b"value5".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test getall with prefix "test"
let values = tree.getall("test").unwrap();
assert_eq!(values.len(), 5);
for (_, value) in &test_data {
assert!(values.contains(value));
}
cleanup_test_db(&path);
}
#[test]
fn test_prefix_with_unicode_characters() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with Unicode characters
let test_data = [
("café", b"coffee".to_vec()),
("cafétéria", b"cafeteria".to_vec()),
("caffè", b"italian coffee".to_vec()),
("café au lait", b"coffee with milk".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test prefix "café"
let keys = tree.list("café").unwrap();
assert_eq!(keys.len(), 2);
assert!(keys.contains(&"café".to_string()));
assert!(keys.contains(&"café au lait".to_string()));
// Test prefix "caf"
let keys = tree.list("caf").unwrap();
assert_eq!(keys.len(), 4);
for (key, _) in &test_data {
assert!(keys.contains(&key.to_string()));
}
cleanup_test_db(&path);
}
#[test]
fn test_prefix_with_long_keys() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert long keys
let test_data = [
("this_is_a_very_long_key_for_testing_purposes_1", b"value1".to_vec()),
("this_is_a_very_long_key_for_testing_purposes_2", b"value2".to_vec()),
("this_is_a_very_long_key_for_testing_purposes_3", b"value3".to_vec()),
("this_is_another_long_key_for_testing", b"value4".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test prefix "this_is_a_very"
let keys = tree.list("this_is_a_very").unwrap();
assert_eq!(keys.len(), 3);
// Test prefix "this_is"
let keys = tree.list("this_is").unwrap();
assert_eq!(keys.len(), 4);
for (key, _) in &test_data {
assert!(keys.contains(&key.to_string()));
}
cleanup_test_db(&path);
}