This commit is contained in:
despiegk 2025-04-20 06:34:31 +02:00
parent 838e966dc9
commit 189971509a
23 changed files with 2913 additions and 13 deletions

View File

@ -117,13 +117,8 @@ impl ProductComponentBuilder {
}
}
<<<<<<< HEAD
/// Product represents a product or service offered in the system
#[derive(Debug, Clone, Serialize, Deserialize)]
=======
/// Product represents a product or service offered by the Freezone
#[derive(Debug, Clone, Serialize, Deserialize, CustomType)]
>>>>>>> builders_in_script
pub struct Product {
pub id: i64,
pub name: String,

95
ourdb/Cargo.lock generated
View File

@ -29,6 +29,12 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "bitflags"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "bumpalo"
version = "3.17.0"
@ -181,6 +187,22 @@ version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "errno"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e"
dependencies = [
"libc",
"windows-sys",
]
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "getrandom"
version = "0.2.15"
@ -189,7 +211,19 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "getrandom"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasi 0.14.2+wasi-0.2.4",
]
[[package]]
@ -250,6 +284,12 @@ version = "0.2.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
[[package]]
name = "linux-raw-sys"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
[[package]]
name = "log"
version = "0.4.27"
@ -291,6 +331,7 @@ dependencies = [
"criterion",
"log",
"rand",
"tempfile",
"thiserror",
]
@ -349,6 +390,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
[[package]]
name = "rand"
version = "0.8.5"
@ -376,7 +423,7 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
"getrandom 0.2.15",
]
[[package]]
@ -428,6 +475,19 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustix"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]]
name = "rustversion"
version = "1.0.20"
@ -492,6 +552,19 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tempfile"
version = "3.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf"
dependencies = [
"fastrand",
"getrandom 0.3.2",
"once_cell",
"rustix",
"windows-sys",
]
[[package]]
name = "thiserror"
version = "1.0.69"
@ -544,6 +617,15 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasi"
version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
@ -694,6 +776,15 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags",
]
[[package]]
name = "zerocopy"
version = "0.8.24"

View File

@ -13,6 +13,7 @@ rand = "0.8.5"
[dev-dependencies]
criterion = "0.5.1"
tempfile = "3.8.0"
[[bench]]
name = "ourdb_benchmarks"

View File

@ -26,7 +26,7 @@ use std::path::PathBuf;
fn main() -> Result<(), ourdb::Error> {
// Create a new database
let config = OurDBConfig {
path: PathBuf::from("/path/to/db"),
path: PathBuf::from("/tmp/ourdb"),
incremental_mode: true,
file_size: None, // Use default (500MB)
keysize: None, // Use default (4 bytes)

View File

@ -0,0 +1,277 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
use tempfile::tempdir;
fn criterion_benchmark(c: &mut Criterion) {
// Create a temporary directory for benchmarks
let temp_dir = tempdir().expect("Failed to create temp directory");
let db_path = temp_dir.path().to_path_buf();
// Benchmark set operation (insertion)
c.bench_function("set", |b| {
let config = OurDBConfig {
path: db_path.clone(),
incremental_mode: true,
file_size: Some(10 * 1024 * 1024), // 10MB
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let test_data = vec![b'X'; 100]; // 100 bytes of data
let mut i = 0;
b.iter(|| {
let args = OurDBSetArgs {
id: None, // Let the DB assign an ID
data: &test_data,
};
black_box(db.set(args).unwrap());
i += 1;
});
});
// Setup database with data for other benchmarks
let setup_config = OurDBConfig {
path: db_path.clone(),
incremental_mode: true,
file_size: Some(10 * 1024 * 1024), // 10MB
keysize: None,
};
let mut setup_db = OurDB::new(setup_config).unwrap();
let test_data = vec![b'X'; 100]; // 100 bytes of data
let mut ids = Vec::with_capacity(1000);
// Insert 1000 records
for _ in 0..1000 {
let args = OurDBSetArgs {
id: None,
data: &test_data,
};
let id = setup_db.set(args).unwrap();
ids.push(id);
}
// Benchmark get operation
c.bench_function("get", |b| {
let config = OurDBConfig {
path: db_path.clone(),
incremental_mode: true,
file_size: Some(10 * 1024 * 1024),
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let mut i = 0;
b.iter(|| {
let id = ids[i % ids.len()];
black_box(db.get(id).unwrap());
i += 1;
});
});
// Benchmark update operation
c.bench_function("update", |b| {
let config = OurDBConfig {
path: db_path.clone(),
incremental_mode: true,
file_size: Some(10 * 1024 * 1024),
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let updated_data = vec![b'Y'; 100]; // Different data for updates
let mut i = 0;
b.iter(|| {
let id = ids[i % ids.len()];
let args = OurDBSetArgs {
id: Some(id),
data: &updated_data,
};
black_box(db.set(args).unwrap());
i += 1;
});
});
// Benchmark get_history operation
c.bench_function("get_history", |b| {
let config = OurDBConfig {
path: db_path.clone(),
incremental_mode: true,
file_size: Some(10 * 1024 * 1024),
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let mut i = 0;
b.iter(|| {
let id = ids[i % ids.len()];
black_box(db.get_history(id, 2).unwrap());
i += 1;
});
});
// Benchmark delete operation
c.bench_function("delete", |b| {
// Create a fresh database for deletion benchmarks
let delete_dir = tempdir().expect("Failed to create temp directory");
let delete_path = delete_dir.path().to_path_buf();
let config = OurDBConfig {
path: delete_path.clone(),
incremental_mode: true,
file_size: Some(10 * 1024 * 1024),
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let test_data = vec![b'X'; 100];
// Setup keys to delete
let mut delete_ids = Vec::with_capacity(1000);
for _ in 0..1000 {
let args = OurDBSetArgs {
id: None,
data: &test_data,
};
let id = db.set(args).unwrap();
delete_ids.push(id);
}
let mut i = 0;
b.iter(|| {
let id = delete_ids[i % delete_ids.len()];
// Only try to delete if it exists (not already deleted)
if db.get(id).is_ok() {
black_box(db.delete(id).unwrap());
}
i += 1;
});
});
// Benchmark key-value mode vs incremental mode
let mut group = c.benchmark_group("mode_comparison");
// Benchmark set in key-value mode
group.bench_function("set_keyvalue_mode", |b| {
let kv_dir = tempdir().expect("Failed to create temp directory");
let kv_path = kv_dir.path().to_path_buf();
let config = OurDBConfig {
path: kv_path.clone(),
incremental_mode: false, // Key-value mode
file_size: Some(10 * 1024 * 1024),
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let test_data = vec![b'X'; 100];
let mut i = 0;
b.iter(|| {
let id = i + 1; // Explicit ID
let args = OurDBSetArgs {
id: Some(id as u32),
data: &test_data,
};
black_box(db.set(args).unwrap());
i += 1;
});
});
// Benchmark set in incremental mode
group.bench_function("set_incremental_mode", |b| {
let inc_dir = tempdir().expect("Failed to create temp directory");
let inc_path = inc_dir.path().to_path_buf();
let config = OurDBConfig {
path: inc_path.clone(),
incremental_mode: true, // Incremental mode
file_size: Some(10 * 1024 * 1024),
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let test_data = vec![b'X'; 100];
b.iter(|| {
let args = OurDBSetArgs {
id: None, // Auto-generated ID
data: &test_data,
};
black_box(db.set(args).unwrap());
});
});
group.finish();
// Benchmark with different record sizes
let mut size_group = c.benchmark_group("record_size");
for &size in &[10, 100, 1000, 10000] {
size_group.bench_function(format!("set_size_{}", size), |b| {
let size_dir = tempdir().expect("Failed to create temp directory");
let size_path = size_dir.path().to_path_buf();
let config = OurDBConfig {
path: size_path.clone(),
incremental_mode: true,
file_size: Some(10 * 1024 * 1024),
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let test_data = vec![b'X'; size];
b.iter(|| {
let args = OurDBSetArgs {
id: None,
data: &test_data,
};
black_box(db.set(args).unwrap());
});
});
size_group.bench_function(format!("get_size_{}", size), |b| {
let size_dir = tempdir().expect("Failed to create temp directory");
let size_path = size_dir.path().to_path_buf();
let config = OurDBConfig {
path: size_path.clone(),
incremental_mode: true,
file_size: Some(10 * 1024 * 1024),
keysize: None,
};
let mut db = OurDB::new(config).unwrap();
let test_data = vec![b'X'; size];
// Insert some records first
let mut size_ids = Vec::with_capacity(100);
for _ in 0..100 {
let args = OurDBSetArgs {
id: None,
data: &test_data,
};
let id = db.set(args).unwrap();
size_ids.push(id);
}
let mut i = 0;
b.iter(|| {
let id = size_ids[i % size_ids.len()];
black_box(db.get(id).unwrap());
i += 1;
});
});
}
size_group.finish();
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@ -38,7 +38,7 @@ use radixtree::RadixTree;
fn main() -> Result<(), radixtree::Error> {
// Create a new radix tree
let mut tree = RadixTree::new("/path/to/storage", false)?;
let mut tree = RadixTree::new("/tmp/radix", false)?;
// Set key-value pairs
tree.set("hello", b"world".to_vec())?;
@ -68,10 +68,10 @@ fn main() -> Result<(), radixtree::Error> {
```rust
// Create a new radix tree
let mut tree = RadixTree::new("/path/to/storage", false)?;
let mut tree = RadixTree::new("/tmp/radix", false)?;
// Create a new radix tree and reset if it exists
let mut tree = RadixTree::new("/path/to/storage", true)?;
let mut tree = RadixTree::new("/tmp/radix", true)?;
```
### Setting Values

View File

@ -0,0 +1,121 @@
use radixtree::RadixTree;
use std::time::{Duration, Instant};
use std::io::{self, Write};
// Use much smaller batches to avoid hitting OurDB's size limit
const BATCH_SIZE: usize = 1_000;
const NUM_BATCHES: usize = 1_000; // Total records: 1,000,000
const PROGRESS_INTERVAL: usize = 100;
/// Inserts `BATCH_SIZE * NUM_BATCHES` records in independent batches and prints timings.
///
/// Each batch gets its own freshly created radix tree in a directory under the system
/// temp dir. After inserting, the batch is probed with evenly spaced `get` calls and a
/// prefix `list` covering the batch's keys. Batch directories are removed once the batch
/// is finished unless the `KEEP_DB` environment variable is set.
///
/// # Errors
///
/// Returns the first error raised by the radix tree or by filesystem setup/cleanup.
fn main() -> Result<(), radixtree::Error> {
    // Overall metrics
    let total_start_time = Instant::now();
    let mut total_records_inserted = 0;
    let mut batch_times = Vec::with_capacity(NUM_BATCHES);

    // Keep the per-batch databases only when explicitly requested; otherwise
    // NUM_BATCHES directories would pile up in the temp dir.
    let keep_db = std::env::var("KEEP_DB").is_ok();

    println!("Will insert up to {} records in batches of {}",
        BATCH_SIZE * NUM_BATCHES, BATCH_SIZE);

    // Process in batches to avoid OurDB size limits
    for batch in 0..NUM_BATCHES {
        // Create a new database for each batch
        let batch_path = std::env::temp_dir().join(format!("radixtree_batch_{}", batch));

        // Clean up any existing database
        if batch_path.exists() {
            std::fs::remove_dir_all(&batch_path)?;
        }
        std::fs::create_dir_all(&batch_path)?;

        println!("\nBatch {}/{}: Creating new radix tree...", batch + 1, NUM_BATCHES);
        let mut tree = RadixTree::new(batch_path.to_str().unwrap(), true)?;

        let batch_start_time = Instant::now();
        let mut last_progress_time = Instant::now();
        let mut last_progress_count = 0;

        // Insert records for this batch
        for i in 0..BATCH_SIZE {
            let global_index = batch * BATCH_SIZE + i;
            let key = format!("key:{:08}", global_index);
            let value = format!("val{}", global_index).into_bytes();

            tree.set(&key, value)?;

            // Show progress at intervals
            if (i + 1) % PROGRESS_INTERVAL == 0 || i == BATCH_SIZE - 1 {
                let records_since_last = i + 1 - last_progress_count;
                let time_since_last = last_progress_time.elapsed();
                let records_per_second = records_since_last as f64 / time_since_last.as_secs_f64();

                print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
                    i + 1, BATCH_SIZE,
                    (i + 1) as f64 / BATCH_SIZE as f64 * 100.0,
                    records_per_second);
                io::stdout().flush().unwrap();

                last_progress_time = Instant::now();
                last_progress_count = i + 1;
            }
        }

        let batch_duration = batch_start_time.elapsed();
        batch_times.push(batch_duration);
        total_records_inserted += BATCH_SIZE;

        println!("\nBatch {}/{} completed in {:?} ({:.2} records/sec)",
            batch + 1, NUM_BATCHES,
            batch_duration,
            BATCH_SIZE as f64 / batch_duration.as_secs_f64());

        // Test random access performance for this batch
        println!("Testing access performance for batch {}...", batch + 1);
        let mut total_get_time = Duration::new(0, 0);
        let num_samples = 100;

        // Use a simple distribution pattern
        for i in 0..num_samples {
            // Distribute samples across the batch
            let sample_id = batch * BATCH_SIZE + (i * (BATCH_SIZE / num_samples));
            let key = format!("key:{:08}", sample_id);

            let get_start = Instant::now();
            let _ = tree.get(&key)?;
            total_get_time += get_start.elapsed();
        }

        println!("Average time to retrieve a record: {:?}",
            total_get_time / num_samples as u32);

        // Test prefix search performance.
        // Keys are zero-padded to a fixed width, so every key of this batch shares the
        // common prefix of its first and last key; listing that prefix exercises the
        // whole batch instead of (almost always) matching nothing.
        println!("Testing prefix search performance...");
        let first_key = format!("key:{:08}", batch * BATCH_SIZE);
        let last_key = format!("key:{:08}", batch * BATCH_SIZE + BATCH_SIZE - 1);
        let common_len = first_key
            .bytes()
            .zip(last_key.bytes())
            .take_while(|(a, b)| a == b)
            .count();
        // Keys are pure ASCII, so slicing at a byte offset is safe.
        let prefix = &first_key[..common_len];

        let list_start = Instant::now();
        let keys = tree.list(prefix)?;
        let list_duration = list_start.elapsed();

        println!("Found {} keys with prefix '{}' in {:?}",
            keys.len(), prefix, list_duration);

        // Close the tree before touching its directory, then remove the batch data
        drop(tree);
        if !keep_db {
            std::fs::remove_dir_all(&batch_path)?;
        }
    }

    // Overall performance summary
    let total_duration = total_start_time.elapsed();
    println!("\n\nPerformance Summary:");
    println!("Total time to insert {} records: {:?}", total_records_inserted, total_duration);
    println!("Average insertion rate: {:.2} records/second",
        total_records_inserted as f64 / total_duration.as_secs_f64());

    // Show performance trend
    println!("\nPerformance Trend (batch number vs. time):");
    for (i, duration) in batch_times.iter().enumerate() {
        if i % 10 == 0 || i == batch_times.len() - 1 { // Only show every 10th point
            println!(" Batch {}: {:?} ({:.2} records/sec)",
                i + 1,
                duration,
                BATCH_SIZE as f64 / duration.as_secs_f64());
        }
    }

    Ok(())
}

View File

@ -0,0 +1,134 @@
use radixtree::RadixTree;
use std::time::{Duration, Instant};
use std::io::{self, Write};
// Number of records to insert
const TOTAL_RECORDS: usize = 1_000_000;
// How often to report progress (every X records)
const PROGRESS_INTERVAL: usize = 10_000;
// How many records to use for performance sampling
const PERFORMANCE_SAMPLE_SIZE: usize = 1000;
/// Inserts `TOTAL_RECORDS` records into a single radix tree and prints timings.
///
/// Reports insertion throughput per progress interval, then measures evenly spaced
/// `get` calls and prefix `list` calls. The database lives under the system temp dir
/// and is removed at the end unless the `KEEP_DB` environment variable is set.
///
/// # Errors
///
/// Returns the first error raised by the radix tree or by filesystem setup/cleanup.
fn main() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the database
    let db_path = std::env::temp_dir().join("radixtree_performance_test");

    // Completely remove and recreate the directory to ensure a clean start
    if db_path.exists() {
        std::fs::remove_dir_all(&db_path)?;
    }
    std::fs::create_dir_all(&db_path)?;

    println!("Creating radix tree at: {}", db_path.display());
    println!("Will insert {} records and show progress...", TOTAL_RECORDS);

    // Create a new radix tree
    let mut tree = RadixTree::new(db_path.to_str().unwrap(), true)?;

    // Track overall time
    let start_time = Instant::now();

    // Track performance metrics: (records so far, records in this interval, interval time)
    let mut insertion_times = Vec::with_capacity(TOTAL_RECORDS / PROGRESS_INTERVAL);
    let mut last_batch_time = Instant::now();
    let mut last_batch_records = 0;

    // Insert records and track progress
    for i in 0..TOTAL_RECORDS {
        let key = format!("key:{:08}", i);
        // Use smaller values to avoid exceeding OurDB's size limit
        let value = format!("val{}", i).into_bytes();

        // Time the insertion of every Nth record for performance sampling
        if i % PERFORMANCE_SAMPLE_SIZE == 0 {
            let insert_start = Instant::now();
            tree.set(&key, value)?;
            let insert_duration = insert_start.elapsed();

            // Only print detailed timing for specific samples to avoid flooding output
            if i % (PERFORMANCE_SAMPLE_SIZE * 10) == 0 {
                println!("Record {}: Insertion took {:?}", i, insert_duration);
            }
        } else {
            tree.set(&key, value)?;
        }

        // Show progress at intervals
        if (i + 1) % PROGRESS_INTERVAL == 0 || i == TOTAL_RECORDS - 1 {
            let records_in_batch = i + 1 - last_batch_records;
            let batch_duration = last_batch_time.elapsed();
            let records_per_second = records_in_batch as f64 / batch_duration.as_secs_f64();

            // Store the real interval size so a partial final interval is reported correctly
            insertion_times.push((i + 1, records_in_batch, batch_duration));

            print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
                i + 1, TOTAL_RECORDS,
                (i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
                records_per_second);
            io::stdout().flush().unwrap();

            last_batch_time = Instant::now();
            last_batch_records = i + 1;
        }
    }

    let total_duration = start_time.elapsed();

    println!("\n\nPerformance Summary:");
    println!("Total time to insert {} records: {:?}", TOTAL_RECORDS, total_duration);
    println!("Average insertion rate: {:.2} records/second",
        TOTAL_RECORDS as f64 / total_duration.as_secs_f64());

    // Show performance trend
    println!("\nPerformance Trend (records inserted vs. time per batch):");
    for (i, (record_count, batch_records, duration)) in insertion_times.iter().enumerate() {
        if i % 10 == 0 || i == insertion_times.len() - 1 { // Only show every 10th point to avoid too much output
            println!(" After {} records: {:?} for {} records ({:.2} records/sec)",
                record_count,
                duration,
                batch_records,
                *batch_records as f64 / duration.as_secs_f64());
        }
    }

    // Test access performance with distributed samples
    println!("\nTesting access performance with distributed samples...");
    let mut total_get_time = Duration::new(0, 0);
    let num_samples = 1000;

    // Use a simple distribution pattern instead of random
    for i in 0..num_samples {
        // Distribute samples across the entire range
        let sample_id = (i * (TOTAL_RECORDS / num_samples)) % TOTAL_RECORDS;
        let key = format!("key:{:08}", sample_id);

        let get_start = Instant::now();
        let _ = tree.get(&key)?;
        total_get_time += get_start.elapsed();
    }

    println!("Average time to retrieve a record: {:?}",
        total_get_time / num_samples as u32);

    // Test prefix search performance.
    // Keys are 8-digit zero-padded indices below 1,000,000, so they all start with
    // "key:00"; the third digit selects distinct 100,000-key slices of the data.
    println!("\nTesting prefix search performance...");
    let prefixes = ["key:000", "key:001", "key:005", "key:009"];

    for prefix in &prefixes {
        let list_start = Instant::now();
        let keys = tree.list(prefix)?;
        let list_duration = list_start.elapsed();

        println!("Found {} keys with prefix '{}' in {:?}",
            keys.len(), prefix, list_duration);
    }

    // Clean up (optional)
    if std::env::var("KEEP_DB").is_err() {
        std::fs::remove_dir_all(&db_path)?;
        println!("\nCleaned up database directory");
    } else {
        println!("\nDatabase kept at: {}", db_path.display());
    }

    Ok(())
}

View File

@ -13,8 +13,8 @@ pub fn new_radix_tree(path: &str, reset: bool) -> Result<RadixTree, Error> {
let config = OurDBConfig {
path: PathBuf::from(path),
incremental_mode: true,
file_size: Some(1024 * 1024), // 1MB file size
keysize: Some(4), // Default key size
file_size: Some(1024 * 1024 * 10), // 10MB file size for better performance with large datasets
keysize: Some(6), // Use keysize=6 to support multiple files (file_nr + position)
};
let mut db = OurDB::new(config)?;

180
tst/Cargo.lock generated Normal file
View File

@ -0,0 +1,180 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.172"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]]
name = "log"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "ourdb"
version = "0.1.0"
dependencies = [
"crc32fast",
"log",
"rand",
"thiserror",
]
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "syn"
version = "2.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tst"
version = "0.1.0"
dependencies = [
"log",
"ourdb",
"thiserror",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zerocopy"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

31
tst/Cargo.toml Normal file
View File

@ -0,0 +1,31 @@
[package]
name = "tst"
version = "0.1.0"
edition = "2021"
description = "A persistent ternary search tree implementation using OurDB for storage"
authors = ["OurWorld Team"]
[dependencies]
ourdb = { path = "../ourdb" }
thiserror = "1.0.40"
log = "0.4.17"
[dev-dependencies]
# criterion = "0.5.1"
# Uncomment when benchmarks are implemented
# [[bench]]
# name = "tst_benchmarks"
# harness = false
[[example]]
name = "basic_usage"
path = "examples/basic_usage.rs"
[[example]]
name = "prefix_ops"
path = "examples/prefix_ops.rs"
[[example]]
name = "performance"
path = "examples/performance.rs"

183
tst/README.md Normal file
View File

@ -0,0 +1,183 @@
# Ternary Search Tree (TST)
A persistent ternary search tree implementation in Rust using OurDB for storage.
## Overview
TST is a space-optimized tree data structure for string keys. This implementation persists the tree in OurDB and supports efficient string key operations for use cases such as auto-complete, routing tables, and more.
A ternary search tree is a type of trie where each node has three children: left, middle, and right. Unlike a radix tree which compresses common prefixes, a TST stores one character per node and uses a binary search tree-like structure for efficient traversal.
Key characteristics:
- Each node stores a single character
- Nodes have three children: left (for characters < current), middle (for next character in key), and right (for characters > current)
- Leaf nodes contain the actual values
- Balanced structure for consistent performance across operations
## Features
- Efficient string key operations
- Persistent storage using OurDB backend
- Balanced tree structure for consistent performance
- Support for binary values
- Thread-safe operations through OurDB
## Usage
Add the dependency to your `Cargo.toml`:
```toml
[dependencies]
tst = { path = "../tst" }
```
### Basic Example
```rust
use tst::TST;
fn main() -> Result<(), tst::Error> {
// Create a new ternary search tree
let mut tree = TST::new("/tmp/tst", false)?;
// Set key-value pairs
tree.set("hello", b"world".to_vec())?;
tree.set("help", b"me".to_vec())?;
// Get values by key
let value = tree.get("hello")?;
println!("hello: {}", String::from_utf8_lossy(&value)); // Prints: world
// List keys by prefix
let keys = tree.list("hel")?; // Returns ["hello", "help"]
println!("Keys with prefix 'hel': {:?}", keys);
// Get all values by prefix
let values = tree.getall("hel")?; // Returns [b"world", b"me"]
// Delete keys
tree.delete("help")?;
Ok(())
}
```
## API
### Creating a TST
```rust
// Create a new ternary search tree
let mut tree = TST::new("/tmp/tst", false)?;
// Create a new ternary search tree and reset if it exists
let mut tree = TST::new("/tmp/tst", true)?;
```
### Setting Values
```rust
// Set a key-value pair
tree.set("key", b"value".to_vec())?;
```
### Getting Values
```rust
// Get a value by key
let value = tree.get("key")?;
```
### Deleting Keys
```rust
// Delete a key
tree.delete("key")?;
```
### Listing Keys by Prefix
```rust
// List all keys with a given prefix
let keys = tree.list("prefix")?;
```
### Getting All Values by Prefix
```rust
// Get all values for keys with a given prefix
let values = tree.getall("prefix")?;
```
## Performance Characteristics
- Search: O(k) where k is the key length
- Insert: O(k) for new keys
- Delete: O(k) plus potential node cleanup
- Space: O(n) where n is the total number of nodes
## Use Cases
TST is particularly useful for:
- Prefix-based searching
- Auto-complete systems
- Dictionary implementations
- Spell checking
- Any application requiring efficient string key operations with persistence
## Implementation Details
The TST implementation uses OurDB for persistent storage:
- Each node is serialized and stored as a record in OurDB
- Node references use OurDB record IDs
- The tree maintains a root node ID for traversal
- Node serialization includes version tracking for format evolution
## Running Tests
The project includes a comprehensive test suite that verifies all functionality:
```bash
# Run all tests
cargo test
# Run specific test file
cargo test --test basic_test
cargo test --test prefix_test
```
## Running Examples
The project includes example applications that demonstrate how to use the TST:
```bash
# Run the basic usage example
cargo run --example basic_usage
# Run the prefix operations example
cargo run --example prefix_ops
# Run the performance test
cargo run --example performance
```
## Comparison with RadixTree
While both TST and RadixTree provide efficient string key operations, they have different characteristics:
- **TST**: Stores one character per node, with a balanced structure for consistent performance across operations.
- **RadixTree**: Compresses common prefixes, which can be more space-efficient for keys with long common prefixes.
Choose TST when:
- You need balanced performance across all operations
- Your keys don't share long common prefixes
- You want a simpler implementation with predictable performance
Choose RadixTree when:
- Space efficiency is a priority
- Your keys share long common prefixes
- You prioritize lookup speed over uniform performance across all operations
## License
This project is licensed under the same license as the HeroCode project.

View File

@ -0,0 +1,76 @@
use tst::TST;
use std::time::Instant;
use std::io::{self, Write};
/// Walks through the basic TST operations: set, get, prefix listing,
/// prefix value retrieval and deletion.
///
/// The database is created under the system temp dir and removed at the end
/// unless the `KEEP_DB` environment variable is set.
fn main() -> Result<(), tst::Error> {
    // Location of the example database inside the system temp directory
    let db_path = std::env::temp_dir().join("tst_example");
    std::fs::create_dir_all(&db_path)?;

    println!("Creating ternary search tree at: {}", db_path.display());

    // Open the tree, resetting any previous content
    let mut tree = TST::new(db_path.to_str().unwrap(), true)?;

    // Populate the tree from a fixed table of key/value pairs
    println!("Inserting data...");
    let entries = [
        ("hello", b"world".to_vec()),
        ("help", b"me".to_vec()),
        ("helicopter", b"flying".to_vec()),
        ("apple", b"fruit".to_vec()),
        ("application", b"software".to_vec()),
        ("banana", b"yellow".to_vec()),
    ];
    for (key, value) in entries {
        tree.set(key, value)?;
    }

    // Read one value back
    let hello_value = tree.get("hello")?;
    println!("hello: {}", String::from_utf8_lossy(&hello_value));

    // Timed prefix listing
    println!("\nListing keys with prefix 'hel':");
    let list_timer = Instant::now();
    let hel_keys = tree.list("hel")?;
    let list_elapsed = list_timer.elapsed();

    hel_keys.iter().for_each(|key| println!(" {}", key));
    println!("Found {} keys in {:?}", hel_keys.len(), list_elapsed);

    // Timed retrieval of all values under a prefix
    println!("\nGetting all values with prefix 'app':");
    let getall_timer = Instant::now();
    let app_values = tree.getall("app")?;
    let getall_elapsed = getall_timer.elapsed();

    for (value, position) in app_values.iter().zip(1..) {
        println!(" Value {}: {}", position, String::from_utf8_lossy(value));
    }
    println!("Found {} values in {:?}", app_values.len(), getall_elapsed);

    // Remove one key
    println!("\nDeleting 'help'...");
    tree.delete("help")?;

    // Show what is left under the same prefix
    println!("Listing keys with prefix 'hel' after deletion:");
    let remaining = tree.list("hel")?;
    remaining.iter().for_each(|key| println!(" {}", key));

    // A lookup of the deleted key is expected to fail
    if let Err(e) = tree.get("help") {
        println!("As expected, 'help' was not found: {}", e);
    } else {
        println!("Unexpectedly found 'help' after deletion!");
    }

    // Optional cleanup, skipped when KEEP_DB is set
    if std::env::var("KEEP_DB").is_ok() {
        println!("\nDatabase kept at: {}", db_path.display());
    } else {
        std::fs::remove_dir_all(&db_path)?;
        println!("\nCleaned up database directory");
    }

    Ok(())
}

134
tst/examples/performance.rs Normal file
View File

@ -0,0 +1,134 @@
use tst::TST;
use std::time::{Duration, Instant};
use std::io::{self, Write};
// Total number of records inserted by the benchmark
const TOTAL_RECORDS: usize = 100_000;
// How often to report progress (every PROGRESS_INTERVAL records)
const PROGRESS_INTERVAL: usize = 1_000;
// Sampling stride: the insertion of every PERFORMANCE_SAMPLE_SIZE-th record is timed individually
const PERFORMANCE_SAMPLE_SIZE: usize = 100;
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_performance_test");
// Completely remove and recreate the directory to ensure a clean start
if db_path.exists() {
std::fs::remove_dir_all(&db_path)?;
}
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
println!("Will insert {} records and show progress...", TOTAL_RECORDS);
// Create a new TST
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Track overall time
let start_time = Instant::now();
// Track performance metrics
let mut insertion_times = Vec::with_capacity(TOTAL_RECORDS / PROGRESS_INTERVAL);
let mut last_batch_time = Instant::now();
let mut last_batch_records = 0;
// Insert records and track progress
for i in 0..TOTAL_RECORDS {
let key = format!("key:{:08}", i);
// Use smaller values to avoid exceeding OurDB's size limit
let value = format!("val{}", i).into_bytes();
// Time the insertion of every Nth record for performance sampling
if i % PERFORMANCE_SAMPLE_SIZE == 0 {
let insert_start = Instant::now();
tree.set(&key, value)?;
let insert_duration = insert_start.elapsed();
// Only print detailed timing for specific samples to avoid flooding output
if i % (PERFORMANCE_SAMPLE_SIZE * 10) == 0 {
println!("Record {}: Insertion took {:?}", i, insert_duration);
}
} else {
tree.set(&key, value)?;
}
// Show progress at intervals
if (i + 1) % PROGRESS_INTERVAL == 0 || i == TOTAL_RECORDS - 1 {
let records_in_batch = i + 1 - last_batch_records;
let batch_duration = last_batch_time.elapsed();
let records_per_second = records_in_batch as f64 / batch_duration.as_secs_f64();
insertion_times.push((i + 1, batch_duration));
print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
i + 1, TOTAL_RECORDS,
(i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
records_per_second);
io::stdout().flush().unwrap();
last_batch_time = Instant::now();
last_batch_records = i + 1;
}
}
let total_duration = start_time.elapsed();
println!("\n\nPerformance Summary:");
println!("Total time to insert {} records: {:?}", TOTAL_RECORDS, total_duration);
println!("Average insertion rate: {:.2} records/second",
TOTAL_RECORDS as f64 / total_duration.as_secs_f64());
// Show performance trend
println!("\nPerformance Trend (records inserted vs. time per batch):");
for (i, (record_count, duration)) in insertion_times.iter().enumerate() {
if i % 10 == 0 || i == insertion_times.len() - 1 { // Only show every 10th point to avoid too much output
println!(" After {} records: {:?} for {} records ({:.2} records/sec)",
record_count,
duration,
PROGRESS_INTERVAL,
PROGRESS_INTERVAL as f64 / duration.as_secs_f64());
}
}
// Test access performance with distributed samples
println!("\nTesting access performance with distributed samples...");
let mut total_get_time = Duration::new(0, 0);
let num_samples = 1000;
// Use a simple distribution pattern instead of random
for i in 0..num_samples {
// Distribute samples across the entire range
let sample_id = (i * (TOTAL_RECORDS / num_samples)) % TOTAL_RECORDS;
let key = format!("key:{:08}", sample_id);
let get_start = Instant::now();
let _ = tree.get(&key)?;
total_get_time += get_start.elapsed();
}
println!("Average time to retrieve a record: {:?}",
total_get_time / num_samples as u32);
// Test prefix search performance
println!("\nTesting prefix search performance...");
let prefixes = ["key:0", "key:1", "key:5", "key:9"];
for prefix in &prefixes {
let list_start = Instant::now();
let keys = tree.list(prefix)?;
let list_duration = list_start.elapsed();
println!("Found {} keys with prefix '{}' in {:?}",
keys.len(), prefix, list_duration);
}
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("\nCleaned up database directory");
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}

123
tst/examples/prefix_ops.rs Normal file
View File

@ -0,0 +1,123 @@
use tst::TST;
use std::time::Instant;
use std::io::{self, Write};
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_prefix_example");
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
// Create a new TST
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Insert a variety of keys with different prefixes
println!("Inserting data with various prefixes...");
// Names
let names = [
"Alice", "Alexander", "Amanda", "Andrew", "Amy",
"Bob", "Barbara", "Benjamin", "Brenda", "Brian",
"Charlie", "Catherine", "Christopher", "Cynthia", "Carl",
"David", "Diana", "Daniel", "Deborah", "Donald",
"Edward", "Elizabeth", "Eric", "Emily", "Ethan"
];
for (i, name) in names.iter().enumerate() {
let value = format!("person-{}", i).into_bytes();
tree.set(name, value)?;
}
// Cities
let cities = [
"New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
"Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose",
"Austin", "Jacksonville", "Fort Worth", "Columbus", "San Francisco",
"Charlotte", "Indianapolis", "Seattle", "Denver", "Washington"
];
for (i, city) in cities.iter().enumerate() {
let value = format!("city-{}", i).into_bytes();
tree.set(city, value)?;
}
// Countries
let countries = [
"United States", "Canada", "Mexico", "Brazil", "Argentina",
"United Kingdom", "France", "Germany", "Italy", "Spain",
"China", "Japan", "India", "Australia", "Russia"
];
for (i, country) in countries.iter().enumerate() {
let value = format!("country-{}", i).into_bytes();
tree.set(country, value)?;
}
println!("Total items inserted: {}", names.len() + cities.len() + countries.len());
// Test prefix operations
test_prefix(&mut tree, "A")?;
test_prefix(&mut tree, "B")?;
test_prefix(&mut tree, "C")?;
test_prefix(&mut tree, "San")?;
test_prefix(&mut tree, "United")?;
// Test non-existent prefix
test_prefix(&mut tree, "Z")?;
// Test empty prefix (should return all keys)
println!("\nTesting empty prefix (should return all keys):");
let start = Instant::now();
let all_keys = tree.list("")?;
let duration = start.elapsed();
println!("Found {} keys with empty prefix in {:?}", all_keys.len(), duration);
println!("First 5 keys (alphabetically):");
for key in all_keys.iter().take(5) {
println!(" {}", key);
}
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("\nCleaned up database directory");
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}
/// Runs `list` and, when there are matches, `getall` for `prefix`, printing
/// the matched keys, the number of values and the time each call took.
///
/// # Errors
///
/// Propagates any error returned by `TST::list` or `TST::getall`.
fn test_prefix(tree: &mut TST, prefix: &str) -> Result<(), tst::Error> {
    println!("\nTesting prefix '{}':", prefix);

    // Time the key listing on its own.
    let list_timer = Instant::now();
    let matches = tree.list(prefix)?;
    let list_duration = list_timer.elapsed();
    println!("Found {} keys with prefix '{}' in {:?}", matches.len(), prefix, list_duration);

    // Nothing matched: skip the value lookup entirely.
    if matches.is_empty() {
        return Ok(());
    }

    println!("Keys:");
    matches.iter().for_each(|key| println!(" {}", key));

    // Time the value retrieval for the same prefix.
    let getall_timer = Instant::now();
    let values = tree.getall(prefix)?;
    let getall_duration = getall_timer.elapsed();
    println!("Retrieved {} values in {:?}", values.len(), getall_duration);

    let first_value = match values.first() {
        Some(bytes) => String::from_utf8_lossy(bytes),
        None => "None".into(),
    };
    println!("First value: {}", first_value);

    Ok(())
}

36
tst/src/error.rs Normal file
View File

@ -0,0 +1,36 @@
//! Error types for the TST module.
use thiserror::Error;
use std::io;
/// Error type for TST operations.
///
/// `OurDB` and `IO` are produced automatically by `?` through their
/// `#[from]` conversions; the remaining variants carry a human-readable
/// message or the offending key/prefix.
#[derive(Debug, Error)]
pub enum Error {
/// Error reported by the underlying OurDB storage layer.
#[error("OurDB error: {0}")]
OurDB(#[from] ourdb::Error),
/// The requested key is not stored in the tree; carries the key.
#[error("Key not found: {0}")]
KeyNotFound(String),
/// No node in the tree matches the requested prefix; carries the prefix.
#[error("Prefix not found: {0}")]
PrefixNotFound(String),
/// Error during serialization; carries a description.
#[error("Serialization error: {0}")]
Serialization(String),
/// Stored bytes could not be decoded into a node; carries a description.
#[error("Deserialization error: {0}")]
Deserialization(String),
/// The operation is not valid for its input or for the tree's state
/// (for example an empty key); carries a description.
#[error("Invalid operation: {0}")]
InvalidOperation(String),
/// Filesystem or other IO error.
#[error("IO error: {0}")]
IO(#[from] io::Error),
}

122
tst/src/lib.rs Normal file
View File

@ -0,0 +1,122 @@
//! TST is a space-optimized tree data structure that enables efficient string key operations
//! with persistent storage using OurDB as a backend.
//!
//! This implementation provides a persistent ternary search tree that can be used for efficient
//! string key operations, such as auto-complete, routing tables, and more.
mod error;
mod node;
mod operations;
mod serialize;
pub use error::Error;
pub use node::TSTNode;
use ourdb::OurDB;
/// TST represents a ternary search tree data structure with persistent storage.
///
/// Nodes are not held in memory: each one is stored as a record in the
/// OurDB database and addressed by its database ID.
pub struct TST {
/// Database for persistent storage of the serialized nodes
db: OurDB,
/// Database ID of the root node (`None` means the tree is not initialized)
root_id: Option<u32>,
}
// Public API of the tree. Every method delegates to the function of the same
// purpose in the `operations` module.
impl TST {
/// Creates a new TST with the specified database path.
///
/// # Arguments
///
/// * `path` - The path to the database directory (created if missing)
/// * `reset` - Whether to delete an existing database at `path` and start empty
///
/// # Returns
///
/// A new `TST` instance
///
/// # Errors
///
/// Returns an error if the directory or the database cannot be created or opened
pub fn new(path: &str, reset: bool) -> Result<Self, Error> {
operations::new_tst(path, reset)
}
/// Sets a key-value pair in the tree, overwriting the value of an existing key.
///
/// # Arguments
///
/// * `key` - The key to set; must not be empty
/// * `value` - The value to set
///
/// # Errors
///
/// Returns `Error::InvalidOperation` for an empty key, or an error if the
/// underlying storage operation fails
pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error> {
operations::set(self, key, value)
}
/// Gets a value by key from the tree.
///
/// # Arguments
///
/// * `key` - The key to get; must not be empty
///
/// # Returns
///
/// The value associated with the key
///
/// # Errors
///
/// Returns `Error::KeyNotFound` if the key is not stored,
/// `Error::InvalidOperation` for an empty key, or an error if the
/// underlying storage operation fails
pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error> {
operations::get(self, key)
}
/// Deletes a key from the tree.
///
/// # Arguments
///
/// * `key` - The key to delete; must not be empty
///
/// # Errors
///
/// Returns `Error::KeyNotFound` if the key is not stored,
/// `Error::InvalidOperation` for an empty key, or an error if the
/// underlying storage operation fails
pub fn delete(&mut self, key: &str) -> Result<(), Error> {
operations::delete(self, key)
}
/// Lists all keys with a given prefix.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for; the empty prefix selects every key
///
/// # Returns
///
/// A list of keys that start with the given prefix; empty when no key matches
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error> {
operations::list(self, prefix)
}
/// Gets all values for keys with a given prefix.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for; the empty prefix selects every key
///
/// # Returns
///
/// A list of values for keys that start with the given prefix
///
/// # Errors
///
/// Returns an error if listing the matching keys fails
pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
operations::getall(self, prefix)
}
}

49
tst/src/node.rs Normal file
View File

@ -0,0 +1,49 @@
//! Node types for the TST module.
/// A single node of the ternary search tree.
///
/// Children are referenced by database ID rather than by pointer; `None`
/// means the corresponding child does not exist.
#[derive(Debug, Clone, PartialEq)]
pub struct TSTNode {
    /// The character this node matches.
    pub character: char,

    /// Payload of the key ending here (empty when no key ends at this node).
    pub value: Vec<u8>,

    /// `true` when a stored key ends at this node.
    pub is_end_of_key: bool,

    /// Child for characters that sort before `character`.
    pub left_id: Option<u32>,

    /// Child for the next character of keys that match `character`.
    pub middle_id: Option<u32>,

    /// Child for characters that sort after `character`.
    pub right_id: Option<u32>,
}

impl TSTNode {
    /// Builds a childless node from its character, value and end-of-key flag.
    pub fn new(character: char, value: Vec<u8>, is_end_of_key: bool) -> Self {
        TSTNode {
            left_id: None,
            middle_id: None,
            right_id: None,
            character,
            value,
            is_end_of_key,
        }
    }

    /// Builds the placeholder root: a childless, valueless node whose
    /// character is the null character.
    pub fn new_root() -> Self {
        Self::new('\0', Vec::new(), false)
    }
}

418
tst/src/operations.rs Normal file
View File

@ -0,0 +1,418 @@
//! Implementation of TST operations.
use crate::error::Error;
use crate::node::TSTNode;
use crate::TST;
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
/// Creates a new TST with the specified database path.
///
/// When `reset` is true an existing directory at `path` is deleted first.
/// The directory is then (re)created and an OurDB database is opened in it
/// in incremental mode.
///
/// # Errors
///
/// Returns an error if the directory cannot be removed or created, or if an
/// OurDB operation fails.
pub fn new_tst(path: &str, reset: bool) -> Result<TST, Error> {
// If the path exists and reset is true, remove it first
let path_buf = PathBuf::from(path);
if path_buf.exists() && reset {
std::fs::remove_dir_all(&path_buf)?;
}
// Create the directory if it doesn't exist
std::fs::create_dir_all(&path_buf)?;
let config = OurDBConfig {
path: path_buf,
incremental_mode: true,
file_size: Some(1024 * 1024), // 1 MiB (1024 * 1024 bytes) per database file
keysize: Some(4), // Use keysize=4 (default)
};
let mut db = OurDB::new(config)?;
// A next ID of 1 is taken to mean that nothing has been stored yet, so a fresh
// root has to be written. NOTE(review): this relies on OurDB handing out IDs
// starting at 1 in incremental mode - confirm against the ourdb crate.
let root_id = if db.get_next_id()? == 1 || reset {
// Create a new root node
let root = TSTNode::new_root();
let root_id = db.set(OurDBSetArgs {
id: None,
data: &root.serialize(),
})?;
Some(root_id)
} else {
// Use existing root node
Some(1) // Root node always has ID 1
};
Ok(TST {
db,
root_id,
})
}
/// Sets a key-value pair in the tree.
pub fn set(tree: &mut TST, key: &str, value: Vec<u8>) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
set_recursive(tree, root_id, &chars, 0, value)?;
Ok(())
}
/// Recursive helper function for setting a key-value pair.
///
/// Starting at the node stored under `node_id`, places `chars[pos..]` into the
/// tree: descends left/right while the current character differs from the
/// node's character, descends to the middle child (advancing `pos`) when it
/// matches, and creates missing nodes on the way. The node matching the last
/// character is flagged as end of key and receives `value`.
///
/// Returns the ID reported by `save_node` for the node that received the value.
///
/// # Errors
///
/// Propagates any error from loading or saving nodes.
fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value: Vec<u8>) -> Result<u32, Error> {
let mut node = tree.get_node(node_id)?;
if pos >= chars.len() {
// We've reached the end of the key
node.is_end_of_key = true;
node.value = value;
return tree.save_node(Some(node_id), &node);
}
let current_char = chars[pos];
// A null character marks a node that holds no character yet (the freshly
// created root): claim it for the current character.
if node.character == '\0' {
// Root node or empty node, set the character
node.character = current_char;
// Shadow `node_id` with the ID reported by the save; it is used for all
// further access to this node below.
let node_id = tree.save_node(Some(node_id), &node)?;
// Continue with the next character
if pos + 1 < chars.len() {
// Create the node for the next character and attach it as middle child
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
let mut updated_node = tree.get_node(node_id)?;
updated_node.middle_id = Some(new_id);
tree.save_node(Some(node_id), &updated_node)?;
return set_recursive(tree, new_id, chars, pos + 1, value);
} else {
// This is the last character
let mut updated_node = tree.get_node(node_id)?;
updated_node.is_end_of_key = true;
updated_node.value = value;
return tree.save_node(Some(node_id), &updated_node);
}
}
if current_char < node.character {
// Go left (same position in the key, smaller character)
if let Some(left_id) = node.left_id {
return set_recursive(tree, left_id, chars, pos, value);
} else {
// Create new left node
let new_node = TSTNode::new(current_char, Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.left_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
// The new node matches `current_char`, so the recursion continues in its
// "character matches" branch.
return set_recursive(tree, new_id, chars, pos, value);
}
} else if current_char > node.character {
// Go right (same position in the key, larger character)
if let Some(right_id) = node.right_id {
return set_recursive(tree, right_id, chars, pos, value);
} else {
// Create new right node
let new_node = TSTNode::new(current_char, Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.right_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos, value);
}
} else {
// Character matches, go middle (next character)
if pos + 1 >= chars.len() {
// This is the last character
node.is_end_of_key = true;
node.value = value;
return tree.save_node(Some(node_id), &node);
}
if let Some(middle_id) = node.middle_id {
return set_recursive(tree, middle_id, chars, pos + 1, value);
} else {
// Create new middle node
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.middle_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos + 1, value);
}
}
}
/// Gets a value by key from the tree.
pub fn get(tree: &mut TST, key: &str) -> Result<Vec<u8>, Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let node = tree.get_node(node_id)?;
if node.is_end_of_key {
Ok(node.value.clone())
} else {
Err(Error::KeyNotFound(key.to_string()))
}
}
/// Finds a node by key.
fn find_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
let node = tree.get_node(node_id)?;
if pos >= chars.len() {
return Ok(node_id);
}
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
find_node(tree, left_id, chars, pos)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
find_node(tree, right_id, chars, pos)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
} else {
// Character matches
if pos + 1 >= chars.len() {
// This is the last character
Ok(node_id)
} else if let Some(middle_id) = node.middle_id {
// Go to next character
find_node(tree, middle_id, chars, pos + 1)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
}
}
/// Deletes a key from the tree.
pub fn delete(tree: &mut TST, key: &str) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let mut node = tree.get_node(node_id)?;
if !node.is_end_of_key {
return Err(Error::KeyNotFound(key.to_string()));
}
// If the node has a middle child, just mark it as not end of key
if node.middle_id.is_some() || node.left_id.is_some() || node.right_id.is_some() {
node.is_end_of_key = false;
node.value = Vec::new();
tree.save_node(Some(node_id), &node)?;
return Ok(());
}
// Otherwise, we need to remove the node and update its parent
// This is more complex and would require tracking the path to the node
// For simplicity, we'll just mark it as not end of key for now
node.is_end_of_key = false;
node.value = Vec::new();
tree.save_node(Some(node_id), &node)?;
Ok(())
}
/// Lists all keys with a given prefix.
pub fn list(tree: &mut TST, prefix: &str) -> Result<Vec<String>, Error> {
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let mut result = Vec::new();
// Handle empty prefix case - will return all keys
if prefix.is_empty() {
collect_all_keys(tree, root_id, String::new(), &mut result)?;
return Ok(result);
}
// Find the node corresponding to the prefix
let chars: Vec<char> = prefix.chars().collect();
let node_id = match find_prefix_node(tree, root_id, &chars, 0) {
Ok(id) => id,
Err(_) => return Ok(Vec::new()), // Prefix not found, return empty list
};
// Collect all keys from the subtree
collect_keys_with_prefix(tree, node_id, prefix.to_string(), &mut result)?;
Ok(result)
}
/// Finds the node corresponding to a prefix.
fn find_prefix_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
if pos >= chars.len() {
return Ok(node_id);
}
let node = tree.get_node(node_id)?;
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
find_prefix_node(tree, left_id, chars, pos)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
find_prefix_node(tree, right_id, chars, pos)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
} else {
// Character matches
if pos + 1 >= chars.len() {
// This is the last character of the prefix
Ok(node_id)
} else if let Some(middle_id) = node.middle_id {
// Go to next character
find_prefix_node(tree, middle_id, chars, pos + 1)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
}
}
/// Collects every stored key that starts with the prefix `current_path`.
///
/// `node_id` must be the node that matched the **last** character of the
/// prefix, so `current_path` already ends with that node's character.
///
/// # Errors
///
/// Propagates any error from loading nodes.
fn collect_keys_with_prefix(
    tree: &mut TST,
    node_id: u32,
    current_path: String,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;

    // The prefix itself is a stored key.
    if node.is_end_of_key {
        result.push(current_path.clone());
    }

    // Only the middle subtree extends the prefix. The left and right children
    // hold a different character at the prefix's last position, so their keys
    // do not start with the prefix and must not be visited. The node's own
    // character is already part of `current_path` and must not be appended
    // again.
    if let Some(middle_id) = node.middle_id {
        collect_all_keys(tree, middle_id, current_path, result)?;
    }

    Ok(())
}
/// Appends to `result` every key found in the subtree rooted at `node_id`.
///
/// `current_path` is the text of the key characters matched above this node;
/// the node's own character is not part of it yet.
///
/// # Errors
///
/// Propagates any error from loading nodes.
fn collect_all_keys(
    tree: &mut TST,
    node_id: u32,
    current_path: String,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;

    // Path including this node's character: used for the node itself and for
    // its middle subtree.
    let mut path_with_char = String::with_capacity(current_path.len() + node.character.len_utf8());
    path_with_char.push_str(&current_path);
    path_with_char.push(node.character);

    if node.is_end_of_key {
        result.push(path_with_char.clone());
    }

    // Left and right children replace this node's character, so they continue
    // from the unextended path; the middle child continues from the extended one.
    if let Some(left_id) = node.left_id {
        collect_all_keys(tree, left_id, current_path.clone(), result)?;
    }
    if let Some(middle_id) = node.middle_id {
        collect_all_keys(tree, middle_id, path_with_char, result)?;
    }
    if let Some(right_id) = node.right_id {
        collect_all_keys(tree, right_id, current_path, result)?;
    }

    Ok(())
}
/// Returns the values of all keys selected by `prefix`, in the order in which
/// `list` reports the keys.
///
/// Keys whose value cannot be fetched are skipped silently.
///
/// # Errors
///
/// Propagates any error returned by `list`.
pub fn getall(tree: &mut TST, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
    let matching_keys = list(tree, prefix)?;

    // `.ok()` drops failed lookups, leaving only the values that were found.
    Ok(matching_keys
        .iter()
        .filter_map(|key| get(tree, key).ok())
        .collect())
}
impl TST {
/// Helper function to get a node from the database.
pub(crate) fn get_node(&mut self, node_id: u32) -> Result<TSTNode, Error> {
let data = self.db.get(node_id)?;
TSTNode::deserialize(&data)
}
/// Helper function to save a node to the database.
pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &TSTNode) -> Result<u32, Error> {
let data = node.serialize();
let args = OurDBSetArgs {
id: node_id,
data: &data,
};
Ok(self.db.set(args)?)
}
}

134
tst/src/serialize.rs Normal file
View File

@ -0,0 +1,134 @@
//! Serialization and deserialization for TST nodes.
use crate::error::Error;
use crate::node::TSTNode;
/// Current binary format version.
///
/// Written as the first byte of every serialized node; deserialization
/// rejects records whose first byte differs.
const VERSION: u8 = 1;
impl TSTNode {
    /// Size in bytes of a serialized node that carries no value:
    /// version (1) + character (4) + end-of-key flag (1) + value length (4)
    /// + three child IDs (3 * 4).
    const MIN_SERIALIZED_LEN: usize = 22;

    /// Serializes a node to bytes for storage.
    ///
    /// Layout (all integers little-endian):
    ///
    /// | bytes | content                                   |
    /// |-------|-------------------------------------------|
    /// | 1     | format version                            |
    /// | 4     | character as a `u32` code point           |
    /// | 1     | end-of-key flag (0 or 1)                  |
    /// | 4     | value length `n`                          |
    /// | `n`   | value bytes                               |
    /// | 4 * 3 | left, middle, right child ID (0 = no child) |
    ///
    /// The value is only written for end-of-key nodes; other nodes are stored
    /// with a zero-length value. The value length is stored as a `u32`, so a
    /// value longer than `u32::MAX` bytes would have its length truncated.
    pub fn serialize(&self) -> Vec<u8> {
        // Only nodes that end a key persist their value.
        let stored_value: &[u8] = if self.is_end_of_key { &self.value } else { &[] };

        let mut buffer = Vec::with_capacity(Self::MIN_SERIALIZED_LEN + stored_value.len());

        // Version
        buffer.push(VERSION);

        // Character (as UTF-32)
        buffer.extend_from_slice(&(self.character as u32).to_le_bytes());

        // Is end of key
        buffer.push(u8::from(self.is_end_of_key));

        // Value length followed by the value itself
        buffer.extend_from_slice(&(stored_value.len() as u32).to_le_bytes());
        buffer.extend_from_slice(stored_value);

        // Child pointers; 0 encodes "no child"
        for child in [self.left_id, self.middle_id, self.right_id] {
            buffer.extend_from_slice(&child.unwrap_or(0).to_le_bytes());
        }

        buffer
    }

    /// Deserializes bytes produced by [`TSTNode::serialize`] back into a node.
    ///
    /// # Errors
    ///
    /// Returns `Error::Deserialization` when the data is shorter than the
    /// smallest possible record, carries an unsupported version, holds an
    /// invalid character code, or is too short for the value length it
    /// declares or for the child pointers that follow the value.
    pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
        /// Reads a little-endian `u32` starting at `pos`; the caller has
        /// already verified that four bytes are available.
        fn read_u32(data: &[u8], pos: usize) -> u32 {
            u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]])
        }

        /// Maps the on-disk "0 = no child" encoding back to an `Option`.
        fn child(id: u32) -> Option<u32> {
            if id == 0 {
                None
            } else {
                Some(id)
            }
        }

        // Offsets of the fixed-position header fields.
        const CHARACTER_AT: usize = 1;
        const END_FLAG_AT: usize = 5;
        const VALUE_LEN_AT: usize = 6;
        const VALUE_AT: usize = 10;
        const CHILDREN_LEN: usize = 12;

        // Minimum size: version + char + is_end + value_len + 3 child IDs
        if data.len() < Self::MIN_SERIALIZED_LEN {
            return Err(Error::Deserialization("Data too short".to_string()));
        }

        // Version
        let version = data[0];
        if version != VERSION {
            return Err(Error::Deserialization(format!("Unsupported version: {}", version)));
        }

        // Character
        let character = char::from_u32(read_u32(data, CHARACTER_AT))
            .ok_or_else(|| Error::Deserialization("Invalid character".to_string()))?;

        // Is end of key
        let is_end_of_key = data[END_FLAG_AT] != 0;

        // Value length, checked against what is actually left. The subtraction
        // cannot underflow because of the minimum-length check above, and
        // comparing this way avoids overflowing `offset + length`.
        let value_len = read_u32(data, VALUE_LEN_AT) as usize;
        if value_len > data.len() - VALUE_AT {
            return Err(Error::Deserialization("Value length exceeds data".to_string()));
        }

        // Value
        let children_at = VALUE_AT + value_len;
        let value = data[VALUE_AT..children_at].to_vec();

        // Child pointers
        if data.len() - children_at < CHILDREN_LEN {
            return Err(Error::Deserialization("Data too short for child pointers".to_string()));
        }
        let left_id = read_u32(data, children_at);
        let middle_id = read_u32(data, children_at + 4);
        let right_id = read_u32(data, children_at + 8);

        Ok(TSTNode {
            character,
            value,
            is_end_of_key,
            left_id: child(left_id),
            middle_id: child(middle_id),
            right_id: child(right_id),
        })
    }
}
/// Returns the longest prefix shared by `a` and `b`, compared character by
/// character (not byte by byte).
///
/// The result is empty when either string is empty or when the first
/// characters already differ.
pub fn get_common_prefix(a: &str, b: &str) -> String {
    a.chars()
        .zip(b.chars())
        // Stop at the first position where the two strings disagree.
        .take_while(|(from_a, from_b)| from_a == from_b)
        .map(|(shared, _)| shared)
        .collect()
}

215
tst/tests/basic_test.rs Normal file
View File

@ -0,0 +1,215 @@
use tst::TST;
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;
/// Creates a fresh, uniquely named database directory under the system
/// temporary directory and returns its path.
///
/// Tests run in parallel and each one resets its database, so every call must
/// yield a different directory. The name combines the process ID, a
/// nanosecond timestamp and a per-process counter; the counter alone already
/// guarantees uniqueness within one test binary.
///
/// # Panics
///
/// Panics if the system clock is set before the UNIX epoch or if the
/// directory cannot be created.
fn get_test_db_path() -> String {
    use std::sync::atomic::{AtomicU64, Ordering};

    // Distinguishes calls that land on the same timestamp.
    static NEXT_DB_INDEX: AtomicU64 = AtomicU64::new(0);
    let index = NEXT_DB_INDEX.fetch_add(1, Ordering::Relaxed);

    let timestamp = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .expect("system clock is set before the UNIX epoch")
        .as_nanos();

    let path = temp_dir().join(format!(
        "tst_test_{}_{}_{}",
        std::process::id(),
        timestamp,
        index
    ));
    fs::create_dir_all(&path).unwrap();
    path.to_string_lossy().to_string()
}
/// Removes the test database directory at `path`.
///
/// Best effort: a failure (for example a directory that is already gone) is
/// ignored on purpose so that cleanup never fails a test.
fn cleanup_test_db(path: &str) {
    fs::remove_dir_all(path).ok();
}
/// A tree can be created on a fresh database directory.
#[test]
fn test_create_tst() {
    let db_dir = get_test_db_path();

    let created = TST::new(&db_dir, true);
    assert!(created.is_ok());

    cleanup_test_db(&db_dir);
}
/// A value stored under a key is returned unchanged by a lookup of that key.
#[test]
fn test_set_and_get() {
    let db_dir = get_test_db_path();
    let mut tree = TST::new(&db_dir, true).unwrap();

    let key = "test_key";
    let expected = b"test_value".to_vec();

    // Store the pair...
    let stored = tree.set(key, expected.clone());
    assert!(stored.is_ok());

    // ...and read it back.
    let fetched = tree.get(key);
    assert!(fetched.is_ok());
    assert_eq!(fetched.unwrap(), expected);

    cleanup_test_db(&db_dir);
}
/// Looking up a key that was never stored yields an error.
#[test]
fn test_get_nonexistent_key() {
    let db_dir = get_test_db_path();
    let mut tree = TST::new(&db_dir, true).unwrap();

    let missing = tree.get("nonexistent_key");
    assert!(missing.is_err());

    cleanup_test_db(&db_dir);
}
/// A stored key can be deleted, after which looking it up fails.
#[test]
fn test_delete() {
    let db_dir = get_test_db_path();
    let mut tree = TST::new(&db_dir, true).unwrap();

    let key = "delete_test";
    tree.set(key, b"to_be_deleted".to_vec()).unwrap();

    // Present before the deletion.
    let before = tree.get(key);
    assert!(before.is_ok());

    let deleted = tree.delete(key);
    assert!(deleted.is_ok());

    // Gone afterwards.
    let after = tree.get(key);
    assert!(after.is_err());

    cleanup_test_db(&db_dir);
}
/// Several unrelated keys can be stored and each one keeps its own value.
#[test]
fn test_multiple_keys() {
    let db_dir = get_test_db_path();
    let mut tree = TST::new(&db_dir, true).unwrap();

    let keys = ["apple", "banana", "cherry", "date", "elderberry"];
    // The value stored for the key at a given index.
    let value_for = |index: usize| format!("value_{}", index).into_bytes();

    for (index, key) in keys.iter().enumerate() {
        tree.set(key, value_for(index)).unwrap();
    }

    // Check only after all insertions, so later keys cannot have disturbed
    // earlier ones unnoticed.
    for (index, key) in keys.iter().enumerate() {
        let stored = tree.get(key).unwrap();
        assert_eq!(stored, value_for(index));
    }

    cleanup_test_db(&db_dir);
}
/// Listing by prefix returns exactly the keys that start with the prefix.
#[test]
fn test_list_prefix() {
    let db_dir = get_test_db_path();
    let mut tree = TST::new(&db_dir, true).unwrap();

    // Keys grouped by shared prefix.
    let keys = [
        "apple", "application", "append",
        "banana", "bandana",
        "cherry", "chocolate",
    ];
    for key in &keys {
        tree.set(key, key.as_bytes().to_vec()).unwrap();
    }

    // Prefix "app"
    let app_keys = tree.list("app").unwrap();
    assert_eq!(app_keys.len(), 3);
    for expected in ["apple", "application", "append"] {
        assert!(app_keys.contains(&expected.to_string()));
    }

    // Prefix "ban"
    let ban_keys = tree.list("ban").unwrap();
    assert_eq!(ban_keys.len(), 2);
    for expected in ["banana", "bandana"] {
        assert!(ban_keys.contains(&expected.to_string()));
    }

    // Prefix "c"
    let c_keys = tree.list("c").unwrap();
    assert_eq!(c_keys.len(), 2);
    for expected in ["cherry", "chocolate"] {
        assert!(c_keys.contains(&expected.to_string()));
    }

    // A prefix nothing starts with gives an empty list, not an error.
    let z_keys = tree.list("z").unwrap();
    assert_eq!(z_keys.len(), 0);

    cleanup_test_db(&db_dir);
}
/// Fetching all values by prefix returns the values of exactly the keys that
/// start with the prefix.
#[test]
fn test_getall_prefix() {
    let db_dir = get_test_db_path();
    let mut tree = TST::new(&db_dir, true).unwrap();

    // Every key is stored with its own text as value.
    let keys = [
        "apple", "application", "append",
        "banana", "bandana",
        "cherry", "chocolate",
    ];
    for key in &keys {
        tree.set(key, key.as_bytes().to_vec()).unwrap();
    }

    let app_values = tree.getall("app").unwrap();
    assert_eq!(app_values.len(), 3);

    // Decode the values to text for easier comparison.
    let app_value_strings: Vec<String> = app_values
        .iter()
        .map(|bytes| String::from_utf8_lossy(bytes).into_owned())
        .collect();
    for expected in ["apple", "application", "append"] {
        assert!(app_value_strings.contains(&expected.to_string()));
    }

    cleanup_test_db(&db_dir);
}
/// Listing with the empty prefix returns every stored key.
#[test]
fn test_empty_prefix() {
    let db_dir = get_test_db_path();
    let mut tree = TST::new(&db_dir, true).unwrap();

    let keys = ["apple", "banana", "cherry"];
    for key in &keys {
        tree.set(key, key.as_bytes().to_vec()).unwrap();
    }

    let all_keys = tree.list("").unwrap();
    assert_eq!(all_keys.len(), keys.len());
    for key in &keys {
        assert!(all_keys.contains(&key.to_string()));
    }

    cleanup_test_db(&db_dir);
}

215
tst/tests/prefix_test.rs Normal file
View File

@ -0,0 +1,215 @@
use tst::TST;
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;
/// Creates a fresh, uniquely named directory under the system temp dir and
/// returns its path as a string.
///
/// The directory name combines the process id, a nanosecond timestamp and a
/// per-process counter. A whole-second timestamp alone is not unique: the
/// test harness runs tests in parallel, so several tests would receive the
/// same directory and reset or delete each other's database.
///
/// # Panics
/// Panics if the system clock is set before the Unix epoch or if the
/// directory cannot be created.
fn get_test_db_path() -> String {
    // Fully qualified paths keep this helper independent of extra `use` lines.
    static NEXT_ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let nanos = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    // The counter guarantees uniqueness within this process even when two
    // calls observe the same clock reading.
    let seq = NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

    let path = temp_dir().join(format!(
        "tst_prefix_test_{}_{}_{}",
        std::process::id(),
        nanos,
        seq
    ));
    fs::create_dir_all(&path).unwrap();
    path.to_string_lossy().to_string()
}
/// Best-effort removal of a test database directory and everything in it.
///
/// Any error (for example, the directory no longer exists) is deliberately
/// discarded so that cleanup can never fail a test.
fn cleanup_test_db(path: &str) {
    fs::remove_dir_all(path).ok();
}
/// Prefix listing when every key extends the same stem ("test").
#[test]
fn test_prefix_with_common_prefixes() {
    let db_dir = get_test_db_path();
    let mut tst = TST::new(&db_dir, true).unwrap();

    // All five keys start with "test"; only "tested" and "tester" continue
    // with an 'e'.
    let entries = [
        ("test", b"value1".to_vec()),
        ("testing", b"value2".to_vec()),
        ("tested", b"value3".to_vec()),
        ("tests", b"value4".to_vec()),
        ("tester", b"value5".to_vec()),
    ];
    for entry in entries.iter() {
        tst.set(entry.0, entry.1.clone()).unwrap();
    }

    // The stem matches every stored key, including the exact key "test".
    let under_test = tst.list("test").unwrap();
    assert_eq!(under_test.len(), 5);
    assert!(entries
        .iter()
        .all(|(key, _)| under_test.contains(&key.to_string())));

    // One more character narrows the result to two keys.
    let under_teste = tst.list("teste").unwrap();
    assert_eq!(under_teste.len(), 2);
    for expected in ["tested", "tester"].iter() {
        assert!(under_teste.contains(&expected.to_string()));
    }

    cleanup_test_db(&db_dir);
}
/// Prefix listing when no two keys share a first character.
#[test]
fn test_prefix_with_different_prefixes() {
    let db_dir = get_test_db_path();
    let mut tst = TST::new(&db_dir, true).unwrap();

    // Every key begins with a different ASCII letter.
    let entries = [
        ("apple", b"fruit1".to_vec()),
        ("banana", b"fruit2".to_vec()),
        ("cherry", b"fruit3".to_vec()),
        ("date", b"fruit4".to_vec()),
        ("elderberry", b"fruit5".to_vec()),
    ];
    for entry in entries.iter() {
        tst.set(entry.0, entry.1.clone()).unwrap();
    }

    // Looking up by the first character alone must find the full key.
    for entry in entries.iter() {
        let full_key = entry.0;
        // Byte slicing is safe here because all keys are ASCII.
        let first_char = &full_key[..1];
        let matches = tst.list(first_char).unwrap();
        assert!(matches.iter().any(|found| found.as_str() == full_key));
    }

    // No stored key starts with 'z'.
    let none = tst.list("z").unwrap();
    assert_eq!(none.len(), 0);

    cleanup_test_db(&db_dir);
}
/// The empty prefix is expected to match every key in the tree.
#[test]
fn test_prefix_with_empty_string() {
    let db_dir = get_test_db_path();
    let mut tst = TST::new(&db_dir, true).unwrap();

    let entries = [
        ("apple", b"fruit1".to_vec()),
        ("banana", b"fruit2".to_vec()),
        ("cherry", b"fruit3".to_vec()),
    ];
    for entry in entries.iter() {
        tst.set(entry.0, entry.1.clone()).unwrap();
    }

    // Nothing may be filtered out: one result per inserted key.
    let everything = tst.list("").unwrap();
    assert_eq!(everything.len(), entries.len());
    assert!(entries
        .iter()
        .all(|(key, _)| everything.contains(&key.to_string())));

    cleanup_test_db(&db_dir);
}
/// `getall` must return the value of every key that starts with the prefix.
#[test]
fn test_getall_with_prefix() {
    let db_dir = get_test_db_path();
    let mut tst = TST::new(&db_dir, true).unwrap();

    // Five keys sharing the stem "test", each with a distinct value.
    let entries = [
        ("test", b"value1".to_vec()),
        ("testing", b"value2".to_vec()),
        ("tested", b"value3".to_vec()),
        ("tests", b"value4".to_vec()),
        ("tester", b"value5".to_vec()),
    ];
    for entry in entries.iter() {
        tst.set(entry.0, entry.1.clone()).unwrap();
    }

    // Every stored value must be present; the order is not checked.
    let fetched = tst.getall("test").unwrap();
    assert_eq!(fetched.len(), 5);
    assert!(entries
        .iter()
        .all(|(_, expected)| fetched.contains(expected)));

    cleanup_test_db(&db_dir);
}
/// Prefix listing with keys that contain non-ASCII characters.
///
/// Three of the four keys start with "café" ("café", "cafétéria" and
/// "café au lait"); "caffè" only shares the shorter prefix "caf".
#[test]
fn test_prefix_with_unicode_characters() {
    let path = get_test_db_path();
    let mut tree = TST::new(&path, true).unwrap();

    // Insert keys with Unicode characters
    let test_data = [
        ("café", b"coffee".to_vec()),
        ("cafétéria", b"cafeteria".to_vec()),
        ("caffè", b"italian coffee".to_vec()),
        ("café au lait", b"coffee with milk".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }

    // Test prefix "café": "cafétéria" also begins with these four characters,
    // so three keys are expected, not two.
    let keys = tree.list("café").unwrap();
    assert_eq!(keys.len(), 3);
    assert!(keys.contains(&"café".to_string()));
    assert!(keys.contains(&"cafétéria".to_string()));
    assert!(keys.contains(&"café au lait".to_string()));

    // Test prefix "caf": shared by all four keys
    let keys = tree.list("caf").unwrap();
    assert_eq!(keys.len(), 4);
    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }

    cleanup_test_db(&path);
}
/// Prefix listing with long keys that diverge late in the string.
#[test]
fn test_prefix_with_long_keys() {
    let db_dir = get_test_db_path();
    let mut tst = TST::new(&db_dir, true).unwrap();

    // Three keys differ only in their final character; the fourth diverges
    // right after "this_is_a".
    let entries = [
        ("this_is_a_very_long_key_for_testing_purposes_1", b"value1".to_vec()),
        ("this_is_a_very_long_key_for_testing_purposes_2", b"value2".to_vec()),
        ("this_is_a_very_long_key_for_testing_purposes_3", b"value3".to_vec()),
        ("this_is_another_long_key_for_testing", b"value4".to_vec()),
    ];
    for entry in entries.iter() {
        tst.set(entry.0, entry.1.clone()).unwrap();
    }

    // "this_is_a_very" excludes the "this_is_another..." key.
    let very_keys = tst.list("this_is_a_very").unwrap();
    assert_eq!(very_keys.len(), 3);

    // "this_is" is shared by all four keys.
    let all_keys = tst.list("this_is").unwrap();
    assert_eq!(all_keys.len(), 4);
    assert!(entries
        .iter()
        .all(|(key, _)| all_keys.contains(&key.to_string())));

    cleanup_test_db(&db_dir);
}

365
tst_implementation_plan.md Normal file
View File

@ -0,0 +1,365 @@
# Ternary Search Tree (TST) Implementation Plan
## 1. Overview
A Ternary Search Tree (TST) is a type of trie where each node holds a single character and has up to three children: left, middle, and right. Unlike a RadixTree, which compresses common prefixes, a TST stores one character per node and orders sibling characters through the left and right links in a binary-search-tree-like fashion, while the middle link leads to the next character of the key.
```mermaid
graph TD
A[Root Node 'r'] --> B[Left Child 'a']
A --> C[Middle Child 'o']
A --> D[Right Child 't']
C --> E[Middle Child 'o']
E --> F[Middle Child 'm' - End of Key]
F --> G[Right Child 't' - End of Key]
```
The TST implementation will use OurDB as the backend for persistent storage, similar to the existing RadixTree implementation. The goal is to provide a more balanced tree structure that offers consistent performance across all operations (set, get, delete, list).
## 2. Core Data Structures
### 2.1 TST Node Structure
```rust
pub struct TSTNode {
// The character stored at this node
pub character: char,
// Value stored at this node (empty if not end of key)
pub value: Vec<u8>,
// Whether this node represents the end of a key
pub is_end_of_key: bool,
// References to child nodes
pub left_id: Option<u32>, // For characters < current character
pub middle_id: Option<u32>, // For characters == current character (next character in key)
pub right_id: Option<u32>, // For characters > current character
}
```
### 2.2 TST Structure
```rust
pub struct TST {
// Database for persistent storage
db: OurDB,
// Database ID of the root node
root_id: Option<u32>,
}
```
## 3. API Design
The TST will maintain similar core functionality to RadixTree but with an API that better suits its structure:
```rust
impl TST {
// Creates a new TST with the specified database path
pub fn new(path: &str, reset: bool) -> Result<Self, Error>;
// Sets a key-value pair in the tree
pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error>;
// Gets a value by key from the tree
pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error>;
// Deletes a key from the tree
pub fn delete(&mut self, key: &str) -> Result<(), Error>;
// Lists all keys with a given prefix
pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error>;
// Gets all values for keys with a given prefix
pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error>;
}
```
## 4. Implementation Strategy
### 4.1 Phase 1: Core Data Structures and Serialization
```mermaid
graph TD
A[Define TSTNode and TST structs] --> B[Implement serialization/deserialization]
B --> C[Implement Error handling]
C --> D[Implement OurDB integration]
```
1. Define the `TSTNode` and `TST` structs
2. Implement serialization and deserialization for `TSTNode`
3. Define error types for TST-specific errors
4. Implement OurDB integration for node storage and retrieval
### 4.2 Phase 2: Basic Tree Operations
```mermaid
graph TD
A[Implement new] --> B[Implement set]
B --> C[Implement get]
C --> D[Implement helper functions]
```
1. Implement the `new()` function for creating a new TST
2. Implement the `set()` function for inserting key-value pairs
3. Implement the `get()` function for retrieving values
4. Implement helper functions for node traversal and manipulation
### 4.3 Phase 3: Advanced Tree Operations
```mermaid
graph TD
A[Implement delete] --> B[Implement list]
B --> C[Implement getall]
C --> D[Optimize operations]
```
1. Implement the `delete()` function for removing keys
2. Implement the `list()` function for prefix-based key listing
3. Implement the `getall()` function for retrieving all values with a prefix
4. Optimize operations for balanced performance
### 4.4 Phase 4: Testing and Performance Evaluation
```mermaid
graph TD
A[Create unit tests] --> B[Create integration tests]
B --> C[Create performance tests]
C --> D[Compare with RadixTree]
D --> E[Optimize based on results]
```
1. Create unit tests for each component
2. Create integration tests for the complete system
3. Create performance tests similar to RadixTree's
4. Compare performance with RadixTree
5. Optimize based on performance results
## 5. Implementation Details
### 5.1 Node Structure and Storage
Each TST node will store a single character and have three child pointers (left, middle, right). The nodes will be serialized and stored in OurDB, with node references using OurDB record IDs.
### 5.2 Key Operations
#### 5.2.1 Insertion (set)
```mermaid
graph TD
A[Start at root] --> B{Root exists?}
B -- No --> C[Create root node]
B -- Yes --> D[Compare current char with node char]
D -- Less than --> E[Go to left child]
D -- Equal to --> F[Go to middle child]
D -- Greater than --> G[Go to right child]
E --> H{Child exists?}
F --> H
G --> H
H -- No --> I[Create new node]
H -- Yes --> J[Continue with next char]
I --> J
J --> K{End of key?}
K -- Yes --> L[Set value and mark as end of key]
K -- No --> D
```
1. Start at the root node
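2. For each character in the key, compare it with the current node's character:
- If the character is less than the current node's character, go to the left child and compare the same character again
- If the character is equal to the current node's character, advance to the next character of the key and go to the middle child
- If the character is greater than the current node's character, go to the right child and compare the same character again
- If the child doesn't exist, create a new node holding the character currently being inserted
3. When the last character of the key has been matched, set the value on that node and mark it as end of key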
#### 5.2.2 Lookup (get)
1. Start at the root node
2. For each character in the key, compare it with the current node's character:
- If the character is less than the current node's character, go to the left child and compare the same character again
- If the character is equal to the current node's character, advance to the next character of the key and go to the middle child
- If the character is greater than the current node's character, go to the right child and compare the same character again
- If the child doesn't exist, the key is not found
3. When the end of the key is reached, check if the node is marked as end of key
- If yes, return the value
- If no, the key is not found
#### 5.2.3 Deletion (delete)
1. Find the node corresponding to the end of the key
2. If the node has no children, remove it and update its parent
3. If the node has children, mark it as not end of key and clear its value
4. Recursively clean up any nodes that are no longer needed
#### 5.2.4 Prefix Operations (list, getall)
1. Find the node corresponding to the end of the prefix
2. Perform a traversal of the subtree rooted at that node
3. Collect all keys (for list) or values (for getall) from nodes marked as end of key
### 5.3 Serialization and OurDB Integration
#### 5.3.1 Node Structure for Serialization
Each TSTNode will be serialized with the following logical structure:
1. Version marker (for future format evolution)
2. Character data
3. Is-end-of-key flag
4. Value (if is-end-of-key is true)
5. Child node references (left, middle, right)
#### 5.3.2 OurDB Integration
The TST will use OurDB for node storage and retrieval:
1. **Node Storage**: Each node will be serialized and stored as a record in OurDB.
```rust
fn save_node(&mut self, node_id: Option<u32>, node: &TSTNode) -> Result<u32, Error> {
let data = node.serialize();
let args = OurDBSetArgs {
id: node_id,
data: &data,
};
Ok(self.db.set(args)?)
}
```
2. **Node Retrieval**: Nodes will be retrieved from OurDB and deserialized.
```rust
fn get_node(&mut self, node_id: u32) -> Result<TSTNode, Error> {
let data = self.db.get(node_id)?;
TSTNode::deserialize(&data)
}
```
3. **Root Node Management**: The TST will maintain a root node ID for traversal.
#### 5.3.3 Handling Large Datasets
For large datasets, we'll implement a batching approach similar to the RadixTree's large-scale tests:
1. **Batch Processing**: Process large datasets in manageable batches to avoid OurDB size limitations.
2. **Database Partitioning**: Create separate database instances for very large datasets.
3. **Memory Management**: Implement efficient memory usage patterns to avoid excessive memory consumption.
## 6. Project Structure
```
tst/
├── Cargo.toml
├── src/
│ ├── lib.rs # Public API and re-exports
│ ├── node.rs # TSTNode implementation
│ ├── serialize.rs # Serialization and deserialization
│ ├── error.rs # Error types
│ └── operations.rs # Tree operations implementation
├── tests/
│ ├── basic_test.rs # Basic operations tests
│ ├── prefix_test.rs # Prefix operations tests
│ └── edge_cases.rs # Edge case tests
└── examples/
├── basic_usage.rs # Basic usage example
├── prefix_ops.rs # Prefix operations example
└── performance.rs # Performance benchmark
```
## 7. Performance Considerations
### 7.1 Advantages of TST over RadixTree
1. **Balanced Structure**: TST naturally maintains a more balanced structure, which can lead to more consistent performance across operations.
2. **Character-by-Character Comparison**: TST performs character-by-character comparisons, which can be more efficient for certain workloads.
3. **Efficient Prefix Operations**: TST can efficiently handle prefix operations by traversing the middle child path.
### 7.2 Potential Optimizations
1. **Node Caching**: Cache frequently accessed nodes to reduce database operations.
2. **Balancing Techniques**: Implement balancing techniques to ensure the tree remains balanced.
3. **Batch Operations**: Support batch operations for improved performance.
4. **Memory Management**: Implement efficient memory usage patterns to avoid excessive memory consumption.
## 8. Testing Strategy
### 8.1 Unit Tests
1. Test `TSTNode` serialization/deserialization
2. Test character comparison operations
3. Test error handling
### 8.2 Integration Tests
1. Test basic CRUD operations
2. Test prefix operations
3. Test edge cases (empty keys, very long keys, etc.)
4. Test with large datasets
### 8.3 Performance Tests
1. Measure throughput for set/get operations
2. Measure latency for different operations
3. Test with different tree sizes and key distributions
4. Compare performance with RadixTree
#### 8.3.1 Performance Benchmarking
We'll create comprehensive benchmarks to compare the TST implementation with RadixTree:
```rust
// Example benchmark structure
fn benchmark_set_operations(tree_type: &str, num_records: usize) -> Duration {
let start_time = Instant::now();
// Create tree (TST or RadixTree)
let mut tree = match tree_type {
"tst" => create_tst(),
"radix" => create_radix_tree(),
_ => panic!("Unknown tree type"),
};
// Insert records
for i in 0..num_records {
let key = format!("key:{:08}", i);
let value = format!("val{}", i).into_bytes();
tree.set(&key, value).unwrap();
}
start_time.elapsed()
}
```
We'll benchmark the following operations:
- Set (insertion)
- Get (lookup)
- Delete
- List (prefix search)
- GetAll (prefix values)
For each operation, we'll measure:
- Throughput (operations per second)
- Latency (time per operation)
- Memory usage
- Database size
We'll test with various dataset characteristics:
- Small datasets (100-1,000 keys)
- Medium datasets (10,000-100,000 keys)
- Large datasets (1,000,000+ keys)
- Keys with common prefixes
- Keys with random distribution
- Long keys vs. short keys
## 9. Timeline and Milestones
1. **Week 1**: Core data structures and serialization
2. **Week 2**: Basic tree operations
3. **Week 3**: Advanced tree operations
4. **Week 4**: Testing and performance evaluation
5. **Week 5**: Optimization and documentation
## 10. Conclusion
This implementation plan provides a roadmap for creating a Ternary Search Tree (TST) as an alternative to the RadixTree implementation. The TST will maintain the same core functionality while providing a more balanced tree structure and aiming for balanced performance across all operations.
The implementation will leverage OurDB for persistent storage, similar to RadixTree, but with a different node structure and traversal algorithm that better suits the TST approach.